From b46f1f5aa89bc1aeb3558ca21c35f6af63c0ebda Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 28 Jul 2016 18:23:57 +0200 Subject: [PATCH 01/80] Revamp ProbePID_SSH + Add ProbePID_Serial --- framework/monitor.py | 193 ++++++++++++++++++++++++++++++++++-------- framework/plumbing.py | 2 +- 2 files changed, 157 insertions(+), 38 deletions(-) diff --git a/framework/monitor.py b/framework/monitor.py index 1511f66..afd476c 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -303,7 +303,7 @@ def set_data_model(self, dm): def add_probe(self, probe, blocking=False, after_feedback_retrieval=False): if probe.__class__.__name__ in self.probe_users: - raise AddExistingProbeToMonitorError(probe.__class_.__name__) + raise AddExistingProbeToMonitorError(probe.__class__.__name__) if blocking: self.probe_users[probe.__class__.__name__] = BlockingProbeUser(probe, after_feedback_retrieval) @@ -588,55 +588,133 @@ def set_timestamp(self): def get_timestamp(self): return self._now -class ProbePID_SSH(Probe): + + +class SSH_Backend(object): + + def __init__(self, sshd_ip, sshd_port=2, username='', password=''): + if not ssh_module: + raise eh.UnavailablePythonModule('Python module for SSH is not available!') + + self.sshd_ip = sshd_ip + self.sshd_port = sshd_port + self.username = username + self.password = password + + self.client = None + + def start(self): + self.client = ssh.SSHClient() + self.client.set_missing_host_key_policy(ssh.AutoAddPolicy()) + self.client.connect(self.sshd_ip, port=self.sshd_port, + username=self.username, + password=self.password) + + def stop(self): + self.client.close() + + def exec_command(self, cmd): + ssh_in, ssh_out, ssh_err = \ + self.client.exec_command(cmd) + + if ssh_err.read(): + # the command does not exist on the system + raise BackendError('The command does not exist on the host') + else: + return ssh_out.read() + + +class Serial_Backend(object): + + def __init__(self, serial_port, baudrate=115200, read_duration=2, username=None, password=None): + if not serial_module: + raise eh.UnavailablePythonModule('Python module for Serial is not available!') + + self.serial_port = serial_port + self.baudrate = baudrate + self.read_duration = read_duration + self.username = username + self.password = password + + self.client = None + + def start(self): + self.ser = serial.Serial(self.serial_port, self.baudrate, timeout=1, + dsrdtr=True, rtscts=True) + if self.username is not None: + assert self.password is not None + time.sleep(0.1) + self.ser.flushInput() + self.ser.write(self.username) + time.sleep(0.1) + pass_prompt = self.ser.read(1) + retry = 0 + while pass_prompt.find('p') == -1: + time.sleep(1) + retry += 1 + if retry > 2: + raise BackendError('Unable to establish a connection with the serial line.') + else: + pass_prompt = self.ser.readline() + time.sleep(0.1) + self.ser.write(self.password) + + def stop(self): + self.ser.close() + + def exec_command(self, cmd): + self.ser.flushInput() + self.ser.write(cmd) + result = b'' + t0 = datetime.datetime.now() + duration = -1 + try: + while duration < self.read_duration: + now = datetime.datetime.now() + duration = (now - t0).total_seconds() + time.sleep(0.1) + res = self.ser.readline() + result += res + except serial.SerialException: + raise BackendError('Exception while reading serial line') + else: + return result + + +class BackendError(Exception): pass + +class ProbePID(Probe): """ - This generic probe enables you to monitor a process PID through an - SSH connection. 
+ This is the base class for the generic probes that enable you to monitor a process PID. + The monitoring can be done through different backend. The current ones are SSH and Serial. Attributes: process_name (str): name of the process to monitor. - sshd_ip (str): IP of the SSH server. - sshd_port (int): port of the SSH server. - username (str): username to connect with. - password (str): password related to the username. max_attempts (int): maximum number of attempts for getting the process ID. delay_between_attempts (float): delay in seconds between each attempt. delay (float): delay before retrieving the process PID. - ssh_command_pattern (str): format string for the ssh command. '{0:s}' refer + command_pattern (str): format string for the ssh command. '{0:s}' refer to the process name. """ process_name = None - sshd_ip = None - sshd_port = 22 - username = None - password = None + command_pattern = 'pgrep {0:s}' max_attempts = 10 delay_between_attempts = 0.1 delay = 0.5 - ssh_command_pattern = 'pgrep {0:s}' - - def __init__(self): - assert(self.process_name != None) - assert(self.sshd_ip != None) - assert(self.username != None) - assert(self.password != None) - - if not ssh_module: - raise eh.UnavailablePythonModule('Python module for SSH is not available!') + def __init__(self, backend): + assert self.process_name != None + self._backend = backend Probe.__init__(self) def _get_pid(self, logger): - ssh_in, ssh_out, ssh_err = \ - self.client.exec_command(self.ssh_command_pattern.format(self.process_name)) - - if ssh_err.read(): - # fallback method as previous command does not exist on the system + try: + res = self._backend.exec_command(self.command_pattern.format(self.process_name)) + except BackendError: fallback_cmd = 'ps a -opid,comm' - ssh_in, ssh_out, ssh_err = self.client.exec_command(fallback_cmd) - res = ssh_out.read() + res = self._backend.exec_command(fallback_cmd) if sys.version_info[0] > 2: res = res.decode('latin_1') pid_list = res.split('\n') @@ -648,7 +726,6 @@ def _get_pid(self, logger): # process not found pid = -1 else: - res = ssh_out.read() if sys.version_info[0] > 2: res = res.decode('latin_1') l = res.split() @@ -667,11 +744,7 @@ def _get_pid(self, logger): return pid def start(self, dm, target, logger): - self.client = ssh.SSHClient() - self.client.set_missing_host_key_policy(ssh.AutoAddPolicy()) - self.client.connect(self.sshd_ip, port=self.sshd_port, - username=self.username, - password=self.password) + self._backend.start() self._saved_pid = self._get_pid(logger) if self._saved_pid < 0: msg = "*** INIT ERROR: unable to retrieve process PID ***\n" @@ -684,7 +757,7 @@ def start(self, dm, target, logger): return ProbeStatus(self._saved_pid, info=msg) def stop(self, dm, target, logger): - self.client.close() + self._backend.stop() def main(self, dm, target, logger): cpt = self.max_attempts @@ -715,6 +788,52 @@ def main(self, dm, target, logger): return status +class ProbePID_SSH(ProbePID): + """ + Generic probe that enable you to monitor a process PID through SSH. + + Attributes: + sshd_ip (str): IP of the SSH server. + sshd_port (int): port of the SSH server. + username (str): username to connect with. + password (str): password related to the username. 
+ """ + sshd_ip = None + sshd_port = 22 + username = None + password = None + + def __init__(self): + assert self.sshd_ip != None + assert self.username != None + assert self.password != None + ProbePID.__init__(self, SSH_Backend(sshd_ip=self.sshd_ip, sshd_port=self.sshd_port, + username=self.username, password=self.password)) + +class ProbePID_Serial(ProbePID): + """ + Generic probe that enable you to monitor a process PID through a Serial line. + + Attributes: + serial_port (str): path to the tty device file + baudrate (int): baudrate of the serial line + read_duration (str): time duration for retrieving characters from the serial line. + username (str): username to connect with. If None, no authentication step will be attempted. + password (str): password related to the username. + """ + + serial_port = None + baudrate=115200 + read_duration=2 + username = None + password = None + + def __init__(self): + assert self.serial_port != None + ProbePID.__init__(self, Serial_Backend(serial_port=self.serial_port, baudrate=self.baudrate, + read_duration=self.read_duration, + username=self.username, password=self.password)) + def probe(project): def internal_func(probe_cls): project.monitor.add_probe(probe_cls(), blocking=False) diff --git a/framework/plumbing.py b/framework/plumbing.py index ce8eccc..a0e7b58 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -2921,7 +2921,7 @@ def show_probes(self): self.lg.print_console('') for p in probes: msg = "name: %s (status: %s, delay: %f) --> " % \ - (p, repr(self.prj.get_probe_status(p).get_status()), + (p, repr(self.mon.get_probe_status(p).get_status()), self.prj.get_probe_delay(p)) if self.prj.is_probe_stuck(p): From b3ad9759bf65e6e94590a9cab416d93f3fe07ffb Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 29 Jul 2016 19:14:04 +0200 Subject: [PATCH 02/80] Fix ProbePID_Serial + fix monitoring/plumbing to wait for probe init + fix show_probes --- docs/source/probes.rst | 28 ++++++++ framework/monitor.py | 159 ++++++++++++++++++++++++++++------------- framework/plumbing.py | 11 ++- projects/tuto_proj.py | 10 ++- 4 files changed, 155 insertions(+), 53 deletions(-) diff --git a/docs/source/probes.rst b/docs/source/probes.rst index 1d845fb..6790058 100644 --- a/docs/source/probes.rst +++ b/docs/source/probes.rst @@ -36,3 +36,31 @@ Usage Example: .. seealso:: Refer to the class definition itself to look for the parameters available. + +ProbePID_Serial +=============== + +Reference: + :class:`framework.monitor.ProbePID_Serial` + +Description: + This generic probe enables you to monitor a process PID through a + Serial console. + +Usage Example: + Within your project file you can add such a probe like this: + + .. code-block:: python + :linenos: + + # Assuming your Project() is referred by the 'project' variable + + @blocking_probe(project) + class health_check(ProbePID_Serial): + process_name = 'the_process_to_monitor' + serial_port = '/dev/ttyUSB0' + username = 'user' + password = 'pass' + + .. seealso:: Refer to the class definition itself to look for the parameters available. 
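+
+   The probe also exposes a ``slowness_factor`` parameter (an integer scale from 1, fastest,
+   to 10, slowest) used as a base metric for the delays applied when interacting with the
+   monitored system. A sketch with illustrative values (to be tuned for your own setup):
+
+   .. code-block:: python
+      :linenos:
+
+      @blocking_probe(project)
+      class health_check(ProbePID_Serial):
+          process_name = 'the_process_to_monitor'
+          serial_port = '/dev/ttyUSB0'
+          username = 'user'
+          password = 'pass'
+          slowness_factor = 8   # raise towards 10 for very slow systems (default: 5)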
+ diff --git a/framework/monitor.py b/framework/monitor.py index afd476c..4cbecf0 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -33,11 +33,12 @@ class ProbeUser(object): - timeout = 10.0 + timeout = 20.0 def __init__(self, probe): self._probe = probe self._thread = None + self._started_event = threading.Event() self._stop_event = threading.Event() def start(self, *args, **kwargs): @@ -61,6 +62,17 @@ def join(self, timeout): self._stop_event.clear() + def wait_for_probe_init(self, timeout=None): + try: + self._wait_for_probe(self._started_event, timeout) + except ProbeTimeoutError as e: + e.blocking_methods = ["start()"] + raise e + + # Once a probe has started we do not clear self._started_event to avoid blocking the framework + # in the situation where this method will be called again while the probe won't have been + # restarted (currently in launch_operator, after having started the operator). + def is_alive(self): return self._thread is not None and self._thread.is_alive() @@ -80,14 +92,33 @@ def set_probe_delay(self, delay): def get_probe_status(self): return self._probe.status + def _notify_probe_started(self): + self._started_event.set() + def _go_on(self): return not self._stop_event.is_set() def _wait(self, delay): self._stop_event.wait(delay) + def _wait_for_probe(self, event, timeout=None): + """ + Wait for the probe to trigger a specific event + """ + timeout = ProbeUser.timeout if timeout is None else timeout + start = datetime.datetime.now() + + while not event.is_set(): + if (datetime.datetime.now() - start).total_seconds() >= timeout: + self.stop() + raise ProbeTimeoutError(self.__class__.__name__, timeout) + if not self.is_alive() or not self._go_on(): + break + event.wait(1) + def _clear(self): """ Clear all events """ + self._started_event.clear() self._stop_event.clear() def _run(self, *args, **kwargs): @@ -100,6 +131,8 @@ def _run(self, *args, **kwargs): if status is not None: self._probe.status = status + self._notify_probe_started() + while self._go_on(): try: self._probe.status = self._probe.main(*args, **kwargs) @@ -147,27 +180,12 @@ def stop(self): def notify_data_ready(self): self._arm_event.set() - def _wait_for_probe(self, event, timeout=None): - """ - Wait for the probe to trigger a specific event - """ - timeout = ProbeUser.timeout if timeout is None else timeout - start = datetime.datetime.now() - - while not event.is_set(): - if (datetime.datetime.now() - start).total_seconds() >= timeout: - self.stop() - raise ProbeTimeoutError(self.__class__.__name__, timeout) - if not self.is_alive() or not self._go_on(): - break - event.wait(1) - def wait_until_armed(self, timeout=None): try: self._wait_for_probe(self._armed_event, timeout) except ProbeTimeoutError as e: - e.blocking_methods = ["start()", "arm()"] + e.blocking_methods = ["arm()"] raise finally: self._armed_event.clear() @@ -180,7 +198,6 @@ def wait_until_ready(self, timeout=None): e.blocking_methods = ["main()"] raise e - def notify_blocking(self): self._blocking_event.set() @@ -247,6 +264,8 @@ def _run(self, *args, **kwargs): if status is not None: self._probe.status = status + self._notify_probe_started() + while self._go_on(): if not self._wait_for_data_ready(): @@ -348,7 +367,6 @@ def start_probe(self, probe): return False return True - def stop_probe(self, probe): probe_name = self._get_probe_ref(probe) if probe_name in self.probe_users: @@ -410,12 +428,14 @@ def _wait_for_specific_probes(self, probe_user_class, probe_user_wait_method, pr .format(e.probe_name, e.blocking_methods), 
code=Error.OperationCancelled) + def do_after_probes_init(self): + self._wait_for_specific_probes(ProbeUser, ProbeUser.wait_for_probe_init) + def do_before_sending_data(self): if not self.__enable: return self._target_status = None - for _, probe_user in self.probe_users.items(): if isinstance(probe_user, BlockingProbeUser): probe_user.notify_data_ready() @@ -626,15 +646,20 @@ def exec_command(self, cmd): class Serial_Backend(object): - def __init__(self, serial_port, baudrate=115200, read_duration=2, username=None, password=None): + def __init__(self, serial_port, baudrate=115200, username=None, password=None, + slowness_factor=5): if not serial_module: raise eh.UnavailablePythonModule('Python module for Serial is not available!') self.serial_port = serial_port self.baudrate = baudrate - self.read_duration = read_duration - self.username = username - self.password = password + self.slowness_factor = slowness_factor + if sys.version_info[0] > 2: + self.username = bytes(username, 'latin_1') + self.password = bytes(password, 'latin_1') + else: + self.username = username + self.password = password self.client = None @@ -643,42 +668,74 @@ def start(self): dsrdtr=True, rtscts=True) if self.username is not None: assert self.password is not None - time.sleep(0.1) self.ser.flushInput() - self.ser.write(self.username) + self.ser.write(self.username+b'\r\n') time.sleep(0.1) - pass_prompt = self.ser.read(1) + self.ser.readline() # we read login echo + pass_prompt = self.ser.readline() retry = 0 - while pass_prompt.find('p') == -1: - time.sleep(1) + eot_sent = False + while pass_prompt.lower().find(b'password') == -1: retry += 1 - if retry > 2: + if retry > 3 and eot_sent: + self.stop() raise BackendError('Unable to establish a connection with the serial line.') - else: + elif retry > 3: + # we send an EOT if ever the console was not in its initial state + # (already logged, or with the password prompt, ...) when we first write on + # the serial line. + self.ser.write(b'\x04\r\n') + time.sleep(self.slowness_factor*0.6) + self.ser.flushInput() + self.ser.write(self.username+b'\r\n') + time.sleep(0.1) + self.ser.readline() # we consume the login echo pass_prompt = self.ser.readline() + retry = 0 + eot_sent = True + else: + pass_prompt = b''.join(self._read_serial(duration=self.slowness_factor*0.2)) time.sleep(0.1) - self.ser.write(self.password) + self.ser.write(self.password+b'\r\n') + time.sleep(self.slowness_factor) def stop(self): + self.ser.write(b'\x04\r\n') # we send an EOT (Ctrl+D) self.ser.close() def exec_command(self, cmd): + if sys.version_info[0] > 2: + cmd = bytes(cmd, 'latin_1') + cmd += b'\r\n' self.ser.flushInput() self.ser.write(cmd) - result = b'' - t0 = datetime.datetime.now() - duration = -1 + time.sleep(0.1) + self.ser.readline() # we consume the 'writing echo' from the input try: - while duration < self.read_duration: - now = datetime.datetime.now() - duration = (now - t0).total_seconds() - time.sleep(0.1) - res = self.ser.readline() - result += res + result = self._read_serial(duration=self.slowness_factor*0.4) except serial.SerialException: raise BackendError('Exception while reading serial line') else: - return result + # We have to remove the new prompt line at the end. + # But in our testing environment, the two last entries had to be removed, namely + # 'prompt_line \r\n' and 'prompt_line ' !? 
+ # print('\n*** DBG: ', result) + result = result[:-2] + return b''.join(result) + + def _read_serial(self, duration): + result = [] + t0 = datetime.datetime.now() + delta = -1 + while delta < duration: + now = datetime.datetime.now() + delta = (now - t0).total_seconds() + time.sleep(0.1) + res = self.ser.readline() + if res == b'': + break + result.append(res) + return result class BackendError(Exception): pass @@ -815,24 +872,28 @@ class ProbePID_Serial(ProbePID): Generic probe that enable you to monitor a process PID through a Serial line. Attributes: - serial_port (str): path to the tty device file - baudrate (int): baudrate of the serial line - read_duration (str): time duration for retrieving characters from the serial line. + serial_port (str): path to the tty device file. + baudrate (int): baudrate of the serial line. username (str): username to connect with. If None, no authentication step will be attempted. password (str): password related to the username. + slowness_factor (int): characterize the slowness of the monitored system. The scale goes from + 1 (fastest) to 10 (slowest). This factor is a base metric to compute the time to wait + for the authentication step to terminate (if `username` and `password` parameter are provided) + and other operations involving to wait for the monitored system. """ serial_port = None - baudrate=115200 - read_duration=2 + baudrate = 115200 username = None password = None + slowness_factor = 5 def __init__(self): assert self.serial_port != None + assert 10 >= self.slowness_factor >= 1 ProbePID.__init__(self, Serial_Backend(serial_port=self.serial_port, baudrate=self.baudrate, - read_duration=self.read_duration, - username=self.username, password=self.password)) + username=self.username, password=self.password, + slowness_factor=self.slowness_factor)) def probe(project): def internal_func(probe_cls): diff --git a/framework/plumbing.py b/framework/plumbing.py index a0e7b58..54bb159 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -922,6 +922,7 @@ def __start_fmk_plumbing(self): if delay is not None: self.mon.set_probe_delay(pname, delay) self.mon.start_probe(pname) + self.mon.do_after_probes_init() self.prj.start() if self.tg.probes: time.sleep(0.5) @@ -2317,6 +2318,8 @@ def launch_operator(self, name, user_input=UserInputContainer(), use_existing_se except: self._handle_user_code_exception('Operator has crashed during its start() method') return False + finally: + self.mon.do_after_probes_init() # operator.start() can start probes. if not ok: self.set_error("The _start() method of Operator '%s' has returned an error!" 
% name, @@ -2922,11 +2925,11 @@ def show_probes(self): for p in probes: msg = "name: %s (status: %s, delay: %f) --> " % \ (p, repr(self.mon.get_probe_status(p).get_status()), - self.prj.get_probe_delay(p)) + self.mon.get_probe_delay(p)) - if self.prj.is_probe_stuck(p): + if self.mon.is_probe_stuck(p): msg += "stuck" - elif self.prj.is_probe_launched(p): + elif self.mon.is_probe_launched(p): msg += "launched" else: msg += "stopped" @@ -2941,6 +2944,8 @@ def launch_probe(self, name): if not ok: self.set_error('Probe does not exist (or already launched)', code=Error.CommandError) + self.mon.do_after_probes_init() + return ok @EnforceOrder(accepted_states=['S2']) diff --git a/projects/tuto_proj.py b/projects/tuto_proj.py index ef4676f..8c5c1af 100644 --- a/projects/tuto_proj.py +++ b/projects/tuto_proj.py @@ -115,11 +115,19 @@ def main(self, dm, target, logger): return ProbeStatus(status) +@blocking_probe(project) +class serial_probe_test(ProbePID_Serial): + process_name = 'bash' + serial_port = '/dev/ttyUSB0' + slowness_factor = 4 + username = 'test' + password = 'test' ### TARGETS ALLOCATION ### targets = [(EmptyTarget(), (P1, 2), (P2, 1.4), health_check), - tuto_tg, net_tg, udpnet_tg, udpnetsrv_tg, rawnetsrv_tg] + tuto_tg, net_tg, udpnet_tg, udpnetsrv_tg, rawnetsrv_tg, + (EmptyTarget(), serial_probe_test)] ### OPERATOR DEFINITION ### From e5f3c7bdffd3201a2dba6a646d5dbba1c394f75e Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 1 Aug 2016 20:34:45 +0200 Subject: [PATCH 03/80] Add new parameters to ProbePID_Serial + minor fixes/updates --- framework/monitor.py | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/framework/monitor.py b/framework/monitor.py index 4cbecf0..9d5842c 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -646,13 +646,21 @@ def exec_command(self, cmd): class Serial_Backend(object): - def __init__(self, serial_port, baudrate=115200, username=None, password=None, + def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbits=1, + xonxoff=False, rtscts=False, dsrdtr=False, + username=None, password=None, slowness_factor=5): if not serial_module: raise eh.UnavailablePythonModule('Python module for Serial is not available!') self.serial_port = serial_port self.baudrate = baudrate + self.bytesize = bytesize + self.parity = parity + self.stopbits= stopbits + self.xonxoff = xonxoff + self.rtscts = rtscts + self.dsrdtr = dsrdtr self.slowness_factor = slowness_factor if sys.version_info[0] > 2: self.username = bytes(username, 'latin_1') @@ -664,8 +672,10 @@ def __init__(self, serial_port, baudrate=115200, username=None, password=None, self.client = None def start(self): - self.ser = serial.Serial(self.serial_port, self.baudrate, timeout=1, - dsrdtr=True, rtscts=True) + self.ser = serial.Serial(self.serial_port, self.baudrate, bytesize=self.bytesize, + parity=self.parity, stopbits=self.stopbits, + xonxoff=self.xonxoff, dsrdtr=self.dsrdtr, rtscts=self.rtscts, + timeout=1) if self.username is not None: assert self.password is not None self.ser.flushInput() @@ -721,7 +731,11 @@ def exec_command(self, cmd): # 'prompt_line \r\n' and 'prompt_line ' !? 
            # print('\n*** DBG: ', result)
             result = result[:-2]
-            return b''.join(result)
+            ret = b''.join(result)
+            if ret.find(b'command not found') != -1:
+                raise BackendError('The command does not exist on the host')
+            else:
+                return ret
 
     def _read_serial(self, duration):
         result = []
@@ -730,7 +744,6 @@ def exec_command(self, cmd):
         while delta < duration:
             now = datetime.datetime.now()
             delta = (now - t0).total_seconds()
-            time.sleep(0.1)
             res = self.ser.readline()
             if res == b'':
                 break
@@ -828,7 +841,7 @@ def main(self, dm, target, logger):
         status = ProbeStatus()
 
         if current_pid == -10:
-            status.set_status(10)
+            status.set_status(-10)
             status.set_private_info("ERROR with the ssh command")
         elif current_pid == -1:
             status.set_status(-2)
@@ -839,7 +852,7 @@ def main(self, dm, target, logger):
             status.set_private_info("'{:s}' PID({:d}) has changed!".format(self.process_name,
                                                                            current_pid))
         else:
-            status.set_status(0)
+            status.set_status(current_pid)
             status.set_private_info(None)
 
         return status
@@ -873,7 +886,13 @@ class ProbePID_Serial(ProbePID):
 
     Attributes:
         serial_port (str): path to the tty device file.
-        baudrate (int): baudrate of the serial line.
+        baudrate (int): baud rate of the serial line.
+        bytesize (int): number of data bits. (5, 6, 7, or 8)
+        parity (str): parity checking. ('N', 'O, 'E', 'M', or 'S')
+        stopbits (int): number of stop bits. (1, 1.5 or 2)
+        xonxoff (bool): enable software flow control.
+        rtscts (bool): enable hardware (RTS/CTS) flow control.
+        dsrdtr (bool): enable hardware (DSR/DTR) flow control.
         username (str): username to connect with. If None, no authentication step will be attempted.
         password (str): password related to the username.
         slowness_factor (int): characterize the slowness of the monitored system. The scale goes from
@@ -884,6 +903,12 @@ class ProbePID_Serial(ProbePID):
 
     serial_port = None
     baudrate = 115200
+    bytesize = 8
+    parity = 'N'
+    stopbits = 1
+    xonxoff = False
+    rtscts = False
+    dsrdtr = False
     username = None
     password = None
     slowness_factor = 5
@@ -892,6 +917,9 @@ def __init__(self):
         assert self.serial_port != None
         assert 10 >= self.slowness_factor >= 1
         ProbePID.__init__(self, Serial_Backend(serial_port=self.serial_port, baudrate=self.baudrate,
+                                               bytesize=self.bytesize, parity=self.parity,
+                                               stopbits=self.stopbits, xonxoff=self.xonxoff,
+                                               rtscts=self.rtscts, dsrdtr=self.dsrdtr,
                                                username=self.username, password=self.password,
                                                slowness_factor=self.slowness_factor))
 
From ab203e3f542e31857a2ee7bc39d81b87ef23303c Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Mon, 1 Aug 2016 20:36:02 +0200
Subject: [PATCH 04/80] Scenario Infra: register information regarding
 DataProcess()'s seed

---
 framework/plumbing.py | 3 +++
 framework/scenario.py | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/framework/plumbing.py b/framework/plumbing.py
index 54bb159..90535ea 100644
--- a/framework/plumbing.py
+++ b/framework/plumbing.py
@@ -1554,6 +1554,7 @@ def _handle_data_desc(self, data_desc):
                     return None
                 else:
                     seed = Data(seed)
+                    seed.set_initial_dmaker([data_desc.seed.upper(), 'g_'+data_desc.seed, None])
             else:
                 if not isinstance(data_desc.seed, Data):
                     self.set_error(msg='DataProcess object contains an unrecognized seed type!',
@@ -1580,6 +1581,8 @@ def _handle_data_desc(self, data_desc):
                 return None
             else:
                 data = Data(node)
+                data.set_initial_dmaker([data_desc.upper(), 'g_'+data_desc,
+                                         UserInputContainer()])
         else:
             self.set_error(
                 msg='Data descriptor type is not recognized {!s}!'.format(type(data_desc)),
diff --git a/framework/scenario.py b/framework/scenario.py
index bf45f8b..944efa7 100644
--- a/framework/scenario.py
+++ b/framework/scenario.py
@@ -192,8 +192,10 @@ def node(self):
             return self._data_desc.outcomes.node
         elif self._data_desc.seed is not None:
             if isinstance(self._data_desc.seed, str):
+                seed_name = self._data_desc.seed
                 node = self._dm.get_data(self._data_desc.seed)
                 self._data_desc.seed = Data(node)
+                self._data_desc.seed.set_initial_dmaker([seed_name.upper(), 'g_'+seed_name, None])
                 return node
             elif isinstance(self._data_desc.seed, Data):
                 return self._data_desc.seed.node  # if data is raw, .node is None
 
From ab90c9ea2a2a47c4e804a5c566c7bbd1b2071cd8 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Tue, 2 Aug 2016 16:31:25 +0200
Subject: [PATCH 05/80] Generic probe usage modification + add new ProbeMem

- Add a new generic probe to monitor memory consumption ('ProbeMem')
- Revamp interaction between generic probes and backends
- Update Probe API with the new method .reset()
- Minor fixes and enhancements regarding probe monitoring
---
 docs/source/probes.rst   | 102 +++++++++------
 docs/source/tutorial.rst |   2 +-
 framework/monitor.py     | 268 ++++++++++++++++++++++++++++-----------
 framework/plumbing.py    |  25 ++--
 projects/tuto_proj.py    |  18 ++-
 5 files changed, 285 insertions(+), 130 deletions(-)

diff --git a/docs/source/probes.rst b/docs/source/probes.rst
index 6790058..90d1703 100644
--- a/docs/source/probes.rst
+++ b/docs/source/probes.rst
@@ -1,66 +1,94 @@
 .. _probes:
 
-Generic Probes
-**************
+Generic Probes and Backend
+**************************
 
 The following section present some generic probes that inherit from
 :class:`framework.monitor.Probe`. They can be used within your project
-files (refer to :ref:`tuto:project`) by only inheriting from them
-and providing the expected parameters,
+files (refer to :ref:`tuto:project`) by inheriting from them
+and providing the expected parameters. Besides, you have to provide them with a means to
+access the monitored system, namely a :class:`framework.monitor.Backend`. Note that you can use
+the same backend for simultaneous probes.
+
+Let's illustrate this with the following example where two probes are used to monitor a process
+through an SSH connection. One is used to check if the PID of the process has changed after each
+data sending, and the other one to check if the memory used by the process has exceeded
+its initial memory footprint by 5% (with a probing period of 0.2 seconds).
+
+The project file should look like this:
 
-ProbePID_SSH
-============
+  .. code-block:: python
+     :linenos:
 
-Reference:
-  :class:`framework.monitor.ProbePID_SSH`
+     # Assuming your Project() is referred by the 'project' variable
 
-Description:
-  This generic probe enables you to monitor a process PID through an
-  SSH connection.
+     ssh_backend = SSH_Backend(username='user', password='pass',
+                               sshd_ip='127.0.0.1', sshd_port=22)
 
-Usage Example:
-  Within your project file you can add such a probe like this:
+     @blocking_probe(project)
+     class probe_pid(ProbePID):
+         process_name = 'the_process_to_monitor'
+         backend = ssh_backend
 
-  .. code-block:: python
-     :linenos:
+     @probe(project)
+     class probe_mem(ProbeMem):
+         process_name = 'the_process_to_monitor'
+         tolerance = 5
+         backend = ssh_backend
 
-     # Assuming your Project() is referred by the 'project' variable
-
-     @blocking_probe(project)
-     class health_check(ProbePID_SSH):
-         process_name = 'the_process_to_monitor'
-         sshd_ip = '127.0.0.1'
-         sshd_port = 22
-         username = 'user'
-         password = 'pass'
-
-  .. seealso:: Refer to the class definition itself to look for the
-               parameters available.
+     targets = [ (YourTarget(), probe_pid, (probe_mem, 0.2)) ]
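+
+Since both probes reference the same ``ssh_backend`` object, the underlying SSH connection
+is only set up once (the backend guards its ``start()`` method with an internal flag). The
+same layout works with the serial backend; for instance, a sketch where the device path and
+credentials are mere assumptions to adapt:
+
+  .. code-block:: python
+     :linenos:
+
+     serial_backend = Serial_Backend('/dev/ttyUSB0', username='user',
+                                     password='pass', slowness_factor=5)
+
+     @blocking_probe(project)
+     class probe_pid(ProbePID):
+         process_name = 'the_process_to_monitor'
+         backend = serial_backend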
+Generic Backend
+===============
+
+.. seealso:: Refer to the class documentation for more details.
+
+SSH_Backend
+-----------
+
+Reference:
+  :class:`framework.monitor.SSH_Backend`
+
+Description:
+  This generic backend enables you to interact with a monitored system through an
+  SSH connection.
+
+
+Serial_Backend
+--------------
+
+Reference:
+  :class:`framework.monitor.Serial_Backend`
+
+Description:
+  This generic backend enables you to interact with a monitored system through a
+  serial line.
+
+
+Generic Probes
+==============
+
+.. seealso:: Refer to the class documentation for more details.
+
+ProbePID
+--------
+
+Reference:
+  :class:`framework.monitor.ProbePID`
+
+Description:
+  This generic probe enables you to monitor a process PID through the
+  backend you provide it with (e.g., an SSH connection, a serial line).
+
+ProbeMem
+--------
+
+Reference:
+  :class:`framework.monitor.ProbeMem`
+
+Description:
+  Generic probe that enables you to monitor the process memory (RSS...) consumption.
+  It can be done by specifying a ``threshold`` and/or a ``tolerance`` ratio.
+
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 98de2e8..1410cd6 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -2445,7 +2445,7 @@ status has been recorded, coming either from:
 
 .. seealso:: Refer to :ref:`probes` for details on the available generic
-   probes that you can use within you project.
+   probes that you can use within your project.
 
 In order to associate one or more probe to a target, you have to add them within the
 ``targets`` global variable of the related project file (refer to :ref:`tuto:project`). More precisely, for a target ``A``,
diff --git a/framework/monitor.py b/framework/monitor.py
index 9d5842c..d59fd8c 100644
--- a/framework/monitor.py
+++ b/framework/monitor.py
@@ -90,6 +90,7 @@ def set_probe_delay(self, delay):
         self._probe.delay = delay
 
     def get_probe_status(self):
+        self._probe.reset()
         return self._probe.status
 
     def _notify_probe_started(self):
@@ -543,6 +544,7 @@ def arm(self, dm, target, logger):
         Called by the framework just before sending a data.
 
         Args:
+            dm: the current data model
            target: the current target
            logger: the current logger
        """
@@ -568,6 +570,19 @@ def main(self, dm, target, logger):
        """
        raise NotImplementedError
 
+    def reset(self):
+        """
+        To be overloaded by user-code (if needed).
+
+        Called each time the probe status is retrieved by the framework
+        (through :meth:`Monitor.get_probe_status`).
+        Useful especially for periodic probes that may need to be reset after each
+        data sending.
+
+        Note: shall be stateless and reentrant.
+        """
+        pass
+
     def configure(self, *args):
         """
         (Optional method) To be overloaded with any signature that fits your needs
@@ -579,6 +594,7 @@ def configure(self, *args):
         """
         pass
 
+
 class ProbeStatus(object):
 
     def __init__(self, status=None, info=None):
@@ -609,31 +625,70 @@ def get_timestamp(self):
         return self._now
 
 
+class Backend(object):
+
+    def __init__(self):
+        self._started = False
+        self._sync_lock = threading.Lock()
 
-class SSH_Backend(object):
+    def start(self):
+        with self._sync_lock:
+            if not self._started:
+                self._started = True
+                self._start()
+
+    def stop(self):
+        with self._sync_lock:
+            if self._started:
+                self._started = False
+                self._stop()
+
+    def exec_command(self, cmd):
+        with self._sync_lock:
+            return self._exec_command(cmd)
+
+    def _exec_command(self, cmd):
+        raise NotImplementedError
+
+    def _start(self):
+        pass
 
-    def __init__(self, sshd_ip, sshd_port=2, username='', password=''):
+    def _stop(self):
+        pass
+
+
+class SSH_Backend(Backend):
+    """
+    Backend to execute commands through an SSH connection.
+    """
+    def __init__(self, username, password, sshd_ip, sshd_port=22):
+        """
+        Args:
+            sshd_ip (str): IP of the SSH server.
+            sshd_port (int): port of the SSH server.
+            username (str): username to connect with.
+            password (str): password related to the username.
+        """
+        Backend.__init__(self)
         if not ssh_module:
             raise eh.UnavailablePythonModule('Python module for SSH is not available!')
-
         self.sshd_ip = sshd_ip
         self.sshd_port = sshd_port
         self.username = username
         self.password = password
-
         self.client = None
 
-    def start(self):
+    def _start(self):
         self.client = ssh.SSHClient()
         self.client.set_missing_host_key_policy(ssh.AutoAddPolicy())
         self.client.connect(self.sshd_ip, port=self.sshd_port,
                             username=self.username,
                             password=self.password)
 
-    def stop(self):
+    def _stop(self):
         self.client.close()
 
-    def exec_command(self, cmd):
+    def _exec_command(self, cmd):
         ssh_in, ssh_out, ssh_err = \
             self.client.exec_command(cmd)
 
@@ -644,12 +699,32 @@ def exec_command(self, cmd):
 
 
-class Serial_Backend(object):
-
+class Serial_Backend(Backend):
+    """
+    Backend to execute commands through a serial line.
+    """
     def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbits=1,
                  xonxoff=False, rtscts=False, dsrdtr=False,
                  username=None, password=None,
                  slowness_factor=5):
+        """
+        Args:
+            serial_port (str): path to the tty device file. (e.g., '/dev/ttyUSB0')
+            baudrate (int): baud rate of the serial line.
+            bytesize (int): number of data bits. (5, 6, 7, or 8)
+            parity (str): parity checking. ('N', 'O, 'E', 'M', or 'S')
+            stopbits (int): number of stop bits. (1, 1.5 or 2)
+            xonxoff (bool): enable software flow control.
+            rtscts (bool): enable hardware (RTS/CTS) flow control.
+            dsrdtr (bool): enable hardware (DSR/DTR) flow control.
+            username (str): username to connect with. If None, no authentication step will be attempted.
+            password (str): password related to the username.
+            slowness_factor (int): characterize the slowness of the monitored system. The scale goes from
+                1 (fastest) to 10 (slowest). This factor is a base metric to compute the time to wait
+                for the authentication step to terminate (if `username` and `password` parameter are provided)
+                and other operations involving to wait for the monitored system.
+ """ + Backend.__init__(self) if not serial_module: raise eh.UnavailablePythonModule('Python module for Serial is not available!') @@ -671,7 +746,7 @@ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbit self.client = None - def start(self): + def _start(self): self.ser = serial.Serial(self.serial_port, self.baudrate, bytesize=self.bytesize, parity=self.parity, stopbits=self.stopbits, xonxoff=self.xonxoff, dsrdtr=self.dsrdtr, rtscts=self.rtscts, @@ -709,11 +784,11 @@ def start(self): self.ser.write(self.password+b'\r\n') time.sleep(self.slowness_factor) - def stop(self): + def _stop(self): self.ser.write(b'\x04\r\n') # we send an EOT (Ctrl+D) self.ser.close() - def exec_command(self, cmd): + def _exec_command(self, cmd): if sys.version_info[0] > 2: cmd = bytes(cmd, 'latin_1') cmd += b'\r\n' @@ -755,10 +830,13 @@ class BackendError(Exception): pass class ProbePID(Probe): """ - This is the base class for the generic probes that enable you to monitor a process PID. - The monitoring can be done through different backend. The current ones are SSH and Serial. + Generic probe that enables you to monitor a process PID. + + The monitoring can be done through different backend (e.g., :class:`SSH_Backend`, + :class:`Serial_Backend`). Attributes: + backend (Backend): backend to be used (e.g., :class:`SSH_Backend`). process_name (str): name of the process to monitor. max_attempts (int): maximum number of attempts for getting the process ID. @@ -768,29 +846,33 @@ class ProbePID(Probe): command_pattern (str): format string for the ssh command. '{0:s}' refer to the process name. """ + backend = None process_name = None command_pattern = 'pgrep {0:s}' max_attempts = 10 delay_between_attempts = 0.1 delay = 0.5 - def __init__(self, backend): + def __init__(self): assert self.process_name != None - self._backend = backend + assert self.backend != None Probe.__init__(self) def _get_pid(self, logger): try: - res = self._backend.exec_command(self.command_pattern.format(self.process_name)) + res = self.backend.exec_command(self.command_pattern.format(self.process_name)) except BackendError: fallback_cmd = 'ps a -opid,comm' - res = self._backend.exec_command(fallback_cmd) + res = self.backend.exec_command(fallback_cmd) if sys.version_info[0] > 2: res = res.decode('latin_1') pid_list = res.split('\n') for entry in pid_list: if entry.find(self.process_name) >= 0: - pid = int(entry.split()[0]) + try: + pid = int(entry.split()[0]) + except ValueError: + pid = -10 break else: # process not found @@ -814,7 +896,7 @@ def _get_pid(self, logger): return pid def start(self, dm, target, logger): - self._backend.start() + self.backend.start() self._saved_pid = self._get_pid(logger) if self._saved_pid < 0: msg = "*** INIT ERROR: unable to retrieve process PID ***\n" @@ -827,7 +909,7 @@ def start(self, dm, target, logger): return ProbeStatus(self._saved_pid, info=msg) def stop(self, dm, target, logger): - self._backend.stop() + self.backend.stop() def main(self, dm, target, logger): cpt = self.max_attempts @@ -842,7 +924,7 @@ def main(self, dm, target, logger): if current_pid == -10: status.set_status(-10) - status.set_private_info("ERROR with the ssh command") + status.set_private_info("ERROR with the command") elif current_pid == -1: status.set_status(-2) status.set_private_info("'{:s}' is not running anymore!".format(self.process_name)) @@ -858,70 +940,104 @@ def main(self, dm, target, logger): return status -class ProbePID_SSH(ProbePID): +class ProbeMem(Probe): """ - Generic probe that enable 
you to monitor a process PID through SSH. + Generic probe that enables you to monitor the process memory (RSS...) consumption. + It can be done by specifying a ``threshold`` and/or a ``tolerance`` ratio. + + The monitoring can be done through different backend (e.g., :class:`SSH_Backend`, + :class:`Serial_Backend`). Attributes: - sshd_ip (str): IP of the SSH server. - sshd_port (int): port of the SSH server. - username (str): username to connect with. - password (str): password related to the username. + backend (Backend): backend to be used (e.g., :class:`SSH_Backend`). + process_name (str): name of the process to monitor. + threshold (int): memory (RSS) threshold in bytes that the monitored process should not exceed. + tolerance (int): tolerance expressed in percentage of the memory (RSS) the process was + using at the beginning of the monitoring. + command_pattern (str): format string for the ssh command. '{0:s}' refer + to the process name. """ - sshd_ip = None - sshd_port = 22 - username = None - password = None + backend = None + process_name = None + threshold = None + tolerance = 2 + command_pattern = 'ps -e -orss,comm | grep {0:s}' def __init__(self): - assert self.sshd_ip != None - assert self.username != None - assert self.password != None - ProbePID.__init__(self, SSH_Backend(sshd_ip=self.sshd_ip, sshd_port=self.sshd_port, - username=self.username, password=self.password)) + assert self.process_name != None + assert self.backend != None + Probe.__init__(self) -class ProbePID_Serial(ProbePID): - """ - Generic probe that enable you to monitor a process PID through a Serial line. + def _get_mem(self): + res = self.backend.exec_command(self.command_pattern.format(self.process_name)) - Attributes: - serial_port (str): path to the tty device file. - baudrate (int): baud rate of the serial line. - bytesize (int): number of data bits. (5, 6, 7, or 8) - parity (str): parity checking. ('N', 'O, 'E', 'M', or 'S') - stopbits (int): number of stop bits. (1, 1.5 or 2) - xonxoff (bool): enable software flow control. - rtscts (bool): enable hardware (RTS/CTS) flow control. - dsrdtr (bool): enable hardware (DSR/DTR) flow control. - username (str): username to connect with. If None, no authentication step will be attempted. - password (str): password related to the username. - slowness_factor (int): characterize the slowness of the monitored system. The scale goes from - 1 (fastest) to 10 (slowest). This factor is a base metric to compute the time to wait - for the authentication step to terminate (if `username` and `password` parameter are provided) - and other operations involving to wait for the monitored system. 
- """ + if sys.version_info[0] > 2: + res = res.decode('latin_1') + proc_list = res.split('\n') + for entry in proc_list: + if entry.find(self.process_name) >= 0: + try: + rss = int(entry.split()[0]) + except ValueError: + rss = -10 + break + else: + # process not found + rss = -1 - serial_port = None - baudrate = 115200 - bytesize = 8 - parity = 'N' - stopbits = 1 - xonxoff = False - rtscts = False - dsrdtr = False - username = None - password = None - slowness_factor = 5 + return rss + + def start(self, dm, target, logger): + self.backend.start() + self._saved_mem = self._get_mem() + self.reset() + if self._saved_mem < 0: + msg = "*** INIT ERROR: unable to retrieve process RSS ***\n" + else: + msg = "*** INIT: '{:s}' current RSS: {:d} ***\n".format(self.process_name, + self._saved_mem) + return ProbeStatus(self._saved_mem, info=msg) + + def stop(self, dm, target, logger): + self.backend.stop() + + def main(self, dm, target, logger): + current_mem = self._get_mem() + + status = ProbeStatus() + + if current_mem == -10: + status.set_status(-10) + status.set_private_info("ERROR with the command") + elif current_mem == -1: + status.set_status(-2) + status.set_private_info("'{:s}' is not found!".format(self.process_name)) + else: + if current_mem > self._max_mem: + self._max_mem = current_mem + + ok = True + err_msg = '' + if self.threshold is not None and self._max_mem > self.threshold: + ok = False + err_msg += '\nThreshold exceeded' + if self.tolerance is not None: + delta = abs(self._max_mem - self._saved_mem) + if (delta/float(self._saved_mem))*100 > self.tolerance: + ok = False + err_msg += '\nTolerance exceeded' + if not ok: + status.set_status(-1) + status.set_private_info(err_msg) + else: + status.set_status(self._max_mem) + status.set_private_info("*** '{:s}' current RSS: {:d} ***\n" + .format(self.process_name, self._saved_mem)) + return status + + def reset(self): + self._max_mem = self._saved_mem - def __init__(self): - assert self.serial_port != None - assert 10 >= self.slowness_factor >= 1 - ProbePID.__init__(self, Serial_Backend(serial_port=self.serial_port, baudrate=self.baudrate, - bytesize=self.bytesize, parity=self.parity, - stopbits=self.stopbits, xonxoff=self.xonxoff, - rtscts=self.rtscts, dsrdtr=self.dsrdtr, - username=self.username, password=self.password, - slowness_factor=self.slowness_factor)) def probe(project): def internal_func(probe_cls): diff --git a/framework/plumbing.py b/framework/plumbing.py index 90535ea..6b0cb71 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -504,9 +504,10 @@ def _recover_target(self): "will be terminated.") return target_recovered - def monitor_probes(self, force_record=False): + def monitor_probes(self, prefix=None, force_record=False): probes = self.mon.get_probes_names() ok = True + prefix_printed = False for pname in probes: if self.mon.is_probe_launched(pname): pstatus = self.mon.get_probe_status(pname) @@ -514,17 +515,21 @@ def monitor_probes(self, force_record=False): if err < 0 or force_record: if err < 0: ok = False + if prefix and not prefix_printed: + prefix_printed = True + self.lg.print_console('\n*** {:s} ***'.format(prefix), rgb=Color.FEEDBACK) tstamp = pstatus.get_timestamp() priv = pstatus.get_private_info() self.lg.log_probe_feedback(source="Probe '{:s}'".format(pname), timestamp=tstamp, content=priv, status_code=err) - if not ok: - return self._recover_target() - else: - return True + ret = self._recover_target() if not ok else True + if prefix and not ok: + 
self.lg.print_console('*'*(len(prefix)+8)+'\n', rgb=Color.FEEDBACK) + + return ret @EnforceOrder(initial_func=True, final_state='get_projs') def get_data_models(self): @@ -1521,7 +1526,7 @@ def _do_sending_and_logging_init(self, data_list): self.set_feedback_timeout(fbk_timeout, do_show=False) self.tg.cleanup() - self.monitor_probes() + self.monitor_probes(prefix='Probe Status Before Sending Data') if blocked_data: self._handle_data_callbacks(blocked_data, hook=HOOK.after_fbk) @@ -1763,7 +1768,7 @@ def send_data_and_log(self, data_list, original_data=None, verbose=False): if self._burst_countdown == self._burst: cont1 = self.log_target_feedback() # We handle probe feedback if any - cont2 = self.monitor_probes() + cont2 = self.monitor_probes(force_record=True) self.tg.cleanup() self._do_after_feedback_retrieval(data_list) @@ -1983,8 +1988,8 @@ def log_target_feedback(self): def log_target_residual_feedback(self): err_detected1, err_detected2 = False, False if self.__send_enabled: - p = "\n::[ RESIDUAL TARGET FEEDBACK ]::" - e = "::[ ------------------------ ]::\n" + p = "\n*** RESIDUAL TARGET FEEDBACK ***" + e = "********************************\n" try: err_detected1 = self.lg.log_collected_target_feedback(preamble=p, epilogue=e) except NotImplementedError: @@ -3295,7 +3300,7 @@ def precmd(self, line): else: self.__error = True - self.__error_msg = 'You shall first load a project and/or enable all fuzzing components!' + self.__error_msg = 'You shall first load a project and/or enable all the framework components!' return '' diff --git a/projects/tuto_proj.py b/projects/tuto_proj.py index 8c5c1af..39ccf7b 100644 --- a/projects/tuto_proj.py +++ b/projects/tuto_proj.py @@ -115,19 +115,25 @@ def main(self, dm, target, logger): return ProbeStatus(status) +serial_backend = Serial_Backend('/dev/ttyUSB0', username='test', password='test', slowness_factor=4) + @blocking_probe(project) -class serial_probe_test(ProbePID_Serial): +class probe_pid(ProbePID): + backend = serial_backend process_name = 'bash' - serial_port = '/dev/ttyUSB0' - slowness_factor = 4 - username = 'test' - password = 'test' + +@probe(project) +class probe_mem(ProbeMem): + backend = serial_backend + process_name = 'bash' + tolerance = 1 + ### TARGETS ALLOCATION ### targets = [(EmptyTarget(), (P1, 2), (P2, 1.4), health_check), tuto_tg, net_tg, udpnet_tg, udpnetsrv_tg, rawnetsrv_tg, - (EmptyTarget(), serial_probe_test)] + (EmptyTarget(), probe_pid, (probe_mem, 0.2))] ### OPERATOR DEFINITION ### From 7503cf9e22e75b14fe11fc1c79960df8ce3a40b1 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Tue, 2 Aug 2016 21:37:11 +0200 Subject: [PATCH 06/80] Various fixes regarding tSTRUCT + minor stuff - Fix 'Env.nodes_to_corrupt' update in a specific situation. - Fix tSTRUCT regarding 'init' parameter... 
- Fix Encoder.to_bytes() in a specific situation with python3 - Update PPPoE DM: enhance robustness of the scenarios main callback - Add 'cmd_notfound' parameter to Serial_Backend --- data_models/protocols/pppoe_strategy.py | 12 ++--- framework/data_model.py | 10 ++--- framework/encoders.py | 5 ++- framework/fuzzing_primitives.py | 2 +- framework/generic_data_makers.py | 58 +++++++++++++++---------- framework/logger.py | 8 ++-- framework/monitor.py | 24 ++++++---- framework/plumbing.py | 4 +- 8 files changed, 73 insertions(+), 50 deletions(-) diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index 62cb48c..86d221b 100644 --- a/data_models/protocols/pppoe_strategy.py +++ b/data_models/protocols/pppoe_strategy.py @@ -65,11 +65,14 @@ def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi'): continue print(' [ {:s} received! ]'.format(x.upper())) next_step.node.freeze() - next_step.node['.*/mac_dst'] = mac_src error_msg = '\n*** The node has no path to: {:s}. Thus, ignore it.\n'\ ' (probable reason: the node has been fuzzed in a way that makes the' \ 'path unavailable)' + try: + next_step.node['.*/mac_dst'] = mac_src + except: + print(error_msg.format('mac_dst')) try: next_step.node['.*/tag_sn/value/v101'] = service_name except: @@ -85,12 +88,11 @@ def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi'): if host_uniq is not None: new_tag = env.dm.get_data('tag_host_uniq') + new_tag['.*/v103'] = host_uniq try: - new_tag['.*/v103'] = host_uniq - except: - print(error_msg.format('service_name')) - else: next_step.node['.*/host_uniq_stub'].set_contents(new_tag) + except: + print(error_msg.format('host_uniq_stub')) else: print('\n***WARNING: Host-Uniq not provided') next_step.node.unfreeze(recursive=True, reevaluate_constraints=True) diff --git a/framework/data_model.py b/framework/data_model.py index 4e7d503..2bed90d 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -287,7 +287,7 @@ def __copy__(self): new_data._pending_ops = {} # we do not copy pending_ops if self.node is not None: - e = Node(self.node.name, base_node=self.node, ignore_frozen_state=False) + e = Node(self.node.name, base_node=self.node, ignore_frozen_state=False, new_env=True) new_data._dm.set_new_env(e) new_data.update_from_node(e) return new_data @@ -4909,10 +4909,10 @@ def __init__(self, name, base_node=None, copy_dico=None, ignore_frozen_state=Fal self._delayed_jobs_called = base_node._delayed_jobs_called if new_env: - self.env = copy.copy(base_node.env) + self.env = Env() if ignore_frozen_state else copy.copy(base_node.env) else: self.env = base_node.env - + node_dico = self.set_contents(base_node, copy_dico=copy_dico, ignore_frozen_state=ignore_frozen_state, accept_external_entanglement=accept_external_entanglement, @@ -6486,11 +6486,11 @@ def update_node_refs(self, node_dico, ignore_frozen_state): del self.nodes_to_corrupt[old_node] new_nodes_to_corrupt[new_node] = op + self.nodes_to_corrupt = new_nodes_to_corrupt + if self.is_empty(): return - self.nodes_to_corrupt = new_nodes_to_corrupt - if ignore_frozen_state: self.exhausted_nodes = [] self.env4NT.reset() diff --git a/framework/encoders.py b/framework/encoders.py index 6033999..b21afda 100644 --- a/framework/encoders.py +++ b/framework/encoders.py @@ -82,7 +82,10 @@ def init_encoding_scheme(self, arg): def to_bytes(val): if isinstance(val, (str, bytes)): if sys.version_info[0] > 2 and not isinstance(val, bytes): - new_val = bytes(val, 'latin_1') + try: + new_val 
= bytes(val, 'latin_1') + except UnicodeEncodeError: + new_val = val.encode('utf8') else: new_val = val elif sys.version_info[0] == 2 and isinstance(val, unicode): diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index a679f9e..31c5bbe 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -115,9 +115,9 @@ def __iter__(self): self._cpt += 1 if self._cpt <= self._initial_step and self._cpt > 1: - self._initial_step = 1 print("\n*** DEBUG: initial_step idx ({:d}) is after" \ " the last idx ({:d})!\n".format(self._initial_step, self._cpt-1)) + self._initial_step = 1 self.consumed_node_path = consumed_node.get_path_from(self._root_node) if self.consumed_node_path == None: return diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index 415fdd1..e92cbf2 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -449,43 +449,55 @@ def set_seed(self, prev_data): def disrupt_data(self, dm, target, data): stop = False - for i in range(self.init): + if self.idx == 0: + step_idx = self.init-1 + else: + step_idx = self.idx + + while self.idx <= step_idx: if self.exist_cst_nodelist: consumed_node = self.exist_cst_nodelist.pop() - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_EXIST_COND) - op_performed = 'existence condition switched' + if self.idx == step_idx: + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_EXIST_COND) + op_performed = 'existence condition switched' elif self.qty_cst_nodelist_1: consumed_node = self.qty_cst_nodelist_1.pop() - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_QTY_SYNC, - corrupt_op=lambda x: x+1) - op_performed = 'increase quantity constraint by 1' + if self.idx == step_idx: + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_QTY_SYNC, + corrupt_op=lambda x: x+1) + op_performed = 'increase quantity constraint by 1' elif self.qty_cst_nodelist_2: consumed_node = self.qty_cst_nodelist_2.pop() - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_QTY_SYNC, - corrupt_op=lambda x: max(x-1, 0)) - op_performed = 'decrease quantity constraint by 1' + if self.idx == step_idx: + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_QTY_SYNC, + corrupt_op=lambda x: max(x-1, 0)) + op_performed = 'decrease quantity constraint by 1' elif self.size_cst_nodelist_1: consumed_node = self.size_cst_nodelist_1.pop() - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_SIZE_SYNC, - corrupt_op=lambda x: x+1) - op_performed = 'increase size constraint by 1' + if self.idx == step_idx: + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_SIZE_SYNC, + corrupt_op=lambda x: x+1) + op_performed = 'increase size constraint by 1' elif self.size_cst_nodelist_2: consumed_node = self.size_cst_nodelist_2.pop() - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_SIZE_SYNC, - corrupt_op=lambda x: max(x-1, 0)) - op_performed = 'decrease size constraint by 1' + if self.idx == step_idx: + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_SIZE_SYNC, + corrupt_op=lambda x: max(x-1, 0)) + op_performed = 'decrease size constraint by 1' elif self.deep and self.minmax_cst_nodelist_1: consumed_node, mini, maxi = self.minmax_cst_nodelist_1.pop() - new_mini = max(0, mini-1) - self.seed.env.add_node_to_corrupt(consumed_node, 
corrupt_type=Node.CORRUPT_NODE_QTY, - corrupt_op=lambda x, y: (new_mini, new_mini)) - op_performed = "set node amount to its minimum minus one" + if self.idx == step_idx: + new_mini = max(0, mini-1) + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_NODE_QTY, + corrupt_op=lambda x, y: (new_mini, new_mini)) + op_performed = "set node amount to its minimum minus one" elif self.deep and self.minmax_cst_nodelist_2: consumed_node, mini, maxi = self.minmax_cst_nodelist_2.pop() - new_maxi = (maxi+1) - self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_NODE_QTY, - corrupt_op=lambda x, y: (new_maxi, new_maxi)) - op_performed = "set node amount to its maximum plus one" + if self.idx == step_idx: + new_maxi = (maxi+1) + self.seed.env.add_node_to_corrupt(consumed_node, corrupt_type=Node.CORRUPT_NODE_QTY, + corrupt_op=lambda x, y: (new_maxi, new_maxi)) + op_performed = "set node amount to its maximum plus one" else: stop = True break diff --git a/framework/logger.py b/framework/logger.py index ff6996b..18024a0 100644 --- a/framework/logger.py +++ b/framework/logger.py @@ -348,7 +348,7 @@ def log_collected_target_feedback(self, preamble=None, epilogue=None): record = False if preamble is not None: - self.log_fn(preamble, do_record=record) + self.log_fn(preamble, do_record=record, rgb=Color.FMKINFO) for fbk, idx in zip(fbk_list, range(len(fbk_list))): timestamp, m, status = fbk @@ -368,7 +368,7 @@ def log_collected_target_feedback(self, preamble=None, epilogue=None): error_detected = True if epilogue is not None: - self.log_fn(epilogue, do_record=record) + self.log_fn(epilogue, do_record=record, rgb=Color.FMKINFO) return error_detected @@ -385,7 +385,7 @@ def log_target_feedback_from(self, feedback, timestamp, record = False if preamble is not None: - self.log_fn(preamble, do_record=record) + self.log_fn(preamble, do_record=record, rgb=Color.FMKINFO) if not decoded_feedback and (status_code is None or status_code >= 0): msg_hdr = "### No Target Feedback!" if source is None else '### No Target Feedback from "{!s}"!'.format( @@ -422,7 +422,7 @@ def log_target_feedback_from(self, feedback, timestamp, status_code=status_code) if epilogue is not None: - self.log_fn(epilogue, do_record=record) + self.log_fn(epilogue, do_record=record, rgb=Color.FMKINFO) def log_operator_feedback(self, feedback, timestamp, op_name, status_code=None): if feedback is None: diff --git a/framework/monitor.py b/framework/monitor.py index d59fd8c..2a5ff0f 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -705,8 +705,8 @@ class Serial_Backend(Backend): """ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbits=1, xonxoff=False, rtscts=False, dsrdtr=False, - username=None, password=None, - slowness_factor=5): + username=None, password=None, slowness_factor=5, + cmd_notfound=b'command not found'): """ Args: serial_port (str): path to the tty device file. (e.g., '/dev/ttyUSB0') @@ -723,6 +723,8 @@ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbit 1 (fastest) to 10 (slowest). This factor is a base metric to compute the time to wait for the authentication step to terminate (if `username` and `password` parameter are provided) and other operations involving to wait for the monitored system. + cmd_notfound (bytes): pattern used to detect if the command does not exist on the + monitored system. 
""" Backend.__init__(self) if not serial_module: @@ -737,6 +739,7 @@ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbit self.rtscts = rtscts self.dsrdtr = dsrdtr self.slowness_factor = slowness_factor + self.cmd_notfound = cmd_notfound if sys.version_info[0] > 2: self.username = bytes(username, 'latin_1') self.password = bytes(password, 'latin_1') @@ -807,7 +810,7 @@ def _exec_command(self, cmd): # print('\n*** DBG: ', result) result = result[:-2] ret = b''.join(result) - if ret.find(b'command not found') != -1: + if ret.find(self.cmd_notfound) != -1: raise BackendError('The command does not exist on the host') else: return ret @@ -888,7 +891,10 @@ def _get_pid(self, logger): nl_before=True) pid = -10 elif len(l) == 1: - pid = int(l[0]) + try: + pid = int(l[0]) + except ValueError: + pid = -10 else: # process not found pid = -1 @@ -1017,22 +1023,22 @@ def main(self, dm, target, logger): self._max_mem = current_mem ok = True + info = "*** '{:s}' maximum RSS: {:d} ***\n".format(self.process_name, self._max_mem) err_msg = '' if self.threshold is not None and self._max_mem > self.threshold: ok = False - err_msg += '\nThreshold exceeded' + err_msg += '\n*** Threshold exceeded ***' if self.tolerance is not None: delta = abs(self._max_mem - self._saved_mem) if (delta/float(self._saved_mem))*100 > self.tolerance: ok = False - err_msg += '\nTolerance exceeded' + err_msg += '\n*** Tolerance exceeded ***' if not ok: status.set_status(-1) - status.set_private_info(err_msg) + status.set_private_info(err_msg+'\n'+info) else: status.set_status(self._max_mem) - status.set_private_info("*** '{:s}' current RSS: {:d} ***\n" - .format(self.process_name, self._saved_mem)) + status.set_private_info(info) return status def reset(self): diff --git a/framework/plumbing.py b/framework/plumbing.py index 6b0cb71..5b46ae1 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -517,7 +517,7 @@ def monitor_probes(self, prefix=None, force_record=False): ok = False if prefix and not prefix_printed: prefix_printed = True - self.lg.print_console('\n*** {:s} ***'.format(prefix), rgb=Color.FEEDBACK) + self.lg.print_console('\n*** {:s} ***'.format(prefix), rgb=Color.FMKINFO) tstamp = pstatus.get_timestamp() priv = pstatus.get_private_info() self.lg.log_probe_feedback(source="Probe '{:s}'".format(pname), @@ -527,7 +527,7 @@ def monitor_probes(self, prefix=None, force_record=False): ret = self._recover_target() if not ok else True if prefix and not ok: - self.lg.print_console('*'*(len(prefix)+8)+'\n', rgb=Color.FEEDBACK) + self.lg.print_console('*'*(len(prefix)+8)+'\n', rgb=Color.FMKINFO) return ret From 07679da295dd6183b02afaac4c81d31ed992d672 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Wed, 3 Aug 2016 11:42:07 +0200 Subject: [PATCH 07/80] Add comments regarding DJobs in Env.__copy__ + add assertion --- framework/data_model.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py index 2bed90d..a8a82a8 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -6603,9 +6603,25 @@ def __copy__(self): new_env.nodes_to_corrupt = copy.copy(self.nodes_to_corrupt) new_env.env4NT = copy.copy(self.env4NT) new_env._dm = copy.copy(self._dm) - new_env._sorted_jobs = copy.copy(self._sorted_jobs) - new_env._djob_keys = copy.copy(self._djob_keys) - new_env._djob_groups = copy.copy(self._djob_groups) + + # DJobs are ignored in the Env copy, because they only matters + # in the context of one node 
graph (Nodes + 1 unique Env) for performing delayed jobs
+        # in that graph. Indeed, all delayed jobs are registered dynamically
+        # (especially in the process of freezing a graph) and do not
+        # provide information even in the case of a frozen graph cloning.
+        # All DJob information is ephemeral; it should only exist in the time frame of
+        # a node graph operation (e.g., freezing, absorption). If DJobs exist while an Env()
+        # is in the process of being copied, it is most probably a bug.
+        #
+        # WARNING: If DJobs need to evolve in the future to support copy, DJobGroup should be
+        # updated during this copy so that the nodes in its node_list attribute are updated too.
+        assert not self._sorted_jobs and not self._djob_keys and not self._djob_groups
+        new_env._sorted_jobs = None
+        new_env._djob_keys = None
+        new_env._djob_groups = None
+        # new_env._sorted_jobs = copy.copy(self._sorted_jobs)
+        # new_env._djob_keys = copy.copy(self._djob_keys)
+        # new_env._djob_groups = copy.copy(self._djob_groups)
         new_env.id_list = copy.copy(self.id_list)
         # new_env.cpt = 0
         return new_env

From 26e4998807cac6f0b9b2c123588d0d249a9c12c0 Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Wed, 3 Aug 2016 16:47:07 +0200
Subject: [PATCH 08/80] Clean up + correct some misspellings in the
 documentation

---
 docs/source/data_manip.rst   | 14 +++----
 docs/source/scenario.rst     |  6 +--
 framework/data_model.py      | 74 ++++++++++--------------------
 framework/logger.py          | 22 ++++------
 framework/monitor.py         |  2 +-
 framework/plumbing.py        |  7 ++--
 framework/tactics_helpers.py |  2 +-
 7 files changed, 43 insertions(+), 84 deletions(-)

diff --git a/docs/source/data_manip.rst b/docs/source/data_manip.rst
index 6be99b8..7d9c155 100644
--- a/docs/source/data_manip.rst
+++ b/docs/source/data_manip.rst
@@ -66,19 +66,19 @@ To guide you over what is possible to perform, let's consider the following
 data:

 This is what we call a data descriptor. It cannot be used directly, it should first be
 transformed to ``fuddly`` internal representation based on :class:`framework.data_model.Node`.
-The code below show how to perform that:
+The code below shows how to perform that:

 .. code-block:: python
    :linenos:

     mh = ModelHelper()
-    rnode = mh.create_graph_from_desc(enc_desc)
+    rnode = mh.create_graph_from_desc(example_desc)
     rnode.set_env(Env())

 ``fuddly`` models data as directed acyclic graphs whose terminal nodes describe the different
 parts of a data format (refer to :ref:`data-model`). In order to
-enable elaborated manipulations it also create a specific object to share between all the nodes
+enable elaborated manipulations it also creates a specific object to share between all the nodes
 some common information related to the graph: the :class:`framework.data_model.Env` object. You
 should note that we create this *environment* object and setup the root node with it. Actually
 it provides all the nodes of the graph with this environment. From now on it is possible to access

@@ -107,7 +107,7 @@ Generate Data a.k.a. Freeze a Graph

 If you want to get a data from the graph you have to freeze it first as it represents many
 different potential data at once (actually it acts like a template). To do so, just call the
 method :meth:`framework.data_model.Node.freeze` on the root node. It will provide you with a nested set of
-list containing the frozen value for each node selected within the graph to provide you with a data.
+lists containing the frozen value for each node selected within the graph to provide you with a data.
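
For instance, a minimal sketch reusing the ``rnode`` built previously:

.. code-block:: python
   :linenos:

    frozen = rnode.freeze()
    # 'frozen' is a nested set of lists mirroring the graph structure; each
    # leaf holds the value selected for the corresponding node (illustrative;
    # the exact nesting depends on the graph).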
What is way more interesting in the general case is obtaining a byte string of the data. For this you just have to call :meth:`framework.data_model.Node.to_bytes` on the root node which will @@ -590,7 +590,7 @@ This infrastructure is based on the following primitives: - :meth:`framework.data_model.Env.remove_node_to_corrupt` The typical way to perform a corruption with this infrastructure is illustrated in what follows. -The example perform a corruption that change from the model the allowed amount for a specific +This example performs a corruption that changes from the model the allowed amount for a specific node (``targeted_node``) of a graph (referenced by ``rnode``) that can be created during the data generation from the graph. @@ -607,8 +607,8 @@ generation from the graph. rnode.env.remove_node_to_corrupt(targeted_node) From now on, you have still a clean graph referenced by ``rnode``, and a corrupted one referenced -by ``corrupt_rnode``. You can now instanciate some data from ``corrupt_rnode`` that complies to an -altered data model (because we change the grammar that constrain the data generation). +by ``corrupt_rnode``. You can now instantiate some data from ``corrupt_rnode`` that complies to an +altered data model (because we change the grammar that constrains the data generation). The corruption operations currently defined are: diff --git a/docs/source/scenario.rst b/docs/source/scenario.rst index f58c4ac..581d287 100644 --- a/docs/source/scenario.rst +++ b/docs/source/scenario.rst @@ -17,7 +17,7 @@ Once a `scenario` has been defined and registered, ``Fuddly`` will automatically `Generators` and `Disruptors`. The Generators which are backed by a scenario are prefixed by ``SC_``. -A `scenario` is a state-machine. Its description follow an oriented graph where the nodes, called +A `scenario` is a state-machine. Its description follows an oriented graph where the nodes, called `steps`, define the data to be sent to the target. The transitions that interconnect these steps can be guarded by different kinds of callbacks that trigger at different moment (before the framework sends the data, after sending the data, or after having retrieved any feedback @@ -56,7 +56,7 @@ Let's begin with a simple example that interconnect 3 steps in a loop without an tactics.register_scenarios(sc1) -You should first note that scenarios have to be described in a ``*_strategy.py`` file that match +You should first note that scenarios have to be described in a ``*_strategy.py`` file that matches the data model you base your scenarios on. In our case we use the data model ``mydf`` defined in ``tuto.py`` (refer to :ref:`dm:mydf` for further explanation on file organization). The special object ``tactics`` (line 4) is usually used to register the data makers (`disruptors` or @@ -68,7 +68,7 @@ From line 9 to 11 we define 3 :class:`framework.scenario.Step`: - The first one commands the framework to send a data of type ``exist_cond`` (which is the name of a data registered in the data model ``mydf``) as well as starting 2 tasks (threaded entities of the framework) that will emit each one a specific data. The first one will send the specified string every 5 seconds - while the other one will send another string only once. Finally, the step set also the maximum + while the other one will send another string only once. Finally, the step sets also the maximum time duration that ``Fuddly`` should respect for collecting the feedback from the target (feedback timeout). 
This timeout is actually handled by the ``Target`` object, which may decide to respect it or not. For instance the ``NetworkTarget`` respect it while the ``EmptyTarget`` (default target) diff --git a/framework/data_model.py b/framework/data_model.py index 2bed90d..f6326d7 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -660,10 +660,8 @@ class RawCondition(NodeCondition): def __init__(self, val=None, neg_val=None): ''' Args: - val (bytes): byte representation (or list of byte representations) - that satisfies the condition - neg_val (bytes): byte representation (or list of byte representations) - that does NOT satisfy the condition + val (bytes/:obj:`list` of bytes): value(s) that satisfies the condition + neg_val (bytes/:obj:`list` of bytes): value(s) that does NOT satisfy the condition ''' assert((val is not None and neg_val is None) or (val is None and neg_val is not None)) if val is not None: @@ -703,8 +701,8 @@ class IntCondition(NodeCondition): def __init__(self, val=None, neg_val=None): ''' Args: - val (int): integer (or integer list) that satisfies the condition - neg_val (int): integer (or integer list) that does NOT satisfy the condition + val (int/:obj:`list` of int): integer(s) that satisfies the condition + neg_val (int/:obj:`list` of int): integer(s) that does NOT satisfy the condition ''' assert((val is not None and neg_val is None) or (val is None and neg_val is not None)) if val is not None: @@ -738,9 +736,11 @@ class BitFieldCondition(NodeCondition): def __init__(self, sf, val=None, neg_val=None): ''' Args: - sf (int): subfield (or subfield list) of the BitField() on which the condition apply - val (int): integer (or integer list or list of integer list) that satisfies the condition - neg_val (int): integer (or integer list or list of integer list) that does NOT satisfy the condition + sf (int/:obj:`list` of int): subfield(s) of the BitField() on which the condition apply + val (int/:obj:`list` of int/:obj:`list` of :obj:`list` of int): integer(s) that + satisfies the condition(s) + neg_val (int/:obj:`list` of int/:obj:`list` of :obj:`list` of int): integer(s) that + does NOT satisfy the condition(s) ''' assert(val is not None or neg_val is not None) @@ -4802,7 +4802,7 @@ class Node(object): '''A Node is the basic building-block used within a graph-based data model. Attributes: - internals (dict: str --> :class:`NodeInternals`): Contains all the configuration of a + internals (:obj:`dict` of :obj:`str` --> :class:`NodeInternals`): Contains all the configuration of a node. A configuration is associated to the internals/contents of a node, which can live independently of the other configuration. 
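
As a side note on the condition classes documented above, they are typically attached to a node
descriptor through an ``exists_if`` clause. A minimal, hypothetical sketch (the field names are
illustrative; the same pattern appears in the PPPoE data model later in this series):

    opt_desc = {'name': 'v101',
                # keep 'v101' only if the node named 'type' resolves to 0x0101
                'exists_if': (IntCondition(val=0x0101), 'type'),
                'contents': String(val_list=['my service'])}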
@@ -5174,10 +5174,7 @@ def __get_confs(self): '''Property giving all node's configurations (read only)''' def _set_subtrees_current_conf(self, node, conf, reverse, ignore_entanglement=False): - if node.is_conf_existing(conf): - conf2 = conf - else: - conf2 = node.current_conf + conf2 = conf if node.is_conf_existing(conf) else node.current_conf if not reverse: node.current_conf = conf2 @@ -5515,15 +5512,8 @@ def make_infinite(self, conf=None, all_conf=False, recursive=False): self.clear_attr(NodeInternals.Finite, conf, all_conf=all_conf, recursive=recursive) def _compute_confs(self, conf, recursive): - if recursive: - next_conf = conf - else: - next_conf = None - - if not self.is_conf_existing(conf): - current_conf = self.current_conf - else: - current_conf = conf + next_conf = conf if recursive else None + current_conf = conf if self.is_conf_existing(conf) else self.current_conf if self.is_genfunc(current_conf): next_conf = conf @@ -5744,19 +5734,13 @@ def get_node_by_path(self, path_regexp=None, path=None, conf=None): def _get_all_paths_rec(self, pname, htable, conf, recursive, first=True, clone_idx=0): - if recursive: - next_conf = conf - else: - next_conf = None + next_conf = conf if recursive else None if not self.is_conf_existing(conf): conf = self.current_conf internal = self.internals[conf] - if first: - name = self.name - else: - name = pname + '/' + self.name + name = self.name if first else pname + '/' + self.name if name in htable: htable[(name, clone_idx)] = self @@ -5847,15 +5831,8 @@ def freeze(self, conf=None, recursive=True, return_node_internals=False): def _get_value(self, conf=None, recursive=True, return_node_internals=False): - if recursive: - next_conf = conf - else: - next_conf = None - - if not self.is_conf_existing(conf): - conf2 = self.current_conf - else: - conf2 = conf + next_conf = conf if recursive else None + conf2 = conf if self.is_conf_existing(conf) else self.current_conf if self.is_genfunc(conf2): next_conf = conf @@ -5975,10 +5952,7 @@ def unfreeze(self, conf=None, recursive=True, dont_change_state=False, ignore_en reevaluate_constraints=False): self._delayed_jobs_called = False - if conf is not None: - next_conf = conf - else: - next_conf = None + next_conf = conf if not self.is_conf_existing(conf): conf = self.current_conf @@ -6174,10 +6148,7 @@ def is_node_used_more_than_once(name): for item in l: if re.search(name+'$', item[0]): node_list.append(item[1]) - if len(node_list) != len(set(node_list)): - return True - else: - return False + return len(node_list) != len(set(node_list)) if is_node_used_more_than_once(node.name): graph_deco = ' --> M' @@ -6437,7 +6408,7 @@ def remove_node_to_corrupt(self, node): del self.nodes_to_corrupt[node] def exhausted_node_exists(self): - return False if len(self.exhausted_nodes) == 0 else True + return len(self.exhausted_nodes) > 0 def get_exhausted_nodes(self): return copy.copy(self.exhausted_nodes) @@ -6446,10 +6417,7 @@ def notify_exhausted_node(self, node): self.exhausted_nodes.append(node) def is_node_exhausted(self, node): - if node in self.exhausted_nodes: - return True - else: - return False + return node in self.exhausted_nodes def clear_exhausted_node(self, node): try: diff --git a/framework/logger.py b/framework/logger.py index 18024a0..1f05f6c 100644 --- a/framework/logger.py +++ b/framework/logger.py @@ -281,15 +281,11 @@ def commit_log_entry(self, group_id, prj_name, tg_name): def log_fmk_info(self, info, nl_before=False, nl_after=False, rgb=Color.FMKINFO, data_id=None, do_record=True): now = 
datetime.datetime.now()
-        if nl_before:
-            p = '\n'
-        else:
-            p = ''
-        if nl_after:
-            s = '\n'
-        else:
-            s = ''
-        msg = p + "*** [ %s ] ***" % info + s
+
+        p = '\n' if nl_before else ''
+        s = '\n' if nl_after else ''
+
+        msg = "{prefix:s}*** [ {message:s} ] ***{suffix:s}".format(prefix=p, suffix=s, message=info)
         self.log_fn(msg, rgb=rgb)
         data_id = self.last_data_id if data_id is None else data_id
         if do_record:
@@ -722,12 +718,8 @@ def print_console(self, msg, nl_before=True, nl_after=False, rgb=None, style=Non
 
         prefix = p + self.p
 
-        if sys.version_info[0] > 2:
-            if issubclass(msg.__class__, Data) or isinstance(msg, bytes):
-                msg = repr(msg)
-        else:
-            if issubclass(msg.__class__, Data):
-                msg = repr(msg)
+        if (sys.version_info[0] > 2 and isinstance(msg, bytes)) or issubclass(msg.__class__, Data):
+            msg = repr(msg)
 
         suffix = ''
         if limit_output and len(msg) > raw_limit:
diff --git a/framework/monitor.py b/framework/monitor.py
index 2a5ff0f..c7cdd91 100644
--- a/framework/monitor.py
+++ b/framework/monitor.py
@@ -197,7 +197,7 @@ def wait_until_ready(self, timeout=None):
             self._wait_for_probe(self._probe_status_event, timeout)
         except ProbeTimeoutError as e:
             e.blocking_methods = ["main()"]
-            raise e
+            raise
 
     def notify_blocking(self):
         self._blocking_event.set()
diff --git a/framework/plumbing.py b/framework/plumbing.py
index 5b46ae1..3bf04ac 100644
--- a/framework/plumbing.py
+++ b/framework/plumbing.py
@@ -374,8 +374,7 @@ def reload_dm(self):
             self.reload_dm()
 
             # reloading is based on name because DM objects have changed
-            ok = self.load_multiple_data_model(name_list=name_list, reload_dm=True)
-            if not ok:
+            if not self.load_multiple_data_model(name_list=name_list, reload_dm=True):
                 self.set_error("Error encountered while reloading the composed Data Model")
 
         else:
@@ -392,8 +391,8 @@ def reload_dm(self):
                 return False
 
             self._cleanup_dm_attrs_from_fmk()
-            ok = self._load_data_model()
-            if not ok:
+
+            if not self._load_data_model():
                 return False
 
             self.prj.set_data_model(self.dm)
diff --git a/framework/tactics_helpers.py b/framework/tactics_helpers.py
index f91e758..b200683 100644
--- a/framework/tactics_helpers.py
+++ b/framework/tactics_helpers.py
@@ -443,7 +443,7 @@ def _handle_user_inputs(dmaker, ui):
             setattr(dmaker, k, ui_val)
 
     if dmaker._gen_args_desc and \
-            (issubclass(dmaker.__class__, Disruptor) or issubclass(dmaker.__class__, StatefulDisruptor)) and \
+            issubclass(dmaker.__class__, (Disruptor, StatefulDisruptor)) and \
             dmaker._gen_args_desc == GENERIC_ARGS:
         modelwalker_inputs_handling_helper(dmaker, generic_ui)
 
From a3197ada1f353cbc9e158cf3666d30c890a9c490 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Wed, 3 Aug 2016 19:21:00 +0200
Subject: [PATCH 09/80] Move to 'utf8' internal representation

Enables support for String() with unicode characters and fixes a coding
inconsistency in a specific situation with PythonCodec_Enc().
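
For illustration, with the helpers this patch introduces in framework/global_resources.py, a
unicode string now round-trips through the internal representation as follows (Python 3 shown;
a sketch, not part of the patch itself):

    >>> convert_to_internal_repr(u'my über service')
    b'my \xc3\xbcber service'
    >>> unconvert_from_internal_repr(b'my \xc3\xbcber service')
    'my über service'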
--- data_models/example.py | 6 +- data_models/protocols/pppoe.py | 4 +- docs/source/data_model.rst | 7 ++- framework/basic_primitives.py | 9 +-- framework/data_model.py | 45 +++++-------- framework/database.py | 20 ++---- framework/encoders.py | 36 ++--------- framework/global_resources.py | 31 +++++++++ framework/logger.py | 25 +++----- framework/monitor.py | 29 +++++---- framework/target.py | 5 +- framework/test.py | 112 +++++++++++++++++---------------- framework/value_types.py | 21 +------ 13 files changed, 160 insertions(+), 190 deletions(-) diff --git a/data_models/example.py b/data_models/example.py index 954f042..eabc765 100644 --- a/data_models/example.py +++ b/data_models/example.py @@ -60,7 +60,7 @@ def build_data_model(self): kv.add_conf('ALT') kv.set_values(tux_subparts_3, conf='ALT') - tux_subparts_4 = ['[\xc2]PLIP', '[\xc2]GLOUP'] + tux_subparts_4 = [u'[\u00c2]PLIP', u'[\u00c2]GLOUP'] ku.add_conf('ALT') ku.set_values(tux_subparts_4, conf='ALT') @@ -132,9 +132,9 @@ def build_data_model(self): concat.set_func(fct, tux) if sys.version_info[0] > 2: - fct = lambda x: b'___' + bytes(chr(x[1]), 'latin_1') + b'___' + fct = lambda x: b'___' + bytes(chr(x[1]), internal_repr_codec) + b'___' else: - fct = lambda x: b'___' + bytes(x[1]) + b'___' + fct = lambda x: b'___' + x[1] + b'___' concat.add_conf('ALT') concat.set_func(fct, tux, conf='ALT') diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index f834c6b..d0445d3 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -1,5 +1,3 @@ -# -*- coding: latin-1 -*- - ################################################################################ # # Copyright 2014-2016 Eric Lacombe @@ -58,7 +56,7 @@ def build_data_model(self): {'name': 'v101', # Service Name 'exists_if': (IntCondition(0x0101), 'type'), 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['my \xfcber service']), # \xfc=ü encoded in latin1 + 'contents': UTF8(val_list=[u'my \u00fcber service']), }, {'name': 'v102', # AC name 'exists_if': (IntCondition(0x0102), 'type'), diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index bf5b5cd..fa6626a 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -163,11 +163,14 @@ that enables to handle transparently any encoding scheme: Below the different currently defined string types: -- :class:`framework.value_types.String`: General purpose character string. +- :class:`framework.value_types.String`: General purpose character string (with ``utf8`` encoding). - :class:`framework.value_types.Filename`: Filename. Similar to the type ``String``, but some disruptors like ``tTYPE`` will generate more specific test cases. - :class:`framework.value_types.UTF8`: ``String`` encoded in ``UTF8``. + It provides the same encoding as a ``String``, but using it in a data model for describing UTF8 + fields is preferable because: a disruptor may use that information for playing around UTF8, and you + are agnostic from String encoding choice. - :class:`framework.value_types.UTF16_LE`: ``String`` encoded in ``UTF16`` little-endian. Note that some test cases on the encoding scheme are defined. - :class:`framework.value_types.UTF16_BE`: ``String`` encoded in ``UTF16`` big-endian. @@ -175,7 +178,7 @@ Below the different currently defined string types: - :class:`framework.value_types.Codec`: ``String`` encoded in any standard encoding supported by Python. You have to provide the parameter ``encoding_arg`` with the codec you want to use. 
If no codec is provided, this class will behave the same as the class - :class:`framework.value_types.String`, that is, the ``latin_1`` codec will be used. + :class:`framework.value_types.String`, that is, the ``utf8`` codec will be used. - :class:`framework.value_types.GZIP`: ``String`` compressed with ``zlib``. The parameter ``encoding_arg`` is used to specify the level of compression (0-9). - :class:`framework.value_types.GSM7bitPacking`: ``String`` encoded in conformity diff --git a/framework/basic_primitives.py b/framework/basic_primitives.py index 53ab234..92ab0d7 100644 --- a/framework/basic_primitives.py +++ b/framework/basic_primitives.py @@ -26,6 +26,8 @@ import string import array +from framework.global_resources import convert_to_internal_repr + def rand_string(size=None, mini=1, maxi=10, str_set=string.printable): out = "" @@ -35,12 +37,7 @@ def rand_string(size=None, mini=1, maxi=10, str_set=string.printable): val = random.choice(str_set) out += val - if sys.version_info[0] > 2: - out = bytes(out, 'latin_1') - else: - out = bytes(out) - - return out + return convert_to_internal_repr(out) def corrupt_bytes(s, p=0.01, n=None, ctrl_char=False): diff --git a/framework/data_model.py b/framework/data_model.py index a8a82a8..74e26e6 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -96,8 +96,9 @@ def get_initial_dmaker(self): return self._type def update_from_str_or_bytes(self, data_str): - if sys.version_info[0] > 2 and not isinstance(data_str, bytes): - data_str = bytes(data_str, 'latin_1') + if sys.version_info[0] > 2: + if not isinstance(data_str, bytes): + data_str = data_str.encode('utf8') self.raw = data_str self.node = None @@ -123,10 +124,7 @@ def to_str(self): val = self.node.to_str() return val else: - if sys.version_info[0] > 2: - return self.raw.decode('latin_1') - else: - return self.raw + return unconvert_from_internal_repr(self.raw) def make_blocked(self): self._blocked = True @@ -407,25 +405,8 @@ def flatten(nested): yield x -def convert_to_internal_repr(val): - if isinstance(val, int): - val = bytes(val) - elif not isinstance(val, (str, bytes)): - val = repr(val) - elif sys.version_info[0] > 2 and not isinstance(val, bytes): - val = bytes(val, 'latin_1') - return val - -def unconvert_from_internal_repr(val): - assert(isinstance(val, bytes)) - if sys.version_info[0] > 2: - val = val.decode('latin_1') - return val - - nodes_weight_re = re.compile('(.*?)\((.*)\)') - class AbsorbStatus(Enum): Accept = 1 @@ -1880,12 +1861,7 @@ def _init_specific(self, arg): @staticmethod def _convert_to_internal_repr(val): - if not isinstance(val, (str, bytes)): - val = repr(val) - if sys.version_info[0] > 2 and not isinstance(val, bytes): - val = bytes(val, 'latin_1') - return val - + return convert_to_internal_repr(val) def _make_private_specific(self, ignore_frozen_state, accept_external_entanglement): if ignore_frozen_state: @@ -5937,6 +5913,17 @@ def to_str(self, conf=None, recursive=True): val = self.to_bytes(conf=conf, recursive=recursive) return unconvert_from_internal_repr(val) + def to_ascii(self, conf=None, recursive=True): + val = self.to_str(conf=conf, recursive=recursive) + try: + if sys.version_info[0] > 2: + val = eval('{!a}'.format(val)) + else: + val = str(val) + except: + val = repr(val) + finally: + return val def _tobytes(self, conf=None, recursive=True): diff --git a/framework/database.py b/framework/database.py index ce9fbb1..30769c8 100644 --- a/framework/database.py +++ b/framework/database.py @@ -499,10 +499,9 @@ def 
handle_dmaker(dmk_pattern, info, dmk_type, dmk_name, name_sep_sz, id_src=Non msg += colorize(" | ID source: ", rgb=Color.FMKINFO) msg += colorize(str(id_src), rgb=Color.FMKSUBINFO) if info is not None: + info = gr.unconvert_from_internal_repr(info) if sys.version_info[0] > 2: - info = info.decode("latin_1") - else: - info = str(info) + info = eval('{!a}'.format(info)) info = info.split('\n') for i in info: chks = chunk_lines(i, page_width - prefix_sz - 10) @@ -584,11 +583,7 @@ def handle_dmaker(dmk_pattern, info, dmk_type, dmk_name, name_sep_sz, id_src=Non msg = '' if with_data: msg += colorize("\n Sent Data:\n", rgb=Color.FMKINFOGROUP) - if sys.version_info[0] > 2: - data_content = data_content.decode("latin_1") - data_content = "{!a}".format(data_content) - else: - data_content = repr(str(data_content)) + data_content = gr.unconvert_from_internal_repr(data_content) if len(data_content) > limit_data_sz: data_content = data_content[:limit_data_sz] data_content = data_content @@ -608,11 +603,9 @@ def handle_dmaker(dmk_pattern, info, dmk_type, dmk_name, name_sep_sz, id_src=Non colorize(")", rgb=Color.FMKINFOGROUP) + \ colorize(" = {!s}".format(status), rgb=Color.FMKSUBINFO) if content: + content = gr.unconvert_from_internal_repr(content) if sys.version_info[0] > 2: - content = content.decode("latin_1") - content = "{!a}".format(content) - else: - content = repr(str(content)) + content = eval('{!a}'.format(content)) chks = chunk_lines(content, page_width - 4) for c in chks: c_sz = len(c) @@ -948,8 +941,7 @@ def get_data_with_specific_fbk(self, fbk, prj_name=None, fbk_src=None, display=T colorized=True): colorize = self._get_color_function(colorized) - if sys.version_info[0] > 2: - fbk = bytes(fbk, 'latin_1') + fbk = gr.convert_to_internal_repr(fbk) if fbk_src: fbk_records = self.execute_sql_statement( diff --git a/framework/encoders.py b/framework/encoders.py index b21afda..adf7784 100644 --- a/framework/encoders.py +++ b/framework/encoders.py @@ -27,6 +27,8 @@ import copy import binascii +from framework.global_resources import * + class Encoder(object): def __init__(self, encoding_arg): self._encoding_arg = encoding_arg @@ -80,33 +82,7 @@ def init_encoding_scheme(self, arg): @staticmethod def to_bytes(val): - if isinstance(val, (str, bytes)): - if sys.version_info[0] > 2 and not isinstance(val, bytes): - try: - new_val = bytes(val, 'latin_1') - except UnicodeEncodeError: - new_val = val.encode('utf8') - else: - new_val = val - elif sys.version_info[0] == 2 and isinstance(val, unicode): - try: - new_val = val.encode('latin_1') - except UnicodeEncodeError: - new_val = val.encode('utf8') - elif isinstance(val, (tuple, list)): - new_val = [] - for v in val: - if sys.version_info[0] > 2 and not isinstance(v, bytes): - new_v = bytes(v, 'latin_1') - else: - new_v = v - new_val.append(new_v) - elif val is None: - new_val = b'' - else: - raise ValueError - - return new_val + return convert_to_internal_repr(val) class PythonCodec_Enc(Encoder): @@ -115,17 +91,17 @@ class PythonCodec_Enc(Encoder): """ def init_encoding_scheme(self, arg=None): if arg is None: - self._codec = 'latin_1' + self._codec = internal_repr_codec else: self._codec = arg def encode(self, val): - enc = val.decode('latin_1').encode(self._codec) + enc = val.decode(internal_repr_codec, 'replace').encode(self._codec) return enc def decode(self, val): try: - dec = val.decode(self._codec) + dec = val.decode(self._codec, 'strict') except: dec = b'' return Encoder.to_bytes(dec) diff --git a/framework/global_resources.py 
b/framework/global_resources.py index ef46d32..2a75ea8 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -66,6 +66,37 @@ fmk_folder = app_folder + os.sep + 'framework' + os.sep +internal_repr_codec = 'utf8' +def convert_to_internal_repr(val): + if val is None: + val = b'' + elif isinstance(val, int): + val = bytes(val) + # elif not isinstance(val, (str, bytes)): + # val = repr(val) + elif isinstance(val, (tuple, list)): + new_val = [] + for v in val: + new_v = convert_to_internal_repr(v) + new_val.append(new_v) + val = new_val + elif sys.version_info[0] > 2: + if not isinstance(val, bytes): + val = val.encode(internal_repr_codec) + elif isinstance(val, unicode): # only for python2 + val = val.encode(internal_repr_codec) + elif isinstance(val, str): # only for python2 (and str ~ ascii) + pass + else: + print(val, repr(val)) + raise ValueError + return val + +def unconvert_from_internal_repr(val): + assert(isinstance(val, bytes)) + return val.decode(internal_repr_codec, 'replace') + + class Error(object): Reserved = -1 diff --git a/framework/logger.py b/framework/logger.py index 18024a0..c7dd2fe 100644 --- a/framework/logger.py +++ b/framework/logger.py @@ -133,7 +133,7 @@ def init_logfn(x, nl_before=True, nl_after=False, rgb=None, style=None, verbose= rgb = None style = None elif issubclass(x.__class__, bytes) and sys.version_info[0] > 2: - data = repr(x) if self.__export_raw_data else x.decode('latin-1') + data = repr(x) if self.__export_raw_data else x.decode(internal_repr_codec) else: data = x self.print_console(data, nl_before=nl_before, nl_after=nl_after, rgb=rgb, style=style) @@ -173,7 +173,7 @@ def intern_func(x, nl_before=True, nl_after=False, rgb=None, style=None, verbose rgb = None style = None elif issubclass(x.__class__, bytes) and sys.version_info[0] > 2: - data = repr(x) if self.__export_raw_data else x.decode('latin-1') + data = repr(x) if self.__export_raw_data else x.decode(internal_repr_codec) else: data = x self.print_console(data, nl_before=nl_before, nl_after=nl_after, rgb=rgb, style=style) @@ -261,10 +261,7 @@ def commit_log_entry(self, group_id, prj_name, tg_name): info = self._current_dmaker_info.get((dmaker_type,dmaker_name), None) if info is not None: info = '\n'.join(info) - if sys.version_info[0] > 2: - info = bytes(info, 'latin_1') - else: - info = bytes(info) + info = convert_to_internal_repr(info) self.fmkDB.insert_steps(self.last_data_id, step_id, dmaker_type, dmaker_name, self._current_src_data_id, str(user_input), info) @@ -309,10 +306,8 @@ def collect_target_feedback(self, fbk, status_code=None): """ now = datetime.datetime.now() - if sys.version_info[0] > 2 and isinstance(fbk, bytes): - fbk = fbk.decode('latin_1') with self._tg_fbk_lck: - self._tg_fbk.append((now, str(fbk), status_code)) + self._tg_fbk.append((now, fbk, status_code)) def log_collected_target_feedback(self, preamble=None, epilogue=None): """ @@ -469,25 +464,23 @@ def _decode_target_feedback(self, feedback): for f in feedback: new_f = f.strip() if sys.version_info[0] > 2 and new_f and isinstance(new_f, bytes): - new_f = new_f.decode('latin_1') - new_f = '{!a}'.format(new_f) + new_f = new_f.decode(internal_repr_codec) + new_f = eval('{!a}'.format(new_f)) new_fbk.append(new_f) if not list(filter(lambda x: x != b'', new_fbk)): new_fbk = None else: new_fbk = feedback.strip() if sys.version_info[0] > 2 and new_fbk and isinstance(new_fbk, bytes): - new_fbk = new_fbk.decode('latin_1') - new_fbk = '{!a}'.format(new_fbk) + new_fbk = new_fbk.decode(internal_repr_codec) 
+            new_fbk = eval('{!a}'.format(new_fbk))
         return new_fbk
 
     def _encode_target_feedback(self, feedback):
         if feedback is None:
             return None
-        if sys.version_info[0] > 2 and not isinstance(feedback, bytes):
-            feedback = bytes(feedback, 'latin_1')
-        return feedback
+        return convert_to_internal_repr(feedback)
 
     def log_probe_feedback(self, source, timestamp, content, status_code, force_record=False):
         if self.last_data_recordable or not self.__explicit_data_recording or force_record:
diff --git a/framework/monitor.py b/framework/monitor.py
index 2a5ff0f..89f0721 100644
--- a/framework/monitor.py
+++ b/framework/monitor.py
@@ -627,8 +627,13 @@ def get_timestamp(self):
 
 class Backend(object):
 
-    def __init__(self):
+    def __init__(self, codec='latin_1'):
+        """
+        Args:
+            codec (str): codec used by the monitored system to answer.
+        """
         self._started = False
+        self.codec = codec
         self._sync_lock = threading.Lock()
 
     def start(self):
@@ -661,15 +666,16 @@ class SSH_Backend(Backend):
     """
     Backend to execute commands through an SSH connection.
     """
-    def __init__(self, username, password, sshd_ip, sshd_port=22):
+    def __init__(self, username, password, sshd_ip, sshd_port=22, codec='latin_1'):
         """
         Args:
             sshd_ip (str): IP of the SSH server.
             sshd_port (int): port of the SSH server.
             username (str): username to connect with.
             password (str): password related to the username.
+            codec (str): codec used by the monitored system to answer.
         """
-        Backend.__init__(self)
+        Backend.__init__(self, codec=codec)
         if not ssh_module:
             raise eh.UnavailablePythonModule('Python module for SSH is not available!')
         self.sshd_ip = sshd_ip
@@ -706,7 +712,7 @@ class Serial_Backend(Backend):
     def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbits=1,
                  xonxoff=False, rtscts=False, dsrdtr=False,
                  username=None, password=None, slowness_factor=5,
-                 cmd_notfound=b'command not found'):
+                 cmd_notfound=b'command not found', codec='latin_1'):
         """
         Args:
             serial_port (str): path to the tty device file. (e.g., '/dev/ttyUSB0')
@@ -725,8 +731,9 @@ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbit
                 and other operations involving to wait for the monitored system.
             cmd_notfound (bytes): pattern used to detect if the command does not exist on the
                 monitored system.
+ codec (str): codec used to send/receive information through the serial line """ - Backend.__init__(self) + Backend.__init__(self, codec=codec) if not serial_module: raise eh.UnavailablePythonModule('Python module for Serial is not available!') @@ -741,8 +748,8 @@ def __init__(self, serial_port, baudrate=115200, bytesize=8, parity='N', stopbit self.slowness_factor = slowness_factor self.cmd_notfound = cmd_notfound if sys.version_info[0] > 2: - self.username = bytes(username, 'latin_1') - self.password = bytes(password, 'latin_1') + self.username = bytes(username, self.codec) + self.password = bytes(password, self.codec) else: self.username = username self.password = password @@ -793,7 +800,7 @@ def _stop(self): def _exec_command(self, cmd): if sys.version_info[0] > 2: - cmd = bytes(cmd, 'latin_1') + cmd = bytes(cmd, self.codec) cmd += b'\r\n' self.ser.flushInput() self.ser.write(cmd) @@ -868,7 +875,7 @@ def _get_pid(self, logger): fallback_cmd = 'ps a -opid,comm' res = self.backend.exec_command(fallback_cmd) if sys.version_info[0] > 2: - res = res.decode('latin_1') + res = res.decode(self.backend.codec) pid_list = res.split('\n') for entry in pid_list: if entry.find(self.process_name) >= 0: @@ -882,7 +889,7 @@ def _get_pid(self, logger): pid = -1 else: if sys.version_info[0] > 2: - res = res.decode('latin_1') + res = res.decode(self.backend.codec) l = res.split() if len(l) > 1: logger.print_console("*** ERROR: more than one PID detected for process name '{:s}'" @@ -978,7 +985,7 @@ def _get_mem(self): res = self.backend.exec_command(self.command_pattern.format(self.process_name)) if sys.version_info[0] > 2: - res = res.decode('latin_1') + res = res.decode(self.backend.codec) proc_list = res.split('\n') for entry in proc_list: if entry.find(self.process_name) >= 0: diff --git a/framework/target.py b/framework/target.py index bdca838..398e8ea 100644 --- a/framework/target.py +++ b/framework/target.py @@ -1588,13 +1588,14 @@ def get_feedback(self, delay=0.2): class SIMTarget(Target): delay_between_write = 0.1 # without, it seems some commands can be lost - def __init__(self, serial_port, baudrate, pin_code, targeted_tel_num): + def __init__(self, serial_port, baudrate, pin_code, targeted_tel_num, codec='latin_1'): self.serial_port = serial_port self.baudrate = baudrate self.tel_num = targeted_tel_num self.pin_code = pin_code + self.codec = codec if sys.version_info[0]>2: - self.pin_code = bytes(self.pin_code, 'latin_1') + self.pin_code = bytes(self.pin_code, self.codec) self.set_feedback_timeout(2) def start(self): diff --git a/framework/test.py b/framework/test.py index f44b84d..6341d23 100644 --- a/framework/test.py +++ b/framework/test.py @@ -1,4 +1,4 @@ -# -*- coding: latin-1 -*- +# -*- coding: utf8 -*- ################################################################################ # @@ -509,14 +509,16 @@ def test_01(self): node_ex1.set_current_conf('ALT', root_regexp=None) + nonascii_test_str = u'\u00c2'.encode(internal_repr_codec) + node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) @@ 
-524,7 +526,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or b'[\xc2]' in msg: + if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or nonascii_test_str in msg: res2 = False print(msg) @@ -535,7 +537,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) @@ -658,19 +660,19 @@ def test_01(self): res1 = True msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or b'[\xc2]' not in msg: + if b'[<]' not in msg or nonascii_test_str not in msg: res1 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or b'[\xc2]' not in msg: + if b'[<]' not in msg or nonascii_test_str not in msg: res1 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.get_node_by_path('TUX$').to_bytes(conf='ALT', recursive=False) - if b'[<]' in msg or b'[\xc2]' in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: + if b'[<]' in msg or nonascii_test_str in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: res1 = False print(msg) @@ -1596,7 +1598,7 @@ def test_NonTermVisitor(self): data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 nonterm_consumer = NonTermVisitor(respect_order=True) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] '%idx + rnode.to_str(), rgb=Color.INFO)) + print(colorize('[%d] '%idx + rnode.to_ascii(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') @@ -1604,7 +1606,7 @@ def test_NonTermVisitor(self): data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 nonterm_consumer = NonTermVisitor(respect_order=False) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] '%idx + rnode.to_str(), rgb=Color.INFO)) + print(colorize('[%d] '%idx + rnode.to_ascii(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') @@ -1657,41 +1659,41 @@ def test_basics(self): data = mh.create_graph_from_desc(shape_desc) raw_vals = [ - ' [!] ++++++++++ [!] ::=:: [!] ', - ' [!] ++++++++++ [!] ::?:: [!] ', - ' [!] ++++++++++ [!] ::\xff:: [!] ', - ' [!] ++++++++++ [!] ::\x00:: [!] ', - ' [!] ++++++++++ [!] ::\x01:: [!] ', - ' [!] ++++++++++ [!] ::\x80:: [!] ', - ' [!] ++++++++++ [!] ::\x7f:: [!] ', - ' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit - ' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', - ' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit - ' [!] >>>>>>>>>> [!] 
::AAAA::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] ' + b' [!] ++++++++++ [!] ::=:: [!] ', + b' [!] ++++++++++ [!] ::?:: [!] ', + b' [!] ++++++++++ [!] ::\xff:: [!] ', + b' [!] ++++++++++ [!] ::\x00:: [!] ', + b' [!] ++++++++++ [!] ::\x01:: [!] ', + b' [!] ++++++++++ [!] ::\x80:: [!] ', + b' [!] ++++++++++ [!] ::\x7f:: [!] ', + b' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit + b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', + b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit + b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] 
' ] tn_consumer = TypedNodeDisruption() @@ -1701,7 +1703,7 @@ def test_basics(self): negative_node_subkinds=[String]) tn_consumer.set_node_interest(internals_criteria=ic) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, tn_consumer, make_determinist=True, max_steps=100): - val = rnode.to_str() + val = rnode.to_bytes() print(colorize('[%d] '%idx + repr(val), rgb=Color.INFO)) if idx not in [8, 22]: self.assertEqual(val, raw_vals[idx-1]) @@ -2096,7 +2098,7 @@ def test_absorb_nonterm_2(self): top.set_env(Env()) # 2*nint_3 + nstr_1 + nstr_2 + 2*nint_2 + nint_1 - msg = '\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' + msg = b'\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' status, off, size, name = top.absorb(msg) print('\n ---[message to absorb]---') @@ -2257,10 +2259,10 @@ def nint_10_helper(blob, constraints, node_internals): top.set_env(Env()) top2.set_env(Env()) - msg = '\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + msg = b'\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' # middle1: nint_1_alt + nint_3 + 2*nint_1 + nstr_1('ABCD') + nint_51 + 2*nstr_50 + nint_50 - msg2 = '\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + msg2 = b'\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' print('\n****** top ******\n') status, off, size, name = top.absorb(msg) @@ -2972,31 +2974,31 @@ def decode(self, val): gsm_dec = gsm_t.decode(gsm_enc) self.assertEqual(msg, gsm_dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) #' b'o\xf9 \xe7a' vtype = UTF16_LE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = UTF16_BE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = UTF8(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = Codec(max_sz=20, encoding_arg=None) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = Codec(max_sz=20, encoding_arg='utf_32') enc = vtype.encode(msg) dec = vtype.decode(enc) @@ -3057,7 +3059,7 @@ def test_encoded_str_2(self): self.assertEqual(status, AbsorbStatus.Reject) - raw_data = b'\x05' + b'\xC3\xBCber' + b'padding' # \xC3\xBC = � in UTF8 + raw_data = b'\x05' + b'\xC3\xBCber' + b'padding' # \xC3\xBC = ü in UTF8 status, off, size, name = node_abs2.absorb(raw_data, constraints=AbsNoCsts(size=True, struct=True)) diff --git a/framework/value_types.py b/framework/value_types.py index e370abb..29ff54e 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -44,6 +44,7 @@ from framework.data_model import AbsorbStatus, AbsCsts, convert_to_internal_repr, unconvert_from_internal_repr from framework.encoders import * from framework.error_handling import * +from framework.global_resources import * DEBUG = False @@ -99,26 +100,8 @@ def pretty_print(self): @staticmethod def _str2internal(arg): - if isinstance(arg, (tuple, list)): - new_arg = [] - for v in arg: - if sys.version_info[0] > 2 and not isinstance(v, bytes): - new_v = bytes(v, 'latin_1') - else: - new_v = v - new_arg.append(new_v) - elif isinstance(arg, (str, bytes)): - if sys.version_info[0] > 2 
and not isinstance(arg, bytes): - new_arg = bytes(arg, 'latin_1') - else: - new_arg = arg - elif sys.version_info[0] == 2 and isinstance(arg, unicode): - new_arg = arg.encode('latin_1') - else: - raise ValueError + return convert_to_internal_repr(arg) - return new_arg - class VT_Alt(VT): From d160bded796424b9c6a6ad7966968ee272ae146d Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 4 Aug 2016 00:21:51 +0200 Subject: [PATCH 10/80] Minor fixes regarding internal encoding handling --- framework/data_model.py | 6 +----- framework/global_resources.py | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py index 24bf8e1..a3c4353 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -96,11 +96,7 @@ def get_initial_dmaker(self): return self._type def update_from_str_or_bytes(self, data_str): - if sys.version_info[0] > 2: - if not isinstance(data_str, bytes): - data_str = data_str.encode('utf8') - - self.raw = data_str + self.raw = convert_to_internal_repr(data_str) self.node = None def update_from_node(self, node): diff --git a/framework/global_resources.py b/framework/global_resources.py index 2a75ea8..8192c9a 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -71,9 +71,7 @@ def convert_to_internal_repr(val): if val is None: val = b'' elif isinstance(val, int): - val = bytes(val) - # elif not isinstance(val, (str, bytes)): - # val = repr(val) + val = str(val).encode(internal_repr_codec) elif isinstance(val, (tuple, list)): new_val = [] for v in val: From 6496b9039528e154ed2f28870b3a30962d9f35a3 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 4 Aug 2016 14:19:21 +0200 Subject: [PATCH 11/80] Add ASCII() and LATIN_1() node types + Fix - Robustness improvements concerning Encoded String. - Fix String 'ascii_mode' when encoding test cases are provided. --- docs/source/data_model.rst | 6 +++++- framework/basic_primitives.py | 2 -- framework/value_types.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index fa6626a..1188e49 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -163,10 +163,14 @@ that enables to handle transparently any encoding scheme: Below the different currently defined string types: -- :class:`framework.value_types.String`: General purpose character string (with ``utf8`` encoding). +- :class:`framework.value_types.String`: General purpose character string (with ``UTF8`` encoding). - :class:`framework.value_types.Filename`: Filename. Similar to the type ``String``, but some disruptors like ``tTYPE`` will generate more specific test cases. +- :class:`framework.value_types.ASCII`: ``String`` encoded in ``ASCII``. + Note that additional test cases on the encoding scheme are defined (e.g., set the most + significant bit of a character to 1). +- :class:`framework.value_types.LATIN_1`: ``String`` encoded in ``LATIN_1``. - :class:`framework.value_types.UTF8`: ``String`` encoded in ``UTF8``. 
It provides the same encoding as a ``String``, but using it in a data model for describing UTF8 fields is preferable because: a disruptor may use that information for playing around UTF8, and you diff --git a/framework/basic_primitives.py b/framework/basic_primitives.py index 92ab0d7..00e06f9 100644 --- a/framework/basic_primitives.py +++ b/framework/basic_primitives.py @@ -64,8 +64,6 @@ def corrupt_bits(s, p=0.01, n=None, ascii=False): s[i//8] ^= 1 << (i%8) if ascii: s[i//8] &= 0x7f - else: - s[i//8] |= 0x80 return bytes(s) diff --git a/framework/value_types.py b/framework/value_types.py index 29ff54e..5fa1f6e 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -776,7 +776,11 @@ def set_description(self, val_list=None, size=None, min_sz=None, def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_list=True): if self.encoded_string: - val_sz = len(self.encode(value)) + try: + enc_val = self.encode(value) + except: + return False + val_sz = len(enc_val) if not force_max_enc_sz and not force_min_enc_sz: if self.max_encoded_sz is None or val_sz > self.max_encoded_sz: self.max_encoded_sz = val_sz @@ -892,6 +896,15 @@ def enable_fuzz_mode(self): enc_cases = self.encoding_test_cases(orig_val, self.max_sz, self.min_sz, self.min_encoded_sz, self.max_encoded_sz) if enc_cases: + if self.ascii_mode: + new_enc_cases = [] + for v in enc_cases: + s = '' + for i in bytearray(v): + s += chr(i & 0x7f) + new_enc_cases.append(bytes(s)) + enc_cases = new_enc_cases + self.val_list_fuzzy += enc_cases self.val_list_save = self.val_list @@ -1325,6 +1338,20 @@ def new_meth(meth): @from_encoder(PythonCodec_Enc) class Codec(String): pass +@from_encoder(PythonCodec_Enc, 'ascii') +class ASCII(String): + def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_encoded_sz): + enc_val = bytearray(self.encode(current_val)) + if len(enc_val) > 0: + enc_val[0] |= 0x80 + enc_val = bytes(enc_val) + else: + enc_val = b'\xe9' + return [enc_val] + +@from_encoder(PythonCodec_Enc, 'latin_1') +class LATIN_1(String): pass + @from_encoder(PythonCodec_Enc, 'utf_16_le') class UTF16_LE(String): def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_encoded_sz): From fe9c7f50282b5e79f93f8f34fd83db12afa32205 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 4 Aug 2016 16:04:41 +0200 Subject: [PATCH 12/80] fmkdb.py: add support for removing a range of data IDs --- tools/fmkdb.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/fmkdb.py b/tools/fmkdb.py index 050255b..2cdb9b5 100755 --- a/tools/fmkdb.py +++ b/tools/fmkdb.py @@ -82,7 +82,9 @@ help='Extract data from provided data ID range') group.add_argument('-e', '--export-one-data', type=int, metavar='DATA_ID', help='Extract data from the provided data ID') -group.add_argument('--remove-data', type=int, metavar='DATA_ID', +group.add_argument('--remove-data', nargs=2, metavar=('FIRST_DATA_ID','LAST_DATA_ID'), type=int, + help='Remove data from provided data ID range and all related information from fmkDB') +group.add_argument('-r', '--remove-one-data', type=int, metavar='DATA_ID', help='Remove data ID and all related information from fmkDB') group = parser.add_argument_group('Fuddly Database Analysis') @@ -157,6 +159,7 @@ def colorize(string, rgb=None, ansi=None, bg=None, ansi_bg=None, fd=1): export_data = args.export_data export_one_data = args.export_one_data remove_data = args.remove_data + remove_one_data = args.remove_one_data impact_analysis = 
args.data_with_impact data_without_fbk = args.data_without_fbk @@ -210,9 +213,13 @@ def colorize(string, rgb=None, ansi=None, bg=None, ansi_bg=None, fd=1): else: fmkdb.export_data(first=export_one_data, colorized=colorized) - elif remove_data is not None: + elif remove_data is not None or remove_one_data is not None: handle_confirmation() - fmkdb.remove_data(remove_data, colorized=colorized) + if remove_data is not None: + for i in range(remove_data[0], remove_data[1]+1): + fmkdb.remove_data(i, colorized=colorized) + else: + fmkdb.remove_data(remove_one_data, colorized=colorized) elif impact_analysis: fmkdb.get_data_with_impact(prj_name=prj_name, fbk_src=fbk_src, verbose=verbose, From 2d47da83e08d65adcdcfd08a0b466c415e8a089c Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 4 Aug 2016 21:19:06 +0200 Subject: [PATCH 13/80] Update/Fix PPPoE DM + Add BYTES() type node + Fix encoding issues --- data_models/file_formats/jpg.py | 6 +- data_models/file_formats/png.py | 14 +-- data_models/file_formats/zip.py | 42 ++++----- data_models/protocols/pppoe.py | 34 +++---- data_models/protocols/pppoe_strategy.py | 117 ++++++++++++++++++------ data_models/protocols/sms.py | 2 +- docs/source/data_model.rst | 4 +- framework/data_model_helpers.py | 6 +- framework/encoders.py | 7 +- framework/global_resources.py | 12 ++- framework/logger.py | 4 +- framework/monitor.py | 13 +-- framework/value_types.py | 4 +- 13 files changed, 168 insertions(+), 97 deletions(-) diff --git a/data_models/file_formats/jpg.py b/data_models/file_formats/jpg.py index af9175c..d045ac6 100644 --- a/data_models/file_formats/jpg.py +++ b/data_models/file_formats/jpg.py @@ -75,7 +75,7 @@ def build_data_model(self): {'name': 'jpg', 'contents': [ {'name': 'before_SOF', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'absorb_csts': AbsNoCsts(), 'set_attrs': MH.Attr.Abs_Postpone, 'mutable': False}, @@ -119,7 +119,7 @@ def build_data_model(self): ]}, {'name': 'between_SOF_SOS', - 'contents': String(), + 'contents': BYTES(), 'random': True, 'absorb_csts': AbsNoCsts(), 'set_attrs': MH.Attr.Abs_Postpone, @@ -164,7 +164,7 @@ def build_data_model(self): {'name': 'afterSOS', 'mutable': False, - 'contents': String(min_sz=0), + 'contents': BYTES(min_sz=0), 'absorb_csts': AbsNoCsts()} ]} diff --git a/data_models/file_formats/png.py b/data_models/file_formats/png.py index 031158d..133cb15 100644 --- a/data_models/file_formats/png.py +++ b/data_models/file_formats/png.py @@ -58,16 +58,16 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': String(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': BYTES(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ {'name': 'len', 'contents': UINT32_be()}, {'name': 'type', - 'contents': String(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': BYTES(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', - 'contents': lambda x: Node('data', value_type=String(size=x[0].cc.get_raw_value())), + 'contents': lambda x: Node('data', value_type=BYTES(size=x[0].cc.get_raw_value())), 'node_args': ['len']}, {'name': 'crc32_gen', 'contents': MH.CRC(vt=UINT32_be, clear_attrs=[MH.Attr.Mutable]), @@ -80,7 +80,7 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': String(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': BYTES(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ @@ -91,7 +91,7 @@ def 
build_data_model(self): {'weight': 10, 'contents': [ {'name': 'type1', - 'contents': String(val_list=['IHDR'], size=4), + 'contents': BYTES(val_list=['IHDR'], size=4), 'absorb_csts': AbsFullCsts()}, {'name': 'width', 'contents': UINT32_be()}, @@ -111,9 +111,9 @@ def build_data_model(self): {'weight': 5, 'contents': [ {'name': 'type2', - 'contents': String(val_list=['IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': BYTES(val_list=['IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', - 'contents': lambda x: Node('data', value_type=String(size=x.get_raw_value())), + 'contents': lambda x: Node('data', value_type=BYTES(size=x.get_raw_value())), 'node_args': 'len'} ]} ]}, diff --git a/data_models/file_formats/zip.py b/data_models/file_formats/zip.py index 11b50e8..a4286d6 100644 --- a/data_models/file_formats/zip.py +++ b/data_models/file_formats/zip.py @@ -64,12 +64,12 @@ def build_data_model(self): {'name': 'ZIP', 'contents': [ {'name': 'start_padding', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'qty': (0, 1), 'clear_attrs': MH.Attr.Mutable, 'alt': [ {'conf': 'ABS', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'set_attrs': MH.Attr.Abs_Postpone, 'clear_attrs': MH.Attr.Mutable, 'absorb_csts': AbsNoCsts()} @@ -132,20 +132,20 @@ def build_data_model(self): {'name': 'extra_field', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - String(size=x.get_raw_value())), + BYTES(size=x.get_raw_value())), 'node_args': 'extra_field_length'} ]}, {'name': 'data', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - String(val_list=[zlib.compress(b'a'*x.get_raw_value())])), + BYTES(val_list=[zlib.compress(b'a'*x.get_raw_value())])), 'node_args': 'uncompressed_size', 'alt': [ {'conf': 'ABS', 'type': MH.Generator, 'custo_clear': MH.Custo.Gen.ResetOnUnfreeze, 'contents': lambda x: Node('cts', value_type=\ - String(size=x.get_raw_value())), + BYTES(size=x.get_raw_value())), 'node_args': 'compressed_size'} ]}, {'name': 'data_desc', @@ -160,12 +160,12 @@ def build_data_model(self): ]}, {'name': 'no_data_desc', 'exists_if': (BitFieldCondition(sf=1, val=0), 'gp_bit_flag'), - 'contents': String(size=0)} + 'contents': BYTES(size=0)} ]} ]}, {'name': 'archive_desc_header', 'qty': (0,1), - 'contents': String(size=0), + 'contents': BYTES(size=0), 'alt': [ {'conf': 'ABS', 'contents': [ @@ -178,7 +178,7 @@ def build_data_model(self): {'name': 'extra_enc_field', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - String(size=x.get_raw_value())), + BYTES(size=x.get_raw_value())), 'node_args': 'extra_enc_field_len'} ]} ]}, @@ -189,10 +189,10 @@ def build_data_model(self): 'sync_qty_with': 'file', 'contents': [ {'name': 'unsupported_fields', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'alt': [ {'conf': 'ABS', - 'contents': String(size=10), + 'contents': BYTES(size=10), 'set_attrs': [MH.Attr.Abs_Postpone], 'absorb_csts': AbsNoCsts()} ]}, @@ -231,7 +231,7 @@ def build_data_model(self): 'alt': [ {'conf': 'ABS', 'contents': lambda x: Node('cts', value_type=\ - String(size=x.cc.generated_node.get_raw_value())), + BYTES(size=x.cc.generated_node.get_raw_value())), 'node_args': ('file_name_length', 2)} ]}, {'name': ('extra_field', 2), 'contents': MH.COPY_VALUE(path='header/extra_field/cts$', depth=1), @@ -239,12 +239,12 @@ def build_data_model(self): 'alt': [ {'conf': 'ABS', 'contents': lambda x: Node('cts', value_type=\ - String(size=x.cc.generated_node.get_raw_value())), + BYTES(size=x.cc.generated_node.get_raw_value())), 'node_args': 
('extra_field_length', 2)} ]}, {'name': 'file_comment', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - String(size=x.get_raw_value())), + BYTES(size=x.get_raw_value())), 'node_args': 'file_comment_length'} ]} ]}, @@ -253,12 +253,12 @@ def build_data_model(self): {'weight': 5, 'contents': [ {'name': 'empty', - 'contents': String(size=0)}, + 'contents': BYTES(size=0)}, ]}, {'weight': 1, 'contents': [ {'name': 'full', - 'contents': String(val_list=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, + 'contents': BYTES(val_list=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, ]}, ], 'alt': [ @@ -274,7 +274,7 @@ def build_data_model(self): 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'record_meta_data', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'set_attrs': [MH.Attr.Abs_Postpone], 'absorb_csts': AbsNoCsts()}, {'name': 'zip64_sig_locator', @@ -282,10 +282,10 @@ def build_data_model(self): 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'locator_meta_data', - 'contents': String(size=16)} + 'contents': BYTES(size=16)} ]}, {'name': 'empty_end_of_cdir', - 'contents': String(size=0)} + 'contents': BYTES(size=0)} ]} ]} ]}, @@ -322,16 +322,16 @@ def build_data_model(self): 'contents': UINT32_le(maxi=2**10)}, {'name': 'ZIP_comment', 'contents': lambda x: Node('cts', value_type=\ - String(size=x.get_raw_value())), + BYTES(size=x.get_raw_value())), 'node_args': 'ZIP_comment_len'} ]} ]}, {'name': 'end_padding', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'qty': (0,1), 'alt': [ {'conf': 'ABS', - 'contents': String(size=0), + 'contents': BYTES(size=0), 'absorb_csts': AbsNoCsts()} ]} ]} diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index d0445d3..ded52b9 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -51,8 +51,8 @@ def build_data_model(self): 'contents': [ {'name': 'v000', # Final Tag (optional) 'exists_if': (IntCondition(0), 'type'), - 'sync_size_with': 'len', - 'contents': String(size=0)}, + 'sync_enc_size_with': 'len', + 'contents': BYTES(size=0)}, {'name': 'v101', # Service Name 'exists_if': (IntCondition(0x0101), 'type'), 'sync_enc_size_with': 'len', @@ -65,13 +65,13 @@ def build_data_model(self): }, {'name': 'v103', # Host Identifier 'exists_if': (IntCondition(0x0103), 'type'), - 'sync_size_with': 'len', - 'contents': String(val_list=['Host Identifier']), + 'sync_enc_size_with': 'len', + 'contents': BYTES(val_list=['Host Identifier']), }, {'name': 'v104', # Cookie 'exists_if': (IntCondition(0x0104), 'type'), - 'sync_size_with': 'len', - 'contents': String(val_list=['Cookie'], min_sz=0,max_sz=1000), + 'sync_enc_size_with': 'len', + 'contents': BYTES(val_list=['Cookie'], min_sz=0, max_sz=1000), }, {'name': 'v105', # Vendor Specific 'exists_if': (IntCondition(0x0105), 'type'), @@ -81,14 +81,14 @@ def build_data_model(self): subfield_val_lists=[None,[0]], subfield_descs=['type','version']) }, {'name': 'remainder', - 'sync_size_with': ('len', 4), - 'contents': String(val_list=['unspecified...'], min_sz=0,max_sz=1000), + 'sync_enc_size_with': ('len', 4), + 'contents': BYTES(val_list=['unspecified...'], min_sz=0, max_sz=1000), }, ]}, {'name': 'v110', # Relay session ID 'exists_if': (IntCondition(0x0110), 'type'), - 'sync_size_with': 'len', - 'contents': String(size=12)}, + 'sync_enc_size_with': 'len', + 'contents': BYTES(size=12)}, {'name': 'v201', 'exists_if': (IntCondition([0x201, 0x202]), 'type'), 'sync_enc_size_with': 'len', @@ -135,11 
+135,11 @@ def build_data_model(self): {'name': 'mac_dst', 'semantics': 'mac_dst', 'mutable': False, - 'contents': String(size=6)}, + 'contents': BYTES(size=6)}, {'name': 'mac_src', 'semantics': 'mac_src', 'mutable': False, - 'contents': String(size=6)}, + 'contents': BYTES(size=6)}, {'name': 'proto', 'mutable': False, 'contents': UINT16_be(int_list=[0x8863])}, @@ -178,7 +178,7 @@ def build_data_model(self): (tag_ac_name, 1), (tag_service_name.get_clone(), 1), {'name': 'host_uniq_stub', - 'contents': String(val_list=[''])}, + 'contents': BYTES(val_list=[''])}, (tag_node.get_clone(), 0, 4) ]}, {'name': '4padr', @@ -197,9 +197,9 @@ def build_data_model(self): # Accept PPPoE session Case {'weight': 10, 'contents': [ - (tag_ac_name.get_clone(), 1), + (tag_service_name.get_clone(), 1), {'name': ('host_uniq_stub', 2), - 'contents': String(val_list=[''])}, + 'contents': BYTES(val_list=[''])}, (tag_node_4pads, 0, 4) ]}, # Reject PPPoE session Case @@ -219,7 +219,7 @@ def build_data_model(self): ]} ]}, {'name': 'padding', - 'contents': String(max_sz=0), + 'contents': BYTES(max_sz=0), 'absorb_csts': AbsNoCsts(), 'mutable': False}, ]} @@ -229,7 +229,7 @@ def build_data_model(self): pppoe_msg.make_random(recursive=True) padi = pppoe_msg.get_clone('padi') - padi['.*/mac_dst'].set_values(value_type=String(val_list=['\xff\xff\xff\xff\xff\xff'])) + padi['.*/mac_dst'].set_values(value_type=BYTES(val_list=['\xff\xff\xff\xff\xff\xff'])) padi['.*/code'].set_values(value_type=UINT8(int_list=[0x9])) pado = pppoe_msg.get_clone('pado') diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index 86d221b..16c0986 100644 --- a/data_models/protocols/pppoe_strategy.py +++ b/data_models/protocols/pppoe_strategy.py @@ -27,7 +27,7 @@ tactics = Tactics() -def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi'): +def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi', update=False): if not feedback: print('\n\n*** No Feedback!') return False @@ -54,6 +54,9 @@ def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi'): raise ValueError else: raise ValueError + + if data is None: + return False off = data.find(mac_dst) data = data[off:] result = msg_x.absorb(data, constraints=AbsNoCsts(size=True, struct=True)) @@ -64,38 +67,45 @@ def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi'): except: continue print(' [ {:s} received! ]'.format(x.upper())) - next_step.node.freeze() - error_msg = '\n*** The node has no path to: {:s}. Thus, ignore it.\n'\ - ' (probable reason: the node has been fuzzed in a way that makes the' \ - 'path unavailable)' - try: - next_step.node['.*/mac_dst'] = mac_src - except: - print(error_msg.format('mac_dst')) - try: - next_step.node['.*/tag_sn/value/v101'] = service_name - except: - print(error_msg.format('service_name')) + t_fix_pppoe_msg_fields.mac_src = mac_src + t_fix_pppoe_msg_fields.service_name = service_name + host_uniq = msg_x['.*/value/v103'] if host_uniq is not None: host_uniq = host_uniq.to_bytes() env.host_uniq = host_uniq - elif hasattr(env, 'host_uniq'): + t_fix_pppoe_msg_fields.host_uniq = host_uniq + elif update and hasattr(env, 'host_uniq'): host_uniq = env.host_uniq else: pass - if host_uniq is not None: - new_tag = env.dm.get_data('tag_host_uniq') - new_tag['.*/v103'] = host_uniq + if update: + next_step.node.freeze() + error_msg = '\n*** The node has no path to: {:s}. 
Thus, ignore it.\n'\ + ' (probable reason: the node has been fuzzed in a way that makes the' \ + ' path unavailable)' + try: + next_step.node['.*/mac_dst'] = mac_src + except: + print(error_msg.format('mac_dst')) + try: + next_step.node['.*/tag_sn/value/v101'] = service_name + except: + print(error_msg.format('service_name')) + + if host_uniq is not None: + new_tag = env.dm.get_data('tag_host_uniq') + new_tag['.*/v103'] = host_uniq + try: + next_step.node['.*/host_uniq_stub'].set_contents(new_tag) + except: + print(error_msg.format('host_uniq_stub')) + else: + print('\n***WARNING: Host-Uniq not provided') + next_step.node.unfreeze(recursive=True, reevaluate_constraints=True) + return True print(' [ {:s} not found! ]'.format(x.upper())) @@ -109,12 +119,62 @@ def retrieve_padr_from_feedback(env, current_step, next_step, feedback): def retrieve_padi_from_feedback(env, current_step, next_step, feedback): return retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi') +def retrieve_padi_from_feedback_and_update(env, current_step, next_step, feedback): + return retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi', update=True) + + +@disruptor(tactics, dtype="FIX_FIELDS", weight=1) +class t_fix_pppoe_msg_fields(Disruptor): + + mac_src = None + service_name = None + host_uniq = None + + def disrupt_data(self, dm, target, prev_data): + n = prev_data.node + error_msg = '\n*** The node has no path to: {:s}. Thus, ignore it.\n'\ + ' (probable reason: the node has been fuzzed in a way that makes the' \ + ' path unavailable)' + if self.mac_src: + try: + n['.*/mac_dst'] = self.mac_src + prev_data.add_info("update 'mac_src'") + except: + print(error_msg.format('mac_dst')) + else: + print("\n*** 'mac_src' not found in the environment! ***") + + if self.service_name: + try: + n['.*/tag_sn/value/v101'] = self.service_name + prev_data.add_info("update 'service_name'") + except: + print(error_msg.format('service_name')) + else: + print("\n*** 'service_name' not found in the environment! ***") + + if self.host_uniq: + new_tag = dm.get_data('tag_host_uniq') + new_tag['.*/v103'] = self.host_uniq + try: + n['.*/host_uniq_stub'].set_contents(new_tag) + prev_data.add_info("update 'host_uniq'") + except: + print(error_msg.format('host_uniq_stub')) + else: + print("\n*** 'host_uniq_stub' not found in the environment!
***") + + n.unfreeze(recursive=True, reevaluate_constraints=True) + n.freeze() + + return prev_data + ### PADI fuzz scenario ### step_wait_padi = NoDataStep(fbk_timeout=1) -dp_pado = DataProcess(process=[('tTYPE', UI(init=1), UI(order=True))], seed='pado') -dp_pado.append_new_process([('tSTRUCT', UI(init=1), UI(deep=True))]) +dp_pado = DataProcess(process=[('tTYPE', UI(init=1), UI(order=True)), 'FIX_FIELDS'], seed='pado') +dp_pado.append_new_process([('tSTRUCT', UI(init=1), UI(deep=True)), 'FIX_FIELDS']) step_send_pado = Step(dp_pado) # step_send_pado = Step('pado') step_end = Step('padt') @@ -130,18 +190,19 @@ def retrieve_padi_from_feedback(env, current_step, next_step, feedback): step_wait_padi = NoDataStep(fbk_timeout=1) step_send_valid_pado = Step('pado') -dp_pads = DataProcess(process=[('tTYPE#2', UI(init=1), UI(order=True))], seed='pads') -dp_pads.append_new_process([('tSTRUCT#2', UI(init=1), UI(deep=True))]) +dp_pads = DataProcess(process=[('tTYPE#2', UI(init=1), UI(order=True)), 'FIX_FIELDS'], seed='pads') +dp_pads.append_new_process([('tSTRUCT#2', UI(init=1), UI(deep=True)), 'FIX_FIELDS']) step_send_fuzzed_pads = Step(dp_pads) step_wait_padr = NoDataStep(fbk_timeout=1) -step_wait_padi.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) +step_wait_padi.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback_and_update) step_send_valid_pado.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback) +step_send_valid_pado.connect_to(step_wait_padr) step_send_fuzzed_pads.connect_to(step_wait_padr) step_wait_padr.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback) -step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) +step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback_and_update) sc2 = Scenario('PADS') sc2.set_anchor(step_wait_padi) diff --git a/data_models/protocols/sms.py b/data_models/protocols/sms.py index 41d650f..6208144 100644 --- a/data_models/protocols/sms.py +++ b/data_models/protocols/sms.py @@ -264,7 +264,7 @@ def build_data_model(self): 'node_args': ['SPI_p1','SPI_p2','KIc','KID_RC','TAR','CNTR','PCNTR','SecData']}, {'name': 'SecData', - 'contents': String(min_sz=1, max_sz=100, determinist=False)} + 'contents': BYTES(min_sz=1, max_sz=100, determinist=False)} ]}, ]}, diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 1188e49..6548fef 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -163,7 +163,9 @@ that enables to handle transparently any encoding scheme: Below the different currently defined string types: -- :class:`framework.value_types.String`: General purpose character string (with ``UTF8`` encoding). +- :class:`framework.value_types.String`: General purpose ``UTF8`` character string. +- :class:`framework.value_types.BYTES`: General purpose byte string (alias to + :class:`framework.value_types.LATIN_1`). - :class:`framework.value_types.Filename`: Filename. Similar to the type ``String``, but some disruptors like ``tTYPE`` will generate more specific test cases. 
diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 3141b62..5fd7972 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -180,7 +180,7 @@ def qty(node_name, vt, set_attrs, clear_attrs, node): def TIMESTAMP(time_format="%H%M%S", utc=False, set_attrs=[], clear_attrs=[]): ''' - Return a *generator* that returns the current time (in a String node). + Return a *generator* that returns the current time (in a BYTES node). Args: time_format (str): time format to be used by the generator. @@ -193,7 +193,7 @@ def timestamp(time_format, utc, set_attrs, clear_attrs): else: now = datetime.datetime.now() ts = now.strftime(time_format) - n = Node('cts', value_type=fvt.String(val_list=[ts], size=len(ts))) + n = Node('cts', value_type=fvt.BYTES(val_list=[ts], size=len(ts))) n.set_semantics(NodeSemantics(['timestamp'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -283,7 +283,7 @@ def map_func(vt, func, set_attrs, clear_attrs, nodes): return functools.partial(map_func, vt, func, set_attrs, clear_attrs) @staticmethod - def CYCLE(vals, depth=1, vt=fvt.String, + def CYCLE(vals, depth=1, vt=fvt.BYTES, set_attrs=[], clear_attrs=[]): '''Return a *generator* that iterates other the provided value list and returns at each step a `vt` node corresponding to the diff --git a/framework/encoders.py b/framework/encoders.py index adf7784..b81b43c 100644 --- a/framework/encoders.py +++ b/framework/encoders.py @@ -96,8 +96,11 @@ def init_encoding_scheme(self, arg=None): self._codec = arg def encode(self, val): - enc = val.decode(internal_repr_codec, 'replace').encode(self._codec) - return enc + try: + enc = val.decode(internal_repr_codec, 'strict') + except: + enc = val.decode('latin_1') + return enc.encode(self._codec) def decode(self, val): try: diff --git a/framework/global_resources.py b/framework/global_resources.py index 8192c9a..397d55b 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -79,21 +79,23 @@ def convert_to_internal_repr(val): new_val.append(new_v) val = new_val elif sys.version_info[0] > 2: - if not isinstance(val, bytes): + if isinstance(val, str): val = val.encode(internal_repr_codec) elif isinstance(val, unicode): # only for python2 val = val.encode(internal_repr_codec) - elif isinstance(val, str): # only for python2 (and str ~ ascii) + elif isinstance(val, str): # only for python2 pass else: - print(val, repr(val)) raise ValueError return val def unconvert_from_internal_repr(val): assert(isinstance(val, bytes)) - return val.decode(internal_repr_codec, 'replace') - + try: + dec_val = val.decode(internal_repr_codec, 'strict') + except: + dec_val = val.decode('latin_1') + return dec_val class Error(object): diff --git a/framework/logger.py b/framework/logger.py index bb4dbdc..0481852 100644 --- a/framework/logger.py +++ b/framework/logger.py @@ -460,7 +460,7 @@ def _decode_target_feedback(self, feedback): for f in feedback: new_f = f.strip() if sys.version_info[0] > 2 and new_f and isinstance(new_f, bytes): - new_f = new_f.decode(internal_repr_codec) + new_f = new_f.decode(internal_repr_codec, 'replace') new_f = eval('{!a}'.format(new_f)) new_fbk.append(new_f) if not list(filter(lambda x: x != b'', new_fbk)): @@ -468,7 +468,7 @@ def _decode_target_feedback(self, feedback): else: new_fbk = feedback.strip() if sys.version_info[0] > 2 and new_fbk and isinstance(new_fbk, bytes): - new_fbk = new_fbk.decode(internal_repr_codec) + new_fbk = new_fbk.decode(internal_repr_codec, 'replace') new_fbk = 
eval('{!a}'.format(new_fbk)) return new_fbk diff --git a/framework/monitor.py b/framework/monitor.py index 76f360e..a2a9245 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -760,7 +760,7 @@ def _start(self): self.ser = serial.Serial(self.serial_port, self.baudrate, bytesize=self.bytesize, parity=self.parity, stopbits=self.stopbits, xonxoff=self.xonxoff, dsrdtr=self.dsrdtr, rtscts=self.rtscts, - timeout=1) + timeout=self.slowness_factor*0.1) if self.username is not None: assert self.password is not None self.ser.flushInput() @@ -780,7 +780,7 @@ def _start(self): # (already logged, or with the password prompt, ...) when we first write on # the serial line. self.ser.write(b'\x04\r\n') - time.sleep(self.slowness_factor*0.6) + time.sleep(self.slowness_factor*0.8) self.ser.flushInput() self.ser.write(self.username+b'\r\n') time.sleep(0.1) @@ -789,10 +789,11 @@ def _start(self): retry = 0 eot_sent = True else: - pass_prompt = b''.join(self._read_serial(duration=self.slowness_factor*0.2)) + chunks = self._read_serial(duration=self.slowness_factor*0.2) + pass_prompt = b''.join(chunks) time.sleep(0.1) self.ser.write(self.password+b'\r\n') - time.sleep(self.slowness_factor) + time.sleep(self.slowness_factor*0.6) def _stop(self): self.ser.write(b'\x04\r\n') # we send an EOT (Ctrl+D) @@ -807,7 +808,7 @@ def _exec_command(self, cmd): time.sleep(0.1) self.ser.readline() # we consume the 'writing echo' from the input try: - result = self._read_serial(duration=self.slowness_factor*0.4) + result = self._read_serial(duration=self.slowness_factor*0.8) except serial.SerialException: raise BackendError('Exception while reading serial line') else: @@ -872,7 +873,7 @@ def _get_pid(self, logger): try: res = self.backend.exec_command(self.command_pattern.format(self.process_name)) except BackendError: - fallback_cmd = 'ps a -opid,comm' + fallback_cmd = 'ps a -opid,comm | grep {0:s}'.format(self.process_name) res = self.backend.exec_command(fallback_cmd) if sys.version_info[0] > 2: res = res.decode(self.backend.codec) diff --git a/framework/value_types.py b/framework/value_types.py index 5fa1f6e..cb8c886 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -656,7 +656,7 @@ def rewind(self): def _check_sizes(self): if self.val_list is not None: for v in self.val_list: - sz = len(v) + sz = len(unconvert_from_internal_repr(v)) if self.max_sz is not None: assert(self.max_sz >= sz >= self.min_sz) else: @@ -1352,6 +1352,8 @@ def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_e @from_encoder(PythonCodec_Enc, 'latin_1') class LATIN_1(String): pass +BYTES = LATIN_1 + @from_encoder(PythonCodec_Enc, 'utf_16_le') class UTF16_LE(String): def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_encoded_sz): From fbe969f3a24debded76369d470df9a0604bb6e15 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 5 Aug 2016 10:40:30 +0200 Subject: [PATCH 14/80] Use unconvert_from_internal_repr() in PythonCodec_Enc. 
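
The fallback that encode() previously implemented by hand is exactly what unconvert_from_internal_repr() already provides: decode with the internal codec in strict mode, then fall back to latin_1 on failure. A small behavioral sketch (illustrative byte values, assuming the internal codec is utf8):

    # unconvert_from_internal_repr(b'\xc3\xa9')  -> 'é'  (valid utf8)
    # unconvert_from_internal_repr(b'\xe9')      -> 'é'  (utf8 fails, latin_1 fallback)
    # encode() then simply re-encodes the resulting string with self._codec.
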
--- framework/encoders.py | 6 +----- framework/global_resources.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/framework/encoders.py b/framework/encoders.py index b81b43c..71304be 100644 --- a/framework/encoders.py +++ b/framework/encoders.py @@ -96,11 +96,7 @@ def init_encoding_scheme(self, arg=None): self._codec = arg def encode(self, val): - try: - enc = val.decode(internal_repr_codec, 'strict') - except: - enc = val.decode('latin_1') - return enc.encode(self._codec) + return unconvert_from_internal_repr(val).encode(self._codec) def decode(self, val): try: diff --git a/framework/global_resources.py b/framework/global_resources.py index 397d55b..c6a143d 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -90,7 +90,7 @@ def convert_to_internal_repr(val): return val def unconvert_from_internal_repr(val): - assert(isinstance(val, bytes)) + # assert isinstance(val, bytes) try: dec_val = val.decode(internal_repr_codec, 'strict') except: From 680b0b7be23a4cafe540ccea5fe124883c44bd93 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 5 Aug 2016 12:59:25 +0200 Subject: [PATCH 15/80] Polish Node.show() regarding encoded types --- framework/data_model.py | 18 +++++++++--------- framework/value_types.py | 18 +++++++++++------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py index a3c4353..808486f 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -209,9 +209,9 @@ def get_contents(self, do_copy=False): return contents - def show(self, log_func=lambda x: x): + def show(self, raw_limit=200, log_func=lambda x: x): if self.node is not None: - self.node.show(raw_limit=200, log_func=log_func) + self.node.show(raw_limit=raw_limit, log_func=log_func) else: print(self.raw) @@ -1236,7 +1236,7 @@ def is_exhausted(self): def is_frozen(self): raise NotImplementedError - def pretty_print(self): + def pretty_print(self, max_size=None): return None def _get_value(self, conf=None, recursive=True, return_node_internals=False): @@ -2085,8 +2085,8 @@ def is_exhausted(self): else: return False - def pretty_print(self): - return self.value_type.pretty_print() + def pretty_print(self, max_size=None): + return self.value_type.pretty_print(max_size=max_size) def __getattr__(self, name): vt = self.__getattribute__('value_type') @@ -5965,9 +5965,9 @@ def unfreeze_all(self, recursive=True, ignore_entanglement=False): - def pretty_print(self, conf=None): + def pretty_print(self, max_size=None, conf=None): conf = self.__check_conf(conf) - return self.internals[conf].pretty_print() + return self.internals[conf].pretty_print(max_size=max_size) def get_nodes_names(self, conf=None, verbose=False, terminal_only=False): l = [] @@ -6172,7 +6172,7 @@ def is_node_used_more_than_once(name): if node.is_term(conf_tmp): raw = node._tobytes() raw_len = len(raw) - val = node.pretty_print() + val = node.pretty_print(max_size=raw_limit) prefix = "{:s}".format(indent_term) name = "{:s} ".format(name) @@ -6194,7 +6194,7 @@ def is_node_used_more_than_once(name): print_nonterm_func("{:s} ".format(indent_spc) , nl=False, log_func=log_func) print_contents_func("\_ {:s}".format(val), log_func=log_func) print_nonterm_func("{:s} ".format(indent_spc) , nl=False, log_func=log_func) - if raw_limit and raw_len > raw_limit: + if raw_limit is not None and raw_len > raw_limit: print_raw_func("\_raw: {:s}".format(repr(raw[:raw_limit])), nl=False, log_func=log_func) print_raw_func(" ...", hlight=True, log_func=log_func) 
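
With raw_limit now threaded from show() down to pretty_print(max_size=...), decoded values are truncated at the same threshold as raw dumps. A minimal usage sketch (the data and node objects are hypothetical):

    data.show(raw_limit=40)         # decoded views and raw dumps cut at the same limit
    node.pretty_print(max_size=40)  # direct call with an explicit limit
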
diff --git a/framework/value_types.py b/framework/value_types.py index cb8c886..1cfa2a3 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -95,7 +95,7 @@ def is_exhausted(self): def set_size_from_constraints(self, size=None, encoded_size=None): raise NotImplementedError - def pretty_print(self): + def pretty_print(self, max_size=None): return None @staticmethod @@ -951,12 +951,15 @@ def set_size_from_constraints(self, size=None, encoded_size=None): else: raise ValueError - def pretty_print(self): + def pretty_print(self, max_size=None): if self.drawn_val is None: self.get_value() - if self.encoded_string: + if self.encoded_string and not isinstance(self, BYTES): dec = self.drawn_val + sz = len(dec) + if max_size is not None and sz > max_size: + dec = dec[:max_size] return repr(dec) + ' [decoded, sz=' + str(len(dec)) + ']' else: return None @@ -1242,7 +1245,7 @@ def get_current_encoded_value(self): def set_size_from_constraints(self, size=None, encoded_size=None): raise DataModelDefinitionError - def pretty_print(self): + def pretty_print(self, max_size=None): if self.drawn_val is None: self.get_value() @@ -1352,7 +1355,8 @@ def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_e @from_encoder(PythonCodec_Enc, 'latin_1') class LATIN_1(String): pass -BYTES = LATIN_1 +@from_encoder(PythonCodec_Enc, 'latin_1') +class BYTES(String): pass @from_encoder(PythonCodec_Enc, 'utf_16_le') class UTF16_LE(String): @@ -1441,7 +1445,7 @@ def _convert_value(self, val): return VT._str2internal(str(val)) # return str(val) - def pretty_print(self): + def pretty_print(self, max_size=None): if self.drawn_val is None: self.get_value() @@ -1749,7 +1753,7 @@ def set_size_from_constraints(self, size=None, encoded_size=None): raise DataModelDefinitionError - def pretty_print(self): + def pretty_print(self, max_size=None): first_pass = True for lim, sz, val_list, extrems, i in zip(self.subfield_limits[::-1], From d81e1ec602b04d405162b492d14c3b81e446a35d Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Wed, 22 Jun 2016 15:34:39 +0200 Subject: [PATCH 16/80] RegexParser class supporting (, ) and \ as special chars --- framework/data_model_helpers.py | 92 +++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 5fd7972..3d3bba6 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -510,6 +510,98 @@ def _handle_attrs(n, set_attrs, clear_attrs): n.clear_attr(ca) +class State(object): + def run(self, context): + raise NotImplementedError + +class RegexParser(object): + + # supported special char ()\ + + class InitialState(State): + + def run(self, context): + if context.input == '(': + context.flush() + return RegexParser.InsideParenthesis + elif context.input == ')': + raise Exception("Can not close ) without opening it") + elif context.input == '\\': + return RegexParser.Escaping + else: + context.buffer += context.input + return self.__class__ + + class Escaping(object): + + def run(self, context): + context.buffer += context.input + return context.last_state.__class__ + + class InsideParenthesis(State): + + def run(self, context): + if context.input == '(': + raise Exception("Can not open parenthesis multiple times") + elif context.input == ')': + context.flush() + return RegexParser.InitialState + elif context.input == '\\': + return RegexParser.Escaping + else: + context.buffer += context.input + return self.__class__ + + + + def __init__(self): 
+ self.current_state = RegexParser.InitialState() + self._name = None + self._input = None + self._buffer = "" + + self.last_state = None + + self._terminal_nodes = [] + + @property + def input(self): + return self._input + + @property + def buffer(self): + return self._buffer + + @buffer.setter + def buffer(self, buffer): + self._buffer = buffer + + def flush(self): + if len(self._buffer) > 0: + type = fvt.INT_str if self._buffer.isdigit() else fvt.String + self._terminal_nodes.append(self._create_terminal_node(self._name + str(len(self._terminal_nodes)+1), + type, contents=[self._buffer])) + self._buffer = "" + + def run(self, inputs, name): + + self._name = name + + for self._input in inputs: + next_state = self.current_state.run(self) + self.last_state = self.current_state + self.current_state = next_state() + + if not isinstance(self.current_state, RegexParser.InitialState): + raise + + return self._terminal_nodes + + + def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=None): + return Node(name=name, vt=fvt.String(val_list=contents)) + + class ModelHelper(object): HIGH_PRIO = 1 From aceb69989272264e4d1831c55ce1233d06a315c8 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 23 Jun 2016 11:54:24 +0200 Subject: [PATCH 17/80] Add support for quantifier {}, *, ?, + --- framework/data_model_helpers.py | 139 ++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 26 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 3d3bba6..875fb7a 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -516,54 +516,129 @@ def run(self, context): class RegexParser(object): - # supported special char ()\ - class InitialState(State): - def run(self, context): - if context.input == '(': - context.flush() - return RegexParser.InsideParenthesis - elif context.input == ')': - raise Exception("Can not close ) without opening it") - elif context.input == '\\': - return RegexParser.Escaping + def run(self, ctx): + + if ctx.input in ('?', '*', '+', '{'): + + if len(ctx.buffer) == 0: + raise Exception + + if not isinstance(ctx.old_state, RegexParser.InsideParenthesis): + buffer = ctx.buffer[-1] + ctx.buffer = ctx.buffer[:-1] + ctx.flush() + ctx.buffer = buffer + + if ctx.input == '{': + return RegexParser.QtyState + elif ctx.input == '+': + ctx.min = 1 + elif ctx.input == '?': + ctx.max = 1 + + if ctx.min is None: + ctx.min = 0 + + ctx.flush() + else: - context.buffer += context.input - return self.__class__ + if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.QtyState)): + ctx.flush() + + if ctx.input == '}': + raise Exception + elif ctx.input == ')': + raise Exception + + elif ctx.input == '(': + ctx.flush() + return RegexParser.InsideParenthesis + + elif ctx.input == '\\': + return RegexParser.Escaping + + else: + ctx.buffer += ctx.input + + return self.__class__ + + + class QtyState(State): + + def __init__(self): + pass + + def run(self, ctx): + + if ctx.input == ',': + ctx.max = "" + elif ctx.input.isdigit(): + if ctx.max is not None: + ctx.max += ctx.input + else: + if ctx.min is None: + ctx.min = "" + ctx.min += ctx.input + elif ctx.input == "}": + + ctx.min = 0 if ctx.min is None else int(ctx.min) + + if ctx.max is None: + ctx.max = ctx.min + elif len(ctx.max) == 0: + ctx.max = None + else: + ctx.max = int(ctx.max) + + if ctx.max is not None and (ctx.min > ctx.max or ctx.min == ctx.max == 0): + raise Exception + + return RegexParser.InitialState + elif 
ctx.input.isspace(): + pass + else: + raise Exception + + return self.__class__ + class Escaping(object): def run(self, context): context.buffer += context.input - return context.last_state.__class__ + return context.old_state.__class__ class InsideParenthesis(State): def run(self, context): - if context.input == '(': - raise Exception("Can not open parenthesis multiple times") + if context.input in ('(', '?', '*', '+', '{', '}'): + raise Exception elif context.input == ')': - context.flush() return RegexParser.InitialState elif context.input == '\\': return RegexParser.Escaping else: context.buffer += context.input - return self.__class__ + + return self.__class__ def __init__(self): - self.current_state = RegexParser.InitialState() + self.current_state = RegexParser.InitialState() # last ended state + self.old_state = self.current_state self._name = None self._input = None self._buffer = "" - self.last_state = None + self.min = None + self.max = None self._terminal_nodes = [] + @property def input(self): return self._input @@ -578,22 +653,34 @@ def buffer(self, buffer): def flush(self): if len(self._buffer) > 0: + + if self.min is None and self.max is None: + self.min = self.max = 1 + type = fvt.INT_str if self._buffer.isdigit() else fvt.String - self._terminal_nodes.append(self._create_terminal_node(self._name + str(len(self._terminal_nodes)+1), - type, contents=[self._buffer])) - self._buffer = "" + terminal_node = self._create_terminal_node(self._name + str(len(self._terminal_nodes)+1), + type, contents=[self._buffer], qty=(self.min, self.max)) + self._terminal_nodes.append(terminal_node) + self.reset() + + def reset(self): + self._buffer = "" + self.min = None + self.max = None def run(self, inputs, name): self._name = name for self._input in inputs: - next_state = self.current_state.run(self) - self.last_state = self.current_state - self.current_state = next_state() + next_state_class = self.current_state.run(self) + self.old_state = self.current_state + self.current_state = next_state_class() if not isinstance(self.current_state, RegexParser.InitialState): - raise + raise Exception + elif len(self.buffer) > 0: + self.flush() return self._terminal_nodes From f46f0fa124c0e425dc6f4bae8aa2ef9a3c782edf Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 23 Jun 2016 14:19:50 +0200 Subject: [PATCH 18/80] Add support for [] --- framework/data_model_helpers.py | 58 +++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 875fb7a..e04f355 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -525,7 +525,7 @@ def run(self, ctx): if len(ctx.buffer) == 0: raise Exception - if not isinstance(ctx.old_state, RegexParser.InsideParenthesis): + if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): buffer = ctx.buffer[-1] ctx.buffer = ctx.buffer[:-1] ctx.flush() @@ -544,7 +544,9 @@ def run(self, ctx): ctx.flush() else: - if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.QtyState)): + if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, + RegexParser.InsideSquareBrackets, + RegexParser.QtyState)): ctx.flush() if ctx.input == '}': @@ -552,10 +554,17 @@ def run(self, ctx): elif ctx.input == ')': raise Exception + elif ctx.input == ']': + raise Exception + elif ctx.input == '(': ctx.flush() return RegexParser.InsideParenthesis + elif ctx.input == '[': + ctx.flush() + return 
RegexParser.InsideSquareBrackets + elif ctx.input == '\\': return RegexParser.Escaping @@ -613,7 +622,8 @@ def run(self, context): class InsideParenthesis(State): def run(self, context): - if context.input in ('(', '?', '*', '+', '{', '}'): + if context.input in ('(', '[', ']', '?', '*', '+', '{', '}'): + print context.input raise Exception elif context.input == ')': return RegexParser.InitialState @@ -624,6 +634,19 @@ def run(self, context): return self.__class__ + class InsideSquareBrackets(State): + + def run(self, context): + if context.input in ('[', '(', ')', '?', '*', '+', '{', '}'): + raise Exception + elif context.input == ']': + return RegexParser.InitialState + elif context.input == '\\': + return RegexParser.Escaping + else: + context.alphabet += context.input + + return self.__class__ def __init__(self): @@ -632,6 +655,7 @@ def __init__(self): self._name = None self._input = None self._buffer = "" + self._alphabet = "" self.min = None self.max = None @@ -651,20 +675,40 @@ def buffer(self): def buffer(self, buffer): self._buffer = buffer + @property + def alphabet(self): + return self._alphabet + + @alphabet.setter + def alphabet(self, alphabet): + self._alphabet = alphabet + def flush(self): - if len(self._buffer) > 0: + + if not (len(self._buffer) == 0 and len(self._alphabet) == 0): if self.min is None and self.max is None: self.min = self.max = 1 + # print "buffer: " + self._buffer + # print "alphabet: " + self._alphabet + # print + # print + type = fvt.INT_str if self._buffer.isdigit() else fvt.String - terminal_node = self._create_terminal_node(self._name + str(len(self._terminal_nodes)+1), - type, contents=[self._buffer], qty=(self.min, self.max)) + name = self._name + str(len(self._terminal_nodes)+1) + contents = [self._buffer] if len(self._buffer) > 0 else None + alphabet = self._alphabet if len(self._alphabet) > 0 else None + + terminal_node = self._create_terminal_node(name, type, contents=contents, alphabet=alphabet, + qty=(self.min, self.max)) self._terminal_nodes.append(terminal_node) self.reset() + def reset(self): self._buffer = "" + self._alphabet = "" self.min = None self.max = None @@ -679,7 +723,7 @@ def run(self, inputs, name): if not isinstance(self.current_state, RegexParser.InitialState): raise Exception - elif len(self.buffer) > 0: + elif len(self._buffer) > 0 or len(self._alphabet) > 0 : self.flush() return self._terminal_nodes From 1ac451a3a6d629246c001d818966db64a90cba77 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Fri, 24 Jun 2016 12:36:51 +0200 Subject: [PATCH 19/80] Add support for | --- framework/data_model_helpers.py | 144 +++++++++++++++++++++----------- 1 file changed, 95 insertions(+), 49 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index e04f355..f49cd08 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -522,14 +522,19 @@ def run(self, ctx): if ctx.input in ('?', '*', '+', '{'): - if len(ctx.buffer) == 0: + # if there is nothing to quantify + if len(ctx.buffer) == len(ctx.alphabet) == 0 and len(ctx.contents) == 1: raise Exception - if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): - buffer = ctx.buffer[-1] + # if only one char is quantified + if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets))\ + and len(ctx.contents) > 1: + raise Exception + elif not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): + alphabet = 
ctx.buffer[-1] ctx.buffer = ctx.buffer[:-1] ctx.flush() - ctx.buffer = buffer + ctx.alphabet = alphabet if ctx.input == '{': return RegexParser.QtyState @@ -541,35 +546,56 @@ def run(self, ctx): if ctx.min is None: ctx.min = 0 + # flush the buffer only ctx.flush() else: - if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, - RegexParser.InsideSquareBrackets, - RegexParser.QtyState)): + + if isinstance(ctx.old_state, (RegexParser.QtyState, RegexParser.InsideSquareBrackets)): + # flush the buffer only ctx.flush() - if ctx.input == '}': - raise Exception - elif ctx.input == ')': - raise Exception + if ctx.input == '|': + # no terminal_node because | has priority + if len(ctx.terminal_nodes) > 0: + raise Exception - elif ctx.input == ']': - raise Exception + ctx.contents.append("") - elif ctx.input == '(': - ctx.flush() - return RegexParser.InsideParenthesis + else: - elif ctx.input == '[': - ctx.flush() - return RegexParser.InsideSquareBrackets + if isinstance(ctx.old_state, RegexParser.InsideParenthesis): + ctx.flush() - elif ctx.input == '\\': - return RegexParser.Escaping + if ctx.input == '}': + raise Exception + elif ctx.input == ')': + raise Exception - else: - ctx.buffer += ctx.input + elif ctx.input == ']': + raise Exception + + elif ctx.input == '(': + if len(ctx.buffer) > 0 and len(ctx.contents) > 1: + raise Exception + + if len(ctx.contents) == 1 and len(ctx.contents[0]) > 0: + ctx.flush() + return RegexParser.InsideParenthesis + + elif ctx.input == '[': + if len(ctx.contents) > 1: + raise Exception + + if len(ctx.contents) == 1 and len(ctx.contents[0]) > 0: + ctx.flush() + return RegexParser.InsideSquareBrackets + + elif ctx.input == '\\': + return RegexParser.Escaping + + else: + ctx.buffer += ctx.input return self.__class__ @@ -621,16 +647,17 @@ def run(self, context): class InsideParenthesis(State): - def run(self, context): - if context.input in ('(', '[', ']', '?', '*', '+', '{', '}'): - print context.input + def run(self, ctx): + if ctx.input in ('(', '[', ']', '?', '*', '+', '{', '}'): raise Exception - elif context.input == ')': + elif ctx.input == ')': return RegexParser.InitialState - elif context.input == '\\': + elif ctx.input == '\\': return RegexParser.Escaping + elif ctx.input == '|': + ctx.contents.append("") else: - context.buffer += context.input + ctx.buffer += ctx.input return self.__class__ @@ -652,9 +679,10 @@ def run(self, context): def __init__(self): self.current_state = RegexParser.InitialState() # last ended state self.old_state = self.current_state + self._name = None self._input = None - self._buffer = "" + self._contents = [""] self._alphabet = "" self.min = None @@ -669,11 +697,15 @@ def input(self): @property def buffer(self): - return self._buffer + return self._contents[-1] @buffer.setter def buffer(self, buffer): - self._buffer = buffer + self._contents[-1] = buffer + + @property + def contents(self): + return self._contents @property def alphabet(self): @@ -683,31 +715,44 @@ def alphabet(self): def alphabet(self, alphabet): self._alphabet = alphabet + @property + def terminal_nodes(self): + return self._terminal_nodes + def flush(self): - if not (len(self._buffer) == 0 and len(self._alphabet) == 0): + if self.min is None and self.max is None: + self.min = self.max = 1 - if self.min is None and self.max is None: - self.min = self.max = 1 + # print "buffer: " + self._buffer + # print "alphabet: " + self._alphabet + # print + # print - # print "buffer: " + self._buffer - # print "alphabet: " + self._alphabet - # print - # print + # type = 
fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String + type = fvt.String + name = self._name + str(len(self._terminal_nodes) + 1) + + if len(self.alphabet) > 0: + contents = None + alphabet = self._alphabet + else: + if all(len(content) == 1 for content in self.contents): + alphabet = "".join(self.contents) + contents = None + else: + contents = self.contents + alphabet = None - type = fvt.INT_str if self._buffer.isdigit() else fvt.String - name = self._name + str(len(self._terminal_nodes)+1) - contents = [self._buffer] if len(self._buffer) > 0 else None - alphabet = self._alphabet if len(self._alphabet) > 0 else None - terminal_node = self._create_terminal_node(name, type, contents=contents, alphabet=alphabet, - qty=(self.min, self.max)) - self._terminal_nodes.append(terminal_node) - self.reset() + terminal_node = self._create_terminal_node(name, type, contents=contents, alphabet=alphabet, + qty=(self.min, self.max)) + self._terminal_nodes.append(terminal_node) + self.reset() def reset(self): - self._buffer = "" + self._contents = [""] self._alphabet = "" self.min = None self.max = None @@ -723,7 +768,8 @@ def run(self, inputs, name): if not isinstance(self.current_state, RegexParser.InitialState): raise Exception - elif len(self._buffer) > 0 or len(self._alphabet) > 0 : + + if len(self._contents[0]) > 0 or len(self._contents) > 1 or len(self._alphabet) > 0 or inputs == "": self.flush() return self._terminal_nodes From 3b767582df65de1c5286d8608171dfae81973de2 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Tue, 28 Jun 2016 16:05:10 +0200 Subject: [PATCH 20/80] Add support for shape in RegexParser --- framework/data_model_helpers.py | 242 ++++++++++++++++++-------------- 1 file changed, 134 insertions(+), 108 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index f49cd08..989815d 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -510,97 +510,120 @@ def _handle_attrs(n, set_attrs, clear_attrs): n.clear_attr(ca) + + + class State(object): def run(self, context): raise NotImplementedError + + + class RegexParser(object): - class InitialState(State): + class PickState(State): def run(self, ctx): + if ctx.input == '|' and (len(ctx.terminal_nodes) == 0 or + (len(ctx.terminal_nodes) == 1 and ctx.buffer is None)): + ctx.pick = True - if ctx.input in ('?', '*', '+', '{'): + if ctx.pick and ctx.input != '|' or not ctx.pick and ctx.input == '|': + raise Exception + elif ctx.pick and ctx.input == '|': + ctx.append_to_contents("") - # if there is nothing to quantify - if len(ctx.buffer) == len(ctx.alphabet) == 0 and len(ctx.contents) == 1: - raise Exception + return RegexParser.InitialState + else: + return RegexParser.InitialState().run(ctx) - # if only one char is quantified - if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets))\ - and len(ctx.contents) > 1: - raise Exception - elif not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): - alphabet = ctx.buffer[-1] - ctx.buffer = ctx.buffer[:-1] - ctx.flush() - ctx.alphabet = alphabet - if ctx.input == '{': - return RegexParser.QtyState - elif ctx.input == '+': - ctx.min = 1 - elif ctx.input == '?': - ctx.max = 1 + class InitialState(State): + + def run(self, ctx): + + if ctx.input == '|': + return RegexParser.PickState().run(ctx) + + if ctx.input == '(': + if ctx.buffer is not None and len(ctx.buffer) == 0: + ctx.contents = ctx.contents[:-1] + if 
ctx.contents is not None and len(ctx.contents) == 0: + ctx.contents = None + ctx.flush() + ctx.append_to_contents("") + return RegexParser.InsideParenthesis - if ctx.min is None: - ctx.min = 0 + elif ctx.input == '[': + if ctx.buffer is not None and len(ctx.buffer) == 0: + ctx.contents = ctx.contents[:-1] - # flush the buffer only + if ctx.contents is not None and len(ctx.contents) == 0: + ctx.contents = None ctx.flush() + ctx.append_to_alphabet("") + return RegexParser.InsideSquareBrackets + + elif ctx.input in ('?', '*', '+', '{', '}', ')', ']'): + raise Exception + + elif ctx.input == '\\': + return RegexParser.Escaping else: + ctx.append_to_buffer(ctx.input) - if isinstance(ctx.old_state, (RegexParser.QtyState, RegexParser.InsideSquareBrackets)): - # flush the buffer only - ctx.flush() + return RegexParser.QtyState - if ctx.input == '|': - # no terminal_node because | has priority - if len(ctx.terminal_nodes) > 0: - raise Exception - ctx.contents.append("") + class QtyState(State): - else: + def run(self, ctx): - if isinstance(ctx.old_state, RegexParser.InsideParenthesis): - ctx.flush() + if ctx.input not in ('?', '*', '+', '{'): - if ctx.input == '}': - raise Exception - elif ctx.input == ')': - raise Exception + # cases: (...) & [...] without any quantifier + if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): + ctx.flush() + + return RegexParser.InitialState().run(ctx) - elif ctx.input == ']': - raise Exception + if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): - elif ctx.input == '(': - if len(ctx.buffer) > 0 and len(ctx.contents) > 1: - raise Exception + if ctx.pick and len(ctx.contents) > 1 and len(ctx.buffer) > 1: + raise Exception - if len(ctx.contents) == 1 and len(ctx.contents[0]) > 0: - ctx.flush() - return RegexParser.InsideParenthesis + if len(ctx.buffer) == 1: + if len(ctx.contents) > 1: + content = ctx.buffer + ctx.contents = ctx.contents[:-1] + ctx.flush() + ctx.append_to_buffer(content) - elif ctx.input == '[': - if len(ctx.contents) > 1: - raise Exception + else: # len(ctx.buffer) > 1 + content = ctx.buffer[-1] + ctx.buffer = ctx.buffer[:-1] + ctx.flush() + ctx.append_to_buffer(content) - if len(ctx.contents) == 1 and len(ctx.contents[0]) > 0: - ctx.flush() - return RegexParser.InsideSquareBrackets + if ctx.input == '{': + return RegexParser.InsideBrackets + elif ctx.input == '+': + ctx.min = 1 + elif ctx.input == '?': + ctx.max = 1 - elif ctx.input == '\\': - return RegexParser.Escaping + if ctx.min is None: + ctx.min = 0 - else: - ctx.buffer += ctx.input + # flush the buffer only + ctx.flush() - return self.__class__ + return RegexParser.PickState - class QtyState(State): + class InsideBrackets(State): def __init__(self): pass @@ -627,10 +650,11 @@ def run(self, ctx): else: ctx.max = int(ctx.max) - if ctx.max is not None and (ctx.min > ctx.max or ctx.min == ctx.max == 0): + if ctx.max is not None and ctx.min > ctx.max: raise Exception - return RegexParser.InitialState + ctx.flush() + return RegexParser.PickState elif ctx.input.isspace(): pass else: @@ -648,42 +672,50 @@ def run(self, context): class InsideParenthesis(State): def run(self, ctx): + if ctx.input in ('(', '[', ']', '?', '*', '+', '{', '}'): raise Exception elif ctx.input == ')': - return RegexParser.InitialState + return RegexParser.QtyState elif ctx.input == '\\': return RegexParser.Escaping elif ctx.input == '|': - ctx.contents.append("") + ctx.append_to_contents("") else: - ctx.buffer += ctx.input + 
ctx.append_to_buffer(ctx.input) return self.__class__ class InsideSquareBrackets(State): - def run(self, context): - if context.input in ('[', '(', ')', '?', '*', '+', '{', '}'): - raise Exception - elif context.input == ']': - return RegexParser.InitialState - elif context.input == '\\': + def run(self, ctx): + + if ctx.input == ']': + return RegexParser.QtyState + elif ctx.input == '\\': return RegexParser.Escaping + elif ctx.input in ('[', '(', ')', '?', '*', '+', '{', '}'): + raise Exception else: - context.alphabet += context.input + ctx.append_to_alphabet(ctx.input) return self.__class__ + + def __init__(self): self.current_state = RegexParser.InitialState() # last ended state self.old_state = self.current_state + self.state = [] + self._name = None self._input = None - self._contents = [""] - self._alphabet = "" + self.contents = [""] + self.alphabet = None + + self.pick = False # pick context ? self.min = None self.max = None @@ -695,25 +727,31 @@ def __init__(self): def input(self): return self._input + def append_to_contents(self, content): + if self.contents is None: + self.contents = [] + self.contents.append(content) + + def append_to_buffer(self, str): + if self.contents is None: + self.contents = [""] + self.contents[-1] += str + + def append_to_alphabet(self, alphabet): + if self.alphabet is None: + self.alphabet = "" + self.alphabet += alphabet + @property def buffer(self): - return self._contents[-1] + return None if self.contents is None else self.contents[-1] @buffer.setter def buffer(self, buffer): - self._contents[-1] = buffer + if self.contents is None: + self.contents = [""] + self.contents[-1] = buffer - @property - def contents(self): - return self._contents - - @property - def alphabet(self): - return self._alphabet - - @alphabet.setter - def alphabet(self, alphabet): - self._alphabet = alphabet @property def terminal_nodes(self): @@ -721,56 +759,44 @@ def terminal_nodes(self): def flush(self): + if self.contents is None and self.alphabet is None: + return + if self.min is None and self.max is None: self.min = self.max = 1 - # print "buffer: " + self._buffer - # print "alphabet: " + self._alphabet - # print - # print - # type = fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String type = fvt.String name = self._name + str(len(self._terminal_nodes) + 1) - if len(self.alphabet) > 0: - contents = None - alphabet = self._alphabet - else: - if all(len(content) == 1 for content in self.contents): - alphabet = "".join(self.contents) - contents = None - else: - contents = self.contents - alphabet = None - - terminal_node = self._create_terminal_node(name, type, contents=contents, alphabet=alphabet, + terminal_node = self._create_terminal_node(name, type, contents=self.contents, alphabet=self.alphabet, qty=(self.min, self.max)) self._terminal_nodes.append(terminal_node) self.reset() def reset(self): - self._contents = [""] - self._alphabet = "" + self.contents = None + self.alphabet = None self.min = None self.max = None def run(self, inputs, name): self._name = name - for self._input in inputs: next_state_class = self.current_state.run(self) self.old_state = self.current_state self.current_state = next_state_class() - if not isinstance(self.current_state, RegexParser.InitialState): + if isinstance(self.current_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): raise Exception - if len(self._contents[0]) > 0 or len(self._contents) > 1 or len(self._alphabet) > 0 or inputs == "": - self.flush() + if inputs == "": + 
self.append_to_buffer("") + + self.flush() return self._terminal_nodes From fd01f4754b9ab395fcd9796ee66c7147538d2e20 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Mon, 4 Jul 2016 09:32:40 +0200 Subject: [PATCH 21/80] Revamp tests' organization --- test/__init__.py | 65 + test/__main__.py | 38 + test/integration/__init__.py | 26 + .../integration/test_integration.py | 1523 ++++++----------- test/unit/__init__.py | 28 + test/unit/test_data_model.py | 92 + test/unit/test_monitor.py | 114 ++ 7 files changed, 929 insertions(+), 957 deletions(-) create mode 100644 test/__init__.py create mode 100644 test/__main__.py create mode 100644 test/integration/__init__.py rename framework/test.py => test/integration/test_integration.py (71%) create mode 100644 test/unit/__init__.py create mode 100644 test/unit/test_data_model.py create mode 100644 test/unit/test_monitor.py diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..5e72eb3 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,65 @@ +################################################################################ +# +# Copyright 2014-2016 Eric Lacombe +# +################################################################################ +# +# This file is part of fuddly. +# +# fuddly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# fuddly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fuddly. If not, see +# +################################################################################ + +from __future__ import print_function + +import argparse +import sys + +mock_module = True +try: + import unittest.mock as mock_mod +except ImportError: + try: + import mock as mock_mod + except ImportError: + mock_module = False + print('ERROR: python-mock module is not installed! ' + 'Should be installed to be able to run tests.') + + +ddt_module = True +try: + import ddt +except ImportError: + ddt_module = False + print('ERROR: python(3)-ddt module is not installed! ' + 'Should be installed to be able to run tests.') + +if not (mock_module and ddt_module): + sys.exit("Some dependencies are missing: unable to launch tests.") + +mock = mock_mod + + +parser = argparse.ArgumentParser(description='Process arguments.') +parser.add_argument('-a', '--all', action='store_true', + help='Run all test cases. Some can take a lot of time. (Disabled by default.)') +parser.add_argument('--ignore-dm-specifics', action='store_true', + help='Run Data Models specific test cases. (Enabled by default.)') + +test_args = parser.parse_known_args() +run_long_tests = test_args[0].all +ignore_data_model_specifics = test_args[0].ignore_dm_specifics + +args = [sys.argv[0]] + test_args[1] diff --git a/test/__main__.py b/test/__main__.py new file mode 100644 index 0000000..e677a85 --- /dev/null +++ b/test/__main__.py @@ -0,0 +1,38 @@ +################################################################################ +# +# Copyright 2014-2016 Eric Lacombe +# +################################################################################ +# +# This file is part of fuddly.
+# +# fuddly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# fuddly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fuddly. If not, see +# +################################################################################ + +import unittest + +from test import args +import test.unit, test.integration + +if len(args) == 2 and args[1] == "test": + del args[1] + +if len(args) == 1: + args.append('test.unit') + args.append('test.integration') + +unittest.main(verbosity=2, argv=args, defaultTest=None, exit=False) + + diff --git a/test/integration/__init__.py b/test/integration/__init__.py new file mode 100644 index 0000000..0eda8e5 --- /dev/null +++ b/test/integration/__init__.py @@ -0,0 +1,26 @@ +################################################################################ +# +# Copyright 2014-2016 Eric Lacombe +# +################################################################################ +# +# This file is part of fuddly. +# +# fuddly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# fuddly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fuddly. If not, see +# +################################################################################ + +__all__ = [] + +from test.integration.test_integration import * diff --git a/framework/test.py b/test/integration/test_integration.py similarity index 71% rename from framework/test.py rename to test/integration/test_integration.py index 6341d23..e05f935 100644 --- a/framework/test.py +++ b/test/integration/test_integration.py @@ -1,5 +1,3 @@ -# -*- coding: utf8 -*- - ################################################################################ # # Copyright 2014-2016 Eric Lacombe @@ -22,81 +20,40 @@ # along with fuddly. 
If not, see # ################################################################################ - from __future__ import print_function import sys -import copy -import re -import functools -import binascii import unittest -import collections - -import argparse sys.path.append('.') -from framework.data_model import * from framework.value_types import * -from libs.external_modules import * - import data_models.example as example -import data_models.protocols.usb from framework.fuzzing_primitives import * -from framework.basic_primitives import * from framework.plumbing import * -from framework.target import * -from framework.logger import * -from framework.operator_helpers import * - from framework.data_model_helpers import * from framework.encoders import * -mock_module = True -try: - import unittest.mock as mock -except ImportError: - try: - import mock - except ImportError: - mock_module = False - print('ERROR: python-mock module is not installed! ' - 'Should be installed to be able to run every test.') -ddt_module = True -try: - import ddt -except ImportError: - ddt_module = False - print('ERROR: python(3)-ddt module is not installed! ' - 'Should be installed to be able to run every test.') +from test import ignore_data_model_specifics, run_long_tests +def setUpModule(): + global fmk, dm, results + fmk = FmkPlumbing() + fmk.run_project(name='tuto', dm_name='example') + dm = example.data_model + results = collections.OrderedDict() - -parser = argparse.ArgumentParser(description='Process arguments.') -parser.add_argument('-a', '--all', action='store_true', - help='Run all test cases. Some can take lot of time. (Disabled by default.)') -parser.add_argument('--ignore-dm-specifics', action='store_true', - help='Run Data Models specific test cases. (Enabled by default.)') - -parser.add_argument('--force', action='store_true', - help='Force testing even if some package dependencies are missing. 
(Disabled by default.)') - -test_args = parser.parse_known_args() -run_long_tests = test_args[0].all -ignore_data_model_specifics = test_args[0].ignore_dm_specifics -force = test_args[0].force - -if (not mock_module or not ddt_module) and not force: - sys.exit("Some dependencies are missing: use --force to run tests anyway.") +def tearDownModule(): + global fmk + fmk.exit_fmk() class TEST_Fuzzy_INT16(Fuzzy_INT16): int_list = ['TEST_OK', 'BLABLA', 'PLOP'] - + def __init__(self, endian=None, supp_list=None): self.endian = endian self.idx = 0 @@ -109,12 +66,10 @@ def _convert_value(self, val): return val - ######## Tests cases begins Here ######## # Legacy --> Need to be revamped class TestBasics(unittest.TestCase): - @classmethod def setUpClass(cls): cls.dm = example.data_model @@ -132,7 +87,7 @@ def test_01(self): print('Flatten 1: ', repr(node_ex1.to_bytes())) print('Flatten 1: ', repr(node_ex1.to_bytes())) l = node_ex1.get_value() - hk = list(node_ex1.iter_paths(only_paths=True)) + hk = set(node_ex1.get_all_paths().keys()) # print(l) # # print('\n\n ####### \n\n') @@ -154,7 +109,7 @@ def test_01(self): print('\n### TEST 1: cross check self.node.get_all_paths().keys() and get_nodes_names() ###') - print('*** Hkeys from self.node.iter_paths(only_paths=True):') + print('*** Hkeys from self.node.get_all_paths().keys():') hk = sorted(hk) for k in hk: print(k) @@ -176,7 +131,6 @@ def test_01(self): results['test1'] = res1 and res2 - print('\n### TEST 2: generate two different EX1 ###') node_ex1.unfreeze() @@ -189,7 +143,6 @@ def test_01(self): results['test2'] = val1 != val2 - print('\n### TEST 3: generate 4 identical TUX (with last one flatten) ###') tux = dm.get_data('TUX') @@ -206,7 +159,6 @@ def test_01(self): res = val1 == val2 and val1 == val3 results['test3'] = res - print('\n### TEST 4: generate 2 different flatten TUX ###') tux.unfreeze() @@ -219,7 +171,6 @@ def test_01(self): res = val1 != val2 results['test4'] = res - print('\n### TEST 5: test get_node_by_path() ###') print('\n*** test 5.1: get_node_by_path() with exact path') @@ -245,7 +196,8 @@ def test_01(self): print('name: %s, result: %s' % ('TUX', tux2.get_node_by_path('TUX').get_path_from(tux2))) print('name: %s, result: %s' % ('TX', tux2.get_node_by_path('TX').get_path_from(tux2))) print('name: %s, result: %s' % ('KU', tux2.get_node_by_path('KU', conf='ALT').get_path_from(tux2))) - print('name: %s, result: %s' % ('MARK3', tux2.get_node_by_path('MARK3', conf='ALT').get_path_from(tux2, conf='ALT'))) + print('name: %s, result: %s' % ( + 'MARK3', tux2.get_node_by_path('MARK3', conf='ALT').get_path_from(tux2, conf='ALT'))) print('\n*** test 5.3: call get_node_by_path() with real regexp') @@ -280,7 +232,7 @@ def test_01(self): print(e) c1 = NodeInternalsCriteria(mandatory_attrs=[NodeInternals.Mutable], - node_kinds=[NodeInternals_TypedValue]) + node_kinds=[NodeInternals_TypedValue]) c2 = NodeInternalsCriteria(node_kinds=[NodeInternals_TypedValue]) @@ -359,25 +311,22 @@ def test_01(self): print('\n') - # val = cmp(csts1, csts2) + # val = cmp(csts1, csts2) val = (csts1 > csts2) - (csts1 < csts2) if val != 0: res1 = False - print('> l2:') l2 = tux2.get_reachable_nodes(internals_criteria=crit) for e in l2: print(e.get_path_from(tux2)) - print('\n*** test 7.2:') res2 = len(l2) == len(l1) print('len(l2) == len(l1)? 
%r' % res2) - print('\n*** test 7.3:') tux = dm.get_data('TUX') @@ -407,7 +356,6 @@ def test_01(self): zip_l = zip(c_l1, c_l2) - test = 1 for zl1, zl2 in zip_l: @@ -426,19 +374,17 @@ def test_01(self): res3 = True - # val = cmp(c_l1, c_l2) + # val = cmp(c_l1, c_l2) val = (c_l1 > c_l2) - (c_l1 < c_l2) if val != 0: res3 = False else: res3 = True - print(res1, res2, res3) results['test7'] = res1 and res2 and res3 - print('\n### TEST 8: set_current_conf()') node_ex1 = dm.get_data('EX1') @@ -496,7 +442,6 @@ def test_01(self): print(msg) node_ex1.unfreeze_all() - print('\n*** test 8.2:') print('\n***** test 8.2.0: subparts:') @@ -509,16 +454,14 @@ def test_01(self): node_ex1.set_current_conf('ALT', root_regexp=None) - nonascii_test_str = u'\u00c2'.encode(internal_repr_codec) - node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: res2 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: res2 = False print(msg) @@ -526,7 +469,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or nonascii_test_str in msg: + if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or b'[\xc2]' in msg: res2 = False print(msg) @@ -537,7 +480,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: res2 = False print(msg) @@ -592,7 +535,6 @@ def test_01(self): res5 = False print(msg) - print('\n*** test 8.6:') node_ex1 = dm.get_data('EX1') @@ -604,7 +546,7 @@ def test_01(self): # node_kind3 = [NodeInternals_NonTerm] crit = NodeInternalsCriteria(mandatory_attrs=[NodeInternals.Mutable], - node_kinds=[NodeInternals_NonTerm]) + node_kinds=[NodeInternals_NonTerm]) node_ex1.unfreeze_all() @@ -618,15 +560,13 @@ def test_01(self): else: res6 = False - print('Results:') print(res0, res1, res2, res21, res3, res4, res5, res6) results['test8'] = res0 and res1 and res2 and res21 and res3 and res4 and res5 and res6 - print('\n### TEST 9: test the constraint type: =+(w1,w2,...)\n' \ - '--> can be False in really rare case') + '--> can be False in really rare case') node_ex1 = dm.get_data('EX1') @@ -637,11 +577,10 @@ def test_01(self): if b' ~(..)~ TUX ~(..)~ ' not in msg: res = False break - # print(msg) + # print(msg) results['test9'] = res - print('\n### TEST 10: test fuzzing primitives') print('\n*** test 10.1: fuzz_data_tree()') @@ -650,8 +589,6 @@ def test_01(self): fuzz_data_tree(node_ex1) node_ex1.get_value() - - print('\n### TEST 11: test terminal Node alternate conf') print('\n*** test 11.1: value type Node') @@ -660,27 +597,26 @@ def test_01(self): res1 = True msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or nonascii_test_str not in msg: + if b'[<]' not in msg or b'[\xc2]' not in msg: res1 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or nonascii_test_str not in msg: + if b'[<]' not in msg or b'[\xc2]' not in msg: res1 = False 
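# NOTE: the alternate-configuration checks in this test all follow the same
# pattern: serialize under the 'ALT' conf, require the conf-specific markers,
# unfreeze, and serialize again. A condensed sketch of that pattern -- the
# helper name is illustrative; the calls and markers come from this test:
def _assert_alt_markers(testcase, node, markers=(b'[<]', b'[\xc2]')):
    for _ in range(2):  # once on the frozen node, once after unfreeze_all()
        msg = node.to_bytes(conf='ALT')
        for m in markers:
            testcase.assertIn(m, msg)
        node.unfreeze_all()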
print(msg) node_ex1.unfreeze_all() msg = node_ex1.get_node_by_path('TUX$').to_bytes(conf='ALT', recursive=False) - if b'[<]' in msg or nonascii_test_str in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: + if b'[<]' in msg or b'[\xc2]' in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: res1 = False print(msg) - print('\n*****\n') crit = NodeInternalsCriteria(mandatory_attrs=[NodeInternals.Mutable], - node_kinds=[NodeInternals_TypedValue]) + node_kinds=[NodeInternals_TypedValue]) node_ex1.unfreeze_all() @@ -694,7 +630,6 @@ def test_01(self): else: res2 = False - print('\n*** test 11.2: func type Node') node_ex1 = dm.get_data('EX1') @@ -717,7 +652,6 @@ def test_01(self): res3 = False print(msg) - print(res1, res2, res3) results['test11'] = res1 and res2 and res3 @@ -729,20 +663,26 @@ def test_01(self): print('\n*** test 12.1:') node_ex1 = dm.get_data('EX1') - for i in node_ex1.iter_paths(only_paths=True): + htbl = node_ex1.get_all_paths() + l = sorted(list(htbl.keys())) + for i in l: print(i) print('\n******\n') node_ex1.get_value() - for i in node_ex1.iter_paths(only_paths=True): + htbl = node_ex1.get_all_paths() + l = sorted(list(htbl.keys())) + for i in l: print(i) print('\n******\n') node_ex1.unfreeze_all() node_ex1.get_value() - for i in node_ex1.iter_paths(only_paths=True): + htbl = node_ex1.get_all_paths() + l = sorted(list(htbl.keys())) + for i in l: print(i) print('\n*** test 13: test typed_value Node') @@ -763,7 +703,7 @@ def test_01(self): print('Node.env: ', e.env) print('Node.value_type: ', e.cc.get_value_type()) vt[e] = e.cc.get_value_type() - if issubclass(vt[e].__class__, VT_Alt): #isinstance(vt[e], (BitField, String)): + if issubclass(vt[e].__class__, VT_Alt): # isinstance(vt[e], (BitField, String)): continue compat = list(vt[e].compat_cls.values()) compat.remove(vt[e].__class__) @@ -830,7 +770,6 @@ def test_01(self): e.set_values(value_type=c()) print("Set new value type '%s' for the Node %s!" % (c, e.name)) - print('\n> part2:\n') print('--[ EVT1 ]-----[ EVT2 ]--') @@ -855,14 +794,11 @@ def test_01(self): e.set_values(value_type=c()) print("Set new value type '%s' for the Node %s!" % (c, e.name)) - evt.unfreeze_all() - print(res1, res2) results['test13'] = res1 and res2 - print('\n*** test 14: test Proxy Node') res1 = False @@ -872,7 +808,7 @@ def test_01(self): print('Blend Node starts with:') print(msg[:300] + b' ...') print('\nAnd ends with:') - print(b'... ' + msg[len(msg)-300:len(msg)]) + print(b'... ' + msg[len(msg) - 300:len(msg)]) print("\n--> Call unfreeze()") blend.unfreeze() @@ -882,7 +818,7 @@ def test_01(self): print('\nBlend Node starts with:') print(msg2[:300] + b' ...') print('\nAnd ends with:') - print(b'... ' + msg2[len(msg2)-300:len(msg2)]) + print(b'... 
' + msg2[len(msg2) - 300:len(msg2)]) print('\n### SUMMARY ###') @@ -893,19 +829,15 @@ def test_01(self): self.assertTrue(v) - class TestMisc(unittest.TestCase): - @classmethod def setUpClass(cls): cls.dm = example.data_model cls.dm.load_data_model(fmk._name2dm) - def setUp(self): pass - def _loop_nodes(self, node, cpt=20, criteria_func=None, transform=lambda x: x): stop_loop = False for i in range(cpt): @@ -924,7 +856,7 @@ def _loop_nodes(self, node, cpt=20, criteria_func=None, transform=lambda x: x): return i - # @unittest.skip("demonstrating skipping") + # @unittest.skip("demonstrating skipping") def test_Node_unfreeze_dont_change_state(self): ''' unfreeze(dont_change_state) @@ -947,14 +879,15 @@ def test_Node_unfreeze_dont_change_state(self): self.assertTrue(res1) - def test_TypedNode_1(self): evt = dm.get_data('TVE') evt.get_value() print('=======[ PATHS ]========') - for i in evt.iter_paths(only_paths=True): + htbl = evt.get_all_paths() + l = sorted(list(htbl.keys())) + for i in l: print(i) print('\n=======[ Typed Nodes ]========') @@ -1007,7 +940,7 @@ def test_TypedNode_1(self): if not issubclass(vt.__class__, VT_Alt): print(' node vt endian: ', node.cc.get_value_type().endian) print(' node orig value: (hexlified) {0!s:s}, {0!s:s}'.format(binascii.hexlify(orig_node_val), - orig_node_val)) + orig_node_val)) print(' node corrupted value: (hexlified) {0!s:s}, {0!s:s}'.format(binascii.hexlify(node.to_bytes()), node.to_bytes())) else: @@ -1024,7 +957,6 @@ def test_TypedNode_1(self): self.assertTrue(turn_nb_list == good_list, msg=msg) - def test_Node_Attr_01(self): ''' Value Node make_random()/make_determinist() @@ -1044,7 +976,7 @@ def test_Node_Attr_01(self): evt.unfreeze() print(evt.to_bytes()) - # self.assertEqual(idx, ) + # self.assertEqual(idx, ) def test_NonTerm_Attr_01(self): ''' @@ -1057,23 +989,23 @@ def test_NonTerm_Attr_01(self): print('\n -=[ determinist & finite (loop count: %d) ]=- \n' % loop_count) - nt = dm.get_data('NonTerm') + nt = dm.get_data('NonTerm') nt.make_finite(all_conf=True, recursive=True) nt.make_determinist(all_conf=True, recursive=True) nb = self._loop_nodes(nt, loop_count, criteria_func=crit_func) - + self.assertEqual(nb, 6) print('\n -=[ determinist & infinite (loop count: %d) ]=- \n' % loop_count) - nt = dm.get_data('NonTerm') + nt = dm.get_data('NonTerm') nt.make_infinite(all_conf=True, recursive=True) nt.make_determinist(all_conf=True, recursive=True) self._loop_nodes(nt, loop_count, criteria_func=crit_func) print('\n -=[ random & infinite (loop count: %d) ]=- \n' % loop_count) - nt = dm.get_data('NonTerm') + nt = dm.get_data('NonTerm') # nt.make_infinite(all_conf=True, recursive=True) self._loop_nodes(nt, loop_count, criteria_func=crit_func) @@ -1084,7 +1016,6 @@ def test_NonTerm_Attr_01(self): nb = self._loop_nodes(nt, loop_count, criteria_func=crit_func) self.assertEqual(nb, 6) - def test_BitField_Attr_01(self): ''' @@ -1096,9 +1027,9 @@ def test_BitField_Attr_01(self): print('\n -=[ random & infinite (loop count: %d) ]=- \n' % loop_count) - t = BitField(subfield_limits=[2,6,10,12], - subfield_val_lists=[[4,2,1], [2,15,16,3], None, [1]], - subfield_val_extremums=[None, None, [3,11], None], + t = BitField(subfield_limits=[2, 6, 10, 12], + subfield_val_lists=[[4, 2, 1], [2, 15, 16, 3], None, [1]], + subfield_val_extremums=[None, None, [3, 11], None], padding=0, lsb_padding=True, endian=VT.LittleEndian) node = Node('BF', value_type=t) node.set_env(Env()) @@ -1107,7 +1038,6 @@ def test_BitField_Attr_01(self): print('\n -=[ determinist & infinite (loop 
count: %d) ]=- \n' % loop_count) - node_copy = Node('BF_copy', base_node=node, ignore_frozen_state=True) node_copy.set_env(Env()) node_copy.make_determinist(all_conf=True, recursive=True) @@ -1129,13 +1059,12 @@ def test_BitField_Attr_01(self): node_copy3.make_finite(all_conf=True, recursive=True) self._loop_nodes(node_copy3, loop_count, criteria_func=lambda x: True, transform=binascii.b2a_hex) - def test_BitField(self): loop_count = 20 e_bf = Node('BF') - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[4,2,1], None, [10,13]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[4, 2, 1], None, [10, 13]], subfield_val_extremums=[None, [14, 15], None], padding=0, lsb_padding=False, endian=VT.BigEndian) e_bf.set_values(value_type=vt) @@ -1167,7 +1096,7 @@ def test_BitField(self): print('\n***') print('Random & finite: (should result in only 1 possible values)') - vt = BitField(subfield_sizes=[4,4], subfield_val_lists=[[0x3], [0xF]]) + vt = BitField(subfield_sizes=[4, 4], subfield_val_lists=[[0x3], [0xF]]) e = Node('bf_test', value_type=vt) e.set_env(Env()) e.make_finite() @@ -1176,62 +1105,60 @@ def test_BitField(self): self.assertEqual(count, 1) - def test_BitField_basic_features(self): print('\n***** [ BitField ] *****\n') i = 0 ok = True - t = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[1],[1],[1],[1]], + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]], padding=0, lsb_padding=False, endian=VT.LittleEndian) val = binascii.b2a_hex(t.get_value()) print(t.pretty_print(), t.drawn_val) print('*** [%d] ' % i, val) i += 1 self.assertEqual(val, b'4501') - - t = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[1],[1],[1],[1]], + + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]], padding=0, lsb_padding=True, endian=VT.BigEndian) val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) i += 1 self.assertEqual(val, b'5140') - - t = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[1],[1],[1],[1]], + + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]], padding=1, lsb_padding=True, endian=VT.BigEndian) val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) i += 1 self.assertEqual(val, b'517f') - - t = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[1],[1],[1],[1]], + + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]], padding=0, lsb_padding=False, endian=VT.BigEndian) val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) i += 1 - self.assertEqual(val,b'0145') - - t = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[1],[1],[1],[1]], + self.assertEqual(val, b'0145') + + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]], padding=1, lsb_padding=False, endian=VT.BigEndian) val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) i += 1 self.assertEqual(val, b'fd45') - - t = BitField(subfield_sizes=[2,4,2,2], subfield_val_lists=[[1],[1],[1],[1]], + + t = BitField(subfield_sizes=[2, 4, 2, 2], subfield_val_lists=[[1], [1], [1], [1]], padding=1, lsb_padding=False, endian=VT.BigEndian) val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) i += 1 - self.assertEqual(val,b'fd45') - + self.assertEqual(val, b'fd45') print('\n******** subfield_val_list\n') # Note that 4 in subfield 1 and 16 in subfield 2 are ignored # --> 6 different values are output before looping - t = BitField(subfield_limits=[2,6,8,10], 
subfield_val_lists=[[4,2,1],[2,15,16,3],[2,3,0],[1]], + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[4, 2, 1], [2, 15, 16, 3], [2, 3, 0], [1]], padding=0, lsb_padding=True, endian=VT.LittleEndian, determinist=True) for i in range(30): val = binascii.b2a_hex(t.get_value()) @@ -1246,12 +1173,11 @@ def test_BitField_basic_features(self): print(t.pretty_print(), ' --> ', t.get_current_raw_val()) print('*** [%d] ' % i, val[i]) - print(list(val.values())[:15]) self.assertEqual(list(val.values())[:15], - [b'c062',b'0062',b'4062',b'806f', b'8060',b'8063',b'8061', - b'8064',b'806e',b'8072',b'8042',b'8052',b'80e2',b'8022',b'80a2']) - + [b'c062', b'0062', b'4062', b'806f', b'8060', b'8063', b'8061', + b'8064', b'806e', b'8072', b'8042', b'8052', b'80e2', b'8022', b'80a2']) + print('\n********\n') t.switch_mode() @@ -1259,11 +1185,10 @@ def test_BitField_basic_features(self): val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) - print('\n******** subfield_val_extremums\n') # --> 14 different values are output before looping - t = BitField(subfield_limits=[2,6,8,10], subfield_val_extremums=[[1,2],[4,12],[0,3],[2,3]], + t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_extremums=[[1, 2], [4, 12], [0, 3], [2, 3]], padding=0, lsb_padding=True, endian=VT.LittleEndian, determinist=True) for i in range(30): val = binascii.b2a_hex(t.get_value()) @@ -1276,7 +1201,6 @@ def test_BitField_basic_features(self): val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) - print('\n********\n') t.switch_mode() @@ -1284,11 +1208,10 @@ def test_BitField_basic_features(self): val = binascii.b2a_hex(t.get_value()) print('*** [%d] ' % i, val) - print('\n******** rewind() tests \n') - t = BitField(subfield_limits=[2,6,8,10], - subfield_val_extremums=[[1,2],[4,12],[0,3],None], + t = BitField(subfield_limits=[2, 6, 8, 10], + subfield_val_extremums=[[1, 2], [4, 12], [0, 3], None], subfield_val_lists=[None, None, None, [3]], padding=0, lsb_padding=False, endian=VT.BigEndian, determinist=True) @@ -1299,13 +1222,12 @@ def test_BitField_basic_features(self): if t.is_exhausted(): break - if val[0] != b'0311' or val[1] != b'0312' or val[2] != b'0316' or val[3] != b'031a' \ - or val[4] != b'031e' or val[5] != b'0322' or val[6] != b'0326' or val[7] != b'032a' \ - or val[8] != b'032e' or val[9] != b'0332' or val[10] != b'0372' or val[11] != b'03b2' or val[12] != b'03f2': + or val[4] != b'031e' or val[5] != b'0322' or val[6] != b'0326' or val[7] != b'032a' \ + or val[8] != b'032e' or val[9] != b'0332' or val[10] != b'0372' or val[11] != b'03b2' or val[ + 12] != b'03f2': raise ValueError - print('\n********\n') t.reset_state() @@ -1337,7 +1259,6 @@ def test_BitField_basic_features(self): print(binascii.b2a_hex(t.get_value())) print(binascii.b2a_hex(t.get_value())) - print('\n******** Fuzzy mode\n') t.reset_state() t.switch_mode() @@ -1371,35 +1292,30 @@ def test_BitField_basic_features(self): print(binascii.b2a_hex(t.get_value())) print(binascii.b2a_hex(t.get_value())) - - - - def test_BitField_various_features(self): bf = Node('BF') - vt1 = BitField(subfield_sizes=[3,5,7], - subfield_val_lists=[[2,1], None, [10,120]], - subfield_val_extremums=[None, [6, 15], None], - padding=0, lsb_padding=True, endian=VT.BigEndian) + vt1 = BitField(subfield_sizes=[3, 5, 7], + subfield_val_lists=[[2, 1], None, [10, 120]], + subfield_val_extremums=[None, [6, 15], None], + padding=0, lsb_padding=True, endian=VT.BigEndian) bf.set_values(value_type=vt1) bf.make_determinist(all_conf=True, recursive=True) 
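# NOTE: a compact reading aid for the BitField declarations used throughout
# this file: subfields are declared starting from the least significant bits,
# and each one draws either from an explicit list in subfield_val_lists or
# from a [min, max] range in subfield_val_extremums (None defers to the other
# source). Standalone sketch reusing the constructor arguments of vt1 above
# (BitField, VT, Node and Env are already imported at the top of this file):
import binascii

vt = BitField(subfield_sizes=[3, 5, 7],
              subfield_val_lists=[[2, 1], None, [10, 120]],
              subfield_val_extremums=[None, [6, 15], None],
              padding=0, lsb_padding=True, endian=VT.BigEndian)
print(binascii.b2a_hex(vt.get_value()))  # one drawn value, hex-encoded
print(vt.get_subfield(idx=1))            # inspect the 5-bit middle subfield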
bf.set_env(Env()) print('\n -=[ .extend_right() method ]=- \n') print('*** before extension') - + bf.show() # print(bf.get_raw_value()) # bf.unfreeze() # bf.show() - - vt2 = BitField(subfield_sizes=[4,3,4,4,2], - subfield_val_lists=[None, [3,5], [15], [14], [2]], + + vt2 = BitField(subfield_sizes=[4, 3, 4, 4, 2], + subfield_val_lists=[None, [3, 5], [15], [14], [2]], subfield_val_extremums=[[8, 12], None, None, None, None], padding=0, lsb_padding=False, endian=VT.BigEndian) - print('*** after extension') bf.unfreeze() @@ -1418,36 +1334,35 @@ def test_BitField_various_features(self): bf.unfreeze() bf.show() - + self.assertEqual(bf.value_type.get_subfield(idx=3), 5) self.assertEqual(bf.value_type.get_subfield(idx=0), 3) - - + def test_BitField_absorb(self): - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[3,2,0xe,1], None, [10,13,3]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], subfield_val_extremums=[None, [14, 15], None], - padding=1, endian=VT.BigEndian, lsb_padding=True) + padding=1, endian=VT.BigEndian, lsb_padding=True) bfield_1 = Node('bfield_1', value_type=vt) # bfield.set_env(Env()) - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[3,2,0xe,1], None, [10,13,3]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], subfield_val_extremums=[None, [14, 15], None], - padding=0, endian=VT.BigEndian, lsb_padding=True) + padding=0, endian=VT.BigEndian, lsb_padding=True) bfield_2 = Node('bfield_2', value_type=vt) - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[3,2,0xe,1], None, [10,13,3]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], subfield_val_extremums=[None, [14, 15], None], - padding=1, endian=VT.BigEndian, lsb_padding=False) + padding=1, endian=VT.BigEndian, lsb_padding=False) bfield_3 = Node('bfield_3', value_type=vt) - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[3,2,0xe,1], None, [10,13,3]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], subfield_val_extremums=[None, [14, 15], None], - padding=0, endian=VT.BigEndian, lsb_padding=False) + padding=0, endian=VT.BigEndian, lsb_padding=False) bfield_4 = Node('bfield_4', value_type=vt) # '?\xef' (\x3f\xe0) + padding 0b1111 @@ -1460,7 +1375,6 @@ def test_BitField_absorb(self): self.assertEqual(status, AbsorbStatus.FullyAbsorbed) self.assertEqual(size, len(msg)) - msg = struct.pack('>H', 0x3fe0) status, off, size, name = bfield_2.absorb(msg, constraints=AbsFullCsts()) @@ -1481,14 +1395,13 @@ def test_BitField_absorb(self): msg = struct.pack('>H', 0x3fe) status, off, size, name = bfield_4.absorb(msg, constraints=AbsFullCsts()) - + print('\n ---[message to absorb]---') print(repr(msg)) bfield_4.show() self.assertEqual(status, AbsorbStatus.FullyAbsorbed) self.assertEqual(size, len(msg)) - def test_MISC(self): ''' TODO: assertion + purpose @@ -1496,7 +1409,7 @@ def test_MISC(self): loop_count = 20 e = Node('VT1') - vt = UINT16_be(int_list=[1,2,3,4,5,6]) + vt = UINT16_be(int_list=[1, 2, 3, 4, 5, 6]) e.set_values(value_type=vt) e.set_env(Env()) e.make_determinist(all_conf=True, recursive=True) @@ -1514,8 +1427,8 @@ def test_MISC(self): sep = Node('sep', values=[' # ']) nt = Node('NT') nt.set_subnodes_with_csts([ - 1, ['u>', [e, 3], [sep, 1], [e2, 2]] - ]) + 1, ['u>', [e, 3], [sep, 1], [e2, 2]] + ]) nt.set_env(Env()) self._loop_nodes(nt, loop_count, criteria_func=lambda x: True) @@ 
-1527,8 +1440,8 @@ def test_MISC(self): e = Node('NT') e.set_subnodes_with_csts([ - 1, ['u>', [v, 2]] - ]) + 1, ['u>', [v, 2]] + ]) e.set_env(Env()) e.make_determinist(recursive=True) self._loop_nodes(e, loop_count, criteria_func=lambda x: True) @@ -1545,9 +1458,7 @@ def test_MISC(self): self._loop_nodes(e, loop_count, criteria_func=lambda x: x.name == 'Middle_NT') - class TestModelWalker(unittest.TestCase): - @classmethod def setUpClass(cls): cls.dm = example.data_model @@ -1557,143 +1468,149 @@ def setUp(self): pass def test_NodeConsumerStub_1(self): - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') default_consumer = NodeConsumerStub() - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, max_steps=70): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, + max_steps=70): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 49) - + def test_NodeConsumerStub_2(self): - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') default_consumer = NodeConsumerStub(max_runs_per_node=-1, min_runs_per_node=2) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, max_steps=70): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, + max_steps=70): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 35) def test_BasicVisitor(self): - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') default_consumer = BasicVisitor() - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, max_steps=70): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, + max_steps=70): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 37) def test_NonTermVisitor(self): print('***') - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') nonterm_consumer = NonTermVisitor(respect_order=True) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, max_steps=20): - print(colorize('[%d] '%idx + repr(rnode.to_bytes()), rgb=Color.INFO)) + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, + max_steps=20): + print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 4) print('***') - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') nonterm_consumer = NonTermVisitor(respect_order=False) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, max_steps=20): - print(colorize('[%d] '%idx + repr(rnode.to_bytes()), rgb=Color.INFO)) + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, + max_steps=20): + print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 4) print('***') data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 nonterm_consumer = NonTermVisitor(respect_order=True) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] '%idx + rnode.to_ascii(), rgb=Color.INFO)) + for rnode, consumed_node, 
orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, + max_steps=10): + print(colorize('[%d] ' % idx + rnode.to_str(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 nonterm_consumer = NonTermVisitor(respect_order=False) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] '%idx + rnode.to_ascii(), rgb=Color.INFO)) + for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, + max_steps=10): + print(colorize('[%d] ' % idx + rnode.to_str(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') - def test_basics(self): # data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') shape_desc = \ - {'name': 'shape', - 'custo_set': MH.Custo.NTerm.FrozenCopy, - 'custo_clear': MH.Custo.NTerm.MutableClone, - 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=[' [!] '])}}, - 'contents': [ - - {'weight': 20, - 'contents': [ - {'name': 'prefix1', - 'contents': String(size=10, alphabet='+')}, - - {'name': 'body_top', - 'contents': [ - - {'name': 'body', - 'custo_set': MH.Custo.NTerm.FrozenCopy, - 'custo_clear': MH.Custo.NTerm.MutableClone, - 'separator': {'contents': {'name': 'sep2', - 'contents': String(val_list=['::'])}}, - 'shape_type': MH.Random, # ignored in determnist mode - 'contents': [ - {'contents': Filename(val_list=['AAA']), - 'qty': (0, 4), - 'name': 'str'}, - {'contents': UINT8(int_list=[0x3E]), # chr(0x3E) == '>' - 'name': 'int'} - ]} - ]} - ]}, - - {'weight': 20, - 'contents': [ - {'name': 'prefix2', - 'contents': String(size=10, alphabet='>')}, - - {'name': 'body'} - ]} - ]} + {'name': 'shape', + 'custo_set': MH.Custo.NTerm.FrozenCopy, + 'custo_clear': MH.Custo.NTerm.MutableClone, + 'separator': {'contents': {'name': 'sep', + 'contents': String(val_list=[' [!] '])}}, + 'contents': [ + + {'weight': 20, + 'contents': [ + {'name': 'prefix1', + 'contents': String(size=10, alphabet='+')}, + + {'name': 'body_top', + 'contents': [ + + {'name': 'body', + 'custo_set': MH.Custo.NTerm.FrozenCopy, + 'custo_clear': MH.Custo.NTerm.MutableClone, + 'separator': {'contents': {'name': 'sep2', + 'contents': String(val_list=['::'])}}, + 'shape_type': MH.Random, # ignored in determnist mode + 'contents': [ + {'contents': Filename(val_list=['AAA']), + 'qty': (0, 4), + 'name': 'str'}, + {'contents': UINT8(int_list=[0x3E]), # chr(0x3E) == '>' + 'name': 'int'} + ]} + ]} + ]}, + + {'weight': 20, + 'contents': [ + {'name': 'prefix2', + 'contents': String(size=10, alphabet='>')}, + + {'name': 'body'} + ]} + ]} mh = ModelHelper(delayed_jobs=True) data = mh.create_graph_from_desc(shape_desc) raw_vals = [ - b' [!] ++++++++++ [!] ::=:: [!] ', - b' [!] ++++++++++ [!] ::?:: [!] ', - b' [!] ++++++++++ [!] ::\xff:: [!] ', - b' [!] ++++++++++ [!] ::\x00:: [!] ', - b' [!] ++++++++++ [!] ::\x01:: [!] ', - b' [!] ++++++++++ [!] ::\x80:: [!] ', - b' [!] ++++++++++ [!] ::\x7f:: [!] ', - b' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit - b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - b' [!] ++++++++++ [!] 
::file%n%n%n%nname.txt::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', - b' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', - b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit - b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] ' + ' [!] ++++++++++ [!] ::=:: [!] ', + ' [!] ++++++++++ [!] ::?:: [!] ', + ' [!] ++++++++++ [!] ::\xff:: [!] ', + ' [!] ++++++++++ [!] ::\x00:: [!] ', + ' [!] ++++++++++ [!] ::\x01:: [!] ', + ' [!] ++++++++++ [!] ::\x80:: [!] ', + ' [!] ++++++++++ [!] ::\x7f:: [!] ', + ' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit + ' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', + ' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', + ' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit + ' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', + ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] 
' ] tn_consumer = TypedNodeDisruption() @@ -1702,31 +1619,32 @@ def test_basics(self): node_kinds=[NodeInternals_TypedValue], negative_node_subkinds=[String]) tn_consumer.set_node_interest(internals_criteria=ic) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, tn_consumer, make_determinist=True, max_steps=100): - val = rnode.to_bytes() - print(colorize('[%d] '%idx + repr(val), rgb=Color.INFO)) + for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, tn_consumer, make_determinist=True, + max_steps=100): + val = rnode.to_str() + print(colorize('[%d] ' % idx + repr(val), rgb=Color.INFO)) if idx not in [8, 22]: - self.assertEqual(val, raw_vals[idx-1]) + self.assertEqual(val, raw_vals[idx - 1]) self.assertEqual(idx, 35) - def test_TypedNodeDisruption_1(self): - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') tn_consumer = TypedNodeDisruption() ic = NodeInternalsCriteria(negative_node_subkinds=[String]) tn_consumer.set_node_interest(internals_criteria=ic) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, max_steps=300): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, + max_steps=300): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 27) - def test_TypedNodeDisruption_2(self): - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') tn_consumer = TypedNodeDisruption(max_runs_per_node=3, min_runs_per_node=3) ic = NodeInternalsCriteria(negative_node_subkinds=[String]) tn_consumer.set_node_interest(internals_criteria=ic) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, max_steps=100): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, + max_steps=100): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 9) @@ -1735,11 +1653,12 @@ def test_TypedNodeDisruption_3(self): Test case similar to test_TermNodeDisruption_1() but with more powerfull TypedNodeDisruption. 
''' - nt = self.dm.get_data('Simple') + nt = self.dm.get_data('Simple') tn_consumer = TypedNodeDisruption(max_runs_per_node=1) # ic = NodeInternalsCriteria(negative_node_subkinds=[String]) # tn_consumer.set_node_interest(internals_criteria=ic) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, max_steps=-1): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True, + max_steps=-1): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 310) @@ -1752,7 +1671,7 @@ def test_TypedNodeDisruption_BitfieldCollapse(self): data.freeze() data.show() - print('\norig value: '+repr(data['smscmd/TP-DCS'].to_bytes())) + print('\norig value: ' + repr(data['smscmd/TP-DCS'].to_bytes())) # self.assertEqual(data['smscmd/TP-DCS'].to_bytes(), b'\xF6') corrupt_table = { @@ -1774,78 +1693,82 @@ def test_TypedNodeDisruption_BitfieldCollapse(self): print(colorize('\n[%d] ' % idx + repr(rnode['smscmd/TP-DCS$'].to_bytes()), rgb=Color.INFO)) print('node name: ' + consumed_node.name) print('original value: {!s} ({!s})'.format(binascii.b2a_hex(orig_node_val), - bin(struct.unpack('B', orig_node_val)[0]))) + bin(struct.unpack('B', orig_node_val)[0]))) print('corrupted value: {!s} ({!s})'.format(binascii.b2a_hex(consumed_node.to_bytes()), - bin(struct.unpack('B', consumed_node.to_bytes())[0]))) + bin(struct.unpack('B', consumed_node.to_bytes())[0]))) print('result: {!s} ({!s})'.format(binascii.b2a_hex(rnode['smscmd/TP-DCS$'].to_bytes()), - bin(struct.unpack('B', rnode['smscmd/TP-DCS$'].to_bytes())[0]))) + bin(struct.unpack('B', rnode['smscmd/TP-DCS$'].to_bytes())[0]))) rnode.unfreeze(recursive=True, reevaluate_constraints=True) rnode.freeze() rnode['smscmd/TP-DCS$'].show() self.assertEqual(rnode['smscmd/TP-DCS'].to_bytes(), corrupt_table[idx]) - def test_TermNodeDisruption_1(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = TermNodeDisruption() - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=-1): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=-1): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) # print('original val: %s' % repr(orig_node_val)) # print('corrupted val: %s' % repr(consumed_node.to_bytes())) self.assertEqual(idx, 266) def test_TermNodeDisruption_2(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = TermNodeDisruption(max_runs_per_node=-1, min_runs_per_node=2) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=-1): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=-1): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 91) def test_TermNodeDisruption_3(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = TermNodeDisruption(specific_args=['1_BANG_1', '2_PLOUF_2']) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=-1): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=-1): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 152) - def test_AltConfConsumer_1(self): - 
simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = AltConfConsumer(max_runs_per_node=-1, min_runs_per_node=-1) consumer.set_node_interest(owned_confs=['ALT']) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=100): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=100): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 15) def test_AltConfConsumer_2(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = AltConfConsumer(max_runs_per_node=2, min_runs_per_node=1) consumer.set_node_interest(owned_confs=['ALT']) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=100): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=100): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 8) def test_AltConfConsumer_3(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = AltConfConsumer(max_runs_per_node=-1, min_runs_per_node=-1) consumer.set_node_interest(owned_confs=['ALT', 'ALT_2']) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=100): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=100): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 24) def test_AltConfConsumer_4(self): - simple = self.dm.get_data('Simple') + simple = self.dm.get_data('Simple') consumer = AltConfConsumer(max_runs_per_node=-1, min_runs_per_node=-1) consumer.set_node_interest(owned_confs=['ALT_2', 'ALT']) - for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=50): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, + max_steps=50): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 24) - def test_JPG(self): nt = self.dm.get_data('jpg') tn_consumer = TypedNodeDisruption() @@ -1858,184 +1781,29 @@ def test_JPG(self): except StopIteration: break - print(colorize('number of imgs: %d'%idx, rgb=Color.INFO)) - - self.assertEqual(idx, 116) + print(colorize('number of imgs: %d' % idx, rgb=Color.INFO)) + self.assertEqual(idx, 115) def test_USB(self): dm_usb = fmk.get_data_model_by_name('usb') dm_usb.build_data_model() - data = dm_usb.get_data('CONF') + data = dm_usb.get_data('CONF') consumer = TypedNodeDisruption() consumer.need_reset_when_structure_change = True - for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, consumer, make_determinist=True, max_steps=600): + for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, consumer, make_determinist=True, + max_steps=600): pass # print(colorize('[%d] '%idx + repr(rnode.to_bytes()), rgb=Color.INFO)) - print(colorize('number of confs: %d'%idx, rgb=Color.INFO)) + print(colorize('number of confs: %d' % idx, rgb=Color.INFO)) self.assertIn(idx, [159]) -if mock_module and ddt_module: - @ddt.ddt - class TestBitFieldCondition(unittest.TestCase): - - @classmethod - def setUpClass(cls): - - def side_effect(idx): - return [0, 1, 2][idx] - - cls.node = mock.Mock() - cls.node.get_subfield = 
mock.MagicMock(side_effect=side_effect) - - - @ddt.data((1, 1), (1, [1]), ([1], [1]), - (1, (1,)), ((1,), (1,)), - (2, [2, 6, 7]), (2, (2, 6, 7)), - ([1, 2], [1, [5, 2, 8]]), ([1, 2], [[1], [5, 6, 2]]), - ((1, 2), (1, (5, 2, 8))), ((1, 2), ((1,), (5, 6, 2)))) - @ddt.unpack - def test_with_one_argument(self, sf, val): - condition = BitFieldCondition(sf=sf, val=val) - self.assertTrue(condition.check(TestBitFieldCondition.node)) - - condition = BitFieldCondition(sf=sf, neg_val=val) - self.assertFalse(condition.check(TestBitFieldCondition.node)) - - @ddt.data(([0, 1, 2], [0, [1, 3], None], [None, None, 5]), - ([0, 2], [None, 2], [3, None])) - @ddt.unpack - def test_true_with_both_arguments(self, sf, val, neg_val): - condition = BitFieldCondition(sf=sf, val=val, neg_val=neg_val) - self.assertTrue(condition.check(TestBitFieldCondition.node)) - - @ddt.data(([0, 1, 2], [[0, 1], [1, 2], None], [None, None, [1, 2, 3]]), - ([0, 1, 2], [[1, 2, 3], [1, 2], None], [None, None, [1, 3, 5]])) - @ddt.unpack - def test_false_with_both_arguments(self, sf, val, neg_val): - condition = BitFieldCondition(sf=sf, val=val, neg_val=neg_val) - self.assertFalse(condition.check(TestBitFieldCondition.node)) - - def test_true_val_has_priority(self): - condition = BitFieldCondition(sf=0, val=[0, 4, 5], neg_val=[0, 4, 5]) - self.assertTrue(condition.check(TestBitFieldCondition.node)) - - def test_false_val_has_priority(self): - condition = BitFieldCondition(sf=0, val=[3, 4, 5], neg_val=[3, 4, 5]) - self.assertFalse(condition.check(TestBitFieldCondition.node)) - - @ddt.data((None, [2, 3]), ([1], 1), ((1,), 2), - ([1], [2, 1, 4]), ((1,), (2, 1, 4)), - ([1, 2], [1])) - @ddt.unpack - def test_invalid_with_one_argument(self, sf, val): - self.assertRaises(Exception, BitFieldCondition, sf=sf, val=val) - self.assertRaises(Exception, BitFieldCondition, sf=sf, neg_val=val) - - @ddt.data((1, None, None), (None, 2, 3), - ([1, 2], [1, None], [2, None]), - ([1, 2], [1, 2], [[1, 2, 3, 4]]), - ([1, 2], [1, 2, 3, 4], [[1, 2]])) - @ddt.unpack - def test_invalid_with_both_arguments(self, sf, val, neg_val): - self.assertRaises(Exception, BitFieldCondition, sf=sf, val=val, neg_val=neg_val) - - class ProbeUserTest(unittest.TestCase): - """Test case used to test the 'ProbeUser' class.""" - - @classmethod - def setUpClass(cls): - pass - - def setUp(self): - """Initialisation des tests.""" - - self.timeout = 2 - - self.probe = Probe() - self.probe.main = mock.Mock() - - self.probe.start = mock.Mock() - self.probe.stop = mock.Mock() - - self.dm = mock.Mock() - self.target = mock.Mock() - self.logger = mock.Mock() - - self._set_up_specific() - - def _set_up_specific(self): - self.probe_user = ProbeUser(self.probe) - - def tearDown(self): - pass - - def test_not_started_is_alive(self): - self.assertFalse(self.probe_user.is_alive()) - - def test_started_is_alive(self): - self.probe_user.start(self.dm, self.target, self.logger) - self.assertTrue(self.probe_user.is_alive()) - - def test_stopped_is_alive(self): - self.probe_user.start(self.dm, self.target, self.logger) - self.probe_user.stop() - self.probe_user.join(self.timeout) - self.assertFalse(self.probe_user.is_alive()) - - def test_multiple_starts(self): - self.probe_user.start(self.dm, self.target, self.logger) - self.assertRaises(RuntimeError, self.probe_user.start, self.dm, self.target, self.logger) - - def test_start_and_stop(self): - self.probe_user.start(self.dm, self.target, self.logger) - self.probe_user.stop() - self.probe_user.join(self.timeout) - 
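# NOTE: the ProbeUser tests removed here are presumably the ones reappearing
# in the new test/unit/test_monitor.py listed in this patch's diffstat. Their
# core lifecycle check, as a standalone sketch (mock objects stand in for the
# dm, target and logger arguments; Probe and ProbeUser come from
# framework.monitor, and the mocked methods mirror the setUp() above):
probe = Probe()
probe.start, probe.main, probe.stop = mock.Mock(), mock.Mock(), mock.Mock()
user = ProbeUser(probe)
dm, target, logger = mock.Mock(), mock.Mock(), mock.Mock()
user.start(dm, target, logger)  # starts the probe worker
user.stop()
user.join(2)                    # bounded wait, as with self.timeout above
assert not user.is_alive()
probe.start.assert_called_once_with(dm, target, logger)
probe.stop.assert_called_once_with(dm, target, logger)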
self.probe.start.assert_called_once_with(self.dm, self.target, self.logger) - self.probe.stop.assert_called_once_with(self.dm, self.target, self.logger) - - def test_main(self): - test_period = 0.5 - delta = 0.005 - self.probe_user.set_probe_delay(0.05) - - print("***** test period: " + str(test_period)) - print("***** tolerate delta between executions: " + str(delta)) - print("***** probe delay: " + str(self.probe_user.get_probe_delay())) - - execution_times = [] - - def side_effect(*args, **kwargs): - execution_times.append(datetime.datetime.now()) - return mock.Mock() - - self.probe.main.side_effect = side_effect - - self.probe_user.start(self.dm, self.target, self.logger) - time.sleep(test_period) - self.probe_user.stop() - self.probe_user.join(self.timeout) - self.probe.main.assert_called_with(self.dm, self.target, self.logger) - - print("***** probe's main method execution times: ") - for execution in execution_times: - print(" " + str(execution)) - - self.assertTrue(self.probe.main.call_count >= test_period/self.probe_user.get_probe_delay() - 1) - - for i in range(len(execution_times)): - if i+1 < len(execution_times): - self.assertTrue(0 <= (execution_times[i+1] - execution_times[i]).total_seconds() - - self.probe_user.get_probe_delay() <= delta) - - class TestNodeFeatures(unittest.TestCase): - @classmethod def setUpClass(cls): pass @@ -2051,10 +1819,10 @@ def test_absorb_nonterm_1(self): nstr_1 = Node('str1', value_type=String(val_list=['TBD1'], max_sz=5)) nstr_2 = Node('str2', value_type=String(val_list=['TBD2'], max_sz=8)) - vt = BitField(subfield_sizes=[4,4,4], - subfield_val_lists=[[3,2,0xe,1], None, [10,13,3]], + vt = BitField(subfield_sizes=[4, 4, 4], + subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], subfield_val_extremums=[None, [14, 15], None], - padding=1, endian=VT.BigEndian, lsb_padding=True) + padding=1, endian=VT.BigEndian, lsb_padding=True) bfield = Node('bfield', value_type=vt) bfield.enforce_absorb_constraints(AbsCsts()) @@ -2068,10 +1836,10 @@ def test_absorb_nonterm_1(self): # '?\xef' (\x3f\xe0) + padding 0b1111 msg_tail = struct.pack('>H', 0x3fe0 + 0b1111) - + msg = b'\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678' + msg_tail status, off, size, name = top.absorb(msg, constraints=AbsNoCsts(size=True)) - + print('\n ---[message to absorb]---') print(repr(msg)) print('\n ---[absobed message]---') @@ -2098,9 +1866,9 @@ def test_absorb_nonterm_2(self): top.set_env(Env()) # 2*nint_3 + nstr_1 + nstr_2 + 2*nint_2 + nint_1 - msg = b'\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' + msg = '\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' status, off, size, name = top.absorb(msg) - + print('\n ---[message to absorb]---') print(repr(msg)) print('\n ---[absobed message]---') @@ -2111,7 +1879,6 @@ def test_absorb_nonterm_2(self): self.assertEqual(status, AbsorbStatus.FullyAbsorbed) self.assertEqual(size, len(msg)) - def test_absorb_nonterm_3(self): nint_1 = Node('nint1', value_type=UINT16_le(int_list=[0xcdab, 0xffee])) nint_2 = Node('nint2', value_type=UINT8(int_list=[0xaf, 0xbf, 0xcf])) @@ -2129,7 +1896,7 @@ def test_absorb_nonterm_3(self): msg = 'str222str222' status, off, size, name = top.absorb(msg) - + print('\n ---[message to absorb]---') print(repr(msg)) print('\n ---[absobed message]---') @@ -2140,23 +1907,22 @@ def test_absorb_nonterm_3(self): self.assertEqual(status, AbsorbStatus.FullyAbsorbed) self.assertEqual(size, len(msg)) - def test_absorb_nonterm_fullyrandom(self): - + test_desc = \ - {'name': 'test', - 'contents': [ - {'section_type': MH.FullyRandom, - 'contents': [ - 
{'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), - 'qty': (2, 3), - 'name': 'str'}, - - {'contents': UINT8(int_list=[2, 4, 6, 8]), - 'qty': (3, 6), - 'name': 'int'} - ]} - ]} + {'name': 'test', + 'contents': [ + {'section_type': MH.FullyRandom, + 'contents': [ + {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + 'qty': (2, 3), + 'name': 'str'}, + + {'contents': UINT8(int_list=[2, 4, 6, 8]), + 'qty': (3, 6), + 'name': 'int'} + ]} + ]} for i in range(5): mh = ModelHelper() @@ -2179,7 +1945,6 @@ def test_absorb_nonterm_fullyrandom(self): self.assertEqual(status, AbsorbStatus.FullyAbsorbed) - def test_intg_absorb_1(self): self.helper1_called = False @@ -2242,7 +2007,7 @@ def nint_10_helper(blob, constraints, node_internals): nstr_10 = Node('str10', value_type=String(val_list=['TBD', 'THE_END'], max_sz=7)) delim = Node('delim', value_type=String(val_list=[','], size=1)) - nint_20 = Node('nint20', value_type=INT_str(int_list=[1,2,3])) + nint_20 = Node('nint20', value_type=INT_str(int_list=[1, 2, 3])) nint_21 = Node('nint21', value_type=UINT8(int_list=[0xbb])) bottom = Node('bottom', subnodes=[delim, nint_20, nint_21]) @@ -2250,7 +2015,7 @@ def nint_10_helper(blob, constraints, node_internals): middle2 = Node('middle2') middle2.set_subnodes_with_csts([ - 1, ['u>', [splitter, 1], [nint_10, 1], [bottom, 0, 1], [nstr_10 ,1], [bottom2, 0, 1]] + 1, ['u>', [splitter, 1], [nint_10, 1], [bottom, 0, 1], [nstr_10, 1], [bottom2, 0, 1]] ]) top = Node('top', subnodes=[middle1, yeah, middle2]) @@ -2259,11 +2024,11 @@ def nint_10_helper(blob, constraints, node_internals): top.set_env(Env()) top2.set_env(Env()) - msg = b'\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + msg = '\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' # middle1: nint_1_alt + nint_3 + 2*nint_1 + nstr_1('ABCD') + nint_51 + 2*nstr_50 + nint_50 - msg2 = b'\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' - + msg2 = '\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + print('\n****** top ******\n') status, off, size, name = top.absorb(msg) @@ -2299,7 +2064,7 @@ def verif_val_and_print(arg, log_func=None): # Because constraints are untighten on this node, its nominal # size of 4 is set to 5 when absorbing b'COOL!' 
self.assertEqual(top.get_node_by_path("top/middle1/cool").to_bytes(), b'COOL!') - + self.assertEqual(status2, AbsorbStatus.FullyAbsorbed) del self.helper1_called @@ -2310,7 +2075,6 @@ def verif_val_and_print(arg, log_func=None): print('\n***\n') print(repr(top["top/middle2"])) - def test_show(self): a = fmk.dm.get_external_node(dm_name='usb', data_id='DEV') @@ -2319,13 +2083,12 @@ def test_show(self): a.show(raw_limit=400) b.show(raw_limit=400) - b['PNG_00/chunks/chk/height']=a + b['PNG_00/chunks/chk/height'] = a b.show(raw_limit=400) - b['PNG_00/chunks/chk/height/idProduct']=a + b['PNG_00/chunks/chk/height/idProduct'] = a b.show(raw_limit=400) - def test_exist_condition_01(self): ''' Test existence condition for generation and absorption ''' @@ -2333,7 +2096,6 @@ def test_exist_condition_01(self): d = fmk.dm.get_external_node(dm_name='mydf', data_id='exist_cond') for i in range(10): - d_abs = fmk.dm.get_external_node(dm_name='mydf', data_id='exist_cond') d.show() @@ -2355,7 +2117,7 @@ def test_exist_condition_01(self): print('-----------------------') print('Absorb Status: status=%s, off=%d, sz=%d, name=%s' % (status, off, size, name)) print(' \_ length of original data: %d' % len(raw_data)) - print(' \_ remaining: %r' %raw_data[size:]) + print(' \_ remaining: %r' % raw_data[size:]) print('-----------------------') self.assertEqual(status, AbsorbStatus.FullyAbsorbed) @@ -2366,50 +2128,50 @@ def test_exist_condition_01(self): def test_exist_condition_02(self): cond_desc = \ - {'name': 'exist_cond', - 'shape_type': MH.Ordered, - 'contents': [ - {'name': 'opcode', - 'determinist': True, - 'contents': String(val_list=['A3', 'A2'])}, - - {'name': 'command_A3', - 'exists_if': (RawCondition('A3'), 'opcode'), - 'contents': [ - {'name': 'A3_subopcode', - 'contents': BitField(subfield_sizes=[15,2,4], endian=VT.BigEndian, - subfield_val_lists=[None, [1,2], [5,6,12]], - subfield_val_extremums=[[500, 600], None, None], - determinist=False)}, - - {'name': 'A3_int', - 'determinist': True, - 'contents': UINT16_be(int_list=[10, 20, 30])}, - - {'name': 'A3_deco1', - 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), - (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], - 'contents': String(val_list=['$ and_OK $'])}, - - {'name': 'A3_deco2', - 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), - (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], - 'contents': String(val_list=['! and_KO !'])} - ]}, - - {'name': 'A31_payload1', - 'contents': String(val_list=['$ or_OK $']), - 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), - (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], - }, - - {'name': 'A31_payload2', - 'contents': String(val_list=['! 
or_KO !']), - 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), - (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], - }, - - ]} + {'name': 'exist_cond', + 'shape_type': MH.Ordered, + 'contents': [ + {'name': 'opcode', + 'determinist': True, + 'contents': String(val_list=['A3', 'A2'])}, + + {'name': 'command_A3', + 'exists_if': (RawCondition('A3'), 'opcode'), + 'contents': [ + {'name': 'A3_subopcode', + 'contents': BitField(subfield_sizes=[15, 2, 4], endian=VT.BigEndian, + subfield_val_lists=[None, [1, 2], [5, 6, 12]], + subfield_val_extremums=[[500, 600], None, None], + determinist=False)}, + + {'name': 'A3_int', + 'determinist': True, + 'contents': UINT16_be(int_list=[10, 20, 30])}, + + {'name': 'A3_deco1', + 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), + (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], + 'contents': String(val_list=['$ and_OK $'])}, + + {'name': 'A3_deco2', + 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), + (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], + 'contents': String(val_list=['! and_KO !'])} + ]}, + + {'name': 'A31_payload1', + 'contents': String(val_list=['$ or_OK $']), + 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), + (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], + }, + + {'name': 'A31_payload2', + 'contents': String(val_list=['! or_KO !']), + 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), + (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], + }, + + ]} mh = ModelHelper() node = mh.create_graph_from_desc(cond_desc) @@ -2426,52 +2188,52 @@ def test_exist_condition_02(self): def test_generalized_exist_cond(self): gen_exist_desc = \ - {'name': 'gen_exist_cond', - 'separator': {'contents': {'name': 'sep_nl', - 'contents': String(val_list=['\n'], max_sz=100, absorb_regexp=b'[\r\n|\n]+'), - 'absorb_csts': AbsNoCsts(regexp=True)}, - 'prefix': False, 'suffix': False, 'unique': True}, - 'contents': [ - {'name': 'body', - 'qty': 7, - 'separator': {'contents': {'name': 'sep_space', - 'contents': String(val_list=[' '], max_sz=100, absorb_regexp=b'\s+'), - 'absorb_csts': AbsNoCsts(size=True, regexp=True)}, + {'name': 'gen_exist_cond', + 'separator': {'contents': {'name': 'sep_nl', + 'contents': String(val_list=['\n'], max_sz=100, absorb_regexp=b'[\r\n|\n]+'), + 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, 'unique': True}, 'contents': [ - {'name': 'val_blk', - 'separator': {'contents': {'name': 'sep_quote', - 'contents': String(val_list=['"'])}, - 'prefix': False, 'suffix': True, 'unique': True}, - 'contents': [ - {'name': 'key', - 'contents': String(val_list=['value='])}, - {'name': 'val1', - 'contents': String(val_list=['Toulouse', 'Paris', 'Lyon']), - 'exists_if': (RawCondition('Location'), 'param')}, - {'name': 'val2', - 'contents': String(val_list=['2015/10/08']), - 'exists_if': (RawCondition('Date'), 'param')}, - {'name': 'val3', - 'contents': String(val_list=['10:40:42']), - 'exists_if': (RawCondition('Time'), 'param')}, - {'name': 'val4', - 'contents': String(val_list=['NOT_SUPPORTED']), - 'exists_if': (RawCondition(['NOTSUP1', 'NOTSUP2', 'NOTSUP3']), 'param')} - ]}, - {'name': 'name_blk', - 'separator': {'contents': {'name': ('sep_quote', 2), - 'contents': String(val_list=['"'])}, - 'prefix': False, 'suffix': True, 'unique': True}, + {'name': 'body', + 'qty': 7, + 'separator': {'contents': {'name': 'sep_space', + 'contents': String(val_list=[' '], max_sz=100, absorb_regexp=b'\s+'), + 'absorb_csts': AbsNoCsts(size=True, regexp=True)}, + 'prefix': False, 'suffix': False, 'unique': 
True}, 'contents': [ - {'name': ('key', 2), - 'contents': String(val_list=['name='])}, - {'name': 'param', - 'contents': MH.CYCLE(['NOTSUP1', 'Date', 'Time', 'NOTSUP2', 'NOTSUP3', 'Location'], - depth=2)} + {'name': 'val_blk', + 'separator': {'contents': {'name': 'sep_quote', + 'contents': String(val_list=['"'])}, + 'prefix': False, 'suffix': True, 'unique': True}, + 'contents': [ + {'name': 'key', + 'contents': String(val_list=['value='])}, + {'name': 'val1', + 'contents': String(val_list=['Toulouse', 'Paris', 'Lyon']), + 'exists_if': (RawCondition('Location'), 'param')}, + {'name': 'val2', + 'contents': String(val_list=['2015/10/08']), + 'exists_if': (RawCondition('Date'), 'param')}, + {'name': 'val3', + 'contents': String(val_list=['10:40:42']), + 'exists_if': (RawCondition('Time'), 'param')}, + {'name': 'val4', + 'contents': String(val_list=['NOT_SUPPORTED']), + 'exists_if': (RawCondition(['NOTSUP1', 'NOTSUP2', 'NOTSUP3']), 'param')} + ]}, + {'name': 'name_blk', + 'separator': {'contents': {'name': ('sep_quote', 2), + 'contents': String(val_list=['"'])}, + 'prefix': False, 'suffix': True, 'unique': True}, + 'contents': [ + {'name': ('key', 2), + 'contents': String(val_list=['name='])}, + {'name': 'param', + 'contents': MH.CYCLE(['NOTSUP1', 'Date', 'Time', 'NOTSUP2', 'NOTSUP3', 'Location'], + depth=2)} + ]} ]} ]} - ]} mh = ModelHelper(delayed_jobs=True) node = mh.create_graph_from_desc(gen_exist_desc) @@ -2481,59 +2243,58 @@ def test_generalized_exist_cond(self): print(raw, len(raw)) result = \ - b'value="NOT_SUPPORTED" name="NOTSUP1"\n' \ - b'value="2015/10/08" name="Date"\n' \ - b'value="10:40:42" name="Time"\n' \ - b'value="NOT_SUPPORTED" name="NOTSUP2"\n' \ - b'value="NOT_SUPPORTED" name="NOTSUP3"\n' \ - b'value="Toulouse" name="Location"\n' \ - b'value="NOT_SUPPORTED" name="NOTSUP1"' + b'value="NOT_SUPPORTED" name="NOTSUP1"\n' \ + b'value="2015/10/08" name="Date"\n' \ + b'value="10:40:42" name="Time"\n' \ + b'value="NOT_SUPPORTED" name="NOTSUP2"\n' \ + b'value="NOT_SUPPORTED" name="NOTSUP3"\n' \ + b'value="Toulouse" name="Location"\n' \ + b'value="NOT_SUPPORTED" name="NOTSUP1"' print('***') print(result, len(result)) self.assertEqual(result, raw) - def test_pick_and_cond(self): pick_cond_desc = \ - {'name': 'pick_cond', - 'shape_type': MH.Ordered, - 'contents': [ - {'name': 'opcode', - 'determinist': True, - 'contents': String(val_list=['A1', 'A2', 'A3'])}, - {'name': 'part1', - 'determinist': True, - 'shape_type': MH.Pick, - 'contents': [ - {'name': 'option2', - 'exists_if': (RawCondition('A2'), 'opcode'), - 'contents': String(val_list=[' 1_KO_A2'])}, - {'name': 'option3', - 'exists_if': (RawCondition('A3'), 'opcode'), - 'contents': String(val_list=[' 1_KO_A3'])}, - {'name': 'option1', - 'exists_if': (RawCondition('A1'), 'opcode'), - 'contents': String(val_list=[' 1_OK_A1'])}, - ]}, - {'name': 'part2', - 'determinist': False, - 'weights': (100, 100, 1), - 'shape_type': MH.Pick, - 'contents': [ - {'name': 'optionB', - 'exists_if': (RawCondition('A2'), 'opcode'), - 'contents': String(val_list=[' 2_KO_A2'])}, - {'name': 'optionC', - 'exists_if': (RawCondition('A3'), 'opcode'), - 'contents': String(val_list=[' 2_KO_A3'])}, - {'name': 'optionA', - 'exists_if': (RawCondition('A1'), 'opcode'), - 'contents': String(val_list=[' 2_OK_A1'])}, - ]}, - ]} + {'name': 'pick_cond', + 'shape_type': MH.Ordered, + 'contents': [ + {'name': 'opcode', + 'determinist': True, + 'contents': String(val_list=['A1', 'A2', 'A3'])}, + {'name': 'part1', + 'determinist': True, + 'shape_type': MH.Pick, + 
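# An MH.Pick section keeps exactly one of the subnodes below for each
# generated instance; combined with the 'exists_if' guards, the only
# option that can actually be picked is the one whose RawCondition
# matches the value frozen in 'opcode' (the '_OK_A1' vs '_KO_A2'/'_KO_A3'
# naming of the options encodes the expected outcome).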
'contents': [ + {'name': 'option2', + 'exists_if': (RawCondition('A2'), 'opcode'), + 'contents': String(val_list=[' 1_KO_A2'])}, + {'name': 'option3', + 'exists_if': (RawCondition('A3'), 'opcode'), + 'contents': String(val_list=[' 1_KO_A3'])}, + {'name': 'option1', + 'exists_if': (RawCondition('A1'), 'opcode'), + 'contents': String(val_list=[' 1_OK_A1'])}, + ]}, + {'name': 'part2', + 'determinist': False, + 'weights': (100, 100, 1), + 'shape_type': MH.Pick, + 'contents': [ + {'name': 'optionB', + 'exists_if': (RawCondition('A2'), 'opcode'), + 'contents': String(val_list=[' 2_KO_A2'])}, + {'name': 'optionC', + 'exists_if': (RawCondition('A3'), 'opcode'), + 'contents': String(val_list=[' 2_KO_A3'])}, + {'name': 'optionA', + 'exists_if': (RawCondition('A1'), 'opcode'), + 'contents': String(val_list=[' 2_OK_A1'])}, + ]}, + ]} mh = ModelHelper(delayed_jobs=True) node = mh.create_graph_from_desc(pick_cond_desc) @@ -2546,55 +2307,53 @@ def test_pick_and_cond(self): self.assertEqual(result, raw) - def test_collapse_padding(self): padding_desc = \ - {'name': 'padding', - 'shape_type': MH.Ordered, - 'custo_set': MH.Custo.NTerm.CollapsePadding, - 'contents': [ - {'name': 'part1', - 'determinist': True, - 'contents': BitField(subfield_sizes=[3,1], padding=0, endian=VT.BigEndian, - subfield_val_lists=[None, [1]], - subfield_val_extremums=[[1,3], None]) - }, - {'name': 'sublevel', - 'contents': [ - {'name': 'part2_o1', - 'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'), - 'contents': BitField(subfield_sizes=[2,2,1], endian=VT.BigEndian, - subfield_val_lists=[[1,2], [3], [0]]) - }, - {'name': 'part2_o2', - 'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'), - 'contents': BitField(subfield_sizes=[2,2], endian=VT.BigEndian, - subfield_val_lists=[[3], [3]]) + {'name': 'padding', + 'shape_type': MH.Ordered, + 'custo_set': MH.Custo.NTerm.CollapsePadding, + 'contents': [ + {'name': 'part1', + 'determinist': True, + 'contents': BitField(subfield_sizes=[3, 1], padding=0, endian=VT.BigEndian, + subfield_val_lists=[None, [1]], + subfield_val_extremums=[[1, 3], None]) }, - {'name': 'part2_KO', - 'exists_if': (BitFieldCondition(sf=0, val=[2]), 'part1'), - 'contents': BitField(subfield_sizes=[2,2], endian=VT.BigEndian, - subfield_val_lists=[[1], [1]]) - } - ]} - ]} + {'name': 'sublevel', + 'contents': [ + {'name': 'part2_o1', + 'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'), + 'contents': BitField(subfield_sizes=[2, 2, 1], endian=VT.BigEndian, + subfield_val_lists=[[1, 2], [3], [0]]) + }, + {'name': 'part2_o2', + 'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'), + 'contents': BitField(subfield_sizes=[2, 2], endian=VT.BigEndian, + subfield_val_lists=[[3], [3]]) + }, + {'name': 'part2_KO', + 'exists_if': (BitFieldCondition(sf=0, val=[2]), 'part1'), + 'contents': BitField(subfield_sizes=[2, 2], endian=VT.BigEndian, + subfield_val_lists=[[1], [1]]) + } + ]} + ]} mh = ModelHelper() node = mh.create_graph_from_desc(padding_desc) print('***') raw = node.to_bytes() - node.show() # part2_KO should not be displayed + node.show() # part2_KO should not be displayed print(raw, binascii.b2a_hex(raw), - list(map(lambda x: bin(x), struct.unpack('>'+'B'*len(raw), raw))), + list(map(lambda x: bin(x), struct.unpack('>' + 'B' * len(raw), raw))), len(raw)) result = b'\xf6\xc8' self.assertEqual(result, raw) - def test_search_primitive(self): data = fmk.dm.get_external_node(dm_name='mydf', data_id='exist_cond') @@ -2614,7 +2373,7 @@ def test_search_primitive(self): for n in l1: print(' |_ ' + n.name) 
res.append(n.name) - + self.assertEqual(len(res), 3) self.assertTrue('command_A3' in res) @@ -2624,13 +2383,12 @@ def test_search_primitive(self): # data.env.add_node_to_corrupt(node_to_corrupt) # corrupted_data = Node(data.name, base_node=data, ignore_frozen_state=False, new_env=True) # data.env.remove_node_to_corrupt(node_to_corrupt) - + # corrupted_data.unfreeze(recursive=True, reevaluate_constraints=True) # corrupted_data.show() class TestNode_NonTerm(unittest.TestCase): - @classmethod def setUpClass(cls): pass @@ -2638,22 +2396,20 @@ def setUpClass(cls): def setUp(self): pass - def test_infinity(self): - infinity_desc = \ - {'name': 'infinity', - 'contents': [ - {'name': 'prefix', - 'contents': String(val_list=['A']), - 'qty': (2,-1)}, - {'name': 'mid', - 'contents': String(val_list=['H']), - 'qty': -1}, - {'name': 'suffix', - 'contents': String(val_list=['Z']), - 'qty': (2,-1)}, - ]} + {'name': 'infinity', + 'contents': [ + {'name': 'prefix', + 'contents': String(val_list=['A']), + 'qty': (2, -1)}, + {'name': 'mid', + 'contents': String(val_list=['H']), + 'qty': -1}, + {'name': 'suffix', + 'contents': String(val_list=['Z']), + 'qty': (2, -1)}, + ]} mh = ModelHelper() node = mh.create_graph_from_desc(infinity_desc) @@ -2682,8 +2438,9 @@ def test_infinity(self): self.assertEqual(raw_data, raw_data_abs) print('\n*** Test with big raw data\n\nOriginal data:') - raw_data2 = b'A'*(NodeInternals_NonTerm.INFINITY_LIMIT + 30) + b'H'*(NodeInternals_NonTerm.INFINITY_LIMIT + 1) + \ - b'Z'*(NodeInternals_NonTerm.INFINITY_LIMIT - 1) + raw_data2 = b'A' * (NodeInternals_NonTerm.INFINITY_LIMIT + 30) + b'H' * ( + NodeInternals_NonTerm.INFINITY_LIMIT + 1) + \ + b'Z' * (NodeInternals_NonTerm.INFINITY_LIMIT - 1) print(repr(raw_data2), len(raw_data2)) status, off, size, name = node_abs2.absorb(raw_data2, constraints=AbsFullCsts()) @@ -2697,51 +2454,50 @@ def test_infinity(self): self.assertEqual(status, AbsorbStatus.FullyAbsorbed) self.assertEqual(raw_data2, raw_data_abs2) - def test_separator(self): test_desc = \ - {'name': 'test', - 'determinist': True, - 'separator': {'contents': {'name': 'SEP', - 'contents': String(val_list=[' ', ' ', ' '], - absorb_regexp=b'\s+', determinist=False), - 'absorb_csts': AbsNoCsts(regexp=True)}, - 'prefix': True, - 'suffix': True, - 'unique': True}, - 'contents': [ - {'section_type': MH.FullyRandom, - 'contents': [ - {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), - 'qty': (3, 5), - 'name': 'str'}, - - {'contents': String(val_list=['1', '22', '333']), - 'qty': (3, 5), - 'name': 'int'} - ]}, - - {'section_type': MH.Random, - 'contents': [ - {'contents': String(val_list=['WW', 'YYY', 'ZZZZ']), - 'qty': (2, 2), - 'name': 'str2'}, - - {'contents': UINT16_be(int_list=[0xFFFF, 0xAAAA, 0xCCCC]), - 'qty': (3, 3), - 'name': 'int2'} - ]}, - {'section_type': MH.Pick, - 'contents': [ - {'contents': String(val_list=['LAST', 'END']), - 'qty': (2, 2), - 'name': 'str3'}, - - {'contents': UINT16_be(int_list=[0xDEAD, 0xBEEF]), - 'qty': (2, 2), - 'name': 'int3'} - ]} - ]} + {'name': 'test', + 'determinist': True, + 'separator': {'contents': {'name': 'SEP', + 'contents': String(val_list=[' ', ' ', ' '], + absorb_regexp=b'\s+', determinist=False), + 'absorb_csts': AbsNoCsts(regexp=True)}, + 'prefix': True, + 'suffix': True, + 'unique': True}, + 'contents': [ + {'section_type': MH.FullyRandom, + 'contents': [ + {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + 'qty': (3, 5), + 'name': 'str'}, + + {'contents': String(val_list=['1', '22', '333']), + 'qty': (3, 5), + 'name': 
'int'} + ]}, + + {'section_type': MH.Random, + 'contents': [ + {'contents': String(val_list=['WW', 'YYY', 'ZZZZ']), + 'qty': (2, 2), + 'name': 'str2'}, + + {'contents': UINT16_be(int_list=[0xFFFF, 0xAAAA, 0xCCCC]), + 'qty': (3, 3), + 'name': 'int2'} + ]}, + {'section_type': MH.Pick, + 'contents': [ + {'contents': String(val_list=['LAST', 'END']), + 'qty': (2, 2), + 'name': 'str3'}, + + {'contents': UINT16_be(int_list=[0xDEAD, 0xBEEF]), + 'qty': (2, 2), + 'name': 'int3'} + ]} + ]} mh = ModelHelper() node = mh.create_graph_from_desc(test_desc) @@ -2772,32 +2528,30 @@ def test_separator(self): node.unfreeze() - def test_encoding_attr(self): - enc_desc = \ - {'name': 'enc', - 'contents': [ - {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop']) }, - {'name': 'crc', - 'contents': MH.CRC(vt=UINT32_be, after_encoding=False), - 'node_args': ['enc_data', 'data2'], - 'absorb_csts': AbsFullCsts(contents=False) }, - {'name': 'enc_data', - 'encoder': GZIP_Enc(6), - 'set_attrs': [NodeInternals.Abs_Postpone], - 'contents': [ - {'name': 'len', - 'contents': MH.LEN(vt=UINT8, after_encoding=False), - 'node_args': 'data1', + {'name': 'enc', + 'contents': [ + {'name': 'data0', + 'contents': String(val_list=['Plip', 'Plop'])}, + {'name': 'crc', + 'contents': MH.CRC(vt=UINT32_be, after_encoding=False), + 'node_args': ['enc_data', 'data2'], 'absorb_csts': AbsFullCsts(contents=False)}, - {'name': 'data1', - 'contents': UTF16_LE(val_list=['Test!', 'Hello World!']) }, - ]}, - {'name': 'data2', - 'contents': String(val_list=['Red', 'Green', 'Blue']) }, - ]} + {'name': 'enc_data', + 'encoder': GZIP_Enc(6), + 'set_attrs': [NodeInternals.Abs_Postpone], + 'contents': [ + {'name': 'len', + 'contents': MH.LEN(vt=UINT8, after_encoding=False), + 'node_args': 'data1', + 'absorb_csts': AbsFullCsts(contents=False)}, + {'name': 'data1', + 'contents': UTF16_LE(val_list=['Test!', 'Hello World!'])}, + ]}, + {'name': 'data2', + 'contents': String(val_list=['Red', 'Green', 'Blue'])}, + ]} mh = ModelHelper() node = mh.create_graph_from_desc(enc_desc) @@ -2826,9 +2580,7 @@ def test_encoding_attr(self): self.assertEqual(raw_data, raw_data_abs) - class TestNode_TypedValue(unittest.TestCase): - @classmethod def setUpClass(cls): pass @@ -2836,23 +2588,22 @@ def setUpClass(cls): def setUp(self): pass - def test_str_alphabet(self): alphabet1 = 'ABC' alphabet2 = 'NED' alpha_desc = \ - {'name': 'top', - 'contents': [ - {'name': 'alpha1', - 'contents': String(min_sz=10, max_sz=100, val_list=['A'*10], alphabet=alphabet1), - 'set_attrs': [NodeInternals.Abs_Postpone]}, - {'name': 'alpha2', - 'contents': String(min_sz=10, max_sz=100, alphabet=alphabet2)}, - {'name': 'end', - 'contents': String(val_list=['END'])}, - ]} + {'name': 'top', + 'contents': [ + {'name': 'alpha1', + 'contents': String(min_sz=10, max_sz=100, val_list=['A' * 10], alphabet=alphabet1), + 'set_attrs': [NodeInternals.Abs_Postpone]}, + {'name': 'alpha2', + 'contents': String(min_sz=10, max_sz=100, alphabet=alphabet2)}, + {'name': 'end', + 'contents': String(val_list=['END'])}, + ]} mh = ModelHelper() node = mh.create_graph_from_desc(alpha_desc) @@ -2871,9 +2622,8 @@ def test_str_alphabet(self): l = chr(l) self.assertTrue(l in alphabet) - print('\n*** Test with following data:') - raw_data = b'A'*10 + b'DNE'*30+ b'E'*10 + b'END' + raw_data = b'A' * 10 + b'DNE' * 30 + b'E' * 10 + b'END' print(repr(raw_data), len(raw_data)) status, off, size, name = node_abs.absorb(raw_data, constraints=AbsFullCsts()) @@ -2892,7 +2642,7 @@ def test_str_alphabet(self): 
node_abs.set_env(Env()) print('\n*** Test with following INVALID data:') - raw_data = b'A'*10 + b'DNE'*20 + b'F' + b'END' + raw_data = b'A' * 10 + b'DNE' * 20 + b'F' + b'END' print(repr(raw_data), len(raw_data)) status, off, size, name = node_abs.absorb(raw_data, constraints=AbsFullCsts()) @@ -2907,30 +2657,28 @@ def test_str_alphabet(self): self.assertEqual(status, AbsorbStatus.Reject) self.assertEqual(raw_data[size:], b'FEND') - - def test_encoded_str_1(self): + def test_encoded_str(self): class EncodedStr(String): - def encode(self, val): - return val+b'***' + return val + b'***' def decode(self, val): return val[:-3] data = ['Test!', 'Hello World!'] enc_desc = \ - {'name': 'enc', - 'contents': [ - {'name': 'len', - 'contents': MH.LEN(vt=UINT8, after_encoding=False), - 'node_args': 'user_data', - 'absorb_csts': AbsFullCsts(contents=False)}, - {'name': 'user_data', - 'contents': EncodedStr(val_list=data) }, - {'name': 'compressed_data', - 'contents': GZIP(val_list=data, encoding_arg=6) } - ]} + {'name': 'enc', + 'contents': [ + {'name': 'len', + 'contents': MH.LEN(vt=UINT8, after_encoding=False), + 'node_args': 'user_data', + 'absorb_csts': AbsFullCsts(contents=False)}, + {'name': 'user_data', + 'contents': EncodedStr(val_list=data)}, + {'name': 'compressed_data', + 'contents': GZIP(val_list=data, encoding_arg=6)} + ]} mh = ModelHelper() node = mh.create_graph_from_desc(enc_desc) @@ -2974,31 +2722,31 @@ def decode(self, val): gsm_dec = gsm_t.decode(gsm_enc) self.assertEqual(msg, gsm_dec) - msg = u'où ça'.encode(internal_repr_codec) #' b'o\xf9 \xe7a' + msg = b'o\xf9 \xe7a' vtype = UTF16_LE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = u'où ça'.encode(internal_repr_codec) + msg = b'o\xf9 \xe7a' vtype = UTF16_BE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = u'où ça'.encode(internal_repr_codec) + msg = b'o\xf9 \xe7a' vtype = UTF8(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = u'où ça'.encode(internal_repr_codec) + msg = b'o\xf9 \xe7a' vtype = Codec(max_sz=20, encoding_arg=None) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = u'où ça'.encode(internal_repr_codec) + msg = b'o\xf9 \xe7a' vtype = Codec(max_sz=20, encoding_arg='utf_32') enc = vtype.encode(msg) dec = vtype.decode(enc) @@ -3007,7 +2755,7 @@ def decode(self, val): b"\x00\x00\x00\xe7\x00\x00\x00a\x00\x00\x00" self.assertEqual(enc, utf32_enc) - msg = b'Hello World!'*10 + msg = b'Hello World!' 
* 10 vtype = GZIP(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) @@ -3029,52 +2777,8 @@ def decode(self, val): dec = vtype.decode(enc) self.assertEqual(msg, dec) - def test_encoded_str_2(self): - - enc_desc = \ - {'name': 'enc', - 'contents': [ - {'name': 'len', - 'contents': UINT8()}, - {'name': 'user_data', - 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['TEST'])}, - {'name': 'padding', - 'contents': String(max_sz=0), - 'absorb_csts': AbsNoCsts()}, - ]} - - mh = ModelHelper() - node = mh.create_graph_from_desc(enc_desc) - node.set_env(Env()) - - node_abs = Node('enc_abs', base_node=node, new_env=True) - node_abs.set_env(Env()) - node_abs2 = node_abs.get_clone() - - node_abs.show() - - raw_data = b'\x0C' + b'\xC6\x67' + b'garbage' # \xC6\x67 --> invalid UTF8 - status, off, size, name = node_abs.absorb(raw_data, constraints=AbsNoCsts(size=True, struct=True)) - - self.assertEqual(status, AbsorbStatus.Reject) - - raw_data = b'\x05' + b'\xC3\xBCber' + b'padding' # \xC3\xBC = ü in UTF8 - - status, off, size, name = node_abs2.absorb(raw_data, constraints=AbsNoCsts(size=True, struct=True)) - - print('Absorb Status:', status, off, size, name) - print(' \_ length of original data:', len(raw_data)) - print(' \_ remaining:', raw_data[size:]) - raw_data_abs = node_abs2.to_bytes() - print(' \_ absorbed data:', repr(raw_data_abs), len(raw_data_abs)) - node_abs2.show() - - self.assertEqual(status, AbsorbStatus.FullyAbsorbed) - class TestHLAPI(unittest.TestCase): - @classmethod def setUpClass(cls): pass @@ -3082,9 +2786,7 @@ def setUpClass(cls): def setUp(self): pass - def test_create_graph(self): - a = {'name': 'top', 'contents': [ {'weight': 2, @@ -3106,7 +2808,7 @@ def test_create_graph(self): 'contents': [{ 'section_type': MH.Ordered, 'contents': [ - + {'contents': String(val_list=['OK', 'KO'], size=2), 'name': 'val2'}, @@ -3124,12 +2826,12 @@ def test_create_graph(self): 'custo_set': MH.Custo.Func.FrozenArgs} ]}]}, - {'contents': String(max_sz = 10), + {'contents': String(max_sz=10), 'name': 'val3', 'sync_qty_with': 'val1', 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(int_list=[1, 4, 8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True}]} @@ -3147,10 +2849,9 @@ def test_create_graph(self): ]} ]} - mh = ModelHelper(fmk.dm) node = mh.create_graph_from_desc(a) - + node.set_env(Env()) node.show() @@ -3167,14 +2868,12 @@ def test_create_graph(self): node.set_current_conf('alt2', recursive=True) node.show() - print('\nNode Dictionnary (size: {:d}):\n'.format(len(mh.node_dico))) for name, node in mh.node_dico.items(): print(name, ': ', repr(node), node.c) class TestDataModel(unittest.TestCase): - @classmethod def setUpClass(cls): pass @@ -3201,12 +2900,11 @@ def test_data_makers(self): print("Test '%s' Data Model" % dm.name) for data_id in dm.data_identifiers(): print("Try to get '%s'" % data_id) - data = dm.get_data(data_id) + data = dm.get_data(data_id) data.get_value() # data.show(raw_limit=200) print('Success!') - def test_generic_generators(self): dm = fmk.get_data_model_by_name('mydf') dm.load_data_model(fmk._name2dm) @@ -3224,10 +2922,9 @@ def test_generic_generators(self): print('\nRetrieved offset is: %d' % retr_off) int_idx = d['off_gen/body$'].get_subnode_idx(d['off_gen/body/int']) - off = int_idx * 3 + 10 # +10 for 'prefix' delta + off = int_idx * 3 + 10 # +10 for 'prefix' delta self.assertEqual(off, retr_off) - @unittest.skipIf(ignore_data_model_specifics, "USB specific test cases") def 
test_usb_specifics(self): @@ -3237,9 +2934,8 @@ def test_usb_specifics(self): msd_conf = dm.get_data('CONF') msd_conf.set_current_conf('MSD', recursive=True) msd_conf.show() - - self.assertEqual(len(msd_conf.to_bytes()), 32) + self.assertEqual(len(msd_conf.to_bytes()), 32) @unittest.skipIf(ignore_data_model_specifics, "PNG specific test cases") def test_png_specifics(self): @@ -3291,7 +2987,6 @@ def test_jpg_specifics(self): self.assertEqual(jpg_buff, orig_buff) - @unittest.skipIf(ignore_data_model_specifics, "Tutorial specific test cases, cover various construction") def test_tuto_specifics(self): '''Tutorial specific test cases, cover various data model patterns and @@ -3329,7 +3024,7 @@ def test_tuto_specifics(self): print('-----------------------') print('Absorb Status: status=%s, off=%d, sz=%d, name=%s' % (status, off, size, name)) print(' \_ length of original data: %d' % len(raw_data)) - print(' \_ remaining: %r' %raw_data[size:]) + print(' \_ remaining: %r' % raw_data[size:]) print('-----------------------') self.assertEqual(status, AbsorbStatus.FullyAbsorbed) @@ -3337,7 +3032,6 @@ def test_tuto_specifics(self): d.unfreeze() - @unittest.skipIf(ignore_data_model_specifics, "ZIP specific test cases") def test_zip_specifics(self): @@ -3353,7 +3047,7 @@ def test_zip_specifics(self): # dm.pkzip.show(raw_limit=400) # dm.pkzip.reset_state(recursive=True) - status, off, size, name = abszip.absorb(zip_buff, constraints=AbsNoCsts(size=True,struct=True)) + status, off, size, name = abszip.absorb(zip_buff, constraints=AbsNoCsts(size=True, struct=True)) # abszip.show(raw_limit=400) print('\n*** Absorb Status:', status, off, size, name) @@ -3409,7 +3103,6 @@ def test_zip_specifics(self): flen_before - flen_after) self.assertEqual(struct.unpack(' +# +################################################################################ +# +# This file is part of fuddly. +# +# fuddly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# fuddly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fuddly. If not, see +# +################################################################################ + +__all__ = [] + +from test.unit.test_data_model import * +from test.unit.test_data_model_helper import * +from test.unit.test_monitor import * diff --git a/test/unit/test_data_model.py b/test/unit/test_data_model.py new file mode 100644 index 0000000..2de4f6f --- /dev/null +++ b/test/unit/test_data_model.py @@ -0,0 +1,92 @@ +################################################################################ +# +# Copyright 2014-2016 Eric Lacombe +# +################################################################################ +# +# This file is part of fuddly. +# +# fuddly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# fuddly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with fuddly. If not, see +# +################################################################################ + +import unittest +import ddt +from test import mock + +from framework.data_model import * + +@ddt.ddt +class TestBitFieldCondition(unittest.TestCase): + + @classmethod + def setUpClass(cls): + + def side_effect(idx): + return [0, 1, 2][idx] + + cls.node = mock.Mock() + cls.node.get_subfield = mock.MagicMock(side_effect=side_effect) + + + @ddt.data((1, 1), (1, [1]), ([1], [1]), + (1, (1,)), ((1,), (1,)), + (2, [2, 6, 7]), (2, (2, 6, 7)), + ([1, 2], [1, [5, 2, 8]]), ([1, 2], [[1], [5, 6, 2]]), + ((1, 2), (1, (5, 2, 8))), ((1, 2), ((1,), (5, 6, 2)))) + @ddt.unpack + def test_with_one_argument(self, sf, val): + condition = BitFieldCondition(sf=sf, val=val) + self.assertTrue(condition.check(TestBitFieldCondition.node)) + + condition = BitFieldCondition(sf=sf, neg_val=val) + self.assertFalse(condition.check(TestBitFieldCondition.node)) + + @ddt.data(([0, 1, 2], [0, [1, 3], None], [None, None, 5]), + ([0, 2], [None, 2], [3, None])) + @ddt.unpack + def test_true_with_both_arguments(self, sf, val, neg_val): + condition = BitFieldCondition(sf=sf, val=val, neg_val=neg_val) + self.assertTrue(condition.check(TestBitFieldCondition.node)) + + @ddt.data(([0, 1, 2], [[0, 1], [1, 2], None], [None, None, [1, 2, 3]]), + ([0, 1, 2], [[1, 2, 3], [1, 2], None], [None, None, [1, 3, 5]])) + @ddt.unpack + def test_false_with_both_arguments(self, sf, val, neg_val): + condition = BitFieldCondition(sf=sf, val=val, neg_val=neg_val) + self.assertFalse(condition.check(TestBitFieldCondition.node)) + + def test_true_val_has_priority(self): + condition = BitFieldCondition(sf=0, val=[0, 4, 5], neg_val=[0, 4, 5]) + self.assertTrue(condition.check(TestBitFieldCondition.node)) + + def test_false_val_has_priority(self): + condition = BitFieldCondition(sf=0, val=[3, 4, 5], neg_val=[3, 4, 5]) + self.assertFalse(condition.check(TestBitFieldCondition.node)) + + @ddt.data((None, [2, 3]), ([1], 1), ((1,), 2), + ([1], [2, 1, 4]), ((1,), (2, 1, 4)), + ([1, 2], [1])) + @ddt.unpack + def test_invalid_with_one_argument(self, sf, val): + self.assertRaises(Exception, BitFieldCondition, sf=sf, val=val) + self.assertRaises(Exception, BitFieldCondition, sf=sf, neg_val=val) + + @ddt.data((1, None, None), (None, 2, 3), + ([1, 2], [1, None], [2, None]), + ([1, 2], [1, 2], [[1, 2, 3, 4]]), + ([1, 2], [1, 2, 3, 4], [[1, 2]])) + @ddt.unpack + def test_invalid_with_both_arguments(self, sf, val, neg_val): + self.assertRaises(Exception, BitFieldCondition, sf=sf, val=val, neg_val=neg_val) diff --git a/test/unit/test_monitor.py b/test/unit/test_monitor.py new file mode 100644 index 0000000..c27050a --- /dev/null +++ b/test/unit/test_monitor.py @@ -0,0 +1,114 @@ +################################################################################ +# +# Copyright 2014-2016 Eric Lacombe +# +################################################################################ +# +# This file is part of fuddly. 
+#
+# fuddly is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# fuddly is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with fuddly. If not, see
+#
+################################################################################
+
+import unittest
+from test import mock
+from framework.monitor import *
+
+class ProbeUserTest(unittest.TestCase):
+    """Test case used to test the 'ProbeUser' class."""
+
+    @classmethod
+    def setUpClass(cls):
+        pass
+
+    def setUp(self):
+        """Test initialisation."""
+
+        self.timeout = 2
+
+        self.probe = Probe()
+        self.probe.main = mock.Mock()
+
+        self.probe.start = mock.Mock()
+        self.probe.stop = mock.Mock()
+
+        self.dm = mock.Mock()
+        self.target = mock.Mock()
+        self.logger = mock.Mock()
+
+        self._set_up_specific()
+
+    def _set_up_specific(self):
+        self.probe_user = ProbeUser(self.probe)
+
+    def tearDown(self):
+        pass
+
+    def test_not_started_is_alive(self):
+        self.assertFalse(self.probe_user.is_alive())
+
+    def test_started_is_alive(self):
+        self.probe_user.start(self.dm, self.target, self.logger)
+        self.assertTrue(self.probe_user.is_alive())
+
+    def test_stopped_is_alive(self):
+        self.probe_user.start(self.dm, self.target, self.logger)
+        self.probe_user.stop()
+        self.probe_user.join(self.timeout)
+        self.assertFalse(self.probe_user.is_alive())
+
+    def test_multiple_starts(self):
+        self.probe_user.start(self.dm, self.target, self.logger)
+        self.assertRaises(RuntimeError, self.probe_user.start, self.dm, self.target, self.logger)
+
+    def test_start_and_stop(self):
+        self.probe_user.start(self.dm, self.target, self.logger)
+        self.probe_user.stop()
+        self.probe_user.join(self.timeout)
+        self.probe.start.assert_called_once_with(self.dm, self.target, self.logger)
+        self.probe.stop.assert_called_once_with(self.dm, self.target, self.logger)
+
+    def test_main(self):
+        test_period = 0.5
+        delta = 0.005
+        self.probe_user.set_probe_delay(0.05)
+
+        print("***** test period: " + str(test_period))
+        print("***** tolerated delta between executions: " + str(delta))
+        print("***** probe delay: " + str(self.probe_user.get_probe_delay()))
+
+        execution_times = []
+
+        def side_effect(*args, **kwargs):
+            execution_times.append(datetime.datetime.now())
+            return mock.Mock()
+
+        self.probe.main.side_effect = side_effect
+
+        self.probe_user.start(self.dm, self.target, self.logger)
+        time.sleep(test_period)
+        self.probe_user.stop()
+        self.probe_user.join(self.timeout)
+        self.probe.main.assert_called_with(self.dm, self.target, self.logger)
+
+        print("***** probe's main method execution times: ")
+        for execution in execution_times:
+            print("    " + str(execution))
+
+        self.assertTrue(self.probe.main.call_count >= test_period/self.probe_user.get_probe_delay() - 1)
+
+        for i in range(len(execution_times)):
+            if i+1 < len(execution_times):
+                self.assertTrue(0 <= (execution_times[i+1] - execution_times[i]).total_seconds()
+                                - self.probe_user.get_probe_delay() <= delta)

From 94156d68cd1ad9feed730a268b5d05c2a2cb7816 Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Mon, 4 Jul 2016 09:46:53 +0200
Subject: [PATCH 22/80] Update instructions for tests usage in the readme file

---
 README.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.rst b/README.rst
index 564267c..15b9290 100755
--- a/README.rst
+++ b/README.rst
@@ -49,22 +49,22 @@ About documentation
 Launch fuddly test cases
 ------------------------
 
-The file ``framework/test.py`` include all unit & integration test cases
+The package ``test`` includes all unit & integration test cases
 of ``fuddly`` itself. Usage is as follows:
 
-- To launch all the test, issue the command::
+- To launch all the tests, issue the command::
 
-  >> python framework/test.py -a
+  >> python -m test -a
 
-- To launch all the test but the longer ones, issue the command::
+- To launch all the tests but the longer ones, issue the command::
 
-  >> python framework/test.py
+  >> python -m test
 
 - To avoid data model specific test cases use the option
   ``--ignore-dm-specifics``
 
-- To launch a specific test category issue the folowing command::
+- To launch a specific test category issue the following command::
 
-  >> python framework/test.py .
+  >> python -m test test....
 
 
 Miscellaneous

From d21374f84b47b677e70440dae2b5bd36581acfdb Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Mon, 4 Jul 2016 16:06:07 +0200
Subject: [PATCH 23/80] RegexParser tests

---
 test/unit/__init__.py                |   2 +-
 test/unit/test_data_model_helpers.py | 197 +++++++++++++++++++++++++++
 2 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 test/unit/test_data_model_helpers.py

diff --git a/test/unit/__init__.py b/test/unit/__init__.py
index 0790bad..fc1ec28 100644
--- a/test/unit/__init__.py
+++ b/test/unit/__init__.py
@@ -24,5 +24,5 @@
 __all__ = []
 
 from test.unit.test_data_model import *
-from test.unit.test_data_model_helper import *
+from test.unit.test_data_model_helpers import *
 from test.unit.test_monitor import *
diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py
new file mode 100644
index 0000000..0b07aaf
--- /dev/null
+++ b/test/unit/test_data_model_helpers.py
@@ -0,0 +1,197 @@
+from framework.data_model_helpers import *
+import framework.value_types as vt
+import unittest
+import ddt
+from test import mock
+
+
+@ddt.ddt
+class RegexParserTest(unittest.TestCase):
+    """Test case used to test the 'RegexParser' class."""
+
+    @classmethod
+    def setUpClass(cls):
+        pass
+
+    def setUp(self):
+        """Test initialisation."""
+        self._parser = RegexParser()
+        self._parser._create_terminal_node = mock.Mock()
+
+    def tearDown(self):
+        pass
+
+    @ddt.data(r"(sa(lu))(les)(louloux)", r"(salut)(les(louloux)", r"(salut))les(louloux)",
+              r"(sal*ut)oo", r"(sal?ut)oo", r"sal{utoo", r"(sal+ut)oo", r"(sal{u)too",
+              r"(sal{2}u)too", r"sal{2,1}utoo", r"sal(u[t]o)o",
+              r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha")
+    def test_invalid_regexes(self, regex):
+        self.assertRaises(Exception, self._parser.run, regex, "toto")
+
+    @ddt.data(r"", r"b", r"salut")
+    def test_one_word(self, regex):
+        self._parser.run(regex, "toto")
+        self._parser._create_terminal_node.assert_called_once_with("toto1", vt.String,
+                                                                   contents=[regex],
+                                                                   alphabet=None, qty=(1, 1))
+
+    @ddt.data(r"(salut)(les)(louloux)", r"(salut)les(louloux)",
+              r"salut(les)(louloux)", r"(salut)(les)louloux", r"salut(les)louloux")
+    def test_with_parenthesis(self, regex):
+        nodes = self._parser.run(regex, "toto")
+        self.assertEquals(len(nodes), 3)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=["les"], alphabet=None, qty=(1, 1)),
+             mock.call("toto3", vt.String, contents=["louloux"], alphabet=None, qty=(1, 1))])
+
+
+    @ddt.data(r"salut(l\(es)(lou\\loux)cmoi", r"salut(l\(es)lou\\loux(cmoi)")
+    def test_escape_char(self, regex):
+        nodes = self._parser.run(regex, "toto")
+        self.assertEquals(len(nodes), 4)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=["l(es"], alphabet=None, qty=(1, 1)),
+             mock.call("toto3", vt.String, contents=["lou\\loux"], alphabet=None, qty=(1, 1)),
+             mock.call("toto4", vt.String, contents=["cmoi"], alphabet=None, qty=(1, 1))])
+
+    @ddt.unpack
+    @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)),
+              ('{7}', (7, 7)), ('{2,7}', (2, 7)),
+              ('{0}', (0, 0)), ('{0,0}', (0, 0)),
+              ('{3,}', (3, None)), ('{,15}', (0, 15)))
+    def test_quantifiers_on_char(self, char, qty):
+        nodes = self._parser.run(r"salut" + char + "ooo", "toto")
+        self.assertEquals(len(nodes), 3)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salu"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=["t"], alphabet=None, qty=qty),
+             mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))])
+
+    @ddt.unpack
+    @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)),
+              ('{7}', (7, 7)), ('{2,7}', (2, 7)),
+              ('{0}', (0, 0)), ('{0,0}', (0, 0)),
+              ('{3,}', (3, None)), ('{,15}', (0, 15)))
+    def test_quantifiers_on_character_class(self, char, qty):
+        nodes = self._parser.run(r"salut[abcd]" + char + "ooo", "toto")
+        self.assertEquals(len(nodes), 3)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=None, alphabet="abcd", qty=qty),
+             mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))])
+
+    @ddt.unpack
+    @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)),
+              ('{7}', (7, 7)), ('{2,7}', (2, 7)),
+              ('{0}', (0, 0)), ('{0,0}', (0, 0)),
+              ('{3,}', (3, None)), ('{,15}', (0, 15)))
+    def test_quantifiers_on_group(self, char, qty):
+        nodes = self._parser.run(r"salu(ttteee|whatever)" +
+                                 char +
+                                 "ooo", "toto")
+        self.assertEquals(len(nodes), 3)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salu"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=["ttteee", "whatever"], alphabet=None, qty=qty),
+             mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))])
+
+
+    def test_alphabet(self):
+        nodes = self._parser.run(r"salut[abc]ooo", "toto")
+        self.assertEquals(len(nodes), 3)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)),
+             mock.call("toto2", vt.String, contents=None, alphabet="abc", qty=(1, 1)),
+             mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))])
+
+
+    def test_empty_parenthesis_before(self):
+        nodes = self._parser.run(r"()+whatever", "toto")
+        self.assertEquals(len(nodes), 2)
+        self._parser._create_terminal_node.assert_has_calls(
+            [mock.call("toto1", vt.String, contents=[""], alphabet=None, qty=(1, None)),
+             mock.call("toto2", vt.String, contents=["whatever"], alphabet=None, qty=(1, 1))])
+
+    def test_empty_brackets(self):
+        nodes = self._parser.run(r"[]whatever", "toto")
+        self.assertEquals(len(nodes), 2)
+        self._parser._create_terminal_node.assert_has_calls(
+            
[mock.call("toto1", vt.String, contents=None, alphabet="", qty=(1, 1)), + mock.call("toto2", vt.String, contents=["whatever"], alphabet=None, qty=(1, 1))]) + + + def regex_assert(self, regex, nodes): + + ns = self._parser.run(regex, "name") + self.assertEquals(len(ns), len(nodes)) + + calls = [] + for node in nodes: + calls.append(mock.call("name" + str(nodes.index(node) + 1), vt.String, + contents=node[0], alphabet=node[1], qty=node[2])) + + self._parser._create_terminal_node.assert_has_calls(calls) + + + @ddt.unpack + @ddt.data( + # (regex, nodes=[(contents, alphabet, qty)]) + (r"[abcd]*toto(|\(ab\)|cd)+what?ever", [(None, "abcd", (0, None)), + (["toto"], None, (1, 1)), + (["", "(ab)", "cd"], None, (1, None)), + (["wha"], None, (1, 1)), + (["t"], None, (0, 1)), + (["ever"], None, (1, 1))]) + ) + def test_complete(self, regex, nodes): + self.regex_assert(regex, nodes) + + + @ddt.unpack + @ddt.data( + (r"(ab|cd|)+", [(["ab", "cd", ""], None, (1, None))]), + (r"(ab||cd)", [(["ab", "", "cd"], None, (1, 1))]), + (r"(|ab|cd|ef|gh)+", [(["", "ab", "cd", "ef", "gh"], None, (1, None))]), + (r"(|)+", [(["", ""], None, (1, None))]), + (r"(|||)+", [(["", "", "", ""], None, (1, None))]), + ) + def test_or_in_parenthesis(self, regex, nodes): + self.regex_assert(regex, nodes) + + + @ddt.unpack + @ddt.data( + (r"tata|haha|c*|b*|[abcd]+", [(["tata", "haha"], None, (1, 1)), + (["c"], None, (0, None)), + (["b"], None, (0, None)), + (None, "abcd", (1, None))]), + + (r"(tata)+|haha|tata||b*|[abcd]+", [(["tata"], None, (1, None)), + (["haha", "tata", ""], None, (1, 1)), + (["b"], None, (0, None)), + (None, "abcd", (1, None))]), + + (r"toto|titi|tata", [(["toto", "titi", "tata"], None, (1, 1))]), + + (r"|", [(["",""], None, (1, 1))]), + + (r"coucou|[abcd]|", [(["coucou"], None, (1, 1)), + (None, "abcd", (1, 1)), + ([""], None, (1, 1))]), + + (r"[whatever]+|[hao]|[salut]?", [(None, "whatever", (1, None)), + (None, "hao", (1, 1)), + (None, "salut", (0, 1))]), + + (r"|[hao]|[salut]?", [([""], None, (1, 1)), + (None, "hao", (1, 1)), + (None, "salut", (0, 1))]), + (r"coucou||[salut]?", [(["coucou", ""], None, (1, 1)), + (None, "salut", (0, 1))]), + (r"coucou||||[salut]?", [(["coucou", "", "", ""], None, (1, 1)), + (None, "salut", (0, 1))]) + ) + def test_pick(self, regex, nodes): + self.regex_assert(regex, nodes) From d28bfe4e41082b09485afc03e5d1f514226434fa Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Mon, 4 Jul 2016 16:08:49 +0200 Subject: [PATCH 24/80] RegexParser integration + a simple test --- framework/data_model_helpers.py | 59 +++++++++++++++++++++++++++- test/integration/test_integration.py | 32 +++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 989815d..f6b6521 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -71,6 +71,7 @@ class MH(object): NonTerminal = 1 Generator = 2 Leaf = 3 + Regex = 5 RawNode = 4 # if a Node() is provided @@ -798,11 +799,37 @@ def run(self, inputs, name): self.flush() - return self._terminal_nodes + return self._create_non_terminal_node() def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=None): - return Node(name=name, vt=fvt.String(val_list=contents)) + + assert(contents is not None or alphabet is not None) + + if alphabet is not None: + return [Node(name=name, + vt=fvt.String(alphabet=alphabet, + min_sz=-1 if qty[0] is None else qty[0], + max_sz=-1 if qty[1] is None else qty[1])), + 1, 1] + else: + return 
[Node(name=name, vt=fvt.String(val_list=contents)), + -1 if qty[0] is None else qty[0], + -1 if qty[1] is None else qty[1]] + + def _create_non_terminal_node(self): + non_terminal = [1, [MH.Copy + MH.Ordered]] + formatted_terminal = non_terminal[1] + + for terminal in self.terminal_nodes: + formatted_terminal.append(terminal) + if self.pick: + non_terminal.append(1) + formatted_terminal = [MH.Copy + MH.Ordered] + non_terminal.append(formatted_terminal) + + return non_terminal + class ModelHelper(object): @@ -913,6 +940,8 @@ def _get_type(top_desc, contents): ntype = MH.RawNode elif hasattr(contents, '__call__') and pre_ntype in [None, MH.Generator]: ntype = MH.Generator + elif isinstance(contents, str) and pre_ntype in [None, MH.Regex]: + ntype = MH.Regex else: ntype = MH.Leaf return ntype @@ -921,6 +950,7 @@ def _get_type(top_desc, contents): contents = desc.get('contents', None) dispatcher = {MH.NonTerminal: self._create_non_terminal_node, + MH.Regex: self._create_non_terminal_node_from_regex, MH.Generator: self._create_generator_node, MH.Leaf: self._create_leaf_node, MH.RawNode: self._update_provided_node} @@ -1044,6 +1074,31 @@ def _create_generator_node(self, desc, node=None): return n + def _create_non_terminal_node_from_regex(self, desc, node=None): + + n, conf = self.__pre_handling(desc, node) + + name = desc.get('name') + regexp = desc.get('contents') + assert isinstance(regexp, str) + + parser = RegexParser() + non_terminal_node = parser.run(regexp, name) + n.set_subnodes_with_csts(non_terminal_node, conf=conf) + + custo_set = desc.get('custo_set', None) + custo_clear = desc.get('custo_clear', None) + + if custo_set or custo_clear: + custo = NonTermCusto(items_to_set=custo_set, items_to_clear=custo_clear) + internals = n.cc if conf is None else n.c[conf] + internals.customize(custo) + + self._handle_common_attr(n, desc, conf) + + return n + + def _create_non_terminal_node(self, desc, node=None): n, conf = self.__pre_handling(desc, node) diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index e05f935..a8c3135 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -3126,6 +3126,38 @@ def test_zip_specifics(self): self.assertEqual(zip_buff, orig_buff, msg=err_msg) +class TestDataModelHelpers(unittest.TestCase): + + def test_regex(self): + HTTP_version_classic = \ + {'name': 'HTTP_version_classic', + 'contents': [ + {'name': 'HTTP_name', 'contents': String(val_list=["HTTP"])}, + {'name': 'slash', 'contents': String(val_list=["/"])}, + {'name': 'major_version_digit', 'contents': String(size=1, val_list=["0", "1", "2", "3", "4", + "5", "6", "7", "8", "9"])}, + + {'name': '.', 'contents': String(val_list=["."])}, + {'name': 'minor_version_digit', 'clone': 'major_version_digit'}, + ]} + + HTTP_version_regex = \ + {'name': 'HTTP_version_regex', 'contents': "(HTTP)(/)(0|1|2|3|4|5|6|7|8|9)(.)(0|1|2|3|4|5|6|7|8|9)"} + + mh = ModelHelper() + node_classic = mh.create_graph_from_desc(HTTP_version_classic) + node_classic.make_determinist(recursive=True) + + mh = ModelHelper() + node_regex = mh.create_graph_from_desc(HTTP_version_regex) + node_regex.make_determinist(recursive=True) + + node_regex.show() + node_classic.show() + + self.assertEqual(node_regex.to_bytes(), node_classic.to_bytes()) + + class TestFMK(unittest.TestCase): @classmethod def setUpClass(cls): From d0078dbb35ba3dda9e8dd4208330411bff519fbe Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Tue, 5 Jul 2016 16:47:11 +0200 Subject: [PATCH 25/80] 
Revamp RegexParser to add the notion of transition --- framework/data_model_helpers.py | 418 ++++++++++++++++++++++---------- 1 file changed, 293 insertions(+), 125 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index f6b6521..075b43b 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -513,85 +513,72 @@ def _handle_attrs(n, set_attrs, clear_attrs): +class RegexParser(object): -class State(object): - def run(self, context): - raise NotImplementedError - - + class State(object): + def run(self, context): + raise NotImplementedError + def check_transitions(self, context): + raise NotImplementedError -class RegexParser(object): - class PickState(State): + class InitialState(State): def run(self, ctx): - if ctx.input == '|' and (len(ctx.terminal_nodes) == 0 or - (len(ctx.terminal_nodes) == 1 and ctx.buffer is None)): - ctx.pick = True + pass - if ctx.pick and ctx.input != '|' or not ctx.pick and ctx.input == '|': + def check_transitions(self, ctx): + if ctx.input in ('?', '*', '+', '{', '}', ')', ']'): raise Exception - elif ctx.pick and ctx.input == '|': - ctx.append_to_contents("") - return RegexParser.InitialState + if ctx.input == '[': + return RegexParser.InsideSquareBrackets() + elif ctx.input == '(': + return RegexParser.InsideParenthesis() else: - return RegexParser.InitialState().run(ctx) + ctx.append_to_buffer("") + + if ctx.input == '\\': + return RegexParser.Escaping() + elif ctx.input == '|': + return RegexParser.PickState() + elif ctx.input is None: + return RegexParser.FinalState() + else: + return RegexParser.MainState() - class InitialState(State): + class FinalState(State): def run(self, ctx): + ctx.flush() - if ctx.input == '|': - return RegexParser.PickState().run(ctx) - - if ctx.input == '(': - if ctx.buffer is not None and len(ctx.buffer) == 0: - ctx.contents = ctx.contents[:-1] - if ctx.contents is not None and len(ctx.contents) == 0: - ctx.contents = None - ctx.flush() - ctx.append_to_contents("") - return RegexParser.InsideParenthesis - - elif ctx.input == '[': - if ctx.buffer is not None and len(ctx.buffer) == 0: - ctx.contents = ctx.contents[:-1] - - if ctx.contents is not None and len(ctx.contents) == 0: - ctx.contents = None - ctx.flush() - ctx.append_to_alphabet("") - return RegexParser.InsideSquareBrackets - - elif ctx.input in ('?', '*', '+', '{', '}', ')', ']'): - raise Exception - - elif ctx.input == '\\': - return RegexParser.Escaping - - else: - ctx.append_to_buffer(ctx.input) + def check_transitions(self, ctx): + pass - return RegexParser.QtyState - - class QtyState(State): + class MainState(State): def run(self, ctx): + ctx.append_to_buffer(ctx.input) - if ctx.input not in ('?', '*', '+', '{'): + def check_transitions(self, ctx): + if ctx.input == '(': + return RegexParser.InsideParenthesis() - # cases: (...) & [...] 
without any quantifier - if isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): - ctx.flush() + elif ctx.input == '[': + return RegexParser.InsideSquareBrackets() - return RegexParser.InitialState().run(ctx) + elif ctx.input == '\\': + return RegexParser.Escaping() - if not isinstance(ctx.old_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): + elif ctx.input == '|': + return RegexParser.PickState() + + elif ctx.input in ('?', '*', '+', '{'): + # pick if ctx.pick and len(ctx.contents) > 1 and len(ctx.buffer) > 1: raise Exception @@ -602,15 +589,58 @@ def run(self, ctx): ctx.flush() ctx.append_to_buffer(content) - else: # len(ctx.buffer) > 1 + else: # len(ctx.buffer) > 1 content = ctx.buffer[-1] ctx.buffer = ctx.buffer[:-1] ctx.flush() ctx.append_to_buffer(content) - if ctx.input == '{': - return RegexParser.InsideBrackets - elif ctx.input == '+': + if ctx.input == '{': + return RegexParser.InsideBrackets() + else: + return RegexParser.QtyState() + + elif ctx.input in ('}', ')', ']'): + raise Exception + + elif ctx.input is None: + return RegexParser.FinalState() + + return self + + + class PickState(State): + + def run(self, ctx): + + if not ctx.pick: + if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): + ctx.pick = True + else: + raise Exception + + def check_transitions(self, ctx): + if ctx.input == '(': + return RegexParser.InsideParenthesis() + elif ctx.input == '[': + return RegexParser.InsideSquareBrackets() + else: + + ctx.append_to_contents("") + + if ctx.input == '|': + return self + elif ctx.input is None: + return RegexParser.FinalState() + else: + return RegexParser.MainState() + + + + class QtyState(State): + + def run(self, ctx): + if ctx.input == '+': ctx.min = 1 elif ctx.input == '?': ctx.max = 1 @@ -618,31 +648,36 @@ def run(self, ctx): if ctx.min is None: ctx.min = 0 - # flush the buffer only ctx.flush() - return RegexParser.PickState + def check_transitions(self, ctx): + if ctx.input in ('*', '+', '?', '{', '}', ')', ']'): + raise Exception + if ctx.input == '(': + return RegexParser.InsideParenthesis() + elif ctx.input == '[': + return RegexParser.InsideSquareBrackets() + elif ctx.input == '|': + return RegexParser.PickState() + elif ctx.input == '\\': + return RegexParser.Escaping() + elif ctx.input is None: + return RegexParser.FinalState() + else: + return RegexParser.MainState() - class InsideBrackets(State): - def __init__(self): - pass + class InsideBrackets(State): def run(self, ctx): + if ctx.input == '{': + ctx.context = self.__class__ + elif ctx.input == '}': + ctx.context = RegexParser.MainState - if ctx.input == ',': - ctx.max = "" - elif ctx.input.isdigit(): - if ctx.max is not None: - ctx.max += ctx.input - else: - if ctx.min is None: - ctx.min = "" - ctx.min += ctx.input - elif ctx.input == "}": - - ctx.min = 0 if ctx.min is None else int(ctx.min) + if ctx.min is not None: + ctx.min = int(ctx.min) if ctx.max is None: ctx.max = ctx.min @@ -655,73 +690,208 @@ def run(self, ctx): raise Exception ctx.flush() - return RegexParser.PickState - elif ctx.input.isspace(): - pass - else: + + elif ctx.input == ',': + if ctx.min is None: + raise Exception + ctx.max = "" + elif ctx.input.isdigit(): + if ctx.max is not None: + ctx.max += ctx.input + else: + if ctx.min is None: + ctx.min = "" + ctx.min += ctx.input + + def check_transitions(self, ctx): + if ctx.input in ('*', '+', '?', '{', ')', ']'): raise Exception - return self.__class__ + if ctx.context == self.__class__: + if 
ctx.input.isdigit() or ctx.input in (',', '}'): + return self + else: + raise Exception + else: + if ctx.input == '|': + return RegexParser.PickState() + elif ctx.input is None: + return RegexParser.FinalState() + else: + if ctx.pick: + raise Exception + + if ctx.input == '}': + raise Exception + elif ctx.input == '(': + return RegexParser.InsideParenthesis() + elif ctx.input == '[': + return RegexParser.InsideSquareBrackets() + else: + return RegexParser.MainState() class Escaping(object): - def run(self, context): - context.buffer += context.input - return context.old_state.__class__ + def __init__(self): + self._started = False + self._closed = False + + def run(self, ctx): + if not self._started: + self._started = True + else: + if ctx.buffer is not None: + ctx.append_to_buffer(ctx.input) + elif ctx.alphabet is not None: + ctx.append_to_alphabet(ctx.input) + self._closed = True + + def check_transitions(self, ctx): + if self._closed: + return RegexParser.FinalState() if ctx.input is None else ctx.context() + else: + if ctx.input is None: + raise Exception + else: + return self + class InsideParenthesis(State): def run(self, ctx): - - if ctx.input in ('(', '[', ']', '?', '*', '+', '{', '}'): - raise Exception + if ctx.input == '(': + ctx.flush() + ctx.append_to_buffer("") + ctx.context = self.__class__ elif ctx.input == ')': - return RegexParser.QtyState - elif ctx.input == '\\': - return RegexParser.Escaping + ctx.context = RegexParser.MainState elif ctx.input == '|': ctx.append_to_contents("") else: ctx.append_to_buffer(ctx.input) - return self.__class__ + def check_transitions(self, ctx): + if ctx.input in ('}', ']'): + raise Exception + + # inside + if ctx.context == self.__class__: + if ctx.input in ('*', '+', '?', '{', '(', '['): + raise Exception + elif ctx.input == '\\': + return RegexParser.Escaping() + else: + return self + + # outside + else: + if ctx.input == ')': + raise Exception + # quantifier specified + if ctx.input in ('*', '+', '?'): + return RegexParser.QtyState() + elif ctx.input == '{': + return RegexParser.InsideBrackets() + else: + ctx.flush() + + # pick + if ctx.input == '|': + return RegexParser.PickState() + elif ctx.pick: + raise Exception + + # continue with something else + if ctx.input == '(': + return self + elif ctx.input == '[': + return RegexParser.InsideSquareBrackets() + elif ctx.input == '\\': + return RegexParser.Escaping() + elif ctx.input is None: + return RegexParser.FinalState() + else: + return RegexParser.MainState() + + class InsideSquareBrackets(State): def run(self, ctx): + if ctx.input == '[': + ctx.flush() + ctx.append_to_alphabet("") + ctx.context = self.__class__ + elif ctx.input == ']': + if len(ctx.alphabet) == 0: + raise Exception + ctx.context = RegexParser.MainState + else: + ctx.append_to_alphabet(ctx.input) - if ctx.input == ']': - return RegexParser.QtyState - elif ctx.input == '\\': - return RegexParser.Escaping - elif ctx.input in ('[', '(', ')', '?', '*', '+', '{', '}'): + def check_transitions(self, ctx): + if ctx.input in ('}', ')'): raise Exception + + # inside + if ctx.context == self.__class__: + if ctx.input in ('*', '+', '?', '{', '(', '[', '|'): + raise Exception + elif ctx.input == '\\': + return RegexParser.Escaping() + else: + return self + + # outside else: - ctx.append_to_alphabet(ctx.input) + if ctx.input == ')': + raise Exception + # quantifier specified + elif ctx.input in ('*', '+', '?'): + return RegexParser.QtyState() + elif ctx.input == '{': + return RegexParser.InsideBrackets() + else: + ctx.flush() - 
return self.__class__ + # pick + if ctx.input == '|': + return RegexParser.PickState() + elif ctx.pick: + raise Exception + # continue with something else + + if ctx.input == '(': + return RegexParser.InsideParenthesis() + elif ctx.input == '[': + return self + elif ctx.input == '\\': + return RegexParser.Escaping() + elif ctx.input is None: + return RegexParser.FinalState() + else: + return RegexParser.MainState() def __init__(self): - self.current_state = RegexParser.InitialState() # last ended state - self.old_state = self.current_state + self.current_state = RegexParser.InitialState() - self.state = [] + self.context = RegexParser.MainState self._name = None self._input = None - self.contents = [""] + self.contents = None self.alphabet = None - self.pick = False # pick context ? + self.pick = False self.min = None self.max = None - self._terminal_nodes = [] + self._nodes = [] @property @@ -753,14 +923,19 @@ def buffer(self, buffer): self.contents = [""] self.contents[-1] = buffer + @property + def nodes(self): + return self._nodes @property - def terminal_nodes(self): - return self._terminal_nodes + def nothing_to_flush(self): + return self.contents is None and self.alphabet is None def flush(self): - if self.contents is None and self.alphabet is None: + + + if self.nothing_to_flush: return if self.min is None and self.max is None: @@ -768,12 +943,13 @@ # type = fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String type = fvt.String - name = self._name + str(len(self._terminal_nodes) + 1) + name = self._name + str(len(self.nodes) + 1) - - terminal_node = self._create_terminal_node(name, type, contents=self.contents, alphabet=self.alphabet, - qty=(self.min, self.max)) - self._terminal_nodes.append(terminal_node) + node = self._create_terminal_node(name, type, + contents=self.contents, + alphabet=self.alphabet, + qty=(self.min, self.max)) + self.nodes.append(node) self.reset() @@ -784,20 +960,12 @@ def reset(self): self.max = None def run(self, inputs, name): - self._name = name - for self._input in inputs: - next_state_class = self.current_state.run(self) - self.old_state = self.current_state - self.current_state = next_state_class() - - if isinstance(self.current_state, (RegexParser.InsideParenthesis, RegexParser.InsideSquareBrackets)): - raise Exception - - if inputs == "": - self.append_to_buffer("") - self.flush() + # None indicates the end of the regex + for self._input in list(inputs) + [None]: + self.current_state = self.current_state.check_transitions(self) + self.current_state.run(self) return self._create_non_terminal_node() @@ -821,7 +989,7 @@ def _create_non_terminal_node(self): non_terminal = [1, [MH.Copy + MH.Ordered]] formatted_terminal = non_terminal[1] - for terminal in self.terminal_nodes: + for terminal in self.nodes: formatted_terminal.append(terminal) if self.pick: non_terminal.append(1)
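[Editorial note] The patch above splits every parser state into a run()/check_transitions() pair and drives both over the input with a trailing None sentinel, instead of having run() return the next state class. The following is a minimal, self-contained sketch of that driving protocol; the digit scanner and all of its names are hypothetical illustrations, not fuddly code. Each character first selects the successor state, which then consumes it:

class State(object):
    def run(self, ctx):                  # consume ctx.input
        raise NotImplementedError
    def check_transitions(self, ctx):    # pick the next state for ctx.input
        raise NotImplementedError

class Skip(State):
    def run(self, ctx):
        pass
    def check_transitions(self, ctx):
        if ctx.input is not None and ctx.input.isdigit():
            ctx.numbers.append("")       # a new digit run starts here
            return Digits()
        return self

class Digits(State):
    def run(self, ctx):
        ctx.numbers[-1] += ctx.input
    def check_transitions(self, ctx):
        if ctx.input is not None and ctx.input.isdigit():
            return self
        return Skip()

class DigitScanner(object):
    """Collect maximal digit runs: DigitScanner().scan("ab12c3") == ['12', '3']."""
    def __init__(self):
        self.input = None
        self.numbers = []
    def scan(self, text):
        state = Skip()
        # None flags the end of the input, exactly as in the patch above;
        # it gives the last run a final transition in which to be closed.
        for self.input in list(text) + [None]:
            state = state.check_transitions(self)
            state.run(self)
        return self.numbers

From 6a35dafcc1858a6102f48d979960e310804a760c Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Wed, 6 Jul 2016 12:30:04 +0200 Subject: [PATCH 26/80] Rewrite InsideSquareBrackets using a SubStateMachine + clean up --- framework/data_model_helpers.py | 183 ++++++++++++++++----- test/unit/test_data_model_helpers.py | 162 +++++++++++++----------- 2 files changed, 202 insertions(+), 143 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 075b43b..4abde48 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1,3 +1,4 @@ + 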
################################################################################ # # Copyright 2014-2016 Eric Lacombe @@ -511,19 +512,44 @@ def _handle_attrs(n, set_attrs, clear_attrs): n.clear_attr(ca) +class StateMachine(object): + def run(self, inputs, name): + raise NotImplementedError + +class State(object): + def run(self, context): + raise NotImplementedError + def check_transitions(self, context): + raise NotImplementedError -class RegexParser(object): - class State(object): - def run(self, context): - raise NotImplementedError +class RegexParser(StateMachine): - def check_transitions(self, context): - raise NotImplementedError + class SubStateMachine(State): + + class Initial(State): + pass + class Final(State): + pass - class InitialState(State): + def __init__(self): + self.state = self.__class__.Initial() + + def run(self, ctx): + while True: + self.state.run(ctx) + if isinstance(self.state, self.__class__.Final): + break + ctx.inputs.pop(0) + self.state = self.state.check_transitions(ctx) + + def check_transitions(self, ctx): + return self.state.check_transitions(ctx) + + + class Initial(State): def run(self, ctx): pass @@ -533,23 +559,23 @@ def check_transitions(self, ctx): raise Exception if ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() elif ctx.input == '(': return RegexParser.InsideParenthesis() else: ctx.append_to_buffer("") if ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() elif ctx.input == '|': return RegexParser.PickState() elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() else: return RegexParser.MainState() - class FinalState(State): + class Final(State): def run(self, ctx): ctx.flush() @@ -568,10 +594,10 @@ def check_transitions(self, ctx): return RegexParser.InsideParenthesis() elif ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() elif ctx.input == '|': return RegexParser.PickState() @@ -579,13 +605,13 @@ def check_transitions(self, ctx): elif ctx.input in ('?', '*', '+', '{'): # pick - if ctx.pick and len(ctx.contents) > 1 and len(ctx.buffer) > 1: + if ctx.pick and len(ctx.values) > 1 and len(ctx.buffer) > 1: raise Exception if len(ctx.buffer) == 1: - if len(ctx.contents) > 1: + if len(ctx.values) > 1: content = ctx.buffer - ctx.contents = ctx.contents[:-1] + ctx.values = ctx.values[:-1] ctx.flush() ctx.append_to_buffer(content) @@ -604,7 +630,7 @@ def check_transitions(self, ctx): raise Exception elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() return self @@ -623,7 +649,7 @@ def check_transitions(self, ctx): if ctx.input == '(': return RegexParser.InsideParenthesis() elif ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() else: ctx.append_to_contents("") @@ -631,7 +657,7 @@ def check_transitions(self, ctx): if ctx.input == '|': return self elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() else: return RegexParser.MainState() @@ -657,13 +683,13 @@ def check_transitions(self, ctx): if ctx.input == '(': return RegexParser.InsideParenthesis() elif ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() elif ctx.input == '|': return RegexParser.PickState() elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() elif 
ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() else: return RegexParser.MainState() @@ -716,7 +742,7 @@ def check_transitions(self, ctx): if ctx.input == '|': return RegexParser.PickState() elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() else: if ctx.pick: raise Exception @@ -726,12 +752,12 @@ def check_transitions(self, ctx): elif ctx.input == '(': return RegexParser.InsideParenthesis() elif ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() else: return RegexParser.MainState() - class Escaping(object): + class Escape(State): def __init__(self): self._started = False @@ -749,7 +775,7 @@ def run(self, ctx): def check_transitions(self, ctx): if self._closed: - return RegexParser.FinalState() if ctx.input is None else ctx.context() + return RegexParser.Final() if ctx.input is None else ctx.context() else: if ctx.input is None: raise Exception @@ -780,7 +806,7 @@ def check_transitions(self, ctx): if ctx.input in ('*', '+', '?', '{', '(', '['): raise Exception elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() else: return self @@ -806,47 +832,60 @@ def check_transitions(self, ctx): if ctx.input == '(': return self elif ctx.input == '[': - return RegexParser.InsideSquareBrackets() + return RegexParser.SquareBrackets() elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() else: return RegexParser.MainState() + class SquareBrackets(SubStateMachine): - class InsideSquareBrackets(State): + class Initial(State): - def run(self, ctx): - if ctx.input == '[': + def run(self, ctx): ctx.flush() ctx.append_to_alphabet("") - ctx.context = self.__class__ - elif ctx.input == ']': - if len(ctx.alphabet) == 0: + + def check_transitions(self, ctx): + if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '|', None): raise Exception - ctx.context = RegexParser.MainState - else: - ctx.append_to_alphabet(ctx.input) + elif ctx.input == '\\': + return RegexParser.SquareBrackets.Escape() + else: + return RegexParser.SquareBrackets.Inside() - def check_transitions(self, ctx): - if ctx.input in ('}', ')'): - raise Exception + class Inside(State): + def run(self, ctx): + ctx.append_to_alphabet(ctx.input) - # inside - if ctx.context == self.__class__: - if ctx.input in ('*', '+', '?', '{', '(', '[', '|'): + def check_transitions(self, ctx): + if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', '|', None): raise Exception elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.SquareBrackets.Escape() + elif ctx.input == ']': + return RegexParser.SquareBrackets.Final() else: return self - # outside - else: - if ctx.input == ')': + class Escape(State): + def run(self, ctx): + pass + + def check_transitions(self, ctx): + pass + + class Final(State): + def run(self, ctx): + pass + + def check_transitions(self, ctx): + if ctx.input in (')', '}', ']'): raise Exception + # quantifier specified elif ctx.input in ('*', '+', '?'): return RegexParser.QtyState() @@ -862,28 +901,27 @@ def check_transitions(self, ctx): raise Exception # continue with something else - if ctx.input == '(': return RegexParser.InsideParenthesis() elif ctx.input == '[': return self elif ctx.input == '\\': - return RegexParser.Escaping() + return RegexParser.Escape() elif ctx.input is None: - return RegexParser.FinalState() + return RegexParser.Final() 
else: return RegexParser.MainState() def __init__(self): - self.current_state = RegexParser.InitialState() + self.current_state = RegexParser.Initial() self.context = RegexParser.MainState self._name = None - self._input = None - self.contents = None + self.inputs = None + self.values = None self.alphabet = None self.pick = False @@ -896,17 +934,17 @@ def __init__(self): @property def input(self): - return self._input + return None if self.inputs is None or len(self.inputs) == 0 else self.inputs[0] def append_to_contents(self, content): - if self.contents is None: - self.contents = [] - self.contents.append(content) + if self.values is None: + self.values = [] + self.values.append(content) def append_to_buffer(self, str): - if self.contents is None: - self.contents = [""] - self.contents[-1] += str + if self.values is None: + self.values = [""] + self.values[-1] += str def append_to_alphabet(self, alphabet): if self.alphabet is None: @@ -915,13 +953,13 @@ def append_to_alphabet(self, alphabet): @property def buffer(self): - return None if self.contents is None else self.contents[-1] + return None if self.values is None else self.values[-1] @buffer.setter def buffer(self, buffer): - if self.contents is None: - self.contents = [""] - self.contents[-1] = buffer + if self.values is None: + self.values = [""] + self.values[-1] = buffer @property def nodes(self): @@ -929,12 +967,10 @@ def nodes(self): @property def nothing_to_flush(self): - return self.contents is None and self.alphabet is None + return self.values is None and self.alphabet is None def flush(self): - - if self.nothing_to_flush: return @@ -946,7 +982,7 @@ def flush(self): name = self._name + str(len(self.nodes) + 1) node = self._create_terminal_node(name, type, - contents=self.contents, + contents=self.values, alphabet=self.alphabet, qty=(self.min, self.max)) self.nodes.append(node) @@ -954,7 +990,7 @@ def flush(self): def reset(self): - self.contents = None + self.values = None self.alphabet = None self.min = None self.max = None @@ -963,9 +999,12 @@ def run(self, inputs, name): self._name = name # None indicates the end of the regex - for self._input in list(inputs) + [None]: + self.inputs = list(inputs) + [None] + + while self.inputs: self.current_state = self.current_state.check_transitions(self) self.current_state.run(self) + self.inputs.pop(0) return self._create_non_terminal_node() diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index 0b07aaf..b3d5107 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -24,59 +24,20 @@ def tearDown(self): @ddt.data(r"(sa(lu))(les)(louloux)", r"(salut)(les(louloux)", r"(salut))les(louloux)", r"(sal*ut)oo", r"(sal?ut)oo", r"sal{utoo", r"(sal+ut)oo", r"(sal{u)too", r"(sal{2}u)too", r"sal{2,1}utoo", r"sal(u[t]o)o", - r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha") + r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha", + 'salut[abcd]{,15}rr', r"[]whatever", r"t{,15}") def test_invalid_regexes(self, regex): - self.assertRaises(Exception, self._parser.run, regex, "toto") - - @ddt.data(r"", r"b", r"salut") - def test_one_word(self, regex): - self._parser.run(regex, "toto") - self._parser._create_terminal_node.assert_called_once_with("toto1", vt.String, - contents=[regex], - alphabet=None, qty=(1, 1)) - - @ddt.data(r"(salut)(les)(louloux)", r"(salut)les(louloux)", - r"salut(les)(louloux)", r"(salut)(les)louloux", r"salut(les)louloux") - def 
test_with_parenthesis(self, regex): - nodes = self._parser.run(regex, "toto") - self.assertEquals(len(nodes), 3) - self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=["les"], alphabet=None, qty=(1, 1)), - mock.call("toto3", vt.String, contents=["louloux"], alphabet=None, qty=(1, 1))]) - - - @ddt.data(r"salut(l\(es)(lou\\loux)cmoi", r"salut(l\(es)lou\\loux(cmoi)") - def test_escape_char(self, regex): - nodes = self._parser.run(regex, "toto") - self.assertEquals(len(nodes), 4) - self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=["l(es"], alphabet=None, qty=(1, 1)), - mock.call("toto3", vt.String, contents=["lou\loux"], alphabet=None, qty=(1, 1)), - mock.call("toto4", vt.String, contents=["cmoi"], alphabet=None, qty=(1, 1))]) + self.assertRaises(Exception, self._parser.run, regex, "name") - @ddt.unpack - @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)), - ('{7}', (7, 7)), ('{2,7}', (2, 7)), - ('{0}', (0, 0)), ('{0,0}', (0, 0)), - ('{3,}', (3, None)), ('{,15}', (0, 15))) - def test_7(self, char, qty): - nodes = self._parser.run(r"salut" + char + "ooo", "toto") - self.assertEquals(len(nodes), 3) - self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salu"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=["t"], alphabet=None, qty=qty), - mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))]) @ddt.unpack @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)), ('{7}', (7, 7)), ('{2,7}', (2, 7)), ('{0}', (0, 0)), ('{0,0}', (0, 0)), - ('{3,}', (3, None)), ('{,15}', (0, 15))) + ('{3,}', (3, None))) def test_7(self, char, qty): - nodes = self._parser.run(r"salut[abcd]" + char + "ooo", "toto") - self.assertEquals(len(nodes), 3) + self._parser.run(r"salut[abcd]" + char + "ooo", "toto") + self.assertEquals(self._parser._create_terminal_node.call_count, 3) self._parser._create_terminal_node.assert_has_calls( [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), mock.call("toto2", vt.String, contents=None, alphabet="abcd", qty=qty), @@ -86,46 +47,85 @@ def test_7(self, char, qty): @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)), ('{7}', (7, 7)), ('{2,7}', (2, 7)), ('{0}', (0, 0)), ('{0,0}', (0, 0)), - ('{3,}', (3, None)), ('{,15}', (0, 15))) + ('{3,}', (3, None))) def test_8(self, char, qty): - nodes = self._parser.run(r"salu(ttteee|whatever)" - + char - + "ooo", "toto") - self.assertEquals(len(nodes), 3) + self._parser.run(r"salu(ttteee|whatever)" + char + "ooo", "toto") + self.assertEquals(self._parser._create_terminal_node.call_count, 3) self._parser._create_terminal_node.assert_has_calls( [mock.call("toto1", vt.String, contents=["salu"], alphabet=None, qty=(1, 1)), mock.call("toto2", vt.String, contents=["ttteee", "whatever"], alphabet=None, qty=qty), mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))]) - def test_alphabet(self): - nodes = self._parser.run(r"salut[abc]ooo", "toto") - self.assertEquals(len(nodes), 3) - self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=None, alphabet="abc", qty=(1, 1)), - mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 
1))]) + @ddt.unpack @ddt.data( - # (regex, nodes=[(contents, alphabet, qty)]) - (r"[abcd]*toto(|\(ab\)|cd)+what?ever", [(None, "abcd", (0, None)), - (["toto"], None, (1, 1)), - (["", "(ab)", "cd"], None, (1, None)), - (["wha"], None, (1, 1)), - (["t"], None, (0, 1)), - (["ever"], None, (1, 1))]) + {'regex': r"[abcd]*toto(|\(ab\)|cd)+what?ever", + 'nodes': [ + {"alphabet": "abcd", "qty": (0, None)}, + {"contents": ["toto"]}, + {"contents": ["", "(ab)", "cd"], "qty": (1, None)}, + {"contents": ["wha"]}, + {"contents": ["t"], "qty": (0, 1)}, + {"contents": ["ever"]} + ]}, ) - def test_complete(self, regex, nodes): - self.regex_assert(regex, nodes) + def test_complete(self, test_case): + self.regex_assert_json(test_case) @ddt.unpack @@ -197,21 +172,95 @@ def test_pick(self, regex, nodes): self.regex_assert(regex, nodes) + + + @ddt.data( + {'regex': r"()", 'nodes': [{"contents": [""]}]}, + {'regex': r"(z)", 'nodes': [{"contents": ["z"]}]}, + {'regex': r"(cat)", 'nodes': [{"contents": ["cat"]}]}, + + {'regex': r"hello(boat)", + 'nodes': [{"contents": ["hello"]}, {"contents": ["boat"]}]}, + + {'regex': r"(cake)awesome", + 'nodes': [{"contents": ["cake"]}, {"contents": ["awesome"]}]}, + + {'regex': r"(foo)(bar)(foo)", + 'nodes': [{"contents": ["foo"]}, {"contents": ["bar"]}, {"contents": ["foo"]}]}, + + {'regex': r"dashboard(apple)(purple)", + 'nodes': [{"contents": ["dashboard"]}, {"contents": ["apple"]}, {"contents": ["purple"]}]}, + + {'regex': r"(harder)better(faster)", + 'nodes': [{"contents": ["harder"]}, {"contents": ["better"]}, {"contents": ["faster"]}]}, + + {'regex': r"(stronger)(it is me)baby", + 'nodes': [{"contents": ["stronger"]}, {"contents": ["it is me"]}, {"contents": ["baby"]}]}, + + {'regex': r"new(york)city", + 'nodes': [{"contents": ["new"]}, {"contents": ["york"]}, {"contents": ["city"]}]}, + + {'regex': r"()whatever", + 'nodes': [{"contents": [""]}, {"contents": ["whatever"]}]}, + + {'regex': r"this is it()", + 'nodes': [{"contents": ["this is it"]}, {"contents": [""]}]}, + + {'regex': r"this()parser()is()working", + 'nodes': [{"contents": ["this"]}, {"contents": [""]}, {"contents": ["parser"]}, {"contents": [""]}, + {"contents": ["is"]}, {"contents": [""]}, {"contents": ["working"]}]}, + + {'regex': r"()()()", + 'nodes': [{"contents": [""]}, {"contents": [""]}, {"contents": [""]}]}, + ) + def test_json(self, test_case): + self.regex_assert_json(test_case) + + + def regex_assert_json(self, test_case): + + self._parser.run(test_case['regex'], "name") + self.assertEquals(self._parser._create_terminal_node.call_count, len(test_case['nodes'])) + + calls = [] + for node in test_case['nodes']: + calls.append(mock.call("name" + str(test_case['nodes'].index(node) + 1), vt.String, + contents=node['contents'], alphabet=node['alphabet'], qty=node['qty'])) + + self._parser._create_terminal_node.assert_has_calls(calls) \ No newline at end of file
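[Editorial note] The SubStateMachine introduced in patch 26 is the pivot of this series: from the outside it behaves as a single State, while internally it keeps popping characters from the shared ctx.inputs queue until its own Final state is reached. That is also why the input stream became a list consumed from the front instead of a plain for-loop. Below is a compact, runnable sketch of the same composite idea under assumed, simplified names (a quoted-string scanner, not fuddly code):

class State(object):
    def run(self, ctx):
        raise NotImplementedError
    def check_transitions(self, ctx):
        raise NotImplementedError

class Quoted(State):
    """Sub-machine: consumes a "..." section as one parent-level step."""

    class Initial(State):
        def run(self, ctx):
            ctx.strings.append("")       # the current char is the opening quote
        def check_transitions(self, ctx):
            if ctx.input is None:
                raise Exception("unterminated string")
            return Quoted.Final() if ctx.input == '"' else Quoted.Inside()

    class Inside(State):
        def run(self, ctx):
            ctx.strings[-1] += ctx.input
        def check_transitions(self, ctx):
            if ctx.input is None:
                raise Exception("unterminated string")
            return Quoted.Final() if ctx.input == '"' else self

    class Final(State):
        def run(self, ctx):
            pass                         # the current char is the closing quote
        def check_transitions(self, ctx):
            return Text().check_transitions(ctx)   # hand control back to the parent

    def __init__(self):
        self.state = Quoted.Initial()

    def run(self, ctx):                  # same inner loop as SubStateMachine.run
        while True:
            self.state.run(ctx)
            if isinstance(self.state, Quoted.Final):
                break
            ctx.inputs.pop(0)
            self.state = self.state.check_transitions(ctx)

    def check_transitions(self, ctx):
        return self.state.check_transitions(ctx)

class Text(State):
    def run(self, ctx):
        pass
    def check_transitions(self, ctx):
        return Quoted() if ctx.input == '"' else self

class Scanner(object):
    def __init__(self, text):
        self.inputs = list(text) + [None]   # None still marks the end
        self.strings = []
    @property
    def input(self):
        return self.inputs[0] if self.inputs else None
    def scan(self):
        state = Text()
        while self.inputs:
            state = state.check_transitions(self)
            state.run(self)
            self.inputs.pop(0)
        return self.strings

Scanner('a"bc"-"d"').scan() returns ['bc', 'd']: each quoted section is drained by the sub-machine as a single step of the outer machine, which is precisely the behaviour the patch wants for square brackets.

From 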
46a857188ec15306acb699d2068f8f30b5769d2f Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 7 Jul 2016 10:45:42 +0200 Subject: [PATCH 27/80] Rewrite InsideParenthesis & Escape using SubStateMachine --- framework/data_model_helpers.py | 406 +++++++++++++++------------ test/unit/test_data_model_helpers.py | 12 +- 2 files changed, 228 insertions(+), 190 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 4abde48..ef2d977 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -512,76 +512,128 @@ def _handle_attrs(n, set_attrs, clear_attrs): n.clear_attr(ca) -class StateMachine(object): - def run(self, inputs, name): - raise NotImplementedError - class State(object): + """ + State at the lower level + """ + def __init__(self, machine): + """ + Args: + state_machine (StateMachine): state machine where it lives + """ + self.machine = machine + def run(self, context): raise NotImplementedError - def check_transitions(self, context): + def advance(self, context): raise NotImplementedError -class RegexParser(StateMachine): +class FinalState(State): - class SubStateMachine(State): + def advance(self, context): + pass - class Initial(State): - pass - class Final(State): +class StateMachine(object): + + class Initial(State): + pass + + class Final(State): + pass + + def __init__(self): + self.states = {} + + for name, cls in inspect.getmembers(self.__class__): + if inspect.isclass(cls) and issubclass(cls, State): + self.states[cls] = cls(self) + + def run(self, context): + self.state = self.states[self.Initial] + while True: + self.state.run(context) + if isinstance(self.state, self.Final): + break + context.inputs.pop(0) + self.state = self.states[self.state.advance(context)] + + + +class SubStateMachine(State, StateMachine): + """ + Seen as a State from outside + Act like a StateMachine in the inside + """ + def __init__(self, machine): + State.__init__(self, machine) + StateMachine.__init__(self) + + def run(self, context): + StateMachine.run(self, context) + + + +class EscapeState(SubStateMachine): + class Initial(State): + + def run(self, ctx): pass - def __init__(self): - self.state = self.__class__.Initial() + def advance(self, ctx): + if ctx.input == None: + raise Exception + return self.machine.Final + + class Final(FinalState): def run(self, ctx): - while True: - self.state.run(ctx) - if isinstance(self.state, self.__class__.Final): - break - ctx.inputs.pop(0) - self.state = self.state.check_transitions(ctx) + if ctx.buffer is not None: + ctx.append_to_buffer(ctx.input) + elif ctx.alphabet is not None: + ctx.append_to_alphabet(ctx.input) - def check_transitions(self, ctx): - return self.state.check_transitions(ctx) +class RegexParser(StateMachine): class Initial(State): def run(self, ctx): pass - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input in ('?', '*', '+', '{', '}', ')', ']'): raise Exception if ctx.input == '[': - return RegexParser.SquareBrackets() + return self.machine.SquareBrackets elif ctx.input == '(': - return RegexParser.InsideParenthesis() + return self.machine.Parenthesis else: ctx.append_to_buffer("") if ctx.input == '\\': - return RegexParser.Escape() + return self.machine.Escape elif ctx.input == '|': - return RegexParser.PickState() + return self.machine.PickState elif ctx.input is None: - return RegexParser.Final() + return self.machine.Final else: - return RegexParser.MainState() + return self.machine.MainState - class Final(State): + class Final(FinalState): def 
run(self, ctx): ctx.flush() - def check_transitions(self, ctx): - pass + + class Escape(EscapeState): + + def advance(self, ctx): + return self.machine.states[self.machine.MainState].advance(ctx) class MainState(State): @@ -589,18 +641,18 @@ class MainState(State): def run(self, ctx): ctx.append_to_buffer(ctx.input) - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input == '(': - return RegexParser.InsideParenthesis() + return self.machine.Parenthesis elif ctx.input == '[': - return RegexParser.SquareBrackets() + return self.machine.SquareBrackets elif ctx.input == '\\': - return RegexParser.Escape() + return self.machine.Escape elif ctx.input == '|': - return RegexParser.PickState() + return self.machine.PickState elif ctx.input in ('?', '*', '+', '{'): @@ -622,17 +674,17 @@ def check_transitions(self, ctx): ctx.append_to_buffer(content) if ctx.input == '{': - return RegexParser.InsideBrackets() + return self.machine.InsideBrackets else: - return RegexParser.QtyState() + return self.machine.QtyState elif ctx.input in ('}', ')', ']'): raise Exception elif ctx.input is None: - return RegexParser.Final() + return self.machine.Final - return self + return self.__class__ class PickState(State): @@ -645,21 +697,21 @@ def run(self, ctx): else: raise Exception - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input == '(': - return RegexParser.InsideParenthesis() + return self.machine.Parenthesis elif ctx.input == '[': - return RegexParser.SquareBrackets() + return self.machine.SquareBrackets else: ctx.append_to_contents("") if ctx.input == '|': - return self + return self.__class__ elif ctx.input is None: - return RegexParser.Final() + return self.machine.Final else: - return RegexParser.MainState() + return self.machine.MainState @@ -676,22 +728,22 @@ def run(self, ctx): ctx.flush() - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', '}', ')', ']'): raise Exception if ctx.input == '(': - return RegexParser.InsideParenthesis() + return self.machine.Parenthesis elif ctx.input == '[': - return RegexParser.SquareBrackets() + return self.machine.SquareBrackets elif ctx.input == '|': - return RegexParser.PickState() + return self.machine.PickState elif ctx.input == '\\': - return RegexParser.Escape() + return self.machine.Escape elif ctx.input is None: - return RegexParser.Final() + return self.machine.Final else: - return RegexParser.MainState() + return self.machine.MainState class InsideBrackets(State): @@ -729,20 +781,20 @@ def run(self, ctx): ctx.min = "" ctx.min += ctx.input - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', ')', ']'): raise Exception if ctx.context == self.__class__: if ctx.input.isdigit() or ctx.input in (',', '}'): - return self + return self.__class__ else: raise Exception else: if ctx.input == '|': - return RegexParser.PickState() + return self.machine.PickState elif ctx.input is None: - return RegexParser.Final() + return self.machine.Final else: if ctx.pick: raise Exception @@ -750,95 +802,86 @@ def check_transitions(self, ctx): if ctx.input == '}': raise Exception elif ctx.input == '(': - return RegexParser.InsideParenthesis() + return self.machine.Parenthesis elif ctx.input == '[': - return RegexParser.SquareBrackets() + return self.machine.SquareBrackets else: - return RegexParser.MainState() + return self.machine.MainState - class Escape(State): + class Parenthesis(SubStateMachine): - def __init__(self): - self._started = False - 
self._closed = False + class Initial(State): - def run(self, ctx): - if not self._started: - self._started = True - else: - if ctx.buffer is not None: + def run(self, ctx): + ctx.flush() + ctx.append_to_buffer("") + + def advance(self, ctx): + if ctx.input in ('*', '+', '?', '{', '}', '(', '[', ']', None): + raise Exception + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input == ')': + return self.machine.Final + else: + return self.machine.Inside + + class Inside(State): + def run(self, ctx): + if ctx.input == '|': + ctx.append_to_contents("") + else: ctx.append_to_buffer(ctx.input) - elif ctx.alphabet is not None: - ctx.append_to_alphabet(ctx.input) - self._closed = True - def check_transitions(self, ctx): - if self._closed: - return RegexParser.Final() if ctx.input is None else ctx.context() - else: - if ctx.input is None: + def advance(self, ctx): + if ctx.input in ('*', '+', '?', '{', '}', '(', '[', ']', None): raise Exception + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input == ')': + return self.machine.Final else: - return self + return self.__class__ + class Escape(EscapeState): - class InsideParenthesis(State): + def advance(self, ctx): + return self.machine.states[self.machine.Inside].advance(ctx) - def run(self, ctx): - if ctx.input == '(': - ctx.flush() - ctx.append_to_buffer("") - ctx.context = self.__class__ - elif ctx.input == ')': - ctx.context = RegexParser.MainState - elif ctx.input == '|': - ctx.append_to_contents("") - else: - ctx.append_to_buffer(ctx.input) + class Final(FinalState): + def run(self, ctx): + pass - def check_transitions(self, ctx): - if ctx.input in ('}', ']'): + def advance(self, ctx): + if ctx.input in (')', '}', ']'): raise Exception - # inside - if ctx.context == self.__class__: - if ctx.input in ('*', '+', '?', '{', '(', '['): - raise Exception - elif ctx.input == '\\': - return RegexParser.Escape() - else: - return self - - # outside + # quantifier specified + elif ctx.input in ('*', '+', '?'): + return self.machine.QtyState + elif ctx.input == '{': + return self.machine.InsideBrackets else: - if ctx.input == ')': - raise Exception - # quantifier specified - if ctx.input in ('*', '+', '?'): - return RegexParser.QtyState() - elif ctx.input == '{': - return RegexParser.InsideBrackets() - else: - ctx.flush() + ctx.flush() - # pick - if ctx.input == '|': - return RegexParser.PickState() - elif ctx.pick: - raise Exception + # pick + if ctx.input == '|': + return self.machine.PickState + elif ctx.pick: + raise Exception - # continue with something else - if ctx.input == '(': - return self - elif ctx.input == '[': - return RegexParser.SquareBrackets() - elif ctx.input == '\\': - return RegexParser.Escape() - elif ctx.input is None: - return RegexParser.Final() - else: - return RegexParser.MainState() + # continue with something else + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input is None: + return self.machine.Final + else: + return self.machine.MainState class SquareBrackets(SubStateMachine): @@ -849,75 +892,71 @@ def run(self, ctx): ctx.flush() ctx.append_to_alphabet("") - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '|', None): raise Exception elif ctx.input == '\\': - return RegexParser.SquareBrackets.Escape() + return self.machine.Escape else: - return RegexParser.SquareBrackets.Inside() + 
return self.machine.Inside class Inside(State): def run(self, ctx): ctx.append_to_alphabet(ctx.input) - def check_transitions(self, ctx): + def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', '|', None): raise Exception elif ctx.input == '\\': - return RegexParser.SquareBrackets.Escape() + return self.machine.Escape elif ctx.input == ']': - return RegexParser.SquareBrackets.Final() + return self.machine.Final else: - return self + return self.__class__ - class Escape(State): - def run(self, ctx): - pass + class Escape(EscapeState): - def check_transitions(self, ctx): - pass + def advance(self, ctx): + return self.machine.states[self.machine.Inside].advance(ctx) - class Final(State): + class Final(FinalState): def run(self, ctx): pass - def check_transitions(self, ctx): - if ctx.input in (')', '}', ']'): - raise Exception + def advance(self, ctx): + if ctx.input in (')', '}', ']'): + raise Exception - # quantifier specified - elif ctx.input in ('*', '+', '?'): - return RegexParser.QtyState() - elif ctx.input == '{': - return RegexParser.InsideBrackets() - else: - ctx.flush() + # quantifier specified + elif ctx.input in ('*', '+', '?'): + return self.machine.QtyState + elif ctx.input == '{': + return self.machine.InsideBrackets + else: + ctx.flush() - # pick - if ctx.input == '|': - return RegexParser.PickState() - elif ctx.pick: - raise Exception + # pick + if ctx.input == '|': + return self.machine.PickState + elif ctx.pick: + raise Exception - # continue with something else - if ctx.input == '(': - return RegexParser.InsideParenthesis() - elif ctx.input == '[': - return self - elif ctx.input == '\\': - return RegexParser.Escape() - elif ctx.input is None: - return RegexParser.Final() - else: - return RegexParser.MainState() + # continue with something else + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.__class__ + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input is None: + return self.machine.Final + else: + return self.machine.MainState def __init__(self): - self.current_state = RegexParser.Initial() - - self.context = RegexParser.MainState + StateMachine.__init__(self) self._name = None self.inputs = None @@ -974,6 +1013,11 @@ def flush(self): if self.nothing_to_flush: return + print("buffer: " + str(self.values)) + print("alphabet: " + str(self.alphabet)) + print + print + if self.min is None and self.max is None: self.min = self.max = 1 @@ -995,16 +1039,13 @@ def reset(self): self.min = None self.max = None - def run(self, inputs, name): + def parse(self, inputs, name): self._name = name - # None indicates the end of the regex - self.inputs = list(inputs) + [None] + # None indicates the beginning and the end of the regex + self.inputs = [None] + list(inputs) + [None] - while self.inputs: - self.current_state = self.current_state.check_transitions(self) - self.current_state.run(self) - self.inputs.pop(0) + self.run(self) return self._create_non_terminal_node() @@ -1014,15 +1055,12 @@ def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=No assert(contents is not None or alphabet is not None) if alphabet is not None: - return [Node(name=name, - vt=fvt.String(alphabet=alphabet, - min_sz=-1 if qty[0] is None else qty[0], - max_sz=-1 if qty[1] is None else qty[1])), - 1, 1] + return [Node(name=name, vt=fvt.String(alphabet=alphabet, + min_sz=-1 if qty[0] is None else qty[0], + max_sz=-1 if qty[1] is None else qty[1])), 1, 1] else: - return [Node(name=name, 
vt=fvt.String(val_list=contents)), - -1 if qty[0] is None else qty[0], - -1 if qty[1] is None else qty[1]] + return [Node(name=name, vt=fvt.String(val_list=contents)), -1 if qty[0] is None else qty[0], + -1 if qty[1] is None else qty[1]] def _create_non_terminal_node(self): non_terminal = [1, [MH.Copy + MH.Ordered]] @@ -1290,7 +1328,7 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): assert isinstance(regexp, str) parser = RegexParser() - non_terminal_node = parser.run(regexp, name) + non_terminal_node = parser.parse(regexp, name) n.set_subnodes_with_csts(non_terminal_node, conf=conf) custo_set = desc.get('custo_set', None) diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index b3d5107..fb4f69e 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -27,7 +27,7 @@ def tearDown(self): r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha", 'salut[abcd]{,15}rr', r"[]whatever", r"t{,15}") def test_invalid_regexes(self, regex): - self.assertRaises(Exception, self._parser.run, regex, "name") + self.assertRaises(Exception, self._parser.parse, regex, "name") @@ -36,7 +36,7 @@ def test_invalid_regexes(self, regex): ('{0}', (0, 0)), ('{0,0}', (0, 0)), ('{3,}', (3, None))) def test_7(self, char, qty): - self._parser.run(r"salut[abcd]" + char + "ooo", "toto") + self._parser.parse(r"salut[abcd]" + char + "ooo", "toto") self.assertEquals(self._parser._create_terminal_node.call_count, 3) self._parser._create_terminal_node.assert_has_calls( [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), @@ -49,7 +49,7 @@ ('{0}', (0, 0)), ('{0,0}', (0, 0)), ('{3,}', (3, None))) def test_8(self, char, qty): - self._parser.run(r"salu(ttteee|whatever)" + char + "ooo", "toto") + self._parser.parse(r"salu(ttteee|whatever)" + char + "ooo", "toto") @@ -119,12 +119,12 @@ def test_wrong_end_raise(self, regex): def regex_raise(self, regex): - self.assertRaises(Exception, self._parser.run, regex, "name") + self.assertRaises(Exception, self._parser.parse, regex, "name") def regex_assert(self, regex, nodes): - self._parser.run(regex, "name") + self._parser.parse(regex, "name") self.assertEquals(self._parser._create_terminal_node.call_count, len(nodes)) calls = [] @@ -206,7 +206,7 @@ def test_json(self, test_case): def regex_assert_json(self, test_case): - self._parser.run(test_case['regex'], "name") + self._parser.parse(test_case['regex'], "name") self.assertEquals(self._parser._create_terminal_node.call_count, len(test_case['nodes'])) calls = []
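[Editorial note] The decisive change in patch 27 is that states are instantiated exactly once: StateMachine.__init__() discovers the nested State subclasses with inspect.getmembers() and keeps one shared instance per class in self.states, so advance() can return bare classes that the driver maps back to instances. A self-contained sketch of that registry pattern follows; the VowelCounter machine and its names are assumptions made for illustration, not fuddly's API:

import inspect

class State(object):
    def __init__(self, machine):
        self.machine = machine           # local context, as in the patch
    def run(self, ctx):
        raise NotImplementedError
    def advance(self, ctx):
        raise NotImplementedError

class VowelCounter(object):

    class Initial(State):
        def run(self, ctx):
            pass
        def advance(self, ctx):
            if ctx.input is None:
                return None
            return self.machine.Vowel if ctx.input in "aeiou" else self.machine.Other

    class Vowel(State):
        def run(self, ctx):
            ctx.total += 1
        def advance(self, ctx):
            if ctx.input is None:
                return None              # None means: stop, we are done
            return self.machine.Vowel if ctx.input in "aeiou" else self.machine.Other

    class Other(State):
        def run(self, ctx):
            pass
        def advance(self, ctx):
            if ctx.input is None:
                return None
            return self.machine.Vowel if ctx.input in "aeiou" else self.machine.Other

    def __init__(self):
        self.states = {}
        # build every nested State subclass exactly once
        for name, cls in inspect.getmembers(self.__class__):
            if inspect.isclass(cls) and issubclass(cls, State):
                self.states[cls] = cls(self)

    def count(self, text):
        self.total = 0
        state = self.states[self.Initial]
        for self.input in list(text) + [None]:
            nxt = state.advance(self)    # advance() returns a *class*...
            if nxt is None:
                break
            state = self.states[nxt]     # ...resolved to the shared instance
            state.run(self)
        return self.total

VowelCounter().count("state machine") returns 5. Sharing one instance per class is what lets the sub-machines keep per-run attributes (like Escape's buffer) without re-allocating states on every transition.

From 8113f8a2605bc6c91a4b8712a43f9829af1fccbb Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 7 Jul 2016 11:32:53 +0200 Subject: [PATCH 28/80] Revamp InsideBrackets state --- framework/data_model_helpers.py | 114 ++++++++++++++++++------ 1 file changed, 71 insertions(+), 43 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index ef2d977..8a575ca 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -674,7 +674,7 @@ def advance(self, ctx): ctx.append_to_buffer(content) if ctx.input == '{': - return self.machine.InsideBrackets + return self.machine.Brackets else: return self.machine.QtyState @@ -746,16 +746,63 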
def advance(self, ctx): return self.machine.MainState - class InsideBrackets(State): + class Brackets(SubStateMachine): + class Initial(State): + def run(self, ctx): + ctx.min = "" - def run(self, ctx): - if ctx.input == '{': - ctx.context = self.__class__ - elif ctx.input == '}': - ctx.context = RegexParser.MainState + def advance(self, ctx): + if ctx.input.isdigit(): + return self.machine.BeforeComma + else: + raise Exception + class BeforeComma(State): + def run(self, ctx): + ctx.min += ctx.input + def advance(self, context): + if context.input.isdigit(): + return self.machine.BeforeComma + elif context.input == ',': + return self.machine.Comma + elif context.input == '}': + return self.machine.Final + else: + raise Exception + class Comma(State): + def run(self, ctx): + ctx.max = "" + def advance(self, context): + if context.input.isdigit(): + return self.machine.AfterComma + elif context.input == '}': + return self.machine.Final + else: + raise Exception + class AfterComma(State): + def run(self, ctx): + ctx.max += ctx.input + def advance(self, context): + if context.input.isdigit(): + return self.machine.AfterComma + elif context.input == '}': + return self.machine.Final + else: + raise Exception + class Final(State): + def run(self, ctx): + ctx.min = int(ctx.min) if ctx.max is None: ctx.max = ctx.min @@ -769,44 +816,25 @@ def run(self, ctx): ctx.flush() - elif ctx.input == ',': - if ctx.min is None: - raise Exception - ctx.max = "" - elif ctx.input.isdigit(): - if ctx.max is not None: - ctx.max += ctx.input - else: - if ctx.min is None: - ctx.min = "" - ctx.min += ctx.input - def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', ')', ']'): raise Exception + elif ctx.input == '|': + return self.machine.PickState + elif ctx.input is None: + return self.machine.Final + else: + if ctx.pick: + raise Exception - if ctx.context == self.__class__: - if ctx.input.isdigit() or ctx.input in (',', '}'): - return self.__class__ - else: + if ctx.input == '}': raise Exception - else: - if ctx.input == '|': - return self.machine.PickState - elif ctx.input is None: - return self.machine.Final + elif ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets else: - if ctx.pick: - raise Exception - - if ctx.input == '}': - raise Exception - elif ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - else: - return self.machine.MainState + return self.machine.MainState
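[Editorial note] Spelling the quantifier brackets out as Initial -> BeforeComma -> Comma -> AfterComma -> Final makes the accepted grammar explicit: '{' digits [',' [digits]] '}', where a missing upper bound means unbounded and a leading comma is now rejected. The hypothetical helper below condenses the same rules (it is not fuddly code) and is convenient for checking the corner cases exercised by the tests:

def parse_quantifier(text):
    """'{3}' -> (3, 3); '{2,7}' -> (2, 7); '{3,}' -> (3, None).
    Raises ValueError on what the Brackets sub-machine rejects,
    e.g. '{,15}', '{}' or '{7,2}'."""
    if len(text) < 3 or text[0] != '{' or text[-1] != '}':
        raise ValueError(text)
    body = text[1:-1]
    lo, sep, hi = body.partition(',')
    if not lo.isdigit():                 # Initial requires at least one digit
        raise ValueError(text)
    if not sep:                          # '{n}' : min == max
        return int(lo), int(lo)
    if not hi:                           # '{n,}': no upper bound
        return int(lo), None
    if not hi.isdigit():
        raise ValueError(text)
    lo, hi = int(lo), int(hi)
    if lo > hi:                          # '{7,2}' is rejected, as in Final above
        raise ValueError(text)
    return lo, hi

From 840c067656af193153424cd07b8b24c024a64c14 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 7 Jul 2016 15:01:11 +0200 Subject: [PATCH 29/80] Make RegexParser ddts use json format --- test/unit/test_data_model_helpers.py | 133 ++++++++++++++++++--------- 1 file changed, 91 insertions(+), 42 deletions(-) diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index fb4f69e..f00437c 100644 --- a/test/unit/test_data_model_helpers.py +++ 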
b/test/unit/test_data_model_helpers.py @@ -59,32 +59,6 @@ def test_8(self, char, qty): @ddt.unpack @ddt.data( - (r"", [([""], None, (1, 1))]), - - (r"a", [(["a"], None, (1, 1))]), - - (r"foo", [(["foo"], None, (1, 1))]), - - (r"(salut)(les)(loulous)", [(["salut"], None, (1, 1)), - (["les"], None, (1, 1)), - (["loulous"], None, (1, 1))]), - - (r"(salut)les(foo)", [(["salut"], None, (1, 1)), - (["les"], None, (1, 1)), - (["foo"], None, (1, 1))]), - - (r"salut(les)(loulous)", [(["salut"], None, (1, 1)), - (["les"], None, (1, 1)), - (["loulous"], None, (1, 1))]), - - (r"(salut)(les)loulous", [(["salut"], None, (1, 1)), - (["les"], None, (1, 1)), - (["loulous"], None, (1, 1))]), - - (r"salut(les)loulous", [(["salut"], None, (1, 1)), - (["les"], None, (1, 1)), - (["loulous"], None, (1, 1))]), - (r"salut(l\(es)(lou\\lous)cmoi", [(["salut"], None, (1, 1)), (["l(es"], None, (1, 1)), (["lou\lous"], None, (1, 1)), @@ -109,7 +83,7 @@ def test_various(self, regex, nodes): @ddt.data(r"?", r"*", r"+", r"{1,2}", r"what{,}ever", r"bj{}er" r"what{1, 2}", r"what{,3}ever", r"ee{l1, 2}ever", r"whddddat{\13, 2}eyyyver", - r"wat{3, 2d}eyyyver", r"w**r", r"w+*r", r"w*?r") + r"wat{3,2d}eyyyver", r"w**r", r"w+*r", r"w*?r") def test_quantifier_raise(self, regex): self.regex_raise(regex) @@ -135,18 +109,19 @@ def regex_assert(self, regex, nodes): self._parser._create_terminal_node.assert_has_calls(calls) - @ddt.unpack @ddt.data( - # (regex, nodes=[(contents, alphabet, qty)]) - (r"[abcd]*toto(|\(ab\)|cd)+what?ever", [(None, "abcd", (0, None)), - (["toto"], None, (1, 1)), - (["", "(ab)", "cd"], None, (1, None)), - (["wha"], None, (1, 1)), - (["t"], None, (0, 1)), - (["ever"], None, (1, 1))]) + {'regex': r"[abcd]*toto(|\(ab\)|cd)+what?ever", + 'nodes': [ + {"alphabet": "abcd", "qty": (0, None)}, + {"contents": ["toto"]}, + {"contents": ["", "(ab)", "cd"], "qty": (1, None)}, + {"contents": ["wha"]}, + {"contents": ["t"], "qty": (0, 1)}, + {"contents": ["ever"]} + ]}, ) - def test_complete(self, regex, nodes): - self.regex_assert(regex, nodes) + def test_complete(self, test_case): + self.regex_assert_json(test_case) @ddt.unpack @@ -197,21 +172,95 @@ def test_pick(self, regex, nodes): self.regex_assert(regex, nodes) + + + @ddt.data( + {'regex': r"()", 'nodes': [{"contents": [""]}]}, + {'regex': r"(z)", 'nodes': [{"contents": ["z"]}]}, + {'regex': r"(cat)", 'nodes': [{"contents": ["cat"]}]}, + + {'regex': r"hello(boat)", + 'nodes': [{"contents": ["hello"]}, {"contents": ["boat"]}]}, + + {'regex': r"(cake)awesome", + 'nodes': [{"contents": ["cake"]}, {"contents": ["awesome"]}]}, + + {'regex': r"(foo)(bar)(foo)", + 'nodes': [{"contents": ["foo"]}, {"contents": ["bar"]}, {"contents": ["foo"]}]}, + + {'regex': r"dashboard(apple)(purple)", + 'nodes': [{"contents": ["dashboard"]}, {"contents": ["apple"]}, {"contents": ["purple"]}]}, + + {'regex': r"(harder)better(faster)", + 'nodes': [{"contents": ["harder"]}, {"contents": ["better"]}, {"contents": ["faster"]}]}, + + {'regex': r"(stronger)(it is me)baby", + 'nodes': [{"contents": ["stronger"]}, {"contents": ["it is me"]}, {"contents": ["baby"]}]}, + + {'regex': r"new(york)city", + 'nodes': [{"contents": ["new"]}, {"contents": ["york"]}, {"contents": ["city"]}]}, + + {'regex': r"()whatever", + 'nodes': [{"contents": [""]}, {"contents": ["whatever"]}]}, + + {'regex': r"this is it()", + 'nodes': [{"contents": ["this is it"]}, {"contents": [""]}]}, + + {'regex': r"this()parser()is()working", + 'nodes': [{"contents": ["this"]}, {"contents": [""]}, {"contents": ["parser"]}, 
{"contents": [""]}, + {"contents": ["is"]}, {"contents": [""]}, {"contents": ["working"]}]}, + + {'regex': r"()()()", + 'nodes': [{"contents": [""]}, {"contents": [""]}, {"contents": [""]}]}, + ) + def test_basic_parenthesis(self, test_case): + self.regex_assert_json(test_case) + + + @ddt.data( - {'regex': r"bar", 'nodes': [{"contents": ["bar"], "alphabet": None, "qty": (1, 1)}]} + {'regex': r"[e]", 'nodes': [{"alphabet": "e"}]}, + {'regex': r"[caty]", 'nodes': [{"alphabet": "caty"}]}, + {'regex': r"[abcd][efghij]", 'nodes': [{"alphabet": "abcd"}, {"alphabet": "efghij"}]}, + {'regex': r"[cake]awesome", 'nodes': [{"alphabet": "cake"}, {"contents": ["awesome"]}]}, + + {'regex': r"[foo][bar][foo]", + 'nodes': [{"alphabet": "foo"}, {"alphabet": "bar"}, {"alphabet": "foo"}]}, + + {'regex': r"dashboard[apple][purple]", + 'nodes': [{"contents": ["dashboard"]}, {"alphabet": "apple"}, {"alphabet": "purple"}]}, + + {'regex': r"[harder]better[faster]", + 'nodes': [{"alphabet": "harder"}, {"contents": ["better"]}, {"alphabet": "faster"}]}, + + {'regex': r"[stronger][it is me]baby", + 'nodes': [{"alphabet": "stronger"}, {"alphabet": "it is me"}, {"contents": ["baby"]}]}, + + {'regex': r"new[york]city", + 'nodes': [{"contents": ["new"]}, {"alphabet": "york"}, {"contents": ["city"]}]}, ) - def test_json(self, test_case): + def test_basic_square_brackets(self, test_case): self.regex_assert_json(test_case) + @ddt.data(r"[]", r"stronger[]baby", r"strongerbaby[]", r"[]strongerbaby", r"stro[]nger[]baby[]") + def test_basic_square_brackets_raise(self, regex): + self.regex_raise(regex) + + def regex_assert_json(self, test_case): self._parser.parse(test_case['regex'], "name") self.assertEquals(self._parser._create_terminal_node.call_count, len(test_case['nodes'])) calls = [] - for node in test_case['nodes']: - calls.append(mock.call("name" + str(test_case['nodes'].index(node) + 1), vt.String, - contents=node['contents'], alphabet=node['alphabet'], qty=node['qty'])) + nodes = test_case['nodes'] + for i in range(0, len(nodes)): + + contents = nodes[i]['contents'] if 'contents' in nodes[i] else None + alphabet = nodes[i]['alphabet'] if 'alphabet' in nodes[i] else None + qty = nodes[i]['qty'] if 'qty' in nodes[i] else (1, 1) + + calls.append(mock.call("name" + str(i + 1), vt.String, contents=contents, alphabet=alphabet, qty=qty)) self._parser._create_terminal_node.assert_has_calls(calls) \ No newline at end of file From ee49340d67ba8e86e93e77c7a065d4688cdd5788 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 7 Jul 2016 16:08:50 +0200 Subject: [PATCH 30/80] RegexParser tests clean up --- test/unit/test_data_model_helpers.py | 184 +++++++++++++-------------- 1 file changed, 89 insertions(+), 95 deletions(-) diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index f00437c..3727e69 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -27,7 +27,7 @@ def tearDown(self): r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha", 'salut[abcd]{,15}rr', r"[]whatever", r"t{,15}") def test_invalid_regexes(self, regex): - self.assertRaises(Exception, self._parser.parse, regex, "name") + self.assert_regex_is_invalid(regex) @ddt.unpack @@ -57,27 +57,17 @@ def test_8(self, char, qty): mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))]) - @ddt.unpack @ddt.data( - (r"salut(l\(es)(lou\\lous)cmoi", [(["salut"], None, (1, 1)), - (["l(es"], None, (1, 1)), - (["lou\lous"], None, (1, 1)), - (["cmoi"], 
None, (1, 1))]), - - (r"salut(l\(es)lou\\lous(cmoi)", [(["salut"], None, (1, 1)), - (["l(es"], None, (1, 1)), - (["lou\lous"], None, (1, 1)), - (["cmoi"], None, (1, 1))]), - - (r"()+whatever", [([""], None, (1, None)), - (["whatever"], None, (1, 1))]), - - (r"salut[abc]ooo", [(["salut"], None, (1, 1)), - (None, "abc", (1, 1)), - (["ooo"], None, (1, 1))]), + {'regex': r"salut(l\(es)(lou\\lous)cmoi", + 'nodes': [ + {"contents": ["salut"]}, + {"contents": ["l(es"]}, + {"contents": ["lou\lous"]}, + {"contents": ["cmoi"]}, + ]}, ) - def test_various(self, regex, nodes): - self.regex_assert(regex, nodes) + def test_escape(self, test_case): + self.assert_regex_is_valid(test_case) @@ -85,28 +75,11 @@ def test_various(self, regex, nodes): r"what{1, 2}", r"what{,3}ever", r"ee{l1, 2}ever", r"whddddat{\13, 2}eyyyver", r"wat{3,2d}eyyyver", r"w**r", r"w+*r", r"w*?r") def test_quantifier_raise(self, regex): - self.regex_raise(regex) + self.assert_regex_is_invalid(regex) @ddt.data(r"salut(", r"dd[", r"(", r"[", r"{0") def test_wrong_end_raise(self, regex): - self.regex_raise(regex) - - - def regex_raise(self, regex): - self.assertRaises(Exception, self._parser.parse, regex, "name") - - - def regex_assert(self, regex, nodes): - - self._parser.parse(regex, "name") - self.assertEquals(self._parser._create_terminal_node.call_count, len(nodes)) - - calls = [] - for node in nodes: - calls.append(mock.call("name" + str(nodes.index(node) + 1), vt.String, - contents=node[0], alphabet=node[1], qty=node[2])) - - self._parser._create_terminal_node.assert_has_calls(calls) + self.assert_regex_is_invalid(regex) @ddt.data( @@ -121,57 +94,7 @@ def regex_assert(self, regex, nodes): ]}, ) def test_complete(self, test_case): - self.regex_assert_json(test_case) - - - @ddt.unpack - @ddt.data( - (r"(ab|cd|)+", [(["ab", "cd", ""], None, (1, None))]), - (r"(ab||cd)", [(["ab", "", "cd"], None, (1, 1))]), - (r"(|ab|cd|ef|gh)+", [(["", "ab", "cd", "ef", "gh"], None, (1, None))]), - (r"(|)+", [(["", ""], None, (1, None))]), - (r"(|||)+", [(["", "", "", ""], None, (1, None))]), - ) - def test_or_in_parenthesis(self, regex, nodes): - self.regex_assert(regex, nodes) - - - @ddt.unpack - @ddt.data( - (r"tata|haha|c*|b*|[abcd]+", [(["tata", "haha"], None, (1, 1)), - (["c"], None, (0, None)), - (["b"], None, (0, None)), - (None, "abcd", (1, None))]), - - (r"(tata)+|haha|tata||b*|[abcd]+", [(["tata"], None, (1, None)), - (["haha", "tata", ""], None, (1, 1)), - (["b"], None, (0, None)), - (None, "abcd", (1, None))]), - - (r"toto|titi|tata", [(["toto", "titi", "tata"], None, (1, 1))]), - - (r"|", [(["",""], None, (1, 1))]), - - (r"coucou|[abcd]|", [(["coucou"], None, (1, 1)), - (None, "abcd", (1, 1)), - ([""], None, (1, 1))]), - - (r"[whatever]+|[hao]|[salut]?", [(None, "whatever", (1, None)), - (None, "hao", (1, 1)), - (None, "salut", (0, 1))]), - - (r"|[hao]|[salut]?", [([""], None, (1, 1)), - (None, "hao", (1, 1)), - (None, "salut", (0, 1))]), - (r"coucou||[salut]?", [(["coucou", ""], None, (1, 1)), - (None, "salut", (0, 1))]), - (r"coucou||||[salut]?", [(["coucou", "", "", ""], None, (1, 1)), - (None, "salut", (0, 1))]) - ) - def test_pick(self, regex, nodes): - self.regex_assert(regex, nodes) - - + self.assert_regex_is_valid(test_case) @ddt.data( @@ -214,7 +137,20 @@ def test_pick(self, regex, nodes): 'nodes': [{"contents": [""]}, {"contents": [""]}, {"contents": [""]}]}, ) def test_basic_parenthesis(self, test_case): - self.regex_assert_json(test_case) + self.assert_regex_is_valid(test_case) + + + + + @ddt.data( + {'regex': r"(ab|cd|)+", 
'nodes': [{"contents": ["ab", "cd", ""], "qty": (1, None)}]}, + {'regex': r"(ab||cd)", 'nodes': [{"contents": ["ab", "", "cd"]}]}, + {'regex': r"(|ab|cd|ef|gh)+", 'nodes': [{"contents": ["", "ab", "cd", "ef", "gh"], "qty": (1, None)}]}, + {'regex': r"(|)+", 'nodes': [{"contents": ["", ""], "qty": (1, None)}]}, + {'regex': r"(|||)+", 'nodes': [{"contents": ["", "", "", ""], "qty": (1, None)}]}, + ) + def test_or_in_parenthesis(self, test_case): + self.assert_regex_is_valid(test_case) @@ -219,15 +176,69 @@ def test_basic_parenthesis(self, test_case): 'nodes': [{"contents": ["new"]}, {"alphabet": "york"}, {"contents": ["city"]}]}, ) def test_basic_square_brackets(self, test_case): - self.regex_assert_json(test_case) + self.assert_regex_is_valid(test_case) @ddt.data(r"[]", r"stronger[]baby", r"strongerbaby[]", r"[]strongerbaby", r"stro[]nger[]baby[]") def test_basic_square_brackets_raise(self, regex): - self.regex_raise(regex) + self.assert_regex_is_invalid(regex) + + + @ddt.data( + + {'regex': r"|", 'nodes': [{"contents": ["",""]}]}, + {'regex': r"|||", 'nodes': [{"contents": ["", "", "", ""]}]}, + {'regex': r"toto|titi|tata", 'nodes': [{"contents": ["toto", "titi", "tata"]}]}, + {'regex': r"toto|titi|", 'nodes': [{"contents": ["toto", "titi", ""]}]}, + {'regex': r"toto||tata", 'nodes': [{"contents": ["toto", "", "tata"]}]}, + {'regex': r"|titi|tata", 'nodes': [{"contents": ["", "titi", "tata"]}]}, + {'regex': r"coucou|[abcd]|", 'nodes': [{"contents": ["coucou"]}, {"alphabet": "abcd"}, {"contents": [""]}]}, + + {'regex': r"|[hao]|[salut]?", + 'nodes': [{"contents": [""]}, {"alphabet": "hao"}, {"alphabet": "salut", "qty": (0, 1)}]}, + + {'regex': r"coucou||[salut]?", + 'nodes': [{"contents": ["coucou", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + + {'regex': r"coucou||||[salut]?", + 'nodes': [{"contents": ["coucou", "", "", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + + {'regex': r"[whatever]+|[hao]|[salut]?", + 'nodes': [ + {"alphabet": "whatever", "qty": (1, None)}, + {"alphabet": "hao"}, + {"alphabet": "salut", "qty": (0, 1)} + ]}, + {'regex': r"(whatever)+|(hao)|(salut)?", + 'nodes': [ + {"contents": ["whatever"], "qty": (1, None)}, + {"contents": ["hao"]}, + {"contents": ["salut"], "qty": (0, 1)} + ]}, + + {'regex': r"tata|haha|c*|b*|[abcd]+", 'nodes': [ + {"contents": ["tata", "haha"]}, + {"contents": ["c"], "qty": (0, None)}, + {"contents": ["b"], "qty": (0, None)}, + {"alphabet": "abcd", "qty": (1, None)} + ]}, + + {'regex': r"(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ + {"contents": ["tata"], "qty": (1, None)}, + {"contents": ["haha", "tata", ""]}, + {"contents": ["b"], "qty": (0, None)}, + {"alphabet": "abcd", "qty": (1, None)} + ]}, + ) + def test_shape(self, test_case): + self.assert_regex_is_valid(test_case) + + + + def assert_regex_is_valid(self, test_case): self._parser.parse(test_case['regex'], "name") self.assertEquals(self._parser._create_terminal_node.call_count, len(test_case['nodes'])) @@ -263,4 +253,8 @@ def regex_assert_json(self, test_case): calls.append(mock.call("name" + str(i + 1), vt.String, contents=contents, alphabet=alphabet, qty=qty)) - self._parser._create_terminal_node.assert_has_calls(calls) \ No newline at end of file + self._parser._create_terminal_node.assert_has_calls(calls) + + + def assert_regex_is_invalid(self, regex): + self.assertRaises(Exception, self._parser.parse, regex, "name") \ No newline at end of file
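[Editorial note] Patches 29 and 30 above move the data-driven tests to JSON-style dictionaries with defaults (qty defaults to (1, 1), contents and alphabet to None), which is why cases such as 'salut[abcd]{,15}rr' and r"[]whatever" migrate to the invalid list. The next patch (31/80) then extends escape handling with \xHH sequences and the usual character-class shortcuts. Two details of it are worth flagging: the committed code is Python-2 specific (string.letters, str.decode("hex")), and its get_complement() iterates range(0, 0xFF), which appears to leave chr(0xFF) out of every complemented class. A portable sketch of an equivalent shortcut table (an assumed rewrite, not the committed code):

import string

def get_complement(chars):
    # cover the full 8-bit range, 0x00..0xFF included
    return ''.join(chr(i) for i in range(0x100) if chr(i) not in chars)

WORD_CHARS = string.ascii_letters + string.digits + '_'

SHORTCUTS = {
    's': string.whitespace, 'S': get_complement(string.whitespace),
    'd': string.digits,     'D': get_complement(string.digits),
    'w': WORD_CHARS,        'W': get_complement(WORD_CHARS),
}

# sanity check: len(SHORTCUTS['d']) == 10 and len(SHORTCUTS['D']) == 246

From ed5ac882119f504e4cdff03ac902838971892d75 Mon Sep 17 00:00:00 2001 From: Julien Baladier 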
Date: Tue, 19 Jul 2016 11:58:15 +0200
Subject: [PATCH 31/80] Add support for range in square brackets + shortcuts using \
---
 framework/data_model_helpers.py      | 443 ++++++++++++++++-----------
 framework/error_handling.py          |   6 +
 test/unit/test_data_model_helpers.py |  17 +-
 3 files changed, 288 insertions(+), 178 deletions(-)

diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index 8a575ca..05ae1cc 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -512,133 +512,250 @@ def _handle_attrs(n, set_attrs, clear_attrs):
             n.clear_attr(ca)
 
+
 class State(object):
     """
-    State at the lower level
+    Represent a state at the lower level
     """
 
     def __init__(self, machine):
         """
         Args:
-            state_machine (StateMachine): state machine where it lives
+            machine (StateMachine): state machine where it lives (local context)
         """
         self.machine = machine
+        self.init_specific()
 
-    def run(self, context):
-        raise NotImplementedError
+    def init_specific(self):
+        pass
 
-    def advance(self, context):
+    def _run(self, context):
+        """
+        Do some actions on the current character.
+        Args:
+            context (StateMachine): root state machine (global context)
+        """
         raise NotImplementedError
 
-
-class FinalState(State):
+    def run(self, context):
+        self._run(context)
+        context.inputs.pop(0)
 
     def advance(self, context):
-        pass
+        """
+        Check transitions using the first character that has not been consumed yet.
+        Args:
+            context (StateMachine): root state machine (global context)
+        Returns:
+            Class of the next state to run, or None if we are in a final state
+        """
+        raise NotImplementedError
 
 
-class StateMachine(object):
+class StateMachine(State):
 
     class Initial(State):
         pass
 
-    class Final(State):
-        pass
-
-    def __init__(self):
+    def __init__(self, machine=None):
         self.states = {}
+        self.inputs = None
 
         for name, cls in inspect.getmembers(self.__class__):
             if inspect.isclass(cls) and issubclass(cls, State):
                 self.states[cls] = cls(self)
 
+        State.__init__(self, self if machine is None else machine)
+
+    @property
+    def input(self):
+        return None if self.inputs is None or len(self.inputs) == 0 else self.inputs[0]
+
+    def _run(self, context):
+        while self.state is not None:
+            self.state.run(context)
+            next_state = self.state.advance(context)
+            self.state = self.states[next_state] if next_state is not None else None
+
     def run(self, context):
         self.state = self.states[self.Initial]
-        while True:
-            self.state.run(context)
-            if isinstance(self.state, self.Final):
-                break
-            context.inputs.pop(0)
-            self.state = self.states[self.state.advance(context)]
+        self._run(context)
 
+class EscapeState(StateMachine):
 
-class SubStateMachine(State, StateMachine):
-    """
-    Seen as a State from outside
-    Act like a StateMachine in the inside
-    """
-    def __init__(self, machine):
-        State.__init__(self, machine)
-        StateMachine.__init__(self)
+    def init_specific(self):
+        self.escaped = None
 
-    def run(self, context):
-        StateMachine.run(self, context)
+    class Initial(State):
 
+        def _run(self, ctx):
+            self.machine.escaped = ""
 
+        def advance(self, ctx):
+            if ctx.input == None:
+                raise EscapeError("Nothing to escape.")
+            elif ctx.input == 'x':
+                return self.machine.Hexadecimal
+            elif ctx.input in ('s','S','d','D','w','W','\\','(',')','[',']','{','}','+','?','*','|','-'):
+                return self.machine.Final
+            else:
+                raise EscapeError("Character to escape is not special. 
It is useless to escape it.") -class EscapeState(SubStateMachine): - class Initial(State): + class Hexadecimal(State): - def run(self, ctx): + def _run(self, ctx): pass def advance(self, ctx): - if ctx.input == None: - raise Exception - return self.machine.Final + if ctx.input in string.hexdigits: + return self.machine.Digit + else: + raise EscapeError("\\x must be followed with two hexadecimal digits: none provided.") + + class Digit(State): - class Final(FinalState): + def _run(self, ctx): + self.machine.escaped += ctx.input - def run(self, ctx): - if ctx.buffer is not None: - ctx.append_to_buffer(ctx.input) - elif ctx.alphabet is not None: - ctx.append_to_alphabet(ctx.input) + def advance(self, ctx): + if ctx.input in string.hexdigits and len(self.machine.escaped) == 1: + return self.__class__ + elif len(self.machine.escaped) == 2: + self.machine.escaped = self.machine.escaped.decode("hex") + return None + else: + raise EscapeError("\\x must be followed with two hexadecimal digits: only one provided.") + + class Final(State): + + def _run(self, ctx): + + def get_complement(not_allowed_chars): + return ''.join([chr(int(i)) for i in range(0, 0xFF) if chr(int(i)) not in not_allowed_chars]) + + shortcuts = {'s': string.whitespace, + 'S': get_complement(string.whitespace), + 'd': string.digits, + 'D': get_complement(string.digits), + 'w': string.letters + string.digits + '_', + 'W': get_complement(string.letters + string.digits + '_')} + + self.machine.escaped = shortcuts[ctx.input] if ctx.input in shortcuts else ctx.input + + def advance(self, context): + return None + + + +class GroupingState(StateMachine): + + class Final(State): + + def _run(self, context): + pass + + def advance(self, context): + return None + + def advance(self, ctx): + if ctx.input in (')', '}', ']'): + raise Exception + + # quantifier specified + elif ctx.input in ('*', '+', '?'): + return self.machine.QtyState + elif ctx.input == '{': + return self.machine.Brackets + else: + ctx.flush() + + # pick + if ctx.input == '|': + return self.machine.Pick + elif ctx.pick: + raise Exception + + # continue with something else + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input is None: + return self.machine.Final + else: + return self.machine.Main class RegexParser(StateMachine): class Initial(State): - def run(self, ctx): + def _run(self, ctx): pass def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{', '}', ')', ']'): - raise Exception - - if ctx.input == '[': + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError("Nothing to quantify.") + elif ctx.input in ('}', ')', ']'): + raise GroupingError("Unopened " + ctx.input) + elif ctx.input == '[': return self.machine.SquareBrackets elif ctx.input == '(': return self.machine.Parenthesis + elif ctx.input == '\\': + return self.machine.Escape else: ctx.append_to_buffer("") - if ctx.input == '\\': - return self.machine.Escape - elif ctx.input == '|': - return self.machine.PickState + if ctx.input == '|': + return self.machine.Pick elif ctx.input is None: return self.machine.Final else: - return self.machine.MainState - + return self.machine.Main - class Final(FinalState): + class Final(State): - def run(self, ctx): + def _run(self, ctx): ctx.flush() + def advance(self, ctx): + return None + class Escape(EscapeState): def advance(self, ctx): - return self.machine.states[self.machine.MainState].advance(ctx) + if 
len(self.escaped) > 1:
+
+            if ctx.pick and len(ctx.values) > 1 and len(ctx.buffer) > 1:
+                raise UnconvertibleRegexError()
+
+            if ctx.buffer is not None:
+
+                if len(ctx.buffer) == 0:
+
+                    if len(ctx.values[:-1]) > 0:
+                        ctx.values = ctx.values[:-1]
+                        ctx.flush()
+                else:
+                    ctx.flush()
+
+                ctx.append_to_alphabet(self.escaped)
+                return self.machine.states[self.machine.SquareBrackets].advance(ctx)
+
+            else:
+                ctx.append_to_buffer(self.escaped)
+                return self.machine.states[self.machine.Main].advance(ctx)
 
-    class MainState(State):
-        def run(self, ctx):
+    class Main(State):
+
+        def _run(self, ctx):
             ctx.append_to_buffer(ctx.input)
 
         def advance(self, ctx):
@@ -652,13 +769,13 @@ def advance(self, ctx):
                 return self.machine.Escape
 
             elif ctx.input == '|':
-                return self.machine.PickState
+                return self.machine.Pick
 
             elif ctx.input in ('?', '*', '+', '{'):
 
                 # pick
                 if ctx.pick and len(ctx.values) > 1 and len(ctx.buffer) > 1:
-                    raise Exception
+                    raise UnconvertibleRegexError()
 
                 if len(ctx.buffer) == 1:
                     if len(ctx.values) > 1:
@@ -678,8 +795,8 @@ def advance(self, ctx):
                 else:
                     return self.machine.QtyState
 
-            elif ctx.input in ('}', ')', ']'):
-                raise Exception
+            elif ctx.input in ('}',')',']'):
+                raise GroupingError("Unopened " + ctx.input)
 
             elif ctx.input is None:
                 return self.machine.Final
@@ -687,23 +804,24 @@ def advance(self, ctx):
                 return self.__class__
 
-    class PickState(State):
+    class Pick(State):
 
-        def run(self, ctx):
+        def _run(self, ctx):
             if not ctx.pick:
                 if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None):
                     ctx.pick = True
                 else:
-                    raise Exception
+                    raise UnconvertibleRegexError()
 
         def advance(self, ctx):
             if ctx.input == '(':
                 return self.machine.Parenthesis
             elif ctx.input == '[':
                 return self.machine.SquareBrackets
+            elif ctx.input == '\\':
+                return self.machine.Escape
             else:
-
                 ctx.append_to_contents("")
 
                 if ctx.input == '|':
@@ -711,13 +829,12 @@ def advance(self, ctx):
                 elif ctx.input is None:
                     return self.machine.Final
                 else:
-                    return self.machine.MainState
-
+                    return self.machine.Main
 
     class QtyState(State):
 
-        def run(self, ctx):
+        def _run(self, ctx):
             if ctx.input == '+':
                 ctx.min = 1
             elif ctx.input == '?':
@@ -729,39 +846,42 @@ def run(self, ctx):
             ctx.flush()
 
         def advance(self, ctx):
-            if ctx.input in ('*', '+', '?', '{', '}', ')', ']'):
-                raise Exception
+            if ctx.input in ('?', '*', '+', '{'):
+                raise QuantificationError("Nothing to quantify.")
 
-            if ctx.input == '(':
+            elif ctx.input in ('}', ')', ']'):
+                raise GroupingError("Unopened " + ctx.input)
+
+            elif ctx.input == '(':
                 return self.machine.Parenthesis
             elif ctx.input == '[':
                 return self.machine.SquareBrackets
             elif ctx.input == '|':
-                return self.machine.PickState
+                return self.machine.Pick
             elif ctx.input == '\\':
                 return self.machine.Escape
            elif ctx.input is None:
                 return self.machine.Final
             else:
-                return self.machine.MainState
+                return self.machine.Main
 
-    class Brackets(SubStateMachine):
+    class Brackets(StateMachine):
 
         class Initial(State):
-            def run(self, ctx):
+            def _run(self, ctx):
                 ctx.min = ""
 
             def advance(self, ctx):
                 if ctx.input.isdigit():
                     return self.machine.BeforeComma
                 else:
-                    raise Exception
+                    raise QuantificationError("{} content needs to start with digit(s).")
 
         class BeforeComma(State):
-            def run(self, ctx):
+            def _run(self, ctx):
                 ctx.min += ctx.input
 
             def advance(self, context):
@@ -772,11 +892,11 @@ def advance(self, context):
                 elif context.input == '}':
                     return self.machine.Final
                 else:
-                    raise Exception
+                    raise QuantificationError("{} can only contain digits and a comma.")
 
         class Comma(State):
-            def run(self, ctx):
+            def 
_run(self, ctx): ctx.max = "" def advance(self, context): @@ -785,11 +905,11 @@ def advance(self, context): elif context.input == '}': return self.machine.Final else: - raise Exception + raise QuantificationError("{} can only contain digits and a comma.") class AfterComma(State): - def run(self, ctx): + def _run(self, ctx): ctx.max += ctx.input def advance(self, context): @@ -801,7 +921,7 @@ def advance(self, context): raise Exception class Final(State): - def run(self, ctx): + def _run(self, ctx): ctx.min = int(ctx.min) if ctx.max is None: @@ -812,15 +932,18 @@ def run(self, ctx): ctx.max = int(ctx.max) if ctx.max is not None and ctx.min > ctx.max: - raise Exception + raise QuantificationError("{a,b}: a <= b not verified.") ctx.flush() + def advance(self, context): + return None + def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', ')', ']'): raise Exception elif ctx.input == '|': - return self.machine.PickState + return self.machine.Pick elif ctx.input is None: return self.machine.Final else: @@ -834,14 +957,13 @@ def advance(self, ctx): elif ctx.input == '[': return self.machine.SquareBrackets else: - return self.machine.MainState - + return self.machine.Main - class Parenthesis(SubStateMachine): + class Parenthesis(GroupingState): class Initial(State): - def run(self, ctx): + def _run(self, ctx): ctx.flush() ctx.append_to_buffer("") @@ -853,10 +975,10 @@ def advance(self, ctx): elif ctx.input == ')': return self.machine.Final else: - return self.machine.Inside + return self.machine.Main - class Inside(State): - def run(self, ctx): + class Main(State): + def _run(self, ctx): if ctx.input == '|': ctx.append_to_contents("") else: @@ -872,56 +994,32 @@ def advance(self, ctx): else: return self.__class__ + class Escape(EscapeState): def advance(self, ctx): - return self.machine.states[self.machine.Inside].advance(ctx) - - class Final(FinalState): - def run(self, ctx): - pass - - def advance(self, ctx): - if ctx.input in (')', '}', ']'): - raise Exception - - # quantifier specified - elif ctx.input in ('*', '+', '?'): - return self.machine.QtyState - elif ctx.input == '{': - return self.machine.Brackets - else: - ctx.flush() - # pick - if ctx.input == '|': - return self.machine.PickState - elif ctx.pick: - raise Exception + if len(self.escaped) > 1: + raise UnconvertibleRegexError() + else: + ctx.append_to_buffer(self.escaped) + return self.machine.states[self.machine.Main].advance(ctx) - # continue with something else - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input is None: - return self.machine.Final - else: - return self.machine.MainState + class SquareBrackets(GroupingState): - class SquareBrackets(SubStateMachine): + def init_specific(self): + self.range = None class Initial(State): - def run(self, ctx): + def _run(self, ctx): + self.machine.range = False ctx.flush() ctx.append_to_alphabet("") def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '|', None): + if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '|', '-', None): raise Exception elif ctx.input == '\\': return self.machine.Escape @@ -929,8 +1027,18 @@ def advance(self, ctx): return self.machine.Inside class Inside(State): - def run(self, ctx): - ctx.append_to_alphabet(ctx.input) + def _run(self, ctx): + if self.machine.range: + self.machine.range = False + if ctx.alphabet[-1] > ctx.input: + raise Exception + elif ctx.input == 
ctx.alphabet[-1]: + pass + else: + for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): + ctx.append_to_alphabet(chr(i)) + else: + ctx.append_to_alphabet(ctx.input) def advance(self, ctx): if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', '|', None): @@ -939,55 +1047,46 @@ def advance(self, ctx): return self.machine.Escape elif ctx.input == ']': return self.machine.Final + elif ctx.input == '-': + return self.machine.Range else: return self.__class__ - class Escape(EscapeState): + class Range(State): + def _run(self, ctx): + self.machine.range = True def advance(self, ctx): - return self.machine.states[self.machine.Inside].advance(ctx) - - class Final(FinalState): - def run(self, ctx): - pass - - def advance(self, ctx): - if ctx.input in (')', '}', ']'): - raise Exception - - # quantifier specified - elif ctx.input in ('*', '+', '?'): - return self.machine.QtyState - elif ctx.input == '{': - return self.machine.Brackets - else: - ctx.flush() + if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '-', '|', None): + raise Exception + elif ctx.input == '\\': + return self.machine.Escape + else: + return self.machine.Inside - # pick - if ctx.input == '|': - return self.machine.PickState - elif ctx.pick: - raise Exception + class Escape(EscapeState): - # continue with something else - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.__class__ - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input is None: - return self.machine.Final - else: - return self.machine.MainState + def advance(self, ctx): + if self.machine.range: + self.machine.range = False + if len(self.escaped) > 1: + raise Exception + elif ctx.alphabet[-1] > self.escaped: + raise Exception + elif self.escaped == ctx.alphabet[-1]: + pass + else: + for i in range(ord(ctx.alphabet[-1]) + 1, ord(self.escaped) + 1): + ctx.append_to_alphabet(chr(i)) + else: + ctx.append_to_alphabet(self.escaped) + return self.machine.states[self.machine.Inside].advance(ctx) - def __init__(self): - StateMachine.__init__(self) + def init_specific(self): self._name = None - self.inputs = None self.values = None self.alphabet = None @@ -999,10 +1098,6 @@ def __init__(self): self._nodes = [] - @property - def input(self): - return None if self.inputs is None or len(self.inputs) == 0 else self.inputs[0] - def append_to_contents(self, content): if self.values is None: self.values = [] @@ -1011,6 +1106,8 @@ def append_to_contents(self, content): def append_to_buffer(self, str): if self.values is None: self.values = [""] + if self.values[-1] is None: + self.values[-1] = "" self.values[-1] += str def append_to_alphabet(self, alphabet): @@ -1041,18 +1138,12 @@ def flush(self): if self.nothing_to_flush: return - print("buffer: " + str(self.values)) - print("alphabet: " + str(self.alphabet)) - print - print - if self.min is None and self.max is None: self.min = self.max = 1 # type = fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String type = fvt.String name = self._name + str(len(self.nodes) + 1) - node = self._create_terminal_node(name, type, contents=self.values, alphabet=self.alphabet, diff --git a/framework/error_handling.py b/framework/error_handling.py index 7b473c3..9fd07fe 100644 --- a/framework/error_handling.py +++ b/framework/error_handling.py @@ -29,3 +29,9 @@ class DataProcessTermination(Exception): pass class UserInterruption(Exception): pass class DataModelDefinitionError(Exception): pass + +class 
RegexParserError(DataModelDefinitionError): pass
+class EscapeError(RegexParserError): pass
+class QuantificationError(RegexParserError): pass
+class GroupingError(RegexParserError): pass
+class UnconvertibleRegexError(RegexParserError): pass
\ No newline at end of file
diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py
index 3727e69..a332f31 100644
--- a/test/unit/test_data_model_helpers.py
+++ b/test/unit/test_data_model_helpers.py
@@ -7,14 +7,13 @@
 @ddt.ddt
 class RegexParserTest(unittest.TestCase):
-    """Test case used to test the 'ProbeUser' class."""
+    """Test case used to test the 'RegexParser' class."""
 
     @classmethod
     def setUpClass(cls):
         pass
 
     def setUp(self):
-        """Initialisation des tests."""
         self._parser = RegexParser()
         self._parser._create_terminal_node = mock.Mock()
 
@@ -174,10 +173,24 @@ def test_or_in_parenthesis(self, test_case):
         {'regex': r"new[york]city",
          'nodes': [{"contents": ["new"]}, {"alphabet": "york"}, {"contents": ["city"]}]},
+
+        {'regex': r"[a-e]", 'nodes': [{"alphabet": "abcde"}]},
+        {'regex': r"[a-ewxy]", 'nodes': [{"alphabet": "abcdewxy"}]},
+        {'regex': r"[1-9]", 'nodes': [{"alphabet": "123456789"}]},
+        {'regex': r"[what1-9]", 'nodes': [{"alphabet": "what123456789"}]},
+        {'regex': r"[a-c1-9]", 'nodes': [{"alphabet": "abc123456789"}]},
+        {'regex': r"[a-c1-9fin]", 'nodes': [{"alphabet": "abc123456789fin"}]},
+
+        {'regex': r"[\x33]", 'nodes': [{"alphabet": "\x33"}]},
+        {'regex': r"[\x33-\x35]", 'nodes': [{"alphabet": "\x33\x34\x35"}]},
+        {'regex': r"[e\x33-\x35a]", 'nodes': [{"alphabet": "e\x33\x34\x35a"}]}
     )
     def test_basic_square_brackets(self, test_case):
         self.assert_regex_is_valid(test_case)
 
+    @ddt.data(r"[\x33-\x23]", r"[3-1]", r"[y-a]", r"[\x3-\x34]", r"[\x3g]")
+    def test_wrong_alphabet(self, regex):
+        self.assert_regex_is_invalid(regex)
 
     @ddt.data(r"[]", r"stronger[]baby", r"strongerbaby[]", r"[]strongerbaby", r"stro[]nger[]baby[]")
     def test_basic_square_brackets_raise(self, regex):
From eb9fc7e6284b88c3b5ddd24e44514a3495e9212e Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Tue, 19 Jul 2016 13:42:59 +0200
Subject: [PATCH 32/80] Add some tests for range and escape + bug fix
---
 framework/data_model_helpers.py      |  2 +-
 test/unit/test_data_model_helpers.py | 51 +++++++++++++---------------
 2 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index 05ae1cc..d9f2abe 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -619,7 +619,7 @@ def _run(self, ctx):
             self.machine.escaped += ctx.input
 
         def advance(self, ctx):
-            if ctx.input in string.hexdigits and len(self.machine.escaped) == 1:
+            if ctx.input in list(string.hexdigits) and len(self.machine.escaped) == 1:
                 return self.__class__
             elif len(self.machine.escaped) == 2:
                 self.machine.escaped = self.machine.escaped.decode("hex")
diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py
index a332f31..ef32ef6 100644
--- a/test/unit/test_data_model_helpers.py
+++ b/test/unit/test_data_model_helpers.py
@@ -28,32 +28,18 @@ def tearDown(self):
     def test_invalid_regexes(self, regex):
         self.assert_regex_is_invalid(regex)
 
-
-    @ddt.unpack
-    @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)),
-              ('{7}', (7, 7)), ('{2,7}', (2, 7)),
-              ('{0}', (0, 0)), ('{0,0}', (0, 0)),
-              ('{3,}', (3, None)))
-    def test_7(self, char, qty):
-        self._parser.parse(r"salut[abcd]" + char + "ooo", "toto")
-        self.assertEquals(self._parser._create_terminal_node.call_count, 3)
- self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salut"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=None, alphabet="abcd", qty=qty), - mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))]) - - @ddt.unpack - @ddt.data(('?', (0, 1)), ('*', (0, None)), ('+', (1, None)), - ('{7}', (7, 7)), ('{2,7}', (2, 7)), - ('{0}', (0, 0)), ('{0,0}', (0, 0)), - ('{3,}', (3, None))) - def test_8(self, char, qty): - self._parser.parse(r"salu(ttteee|whatever)" + char + "ooo", "toto") - self.assertEquals(self._parser._create_terminal_node.call_count, 3) - self._parser._create_terminal_node.assert_has_calls( - [mock.call("toto1", vt.String, contents=["salu"], alphabet=None, qty=(1, 1)), - mock.call("toto2", vt.String, contents=["ttteee", "whatever"], alphabet=None, qty=qty), - mock.call("toto3", vt.String, contents=["ooo"], alphabet=None, qty=(1, 1))]) + @ddt.data( + {'regex': r"[abcd]?", 'nodes': [{"alphabet": "abcd", "qty": (0, 1)}]}, + {'regex': r"[abcd]*", 'nodes': [{"alphabet": "abcd", "qty": (0, None)}]}, + {'regex': r"[abcd]+", 'nodes': [{"alphabet": "abcd", "qty": (1, None)}]}, + {'regex': r"[abcd]{7}", 'nodes': [{"alphabet": "abcd", "qty": (7, 7)}]}, + {'regex': r"[abcd]{2,7}", 'nodes': [{"alphabet": "abcd", "qty": (2, 7)}]}, + {'regex': r"[abcd]{0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, + {'regex': r"[abcd]{0,0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, + {'regex': r"[abcd]{3,}", 'nodes': [{"alphabet": "abcd", "qty": (3, None)}]}, + ) + def test_quantifiers(self, test_case): + self.assert_regex_is_valid(test_case) @ddt.data( @@ -64,12 +50,20 @@ def test_8(self, char, qty): {"contents": ["lou\lous"]}, {"contents": ["cmoi"]}, ]}, + {'regex': r"hi\x58", 'nodes': [{"contents": ["hi\x58"]}]}, + {'regex': r"hi\x00hola", 'nodes': [{"contents": ["hi\x00hola"]}]}, + {'regex': r"\xFFdom", 'nodes': [{"contents": ["\xFFdom"]}]}, + {'regex': r"\ddom", 'nodes': [{"alphabet": "0123456789"}, {"contents": ["dom"]}]}, + {'regex': r"dom[abcd\d]", 'nodes': [{"contents": ["dom"]}, {"alphabet": "abcd0123456789"}]}, + {'regex': r"[abcd]\x33", 'nodes': [{"alphabet": "abcd"}, {"contents": ["\x33"]}]}, + {'regex': r"(abcd)\x33", 'nodes': [{"contents": ["abcd"]}, {"contents": ["\x33"]}]}, + {'regex': r"\x33[abcd]", 'nodes': [{"contents": ["\x33"]}, {"alphabet": "abcd"}]}, + {'regex': r"\x33(abcd)", 'nodes': [{"contents": ["\x33"]}, {"contents": ["abcd"]}]}, ) def test_escape(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data(r"?", r"*", r"+", r"{1,2}", r"what{,}ever", r"bj{}er" r"what{1, 2}", r"what{,3}ever", r"ee{l1, 2}ever", r"whddddat{\13, 2}eyyyver", r"wat{3,2d}eyyyver", r"w**r", r"w+*r", r"w*?r") @@ -180,6 +174,8 @@ def test_or_in_parenthesis(self, test_case): {'regex': r"[what1-9]", 'nodes': [{"alphabet": "what123456789"}]}, {'regex': r"[a-c1-9]", 'nodes': [{"alphabet": "abc123456789"}]}, {'regex': r"[a-c1-9fin]", 'nodes': [{"alphabet": "abc123456789fin"}]}, + {'regex': r"[a-c9-9fin]", 'nodes': [{"alphabet": "abc9fin"}]}, + {'regex': r"[pa-cwho1-9fin]", 'nodes': [{"alphabet": "pabcwho123456789fin"}]}, {'regex': r"[\x33]", 'nodes': [{"alphabet": "\x33"}]}, {'regex': r"[\x33-\x35]", 'nodes': [{"alphabet": "\x33\x34\x35"}]}, @@ -199,7 +195,6 @@ def test_basic_square_brackets_raise(self, regex): @ddt.data( - {'regex': r"|", 'nodes': [{"contents": ["",""]}]}, {'regex': r"|||", 'nodes': [{"contents": ["", "", "", ""]}]}, {'regex': r"toto|titi|tata", 'nodes': [{"contents": ["toto", "titi", 
"tata"]}]}, From dc5f2b8ed7d07228b1100efa8f146b947f6360ae Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 21 Jul 2016 10:30:57 +0200 Subject: [PATCH 33/80] Fix some RegexParser integration bugs + tests --- framework/data_model_helpers.py | 8 ++++---- test/integration/test_integration.py | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index d9f2abe..ee70e0e 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1174,9 +1174,7 @@ def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=No assert(contents is not None or alphabet is not None) if alphabet is not None: - return [Node(name=name, vt=fvt.String(alphabet=alphabet, - min_sz=-1 if qty[0] is None else qty[0], - max_sz=-1 if qty[1] is None else qty[1])), 1, 1] + return [Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] else: return [Node(name=name, vt=fvt.String(val_list=contents)), -1 if qty[0] is None else qty[0], -1 if qty[1] is None else qty[1]] @@ -1442,7 +1440,9 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): n, conf = self.__pre_handling(desc, node) - name = desc.get('name') + name = desc.get('name') if desc.get('name') is not None else node.name + if isinstance(name, tuple): + name = name[0] regexp = desc.get('contents') assert isinstance(regexp, str) diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index a8c3135..bc671d1 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -24,6 +24,7 @@ import sys import unittest +import ddt sys.path.append('.') @@ -3126,9 +3127,11 @@ def test_zip_specifics(self): self.assertEqual(zip_buff, orig_buff, msg=err_msg) +@ddt.ddt class TestDataModelHelpers(unittest.TestCase): - def test_regex(self): + @ddt.data("HTTP_version_regex", ("HTTP_version_regex", 17), ("HTTP_version_regex", "whatever")) + def test_regex(self, regex_node_name): HTTP_version_classic = \ {'name': 'HTTP_version_classic', 'contents': [ @@ -3142,7 +3145,7 @@ def test_regex(self): ]} HTTP_version_regex = \ - {'name': 'HTTP_version_regex', 'contents': "(HTTP)(/)(0|1|2|3|4|5|6|7|8|9)(.)(0|1|2|3|4|5|6|7|8|9)"} + {'name': regex_node_name, 'contents': "(HTTP)(/)(0|1|2|3|4|5|6|7|8|9)(.)(0|1|2|3|4|5|6|7|8|9)"} mh = ModelHelper() node_classic = mh.create_graph_from_desc(HTTP_version_classic) From 2021a530ff28cf69ceb9e418d2b14226e9fb74cb Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 21 Jul 2016 16:01:44 +0200 Subject: [PATCH 34/80] Fix non-terminal creation bug inside RegexParser --- framework/data_model_helpers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index ee70e0e..4b0cfbd 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1144,9 +1144,7 @@ def flush(self): # type = fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String type = fvt.String name = self._name + str(len(self.nodes) + 1) - node = self._create_terminal_node(name, type, - contents=self.values, - alphabet=self.alphabet, + node = self._create_terminal_node(name, type, contents=self.values, alphabet=self.alphabet, qty=(self.min, self.max)) self.nodes.append(node) self.reset() @@ -1176,8 +1174,7 @@ def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=No if alphabet is not None: return 
[Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] else: - return [Node(name=name, vt=fvt.String(val_list=contents)), -1 if qty[0] is None else qty[0], - -1 if qty[1] is None else qty[1]] + return [Node(name=name, vt=fvt.String(val_list=contents)), qty[0], -1 if qty[1] is None else qty[1]] def _create_non_terminal_node(self): non_terminal = [1, [MH.Copy + MH.Ordered]] @@ -1185,7 +1182,7 @@ def _create_non_terminal_node(self): for terminal in self.nodes: formatted_terminal.append(terminal) - if self.pick: + if self.pick and len(self.nodes) > 1: non_terminal.append(1) formatted_terminal = [MH.Copy + MH.Ordered] non_terminal.append(formatted_terminal) From e53f539620069fbe395321e83464b4a09dc9f2f3 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 28 Jul 2016 11:42:46 +0200 Subject: [PATCH 35/80] Make Regex-Terminal translation possible when there is no ambiguity --- framework/data_model_helpers.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 4b0cfbd..2177b59 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1444,8 +1444,14 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): assert isinstance(regexp, str) parser = RegexParser() - non_terminal_node = parser.parse(regexp, name) - n.set_subnodes_with_csts(non_terminal_node, conf=conf) + nodes = parser.parse(regexp, name) + + if len(nodes) == 2 and len(nodes[1]) == 2 and (nodes[1][1][1] == nodes[1][1][2] == 1 or + isinstance(nodes[1][1][0], fvt.String) and nodes[1][1][0].alphabet is not None): + n.set_values(value_type=nodes[1][1][0].internals[nodes[1][1][0].current_conf].value_type, conf=conf) + else: + n.set_subnodes_with_csts(nodes, conf=conf) + custo_set = desc.get('custo_set', None) custo_clear = desc.get('custo_clear', None) @@ -1455,6 +1461,16 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): internals = n.cc if conf is None else n.c[conf] internals.customize(custo) + sep_desc = desc.get('separator', None) + if sep_desc is not None: + sep_node_desc = sep_desc.get('contents', None) + assert (sep_node_desc is not None) + sep_node = self._create_graph_from_desc(sep_node_desc, n) + prefix = sep_desc.get('prefix', True) + suffix = sep_desc.get('suffix', True) + unique = sep_desc.get('unique', False) + n.set_separator_node(sep_node, prefix=prefix, suffix=suffix, unique=unique) + self._handle_common_attr(n, desc, conf) return n From 866604610de59d843bbdf3642ebf21679f51c793 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 28 Jul 2016 15:07:18 +0200 Subject: [PATCH 36/80] Add INT_Str recognition + unit tests + bug fix --- framework/data_model_helpers.py | 25 +++-- test/unit/test_data_model_helpers.py | 148 +++++++++++++++------------ 2 files changed, 101 insertions(+), 72 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 2177b59..41ae698 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -673,6 +673,8 @@ def advance(self, ctx): # pick if ctx.input == '|': return self.machine.Pick + elif ctx.input is None: + return self.machine.Final elif ctx.pick: raise Exception @@ -683,8 +685,6 @@ def advance(self, ctx): return self.machine.SquareBrackets elif ctx.input == '\\': return self.machine.Escape - elif ctx.input is None: - return self.machine.Final else: return self.machine.Main @@ -1141,10 +1141,14 @@ def flush(self): if self.min is None 
and self.max is None: self.min = self.max = 1 - # type = fvt.INT_str if all(content.isdigit() for content in self.contents) else fvt.String - type = fvt.String + if self.values is not None and all(val.isdigit() for val in self.values): + self.values = [int(i) for i in self.values] + type = fvt.INT_str + else: + type = fvt.String + name = self._name + str(len(self.nodes) + 1) - node = self._create_terminal_node(name, type, contents=self.values, alphabet=self.alphabet, + node = self._create_terminal_node(name, type, values=self.values, alphabet=self.alphabet, qty=(self.min, self.max)) self.nodes.append(node) self.reset() @@ -1167,14 +1171,19 @@ def parse(self, inputs, name): return self._create_non_terminal_node() - def _create_terminal_node(self, name, type, contents=None, alphabet=None, qty=None): + def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None): - assert(contents is not None or alphabet is not None) + assert(values is not None or alphabet is not None) if alphabet is not None: return [Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] else: - return [Node(name=name, vt=fvt.String(val_list=contents)), qty[0], -1 if qty[1] is None else qty[1]] + if type == fvt.String: + node = Node(name=name, vt=fvt.String(val_list=values)) + else: + node = Node(name=name, vt=fvt.INT_str(int_list=values)) + + return [node, qty[0], -1 if qty[1] is None else qty[1]] def _create_non_terminal_node(self): non_terminal = [1, [MH.Copy + MH.Ordered]] diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index ef32ef6..d778a3f 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -45,20 +45,20 @@ def test_quantifiers(self, test_case): @ddt.data( {'regex': r"salut(l\(es)(lou\\lous)cmoi", 'nodes': [ - {"contents": ["salut"]}, - {"contents": ["l(es"]}, - {"contents": ["lou\lous"]}, - {"contents": ["cmoi"]}, + {"values": ["salut"]}, + {"values": ["l(es"]}, + {"values": ["lou\lous"]}, + {"values": ["cmoi"]}, ]}, - {'regex': r"hi\x58", 'nodes': [{"contents": ["hi\x58"]}]}, - {'regex': r"hi\x00hola", 'nodes': [{"contents": ["hi\x00hola"]}]}, - {'regex': r"\xFFdom", 'nodes': [{"contents": ["\xFFdom"]}]}, - {'regex': r"\ddom", 'nodes': [{"alphabet": "0123456789"}, {"contents": ["dom"]}]}, - {'regex': r"dom[abcd\d]", 'nodes': [{"contents": ["dom"]}, {"alphabet": "abcd0123456789"}]}, - {'regex': r"[abcd]\x33", 'nodes': [{"alphabet": "abcd"}, {"contents": ["\x33"]}]}, - {'regex': r"(abcd)\x33", 'nodes': [{"contents": ["abcd"]}, {"contents": ["\x33"]}]}, - {'regex': r"\x33[abcd]", 'nodes': [{"contents": ["\x33"]}, {"alphabet": "abcd"}]}, - {'regex': r"\x33(abcd)", 'nodes': [{"contents": ["\x33"]}, {"contents": ["abcd"]}]}, + {'regex': r"hi\x58", 'nodes': [{"values": ["hi\x58"]}]}, + {'regex': r"hi\x00hola", 'nodes': [{"values": ["hi\x00hola"]}]}, + {'regex': r"\xFFdom", 'nodes': [{"values": ["\xFFdom"]}]}, + {'regex': r"\ddom", 'nodes': [{"alphabet": "0123456789"}, {"values": ["dom"]}]}, + {'regex': r"dom[abcd\d]", 'nodes': [{"values": ["dom"]}, {"alphabet": "abcd0123456789"}]}, + {'regex': r"[abcd]\x43", 'nodes': [{"alphabet": "abcd"}, {"values": ["\x43"]}]}, + {'regex': r"(abcd)\x53", 'nodes': [{"values": ["abcd"]}, {"values": ["\x53"]}]}, + {'regex': r"\x43[abcd]", 'nodes': [{"values": ["\x43"]}, {"alphabet": "abcd"}]}, + {'regex': r"\x43(abcd)", 'nodes': [{"values": ["\x43"]}, {"values": ["abcd"]}]}, ) def test_escape(self, test_case): self.assert_regex_is_valid(test_case) @@ 
-79,11 +79,11 @@ def test_wrong_end_raise(self, regex): {'regex': r"[abcd]*toto(|\(ab\)|cd)+what?ever", 'nodes': [ {"alphabet": "abcd", "qty": (0, None)}, - {"contents": ["toto"]}, - {"contents": ["", "(ab)", "cd"], "qty": (1, None)}, - {"contents": ["wha"]}, - {"contents": ["t"], "qty": (0, 1)}, - {"contents": ["ever"]} + {"values": ["toto"]}, + {"values": ["", "(ab)", "cd"], "qty": (1, None)}, + {"values": ["wha"]}, + {"values": ["t"], "qty": (0, 1)}, + {"values": ["ever"]} ]}, ) def test_complete(self, test_case): @@ -91,43 +91,43 @@ def test_complete(self, test_case): @ddt.data( - {'regex': r"()", 'nodes': [{"contents": [""]}]}, - {'regex': r"(z)", 'nodes': [{"contents": ["z"]}]}, - {'regex': r"(cat)", 'nodes': [{"contents": ["cat"]}]}, + {'regex': r"()", 'nodes': [{"values": [""]}]}, + {'regex': r"(z)", 'nodes': [{"values": ["z"]}]}, + {'regex': r"(cat)", 'nodes': [{"values": ["cat"]}]}, {'regex': r"hello(boat)", - 'nodes': [{"contents": ["hello"]}, {"contents": ["boat"]}]}, + 'nodes': [{"values": ["hello"]}, {"values": ["boat"]}]}, {'regex': r"(cake)awesome", - 'nodes': [{"contents": ["cake"]}, {"contents": ["awesome"]}]}, + 'nodes': [{"values": ["cake"]}, {"values": ["awesome"]}]}, {'regex': r"(foo)(bar)(foo)", - 'nodes': [{"contents": ["foo"]}, {"contents": ["bar"]}, {"contents": ["foo"]}]}, + 'nodes': [{"values": ["foo"]}, {"values": ["bar"]}, {"values": ["foo"]}]}, {'regex': r"dashboard(apple)(purple)", - 'nodes': [{"contents": ["dashboard"]}, {"contents": ["apple"]}, {"contents": ["purple"]}]}, + 'nodes': [{"values": ["dashboard"]}, {"values": ["apple"]}, {"values": ["purple"]}]}, {'regex': r"(harder)better(faster)", - 'nodes': [{"contents": ["harder"]}, {"contents": ["better"]}, {"contents": ["faster"]}]}, + 'nodes': [{"values": ["harder"]}, {"values": ["better"]}, {"values": ["faster"]}]}, {'regex': r"(stronger)(it is me)baby", - 'nodes': [{"contents": ["stronger"]}, {"contents": ["it is me"]}, {"contents": ["baby"]}]}, + 'nodes': [{"values": ["stronger"]}, {"values": ["it is me"]}, {"values": ["baby"]}]}, {'regex': r"new(york)city", - 'nodes': [{"contents": ["new"]}, {"contents": ["york"]}, {"contents": ["city"]}]}, + 'nodes': [{"values": ["new"]}, {"values": ["york"]}, {"values": ["city"]}]}, {'regex': r"()whatever", - 'nodes': [{"contents": [""]}, {"contents": ["whatever"]}]}, + 'nodes': [{"values": [""]}, {"values": ["whatever"]}]}, {'regex': r"this is it()", - 'nodes': [{"contents": ["this is it"]}, {"contents": [""]}]}, + 'nodes': [{"values": ["this is it"]}, {"values": [""]}]}, {'regex': r"this()parser()is()working", - 'nodes': [{"contents": ["this"]}, {"contents": [""]}, {"contents": ["parser"]}, {"contents": [""]}, - {"contents": ["is"]}, {"contents": [""]}, {"contents": ["working"]}]}, + 'nodes': [{"values": ["this"]}, {"values": [""]}, {"values": ["parser"]}, {"values": [""]}, + {"values": ["is"]}, {"values": [""]}, {"values": ["working"]}]}, {'regex': r"()()()", - 'nodes': [{"contents": [""]}, {"contents": [""]}, {"contents": [""]}]}, + 'nodes': [{"values": [""]}, {"values": [""]}, {"values": [""]}]}, ) def test_basic_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) @@ -136,37 +136,56 @@ def test_basic_parenthesis(self, test_case): @ddt.data( - {'regex': r"(ab|cd|)+", 'nodes': [{"contents": ["ab", "cd", ""], "qty": (1, None)}]}, - {'regex': r"(ab||cd)", 'nodes': [{"contents": ["ab", "", "cd"]}]}, - {'regex': r"(|ab|cd|ef|gh)+", 'nodes': [{"contents": ["", "ab", "cd", "ef", "gh"], "qty": (1, None)}]}, - {'regex': r"(|)+", 'nodes': [{"contents": 
["", ""], "qty": (1, None)}]}, - {'regex': r"(|||)+", 'nodes': [{"contents": ["", "", "", ""], "qty": (1, None)}]}, + {'regex': r"(ab|cd|)+", 'nodes': [{"values": ["ab", "cd", ""], "qty": (1, None)}]}, + {'regex': r"(ab||cd)", 'nodes': [{"values": ["ab", "", "cd"]}]}, + {'regex': r"(|ab|cd|ef|gh)+", 'nodes': [{"values": ["", "ab", "cd", "ef", "gh"], "qty": (1, None)}]}, + {'regex': r"(|)+", 'nodes': [{"values": ["", ""], "qty": (1, None)}]}, + {'regex': r"(|||)+", 'nodes': [{"values": ["", "", "", ""], "qty": (1, None)}]}, ) def test_or_in_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) + @ddt.data( + {'regex': r"1|2|3", 'nodes': [{"type": fvt.INT_str, "values": [1,2,3]}]}, + {'regex': r"1|2|3|foo", 'nodes': [{"values": ['1', '2', '3', 'foo']}]}, + {'regex': r"1|foo|2|3", 'nodes': [{"values": ['1', 'foo', '2', '3']}]}, + {'regex': r"foo|1|2|3", 'nodes': [{"values": ['foo', '1', '2', '3']}]}, + {'regex': r"(11|12|13)bar", + 'nodes': [{"type": fvt.INT_str, "values": [11, 12, 13]}, {"values": ['bar']}]}, + {'regex': r"(11|12|13|bar)", + 'nodes': [{"values": ['11', '12', '13', 'bar']}]}, + {'regex': r"234whatever23", 'nodes': [{"values": ['234whatever23']}]}, + {'regex': r"(234whatever23)foobar", + 'nodes': [{"values": ['234whatever23']}, {"values": ['foobar']}]}, + {'regex': r"1113|3435|3344|(hay)", + 'nodes': [{"type": fvt.INT_str, "values": [1113, 3435, 3344]}, {"values": ['hay']}]}, + ) + def test_types_recognition(self, test_case): + self.assert_regex_is_valid(test_case) + + @ddt.data( {'regex': r"[e]", 'nodes': [{"alphabet": "e"}]}, {'regex': r"[caty]", 'nodes': [{"alphabet": "caty"}]}, {'regex': r"[abcd][efghij]", 'nodes': [{"alphabet": "abcd"}, {"alphabet": "efghij"}]}, - {'regex': r"[cake]awesome", 'nodes': [{"alphabet": "cake"}, {"contents": ["awesome"]}]}, + {'regex': r"[cake]awesome", 'nodes': [{"alphabet": "cake"}, {"values": ["awesome"]}]}, {'regex': r"[foo][bar][foo]", 'nodes': [{"alphabet": "foo"}, {"alphabet": "bar"}, {"alphabet": "foo"}]}, {'regex': r"dashboard[apple][purple]", - 'nodes': [{"contents": ["dashboard"]}, {"alphabet": "apple"}, {"alphabet": "purple"}]}, + 'nodes': [{"values": ["dashboard"]}, {"alphabet": "apple"}, {"alphabet": "purple"}]}, {'regex': r"[harder]better[faster]", - 'nodes': [{"alphabet": "harder"}, {"contents": ["better"]}, {"alphabet": "faster"}]}, + 'nodes': [{"alphabet": "harder"}, {"values": ["better"]}, {"alphabet": "faster"}]}, {'regex': r"[stronger][it is me]baby", - 'nodes': [{"alphabet": "stronger"}, {"alphabet": "it is me"}, {"contents": ["baby"]}]}, + 'nodes': [{"alphabet": "stronger"}, {"alphabet": "it is me"}, {"values": ["baby"]}]}, {'regex': r"new[york]city", - 'nodes': [{"contents": ["new"]}, {"alphabet": "york"}, {"contents": ["city"]}]}, + 'nodes': [{"values": ["new"]}, {"alphabet": "york"}, {"values": ["city"]}]}, {'regex': r"[a-e]", 'nodes': [{"alphabet": "abcde"}]}, {'regex': r"[a-ewxy]", 'nodes': [{"alphabet": "abcdewxy"}]}, @@ -195,22 +214,22 @@ def test_basic_square_brackets_raise(self, regex): @ddt.data( - {'regex': r"|", 'nodes': [{"contents": ["",""]}]}, - {'regex': r"|||", 'nodes': [{"contents": ["", "", "", ""]}]}, - {'regex': r"toto|titi|tata", 'nodes': [{"contents": ["toto", "titi", "tata"]}]}, - {'regex': r"toto|titi|", 'nodes': [{"contents": ["toto", "titi", ""]}]}, - {'regex': r"toto||tata", 'nodes': [{"contents": ["toto", "", "tata"]}]}, - {'regex': r"|titi|tata", 'nodes': [{"contents": ["", "titi", "tata"]}]}, - {'regex': r"coucou|[abcd]|", 'nodes': [{"contents": ["coucou"]}, {"alphabet": 
"abcd"}, {"contents": [""]}]}, + {'regex': r"|", 'nodes': [{"values": ["",""]}]}, + {'regex': r"|||", 'nodes': [{"values": ["", "", "", ""]}]}, + {'regex': r"toto|titi|tata", 'nodes': [{"values": ["toto", "titi", "tata"]}]}, + {'regex': r"toto|titi|", 'nodes': [{"values": ["toto", "titi", ""]}]}, + {'regex': r"toto||tata", 'nodes': [{"values": ["toto", "", "tata"]}]}, + {'regex': r"|titi|tata", 'nodes': [{"values": ["", "titi", "tata"]}]}, + {'regex': r"coucou|[abcd]|", 'nodes': [{"values": ["coucou"]}, {"alphabet": "abcd"}, {"values": [""]}]}, {'regex': r"|[hao]|[salut]?", - 'nodes': [{"contents": [""]}, {"alphabet": "hao"}, {"alphabet": "salut", "qty": (0, 1)}]}, + 'nodes': [{"values": [""]}, {"alphabet": "hao"}, {"alphabet": "salut", "qty": (0, 1)}]}, {'regex': r"coucou||[salut]?", - 'nodes': [{"contents": ["coucou", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + 'nodes': [{"values": ["coucou", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, {'regex': r"coucou||||[salut]?", - 'nodes': [{"contents": ["coucou", "", "", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + 'nodes': [{"values": ["coucou", "", "", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, {'regex': r"[whatever]+|[hao]|[salut]?", 'nodes': [ @@ -221,23 +240,23 @@ def test_basic_square_brackets_raise(self, regex): {'regex': r"(whatever)+|(hao)|(salut)?", 'nodes': [ - {"contents": ["whatever"], "qty": (1, None)}, - {"contents": ["hao"]}, - {"contents": ["salut"], "qty": (0, 1)} + {"values": ["whatever"], "qty": (1, None)}, + {"values": ["hao"]}, + {"values": ["salut"], "qty": (0, 1)} ]}, {'regex': r"tata|haha|c*|b*|[abcd]+", 'nodes': [ - {"contents": ["tata", "haha"]}, - {"contents": ["c"], "qty": (0, None)}, - {"contents": ["b"], "qty": (0, None)}, + {"values": ["tata", "haha"]}, + {"values": ["c"], "qty": (0, None)}, + {"values": ["b"], "qty": (0, None)}, {"alphabet": "abcd", "qty": (1, None)} ]}, {'regex': r"(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ - {"contents": ["tata"], "qty": (1, None)}, - {"contents": ["haha", "tata", ""]}, - {"contents": ["b"], "qty": (0, None)}, + {"values": ["tata"], "qty": (1, None)}, + {"values": ["haha", "tata", ""]}, + {"values": ["b"], "qty": (0, None)}, {"alphabet": "abcd", "qty": (1, None)} ]}, ) @@ -255,11 +274,12 @@ def assert_regex_is_valid(self, test_case): nodes = test_case['nodes'] for i in range(0, len(nodes)): - contents = nodes[i]['contents'] if 'contents' in nodes[i] else None + type = nodes[i]['type'] if 'type' in nodes[i] else vt.String + values = nodes[i]['values'] if 'values' in nodes[i] else None alphabet = nodes[i]['alphabet'] if 'alphabet' in nodes[i] else None qty = nodes[i]['qty'] if 'qty' in nodes[i] else (1, 1) - calls.append(mock.call("name" + str(i + 1), vt.String, contents=contents, alphabet=alphabet, qty=qty)) + calls.append(mock.call("name" + str(i + 1), type, values=values, alphabet=alphabet, qty=qty)) self._parser._create_terminal_node.assert_has_calls(calls) From 789314be39245fa4f2f617b28bfbc14faaa6e4b7 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Fri, 29 Jul 2016 09:44:53 +0200 Subject: [PATCH 37/80] Add support for unicode chars + fix issues with python 2/3 compativility --- framework/data_model_helpers.py | 36 +--- test/unit/test_data_model_helpers.py | 278 +++++++++++++-------------- 2 files changed, 144 insertions(+), 170 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 41ae698..d1c109c 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -31,6 +31,7 @@ import traceback 
import datetime +import six ################################ # ModelWalker Helper Functions # @@ -595,51 +596,24 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input == None: raise EscapeError("Nothing to escape.") - elif ctx.input == 'x': - return self.machine.Hexadecimal elif ctx.input in ('s','S','d','D','w','W','\\','(',')','[',']','{','}','+','?','*','|','-'): return self.machine.Final else: raise EscapeError("Character to escape is not special. It is useless to escape it.") - class Hexadecimal(State): - - def _run(self, ctx): - pass - - def advance(self, ctx): - if ctx.input in string.hexdigits: - return self.machine.Digit - else: - raise EscapeError("\\x must be followed with two hexadecimal digits: none provided.") - - class Digit(State): - - def _run(self, ctx): - self.machine.escaped += ctx.input - - def advance(self, ctx): - if ctx.input in list(string.hexdigits) and len(self.machine.escaped) == 1: - return self.__class__ - elif len(self.machine.escaped) == 2: - self.machine.escaped = self.machine.escaped.decode("hex") - return None - else: - raise EscapeError("\\x must be followed with two hexadecimal digits: only one provided.") - class Final(State): def _run(self, ctx): def get_complement(not_allowed_chars): - return ''.join([chr(int(i)) for i in range(0, 0xFF) if chr(int(i)) not in not_allowed_chars]) + return ''.join([six.unichr(int(i)) for i in range(0, 0xFFFF) if six.unichr(int(i)) not in not_allowed_chars]) shortcuts = {'s': string.whitespace, 'S': get_complement(string.whitespace), 'd': string.digits, 'D': get_complement(string.digits), - 'w': string.letters + string.digits + '_', - 'W': get_complement(string.letters + string.digits + '_')} + 'w': string.ascii_letters + string.digits + '_', + 'W': get_complement(string.ascii_letters + string.digits + '_')} self.machine.escaped = shortcuts[ctx.input] if ctx.input in shortcuts else ctx.input @@ -1077,7 +1051,7 @@ def advance(self, ctx): pass else: for i in range(ord(ctx.alphabet[-1]) + 1, ord(self.escaped) + 1): - ctx.append_to_alphabet(chr(i)) + ctx.append_to_alphabet(six.unichr(i)) else: ctx.append_to_alphabet(self.escaped) diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index d778a3f..6ef4c1d 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -20,70 +20,70 @@ def setUp(self): def tearDown(self): pass - @ddt.data(r"(sa(lu))(les)(louloux)", r"(salut)(les(louloux)", r"(salut))les(louloux)", - r"(sal*ut)oo", r"(sal?ut)oo", r"sal{utoo", r"(sal+ut)oo", r"(sal{u)too", - r"(sal{2}u)too", r"sal{2,1}utoo", r"sal(u[t]o)o", - r"whatever|toto?ff", r"whate?ver|toto", r"(toto)*ohoho|haha", r"(toto)ohoho|haha", - 'salut[abcd]{,15}rr', r"[]whatever", r"t{,15}") + @ddt.data(u"(sa(lu))(les)(louloux)", u"(salut)(les(louloux)", u"(salut))les(louloux)", + u"(sal*ut)oo", u"(sal?ut)oo", u"sal{utoo", u"(sal+ut)oo", u"(sal{u)too", + u"(sal{2}u)too", u"sal{2,1}utoo", u"sal(u[t]o)o", + u"whatever|toto?ff", u"whate?ver|toto", u"(toto)*ohoho|haha", u"(toto)ohoho|haha", + u"salut[abcd]{,15}rr", u"[]whatever", u"t{,15}") def test_invalid_regexes(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': r"[abcd]?", 'nodes': [{"alphabet": "abcd", "qty": (0, 1)}]}, - {'regex': r"[abcd]*", 'nodes': [{"alphabet": "abcd", "qty": (0, None)}]}, - {'regex': r"[abcd]+", 'nodes': [{"alphabet": "abcd", "qty": (1, None)}]}, - {'regex': r"[abcd]{7}", 'nodes': [{"alphabet": "abcd", "qty": (7, 7)}]}, - {'regex': r"[abcd]{2,7}", 'nodes': [{"alphabet": "abcd", 
"qty": (2, 7)}]}, - {'regex': r"[abcd]{0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, - {'regex': r"[abcd]{0,0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, - {'regex': r"[abcd]{3,}", 'nodes': [{"alphabet": "abcd", "qty": (3, None)}]}, + {'regex': u"[abcd]?", 'nodes': [{"alphabet": u"abcd", "qty": (0, 1)}]}, + {'regex': u"[abcd]*", 'nodes': [{"alphabet": u"abcd", "qty": (0, None)}]}, + {'regex': u"[abcd]+", 'nodes': [{"alphabet": u"abcd", "qty": (1, None)}]}, + {'regex': u"[abcd]{7}", 'nodes': [{"alphabet": u"abcd", "qty": (7, 7)}]}, + {'regex': u"[abcd]{2,7}", 'nodes': [{"alphabet": u"abcd", "qty": (2, 7)}]}, + {'regex': u"[abcd]{0}", 'nodes': [{"alphabet": u"abcd", "qty": (0, 0)}]}, + {'regex': u"[abcd]{0,0}", 'nodes': [{"alphabet": u"abcd", "qty": (0, 0)}]}, + {'regex': u"[abcd]{3,}", 'nodes': [{"alphabet": u"abcd", "qty": (3, None)}]}, ) def test_quantifiers(self, test_case): self.assert_regex_is_valid(test_case) @ddt.data( - {'regex': r"salut(l\(es)(lou\\lous)cmoi", + {'regex': u"salut(l\(es)(lou\\\\lous)cmoi", 'nodes': [ - {"values": ["salut"]}, - {"values": ["l(es"]}, - {"values": ["lou\lous"]}, - {"values": ["cmoi"]}, + {"values": [u"salut"]}, + {"values": [u"l(es"]}, + {"values": [u"lou\lous"]}, + {"values": [u"cmoi"]}, ]}, - {'regex': r"hi\x58", 'nodes': [{"values": ["hi\x58"]}]}, - {'regex': r"hi\x00hola", 'nodes': [{"values": ["hi\x00hola"]}]}, - {'regex': r"\xFFdom", 'nodes': [{"values": ["\xFFdom"]}]}, - {'regex': r"\ddom", 'nodes': [{"alphabet": "0123456789"}, {"values": ["dom"]}]}, - {'regex': r"dom[abcd\d]", 'nodes': [{"values": ["dom"]}, {"alphabet": "abcd0123456789"}]}, - {'regex': r"[abcd]\x43", 'nodes': [{"alphabet": "abcd"}, {"values": ["\x43"]}]}, - {'regex': r"(abcd)\x53", 'nodes': [{"values": ["abcd"]}, {"values": ["\x53"]}]}, - {'regex': r"\x43[abcd]", 'nodes': [{"values": ["\x43"]}, {"alphabet": "abcd"}]}, - {'regex': r"\x43(abcd)", 'nodes': [{"values": ["\x43"]}, {"values": ["abcd"]}]}, + {'regex': u"hi\x58", 'nodes': [{"values": [u"hi\x58"]}]}, + {'regex': u"hi\x00hola", 'nodes': [{"values": [u"hi\x00hola"]}]}, + {'regex': u"\xFFdom", 'nodes': [{"values": [u"\xFFdom"]}]}, + {'regex': u"\ddom", 'nodes': [{"alphabet": u"0123456789"}, {"values": [u"dom"]}]}, + {'regex': u"dom[abcd\d]", 'nodes': [{"values": [u"dom"]}, {"alphabet": u"abcd0123456789"}]}, + {'regex': u"[abcd]\x43", 'nodes': [{"alphabet": u"abcd"}, {"values": [u"\x43"]}]}, + {'regex': u"(abcd)\x53", 'nodes': [{"values": [u"abcd"]}, {"values": [u"\x53"]}]}, + {'regex': u"\x43[abcd]", 'nodes': [{"values": [u"\x43"]}, {"alphabet": u"abcd"}]}, + {'regex': u"\x43(abcd)", 'nodes': [{"values": [u"\x43"]}, {"values": [u"abcd"]}]}, ) def test_escape(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data(r"?", r"*", r"+", r"{1,2}", r"what{,}ever", r"bj{}er" - r"what{1, 2}", r"what{,3}ever", r"ee{l1, 2}ever", r"whddddat{\13, 2}eyyyver", - r"wat{3,2d}eyyyver", r"w**r", r"w+*r", r"w*?r") + @ddt.data(u"?", u"*", u"+", u"{1,2}", u"what{,}ever", u"bj{}er" + u"what{1, 2}", u"what{,3}ever", u"ee{l1, 2}ever", u"whddddat{\13, 2}eyyyver", + u"wat{3,2d}eyyyver", u"w**r", u"w+*r", u"w*?r") def test_quantifier_raise(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data(r"salut(", r"dd[", r"(", r"[", r"{0") + @ddt.data(u"salut(", u"dd[", u"(", u"[", u"{0") def test_wrong_end_raise(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': r"[abcd]*toto(|\(ab\)|cd)+what?ever", + {'regex': u"[abcd]*toto(|\(ab\)|cd)+what?ever", 'nodes': [ - {"alphabet": "abcd", "qty": (0, None)}, - 
{"values": ["toto"]}, - {"values": ["", "(ab)", "cd"], "qty": (1, None)}, - {"values": ["wha"]}, - {"values": ["t"], "qty": (0, 1)}, - {"values": ["ever"]} + {"alphabet": u"abcd", "qty": (0, None)}, + {"values": [u"toto"]}, + {"values": [u"", u"(ab)", u"cd"], "qty": (1, None)}, + {"values": [u"wha"]}, + {"values": [u"t"], "qty": (0, 1)}, + {"values": [u"ever"]} ]}, ) def test_complete(self, test_case): @@ -91,43 +91,43 @@ def test_complete(self, test_case): @ddt.data( - {'regex': r"()", 'nodes': [{"values": [""]}]}, - {'regex': r"(z)", 'nodes': [{"values": ["z"]}]}, - {'regex': r"(cat)", 'nodes': [{"values": ["cat"]}]}, + {'regex': u"()", 'nodes': [{"values": [u""]}]}, + {'regex': u"(z)", 'nodes': [{"values": [u"z"]}]}, + {'regex': u"(cat)", 'nodes': [{"values": [u"cat"]}]}, - {'regex': r"hello(boat)", - 'nodes': [{"values": ["hello"]}, {"values": ["boat"]}]}, + {'regex': u"hello(boat)", + 'nodes': [{"values": [u"hello"]}, {"values": [u"boat"]}]}, - {'regex': r"(cake)awesome", - 'nodes': [{"values": ["cake"]}, {"values": ["awesome"]}]}, + {'regex': u"(cake)awesome", + 'nodes': [{"values": [u"cake"]}, {"values": [u"awesome"]}]}, - {'regex': r"(foo)(bar)(foo)", - 'nodes': [{"values": ["foo"]}, {"values": ["bar"]}, {"values": ["foo"]}]}, + {'regex': u"(foo)(bar)(foo)", + 'nodes': [{"values": [u"foo"]}, {"values": [u"bar"]}, {"values": [u"foo"]}]}, - {'regex': r"dashboard(apple)(purple)", - 'nodes': [{"values": ["dashboard"]}, {"values": ["apple"]}, {"values": ["purple"]}]}, + {'regex': u"dashboard(apple)(purple)", + 'nodes': [{"values": [u"dashboard"]}, {"values": [u"apple"]}, {"values": [u"purple"]}]}, - {'regex': r"(harder)better(faster)", - 'nodes': [{"values": ["harder"]}, {"values": ["better"]}, {"values": ["faster"]}]}, + {'regex': u"(harder)better(faster)", + 'nodes': [{"values": [u"harder"]}, {"values": [u"better"]}, {"values": [u"faster"]}]}, - {'regex': r"(stronger)(it is me)baby", - 'nodes': [{"values": ["stronger"]}, {"values": ["it is me"]}, {"values": ["baby"]}]}, + {'regex': u"(stronger)(it is me)baby", + 'nodes': [{"values": [u"stronger"]}, {"values": [u"it is me"]}, {"values": [u"baby"]}]}, - {'regex': r"new(york)city", - 'nodes': [{"values": ["new"]}, {"values": ["york"]}, {"values": ["city"]}]}, + {'regex': u"new(york)city", + 'nodes': [{"values": [u"new"]}, {"values": [u"york"]}, {"values": [u"city"]}]}, - {'regex': r"()whatever", - 'nodes': [{"values": [""]}, {"values": ["whatever"]}]}, + {'regex': u"()whatever", + 'nodes': [{"values": [u""]}, {"values": [u"whatever"]}]}, - {'regex': r"this is it()", - 'nodes': [{"values": ["this is it"]}, {"values": [""]}]}, + {'regex': u"this is it()", + 'nodes': [{"values": [u"this is it"]}, {"values": [u""]}]}, - {'regex': r"this()parser()is()working", - 'nodes': [{"values": ["this"]}, {"values": [""]}, {"values": ["parser"]}, {"values": [""]}, - {"values": ["is"]}, {"values": [""]}, {"values": ["working"]}]}, + {'regex': u"this()parser()is()working", + 'nodes': [{"values": [u"this"]}, {"values": [u""]}, {"values": [u"parser"]}, {"values": [u""]}, + {"values": [u"is"]}, {"values": [u""]}, {"values": [u"working"]}]}, - {'regex': r"()()()", - 'nodes': [{"values": [""]}, {"values": [""]}, {"values": [""]}]}, + {'regex': u"()()()", + 'nodes': [{"values": [u""]}, {"values": [u""]}, {"values": [u""]}]}, ) def test_basic_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) @@ -136,30 +136,30 @@ def test_basic_parenthesis(self, test_case): @ddt.data( - {'regex': r"(ab|cd|)+", 'nodes': [{"values": ["ab", "cd", ""], "qty": (1, 
None)}]}, - {'regex': r"(ab||cd)", 'nodes': [{"values": ["ab", "", "cd"]}]}, - {'regex': r"(|ab|cd|ef|gh)+", 'nodes': [{"values": ["", "ab", "cd", "ef", "gh"], "qty": (1, None)}]}, - {'regex': r"(|)+", 'nodes': [{"values": ["", ""], "qty": (1, None)}]}, - {'regex': r"(|||)+", 'nodes': [{"values": ["", "", "", ""], "qty": (1, None)}]}, + {'regex': u"(ab|cd|)+", 'nodes': [{"values": [u"ab", u"cd", u""], "qty": (1, None)}]}, + {'regex': u"(ab||cd)", 'nodes': [{"values": [u"ab", u"", u"cd"]}]}, + {'regex': u"(|ab|cd|ef|gh)+", 'nodes': [{"values": [u"", u"ab", u"cd", u"ef", u"gh"], "qty": (1, None)}]}, + {'regex': u"(|)+", 'nodes': [{"values": [u"", u""], "qty": (1, None)}]}, + {'regex': u"(|||)+", 'nodes': [{"values": [u"", u"", u"", u""], "qty": (1, None)}]}, ) def test_or_in_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) @ddt.data( - {'regex': r"1|2|3", 'nodes': [{"type": fvt.INT_str, "values": [1,2,3]}]}, - {'regex': r"1|2|3|foo", 'nodes': [{"values": ['1', '2', '3', 'foo']}]}, - {'regex': r"1|foo|2|3", 'nodes': [{"values": ['1', 'foo', '2', '3']}]}, - {'regex': r"foo|1|2|3", 'nodes': [{"values": ['foo', '1', '2', '3']}]}, - {'regex': r"(11|12|13)bar", - 'nodes': [{"type": fvt.INT_str, "values": [11, 12, 13]}, {"values": ['bar']}]}, - {'regex': r"(11|12|13|bar)", - 'nodes': [{"values": ['11', '12', '13', 'bar']}]}, - {'regex': r"234whatever23", 'nodes': [{"values": ['234whatever23']}]}, - {'regex': r"(234whatever23)foobar", - 'nodes': [{"values": ['234whatever23']}, {"values": ['foobar']}]}, - {'regex': r"1113|3435|3344|(hay)", - 'nodes': [{"type": fvt.INT_str, "values": [1113, 3435, 3344]}, {"values": ['hay']}]}, + {'regex': u"1|2|3", 'nodes': [{"type": fvt.INT_str, "values": [1,2,3]}]}, + {'regex': u"1|2|3|foo", 'nodes': [{"values": [u'1', u'2', u'3', u'foo']}]}, + {'regex': u"1|foo|2|3", 'nodes': [{"values": [u'1', u'foo', u'2', u'3']}]}, + {'regex': u"foo|1|2|3", 'nodes': [{"values": [u'foo', u'1', u'2', u'3']}]}, + {'regex': u"(11|12|13)bar", + 'nodes': [{"type": fvt.INT_str, "values": [11, 12, 13]}, {"values": [u'bar']}]}, + {'regex': u"(11|12|13|bar)", + 'nodes': [{"values": [u'11', u'12', u'13', u'bar']}]}, + {'regex': u"234whatever23", 'nodes': [{"values": [u'234whatever23']}]}, + {'regex': u"(234whatever23)foobar", + 'nodes': [{"values": [u'234whatever23']}, {"values": [u'foobar']}]}, + {'regex': u"1113|3435|3344|(hay)", + 'nodes': [{"type": fvt.INT_str, "values": [1113, 3435, 3344]}, {"values": [u'hay']}]}, ) def test_types_recognition(self, test_case): self.assert_regex_is_valid(test_case) @@ -167,97 +167,97 @@ def test_types_recognition(self, test_case): @ddt.data( - {'regex': r"[e]", 'nodes': [{"alphabet": "e"}]}, - {'regex': r"[caty]", 'nodes': [{"alphabet": "caty"}]}, - {'regex': r"[abcd][efghij]", 'nodes': [{"alphabet": "abcd"}, {"alphabet": "efghij"}]}, - {'regex': r"[cake]awesome", 'nodes': [{"alphabet": "cake"}, {"values": ["awesome"]}]}, + {'regex': u"[e]", 'nodes': [{"alphabet": u"e"}]}, + {'regex': u"[caty]", 'nodes': [{"alphabet": u"caty"}]}, + {'regex': u"[abcd][efghij]", 'nodes': [{"alphabet": u"abcd"}, {"alphabet": u"efghij"}]}, + {'regex': u"[cake]awesome", 'nodes': [{"alphabet": u"cake"}, {"values": [u"awesome"]}]}, - {'regex': r"[foo][bar][foo]", + {'regex': u"[foo][bar][foo]", 'nodes': [{"alphabet": "foo"}, {"alphabet": "bar"}, {"alphabet": "foo"}]}, - {'regex': r"dashboard[apple][purple]", - 'nodes': [{"values": ["dashboard"]}, {"alphabet": "apple"}, {"alphabet": "purple"}]}, + {'regex': u"dashboard[apple][purple]", + 'nodes': [{"values": 
[u"dashboard"]}, {"alphabet": u"apple"}, {"alphabet": u"purple"}]}, - {'regex': r"[harder]better[faster]", - 'nodes': [{"alphabet": "harder"}, {"values": ["better"]}, {"alphabet": "faster"}]}, + {'regex': u"[harder]better[faster]", + 'nodes': [{"alphabet": u"harder"}, {"values": [u"better"]}, {"alphabet": u"faster"}]}, - {'regex': r"[stronger][it is me]baby", - 'nodes': [{"alphabet": "stronger"}, {"alphabet": "it is me"}, {"values": ["baby"]}]}, + {'regex': u"[stronger][it is me]baby", + 'nodes': [{"alphabet": u"stronger"}, {"alphabet": u"it is me"}, {"values": [u"baby"]}]}, - {'regex': r"new[york]city", - 'nodes': [{"values": ["new"]}, {"alphabet": "york"}, {"values": ["city"]}]}, + {'regex': u"new[york]city", + 'nodes': [{"values": [u"new"]}, {"alphabet": u"york"}, {"values": [u"city"]}]}, - {'regex': r"[a-e]", 'nodes': [{"alphabet": "abcde"}]}, - {'regex': r"[a-ewxy]", 'nodes': [{"alphabet": "abcdewxy"}]}, - {'regex': r"[1-9]", 'nodes': [{"alphabet": "123456789"}]}, - {'regex': r"[what1-9]", 'nodes': [{"alphabet": "what123456789"}]}, - {'regex': r"[a-c1-9]", 'nodes': [{"alphabet": "abc123456789"}]}, - {'regex': r"[a-c1-9fin]", 'nodes': [{"alphabet": "abc123456789fin"}]}, - {'regex': r"[a-c9-9fin]", 'nodes': [{"alphabet": "abc9fin"}]}, - {'regex': r"[pa-cwho1-9fin]", 'nodes': [{"alphabet": "pabcwho123456789fin"}]}, + {'regex': u"[a-e]", 'nodes': [{"alphabet": u"abcde"}]}, + {'regex': u"[a-ewxy]", 'nodes': [{"alphabet": u"abcdewxy"}]}, + {'regex': u"[1-9]", 'nodes': [{"alphabet": u"123456789"}]}, + {'regex': u"[what1-9]", 'nodes': [{"alphabet": u"what123456789"}]}, + {'regex': u"[a-c1-9]", 'nodes': [{"alphabet": u"abc123456789"}]}, + {'regex': u"[a-c1-9fin]", 'nodes': [{"alphabet": u"abc123456789fin"}]}, + {'regex': u"[a-c9-9fin]", 'nodes': [{"alphabet": u"abc9fin"}]}, + {'regex': u"[pa-cwho1-9fin]", 'nodes': [{"alphabet": u"pabcwho123456789fin"}]}, - {'regex': r"[\x33]", 'nodes': [{"alphabet": "\x33"}]}, - {'regex': r"[\x33-\x35]", 'nodes': [{"alphabet": "\x33\x34\x35"}]}, - {'regex': r"[e\x33-\x35a]", 'nodes': [{"alphabet": "e\x33\x34\x35a"}]} + {'regex': u"[\x33]", 'nodes': [{"alphabet": u"\x33"}]}, + {'regex': u"[\x33-\x35]", 'nodes': [{"alphabet": u"\x33\x34\x35"}]}, + {'regex': u"[e\x33-\x35a]", 'nodes': [{"alphabet": u"e\x33\x34\x35a"}]} ) def test_basic_square_brackets(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data(r"[\x33-\x23]", r"[3-1]", r"[y-a]", r"[\x3-\x34]", r"[\x3g]") + @ddt.data(u"[\x33-\x23]", u"[3-1]", u"[y-a]") def test_wrong_alphabet(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data(r"[]", r"stronger[]baby", r"strongerbaby[]", r"[]strongerbaby", r"stro[]nger[]baby[]") + @ddt.data(u"[]", u"stronger[]baby", u"strongerbaby[]", u"[]strongerbaby", u"stro[]nger[]baby[]") def test_basic_square_brackets_raise(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': r"|", 'nodes': [{"values": ["",""]}]}, - {'regex': r"|||", 'nodes': [{"values": ["", "", "", ""]}]}, - {'regex': r"toto|titi|tata", 'nodes': [{"values": ["toto", "titi", "tata"]}]}, - {'regex': r"toto|titi|", 'nodes': [{"values": ["toto", "titi", ""]}]}, - {'regex': r"toto||tata", 'nodes': [{"values": ["toto", "", "tata"]}]}, - {'regex': r"|titi|tata", 'nodes': [{"values": ["", "titi", "tata"]}]}, - {'regex': r"coucou|[abcd]|", 'nodes': [{"values": ["coucou"]}, {"alphabet": "abcd"}, {"values": [""]}]}, + {'regex': u"|", 'nodes': [{"values": [u"",u""]}]}, + {'regex': u"|||", 'nodes': [{"values": [u"", u"", u"", u""]}]}, + {'regex': u"toto|titi|tata", 'nodes': 
[{"values": [u"toto", u"titi", u"tata"]}]}, + {'regex': u"toto|titi|", 'nodes': [{"values": [u"toto", u"titi", u""]}]}, + {'regex': u"toto||tata", 'nodes': [{"values": [u"toto", u"", u"tata"]}]}, + {'regex': u"|titi|tata", 'nodes': [{"values": [u"", u"titi", u"tata"]}]}, + {'regex': u"coucou|[abcd]|", 'nodes': [{"values": [u"coucou"]}, {"alphabet": u"abcd"}, {"values": [u""]}]}, - {'regex': r"|[hao]|[salut]?", - 'nodes': [{"values": [""]}, {"alphabet": "hao"}, {"alphabet": "salut", "qty": (0, 1)}]}, + {'regex': u"|[hao]|[salut]?", + 'nodes': [{"values": [u""]}, {"alphabet": u"hao"}, {"alphabet": u"salut", "qty": (0, 1)}]}, - {'regex': r"coucou||[salut]?", - 'nodes': [{"values": ["coucou", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + {'regex': u"coucou||[salut]?", + 'nodes': [{"values": [u"coucou", u""]}, {"alphabet": u"salut", "qty": (0, 1)}]}, - {'regex': r"coucou||||[salut]?", - 'nodes': [{"values": ["coucou", "", "", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, + {'regex': u"coucou||||[salut]?", + 'nodes': [{"values": [u"coucou", u"", u"", u""]}, {"alphabet": u"salut", "qty": (0, 1)}]}, - {'regex': r"[whatever]+|[hao]|[salut]?", + {'regex': u"[whatever]+|[hao]|[salut]?", 'nodes': [ - {"alphabet": "whatever", "qty": (1, None)}, - {"alphabet": "hao"}, - {"alphabet": "salut", "qty": (0, 1)} + {"alphabet": u"whatever", "qty": (1, None)}, + {"alphabet": u"hao"}, + {"alphabet": u"salut", "qty": (0, 1)} ]}, - {'regex': r"(whatever)+|(hao)|(salut)?", + {'regex': u"(whatever)+|(hao)|(salut)?", 'nodes': [ - {"values": ["whatever"], "qty": (1, None)}, - {"values": ["hao"]}, - {"values": ["salut"], "qty": (0, 1)} + {"values": [u"whatever"], "qty": (1, None)}, + {"values": [u"hao"]}, + {"values": [u"salut"], "qty": (0, 1)} ]}, - {'regex': r"tata|haha|c*|b*|[abcd]+", 'nodes': [ - {"values": ["tata", "haha"]}, - {"values": ["c"], "qty": (0, None)}, - {"values": ["b"], "qty": (0, None)}, - {"alphabet": "abcd", "qty": (1, None)} + {'regex': u"tata|haha|c*|b*|[abcd]+", 'nodes': [ + {"values": [u"tata", u"haha"]}, + {"values": [u"c"], "qty": (0, None)}, + {"values": [u"b"], "qty": (0, None)}, + {"alphabet": u"abcd", "qty": (1, None)} ]}, - {'regex': r"(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ - {"values": ["tata"], "qty": (1, None)}, - {"values": ["haha", "tata", ""]}, - {"values": ["b"], "qty": (0, None)}, - {"alphabet": "abcd", "qty": (1, None)} + {'regex': u"(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ + {"values": [u"tata"], "qty": (1, None)}, + {"values": [u"haha", u"tata", u""]}, + {"values": [u"b"], "qty": (0, None)}, + {"alphabet": u"abcd", "qty": (1, None)} ]}, ) def test_shape(self, test_case): From e14ef3b928a6fb6f8a40663e4b713ae5f8890c07 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Fri, 29 Jul 2016 09:56:07 +0200 Subject: [PATCH 38/80] Add unit tests for unicode support --- framework/data_model_helpers.py | 2 +- test/unit/test_data_model_helpers.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index d1c109c..d431155 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1010,7 +1010,7 @@ def _run(self, ctx): pass else: for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): - ctx.append_to_alphabet(chr(i)) + ctx.append_to_alphabet(six.unichr(i)) else: ctx.append_to_alphabet(ctx.input) diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index 6ef4c1d..41365f2 100644 --- a/test/unit/test_data_model_helpers.py 
+++ b/test/unit/test_data_model_helpers.py
@@ -59,6 +59,8 @@ def test_quantifiers(self, test_case):
         {'regex': u"(abcd)\x53", 'nodes': [{"values": [u"abcd"]}, {"values": [u"\x53"]}]},
         {'regex': u"\x43[abcd]", 'nodes': [{"values": [u"\x43"]}, {"alphabet": u"abcd"}]},
         {'regex': u"\x43(abcd)", 'nodes': [{"values": [u"\x43"]}, {"values": [u"abcd"]}]},
+        {'regex': u"\u0443(abcd)", 'nodes': [{"values": [u"\u0443"]}, {"values": [u"abcd"]}]},
+        {'regex': u"hi(ab\u0443cd)", 'nodes': [{"values": [u"hi"]}, {"values": [u"ab\u0443cd"]}]},
     )
     def test_escape(self, test_case):
         self.assert_regex_is_valid(test_case)
@@ -198,12 +200,17 @@ def test_types_recognition(self, test_case):
 
         {'regex': u"[\x33]", 'nodes': [{"alphabet": u"\x33"}]},
         {'regex': u"[\x33-\x35]", 'nodes': [{"alphabet": u"\x33\x34\x35"}]},
-        {'regex': u"[e\x33-\x35a]", 'nodes': [{"alphabet": u"e\x33\x34\x35a"}]}
+        {'regex': u"[e\x33-\x35a]", 'nodes': [{"alphabet": u"e\x33\x34\x35a"}]},
+
+        {'regex': u"[\u0033]", 'nodes': [{"alphabet": u"\u0033"}]},
+        {'regex': u"[\u0003-\u0005]", 'nodes': [{"alphabet": u"\u0003\u0004\u0005"}]},
+        {'regex': u"[\u0333-\u0335]", 'nodes': [{"alphabet": u"\u0333\u0334\u0335"}]},
+        {'regex': u"[e\u4133-\u4135a]", 'nodes': [{"alphabet": u"e\u4133\u4134\u4135a"}]}
     )
     def test_basic_square_brackets(self, test_case):
         self.assert_regex_is_valid(test_case)
 
-    @ddt.data(u"[\x33-\x23]", u"[3-1]", u"[y-a]")
+    @ddt.data(u"[\x33-\x23]", u"[\u7633-\u7323]", u"[3-1]", u"[y-a]")
     def test_wrong_alphabet(self, regex):
         self.assert_regex_is_invalid(regex)
 

From 999f6e68c2f19f3feebaacff43c159b2bbbfd924 Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Mon, 1 Aug 2016 09:41:52 +0200
Subject: [PATCH 39/80] RegexParser clean up

---
 framework/data_model_helpers.py      | 370 +++++++++++++--------------
 framework/error_handling.py          |  50 +++-
 test/unit/test_data_model_helpers.py |   2 +-
 3 files changed, 219 insertions(+), 203 deletions(-)

diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index d431155..3fb6699 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -516,7 +516,7 @@ def _handle_attrs(n, set_attrs, clear_attrs):
 
 class State(object):
     """
-    Represent a state at the lower level
+    Represent states at the lower level
     """
     def __init__(self, machine):
         """
@@ -527,17 +527,20 @@ def __init__(self, machine):
         self.init_specific()
 
     def init_specific(self):
+        """
+        Can be overridden to perform additional initializations
+        """
        pass
 
     def _run(self, context):
+        raise NotImplementedError
+
+    def run(self, context):
         """
         Do some actions on the current character.
         Args:
            context (StateMachine): root state machine (global context)
         """
-        raise NotImplementedError
-
-    def run(self, context):
         self._run(context)
         context.inputs.pop(0)
 
@@ -548,12 +551,15 @@ def advance(self, context):
             context (StateMachine): root state machine (global context)
 
         Returns:
-            Class of the next state de run or None if we are in a final state
+            Class of the next state to run (None if we are in a final state)
         """
         raise NotImplementedError
 
 
 class StateMachine(State):
+    """
+    Represent states that contain other states.
+ """ class Initial(State): pass @@ -584,29 +590,30 @@ def run(self, context): class EscapeState(StateMachine): + """ + Represent states that can handle chars in an alternative way + """ def init_specific(self): - self.escaped = None + self.translation = None class Initial(State): def _run(self, ctx): - self.machine.escaped = "" + self.machine.translation = "" def advance(self, ctx): - if ctx.input == None: - raise EscapeError("Nothing to escape.") - elif ctx.input in ('s','S','d','D','w','W','\\','(',')','[',']','{','}','+','?','*','|','-'): + if ctx.input in ('s','S','d','D','w','W','\\','(',')','[',']','{','}','+','?','*','|','-'): return self.machine.Final else: - raise EscapeError("Character to escape is not special. It is useless to escape it.") + raise EscapeError(ctx.input) class Final(State): def _run(self, ctx): - def get_complement(not_allowed_chars): - return ''.join([six.unichr(int(i)) for i in range(0, 0xFFFF) if six.unichr(int(i)) not in not_allowed_chars]) + def get_complement(chars): + return ''.join([six.unichr(i) for i in range(0, 0xFFFF) if six.unichr(i) not in chars]) shortcuts = {'s': string.whitespace, 'S': get_complement(string.whitespace), @@ -615,7 +622,7 @@ def get_complement(not_allowed_chars): 'w': string.ascii_letters + string.digits + '_', 'W': get_complement(string.ascii_letters + string.digits + '_')} - self.machine.escaped = shortcuts[ctx.input] if ctx.input in shortcuts else ctx.input + self.machine.translation = shortcuts[ctx.input] if ctx.input in shortcuts else ctx.input def advance(self, context): return None @@ -623,7 +630,9 @@ def advance(self, context): class GroupingState(StateMachine): - + """ + Represent states that parse portions of regular expression that delimit terminal nodes + """ class Final(State): def _run(self, context): @@ -634,9 +643,10 @@ def advance(self, context): def advance(self, ctx): if ctx.input in (')', '}', ']'): - raise Exception + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) - # quantifier specified elif ctx.input in ('*', '+', '?'): return self.machine.QtyState elif ctx.input == '{': @@ -644,15 +654,13 @@ def advance(self, ctx): else: ctx.flush() - # pick if ctx.input == '|': - return self.machine.Pick + return self.machine.Choice elif ctx.input is None: return self.machine.Final - elif ctx.pick: - raise Exception + elif ctx.choice: + raise InconvertibilityError() - # continue with something else if ctx.input == '(': return self.machine.Parenthesis elif ctx.input == '[': @@ -672,9 +680,12 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError("Nothing to quantify.") + raise QuantificationError() elif ctx.input in ('}', ')', ']'): - raise GroupingError("Unopened " + ctx.input) + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) + elif ctx.input == '[': return self.machine.SquareBrackets elif ctx.input == '(': @@ -682,15 +693,29 @@ def advance(self, ctx): elif ctx.input == '\\': return self.machine.Escape else: - ctx.append_to_buffer("") + ctx.append_to_contents("") if ctx.input == '|': - return self.machine.Pick + return self.machine.Choice elif ctx.input is None: return self.machine.Final else: return self.machine.Main + + class Choice(Initial): + + def _run(self, ctx): + if not ctx.choice: + # if is it still possible to build a NT with multiple shapes + if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): + ctx.choice = True + else: + raise InconvertibilityError() 
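+            # a sketch of a descriptive comment (assumption from the surrounding logic):
+            # ctx.choice is already True here, i.e. the choice context is already set up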
+ else: + pass + + class Final(State): def _run(self, ctx): @@ -704,10 +729,10 @@ class Escape(EscapeState): def advance(self, ctx): - if len(self.escaped) > 1: + if len(self.translation) > 1: - if ctx.pick and len(ctx.values) > 1 and len(ctx.buffer) > 1: - raise UnconvertibleRegexError() + if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: + raise InconvertibilityError() if ctx.buffer is not None: @@ -719,11 +744,11 @@ def advance(self, ctx): else: ctx.flush() - ctx.append_to_alphabet(self.escaped) + ctx.append_to_alphabet(self.translation) return self.machine.states[self.machine.SquareBrackets].advance(ctx) else: - ctx.append_to_buffer(self.escaped) + ctx.append_to_buffer(self.translation) return self.machine.states[self.machine.Main].advance(ctx) @@ -735,21 +760,16 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input == '(': return self.machine.Parenthesis - elif ctx.input == '[': return self.machine.SquareBrackets - elif ctx.input == '\\': return self.machine.Escape - elif ctx.input == '|': - return self.machine.Pick - + return self.machine.Choice elif ctx.input in ('?', '*', '+', '{'): - # pick - if ctx.pick and len(ctx.values) > 1 and len(ctx.buffer) > 1: - raise UnconvertibleRegexError() + if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: + raise InconvertibilityError() if len(ctx.buffer) == 1: if len(ctx.values) > 1: @@ -758,7 +778,7 @@ def advance(self, ctx): ctx.flush() ctx.append_to_buffer(content) - else: # len(ctx.buffer) > 1 + else: content = ctx.buffer[-1] ctx.buffer = ctx.buffer[:-1] ctx.flush() @@ -770,72 +790,44 @@ def advance(self, ctx): return self.machine.QtyState elif ctx.input in ('}',')',']'): - raise GroupingError("Unopened " + ctx.input) - + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) elif ctx.input is None: return self.machine.Final return self.__class__ - class Pick(State): - - def _run(self, ctx): - - if not ctx.pick: - if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): - ctx.pick = True - else: - raise UnconvertibleRegexError() - - def advance(self, ctx): - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.SquareBrackets - else: - ctx.append_to_contents("") - - if ctx.input == '|': - return self.__class__ - elif ctx.input is None: - return self.machine.Final - else: - return self.machine.Main - - class QtyState(State): def _run(self, ctx): - if ctx.input == '+': - ctx.min = 1 - elif ctx.input == '?': - ctx.max = 1 - - if ctx.min is None: - ctx.min = 0 + ctx.min = 1 if ctx.input == '+' else 0 + ctx.max = 1 if ctx.input == '?' 
else None ctx.flush() def advance(self, ctx): if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError("Nothing to quantify.") - + raise QuantificationError() elif ctx.input in ('}', ')', ']'): - raise GroupingError("Unopened " + ctx.input) + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) + elif ctx.input == '|': + return self.machine.Choice + elif ctx.input is None: + return self.machine.Final - elif ctx.input == '(': + if ctx.choice: + raise InconvertibilityError() + + if ctx.input == '(': return self.machine.Parenthesis elif ctx.input == '[': return self.machine.SquareBrackets - elif ctx.input == '|': - return self.machine.Pick elif ctx.input == '\\': return self.machine.Escape - elif ctx.input is None: - return self.machine.Final else: return self.machine.Main @@ -849,50 +841,42 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input.isdigit(): - return self.machine.BeforeComma + return self.machine.Min else: - raise QuantificationError("{} content needs to start with digit(s).") + raise QuantificationError() - class BeforeComma(State): + class Min(State): def _run(self, ctx): ctx.min += ctx.input def advance(self, context): if context.input.isdigit(): - return self.machine.BeforeComma + return self.__class__ elif context.input == ',': return self.machine.Comma elif context.input == '}': return self.machine.Final else: - raise QuantificationError("{} can only contain digits and a comma.") + raise QuantificationError() - class Comma(State): + class Max(State): def _run(self, ctx): - ctx.max = "" + ctx.max += ctx.input def advance(self, context): if context.input.isdigit(): - return self.machine.AfterComma + return self.machine.Max elif context.input == '}': return self.machine.Final else: - raise QuantificationError("{} can only contain digits and a comma.") + raise QuantificationError() - class AfterComma(State): + class Comma(Max): def _run(self, ctx): - ctx.max += ctx.input - - def advance(self, context): - if context.input.isdigit(): - return self.machine.AfterComma - elif context.input == '}': - return self.machine.Final - else: - raise Exception + ctx.max = "" class Final(State): def _run(self, ctx): @@ -906,7 +890,7 @@ def _run(self, ctx): ctx.max = int(ctx.max) if ctx.max is not None and ctx.min > ctx.max: - raise QuantificationError("{a,b}: a <= b not verified.") + raise QuantificationError(u"{X,Y}: X \u2264 Y constraint not respected.") ctx.flush() @@ -914,24 +898,8 @@ def advance(self, context): return None def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', ')', ']'): - raise Exception - elif ctx.input == '|': - return self.machine.Pick - elif ctx.input is None: - return self.machine.Final - else: - if ctx.pick: - raise Exception + self.machine.states[self.machine.QtyState].advance(ctx) - if ctx.input == '}': - raise Exception - elif ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - else: - return self.machine.Main class Parenthesis(GroupingState): @@ -942,8 +910,14 @@ def _run(self, ctx): ctx.append_to_buffer("") def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', '[', ']', None): - raise Exception + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ']', None): + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) + elif ctx.input in ('(', '['): + raise InconvertibilityError() elif ctx.input == '\\': return self.machine.Escape elif ctx.input == 
')': @@ -951,7 +925,7 @@ def advance(self, ctx): else: return self.machine.Main - class Main(State): + class Main(Initial): def _run(self, ctx): if ctx.input == '|': ctx.append_to_contents("") @@ -959,103 +933,111 @@ def _run(self, ctx): ctx.append_to_buffer(ctx.input) def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', '[', ']', None): - raise Exception - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input == ')': - return self.machine.Final - else: - return self.__class__ + if ctx.input in ('?', '*', '+', '{'): + raise InconvertibilityError() + return self.machine.Initial.advance(self, ctx) - class Escape(EscapeState): + + class Escape(EscapeState, Main): def advance(self, ctx): - if len(self.escaped) > 1: - raise UnconvertibleRegexError() + if len(self.translation) > 1: + raise InconvertibilityError() else: - ctx.append_to_buffer(self.escaped) - return self.machine.states[self.machine.Main].advance(ctx) + ctx.append_to_buffer(self.translation) + return self.machine.Main.advance(self, ctx) class SquareBrackets(GroupingState): - def init_specific(self): - self.range = None - class Initial(State): def _run(self, ctx): - self.machine.range = False ctx.flush() ctx.append_to_alphabet("") def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '|', '-', None): - raise Exception + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ')', None): + raise StructureError(ctx.input) + elif ctx.input in ('(', '['): + raise InconvertibilityError() + elif ctx.input in ('-', '|'): + raise EscapeError(ctx.input) + elif ctx.input == ']': + raise EmptyAlphabetError() elif ctx.input == '\\': return self.machine.Escape else: - return self.machine.Inside + return self.machine.BeforeRange - class Inside(State): + + class BeforeRange(Initial): def _run(self, ctx): - if self.machine.range: - self.machine.range = False - if ctx.alphabet[-1] > ctx.input: - raise Exception - elif ctx.input == ctx.alphabet[-1]: - pass - else: - for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): - ctx.append_to_alphabet(six.unichr(i)) - else: - ctx.append_to_alphabet(ctx.input) + ctx.append_to_alphabet(ctx.input) def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', '|', None): - raise Exception - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input == ']': + if ctx.input == ']': return self.machine.Final elif ctx.input == '-': return self.machine.Range else: - return self.__class__ + return self.machine.Initial.advance(self, ctx) + class Range(State): def _run(self, ctx): - self.machine.range = True + pass def advance(self, ctx): - if ctx.input in ('*', '+', '?', '{', '}', '(', ')', '[', ']', '-', '|', None): - raise Exception + if ctx.input in ('?', '*', '+', '{', '}', '(', ')', '[', ']', '|', '-', None): + raise InvalidRange() elif ctx.input == '\\': - return self.machine.Escape + return self.machine.EscapeAfterRange + else: + return self.machine.AfterRange + + + class AfterRange(Initial): + def _run(self, ctx): + if ctx.alphabet[-1] > ctx.input: + raise Exception + elif ctx.input == ctx.alphabet[-1]: + pass + else: + for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): + ctx.append_to_alphabet(six.unichr(i)) + + def advance(self, ctx): + if ctx.input == ']': + return self.machine.Final else: - return self.machine.Inside + return self.machine.Initial.advance(self, ctx) - class Escape(EscapeState): + + class Escape(EscapeState, BeforeRange): def 
advance(self, ctx): - if self.machine.range: - self.machine.range = False - if len(self.escaped) > 1: - raise Exception - elif ctx.alphabet[-1] > self.escaped: - raise Exception - elif self.escaped == ctx.alphabet[-1]: - pass - else: - for i in range(ord(ctx.alphabet[-1]) + 1, ord(self.escaped) + 1): - ctx.append_to_alphabet(six.unichr(i)) + ctx.append_to_alphabet(self.translation) + return self.machine.BeforeRange.advance(self, ctx) + + class EscapeAfterRange(EscapeState, AfterRange): + + def advance(self, ctx): + if len(self.translation) > 1: + raise InvalidRange() + elif ctx.alphabet[-1] > self.escaped: + raise InvalidRange() + elif self.translation == ctx.alphabet[-1]: + pass else: - ctx.append_to_alphabet(self.escaped) + for i in range(ord(ctx.alphabet[-1]) + 1, ord(self.translation) + 1): + ctx.append_to_alphabet(six.unichr(i)) - return self.machine.states[self.machine.Inside].advance(ctx) + return self.machine.AfterRange.advance(self, ctx) @@ -1064,12 +1046,12 @@ def init_specific(self): self.values = None self.alphabet = None - self.pick = False + self.choice = False self.min = None self.max = None - self._nodes = [] + self.nodes = [] def append_to_contents(self, content): @@ -1099,22 +1081,16 @@ def buffer(self, buffer): self.values = [""] self.values[-1] = buffer - @property - def nodes(self): - return self._nodes - - @property - def nothing_to_flush(self): - return self.values is None and self.alphabet is None - def flush(self): - if self.nothing_to_flush: + if self.values is None and self.alphabet is None: return + # set default values for min & max if none was provided if self.min is None and self.max is None: self.min = self.max = 1 + # guess the type of the terminal node to create if self.values is not None and all(val.isdigit() for val in self.values): self.values = [int(i) for i in self.values] type = fvt.INT_str @@ -1122,9 +1098,8 @@ def flush(self): type = fvt.String name = self._name + str(len(self.nodes) + 1) - node = self._create_terminal_node(name, type, values=self.values, alphabet=self.alphabet, - qty=(self.min, self.max)) - self.nodes.append(node) + self.nodes.append(self._create_terminal_node(name, type, values=self.values, + alphabet=self.alphabet, qty=(self.min, self.max))) self.reset() @@ -1139,7 +1114,6 @@ def parse(self, inputs, name): # None indicates the beginning and the end of the regex self.inputs = [None] + list(inputs) + [None] - self.run(self) return self._create_non_terminal_node() @@ -1165,7 +1139,7 @@ def _create_non_terminal_node(self): for terminal in self.nodes: formatted_terminal.append(terminal) - if self.pick and len(self.nodes) > 1: + if self.choice and len(self.nodes) > 1: non_terminal.append(1) formatted_terminal = [MH.Copy + MH.Ordered] non_terminal.append(formatted_terminal) diff --git a/framework/error_handling.py b/framework/error_handling.py index 9fd07fe..d8e07ae 100644 --- a/framework/error_handling.py +++ b/framework/error_handling.py @@ -31,7 +31,49 @@ class UserInterruption(Exception): pass class DataModelDefinitionError(Exception): pass class RegexParserError(DataModelDefinitionError): pass -class EscapeError(RegexParserError): pass -class QuantificationError(RegexParserError): pass -class GroupingError(RegexParserError): pass -class UnconvertibleRegexError(RegexParserError): pass \ No newline at end of file + +class EscapeError(RegexParserError): + + def __init__(self, char=None): + if char is None: + message = "Nothing to escape." 
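+        # descriptive note (inferred from the branches below): only the parser's
+        # special characters listed in the next test may legitimately be escaped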
+        elif char in ('\\','(',')','[',']','{','}','+','?','*','|','-'):
+            message = char + " is a special character: it needs to be escaped in order to be used in this context."
+        else:
+            message = char + " is not a special character: it is useless to escape it."
+        RegexParserError.__init__(self, message)
+
+class QuantificationError(RegexParserError):
+
+    def __init__(self, message=None):
+
+        if message is None:
+            message = u"Quantifier must be specified as follows: {X[,Y]} with X \u2264 Y."
+
+        RegexParserError.__init__(self, message)
+
+
+
+class StructureError(RegexParserError):
+
+    def __init__(self, char):
+        message = ""
+        if char == '}':
+            message = "Unopened bracket, nothing to close."
+        elif char == ')':
+            message = "Unopened parenthesis, nothing to close."
+        elif char == "]":
+            message = "Unopened square bracket, nothing to close."
+        else:
+            message = "Unclosed element."
+        RegexParserError.__init__(self, message)
+
+
+class InconvertibilityError(RegexParserError):
+
+    def __init__(self):
+        RegexParserError.__init__(self, "The described regular expression is too complex: it cannot be " +
+                                        "translated into a non-terminal only composed of terminal ones.")
+
+class EmptyAlphabetError(RegexParserError): pass
+class InvalidRange(RegexParserError): pass
diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py
index 41365f2..fdd5e43 100644
--- a/test/unit/test_data_model_helpers.py
+++ b/test/unit/test_data_model_helpers.py
@@ -24,7 +24,7 @@ def tearDown(self):
               u"(sal*ut)oo", u"(sal?ut)oo", u"sal{utoo", u"(sal+ut)oo", u"(sal{u)too",
               u"(sal{2}u)too", u"sal{2,1}utoo", u"sal(u[t]o)o",
               u"whatever|toto?ff", u"whate?ver|toto", u"(toto)*ohoho|haha", u"(toto)ohoho|haha",
-              u"salut[abcd]{,15}rr", u"[]whatever", u"t{,15}")
+              u"salut[abcd]{,15}rr", u"[]whatever", u"t{,15}", u"hi|b?whatever", u"hi|b{3}whatever")
     def test_invalid_regexes(self, regex):
         self.assert_regex_is_invalid(regex)
 

From addbf279ef37f595ff3406f1e969f22bc57db553 Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Mon, 1 Aug 2016 15:56:08 +0200
Subject: [PATCH 40/80] Revamp Escape states + usage of decorators to declare states

---
 framework/data_model_helpers.py | 329 ++++++++++++++++++--------------
 framework/error_handling.py     |   5 +
 2 files changed, 193 insertions(+), 141 deletions(-)

diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index 3fb6699..b3649fa 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -513,7 +513,6 @@ def _handle_attrs(n, set_attrs, clear_attrs):
         n.clear_attr(ca)
 
 
-
 class State(object):
     """
     Represent states at the lower level
@@ -561,15 +560,12 @@ class StateMachine(State):
     Represent states that contain other states.
""" - class Initial(State): - pass - def __init__(self, machine=None): self.states = {} self.inputs = None for name, cls in inspect.getmembers(self.__class__): - if inspect.isclass(cls) and issubclass(cls, State): + if inspect.isclass(cls) and issubclass(cls, State) and hasattr(cls, 'INITIAL'): self.states[cls] = cls(self) State.__init__(self, self if machine is None else machine) @@ -585,94 +581,29 @@ def _run(self, context): self.state = self.states[next_state] if next_state is not None else None def run(self, context): - self.state = self.states[self.Initial] - self._run(context) - - -class EscapeState(StateMachine): - """ - Represent states that can handle chars in an alternative way - """ - - def init_specific(self): - self.translation = None - - class Initial(State): - - def _run(self, ctx): - self.machine.translation = "" - - def advance(self, ctx): - if ctx.input in ('s','S','d','D','w','W','\\','(',')','[',']','{','}','+','?','*','|','-'): - return self.machine.Final - else: - raise EscapeError(ctx.input) - - class Final(State): - - def _run(self, ctx): - - def get_complement(chars): - return ''.join([six.unichr(i) for i in range(0, 0xFFFF) if six.unichr(i) not in chars]) - - shortcuts = {'s': string.whitespace, - 'S': get_complement(string.whitespace), - 'd': string.digits, - 'D': get_complement(string.digits), - 'w': string.ascii_letters + string.digits + '_', - 'W': get_complement(string.ascii_letters + string.digits + '_')} - - self.machine.translation = shortcuts[ctx.input] if ctx.input in shortcuts else ctx.input - - def advance(self, context): - return None - - - -class GroupingState(StateMachine): - """ - Represent states that parse portions of regular expression that delimit terminal nodes - """ - class Final(State): - - def _run(self, context): - pass + for state in self.states: + if state.INITIAL: + self.state = self.states[state] + break + else: + raise InitialStateNotFound() - def advance(self, context): - return None + self._run(context) - def advance(self, ctx): - if ctx.input in (')', '}', ']'): - raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) - elif ctx.input in ('*', '+', '?'): - return self.machine.QtyState - elif ctx.input == '{': - return self.machine.Brackets - else: - ctx.flush() +def register(cls): + cls.INITIAL = False + return cls - if ctx.input == '|': - return self.machine.Choice - elif ctx.input is None: - return self.machine.Final - elif ctx.choice: - raise InconvertibilityError() - - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.Escape - else: - return self.machine.Main +def initial(cls): + cls.INITIAL = True + return cls class RegexParser(StateMachine): + + @initial class Initial(State): def _run(self, ctx): @@ -703,6 +634,7 @@ def advance(self, ctx): return self.machine.Main + @register class Choice(Initial): def _run(self, ctx): @@ -716,6 +648,7 @@ def _run(self, ctx): pass + @register class Final(State): def _run(self, ctx): @@ -725,33 +658,7 @@ def advance(self, ctx): return None - class Escape(EscapeState): - - def advance(self, ctx): - - if len(self.translation) > 1: - - if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: - raise InconvertibilityError() - - if ctx.buffer is not None: - - if len(ctx.buffer) == 0: - - if len(ctx.values[:-1]) > 0: - ctx.values = ctx.values[:-1] - ctx.flush() - else: - ctx.flush() - - ctx.append_to_alphabet(self.translation) - return 
self.machine.states[self.machine.SquareBrackets].advance(ctx) - - else: - ctx.append_to_buffer(self.translation) - return self.machine.states[self.machine.Main].advance(ctx) - - + @register class Main(State): def _run(self, ctx): @@ -799,6 +706,7 @@ def advance(self, ctx): return self.__class__ + @register class QtyState(State): def _run(self, ctx): @@ -832,8 +740,10 @@ def advance(self, ctx): return self.machine.Main - class Brackets(StateMachine): + @register + class Brackets(StateMachine, QtyState): + @initial class Initial(State): def _run(self, ctx): @@ -845,6 +755,7 @@ def advance(self, ctx): else: raise QuantificationError() + @register class Min(State): def _run(self, ctx): @@ -860,6 +771,7 @@ def advance(self, context): else: raise QuantificationError() + @register class Max(State): def _run(self, ctx): @@ -873,11 +785,13 @@ def advance(self, context): else: raise QuantificationError() + @register class Comma(Max): def _run(self, ctx): ctx.max = "" + @register class Final(State): def _run(self, ctx): ctx.min = int(ctx.min) @@ -898,11 +812,48 @@ def advance(self, context): return None def advance(self, ctx): - self.machine.states[self.machine.QtyState].advance(ctx) + self.machine.QtyState.advance(self, ctx) + + + class GroupingState(State): + """ + Represent states that parse portions of regular expression that delimit terminal nodes + """ + + def advance(self, ctx): + if ctx.input in (')', '}', ']'): + raise StructureError(ctx.input) + elif ctx.input == '-': + raise EscapeError(ctx.input) + + elif ctx.input in ('*', '+', '?'): + return self.machine.QtyState + elif ctx.input == '{': + return self.machine.Brackets + else: + ctx.flush() + + if ctx.input == '|': + return self.machine.Choice + elif ctx.input is None: + return self.machine.Final + elif ctx.choice: + raise InconvertibilityError() + + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + else: + return self.machine.Main - class Parenthesis(GroupingState): + @register + class Parenthesis(StateMachine, GroupingState): + @initial class Initial(State): def _run(self, ctx): @@ -922,15 +873,25 @@ def advance(self, ctx): return self.machine.Escape elif ctx.input == ')': return self.machine.Final + elif ctx.input == '|': + return self.machine.Choice else: return self.machine.Main + @register + class Final(State): + + def _run(self, context): + pass + + def advance(self, context): + return None + + + @register class Main(Initial): def _run(self, ctx): - if ctx.input == '|': - ctx.append_to_contents("") - else: - ctx.append_to_buffer(ctx.input) + ctx.append_to_buffer(ctx.input) def advance(self, ctx): if ctx.input in ('?', '*', '+', '{'): @@ -938,20 +899,37 @@ def advance(self, ctx): return self.machine.Initial.advance(self, ctx) + @register + class Choice(Initial): - class Escape(EscapeState, Main): + def _run(self, context): + context.append_to_contents("") - def advance(self, ctx): + def advance(self, context): + if context.input in ('?', '*', '+', '{'): + raise QuantificationError() - if len(self.translation) > 1: + return self.machine.Initial.advance(self, context) + + @register + class Escape(State): + + def _run(self, ctx): + pass + + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: raise InconvertibilityError() + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.Main else: - ctx.append_to_buffer(self.translation) - return self.machine.Main.advance(self, ctx) + raise 
EscapeError(ctx.input) - class SquareBrackets(GroupingState): + @register + class SquareBrackets(StateMachine, GroupingState): + @initial class Initial(State): def _run(self, ctx): @@ -970,11 +948,22 @@ def advance(self, ctx): elif ctx.input == ']': raise EmptyAlphabetError() elif ctx.input == '\\': - return self.machine.Escape + return self.machine.EscapeBeforeRange else: return self.machine.BeforeRange + @register + class Final(State): + + def _run(self, context): + pass + + def advance(self, context): + return None + + + @register class BeforeRange(Initial): def _run(self, ctx): ctx.append_to_alphabet(ctx.input) @@ -987,7 +976,7 @@ def advance(self, ctx): else: return self.machine.Initial.advance(self, ctx) - + @register class Range(State): def _run(self, ctx): pass @@ -1000,11 +989,11 @@ def advance(self, ctx): else: return self.machine.AfterRange - + @register class AfterRange(Initial): def _run(self, ctx): if ctx.alphabet[-1] > ctx.input: - raise Exception + raise InvalidRange() elif ctx.input == ctx.alphabet[-1]: pass else: @@ -1017,28 +1006,74 @@ def advance(self, ctx): else: return self.machine.Initial.advance(self, ctx) + @register + class EscapeBeforeRange(State): - class Escape(EscapeState, BeforeRange): + def _run(self, ctx): + pass def advance(self, ctx): - ctx.append_to_alphabet(self.translation) - return self.machine.BeforeRange.advance(self, ctx) + if ctx.input in ctx.META_SEQUENCES: + return self.machine.EscapeMetaSequence + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.BeforeRange + else: + raise EscapeError(ctx.input) - class EscapeAfterRange(EscapeState, AfterRange): + @register + class EscapeMetaSequence(BeforeRange): + + def _run(self, ctx): + ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) + + @register + class EscapeAfterRange(State): + + def _run(self, ctx): + pass def advance(self, ctx): - if len(self.translation) > 1: - raise InvalidRange() - elif ctx.alphabet[-1] > self.escaped: + if ctx.input in ctx.META_SEQUENCES: raise InvalidRange() - elif self.translation == ctx.alphabet[-1]: - pass + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.AfterRange else: - for i in range(ord(ctx.alphabet[-1]) + 1, ord(self.translation) + 1): - ctx.append_to_alphabet(six.unichr(i)) + raise EscapeError(ctx.input) + + + @register + class Escape(State): + + def _run(self, ctx): + pass + + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: + return self.machine.EscapeMetaSequence + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.Main + else: + raise EscapeError(ctx.input) + + + @register + class EscapeMetaSequence(GroupingState): + + def _run(self, ctx): + if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: + raise InconvertibilityError() + + if ctx.buffer is not None: + + if len(ctx.buffer) == 0: - return self.machine.AfterRange.advance(self, ctx) + if len(ctx.values[:-1]) > 0: + ctx.values = ctx.values[:-1] + ctx.flush() + else: + ctx.flush() + ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) def init_specific(self): @@ -1112,6 +1147,18 @@ def reset(self): def parse(self, inputs, name): self._name = name + def get_complement(chars): + return ''.join([six.unichr(i) for i in range(0, 0xFFFF) if six.unichr(i) not in chars]) + + self.META_SEQUENCES = {'s': string.whitespace, + 'S': get_complement(string.whitespace), + 'd': string.digits, + 'D': get_complement(string.digits), + 'w': string.ascii_letters + string.digits + '_',} + # 'W': get_complement(string.ascii_letters + string.digits + '_')} + + self.SPECIAL_CHARS = 
list('\\()[]{}*+?|-') + # None indicates the beginning and the end of the regex self.inputs = [None] + list(inputs) + [None] self.run(self) diff --git a/framework/error_handling.py b/framework/error_handling.py index d8e07ae..e24784a 100644 --- a/framework/error_handling.py +++ b/framework/error_handling.py @@ -77,3 +77,8 @@ def __init__(self): class EmptyAlphabetError(RegexParserError): pass class InvalidRange(RegexParserError): pass + +class InitialStateNotFound(RegexParserError): + + def __init__(self): + RegexParserError.__init__(self, "No state was declared as initial.") From 8bf446ec725fb40becb60db62db31f251cc3c3d3 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Tue, 2 Aug 2016 11:29:49 +0200 Subject: [PATCH 41/80] Add charset keyword --- framework/data_model_helpers.py | 67 ++++++++++++++++++---------- framework/error_handling.py | 5 +++ test/unit/test_data_model_helpers.py | 47 +++++++++++-------- 3 files changed, 78 insertions(+), 41 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index b3649fa..23c3825 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -91,6 +91,16 @@ class MH(object): Copy = 'u' ZeroCopy = 's' + + ############################## + ### Regex Parser Specific #### + ############################## + + class Charset: + ASCII = 1 + ASCII_EXT = 2 + UNICODE = 3 + ########################## ### Node Customization ### ########################## @@ -540,6 +550,10 @@ def run(self, context): Args: context (StateMachine): root state machine (global context) """ + if context.input is not None and \ + ((context.charset == MH.Charset.ASCII and ord(context.input) > 0x7F) or + (context.charset == MH.Charset.ASCII_EXT and ord(context.input) > 0xFF)): + raise CharsetError() self._run(context) context.inputs.pop(0) @@ -639,7 +653,7 @@ class Choice(Initial): def _run(self, ctx): if not ctx.choice: - # if is it still possible to build a NT with multiple shapes + # if it is still possible to build a NT with multiple shapes if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): ctx.choice = True else: @@ -703,7 +717,7 @@ def advance(self, ctx): elif ctx.input is None: return self.machine.Final - return self.__class__ + return self.machine.Main @register @@ -763,7 +777,7 @@ def _run(self, ctx): def advance(self, context): if context.input.isdigit(): - return self.__class__ + return self.machine.Min elif context.input == ',': return self.machine.Comma elif context.input == '}': @@ -815,10 +829,7 @@ def advance(self, ctx): self.machine.QtyState.advance(self, ctx) - class GroupingState(State): - """ - Represent states that parse portions of regular expression that delimit terminal nodes - """ + class Group(State): def advance(self, ctx): if ctx.input in (')', '}', ']'): @@ -851,7 +862,7 @@ def advance(self, ctx): @register - class Parenthesis(StateMachine, GroupingState): + class Parenthesis(StateMachine, Group): @initial class Initial(State): @@ -902,14 +913,14 @@ def advance(self, ctx): @register class Choice(Initial): - def _run(self, context): - context.append_to_contents("") + def _run(self, ctx): + ctx.append_to_contents("") - def advance(self, context): - if context.input in ('?', '*', '+', '{'): + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): raise QuantificationError() - return self.machine.Initial.advance(self, context) + return self.machine.Initial.advance(self, ctx) @register class Escape(State): @@ -927,7 +938,7 @@ def advance(self, ctx): @register - class 
SquareBrackets(StateMachine, GroupingState): + class SquareBrackets(StateMachine, Group): @initial class Initial(State): @@ -956,10 +967,10 @@ def advance(self, ctx): @register class Final(State): - def _run(self, context): + def _run(self, ctx): pass - def advance(self, context): + def advance(self, ctx): return None @@ -1057,7 +1068,7 @@ def advance(self, ctx): @register - class EscapeMetaSequence(GroupingState): + class EscapeMetaSequence(Group): def _run(self, ctx): if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: @@ -1078,6 +1089,8 @@ def _run(self, ctx): def init_specific(self): self._name = None + self.charset = None + self.values = None self.alphabet = None @@ -1144,18 +1157,26 @@ def reset(self): self.min = None self.max = None - def parse(self, inputs, name): + def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): self._name = name + self.charset = charset + + if self.charset == MH.Charset.ASCII: + max = 0x7F + elif self.charset == MH.Charset.UNICODE: + max = 0xFFFF + else: + max = 0xFF def get_complement(chars): - return ''.join([six.unichr(i) for i in range(0, 0xFFFF) if six.unichr(i) not in chars]) + return ''.join([six.unichr(i) for i in range(0, max + 1) if six.unichr(i) not in chars]) self.META_SEQUENCES = {'s': string.whitespace, 'S': get_complement(string.whitespace), 'd': string.digits, 'D': get_complement(string.digits), - 'w': string.ascii_letters + string.digits + '_',} - # 'W': get_complement(string.ascii_letters + string.digits + '_')} + 'w': string.ascii_letters + string.digits + '_', + 'W': get_complement(string.ascii_letters + string.digits + '_')} self.SPECIAL_CHARS = list('\\()[]{}*+?|-') @@ -1225,7 +1246,7 @@ class ModelHelper(object): 'exists_if', 'exists_if_not', 'exists_if/and', 'exists_if/or', 'sync_size_with', 'sync_enc_size_with', - 'post_freeze' + 'post_freeze', 'charset' ] def __init__(self, dm=None, delayed_jobs=True, add_env=True): @@ -1448,7 +1469,7 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): assert isinstance(regexp, str) parser = RegexParser() - nodes = parser.parse(regexp, name) + nodes = parser.parse(regexp, name, desc.get('charset')) if len(nodes) == 2 and len(nodes[1]) == 2 and (nodes[1][1][1] == nodes[1][1][2] == 1 or isinstance(nodes[1][1][0], fvt.String) and nodes[1][1][0].alphabet is not None): diff --git a/framework/error_handling.py b/framework/error_handling.py index e24784a..3de236c 100644 --- a/framework/error_handling.py +++ b/framework/error_handling.py @@ -82,3 +82,8 @@ class InitialStateNotFound(RegexParserError): def __init__(self): RegexParserError.__init__(self, "No state was declared as initial.") + +class CharsetError(RegexParserError): + + def __init__(self): + RegexParserError.__init__(self, "Some character(s) into the regex are incoherent with the provided charset.") diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index fdd5e43..7d665a1 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -20,11 +20,13 @@ def setUp(self): def tearDown(self): pass - @ddt.data(u"(sa(lu))(les)(louloux)", u"(salut)(les(louloux)", u"(salut))les(louloux)", - u"(sal*ut)oo", u"(sal?ut)oo", u"sal{utoo", u"(sal+ut)oo", u"(sal{u)too", - u"(sal{2}u)too", u"sal{2,1}utoo", u"sal(u[t]o)o", - u"whatever|toto?ff", u"whate?ver|toto", u"(toto)*ohoho|haha", u"(toto)ohoho|haha", - u"salut[abcd]{,15}rr", u"[]whatever", u"t{,15}", u"hi|b?whatever", u"hi|b{3}whatever") + @ddt.data({'regex': u"(sa(lu))(les)(louloux)"}, {'regex': 
u"(salut)(les(louloux)"}, + {'regex': u"(salut))les(louloux)"}, {'regex': u"(sal*ut)oo"}, {'regex': u"(sal?ut)oo"}, + {'regex': u"sal{utoo"}, {'regex': u"(sal+ut)oo"}, {'regex': u"(sal{u)too"}, + {'regex': u"(sal{2}u)too"}, {'regex': u"sal{2,1}utoo"}, {'regex': u"sal(u[t]o)o"}, + {'regex': u"whatever|toto?ff"}, {'regex': u"whate?ver|toto"}, {'regex': u"(toto)*ohoho|haha"}, + {'regex': u"(toto)ohoho|haha"}, {'regex': u"salut[abcd]{,15}rr"}, {'regex': u"[]whatever"}, + {'regex': u"t{,15}"}, {'regex': u"hi|b?whatever"}, {'regex': u"hi|b{3}whatever"}) def test_invalid_regexes(self, regex): self.assert_regex_is_invalid(regex) @@ -59,20 +61,23 @@ def test_quantifiers(self, test_case): {'regex': u"(abcd)\x53", 'nodes': [{"values": [u"abcd"]}, {"values": [u"\x53"]}]}, {'regex': u"\x43[abcd]", 'nodes': [{"values": [u"\x43"]}, {"alphabet": u"abcd"}]}, {'regex': u"\x43(abcd)", 'nodes': [{"values": [u"\x43"]}, {"values": [u"abcd"]}]}, - {'regex': u"\u0443(abcd)", 'nodes': [{"values": [u"\u0443"]}, {"values": [u"abcd"]}]}, - {'regex': u"hi(ab\u0443cd)", 'nodes': [{"values": [u"hi"]}, {"values": [u"ab\u0443cd"]}]}, + {'regex': u"\u0443(abcd)", "charset": MH.Charset.UNICODE, + 'nodes': [{"values": [u"\u0443"]}, {"values": [u"abcd"]}]}, + {'regex': u"hi(ab\u0443cd)", "charset": MH.Charset.UNICODE, + 'nodes': [{"values": [u"hi"]}, {"values": [u"ab\u0443cd"]}]}, ) def test_escape(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data(u"?", u"*", u"+", u"{1,2}", u"what{,}ever", u"bj{}er" - u"what{1, 2}", u"what{,3}ever", u"ee{l1, 2}ever", u"whddddat{\13, 2}eyyyver", - u"wat{3,2d}eyyyver", u"w**r", u"w+*r", u"w*?r") + @ddt.data({'regex': u"?"}, {'regex': u"*"}, {'regex': u"+"}, {'regex': u"{1,2}"}, {'regex': u"what{,}ever"}, + {'regex': u"bj{}er"},{'regex': u"what{1, 2}"}, {'regex': u"what{,3}ever"}, {'regex': u"ee{l1, 2}ever"}, + {'regex': u"whddddat{\13, 2}eyyyver"}, {'regex': u"wat{3,2d}eyyyver"}, {'regex': u"w**r"}, + {'regex': u"w+*r"}, {'regex': u"w*?r"}) def test_quantifier_raise(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data(u"salut(", u"dd[", u"(", u"[", u"{0") + @ddt.data({'regex': u"salut("}, {'regex': u"dd["}, {'regex': u"("}, {'regex': u"["}, {'regex': u"{0"}) def test_wrong_end_raise(self, regex): self.assert_regex_is_invalid(regex) @@ -204,17 +209,21 @@ def test_types_recognition(self, test_case): {'regex': u"[\u0033]", 'nodes': [{"alphabet": u"\u0033"}]}, {'regex': u"[\u0003-\u0005]", 'nodes': [{"alphabet": u"\u0003\u0004\u0005"}]}, - {'regex': u"[\u0333-\u0335]", 'nodes': [{"alphabet": u"\u0333\u0334\u0335"}]}, - {'regex': u"[e\u4133-\u4135a]", 'nodes': [{"alphabet": u"e\u4133\u4134\u4135a"}]} + {'regex': u"[\u0333-\u0335]", "charset": MH.Charset.UNICODE, + 'nodes': [{"alphabet": u"\u0333\u0334\u0335"}]}, + {'regex': u"[e\u4133-\u4135a]", "charset": MH.Charset.UNICODE, + 'nodes': [{"alphabet": u"e\u4133\u4134\u4135a"}]} ) def test_basic_square_brackets(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data(u"[\x33-\x23]", u"[\u7633-\u7323]", u"[3-1]", u"[y-a]") + @ddt.data({'regex': u"[\x33-\x23]"}, {'regex': u"[3-1]"}, {'regex': u"[y-a]"}, + {'regex': u"[\u7633-\u7323]", "charset": MH.Charset.UNICODE}) def test_wrong_alphabet(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data(u"[]", u"stronger[]baby", u"strongerbaby[]", u"[]strongerbaby", u"stro[]nger[]baby[]") + @ddt.data({'regex': u"[]"}, {'regex': u"stronger[]baby"}, {'regex': u"strongerbaby[]"}, + {'regex': u"[]strongerbaby"}, {'regex': u"stro[]nger[]baby[]"}) def 
test_basic_square_brackets_raise(self, regex): self.assert_regex_is_invalid(regex) @@ -274,7 +283,8 @@ def test_shape(self, test_case): def assert_regex_is_valid(self, test_case): - self._parser.parse(test_case['regex'], "name") + charset = test_case['charset'] if 'charset' in test_case else MH.Charset.ASCII_EXT + self._parser.parse(test_case['regex'], "name", charset) self.assertEquals(self._parser._create_terminal_node.call_count, len(test_case['nodes'])) calls = [] @@ -291,5 +301,6 @@ def assert_regex_is_valid(self, test_case): self._parser._create_terminal_node.assert_has_calls(calls) - def assert_regex_is_invalid(self, regex): - self.assertRaises(Exception, self._parser.parse, regex, "name") \ No newline at end of file + def assert_regex_is_invalid(self, test_case): + charset = test_case['charset'] if 'charset' in test_case else MH.Charset.ASCII_EXT + self.assertRaises(Exception, self._parser.parse, test_case['regex'], "name", charset) \ No newline at end of file From c21bb9c878de6976969861e669528d027a2bd2d4 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Tue, 2 Aug 2016 15:45:25 +0200 Subject: [PATCH 42/80] Fix issues with charset and python 2/3 --- framework/data_model_helpers.py | 30 ++++++++++-------------------- framework/error_handling.py | 4 ++-- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 23c3825..ee311e5 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -600,7 +600,7 @@ def run(self, context): self.state = self.states[state] break else: - raise InitialStateNotFound() + raise InitialStateNotFoundError() self._run(context) @@ -628,8 +628,6 @@ def advance(self, ctx): raise QuantificationError() elif ctx.input in ('}', ')', ']'): raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) elif ctx.input == '[': return self.machine.SquareBrackets @@ -712,8 +710,6 @@ def advance(self, ctx): elif ctx.input in ('}',')',']'): raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) elif ctx.input is None: return self.machine.Final @@ -734,8 +730,6 @@ def advance(self, ctx): raise QuantificationError() elif ctx.input in ('}', ')', ']'): raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) elif ctx.input == '|': return self.machine.Choice elif ctx.input is None: @@ -834,8 +828,6 @@ class Group(State): def advance(self, ctx): if ctx.input in (')', '}', ']'): raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) elif ctx.input in ('*', '+', '?'): return self.machine.QtyState @@ -876,8 +868,6 @@ def advance(self, ctx): raise QuantificationError() elif ctx.input in ('}', ']', None): raise StructureError(ctx.input) - elif ctx.input == '-': - raise EscapeError(ctx.input) elif ctx.input in ('(', '['): raise InconvertibilityError() elif ctx.input == '\\': @@ -954,8 +944,8 @@ def advance(self, ctx): raise StructureError(ctx.input) elif ctx.input in ('(', '['): raise InconvertibilityError() - elif ctx.input in ('-', '|'): - raise EscapeError(ctx.input) + elif ctx.input == '-': + raise InvalidRangeError() elif ctx.input == ']': raise EmptyAlphabetError() elif ctx.input == '\\': @@ -994,7 +984,7 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input in ('?', '*', '+', '{', '}', '(', ')', '[', ']', '|', '-', None): - raise InvalidRange() + raise InvalidRangeError() elif ctx.input == '\\': return self.machine.EscapeAfterRange else: @@ 
-1004,12 +994,12 @@ def advance(self, ctx): class AfterRange(Initial): def _run(self, ctx): if ctx.alphabet[-1] > ctx.input: - raise InvalidRange() + raise InvalidRangeError() elif ctx.input == ctx.alphabet[-1]: pass else: for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): - ctx.append_to_alphabet(six.unichr(i)) + ctx.append_to_alphabet(ctx.int_to_string(i)) def advance(self, ctx): if ctx.input == ']': @@ -1045,7 +1035,7 @@ def _run(self, ctx): def advance(self, ctx): if ctx.input in ctx.META_SEQUENCES: - raise InvalidRange() + raise InvalidRangeError() elif ctx.input in ctx.SPECIAL_CHARS: return self.machine.AfterRange else: @@ -1160,6 +1150,7 @@ def reset(self): def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): self._name = name self.charset = charset + self.int_to_string = chr if sys.version_info[0] == 2 and self.charset != MH.Charset.UNICODE else six.unichr if self.charset == MH.Charset.ASCII: max = 0x7F @@ -1169,7 +1160,7 @@ def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): max = 0xFF def get_complement(chars): - return ''.join([six.unichr(i) for i in range(0, max + 1) if six.unichr(i) not in chars]) + return ''.join([self.int_to_string(i) for i in range(0, max + 1) if self.int_to_string(i) not in chars]) self.META_SEQUENCES = {'s': string.whitespace, 'S': get_complement(string.whitespace), @@ -1324,7 +1315,7 @@ def _get_type(top_desc, contents): ntype = MH.RawNode elif hasattr(contents, '__call__') and pre_ntype in [None, MH.Generator]: ntype = MH.Generator - elif isinstance(contents, str) and pre_ntype in [None, MH.Regex]: + elif isinstance(contents, six.string_types) and pre_ntype in [None, MH.Regex]: ntype = MH.Regex else: ntype = MH.Leaf @@ -1466,7 +1457,6 @@ def _create_non_terminal_node_from_regex(self, desc, node=None): if isinstance(name, tuple): name = name[0] regexp = desc.get('contents') - assert isinstance(regexp, str) parser = RegexParser() nodes = parser.parse(regexp, name, desc.get('charset')) diff --git a/framework/error_handling.py b/framework/error_handling.py index 3de236c..1a34a62 100644 --- a/framework/error_handling.py +++ b/framework/error_handling.py @@ -76,9 +76,9 @@ def __init__(self): "translated into a non-terminal only composed of terminal ones.") class EmptyAlphabetError(RegexParserError): pass -class InvalidRange(RegexParserError): pass +class InvalidRangeError(RegexParserError): pass -class InitialStateNotFound(RegexParserError): +class InitialStateNotFoundError(RegexParserError): def __init__(self): RegexParserError.__init__(self, "No state was declared as initial.") From a2eb97c54132487e2454ac2f1a044fb1443aca0f Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Wed, 3 Aug 2016 11:36:20 +0200 Subject: [PATCH 43/80] RegexParser documentation + bug fix --- docs/source/data_model.rst | 115 ++++++++++++++++++++++++++++++++ framework/data_model_helpers.py | 4 +- 2 files changed, 117 insertions(+), 2 deletions(-) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 6548fef..38163a6 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -427,6 +427,9 @@ contents will be used as is. Otherwise, the additional keywords will be used to complement the description. Note that the *keyword* ``name`` should not be provided as it will be picked from the provided node. + - a python ``regular expression`` will represent a node that is + terminal or non-terminal but only contains terminal ones + (refer to :ref:`dm:pattern:regex`). 
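+
+     As a minimal sketch (the node name and the regular expression below are
+     purely illustrative), such a node could be declared as follows::
+
+       {'name': 'version', 'contents': 'v[0-9].[0-9]'}
+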
   Note that for defining a *function node* and not a generator node, you
   have to state the type attribute to ``MH.Leaf``.
@@ -928,6 +931,15 @@ specific_fuzzy_vals
   These additional values are added to the test cases planned by the
   *disruptor* (if not already planned).
 
+charset
+  Used to specify a charset to be used within the node: it is particularly useful
+  for nodes that contain regular expressions. Accepted attributes are:
+
+  - ``MH.Charset.ASCII``
+  - ``MH.Charset.ASCII_EXT`` (default)
+  - ``MH.Charset.UNICODE``
+
+
 .. _dm:patterns:
 
 Data Model Patterns
@@ -1551,3 +1563,106 @@ The following picture displays the result of the previous code (triggered by lin
 .. note:: Line 11 is to make the absorption operation work correctly. Indeed
    because of the encoding, constraints are not rigid enough to make fuddly
    work out the absorption without some help.
+
+
+
+.. _dm:pattern:regex:
+
+How to Describe a Data Format That Contains Complex Strings
+-----------------------------------------------------------
+
+Parts of the data that only contain strings can easily be described using python's regular expressions.
+Here are some rules to follow:
+
+- The character pair (``[``, ``]``) and meta-sequences, such as ``\s``, ``\S``, ``\w``, ``\W``, ``\d``
+  and ``\D``, are the only ways to define a :class:`framework.value_types.String` terminal node that
+  contains an alphabet.
+
+- Anything else will be translated into a :class:`framework.value_types.String` terminal node that
+  declares a list of values. The character pair (``(``, ``)``) can be used to delimit a portion of
+  the regular expression that needs to be translated into a terminal node on its own.
+
+.. note:: If every item in a list of values is an integer, an :class:`framework.value_types.INT_Str` will
+   be created instead of a :class:`framework.value_types.String`.
+
+- ``(``, ``)``, ``[``, ``]``, ``?``, ``*``, ``+``, ``{``, ``}``, ``|``, ``\``, ``-`` are the only
+  recognised special characters. They cannot be used in an unsuitable context without being escaped
+  (exceptions are made for ``|`` and ``-``).
+
+- Only regular expressions that can be translated into one terminal node, or into one non-terminal
+  node composed of terminal ones, are allowed. If this rule is not respected, an
+  :class:`framework.error_handling.InconvertibilityError` will be raised.
+
+- An inconsistency between the charset and the characters that compose the regular expression will result
+  in a :class:`framework.error_handling.CharsetError`.
+
+.. note:: The default charset used by Fuddly is ``MH.Charset.ASCII_EXT``. To alter this behaviour, it is
+   necessary to use the ``charset`` keyword.
+
+
+To illustrate these rules, let's take some examples:
+
+Example 1: the basics
+
+.. code-block:: python
+   :linenos:
+
+    regex = {'name': 'HTTP_version',
+             'contents': '(HTTP)/[0-9].(0|1|2|\x33|4|5|6|7|8|9)'}
+    # is equivalent to
+    classic = {'name': 'HTTP_version',
+               'contents': [
+                {'name': 'HTTP_version_1', 'contents': String(val_list=["HTTP"])},
+                {'name': 'HTTP_version_2', 'contents': String(val_list=["/"])},
+                {'name': 'HTTP_version_3',
+                 'contents': String(alphabet="0123456789", size=[1])},
+                {'name': 'HTTP_version_4', 'contents': String(val_list=["."])},
+                {'name': 'HTTP_version_5', 'contents': INT_Str(mini=0, maxi=9)}]}
+
+
+Example 2: introducing shapes
+
+.. code-block:: python
+   :linenos:
+
+    regex = {'name': 'something',
+             'contents': '(333|444)|foo-bar|\d|[th|is]'}
+    # is equivalent to
+    classic = {'name': 'something',
+               'contents': [
+                {'weight': 1, 'contents': INT_Str(int_list=[333, 444])},
+                {'weight': 1, 'contents': String(val_list=["foo-bar"])},
+                {'weight': 1, 'contents': String(alphabet="0123456789", size=[1])},
+                {'weight': 1, 'contents': String(alphabet="th|is", size=[1])}]}
+
+
+Example 3: using quantifiers and the escape character ``\``
+
+.. code-block:: python
+   :linenos:
+
+    regex = {'name': 'something',
+             'contents': '\(this[is]{3,4}the+end\]'}
+    # is equivalent to
+    classic = {'name': 'something',
+               'contents': [
+                {'name': 'something_1', 'contents': String(val_list=["(this"])},
+                {'name': 'something_2',
+                 'contents': String(alphabet="is", min_sz=3, max_sz=4)},
+                {'name': 'something_3', 'contents': String(val_list=["th"])},
+                {'name': 'something_4', 'qty': (1, -1),
+                 'contents': String(val_list=["e"])},
+                {'name': 'something_5', 'contents': String(val_list=["end]"])}]}
+
+Example 4: invalid regular expressions
+
+.. code-block:: python
+   :linenos:
+
+    error_1 = {'name': 'rejected', 'contents': '(HT(T)P)/'}
+    # raises a framework.error_handling.InconvertibilityError
+    # because of the nested parentheses.
+
+    error_2 = {'name': 'rejected', 'contents': '(HTTP)foo|bar'}
+    # also raises a framework.error_handling.InconvertibilityError
+    # because | takes priority over parentheses in regular expressions.
diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index ee311e5..c63a4d2 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -820,7 +820,7 @@ def advance(self, context):
         return None
 
     def advance(self, ctx):
-        self.machine.QtyState.advance(self, ctx)
+        return self.machine.QtyState.advance(self, ctx)
 
 
 class Group(State):
@@ -1135,7 +1135,7 @@ def flush(self):
         else:
             type = fvt.String
 
-        name = self._name + str(len(self.nodes) + 1)
+        name = self._name + '_' + str(len(self.nodes) + 1)
         self.nodes.append(self._create_terminal_node(name, type, values=self.values, alphabet=self.alphabet, qty=(self.min, self.max)))
         self.reset()
From 4136983a0a627aa3a4e1ec845078c80e78713a7e Mon Sep 17 00:00:00 2001
From: Julien Baladier
Date: Wed, 3 Aug 2016 11:53:19 +0200
Subject: [PATCH 44/80] Modify tests to use non-unicode strings in python 2

---
 test/unit/test_data_model_helpers.py | 294 ++++++++++++++-------------
 1 file changed, 148 insertions(+), 146 deletions(-)

diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py
index 7d665a1..699d5fa 100644
--- a/test/unit/test_data_model_helpers.py
+++ b/test/unit/test_data_model_helpers.py
@@ -20,47 +20,47 @@ def setUp(self):
     def tearDown(self):
         pass
 
-    @ddt.data({'regex': u"(sa(lu))(les)(louloux)"}, {'regex': u"(salut)(les(louloux)"},
-              {'regex': u"(salut))les(louloux)"}, {'regex': u"(sal*ut)oo"}, {'regex': u"(sal?ut)oo"},
-              {'regex': u"sal{utoo"}, {'regex': u"(sal+ut)oo"}, {'regex': u"(sal{u)too"},
-              {'regex': u"(sal{2}u)too"}, {'regex': u"sal{2,1}utoo"}, {'regex': u"sal(u[t]o)o"},
-              {'regex': u"whatever|toto?ff"}, {'regex': u"whate?ver|toto"}, {'regex': u"(toto)*ohoho|haha"},
-              {'regex': u"(toto)ohoho|haha"}, {'regex': u"salut[abcd]{,15}rr"}, {'regex': u"[]whatever"},
-              {'regex': u"t{,15}"}, {'regex': u"hi|b?whatever"}, {'regex': u"hi|b{3}whatever"})
+    @ddt.data({'regex': "(sa(lu))(les)(louloux)"}, {'regex': "(salut)(les(louloux)"},
+              {'regex': "(salut))les(louloux)"}, {'regex': "(sal*ut)oo"}, {'regex':
"(sal?ut)oo"}, + {'regex': "sal{utoo"}, {'regex': "(sal+ut)oo"}, {'regex': "(sal{u)too"}, + {'regex': "(sal{2}u)too"}, {'regex': "sal{2,1}utoo"}, {'regex': "sal(u[t]o)o"}, + {'regex': "whatever|toto?ff"}, {'regex': "whate?ver|toto"}, {'regex': "(toto)*ohoho|haha"}, + {'regex': "(toto)ohoho|haha"}, {'regex': "salut[abcd]{,15}rr"}, {'regex': "[]whatever"}, + {'regex': "t{,15}"}, {'regex': "hi|b?whatever"}, {'regex': "hi|b{3}whatever"}) def test_invalid_regexes(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': u"[abcd]?", 'nodes': [{"alphabet": u"abcd", "qty": (0, 1)}]}, - {'regex': u"[abcd]*", 'nodes': [{"alphabet": u"abcd", "qty": (0, None)}]}, - {'regex': u"[abcd]+", 'nodes': [{"alphabet": u"abcd", "qty": (1, None)}]}, - {'regex': u"[abcd]{7}", 'nodes': [{"alphabet": u"abcd", "qty": (7, 7)}]}, - {'regex': u"[abcd]{2,7}", 'nodes': [{"alphabet": u"abcd", "qty": (2, 7)}]}, - {'regex': u"[abcd]{0}", 'nodes': [{"alphabet": u"abcd", "qty": (0, 0)}]}, - {'regex': u"[abcd]{0,0}", 'nodes': [{"alphabet": u"abcd", "qty": (0, 0)}]}, - {'regex': u"[abcd]{3,}", 'nodes': [{"alphabet": u"abcd", "qty": (3, None)}]}, + {'regex': "[abcd]?", 'nodes': [{"alphabet": "abcd", "qty": (0, 1)}]}, + {'regex': "[abcd]*", 'nodes': [{"alphabet": "abcd", "qty": (0, None)}]}, + {'regex': "[abcd]+", 'nodes': [{"alphabet": "abcd", "qty": (1, None)}]}, + {'regex': "[abcd]{7}", 'nodes': [{"alphabet": "abcd", "qty": (7, 7)}]}, + {'regex': "[abcd]{2,7}", 'nodes': [{"alphabet": "abcd", "qty": (2, 7)}]}, + {'regex': "[abcd]{0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, + {'regex': "[abcd]{0,0}", 'nodes': [{"alphabet": "abcd", "qty": (0, 0)}]}, + {'regex': "[abcd]{3,}", 'nodes': [{"alphabet": "abcd", "qty": (3, None)}]}, ) def test_quantifiers(self, test_case): self.assert_regex_is_valid(test_case) @ddt.data( - {'regex': u"salut(l\(es)(lou\\\\lous)cmoi", + {'regex': "salut(l\(es)(lou\\\\lous)cmoi", 'nodes': [ - {"values": [u"salut"]}, - {"values": [u"l(es"]}, - {"values": [u"lou\lous"]}, - {"values": [u"cmoi"]}, + {"values": ["salut"]}, + {"values": ["l(es"]}, + {"values": ["lou\lous"]}, + {"values": ["cmoi"]}, ]}, - {'regex': u"hi\x58", 'nodes': [{"values": [u"hi\x58"]}]}, - {'regex': u"hi\x00hola", 'nodes': [{"values": [u"hi\x00hola"]}]}, - {'regex': u"\xFFdom", 'nodes': [{"values": [u"\xFFdom"]}]}, - {'regex': u"\ddom", 'nodes': [{"alphabet": u"0123456789"}, {"values": [u"dom"]}]}, - {'regex': u"dom[abcd\d]", 'nodes': [{"values": [u"dom"]}, {"alphabet": u"abcd0123456789"}]}, - {'regex': u"[abcd]\x43", 'nodes': [{"alphabet": u"abcd"}, {"values": [u"\x43"]}]}, - {'regex': u"(abcd)\x53", 'nodes': [{"values": [u"abcd"]}, {"values": [u"\x53"]}]}, - {'regex': u"\x43[abcd]", 'nodes': [{"values": [u"\x43"]}, {"alphabet": u"abcd"}]}, - {'regex': u"\x43(abcd)", 'nodes': [{"values": [u"\x43"]}, {"values": [u"abcd"]}]}, + {'regex': "hi\x58", 'nodes': [{"values": ["hi\x58"]}]}, + {'regex': "hi\x00hola", 'nodes': [{"values": ["hi\x00hola"]}]}, + {'regex': "\xFFdom", 'nodes': [{"values": ["\xFFdom"]}]}, + {'regex': "\ddom", 'nodes': [{"alphabet": "0123456789"}, {"values": ["dom"]}]}, + {'regex': "dom[abcd\d]", 'nodes': [{"values": ["dom"]}, {"alphabet": "abcd0123456789"}]}, + {'regex': "[abcd]\x43", 'nodes': [{"alphabet": "abcd"}, {"values": ["\x43"]}]}, + {'regex': "(abcd)\x53", 'nodes': [{"values": ["abcd"]}, {"values": ["\x53"]}]}, + {'regex': "\x43[abcd]", 'nodes': [{"values": ["\x43"]}, {"alphabet": "abcd"}]}, + {'regex': "\x43(abcd)", 'nodes': [{"values": ["\x43"]}, {"values": ["abcd"]}]}, {'regex': 
u"\u0443(abcd)", "charset": MH.Charset.UNICODE, 'nodes': [{"values": [u"\u0443"]}, {"values": [u"abcd"]}]}, {'regex': u"hi(ab\u0443cd)", "charset": MH.Charset.UNICODE, @@ -70,27 +70,27 @@ def test_escape(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data({'regex': u"?"}, {'regex': u"*"}, {'regex': u"+"}, {'regex': u"{1,2}"}, {'regex': u"what{,}ever"}, - {'regex': u"bj{}er"},{'regex': u"what{1, 2}"}, {'regex': u"what{,3}ever"}, {'regex': u"ee{l1, 2}ever"}, - {'regex': u"whddddat{\13, 2}eyyyver"}, {'regex': u"wat{3,2d}eyyyver"}, {'regex': u"w**r"}, - {'regex': u"w+*r"}, {'regex': u"w*?r"}) + @ddt.data({'regex': "?"}, {'regex': "*"}, {'regex': "+"}, {'regex': "{1,2}"}, {'regex': "what{,}ever"}, + {'regex': "bj{}er"},{'regex': "what{1, 2}"}, {'regex': "what{,3}ever"}, {'regex': "ee{l1, 2}ever"}, + {'regex': "whddddat{\13, 2}eyyyver"}, {'regex': "wat{3,2d}eyyyver"}, {'regex': "w**r"}, + {'regex': "w+*r"}, {'regex': "w*?r"}) def test_quantifier_raise(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data({'regex': u"salut("}, {'regex': u"dd["}, {'regex': u"("}, {'regex': u"["}, {'regex': u"{0"}) + @ddt.data({'regex': "salut("}, {'regex': "dd["}, {'regex': "("}, {'regex': "["}, {'regex': "{0"}) def test_wrong_end_raise(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': u"[abcd]*toto(|\(ab\)|cd)+what?ever", + {'regex': "[abcd]*toto(|\(ab\)|cd)+what?ever", 'nodes': [ - {"alphabet": u"abcd", "qty": (0, None)}, - {"values": [u"toto"]}, - {"values": [u"", u"(ab)", u"cd"], "qty": (1, None)}, - {"values": [u"wha"]}, - {"values": [u"t"], "qty": (0, 1)}, - {"values": [u"ever"]} + {"alphabet": "abcd", "qty": (0, None)}, + {"values": ["toto"]}, + {"values": ["", "(ab)", "cd"], "qty": (1, None)}, + {"values": ["wha"]}, + {"values": ["t"], "qty": (0, 1)}, + {"values": ["ever"]} ]}, ) def test_complete(self, test_case): @@ -98,43 +98,43 @@ def test_complete(self, test_case): @ddt.data( - {'regex': u"()", 'nodes': [{"values": [u""]}]}, - {'regex': u"(z)", 'nodes': [{"values": [u"z"]}]}, - {'regex': u"(cat)", 'nodes': [{"values": [u"cat"]}]}, + {'regex': "()", 'nodes': [{"values": [""]}]}, + {'regex': "(z)", 'nodes': [{"values": ["z"]}]}, + {'regex': "(cat)", 'nodes': [{"values": ["cat"]}]}, - {'regex': u"hello(boat)", - 'nodes': [{"values": [u"hello"]}, {"values": [u"boat"]}]}, + {'regex': "hello(boat)", + 'nodes': [{"values": ["hello"]}, {"values": ["boat"]}]}, - {'regex': u"(cake)awesome", - 'nodes': [{"values": [u"cake"]}, {"values": [u"awesome"]}]}, + {'regex': "(cake)awesome", + 'nodes': [{"values": ["cake"]}, {"values": ["awesome"]}]}, - {'regex': u"(foo)(bar)(foo)", - 'nodes': [{"values": [u"foo"]}, {"values": [u"bar"]}, {"values": [u"foo"]}]}, + {'regex': "(foo)(bar)(foo)", + 'nodes': [{"values": ["foo"]}, {"values": ["bar"]}, {"values": ["foo"]}]}, - {'regex': u"dashboard(apple)(purple)", - 'nodes': [{"values": [u"dashboard"]}, {"values": [u"apple"]}, {"values": [u"purple"]}]}, + {'regex': "dashboard(apple)(purple)", + 'nodes': [{"values": ["dashboard"]}, {"values": ["apple"]}, {"values": ["purple"]}]}, - {'regex': u"(harder)better(faster)", - 'nodes': [{"values": [u"harder"]}, {"values": [u"better"]}, {"values": [u"faster"]}]}, + {'regex': "(harder)better(faster)", + 'nodes': [{"values": ["harder"]}, {"values": ["better"]}, {"values": ["faster"]}]}, - {'regex': u"(stronger)(it is me)baby", - 'nodes': [{"values": [u"stronger"]}, {"values": [u"it is me"]}, {"values": [u"baby"]}]}, + {'regex': "(stronger)(it is me)baby", + 'nodes': [{"values": ["stronger"]}, 
{"values": ["it is me"]}, {"values": ["baby"]}]}, - {'regex': u"new(york)city", - 'nodes': [{"values": [u"new"]}, {"values": [u"york"]}, {"values": [u"city"]}]}, + {'regex': "new(york)city", + 'nodes': [{"values": ["new"]}, {"values": ["york"]}, {"values": ["city"]}]}, - {'regex': u"()whatever", - 'nodes': [{"values": [u""]}, {"values": [u"whatever"]}]}, + {'regex': "()whatever", + 'nodes': [{"values": [""]}, {"values": ["whatever"]}]}, - {'regex': u"this is it()", - 'nodes': [{"values": [u"this is it"]}, {"values": [u""]}]}, + {'regex': "this is it()", + 'nodes': [{"values": ["this is it"]}, {"values": [""]}]}, - {'regex': u"this()parser()is()working", - 'nodes': [{"values": [u"this"]}, {"values": [u""]}, {"values": [u"parser"]}, {"values": [u""]}, - {"values": [u"is"]}, {"values": [u""]}, {"values": [u"working"]}]}, + {'regex': "this()parser()is()working", + 'nodes': [{"values": ["this"]}, {"values": [""]}, {"values": ["parser"]}, {"values": [""]}, + {"values": ["is"]}, {"values": [""]}, {"values": ["working"]}]}, - {'regex': u"()()()", - 'nodes': [{"values": [u""]}, {"values": [u""]}, {"values": [u""]}]}, + {'regex': "()()()", + 'nodes': [{"values": [""]}, {"values": [""]}, {"values": [""]}]}, ) def test_basic_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) @@ -143,30 +143,30 @@ def test_basic_parenthesis(self, test_case): @ddt.data( - {'regex': u"(ab|cd|)+", 'nodes': [{"values": [u"ab", u"cd", u""], "qty": (1, None)}]}, - {'regex': u"(ab||cd)", 'nodes': [{"values": [u"ab", u"", u"cd"]}]}, - {'regex': u"(|ab|cd|ef|gh)+", 'nodes': [{"values": [u"", u"ab", u"cd", u"ef", u"gh"], "qty": (1, None)}]}, - {'regex': u"(|)+", 'nodes': [{"values": [u"", u""], "qty": (1, None)}]}, - {'regex': u"(|||)+", 'nodes': [{"values": [u"", u"", u"", u""], "qty": (1, None)}]}, + {'regex': "(ab|cd|)+", 'nodes': [{"values": ["ab", "cd", ""], "qty": (1, None)}]}, + {'regex': "(ab||cd)", 'nodes': [{"values": ["ab", "", "cd"]}]}, + {'regex': "(|ab|cd|ef|gh)+", 'nodes': [{"values": ["", "ab", "cd", "ef", "gh"], "qty": (1, None)}]}, + {'regex': "(|)+", 'nodes': [{"values": ["", ""], "qty": (1, None)}]}, + {'regex': "(|||)+", 'nodes': [{"values": ["", "", "", ""], "qty": (1, None)}]}, ) def test_or_in_parenthesis(self, test_case): self.assert_regex_is_valid(test_case) @ddt.data( - {'regex': u"1|2|3", 'nodes': [{"type": fvt.INT_str, "values": [1,2,3]}]}, - {'regex': u"1|2|3|foo", 'nodes': [{"values": [u'1', u'2', u'3', u'foo']}]}, - {'regex': u"1|foo|2|3", 'nodes': [{"values": [u'1', u'foo', u'2', u'3']}]}, - {'regex': u"foo|1|2|3", 'nodes': [{"values": [u'foo', u'1', u'2', u'3']}]}, - {'regex': u"(11|12|13)bar", - 'nodes': [{"type": fvt.INT_str, "values": [11, 12, 13]}, {"values": [u'bar']}]}, - {'regex': u"(11|12|13|bar)", - 'nodes': [{"values": [u'11', u'12', u'13', u'bar']}]}, - {'regex': u"234whatever23", 'nodes': [{"values": [u'234whatever23']}]}, - {'regex': u"(234whatever23)foobar", - 'nodes': [{"values": [u'234whatever23']}, {"values": [u'foobar']}]}, - {'regex': u"1113|3435|3344|(hay)", - 'nodes': [{"type": fvt.INT_str, "values": [1113, 3435, 3344]}, {"values": [u'hay']}]}, + {'regex': "1|2|3", 'nodes': [{"type": fvt.INT_str, "values": [1,2,3]}]}, + {'regex': "1|2|3|foo", 'nodes': [{"values": ['1', '2', '3', 'foo']}]}, + {'regex': "1|foo|2|3", 'nodes': [{"values": ['1', 'foo', '2', '3']}]}, + {'regex': "foo|1|2|3", 'nodes': [{"values": ['foo', '1', '2', '3']}]}, + {'regex': "(11|12|13)bar", + 'nodes': [{"type": fvt.INT_str, "values": [11, 12, 13]}, {"values": ['bar']}]}, + {'regex': 
"(11|12|13|bar)", + 'nodes': [{"values": ['11', '12', '13', 'bar']}]}, + {'regex': "234whatever23", 'nodes': [{"values": ['234whatever23']}]}, + {'regex': "(234whatever23)foobar", + 'nodes': [{"values": ['234whatever23']}, {"values": ['foobar']}]}, + {'regex': "1113|3435|3344|(hay)", + 'nodes': [{"type": fvt.INT_str, "values": [1113, 3435, 3344]}, {"values": ['hay']}]}, ) def test_types_recognition(self, test_case): self.assert_regex_is_valid(test_case) @@ -174,41 +174,43 @@ def test_types_recognition(self, test_case): @ddt.data( - {'regex': u"[e]", 'nodes': [{"alphabet": u"e"}]}, - {'regex': u"[caty]", 'nodes': [{"alphabet": u"caty"}]}, - {'regex': u"[abcd][efghij]", 'nodes': [{"alphabet": u"abcd"}, {"alphabet": u"efghij"}]}, - {'regex': u"[cake]awesome", 'nodes': [{"alphabet": u"cake"}, {"values": [u"awesome"]}]}, + {'regex': "[e]", 'nodes': [{"alphabet": "e"}]}, + {'regex': "[caty]", 'nodes': [{"alphabet": "caty"}]}, + {'regex': "[abcd][efghij]", 'nodes': [{"alphabet": "abcd"}, {"alphabet": "efghij"}]}, + {'regex': "[cake]awesome", 'nodes': [{"alphabet": "cake"}, {"values": ["awesome"]}]}, - {'regex': u"[foo][bar][foo]", + {'regex': "[foo][bar][foo]", 'nodes': [{"alphabet": "foo"}, {"alphabet": "bar"}, {"alphabet": "foo"}]}, - {'regex': u"dashboard[apple][purple]", - 'nodes': [{"values": [u"dashboard"]}, {"alphabet": u"apple"}, {"alphabet": u"purple"}]}, + {'regex': "dashboard[apple][purple]", + 'nodes': [{"values": ["dashboard"]}, {"alphabet": "apple"}, {"alphabet": "purple"}]}, - {'regex': u"[harder]better[faster]", - 'nodes': [{"alphabet": u"harder"}, {"values": [u"better"]}, {"alphabet": u"faster"}]}, + {'regex': "[harder]better[faster]", + 'nodes': [{"alphabet": "harder"}, {"values": ["better"]}, {"alphabet": "faster"}]}, - {'regex': u"[stronger][it is me]baby", - 'nodes': [{"alphabet": u"stronger"}, {"alphabet": u"it is me"}, {"values": [u"baby"]}]}, + {'regex': "[stronger][it is me]baby", + 'nodes': [{"alphabet": "stronger"}, {"alphabet": "it is me"}, {"values": ["baby"]}]}, - {'regex': u"new[york]city", - 'nodes': [{"values": [u"new"]}, {"alphabet": u"york"}, {"values": [u"city"]}]}, + {'regex': "new[york]city", + 'nodes': [{"values": ["new"]}, {"alphabet": "york"}, {"values": ["city"]}]}, - {'regex': u"[a-e]", 'nodes': [{"alphabet": u"abcde"}]}, - {'regex': u"[a-ewxy]", 'nodes': [{"alphabet": u"abcdewxy"}]}, - {'regex': u"[1-9]", 'nodes': [{"alphabet": u"123456789"}]}, - {'regex': u"[what1-9]", 'nodes': [{"alphabet": u"what123456789"}]}, - {'regex': u"[a-c1-9]", 'nodes': [{"alphabet": u"abc123456789"}]}, - {'regex': u"[a-c1-9fin]", 'nodes': [{"alphabet": u"abc123456789fin"}]}, - {'regex': u"[a-c9-9fin]", 'nodes': [{"alphabet": u"abc9fin"}]}, - {'regex': u"[pa-cwho1-9fin]", 'nodes': [{"alphabet": u"pabcwho123456789fin"}]}, + {'regex': "[a-e]", 'nodes': [{"alphabet": "abcde"}]}, + {'regex': "[a-ewxy]", 'nodes': [{"alphabet": "abcdewxy"}]}, + {'regex': "[1-9]", 'nodes': [{"alphabet": "123456789"}]}, + {'regex': "[what1-9]", 'nodes': [{"alphabet": "what123456789"}]}, + {'regex': "[a-c1-9]", 'nodes': [{"alphabet": "abc123456789"}]}, + {'regex': "[a-c1-9fin]", 'nodes': [{"alphabet": "abc123456789fin"}]}, + {'regex': "[a-c9-9fin]", 'nodes': [{"alphabet": "abc9fin"}]}, + {'regex': "[pa-cwho1-9fin]", 'nodes': [{"alphabet": "pabcwho123456789fin"}]}, - {'regex': u"[\x33]", 'nodes': [{"alphabet": u"\x33"}]}, - {'regex': u"[\x33-\x35]", 'nodes': [{"alphabet": u"\x33\x34\x35"}]}, - {'regex': u"[e\x33-\x35a]", 'nodes': [{"alphabet": u"e\x33\x34\x35a"}]}, + {'regex': "[\x33]", 'nodes': 
[{"alphabet": "\x33"}]}, + {'regex': "[\x33-\x35]", 'nodes': [{"alphabet": "\x33\x34\x35"}]}, + {'regex': "[e\x33-\x35a]", 'nodes': [{"alphabet": "e\x33\x34\x35a"}]}, - {'regex': u"[\u0033]", 'nodes': [{"alphabet": u"\u0033"}]}, - {'regex': u"[\u0003-\u0005]", 'nodes': [{"alphabet": u"\u0003\u0004\u0005"}]}, + {'regex': u"[\u0033]", "charset": MH.Charset.UNICODE, + 'nodes': [{"alphabet": u"\u0033"}]}, + {'regex': u"[\u0003-\u0005]", "charset": MH.Charset.UNICODE, + 'nodes': [{"alphabet": u"\u0003\u0004\u0005"}]}, {'regex': u"[\u0333-\u0335]", "charset": MH.Charset.UNICODE, 'nodes': [{"alphabet": u"\u0333\u0334\u0335"}]}, {'regex': u"[e\u4133-\u4135a]", "charset": MH.Charset.UNICODE, @@ -217,63 +219,63 @@ def test_types_recognition(self, test_case): def test_basic_square_brackets(self, test_case): self.assert_regex_is_valid(test_case) - @ddt.data({'regex': u"[\x33-\x23]"}, {'regex': u"[3-1]"}, {'regex': u"[y-a]"}, + @ddt.data({'regex': "[\x33-\x23]"}, {'regex': "[3-1]"}, {'regex': "[y-a]"}, {'regex': u"[\u7633-\u7323]", "charset": MH.Charset.UNICODE}) def test_wrong_alphabet(self, regex): self.assert_regex_is_invalid(regex) - @ddt.data({'regex': u"[]"}, {'regex': u"stronger[]baby"}, {'regex': u"strongerbaby[]"}, - {'regex': u"[]strongerbaby"}, {'regex': u"stro[]nger[]baby[]"}) + @ddt.data({'regex': "[]"}, {'regex': "stronger[]baby"}, {'regex': "strongerbaby[]"}, + {'regex': "[]strongerbaby"}, {'regex': "stro[]nger[]baby[]"}) def test_basic_square_brackets_raise(self, regex): self.assert_regex_is_invalid(regex) @ddt.data( - {'regex': u"|", 'nodes': [{"values": [u"",u""]}]}, - {'regex': u"|||", 'nodes': [{"values": [u"", u"", u"", u""]}]}, - {'regex': u"toto|titi|tata", 'nodes': [{"values": [u"toto", u"titi", u"tata"]}]}, - {'regex': u"toto|titi|", 'nodes': [{"values": [u"toto", u"titi", u""]}]}, - {'regex': u"toto||tata", 'nodes': [{"values": [u"toto", u"", u"tata"]}]}, - {'regex': u"|titi|tata", 'nodes': [{"values": [u"", u"titi", u"tata"]}]}, - {'regex': u"coucou|[abcd]|", 'nodes': [{"values": [u"coucou"]}, {"alphabet": u"abcd"}, {"values": [u""]}]}, + {'regex': "|", 'nodes': [{"values": ["",""]}]}, + {'regex': "|||", 'nodes': [{"values": ["", "", "", ""]}]}, + {'regex': "toto|titi|tata", 'nodes': [{"values": ["toto", "titi", "tata"]}]}, + {'regex': "toto|titi|", 'nodes': [{"values": ["toto", "titi", ""]}]}, + {'regex': "toto||tata", 'nodes': [{"values": ["toto", "", "tata"]}]}, + {'regex': "|titi|tata", 'nodes': [{"values": ["", "titi", "tata"]}]}, + {'regex': "coucou|[abcd]|", 'nodes': [{"values": ["coucou"]}, {"alphabet": "abcd"}, {"values": [""]}]}, - {'regex': u"|[hao]|[salut]?", - 'nodes': [{"values": [u""]}, {"alphabet": u"hao"}, {"alphabet": u"salut", "qty": (0, 1)}]}, + {'regex': "|[hao]|[salut]?", + 'nodes': [{"values": [""]}, {"alphabet": "hao"}, {"alphabet": "salut", "qty": (0, 1)}]}, - {'regex': u"coucou||[salut]?", - 'nodes': [{"values": [u"coucou", u""]}, {"alphabet": u"salut", "qty": (0, 1)}]}, + {'regex': "coucou||[salut]?", + 'nodes': [{"values": ["coucou", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, - {'regex': u"coucou||||[salut]?", - 'nodes': [{"values": [u"coucou", u"", u"", u""]}, {"alphabet": u"salut", "qty": (0, 1)}]}, + {'regex': "coucou||||[salut]?", + 'nodes': [{"values": ["coucou", "", "", ""]}, {"alphabet": "salut", "qty": (0, 1)}]}, - {'regex': u"[whatever]+|[hao]|[salut]?", + {'regex': "[whatever]+|[hao]|[salut]?", 'nodes': [ - {"alphabet": u"whatever", "qty": (1, None)}, - {"alphabet": u"hao"}, - {"alphabet": u"salut", "qty": (0, 1)} + {"alphabet": 
"whatever", "qty": (1, None)}, + {"alphabet": "hao"}, + {"alphabet": "salut", "qty": (0, 1)} ]}, - {'regex': u"(whatever)+|(hao)|(salut)?", + {'regex': "(whatever)+|(hao)|(salut)?", 'nodes': [ - {"values": [u"whatever"], "qty": (1, None)}, - {"values": [u"hao"]}, - {"values": [u"salut"], "qty": (0, 1)} + {"values": ["whatever"], "qty": (1, None)}, + {"values": ["hao"]}, + {"values": ["salut"], "qty": (0, 1)} ]}, - {'regex': u"tata|haha|c*|b*|[abcd]+", 'nodes': [ - {"values": [u"tata", u"haha"]}, - {"values": [u"c"], "qty": (0, None)}, - {"values": [u"b"], "qty": (0, None)}, - {"alphabet": u"abcd", "qty": (1, None)} + {'regex': "tata|haha|c*|b*|[abcd]+", 'nodes': [ + {"values": ["tata", "haha"]}, + {"values": ["c"], "qty": (0, None)}, + {"values": ["b"], "qty": (0, None)}, + {"alphabet": "abcd", "qty": (1, None)} ]}, - {'regex': u"(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ - {"values": [u"tata"], "qty": (1, None)}, - {"values": [u"haha", u"tata", u""]}, - {"values": [u"b"], "qty": (0, None)}, - {"alphabet": u"abcd", "qty": (1, None)} + {'regex': "(tata)+|haha|tata||b*|[abcd]+", 'nodes': [ + {"values": ["tata"], "qty": (1, None)}, + {"values": ["haha", "tata", ""]}, + {"values": ["b"], "qty": (0, None)}, + {"alphabet": "abcd", "qty": (1, None)} ]}, ) def test_shape(self, test_case): @@ -296,7 +298,7 @@ def assert_regex_is_valid(self, test_case): alphabet = nodes[i]['alphabet'] if 'alphabet' in nodes[i] else None qty = nodes[i]['qty'] if 'qty' in nodes[i] else (1, 1) - calls.append(mock.call("name" + str(i + 1), type, values=values, alphabet=alphabet, qty=qty)) + calls.append(mock.call("name" + "_" + str(i + 1), type, values=values, alphabet=alphabet, qty=qty)) self._parser._create_terminal_node.assert_has_calls(calls) From cef1028462a0d94c014c787ca121361931627216 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 4 Aug 2016 10:20:00 +0200 Subject: [PATCH 45/80] Change position of RegexParser in data_model_helpers --- framework/data_model_helpers.py | 2200 +++++++++++++++---------------- 1 file changed, 1090 insertions(+), 1110 deletions(-) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index c63a4d2..5879d1d 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -523,1370 +523,1350 @@ def _handle_attrs(n, set_attrs, clear_attrs): n.clear_attr(ca) -class State(object): - """ - Represent states at the lower level - """ - def __init__(self, machine): - """ - Args: - machine (StateMachine): state machine where it lives (local context) - """ - self.machine = machine - self.init_specific() +class ModelHelper(object): - def init_specific(self): - """ - Can be overridden to express additional initializations - """ - pass + HIGH_PRIO = 1 + MEDIUM_PRIO = 2 + LOW_PRIO = 3 + VERYLOW_PRIO = 4 - def _run(self, context): - raise NotImplementedError + valid_keys = [ + # generic description keys + 'name', 'contents', 'qty', 'clone', 'type', 'alt', 'conf', + 'custo_set', 'custo_clear', + # NonTerminal description keys + 'weight', 'shape_type', 'section_type', 'duplicate_mode', 'weights', + 'separator', 'prefix', 'suffix', 'unique', + 'encoder', + # Generator/Function description keys + 'node_args', 'other_args', 'provide_helpers', 'trigger_last', + # Typed-node description keys + 'specific_fuzzy_vals', + # Import description keys + 'import_from', 'data_id', + # node properties description keys + 'determinist', 'random', 'finite', 'infinite', 'mutable', + 'clear_attrs', 'set_attrs', + 'absorb_csts', 'absorb_helper', + 'semantics', 
'fuzz_weight', + 'sync_qty_with', 'qty_from', + 'exists_if', 'exists_if_not', + 'exists_if/and', 'exists_if/or', + 'sync_size_with', 'sync_enc_size_with', + 'post_freeze', 'charset' + ] - def run(self, context): - """ - Do some actions on the current character. - Args: - context (StateMachine): root state machine (global context) + def __init__(self, dm=None, delayed_jobs=True, add_env=True): """ - if context.input is not None and \ - ((context.charset == MH.Charset.ASCII and ord(context.input) > 0x7F) or - (context.charset == MH.Charset.ASCII_EXT and ord(context.input) > 0xFF)): - raise CharsetError() - self._run(context) - context.inputs.pop(0) + Help the process of data description. This class is able to construct a + :class:`framework.data_model.Node` object from a JSON-like description. - def advance(self, context): - """ - Check transitions using the first non-run character. Args: - context (StateMachine): root state machine (global context) - - Returns: - Class of the next state de run (None if we are in a final state) + dm (DataModel): a DataModel object, only required if the 'import_from' statement is used + with :meth:`create_graph_from_desc`. + delayed_jobs (bool): Enable or disabled delayed jobs feature. Used for instance for + delaying constraint that cannot be solved immediately. + add_env (bool): If `True`, an :class:`framework.data_model.Env` object + will be assigned to the generated :class:`framework.data_model.Node` + from :meth:`create_graph_from_desc`. Should be set to ``False`` if you consider using + the generated `Node` within another description or if you will copy it for building + a new node type. Keeping an ``Env()`` object can be dangerous if you make some clones of + it and don't pay attention to set a new ``Env()`` for each copy, because. A graph node + SHALL have only one ``Env()`` shared between all the nodes and an Env() shall not be + shared between independent graph (otherwise it could lead to + unexpected results). """ - raise NotImplementedError - + self.dm = dm + self.delayed_jobs = delayed_jobs + self._add_env_to_the_node = add_env -class StateMachine(State): - """ - Represent states that contain other states. 
- """ + def _verify_keys_conformity(self, desc): + for k in desc.keys(): + if k not in self.valid_keys: + raise KeyError("The description key '{:s}' is not recognized!".format(k)) - def __init__(self, machine=None): - self.states = {} - self.inputs = None - for name, cls in inspect.getmembers(self.__class__): - if inspect.isclass(cls) and issubclass(cls, State) and hasattr(cls, 'INITIAL'): - self.states[cls] = cls(self) + def create_graph_from_desc(self, desc): + self.sorted_todo = {} + self.node_dico = {} + self.empty_node = Node('EMPTY') + + n = self._create_graph_from_desc(desc, None) - State.__init__(self, self if machine is None else machine) + if self._add_env_to_the_node: + self._register_todo(n, self._set_env, prio=self.LOW_PRIO) - @property - def input(self): - return None if self.inputs is None or len(self.inputs) == 0 else self.inputs[0] + todo = self._create_todo_list() + while todo: + for node, func, args, unpack_args in todo: + if isinstance(args, tuple) and unpack_args: + func(node, *args) + else: + func(node, args) + todo = self._create_todo_list() - def _run(self, context): - while self.state is not None: - self.state.run(context) - next_state = self.state.advance(context) - self.state = self.states[next_state] if next_state is not None else None + return n - def run(self, context): - for state in self.states: - if state.INITIAL: - self.state = self.states[state] - break + def _handle_name(self, name_desc): + if isinstance(name_desc, (tuple, list)): + assert(len(name_desc) == 2) + name = name_desc[0] + ident = name_desc[1] + elif isinstance(name_desc, str): + name = name_desc + ident = 1 else: - raise InitialStateNotFoundError() - - self._run(context) - - -def register(cls): - cls.INITIAL = False - return cls + raise ValueError("Name is not recognized: '%s'!" 
% name_desc) -def initial(cls): - cls.INITIAL = True - return cls + return name, ident -class RegexParser(StateMachine): + def _create_graph_from_desc(self, desc, parent_node): + def _get_type(top_desc, contents): + pre_ntype = top_desc.get('type', None) + if isinstance(contents, list) and pre_ntype in [None, MH.NonTerminal]: + ntype = MH.NonTerminal + elif isinstance(contents, Node) and pre_ntype in [None, MH.RawNode]: + ntype = MH.RawNode + elif hasattr(contents, '__call__') and pre_ntype in [None, MH.Generator]: + ntype = MH.Generator + elif isinstance(contents, six.string_types) and pre_ntype in [None, MH.Regex]: + ntype = MH.Regex + else: + ntype = MH.Leaf + return ntype - @initial - class Initial(State): + self._verify_keys_conformity(desc) - def _run(self, ctx): - pass + contents = desc.get('contents', None) + dispatcher = {MH.NonTerminal: self._create_non_terminal_node, + MH.Regex: self._create_non_terminal_node_from_regex, + MH.Generator: self._create_generator_node, + MH.Leaf: self._create_leaf_node, + MH.RawNode: self._update_provided_node} - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError() - elif ctx.input in ('}', ')', ']'): - raise StructureError(ctx.input) + if contents is None: + nd = self.__handle_clone(desc, parent_node) + else: + # Non-terminal are recognized via its contents (avoiding + # the user to always provide a 'type' field) + ntype = _get_type(desc, contents) + nd = dispatcher.get(ntype)(desc) + self.__post_handling(desc, nd) - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '\\': - return self.machine.Escape - else: - ctx.append_to_contents("") + alt_confs = desc.get('alt', None) + if alt_confs is not None: + for alt in alt_confs: + self._verify_keys_conformity(alt) + cts = alt.get('contents') + if cts is None: + raise ValueError("Cloning or referencing an existing node"\ + " into an alternate configuration is not supported") + ntype = _get_type(alt, cts) + # dispatcher.get(ntype)(alt, None, node=nd) + dispatcher.get(ntype)(alt, node=nd) - if ctx.input == '|': - return self.machine.Choice - elif ctx.input is None: - return self.machine.Final - else: - return self.machine.Main + return nd + def __handle_clone(self, desc, parent_node): + if isinstance(desc.get('contents'), Node): + name, ident = self._handle_name(desc['contents'].name) + else: + name, ident = self._handle_name(desc['name']) - @register - class Choice(Initial): + exp = desc.get('import_from', None) + if exp is not None: + assert self.dm is not None, "ModelHelper should be initialized with the current data model!" 
+ data_id = desc.get('data_id', None) + assert data_id is not None, "Missing field: 'data_id' (to be used with 'import_from' field)" + nd = self.dm.get_external_node(dm_name=exp, data_id=data_id, name=name) + assert nd is not None, "The requested data ID '{:s}' does not exist!".format(data_id) + self.node_dico[(name, ident)] = nd + return nd - def _run(self, ctx): - if not ctx.choice: - # if it is still possible to build a NT with multiple shapes - if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): - ctx.choice = True - else: - raise InconvertibilityError() + nd = Node(name) + clone_ref = desc.get('clone', None) + if clone_ref is not None: + ref = self._handle_name(clone_ref) + self._register_todo(nd, self._clone_from_dict, args=(ref, desc), + prio=self.MEDIUM_PRIO) + self.node_dico[(name, ident)] = nd + else: + ref = (name, ident) + if ref in self.node_dico.keys(): + nd = self.node_dico[ref] else: - pass + # in this case nd.cc is still set to NodeInternals_Empty + self._register_todo(nd, self._get_from_dict, args=(ref, parent_node), + prio=self.HIGH_PRIO) + return nd - @register - class Final(State): + def __pre_handling(self, desc, node): + if node: + if isinstance(node.cc, NodeInternals_Empty): + raise ValueError("Error: alternative configuration"\ + " cannot be added to empty node ({:s})".format(node.name)) + conf = desc['conf'] + node.add_conf(conf) + n = node + elif isinstance(desc['contents'], Node): + n = desc['contents'] + conf = None + else: + conf = None + ref = self._handle_name(desc['name']) + if ref in self.node_dico: + raise ValueError("name {!r} is already used!".format(ref)) + n = Node(ref[0]) - def _run(self, ctx): - ctx.flush() + return n, conf - def advance(self, ctx): - return None + def __post_handling(self, desc, node): + if not isinstance(node.cc, NodeInternals_Empty): + if isinstance(desc.get('contents'), Node): + ref = self._handle_name(desc['contents'].name) + else: + ref = self._handle_name(desc['name']) + self.node_dico[ref] = node + def _update_provided_node(self, desc, node=None): + n, conf = self.__pre_handling(desc, node) + self._handle_custo(n, desc, conf) + self._handle_common_attr(n, desc, conf) + return n - @register - class Main(State): + def _create_generator_node(self, desc, node=None): - def _run(self, ctx): - ctx.append_to_buffer(ctx.input) + n, conf = self.__pre_handling(desc, node) - def advance(self, ctx): - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input == '|': - return self.machine.Choice - elif ctx.input in ('?', '*', '+', '{'): + contents = desc.get('contents') - if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: - raise InconvertibilityError() + if hasattr(contents, '__call__'): + other_args = desc.get('other_args', None) + if hasattr(contents, 'provide_helpers') and contents.provide_helpers: + provide_helpers = True + else: + provide_helpers = desc.get('provide_helpers', False) + node_args = desc.get('node_args', None) + n.set_generator_func(contents, func_arg=other_args, + provide_helpers=provide_helpers, conf=conf) + if node_args is not None: + # node_args interpretation is postponed after all nodes has been created + self._register_todo(n, self._complete_generator, args=(node_args, conf), unpack_args=True, + prio=self.HIGH_PRIO) + else: + raise ValueError("*** ERROR: {:s} is an invalid contents!".format(repr(contents))) - if len(ctx.buffer) == 1: - if 
len(ctx.values) > 1: - content = ctx.buffer - ctx.values = ctx.values[:-1] - ctx.flush() - ctx.append_to_buffer(content) + self._handle_custo(n, desc, conf) + self._handle_common_attr(n, desc, conf) - else: - content = ctx.buffer[-1] - ctx.buffer = ctx.buffer[:-1] - ctx.flush() - ctx.append_to_buffer(content) + return n - if ctx.input == '{': - return self.machine.Brackets - else: - return self.machine.QtyState - elif ctx.input in ('}',')',']'): - raise StructureError(ctx.input) - elif ctx.input is None: - return self.machine.Final + def _create_non_terminal_node_from_regex(self, desc, node=None): - return self.machine.Main + n, conf = self.__pre_handling(desc, node) + name = desc.get('name') if desc.get('name') is not None else node.name + if isinstance(name, tuple): + name = name[0] + regexp = desc.get('contents') - @register - class QtyState(State): + parser = RegexParser() + nodes = parser.parse(regexp, name, desc.get('charset')) - def _run(self, ctx): - ctx.min = 1 if ctx.input == '+' else 0 - ctx.max = 1 if ctx.input == '?' else None + if len(nodes) == 2 and len(nodes[1]) == 2 and (nodes[1][1][1] == nodes[1][1][2] == 1 or + isinstance(nodes[1][1][0], fvt.String) and nodes[1][1][0].alphabet is not None): + n.set_values(value_type=nodes[1][1][0].internals[nodes[1][1][0].current_conf].value_type, conf=conf) + else: + n.set_subnodes_with_csts(nodes, conf=conf) - ctx.flush() - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError() - elif ctx.input in ('}', ')', ']'): - raise StructureError(ctx.input) - elif ctx.input == '|': - return self.machine.Choice - elif ctx.input is None: - return self.machine.Final + custo_set = desc.get('custo_set', None) + custo_clear = desc.get('custo_clear', None) - if ctx.choice: - raise InconvertibilityError() + if custo_set or custo_clear: + custo = NonTermCusto(items_to_set=custo_set, items_to_clear=custo_clear) + internals = n.cc if conf is None else n.c[conf] + internals.customize(custo) - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.Escape - else: - return self.machine.Main + sep_desc = desc.get('separator', None) + if sep_desc is not None: + sep_node_desc = sep_desc.get('contents', None) + assert (sep_node_desc is not None) + sep_node = self._create_graph_from_desc(sep_node_desc, n) + prefix = sep_desc.get('prefix', True) + suffix = sep_desc.get('suffix', True) + unique = sep_desc.get('unique', False) + n.set_separator_node(sep_node, prefix=prefix, suffix=suffix, unique=unique) + self._handle_common_attr(n, desc, conf) - @register - class Brackets(StateMachine, QtyState): + return n - @initial - class Initial(State): - def _run(self, ctx): - ctx.min = "" + def _create_non_terminal_node(self, desc, node=None): - def advance(self, ctx): - if ctx.input.isdigit(): - return self.machine.Min - else: - raise QuantificationError() + n, conf = self.__pre_handling(desc, node) - @register - class Min(State): + shapes = [] + cts = desc.get('contents') + if not cts: + raise ValueError - def _run(self, ctx): - ctx.min += ctx.input + if isinstance(cts[0], (list,tuple)): + # thus contains at least something that is not a + # node_desc, that is directly a node. Thus, only one + # shape! 
+ w = None + else: + w = cts[0].get('weight', None) - def advance(self, context): - if context.input.isdigit(): - return self.machine.Min - elif context.input == ',': - return self.machine.Comma - elif context.input == '}': - return self.machine.Final - else: - raise QuantificationError() + if w is not None: + # in this case there are multiple shapes, as shape can be + # discriminated by its weight attr + for s in desc.get('contents'): + self._verify_keys_conformity(s) + weight = s.get('weight', 1) + shape = self._create_nodes_from_shape(s['contents'], n) + shapes.append(weight) + shapes.append(shape) + else: + # in this case there is only one shape + shtype = desc.get('shape_type', MH.Ordered) + dupmode = desc.get('duplicate_mode', MH.Copy) + shape = self._create_nodes_from_shape(cts, n, shape_type=shtype, + dup_mode=dupmode) + shapes.append(1) + shapes.append(shape) - @register - class Max(State): + n.set_subnodes_with_csts(shapes, conf=conf) - def _run(self, ctx): - ctx.max += ctx.input + self._handle_custo(n, desc, conf) - def advance(self, context): - if context.input.isdigit(): - return self.machine.Max - elif context.input == '}': - return self.machine.Final - else: - raise QuantificationError() + sep_desc = desc.get('separator', None) + if sep_desc is not None: + sep_node_desc = sep_desc.get('contents', None) + assert(sep_node_desc is not None) + sep_node = self._create_graph_from_desc(sep_node_desc, n) + prefix = sep_desc.get('prefix', True) + suffix = sep_desc.get('suffix', True) + unique = sep_desc.get('unique', False) + n.set_separator_node(sep_node, prefix=prefix, suffix=suffix, unique=unique) - @register - class Comma(Max): + self._handle_common_attr(n, desc, conf) - def _run(self, ctx): - ctx.max = "" + return n - @register - class Final(State): - def _run(self, ctx): - ctx.min = int(ctx.min) - if ctx.max is None: - ctx.max = ctx.min - elif len(ctx.max) == 0: - ctx.max = None + def _create_nodes_from_shape(self, shapes, parent_node, shape_type=MH.Ordered, dup_mode=MH.Copy): + + def _handle_section(nodes_desc, sh): + for n in nodes_desc: + if isinstance(n, (list,tuple)) and (len(n) == 2 or len(n) == 3): + sh.append(list(n)) + elif isinstance(n, dict): + qty = n.get('qty', 1) + if isinstance(qty, tuple): + mini = qty[0] + maxi = qty[1] + elif isinstance(qty, int): + mini = qty + maxi = qty + else: + raise ValueError + l = [mini, maxi] + node = self._create_graph_from_desc(n, parent_node) + l.insert(0, node) + sh.append(l) else: - ctx.max = int(ctx.max) + raise ValueError('Unrecognized section type!') - if ctx.max is not None and ctx.min > ctx.max: - raise QuantificationError(u"{X,Y}: X \u2264 Y constraint not respected.") + sh = [] + prev_section_exist = False + first_pass = True + # Note that sections are not always materialised in the description + for section_desc in shapes: - ctx.flush() + # check if it is directly a node + if isinstance(section_desc, (list,tuple)): + if prev_section_exist or first_pass: + prev_section_exist = False + first_pass = False + sh.append(dup_mode + shape_type) + _handle_section([section_desc], sh) - def advance(self, context): - return None + # check if it is a section description + elif section_desc.get('name') is None and not isinstance(section_desc.get('contents'), Node): + prev_section_exist = True + self._verify_keys_conformity(section_desc) + sec_type = section_desc.get('section_type', MH.Ordered) + dupmode = section_desc.get('duplicate_mode', MH.Copy) + # TODO: revamp weights + weights = ''.join(str(section_desc.get('weights', 
'')).split(' ')) + sh.append(dupmode+sec_type+weights) + _handle_section(section_desc.get('contents', []), sh) - def advance(self, ctx): - return self.machine.QtyState.advance(self, ctx) + # if 'name' attr is present, it is not a section in the + # shape, thus we adopt the default sequencing of nodes. + else: + if prev_section_exist or first_pass: + prev_section_exist = False + first_pass = False + sh.append(dup_mode + shape_type) + _handle_section([section_desc], sh) + return sh - class Group(State): - def advance(self, ctx): - if ctx.input in (')', '}', ']'): - raise StructureError(ctx.input) + def _create_leaf_node(self, desc, node=None): - elif ctx.input in ('*', '+', '?'): - return self.machine.QtyState - elif ctx.input == '{': - return self.machine.Brackets - else: - ctx.flush() + n, conf = self.__pre_handling(desc, node) - if ctx.input == '|': - return self.machine.Choice - elif ctx.input is None: - return self.machine.Final - elif ctx.choice: - raise InconvertibilityError() + contents = desc.get('contents') - if ctx.input == '(': - return self.machine.Parenthesis - elif ctx.input == '[': - return self.machine.SquareBrackets - elif ctx.input == '\\': - return self.machine.Escape - else: - return self.machine.Main + if issubclass(contents.__class__, VT): + if hasattr(contents, 'usable') and contents.usable == False: + raise ValueError("ERROR: {:s} is not usable! (use a subclass of it)".format(repr(contents))) + n.set_values(value_type=contents, conf=conf) + elif hasattr(contents, '__call__'): + other_args = desc.get('other_args', None) + provide_helpers = desc.get('provide_helpers', False) + node_args = desc.get('node_args', None) + n.set_func(contents, func_arg=other_args, + provide_helpers=provide_helpers, conf=conf) + # node_args interpretation is postponed after all nodes has been created + self._register_todo(n, self._complete_func, args=(node_args, conf), unpack_args=True, + prio=self.HIGH_PRIO) - @register - class Parenthesis(StateMachine, Group): + else: + raise ValueError("ERROR: {:s} is an invalid contents!".format(repr(contents))) - @initial - class Initial(State): + self._handle_custo(n, desc, conf) + self._handle_common_attr(n, desc, conf) - def _run(self, ctx): - ctx.flush() - ctx.append_to_buffer("") + return n - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError() - elif ctx.input in ('}', ']', None): - raise StructureError(ctx.input) - elif ctx.input in ('(', '['): - raise InconvertibilityError() - elif ctx.input == '\\': - return self.machine.Escape - elif ctx.input == ')': - return self.machine.Final - elif ctx.input == '|': - return self.machine.Choice + def _handle_custo(self, node, desc, conf): + custo_set = desc.get('custo_set', None) + custo_clear = desc.get('custo_clear', None) + + if node.is_genfunc(conf=conf): + Custo = GenFuncCusto + trig_last = desc.get('trigger_last', None) + if trig_last is not None: + if trig_last: + if custo_set is None: + custo_set = [] + elif not isinstance(custo_set, list): + custo_set = [custo_set] + custo_set.append(MH.Custo.Gen.TriggerLast) else: - return self.machine.Main + if custo_clear is None: + custo_clear = [] + elif not isinstance(custo_clear, list): + custo_clear = [custo_clear] + custo_clear.append(MH.Custo.Gen.TriggerLast) - @register - class Final(State): + elif node.is_nonterm(conf=conf): + Custo = NonTermCusto - def _run(self, context): - pass + elif node.is_func(conf=conf): + Custo = FuncCusto - def advance(self, context): - return None + else: + if custo_set or 
custo_clear: + raise DataModelDefinitionError('Customization is not compatible with this ' + 'node kind! [Guilty Node: {:s}]'.format(node.name)) + else: + return + if custo_set or custo_clear: + custo = Custo(items_to_set=custo_set, items_to_clear=custo_clear) + internals = node.conf(conf) + internals.customize(custo) - @register - class Main(Initial): - def _run(self, ctx): - ctx.append_to_buffer(ctx.input) - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise InconvertibilityError() + def _handle_common_attr(self, node, desc, conf): + vals = desc.get('specific_fuzzy_vals', None) + if vals is not None: + if not node.is_typed_value(conf=conf): + raise DataModelDefinitionError("'specific_fuzzy_vals' is only usable with Typed-nodes") + node.conf(conf).set_specific_fuzzy_values(vals) + param = desc.get('mutable', None) + if param is not None: + if param: + node.set_attr(MH.Attr.Mutable, conf=conf) + else: + node.clear_attr(MH.Attr.Mutable, conf=conf) + param = desc.get('determinist', None) + if param is not None: + node.make_determinist(conf=conf) + param = desc.get('random', None) + if param is not None: + node.make_random(conf=conf) + param = desc.get('finite', None) + if param is not None: + node.make_finite(conf=conf) + param = desc.get('infinite', None) + if param is not None: + node.make_infinite(conf=conf) + param = desc.get('clear_attrs', None) + if param is not None: + if isinstance(param, (list, tuple)): + for a in param: + node.clear_attr(a, conf=conf) + else: + node.clear_attr(param, conf=conf) + param = desc.get('set_attrs', None) + if param is not None: + if isinstance(param, (list, tuple)): + for a in param: + node.set_attr(a, conf=conf) + else: + node.set_attr(param, conf=conf) + param = desc.get('absorb_csts', None) + if param is not None: + node.enforce_absorb_constraints(param, conf=conf) + param = desc.get('absorb_helper', None) + if param is not None: + node.set_absorb_helper(param, conf=conf) + param = desc.get('semantics', None) + if param is not None: + node.set_semantics(NodeSemantics(param)) + ref = desc.get('sync_qty_with', None) + if ref is not None: + self._register_todo(node, self._set_sync_node, + args=(ref, SyncScope.Qty, conf, None), + unpack_args=True) + qty_from = desc.get('qty_from', None) + if qty_from is not None: + self._register_todo(node, self._set_sync_node, + args=(qty_from, SyncScope.QtyFrom, conf, None), + unpack_args=True) - return self.machine.Initial.advance(self, ctx) + sync_size_with = desc.get('sync_size_with', None) + sync_enc_size_with = desc.get('sync_enc_size_with', None) + assert sync_size_with is None or sync_enc_size_with is None + if sync_size_with is not None: + self._register_todo(node, self._set_sync_node, + args=(sync_size_with, SyncScope.Size, conf, False), + unpack_args=True) + if sync_enc_size_with is not None: + self._register_todo(node, self._set_sync_node, + args=(sync_enc_size_with, SyncScope.Size, conf, True), + unpack_args=True) + condition = desc.get('exists_if', None) + if condition is not None: + self._register_todo(node, self._set_sync_node, + args=(condition, SyncScope.Existence, conf, None), + unpack_args=True) + condition = desc.get('exists_if/and', None) + if condition is not None: + self._register_todo(node, self._set_sync_node, + args=(condition, SyncScope.Existence, conf, 'and'), + unpack_args=True) + condition = desc.get('exists_if/or', None) + if condition is not None: + self._register_todo(node, self._set_sync_node, + args=(condition, SyncScope.Existence, conf, 'or'), + unpack_args=True) 
+ condition = desc.get('exists_if_not', None) + if condition is not None: + self._register_todo(node, self._set_sync_node, + args=(condition, SyncScope.Inexistence, conf, None), + unpack_args=True) + fw = desc.get('fuzz_weight', None) + if fw is not None: + node.set_fuzz_weight(fw) + pfh = desc.get('post_freeze', None) + if pfh is not None: + node.register_post_freeze_handler(pfh) + encoder = desc.get('encoder', None) + if encoder is not None: + node.set_encoder(encoder) - @register - class Choice(Initial): + def _register_todo(self, node, func, args=None, unpack_args=True, prio=VERYLOW_PRIO): + if self.sorted_todo.get(prio, None) is None: + self.sorted_todo[prio] = [] + self.sorted_todo[prio].insert(0, (node, func, args, unpack_args)) - def _run(self, ctx): - ctx.append_to_contents("") - - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError() + def _create_todo_list(self): + todo = [] + tdl = sorted(self.sorted_todo.items(), key=lambda x: x[0]) + self.sorted_todo = {} + for prio, sub_tdl in tdl: + todo += sub_tdl + return todo - return self.machine.Initial.advance(self, ctx) + # Should be called at the last time to avoid side effects (e.g., + # when creating generator/function nodes, the node arguments are + # provided at a later time. If set_contents()---which copy nodes---is called + # in-between, node arguments risk to not be copied) + def _clone_from_dict(self, node, ref, desc): + if ref not in self.node_dico: + raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) + node.set_contents(self.node_dico[ref]) + self._handle_custo(node, desc, conf=None) + self._handle_common_attr(node, desc, conf=None) - @register - class Escape(State): + def _get_from_dict(self, node, ref, parent_node): + if ref not in self.node_dico: + raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) + parent_node.replace_subnode(node, self.node_dico[ref]) - def _run(self, ctx): - pass + def _set_sync_node(self, node, comp, scope, conf, private): + sync_obj = None - def advance(self, ctx): - if ctx.input in ctx.META_SEQUENCES: - raise InconvertibilityError() - elif ctx.input in ctx.SPECIAL_CHARS: - return self.machine.Main - else: - raise EscapeError(ctx.input) + if scope == SyncScope.QtyFrom: + if isinstance(comp, (tuple,list)): + node_ref, base_qty = comp + else: + node_ref, base_qty = comp, 0 + sync_with = self.__get_node_from_db(node_ref) + sync_obj = SyncQtyFromObj(sync_with, base_qty=base_qty) + elif scope == SyncScope.Size: + if isinstance(comp, (tuple,list)): + node_ref, base_size = comp + else: + node_ref, base_size = comp, 0 + sync_with = self.__get_node_from_db(node_ref) + sync_obj = SyncSizeObj(sync_with, base_size=base_size, + apply_to_enc_size=private) + else: + if isinstance(comp, (tuple,list)): + if issubclass(comp[0].__class__, NodeCondition): + param = comp[0] + sync_with = self.__get_node_from_db(comp[1]) + elif issubclass(comp[0].__class__, (tuple,list)): + assert private in ['and', 'or'] + sync_list = [] + for subcomp in comp: + assert isinstance(subcomp, (tuple,list)) and len(subcomp) == 2 + param = subcomp[0] + sync_with = self.__get_node_from_db(subcomp[1]) + sync_list.append((sync_with, param)) + and_junction = private == 'and' + sync_obj = SyncExistenceObj(sync_list, and_junction=and_junction) + else: # in this case this is a node reference in the form ('node name', ID) + param = None + sync_with = self.__get_node_from_db(comp) + else: + param = None + sync_with = 
self.__get_node_from_db(comp) - @register - class SquareBrackets(StateMachine, Group): + if sync_obj is not None: + node.make_synchronized_with(scope=scope, sync_obj=sync_obj, conf=conf) + else: + node.make_synchronized_with(scope=scope, node=sync_with, param=param, conf=conf) - @initial - class Initial(State): + def _complete_func(self, node, args, conf): + if isinstance(args, str): + func_args = self.__get_node_from_db(args) + else: + assert(isinstance(args, (tuple, list))) + func_args = [] + for name_desc in args: + func_args.append(self.__get_node_from_db(name_desc)) + internals = node.cc if conf is None else node.c[conf] + internals.set_func_arg(node=func_args) - def _run(self, ctx): - ctx.flush() - ctx.append_to_alphabet("") + def _complete_generator(self, node, args, conf): + if isinstance(args, str) or \ + (isinstance(args, tuple) and isinstance(args[1], int)): + func_args = self.__get_node_from_db(args) + else: + assert(isinstance(args, (tuple, list))) + func_args = [] + for name_desc in args: + func_args.append(self.__get_node_from_db(name_desc)) + internals = node.cc if conf is None else node.c[conf] + internals.set_generator_func_arg(generator_node_arg=func_args) - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{'): - raise QuantificationError() - elif ctx.input in ('}', ')', None): - raise StructureError(ctx.input) - elif ctx.input in ('(', '['): - raise InconvertibilityError() - elif ctx.input == '-': - raise InvalidRangeError() - elif ctx.input == ']': - raise EmptyAlphabetError() - elif ctx.input == '\\': - return self.machine.EscapeBeforeRange - else: - return self.machine.BeforeRange + def _set_env(self, node, args): + env = Env() + env.delayed_jobs_enabled = self.delayed_jobs + node.set_env(env) + def __get_node_from_db(self, name_desc): + ref = self._handle_name(name_desc) + if ref not in self.node_dico: + raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) - @register - class Final(State): + node = self.node_dico[ref] + if isinstance(node.cc, NodeInternals_Empty): + raise ValueError("Node ({:s}, {!s}) is Empty!".format(ref[0], ref[1])) + + return node - def _run(self, ctx): - pass - def advance(self, ctx): - return None - @register - class BeforeRange(Initial): - def _run(self, ctx): - ctx.append_to_alphabet(ctx.input) +class State(object): + """ + Represent states at the lower level + """ - def advance(self, ctx): - if ctx.input == ']': - return self.machine.Final - elif ctx.input == '-': - return self.machine.Range - else: - return self.machine.Initial.advance(self, ctx) + def __init__(self, machine): + """ + Args: + machine (StateMachine): state machine where it lives (local context) + """ + self.machine = machine + self.init_specific() - @register - class Range(State): - def _run(self, ctx): - pass + def init_specific(self): + """ + Can be overridden to express additional initializations + """ + pass - def advance(self, ctx): - if ctx.input in ('?', '*', '+', '{', '}', '(', ')', '[', ']', '|', '-', None): - raise InvalidRangeError() - elif ctx.input == '\\': - return self.machine.EscapeAfterRange - else: - return self.machine.AfterRange + def _run(self, context): + raise NotImplementedError - @register - class AfterRange(Initial): - def _run(self, ctx): - if ctx.alphabet[-1] > ctx.input: - raise InvalidRangeError() - elif ctx.input == ctx.alphabet[-1]: - pass - else: - for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): - ctx.append_to_alphabet(ctx.int_to_string(i)) + def run(self, context): + """ + 
Do some actions on the current character. + Args: + context (StateMachine): root state machine (global context) + """ + if context.input is not None and \ + ((context.charset == MH.Charset.ASCII and ord(context.input) > 0x7F) or + (context.charset == MH.Charset.ASCII_EXT and ord(context.input) > 0xFF)): + raise CharsetError() + self._run(context) + context.inputs.pop(0) - def advance(self, ctx): - if ctx.input == ']': - return self.machine.Final - else: - return self.machine.Initial.advance(self, ctx) + def advance(self, context): + """ + Check transitions using the first non-run character. + Args: + context (StateMachine): root state machine (global context) - @register - class EscapeBeforeRange(State): + Returns: + Class of the next state de run (None if we are in a final state) + """ + raise NotImplementedError - def _run(self, ctx): - pass +class StateMachine(State): + """ + Represent states that contain other states. + """ - def advance(self, ctx): - if ctx.input in ctx.META_SEQUENCES: - return self.machine.EscapeMetaSequence - elif ctx.input in ctx.SPECIAL_CHARS: - return self.machine.BeforeRange - else: - raise EscapeError(ctx.input) + def __init__(self, machine=None): + self.states = {} + self.inputs = None - @register - class EscapeMetaSequence(BeforeRange): + for name, cls in inspect.getmembers(self.__class__): + if inspect.isclass(cls) and issubclass(cls, State) and hasattr(cls, 'INITIAL'): + self.states[cls] = cls(self) - def _run(self, ctx): - ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) + State.__init__(self, self if machine is None else machine) - @register - class EscapeAfterRange(State): + @property + def input(self): + return None if self.inputs is None or len(self.inputs) == 0 else self.inputs[0] - def _run(self, ctx): - pass + def _run(self, context): + while self.state is not None: + self.state.run(context) + next_state = self.state.advance(context) + self.state = self.states[next_state] if next_state is not None else None - def advance(self, ctx): - if ctx.input in ctx.META_SEQUENCES: - raise InvalidRangeError() - elif ctx.input in ctx.SPECIAL_CHARS: - return self.machine.AfterRange - else: - raise EscapeError(ctx.input) + def run(self, context): + for state in self.states: + if state.INITIAL: + self.state = self.states[state] + break + else: + raise InitialStateNotFoundError() + self._run(context) - @register - class Escape(State): +def register(cls): + cls.INITIAL = False + return cls + +def initial(cls): + cls.INITIAL = True + return cls + +class RegexParser(StateMachine): + + @initial + class Initial(State): def _run(self, ctx): pass def advance(self, ctx): - if ctx.input in ctx.META_SEQUENCES: - return self.machine.EscapeMetaSequence - elif ctx.input in ctx.SPECIAL_CHARS: - return self.machine.Main + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ')', ']'): + raise StructureError(ctx.input) + + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '\\': + return self.machine.Escape else: - raise EscapeError(ctx.input) + ctx.append_to_contents("") + if ctx.input == '|': + return self.machine.Choice + elif ctx.input is None: + return self.machine.Final + else: + return self.machine.Main @register - class EscapeMetaSequence(Group): + class Choice(Initial): def _run(self, ctx): - if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: - raise InconvertibilityError() - - if ctx.buffer is not None: - - if len(ctx.buffer) == 
0: - - if len(ctx.values[:-1]) > 0: - ctx.values = ctx.values[:-1] - ctx.flush() + if not ctx.choice: + # if it is still possible to build a NT with multiple shapes + if len(ctx.nodes) == 0 or (len(ctx.nodes) == 1 and ctx.buffer is None): + ctx.choice = True else: - ctx.flush() + raise InconvertibilityError() + else: + pass - ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) + @register + class Final(State): + def _run(self, ctx): + ctx.flush() - def init_specific(self): - self._name = None - self.charset = None + def advance(self, ctx): + return None - self.values = None - self.alphabet = None + @register + class Main(State): - self.choice = False + def _run(self, ctx): + ctx.append_to_buffer(ctx.input) - self.min = None - self.max = None + def advance(self, ctx): + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input == '|': + return self.machine.Choice + elif ctx.input in ('?', '*', '+', '{'): - self.nodes = [] + if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: + raise InconvertibilityError() + if len(ctx.buffer) == 1: + if len(ctx.values) > 1: + content = ctx.buffer + ctx.values = ctx.values[:-1] + ctx.flush() + ctx.append_to_buffer(content) - def append_to_contents(self, content): - if self.values is None: - self.values = [] - self.values.append(content) + else: + content = ctx.buffer[-1] + ctx.buffer = ctx.buffer[:-1] + ctx.flush() + ctx.append_to_buffer(content) - def append_to_buffer(self, str): - if self.values is None: - self.values = [""] - if self.values[-1] is None: - self.values[-1] = "" - self.values[-1] += str + if ctx.input == '{': + return self.machine.Brackets + else: + return self.machine.QtyState - def append_to_alphabet(self, alphabet): - if self.alphabet is None: - self.alphabet = "" - self.alphabet += alphabet + elif ctx.input in ('}', ')', ']'): + raise StructureError(ctx.input) + elif ctx.input is None: + return self.machine.Final - @property - def buffer(self): - return None if self.values is None else self.values[-1] + return self.machine.Main - @buffer.setter - def buffer(self, buffer): - if self.values is None: - self.values = [""] - self.values[-1] = buffer + @register + class QtyState(State): - def flush(self): + def _run(self, ctx): + ctx.min = 1 if ctx.input == '+' else 0 + ctx.max = 1 if ctx.input == '?' 
else None - if self.values is None and self.alphabet is None: - return + ctx.flush() - # set default values for min & max if none was provided - if self.min is None and self.max is None: - self.min = self.max = 1 + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ')', ']'): + raise StructureError(ctx.input) + elif ctx.input == '|': + return self.machine.Choice + elif ctx.input is None: + return self.machine.Final - # guess the type of the terminal node to create - if self.values is not None and all(val.isdigit() for val in self.values): - self.values = [int(i) for i in self.values] - type = fvt.INT_str - else: - type = fvt.String + if ctx.choice: + raise InconvertibilityError() - name = self._name + '_' + str(len(self.nodes) + 1) - self.nodes.append(self._create_terminal_node(name, type, values=self.values, - alphabet=self.alphabet, qty=(self.min, self.max))) - self.reset() + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + else: + return self.machine.Main + @register + class Brackets(StateMachine, QtyState): - def reset(self): - self.values = None - self.alphabet = None - self.min = None - self.max = None + @initial + class Initial(State): - def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): - self._name = name - self.charset = charset - self.int_to_string = chr if sys.version_info[0] == 2 and self.charset != MH.Charset.UNICODE else six.unichr + def _run(self, ctx): + ctx.min = "" - if self.charset == MH.Charset.ASCII: - max = 0x7F - elif self.charset == MH.Charset.UNICODE: - max = 0xFFFF - else: - max = 0xFF + def advance(self, ctx): + if ctx.input.isdigit(): + return self.machine.Min + else: + raise QuantificationError() - def get_complement(chars): - return ''.join([self.int_to_string(i) for i in range(0, max + 1) if self.int_to_string(i) not in chars]) + @register + class Min(State): - self.META_SEQUENCES = {'s': string.whitespace, - 'S': get_complement(string.whitespace), - 'd': string.digits, - 'D': get_complement(string.digits), - 'w': string.ascii_letters + string.digits + '_', - 'W': get_complement(string.ascii_letters + string.digits + '_')} + def _run(self, ctx): + ctx.min += ctx.input - self.SPECIAL_CHARS = list('\\()[]{}*+?|-') + def advance(self, context): + if context.input.isdigit(): + return self.machine.Min + elif context.input == ',': + return self.machine.Comma + elif context.input == '}': + return self.machine.Final + else: + raise QuantificationError() - # None indicates the beginning and the end of the regex - self.inputs = [None] + list(inputs) + [None] - self.run(self) + @register + class Max(State): - return self._create_non_terminal_node() + def _run(self, ctx): + ctx.max += ctx.input + def advance(self, context): + if context.input.isdigit(): + return self.machine.Max + elif context.input == '}': + return self.machine.Final + else: + raise QuantificationError() - def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None): + @register + class Comma(Max): - assert(values is not None or alphabet is not None) + def _run(self, ctx): + ctx.max = "" - if alphabet is not None: - return [Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] - else: - if type == fvt.String: - node = Node(name=name, vt=fvt.String(val_list=values)) - else: - node = Node(name=name, vt=fvt.INT_str(int_list=values)) + @register + class 
Final(State): + def _run(self, ctx): + ctx.min = int(ctx.min) - return [node, qty[0], -1 if qty[1] is None else qty[1]] + if ctx.max is None: + ctx.max = ctx.min + elif len(ctx.max) == 0: + ctx.max = None + else: + ctx.max = int(ctx.max) - def _create_non_terminal_node(self): - non_terminal = [1, [MH.Copy + MH.Ordered]] - formatted_terminal = non_terminal[1] + if ctx.max is not None and ctx.min > ctx.max: + raise QuantificationError(u"{X,Y}: X \u2264 Y constraint not respected.") - for terminal in self.nodes: - formatted_terminal.append(terminal) - if self.choice and len(self.nodes) > 1: - non_terminal.append(1) - formatted_terminal = [MH.Copy + MH.Ordered] - non_terminal.append(formatted_terminal) + ctx.flush() - return non_terminal + def advance(self, context): + return None + def advance(self, ctx): + return self.machine.QtyState.advance(self, ctx) + class Group(State): -class ModelHelper(object): + def advance(self, ctx): + if ctx.input in (')', '}', ']'): + raise StructureError(ctx.input) - HIGH_PRIO = 1 - MEDIUM_PRIO = 2 - LOW_PRIO = 3 - VERYLOW_PRIO = 4 + elif ctx.input in ('*', '+', '?'): + return self.machine.QtyState + elif ctx.input == '{': + return self.machine.Brackets + else: + ctx.flush() - valid_keys = [ - # generic description keys - 'name', 'contents', 'qty', 'clone', 'type', 'alt', 'conf', - 'custo_set', 'custo_clear', - # NonTerminal description keys - 'weight', 'shape_type', 'section_type', 'duplicate_mode', 'weights', - 'separator', 'prefix', 'suffix', 'unique', - 'encoder', - # Generator/Function description keys - 'node_args', 'other_args', 'provide_helpers', 'trigger_last', - # Typed-node description keys - 'specific_fuzzy_vals', - # Import description keys - 'import_from', 'data_id', - # node properties description keys - 'determinist', 'random', 'finite', 'infinite', 'mutable', - 'clear_attrs', 'set_attrs', - 'absorb_csts', 'absorb_helper', - 'semantics', 'fuzz_weight', - 'sync_qty_with', 'qty_from', - 'exists_if', 'exists_if_not', - 'exists_if/and', 'exists_if/or', - 'sync_size_with', 'sync_enc_size_with', - 'post_freeze', 'charset' - ] + if ctx.input == '|': + return self.machine.Choice + elif ctx.input is None: + return self.machine.Final + elif ctx.choice: + raise InconvertibilityError() - def __init__(self, dm=None, delayed_jobs=True, add_env=True): - """ - Help the process of data description. This class is able to construct a - :class:`framework.data_model.Node` object from a JSON-like description. + if ctx.input == '(': + return self.machine.Parenthesis + elif ctx.input == '[': + return self.machine.SquareBrackets + elif ctx.input == '\\': + return self.machine.Escape + else: + return self.machine.Main - Args: - dm (DataModel): a DataModel object, only required if the 'import_from' statement is used - with :meth:`create_graph_from_desc`. - delayed_jobs (bool): Enable or disabled delayed jobs feature. Used for instance for - delaying constraint that cannot be solved immediately. - add_env (bool): If `True`, an :class:`framework.data_model.Env` object - will be assigned to the generated :class:`framework.data_model.Node` - from :meth:`create_graph_from_desc`. Should be set to ``False`` if you consider using - the generated `Node` within another description or if you will copy it for building - a new node type. Keeping an ``Env()`` object can be dangerous if you make some clones of - it and don't pay attention to set a new ``Env()`` for each copy, because. 
A graph node - SHALL have only one ``Env()`` shared between all the nodes and an Env() shall not be - shared between independent graph (otherwise it could lead to - unexpected results). - """ - self.dm = dm - self.delayed_jobs = delayed_jobs - self._add_env_to_the_node = add_env + @register + class Parenthesis(StateMachine, Group): - def _verify_keys_conformity(self, desc): - for k in desc.keys(): - if k not in self.valid_keys: - raise KeyError("The description key '{:s}' is not recognized!".format(k)) + @initial + class Initial(State): + def _run(self, ctx): + ctx.flush() + ctx.append_to_buffer("") - def create_graph_from_desc(self, desc): - self.sorted_todo = {} - self.node_dico = {} - self.empty_node = Node('EMPTY') - - n = self._create_graph_from_desc(desc, None) + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ']', None): + raise StructureError(ctx.input) + elif ctx.input in ('(', '['): + raise InconvertibilityError() + elif ctx.input == '\\': + return self.machine.Escape + elif ctx.input == ')': + return self.machine.Final + elif ctx.input == '|': + return self.machine.Choice + else: + return self.machine.Main - if self._add_env_to_the_node: - self._register_todo(n, self._set_env, prio=self.LOW_PRIO) + @register + class Final(State): - todo = self._create_todo_list() - while todo: - for node, func, args, unpack_args in todo: - if isinstance(args, tuple) and unpack_args: - func(node, *args) - else: - func(node, args) - todo = self._create_todo_list() + def _run(self, context): + pass - return n + def advance(self, context): + return None - def _handle_name(self, name_desc): - if isinstance(name_desc, (tuple, list)): - assert(len(name_desc) == 2) - name = name_desc[0] - ident = name_desc[1] - elif isinstance(name_desc, str): - name = name_desc - ident = 1 - else: - raise ValueError("Name is not recognized: '%s'!" 
% name_desc) + @register + class Main(Initial): + def _run(self, ctx): + ctx.append_to_buffer(ctx.input) - return name, ident + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): + raise InconvertibilityError() + return self.machine.Initial.advance(self, ctx) - def _create_graph_from_desc(self, desc, parent_node): + @register + class Choice(Initial): - def _get_type(top_desc, contents): - pre_ntype = top_desc.get('type', None) - if isinstance(contents, list) and pre_ntype in [None, MH.NonTerminal]: - ntype = MH.NonTerminal - elif isinstance(contents, Node) and pre_ntype in [None, MH.RawNode]: - ntype = MH.RawNode - elif hasattr(contents, '__call__') and pre_ntype in [None, MH.Generator]: - ntype = MH.Generator - elif isinstance(contents, six.string_types) and pre_ntype in [None, MH.Regex]: - ntype = MH.Regex - else: - ntype = MH.Leaf - return ntype + def _run(self, ctx): + ctx.append_to_contents("") - self._verify_keys_conformity(desc) + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() - contents = desc.get('contents', None) - dispatcher = {MH.NonTerminal: self._create_non_terminal_node, - MH.Regex: self._create_non_terminal_node_from_regex, - MH.Generator: self._create_generator_node, - MH.Leaf: self._create_leaf_node, - MH.RawNode: self._update_provided_node} + return self.machine.Initial.advance(self, ctx) - if contents is None: - nd = self.__handle_clone(desc, parent_node) - else: - # Non-terminal are recognized via its contents (avoiding - # the user to always provide a 'type' field) - ntype = _get_type(desc, contents) - nd = dispatcher.get(ntype)(desc) - self.__post_handling(desc, nd) + @register + class Escape(State): - alt_confs = desc.get('alt', None) - if alt_confs is not None: - for alt in alt_confs: - self._verify_keys_conformity(alt) - cts = alt.get('contents') - if cts is None: - raise ValueError("Cloning or referencing an existing node"\ - " into an alternate configuration is not supported") - ntype = _get_type(alt, cts) - # dispatcher.get(ntype)(alt, None, node=nd) - dispatcher.get(ntype)(alt, node=nd) + def _run(self, ctx): + pass - return nd + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: + raise InconvertibilityError() + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.Main + else: + raise EscapeError(ctx.input) - def __handle_clone(self, desc, parent_node): - if isinstance(desc.get('contents'), Node): - name, ident = self._handle_name(desc['contents'].name) - else: - name, ident = self._handle_name(desc['name']) + @register + class SquareBrackets(StateMachine, Group): - exp = desc.get('import_from', None) - if exp is not None: - assert self.dm is not None, "ModelHelper should be initialized with the current data model!" 
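# Note: a minimal sketch of how the 'import_from'/'data_id' pair handled
# below is meant to be used in a description (the data model name and the
# data ID are hypothetical):
#
#     {'name': 'icmp_payload',
#      'import_from': 'icmp',     # data model in which to look the node up
#      'data_id': 'echo_req'}     # ID of the data within that model
#
# The node is then fetched through
# self.dm.get_external_node(dm_name='icmp', data_id='echo_req', name='icmp_payload').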
- data_id = desc.get('data_id', None) - assert data_id is not None, "Missing field: 'data_id' (to be used with 'import_from' field)" - nd = self.dm.get_external_node(dm_name=exp, data_id=data_id, name=name) - assert nd is not None, "The requested data ID '{:s}' does not exist!".format(data_id) - self.node_dico[(name, ident)] = nd - return nd + @initial + class Initial(State): - nd = Node(name) - clone_ref = desc.get('clone', None) - if clone_ref is not None: - ref = self._handle_name(clone_ref) - self._register_todo(nd, self._clone_from_dict, args=(ref, desc), - prio=self.MEDIUM_PRIO) - self.node_dico[(name, ident)] = nd - else: - ref = (name, ident) - if ref in self.node_dico.keys(): - nd = self.node_dico[ref] - else: - # in this case nd.cc is still set to NodeInternals_Empty - self._register_todo(nd, self._get_from_dict, args=(ref, parent_node), - prio=self.HIGH_PRIO) + def _run(self, ctx): + ctx.flush() + ctx.append_to_alphabet("") - return nd + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{'): + raise QuantificationError() + elif ctx.input in ('}', ')', None): + raise StructureError(ctx.input) + elif ctx.input in ('(', '['): + raise InconvertibilityError() + elif ctx.input == '-': + raise InvalidRangeError() + elif ctx.input == ']': + raise EmptyAlphabetError() + elif ctx.input == '\\': + return self.machine.EscapeBeforeRange + else: + return self.machine.BeforeRange - def __pre_handling(self, desc, node): - if node: - if isinstance(node.cc, NodeInternals_Empty): - raise ValueError("Error: alternative configuration"\ - " cannot be added to empty node ({:s})".format(node.name)) - conf = desc['conf'] - node.add_conf(conf) - n = node - elif isinstance(desc['contents'], Node): - n = desc['contents'] - conf = None - else: - conf = None - ref = self._handle_name(desc['name']) - if ref in self.node_dico: - raise ValueError("name {!r} is already used!".format(ref)) - n = Node(ref[0]) + @register + class Final(State): - return n, conf + def _run(self, ctx): + pass - def __post_handling(self, desc, node): - if not isinstance(node.cc, NodeInternals_Empty): - if isinstance(desc.get('contents'), Node): - ref = self._handle_name(desc['contents'].name) - else: - ref = self._handle_name(desc['name']) - self.node_dico[ref] = node + def advance(self, ctx): + return None - def _update_provided_node(self, desc, node=None): - n, conf = self.__pre_handling(desc, node) - self._handle_custo(n, desc, conf) - self._handle_common_attr(n, desc, conf) - return n + @register + class BeforeRange(Initial): + def _run(self, ctx): + ctx.append_to_alphabet(ctx.input) - def _create_generator_node(self, desc, node=None): + def advance(self, ctx): + if ctx.input == ']': + return self.machine.Final + elif ctx.input == '-': + return self.machine.Range + else: + return self.machine.Initial.advance(self, ctx) - n, conf = self.__pre_handling(desc, node) + @register + class Range(State): + def _run(self, ctx): + pass - contents = desc.get('contents') + def advance(self, ctx): + if ctx.input in ('?', '*', '+', '{', '}', '(', ')', '[', ']', '|', '-', None): + raise InvalidRangeError() + elif ctx.input == '\\': + return self.machine.EscapeAfterRange + else: + return self.machine.AfterRange - if hasattr(contents, '__call__'): - other_args = desc.get('other_args', None) - if hasattr(contents, 'provide_helpers') and contents.provide_helpers: - provide_helpers = True - else: - provide_helpers = desc.get('provide_helpers', False) - node_args = desc.get('node_args', None) - n.set_generator_func(contents, 
func_arg=other_args, - provide_helpers=provide_helpers, conf=conf) - if node_args is not None: - # node_args interpretation is postponed after all nodes has been created - self._register_todo(n, self._complete_generator, args=(node_args, conf), unpack_args=True, - prio=self.HIGH_PRIO) - else: - raise ValueError("*** ERROR: {:s} is an invalid contents!".format(repr(contents))) + @register + class AfterRange(Initial): + def _run(self, ctx): + if ctx.alphabet[-1] > ctx.input: + raise InvalidRangeError() + elif ctx.input == ctx.alphabet[-1]: + pass + else: + for i in range(ord(ctx.alphabet[-1]) + 1, ord(ctx.input) + 1): + ctx.append_to_alphabet(ctx.int_to_string(i)) - self._handle_custo(n, desc, conf) - self._handle_common_attr(n, desc, conf) + def advance(self, ctx): + if ctx.input == ']': + return self.machine.Final + else: + return self.machine.Initial.advance(self, ctx) - return n + @register + class EscapeBeforeRange(State): + def _run(self, ctx): + pass - def _create_non_terminal_node_from_regex(self, desc, node=None): + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: + return self.machine.EscapeMetaSequence + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.BeforeRange + else: + raise EscapeError(ctx.input) - n, conf = self.__pre_handling(desc, node) + @register + class EscapeMetaSequence(BeforeRange): - name = desc.get('name') if desc.get('name') is not None else node.name - if isinstance(name, tuple): - name = name[0] - regexp = desc.get('contents') + def _run(self, ctx): + ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) - parser = RegexParser() - nodes = parser.parse(regexp, name, desc.get('charset')) + @register + class EscapeAfterRange(State): - if len(nodes) == 2 and len(nodes[1]) == 2 and (nodes[1][1][1] == nodes[1][1][2] == 1 or - isinstance(nodes[1][1][0], fvt.String) and nodes[1][1][0].alphabet is not None): - n.set_values(value_type=nodes[1][1][0].internals[nodes[1][1][0].current_conf].value_type, conf=conf) - else: - n.set_subnodes_with_csts(nodes, conf=conf) + def _run(self, ctx): + pass + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: + raise InvalidRangeError() + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.AfterRange + else: + raise EscapeError(ctx.input) - custo_set = desc.get('custo_set', None) - custo_clear = desc.get('custo_clear', None) + @register + class Escape(State): - if custo_set or custo_clear: - custo = NonTermCusto(items_to_set=custo_set, items_to_clear=custo_clear) - internals = n.cc if conf is None else n.c[conf] - internals.customize(custo) + def _run(self, ctx): + pass - sep_desc = desc.get('separator', None) - if sep_desc is not None: - sep_node_desc = sep_desc.get('contents', None) - assert (sep_node_desc is not None) - sep_node = self._create_graph_from_desc(sep_node_desc, n) - prefix = sep_desc.get('prefix', True) - suffix = sep_desc.get('suffix', True) - unique = sep_desc.get('unique', False) - n.set_separator_node(sep_node, prefix=prefix, suffix=suffix, unique=unique) + def advance(self, ctx): + if ctx.input in ctx.META_SEQUENCES: + return self.machine.EscapeMetaSequence + elif ctx.input in ctx.SPECIAL_CHARS: + return self.machine.Main + else: + raise EscapeError(ctx.input) - self._handle_common_attr(n, desc, conf) + @register + class EscapeMetaSequence(Group): - return n + def _run(self, ctx): + if ctx.choice and len(ctx.values) > 1 and len(ctx.buffer) > 1: + raise InconvertibilityError() + if ctx.buffer is not None: - def _create_non_terminal_node(self, desc, node=None): + if 
len(ctx.buffer) == 0: - n, conf = self.__pre_handling(desc, node) + if len(ctx.values[:-1]) > 0: + ctx.values = ctx.values[:-1] + ctx.flush() + else: + ctx.flush() - shapes = [] - cts = desc.get('contents') - if not cts: - raise ValueError + ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input]) - if isinstance(cts[0], (list,tuple)): - # thus contains at least something that is not a - # node_desc, that is directly a node. Thus, only one - # shape! - w = None - else: - w = cts[0].get('weight', None) + def init_specific(self): + self._name = None + self.charset = None - if w is not None: - # in this case there are multiple shapes, as shape can be - # discriminated by its weight attr - for s in desc.get('contents'): - self._verify_keys_conformity(s) - weight = s.get('weight', 1) - shape = self._create_nodes_from_shape(s['contents'], n) - shapes.append(weight) - shapes.append(shape) - else: - # in this case there is only one shape - shtype = desc.get('shape_type', MH.Ordered) - dupmode = desc.get('duplicate_mode', MH.Copy) - shape = self._create_nodes_from_shape(cts, n, shape_type=shtype, - dup_mode=dupmode) - shapes.append(1) - shapes.append(shape) + self.values = None + self.alphabet = None - n.set_subnodes_with_csts(shapes, conf=conf) + self.choice = False - self._handle_custo(n, desc, conf) + self.min = None + self.max = None - sep_desc = desc.get('separator', None) - if sep_desc is not None: - sep_node_desc = sep_desc.get('contents', None) - assert(sep_node_desc is not None) - sep_node = self._create_graph_from_desc(sep_node_desc, n) - prefix = sep_desc.get('prefix', True) - suffix = sep_desc.get('suffix', True) - unique = sep_desc.get('unique', False) - n.set_separator_node(sep_node, prefix=prefix, suffix=suffix, unique=unique) - - self._handle_common_attr(n, desc, conf) - - return n - - - def _create_nodes_from_shape(self, shapes, parent_node, shape_type=MH.Ordered, dup_mode=MH.Copy): - - def _handle_section(nodes_desc, sh): - for n in nodes_desc: - if isinstance(n, (list,tuple)) and (len(n) == 2 or len(n) == 3): - sh.append(list(n)) - elif isinstance(n, dict): - qty = n.get('qty', 1) - if isinstance(qty, tuple): - mini = qty[0] - maxi = qty[1] - elif isinstance(qty, int): - mini = qty - maxi = qty - else: - raise ValueError - l = [mini, maxi] - node = self._create_graph_from_desc(n, parent_node) - l.insert(0, node) - sh.append(l) - else: - raise ValueError('Unrecognized section type!') - - sh = [] - prev_section_exist = False - first_pass = True - # Note that sections are not always materialised in the description - for section_desc in shapes: - - # check if it is directly a node - if isinstance(section_desc, (list,tuple)): - if prev_section_exist or first_pass: - prev_section_exist = False - first_pass = False - sh.append(dup_mode + shape_type) - _handle_section([section_desc], sh) - - # check if it is a section description - elif section_desc.get('name') is None and not isinstance(section_desc.get('contents'), Node): - prev_section_exist = True - self._verify_keys_conformity(section_desc) - sec_type = section_desc.get('section_type', MH.Ordered) - dupmode = section_desc.get('duplicate_mode', MH.Copy) - # TODO: revamp weights - weights = ''.join(str(section_desc.get('weights', '')).split(' ')) - sh.append(dupmode+sec_type+weights) - _handle_section(section_desc.get('contents', []), sh) + self.nodes = [] - # if 'name' attr is present, it is not a section in the - # shape, thus we adopt the default sequencing of nodes. 
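# As an illustration (node names and contents are hypothetical), a section
# description, that is an entry without a 'name' attribute, such as:
#
#     {'section_type': MH.Ordered,
#      'duplicate_mode': MH.Copy,
#      'contents': [
#          {'name': 'opt1', 'qty': (0, 1), 'contents': String(val_list=['A'])},
#          {'name': 'opt2', 'contents': UINT8()}]}
#
# is handled by the branch above, whereas a plain node description (one that
# carries a 'name' attribute) falls through to the default sequencing below.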
- else: - if prev_section_exist or first_pass: - prev_section_exist = False - first_pass = False - sh.append(dup_mode + shape_type) - _handle_section([section_desc], sh) + def append_to_contents(self, content): + if self.values is None: + self.values = [] + self.values.append(content) - return sh + def append_to_buffer(self, str): + if self.values is None: + self.values = [""] + if self.values[-1] is None: + self.values[-1] = "" + self.values[-1] += str + def append_to_alphabet(self, alphabet): + if self.alphabet is None: + self.alphabet = "" + self.alphabet += alphabet - def _create_leaf_node(self, desc, node=None): + @property + def buffer(self): + return None if self.values is None else self.values[-1] - n, conf = self.__pre_handling(desc, node) + @buffer.setter + def buffer(self, buffer): + if self.values is None: + self.values = [""] + self.values[-1] = buffer - contents = desc.get('contents') + def flush(self): - if issubclass(contents.__class__, VT): - if hasattr(contents, 'usable') and contents.usable == False: - raise ValueError("ERROR: {:s} is not usable! (use a subclass of it)".format(repr(contents))) - n.set_values(value_type=contents, conf=conf) - elif hasattr(contents, '__call__'): - other_args = desc.get('other_args', None) - provide_helpers = desc.get('provide_helpers', False) - node_args = desc.get('node_args', None) - n.set_func(contents, func_arg=other_args, - provide_helpers=provide_helpers, conf=conf) + if self.values is None and self.alphabet is None: + return - # node_args interpretation is postponed after all nodes has been created - self._register_todo(n, self._complete_func, args=(node_args, conf), unpack_args=True, - prio=self.HIGH_PRIO) + # set default values for min & max if none was provided + if self.min is None and self.max is None: + self.min = self.max = 1 + # guess the type of the terminal node to create + if self.values is not None and all(val.isdigit() for val in self.values): + self.values = [int(i) for i in self.values] + type = fvt.INT_str else: - raise ValueError("ERROR: {:s} is an invalid contents!".format(repr(contents))) - - self._handle_custo(n, desc, conf) - self._handle_common_attr(n, desc, conf) - - return n - - def _handle_custo(self, node, desc, conf): - custo_set = desc.get('custo_set', None) - custo_clear = desc.get('custo_clear', None) + type = fvt.String - if node.is_genfunc(conf=conf): - Custo = GenFuncCusto - trig_last = desc.get('trigger_last', None) - if trig_last is not None: - if trig_last: - if custo_set is None: - custo_set = [] - elif not isinstance(custo_set, list): - custo_set = [custo_set] - custo_set.append(MH.Custo.Gen.TriggerLast) - else: - if custo_clear is None: - custo_clear = [] - elif not isinstance(custo_clear, list): - custo_clear = [custo_clear] - custo_clear.append(MH.Custo.Gen.TriggerLast) + name = self._name + '_' + str(len(self.nodes) + 1) + self.nodes.append(self._create_terminal_node(name, type, values=self.values, + alphabet=self.alphabet, qty=(self.min, self.max))) + self.reset() - elif node.is_nonterm(conf=conf): - Custo = NonTermCusto + def reset(self): + self.values = None + self.alphabet = None + self.min = None + self.max = None - elif node.is_func(conf=conf): - Custo = FuncCusto + def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): + self._name = name + self.charset = charset + self.int_to_string = chr if sys.version_info[0] == 2 and self.charset != MH.Charset.UNICODE else six.unichr + if self.charset == MH.Charset.ASCII: + max = 0x7F + elif self.charset == MH.Charset.UNICODE: + max = 0xFFFF 
else: - if custo_set or custo_clear: - raise DataModelDefinitionError('Customization is not compatible with this ' - 'node kind! [Guilty Node: {:s}]'.format(node.name)) - else: - return - - if custo_set or custo_clear: - custo = Custo(items_to_set=custo_set, items_to_clear=custo_clear) - internals = node.conf(conf) - internals.customize(custo) - - - def _handle_common_attr(self, node, desc, conf): - vals = desc.get('specific_fuzzy_vals', None) - if vals is not None: - if not node.is_typed_value(conf=conf): - raise DataModelDefinitionError("'specific_fuzzy_vals' is only usable with Typed-nodes") - node.conf(conf).set_specific_fuzzy_values(vals) - param = desc.get('mutable', None) - if param is not None: - if param: - node.set_attr(MH.Attr.Mutable, conf=conf) - else: - node.clear_attr(MH.Attr.Mutable, conf=conf) - param = desc.get('determinist', None) - if param is not None: - node.make_determinist(conf=conf) - param = desc.get('random', None) - if param is not None: - node.make_random(conf=conf) - param = desc.get('finite', None) - if param is not None: - node.make_finite(conf=conf) - param = desc.get('infinite', None) - if param is not None: - node.make_infinite(conf=conf) - param = desc.get('clear_attrs', None) - if param is not None: - if isinstance(param, (list, tuple)): - for a in param: - node.clear_attr(a, conf=conf) - else: - node.clear_attr(param, conf=conf) - param = desc.get('set_attrs', None) - if param is not None: - if isinstance(param, (list, tuple)): - for a in param: - node.set_attr(a, conf=conf) - else: - node.set_attr(param, conf=conf) - param = desc.get('absorb_csts', None) - if param is not None: - node.enforce_absorb_constraints(param, conf=conf) - param = desc.get('absorb_helper', None) - if param is not None: - node.set_absorb_helper(param, conf=conf) - param = desc.get('semantics', None) - if param is not None: - node.set_semantics(NodeSemantics(param)) - ref = desc.get('sync_qty_with', None) - if ref is not None: - self._register_todo(node, self._set_sync_node, - args=(ref, SyncScope.Qty, conf, None), - unpack_args=True) - qty_from = desc.get('qty_from', None) - if qty_from is not None: - self._register_todo(node, self._set_sync_node, - args=(qty_from, SyncScope.QtyFrom, conf, None), - unpack_args=True) + max = 0xFF - sync_size_with = desc.get('sync_size_with', None) - sync_enc_size_with = desc.get('sync_enc_size_with', None) - assert sync_size_with is None or sync_enc_size_with is None - if sync_size_with is not None: - self._register_todo(node, self._set_sync_node, - args=(sync_size_with, SyncScope.Size, conf, False), - unpack_args=True) - if sync_enc_size_with is not None: - self._register_todo(node, self._set_sync_node, - args=(sync_enc_size_with, SyncScope.Size, conf, True), - unpack_args=True) - condition = desc.get('exists_if', None) - if condition is not None: - self._register_todo(node, self._set_sync_node, - args=(condition, SyncScope.Existence, conf, None), - unpack_args=True) - condition = desc.get('exists_if/and', None) - if condition is not None: - self._register_todo(node, self._set_sync_node, - args=(condition, SyncScope.Existence, conf, 'and'), - unpack_args=True) - condition = desc.get('exists_if/or', None) - if condition is not None: - self._register_todo(node, self._set_sync_node, - args=(condition, SyncScope.Existence, conf, 'or'), - unpack_args=True) - condition = desc.get('exists_if_not', None) - if condition is not None: - self._register_todo(node, self._set_sync_node, - args=(condition, SyncScope.Inexistence, conf, None), - 
unpack_args=True) - fw = desc.get('fuzz_weight', None) - if fw is not None: - node.set_fuzz_weight(fw) - pfh = desc.get('post_freeze', None) - if pfh is not None: - node.register_post_freeze_handler(pfh) - encoder = desc.get('encoder', None) - if encoder is not None: - node.set_encoder(encoder) + def get_complement(chars): + return ''.join([self.int_to_string(i) for i in range(0, max + 1) if self.int_to_string(i) not in chars]) - def _register_todo(self, node, func, args=None, unpack_args=True, prio=VERYLOW_PRIO): - if self.sorted_todo.get(prio, None) is None: - self.sorted_todo[prio] = [] - self.sorted_todo[prio].insert(0, (node, func, args, unpack_args)) + self.META_SEQUENCES = {'s': string.whitespace, + 'S': get_complement(string.whitespace), + 'd': string.digits, + 'D': get_complement(string.digits), + 'w': string.ascii_letters + string.digits + '_', + 'W': get_complement(string.ascii_letters + string.digits + '_')} - def _create_todo_list(self): - todo = [] - tdl = sorted(self.sorted_todo.items(), key=lambda x: x[0]) - self.sorted_todo = {} - for prio, sub_tdl in tdl: - todo += sub_tdl - return todo + self.SPECIAL_CHARS = list('\\()[]{}*+?|-') - # Should be called at the last time to avoid side effects (e.g., - # when creating generator/function nodes, the node arguments are - # provided at a later time. If set_contents()---which copy nodes---is called - # in-between, node arguments risk to not be copied) - def _clone_from_dict(self, node, ref, desc): - if ref not in self.node_dico: - raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) - node.set_contents(self.node_dico[ref]) - self._handle_custo(node, desc, conf=None) - self._handle_common_attr(node, desc, conf=None) + # None indicates the beginning and the end of the regex + self.inputs = [None] + list(inputs) + [None] + self.run(self) - def _get_from_dict(self, node, ref, parent_node): - if ref not in self.node_dico: - raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) - parent_node.replace_subnode(node, self.node_dico[ref]) + return self._create_non_terminal_node() - def _set_sync_node(self, node, comp, scope, conf, private): - sync_obj = None + def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None): - if scope == SyncScope.QtyFrom: - if isinstance(comp, (tuple,list)): - node_ref, base_qty = comp - else: - node_ref, base_qty = comp, 0 - sync_with = self.__get_node_from_db(node_ref) - sync_obj = SyncQtyFromObj(sync_with, base_qty=base_qty) + assert (values is not None or alphabet is not None) - elif scope == SyncScope.Size: - if isinstance(comp, (tuple,list)): - node_ref, base_size = comp - else: - node_ref, base_size = comp, 0 - sync_with = self.__get_node_from_db(node_ref) - sync_obj = SyncSizeObj(sync_with, base_size=base_size, - apply_to_enc_size=private) + if alphabet is not None: + return [Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] else: - if isinstance(comp, (tuple,list)): - if issubclass(comp[0].__class__, NodeCondition): - param = comp[0] - sync_with = self.__get_node_from_db(comp[1]) - elif issubclass(comp[0].__class__, (tuple,list)): - assert private in ['and', 'or'] - sync_list = [] - for subcomp in comp: - assert isinstance(subcomp, (tuple,list)) and len(subcomp) == 2 - param = subcomp[0] - sync_with = self.__get_node_from_db(subcomp[1]) - sync_list.append((sync_with, param)) - and_junction = private == 'and' - sync_obj = SyncExistenceObj(sync_list, 
and_junction=and_junction) - else: # in this case this is a node reference in the form ('node name', ID) - param = None - sync_with = self.__get_node_from_db(comp) + if type == fvt.String: + node = Node(name=name, vt=fvt.String(val_list=values)) else: - param = None - sync_with = self.__get_node_from_db(comp) - - if sync_obj is not None: - node.make_synchronized_with(scope=scope, sync_obj=sync_obj, conf=conf) - else: - node.make_synchronized_with(scope=scope, node=sync_with, param=param, conf=conf) - - def _complete_func(self, node, args, conf): - if isinstance(args, str): - func_args = self.__get_node_from_db(args) - else: - assert(isinstance(args, (tuple, list))) - func_args = [] - for name_desc in args: - func_args.append(self.__get_node_from_db(name_desc)) - internals = node.cc if conf is None else node.c[conf] - internals.set_func_arg(node=func_args) - - def _complete_generator(self, node, args, conf): - if isinstance(args, str) or \ - (isinstance(args, tuple) and isinstance(args[1], int)): - func_args = self.__get_node_from_db(args) - else: - assert(isinstance(args, (tuple, list))) - func_args = [] - for name_desc in args: - func_args.append(self.__get_node_from_db(name_desc)) - internals = node.cc if conf is None else node.c[conf] - internals.set_generator_func_arg(generator_node_arg=func_args) + node = Node(name=name, vt=fvt.INT_str(int_list=values)) - def _set_env(self, node, args): - env = Env() - env.delayed_jobs_enabled = self.delayed_jobs - node.set_env(env) + return [node, qty[0], -1 if qty[1] is None else qty[1]] - def __get_node_from_db(self, name_desc): - ref = self._handle_name(name_desc) - if ref not in self.node_dico: - raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) + def _create_non_terminal_node(self): + non_terminal = [1, [MH.Copy + MH.Ordered]] + formatted_terminal = non_terminal[1] - node = self.node_dico[ref] - if isinstance(node.cc, NodeInternals_Empty): - raise ValueError("Node ({:s}, {!s}) is Empty!".format(ref[0], ref[1])) - - return node + for terminal in self.nodes: + formatted_terminal.append(terminal) + if self.choice and len(self.nodes) > 1: + non_terminal.append(1) + formatted_terminal = [MH.Copy + MH.Ordered] + non_terminal.append(formatted_terminal) + return non_terminal #### Data Model Abstraction From 34975bea562f413640074188937ce89df379d176 Mon Sep 17 00:00:00 2001 From: Julien Baladier Date: Thu, 4 Aug 2016 10:44:06 +0200 Subject: [PATCH 46/80] Add support for . special char --- docs/source/data_model.rst | 10 +- framework/data_model_helpers.py | 21 ++- test/integration/test_integration.py | 246 +++++++++++++++++++-------- test/unit/test_data_model_helpers.py | 24 ++- 4 files changed, 220 insertions(+), 81 deletions(-) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 38163a6..b122e74 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -1574,8 +1574,8 @@ How to Describe a Data Format That Contains Complex Strings Parts of the data that only contain strings can easily be described using python's regular expressions. 
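As a quick sketch (the node name and the values are made up for illustration),
a filename-like field could be described as:

.. code-block:: python

   regex = {'name': 'filename',
            'contents': '[a-z]{1,8}\.(txt|bin)'}

which is translated into a non-terminal node made of three terminal nodes: an
alphabet-based ``String`` for ``[a-z]{1,8}``, and two value-based nodes, one
for the escaped dot and one for ``txt``/``bin``.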
Here are some rules to respect:
 
-- The characters couple (``[``, ``]``) and meta-sequences, such as ``\s``, ``\S``, ``\w``, ``\W``, ``\d``
-  and ``\D``, are the only ways to define a :class:`framework.value_types.String` terminal node that
+- The characters couple (``[``, ``]``), ``.`` and meta-sequences, such as ``\s``, ``\S``, ``\w``, ``\W``,
+  ``\d`` and ``\D``, are the only ways to define a :class:`framework.value_types.String` terminal node that
   contains an alphabet.
 
 - Anything else will be translated into a :class:`framework.value_types.String` terminal node that
@@ -1585,9 +1585,9 @@ Here are some rules to respect:
 .. note:: If each item in a list of values is an integer, an :class:`framework.value_types.INT_Str` will be
    created instead of a :class:`framework.value_types.String`.
 
-- ``(``, ``)``, ``[``, ``]``, ``?``, ``*``, ``+``, ``{``, ``}``, ``|``, ``\``, ``-`` are the only
+- ``(``, ``)``, ``[``, ``]``, ``?``, ``*``, ``+``, ``{``, ``}``, ``|``, ``\``, ``-``, ``.`` are the only
   recognised special chars. They cannot be used in an unsuitable context without being escaped
-  (exceptions are made for ``|`` and ``-``).
+  (exceptions are made for ``|``, ``.`` and ``-``).
 
 - Only regular expressions that can be translated into one terminal node or into one
   non-terminal node composed of terminal ones are allowed. If this rule is not respected an
@@ -1608,7 +1608,7 @@ Example 1: the basics
    :linenos:
 
    regex = {'name': 'HTTP_version',
-            'contents': '(HTTP)/[0-9].(0|1|2|\x33|4|5|6|7|8|9)'}
+            'contents': '(HTTP)/[0-9]\.(0|1|2|\x33|4|5|6|7|8|9)'}
   # is equivalent to
   classic = {'name': 'HTTP_version', 'contents': [
diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index 5879d1d..a31f1d8 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -1313,6 +1313,8 @@ def advance(self, ctx):
             return self.machine.SquareBrackets
         elif ctx.input == '(':
             return self.machine.Parenthesis
+        elif ctx.input == '.':
+            return self.machine.Dot
         elif ctx.input == '\\':
             return self.machine.Escape
         else:
@@ -1358,6 +1360,8 @@ def advance(self, ctx):
             return self.machine.Parenthesis
         elif ctx.input == '[':
             return self.machine.SquareBrackets
+        elif ctx.input == '.':
+            return self.machine.Dot
         elif ctx.input == '\\':
             return self.machine.Escape
         elif ctx.input == '|':
@@ -1418,6 +1422,8 @@ def advance(self, ctx):
             return self.machine.Parenthesis
         elif ctx.input == '[':
             return self.machine.SquareBrackets
+        elif ctx.input == '.':
+            return self.machine.Dot
         elif ctx.input == '\\':
             return self.machine.Escape
         else:
@@ -1521,6 +1527,8 @@ def advance(self, ctx):
             return self.machine.Parenthesis
         elif ctx.input == '[':
             return self.machine.SquareBrackets
+        elif ctx.input == '.':
+            return self.machine.Dot
         elif ctx.input == '\\':
             return self.machine.Escape
         else:
@@ -1541,7 +1549,7 @@ def advance(self, ctx):
             raise QuantificationError()
         elif ctx.input in ('}', ']', None):
             raise StructureError(ctx.input)
-        elif ctx.input in ('(', '['):
+        elif ctx.input in ('(', '[', '.'):
             raise InconvertibilityError()
         elif ctx.input == '\\':
             return self.machine.Escape
@@ -1743,6 +1751,14 @@ def _run(self, ctx):
 
             ctx.append_to_alphabet(ctx.META_SEQUENCES[ctx.input])
 
+        @register
+        class Dot(Group):
+
+            def _run(self, ctx):
+                ctx.flush()
+                ctx.append_to_alphabet(ctx.get_complement(""))
+
+
     def init_specific(self):
         self._name = None
         self.charset = None
@@ -1825,6 +1841,7 @@ def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT):
 
         def get_complement(chars):
             return ''.join([self.int_to_string(i) for i in range(0, max
+ 1) if self.int_to_string(i) not in chars]) + self.get_complement = get_complement self.META_SEQUENCES = {'s': string.whitespace, 'S': get_complement(string.whitespace), @@ -1833,7 +1850,7 @@ def get_complement(chars): 'w': string.ascii_letters + string.digits + '_', 'W': get_complement(string.ascii_letters + string.digits + '_')} - self.SPECIAL_CHARS = list('\\()[]{}*+?|-') + self.SPECIAL_CHARS = list('\\()[]{}*+?|-.') # None indicates the beginning and the end of the regex self.inputs = [None] + list(inputs) + [None] diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index bc671d1..a413f82 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1,3 +1,5 @@ +# -*- coding: utf8 -*- + ################################################################################ # # Copyright 2014-2016 Eric Lacombe @@ -88,7 +90,7 @@ def test_01(self): print('Flatten 1: ', repr(node_ex1.to_bytes())) print('Flatten 1: ', repr(node_ex1.to_bytes())) l = node_ex1.get_value() - hk = set(node_ex1.get_all_paths().keys()) + hk = list(node_ex1.iter_paths(only_paths=True)) # print(l) # # print('\n\n ####### \n\n') @@ -110,7 +112,7 @@ def test_01(self): print('\n### TEST 1: cross check self.node.get_all_paths().keys() and get_nodes_names() ###') - print('*** Hkeys from self.node.get_all_paths().keys():') + print('*** Hkeys from self.node.iter_paths(only_paths=True):') hk = sorted(hk) for k in hk: print(k) @@ -455,14 +457,16 @@ def test_01(self): node_ex1.set_current_conf('ALT', root_regexp=None) + nonascii_test_str = u'\u00c2'.encode(internal_repr_codec) + node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) @@ -470,7 +474,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or b'[\xc2]' in msg: + if b' ~(..)~ ' in msg or b' ~(X)~ ' in msg or b'[<]' in msg or nonascii_test_str in msg: res2 = False print(msg) @@ -481,7 +485,7 @@ def test_01(self): node_ex1.unfreeze_all() msg = node_ex1.to_bytes() - if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or b'[\xc2]' not in msg: + if b' ~(..)~ ' not in msg or b' ~(X)~ ' not in msg or b'[<]' not in msg or nonascii_test_str not in msg: res2 = False print(msg) @@ -598,19 +602,19 @@ def test_01(self): res1 = True msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or b'[\xc2]' not in msg: + if b'[<]' not in msg or nonascii_test_str not in msg: res1 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.to_bytes(conf='ALT') - if b'[<]' not in msg or b'[\xc2]' not in msg: + if b'[<]' not in msg or nonascii_test_str not in msg: res1 = False print(msg) node_ex1.unfreeze_all() msg = node_ex1.get_node_by_path('TUX$').to_bytes(conf='ALT', recursive=False) - if b'[<]' in msg or b'[\xc2]' in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: + if b'[<]' in msg or nonascii_test_str in msg or b' ~(..)~ TUX ~(..)~ ' not in msg: res1 = False print(msg) @@ -664,26 +668,20 @@ def test_01(self): print('\n*** test 
12.1:') node_ex1 = dm.get_data('EX1') - htbl = node_ex1.get_all_paths() - l = sorted(list(htbl.keys())) - for i in l: + for i in node_ex1.iter_paths(only_paths=True): print(i) print('\n******\n') node_ex1.get_value() - htbl = node_ex1.get_all_paths() - l = sorted(list(htbl.keys())) - for i in l: + for i in node_ex1.iter_paths(only_paths=True): print(i) print('\n******\n') node_ex1.unfreeze_all() node_ex1.get_value() - htbl = node_ex1.get_all_paths() - l = sorted(list(htbl.keys())) - for i in l: + for i in node_ex1.iter_paths(only_paths=True): print(i) print('\n*** test 13: test typed_value Node') @@ -886,9 +884,7 @@ def test_TypedNode_1(self): print('=======[ PATHS ]========') - htbl = evt.get_all_paths() - l = sorted(list(htbl.keys())) - for i in l: + for i in evt.iter_paths(only_paths=True): print(i) print('\n=======[ Typed Nodes ]========') @@ -1516,7 +1512,7 @@ def test_NonTermVisitor(self): nonterm_consumer = NonTermVisitor(respect_order=True) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] ' % idx + rnode.to_str(), rgb=Color.INFO)) + print(colorize('[%d] ' % idx + rnode.to_ascii(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') @@ -1525,7 +1521,7 @@ def test_NonTermVisitor(self): nonterm_consumer = NonTermVisitor(respect_order=False) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=10): - print(colorize('[%d] ' % idx + rnode.to_str(), rgb=Color.INFO)) + print(colorize('[%d] ' % idx + rnode.to_ascii(), rgb=Color.INFO)) self.assertEqual(idx, 3) print('***') @@ -1577,41 +1573,41 @@ def test_basics(self): data = mh.create_graph_from_desc(shape_desc) raw_vals = [ - ' [!] ++++++++++ [!] ::=:: [!] ', - ' [!] ++++++++++ [!] ::?:: [!] ', - ' [!] ++++++++++ [!] ::\xff:: [!] ', - ' [!] ++++++++++ [!] ::\x00:: [!] ', - ' [!] ++++++++++ [!] ::\x01:: [!] ', - ' [!] ++++++++++ [!] ::\x80:: [!] ', - ' [!] ++++++++++ [!] ::\x7f:: [!] ', - ' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit - ' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', - ' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', - ' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit - ' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', - ' [!] >>>>>>>>>> [!] 
::AAA::AAA::\x01:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', - ' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] ' + b' [!] ++++++++++ [!] ::=:: [!] ', + b' [!] ++++++++++ [!] ::?:: [!] ', + b' [!] ++++++++++ [!] ::\xff:: [!] ', + b' [!] ++++++++++ [!] ::\x00:: [!] ', + b' [!] ++++++++++ [!] ::\x01:: [!] ', + b' [!] ++++++++++ [!] ::\x80:: [!] ', + b' [!] ++++++++++ [!] ::\x7f:: [!] ', + b' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit + b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::=:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::?:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\xff:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x00:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', + b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change has it is a random corrupt_bit + b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::=:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::?:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\xff:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] 
' ] tn_consumer = TypedNodeDisruption() @@ -1622,7 +1618,7 @@ def test_basics(self): tn_consumer.set_node_interest(internals_criteria=ic) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, tn_consumer, make_determinist=True, max_steps=100): - val = rnode.to_str() + val = rnode.to_bytes() print(colorize('[%d] ' % idx + repr(val), rgb=Color.INFO)) if idx not in [8, 22]: self.assertEqual(val, raw_vals[idx - 1]) @@ -1784,7 +1780,7 @@ def test_JPG(self): print(colorize('number of imgs: %d' % idx, rgb=Color.INFO)) - self.assertEqual(idx, 115) + self.assertEqual(idx, 116) def test_USB(self): dm_usb = fmk.get_data_model_by_name('usb') @@ -1867,7 +1863,7 @@ def test_absorb_nonterm_2(self): top.set_env(Env()) # 2*nint_3 + nstr_1 + nstr_2 + 2*nint_2 + nint_1 - msg = '\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' + msg = b'\xef\xfe\xef\xfeSTR1str222\xcf\xab\xcd' status, off, size, name = top.absorb(msg) print('\n ---[message to absorb]---') @@ -2025,10 +2021,10 @@ def nint_10_helper(blob, constraints, node_internals): top.set_env(Env()) top2.set_env(Env()) - msg = '\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + msg = b'\xe1\xe2\xe1\xe2\xff\xeeCOOL!\xc1\xc2\x88\x9912345678YEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' # middle1: nint_1_alt + nint_3 + 2*nint_1 + nstr_1('ABCD') + nint_51 + 2*nstr_50 + nint_50 - msg2 = '\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' + msg2 = b'\xff\xe2\x88\x99\xe1\xe2\xcd\xabABCD\xef\xfeIAMHERE\xbfYEAH!\xef\xdf\xbf\xd2\xd3,2\xbbTHE_END' print('\n****** top ******\n') status, off, size, name = top.absorb(msg) @@ -2658,7 +2654,7 @@ def test_str_alphabet(self): self.assertEqual(status, AbsorbStatus.Reject) self.assertEqual(raw_data[size:], b'FEND') - def test_encoded_str(self): + def test_encoded_str_1(self): class EncodedStr(String): def encode(self, val): @@ -2723,31 +2719,31 @@ def decode(self, val): gsm_dec = gsm_t.decode(gsm_enc) self.assertEqual(msg, gsm_dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) #' b'o\xf9 \xe7a' vtype = UTF16_LE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = UTF16_BE(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = UTF8(max_sz=20) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = Codec(max_sz=20, encoding_arg=None) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - msg = b'o\xf9 \xe7a' + msg = u'où ça'.encode(internal_repr_codec) vtype = Codec(max_sz=20, encoding_arg='utf_32') enc = vtype.encode(msg) dec = vtype.decode(enc) @@ -2778,6 +2774,49 @@ def decode(self, val): dec = vtype.decode(enc) self.assertEqual(msg, dec) + def test_encoded_str_2(self): + + enc_desc = \ + {'name': 'enc', + 'contents': [ + {'name': 'len', + 'contents': UINT8()}, + {'name': 'user_data', + 'sync_enc_size_with': 'len', + 'contents': UTF8(val_list=['TEST'])}, + {'name': 'padding', + 'contents': String(max_sz=0), + 'absorb_csts': AbsNoCsts()}, + ]} + + mh = ModelHelper() + node = mh.create_graph_from_desc(enc_desc) + node.set_env(Env()) + + node_abs = Node('enc_abs', base_node=node, new_env=True) + node_abs.set_env(Env()) + node_abs2 = node_abs.get_clone() + + node_abs.show() + + raw_data = b'\x0C' + 
b'\xC6\x67' + b'garbage' # \xC6\x67 --> invalid UTF8 + status, off, size, name = node_abs.absorb(raw_data, constraints=AbsNoCsts(size=True, struct=True)) + + self.assertEqual(status, AbsorbStatus.Reject) + + raw_data = b'\x05' + b'\xC3\xBCber' + b'padding' # \xC3\xBC = ü in UTF8 + + status, off, size, name = node_abs2.absorb(raw_data, constraints=AbsNoCsts(size=True, struct=True)) + + print('Absorb Status:', status, off, size, name) + print(' \_ length of original data:', len(raw_data)) + print(' \_ remaining:', raw_data[size:]) + raw_data_abs = node_abs2.to_bytes() + print(' \_ absorbed data:', repr(raw_data_abs), len(raw_data_abs)) + node_abs2.show() + + self.assertEqual(status, AbsorbStatus.FullyAbsorbed) + class TestHLAPI(unittest.TestCase): @classmethod @@ -3145,7 +3184,7 @@ def test_regex(self, regex_node_name): ]} HTTP_version_regex = \ - {'name': regex_node_name, 'contents': "(HTTP)(/)(0|1|2|3|4|5|6|7|8|9)(.)(0|1|2|3|4|5|6|7|8|9)"} + {'name': regex_node_name, 'contents': "(HTTP)(/)(0|1|2|3|4|5|6|7|8|9)(\.)(0|1|2|3|4|5|6|7|8|9)"} mh = ModelHelper() node_classic = mh.create_graph_from_desc(HTTP_version_classic) @@ -3164,10 +3203,10 @@ def test_regex(self, regex_node_name): class TestFMK(unittest.TestCase): @classmethod def setUpClass(cls): - fmk.run_project(name='tuto', dm_name='mydf') + fmk.run_project(name='tuto', dm_name='mydf', tg=0) def setUp(self): - pass + fmk.reload_all(tg_num=0) def test_generic_disruptors_01(self): dmaker_type = 'TESTNODE' @@ -3272,4 +3311,65 @@ def test_typednode_disruptor(self): d.show() idx += 1 - self.assertEqual(idx, expected_idx) \ No newline at end of file + self.assertEqual(idx, expected_idx) + + def test_operator_1(self): + + fmk.launch_operator('MyOp', user_input=UserInputContainer(specific=UI(max_steps=100, mode=1))) + print('\n*** Last data ID: {:d}'.format(fmk.lg.last_data_id)) + fmkinfo = fmk.fmkDB.execute_sql_statement( + "SELECT CONTENT FROM FMKINFO " + "WHERE DATA_ID == {data_id:d} " + "ORDER BY ERROR DESC;".format(data_id=fmk.lg.last_data_id) + ) + self.assertTrue(fmkinfo) + for info in fmkinfo: + if 'Exhausted data maker' in info[0]: + break + else: + raise ValueError('the data maker should be exhausted and trigger the end of the operator') + + @unittest.skipIf(not run_long_tests, "Long test case") + def test_operator_2(self): + + fmk.launch_operator('MyOp') + fbk = fmk.fmkDB.last_feedback["Operator 'MyOp'"][0]['content'] + print(fbk) + self.assertIn(b'You win!', fbk) + + fmk.launch_operator('MyOp') + fbk = fmk.fmkDB.last_feedback["Operator 'MyOp'"][0]['content'] + print(fbk) + self.assertIn(b'You loose!', fbk) + + def test_scenario_infra(self): + + print('\n*** test scenario SC_NO_REGEN') + + base_qty = 0 + for i in range(100): + data = fmk.get_data(['SC_NO_REGEN']) + data_list = fmk.send_data([data]) # needed to make the scenario progress + # send_data_and_log() should be used for more complex scenarios + # hooking the framework in more places. 
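            # (Editor's note -- a hedged reading of the loop below, not an
            # authoritative statement about the API: fmk.send_data() is
            # expected to return an empty data_list once the current scenario
            # step can no longer produce data, which is how this test detects
            # that SC_NO_REGEN has exhausted its data makers and measures how
            # many steps it yields before running dry.)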
+ if not data_list: + base_qty = i + break + else: + raise ValueError + + err_list = fmk.get_error() + code_vector = [str(e) for e in err_list] + print('\n*** Retrieved error code vector: {!r}'.format(code_vector)) + + self.assertEqual(code_vector, ['DataUnusable', 'HandOver', 'DataUnusable', 'HandOver', + 'DPHandOver', 'NoMoreData']) + self.assertEqual(base_qty, 37) + + print('\n*** test scenario SC_AUTO_REGEN') + + for i in range(base_qty * 3): + data = fmk.get_data(['SC_AUTO_REGEN']) + data_list = fmk.send_data([data]) + if not data_list: + raise ValueError \ No newline at end of file diff --git a/test/unit/test_data_model_helpers.py b/test/unit/test_data_model_helpers.py index 699d5fa..8f055b9 100644 --- a/test/unit/test_data_model_helpers.py +++ b/test/unit/test_data_model_helpers.py @@ -4,6 +4,8 @@ import ddt from test import mock +ASCII_EXT = ''.join([(chr if sys.version_info[0] == 2 else six.unichr)(i) for i in range(0, 0xFF + 1)]) + @ddt.ddt class RegexParserTest(unittest.TestCase): @@ -26,10 +28,30 @@ def tearDown(self): {'regex': "(sal{2}u)too"}, {'regex': "sal{2,1}utoo"}, {'regex': "sal(u[t]o)o"}, {'regex': "whatever|toto?ff"}, {'regex': "whate?ver|toto"}, {'regex': "(toto)*ohoho|haha"}, {'regex': "(toto)ohoho|haha"}, {'regex': "salut[abcd]{,15}rr"}, {'regex': "[]whatever"}, - {'regex': "t{,15}"}, {'regex': "hi|b?whatever"}, {'regex': "hi|b{3}whatever"}) + {'regex': "t{,15}"}, {'regex': "hi|b?whatever"}, {'regex': "hi|b{3}whatever"}, + {'regex': "whatever(bar.foo)"}) def test_invalid_regexes(self, regex): self.assert_regex_is_invalid(regex) + @ddt.data( + {'regex': ".", 'nodes': [{"alphabet": ASCII_EXT}]}, + {'regex': "this.is", + 'nodes': [ + {"values": ["this"]}, + {"alphabet": ASCII_EXT}, + {"values": ["is"]}]}, + {'regex': "[fo.bar]hello", 'nodes': [{"alphabet": "fo.bar"}, {"values": ["hello"]}]}, + {'regex': "[bar].(hel).+lo", + 'nodes': [ + {"alphabet": "bar"}, + {"alphabet": ASCII_EXT}, + {"values": ["hel"]}, + {"alphabet": ASCII_EXT, 'qty': (1, None)}, + {"values": ["lo"]}]}, + ) + def test_dot(self, test_case): + self.assert_regex_is_valid(test_case) + @ddt.data( {'regex': "[abcd]?", 'nodes': [{"alphabet": "abcd", "qty": (0, 1)}]}, {'regex': "[abcd]*", 'nodes': [{"alphabet": "abcd", "qty": (0, None)}]}, From 4ba48213be60a6fb54e19f04df18e9b638729afb Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 6 Aug 2016 13:01:50 +0200 Subject: [PATCH 47/80] Update PPPoE DM + NetworkTarget enhancement - Enhancement to PPPoE fuzzing scenarios - Enhance NetworkTarget way of finding your MAC addr when using a raw socket. - Add also support for MAC addr handling to registered interfaces. - Some polishing --- data_models/protocols/pppoe_strategy.py | 74 ++++++++++++------------- docs/source/data_model.rst | 8 +-- framework/data_model.py | 2 +- framework/plumbing.py | 6 +- framework/target.py | 73 +++++++++++++++++++----- framework/value_types.py | 2 +- projects/tuto_proj.py | 4 +- 7 files changed, 110 insertions(+), 59 deletions(-) diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index 16c0986..a460bb9 100644 --- a/data_models/protocols/pppoe_strategy.py +++ b/data_models/protocols/pppoe_strategy.py @@ -81,30 +81,14 @@ def retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi', u else: pass - if update: + if update: # we update the seed of the data process next_step.node.freeze() - error_msg = '\n*** The node has no path to: {:s}. 
Thus, ignore it.\n'\ - ' (probable reason: the node has been fuzzed in a way that makes the' \ - 'path unavailable)' - try: - next_step.node['.*/mac_dst'] = mac_src - except: - print(error_msg.format('mac_dst')) try: next_step.node['.*/tag_sn/value/v101'] = service_name + next_step.node['.*/tag_sn$'].unfreeze(recursive=True, reevaluate_constraints=True) + next_step.node.freeze() except: - print(error_msg.format('service_name')) - - if host_uniq is not None: - new_tag = env.dm.get_data('tag_host_uniq') - new_tag['.*/v103'] = host_uniq - try: - next_step.node['.*/host_uniq_stub'].set_contents(new_tag) - except: - print(error_msg.format('host_uniq_stub')) - else: - print('\n***WARNING: Host-Uniq not provided') - next_step.node.unfreeze(recursive=True, reevaluate_constraints=True) + pass return True @@ -119,13 +103,17 @@ def retrieve_padr_from_feedback(env, current_step, next_step, feedback): def retrieve_padi_from_feedback(env, current_step, next_step, feedback): return retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi') +def retrieve_padr_from_feedback_and_update(env, current_step, next_step, feedback): + return retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padr', update=True) + def retrieve_padi_from_feedback_and_update(env, current_step, next_step, feedback): return retrieve_X_from_feedback(env, current_step, next_step, feedback, x='padi', update=True) -@disruptor(tactics, dtype="FIX_FIELDS", weight=1) +@disruptor(tactics, dtype="FIX_FIELDS", weight=1, + args={'reevaluate_csts': ('reevaluate constraints on the whole message to preserve consistency', + False, bool)}) class t_fix_pppoe_msg_fields(Disruptor): - mac_src = None service_name = None host_uniq = None @@ -139,23 +127,28 @@ def disrupt_data(self, dm, target, prev_data): try: n['.*/mac_dst'] = self.mac_src prev_data.add_info("update 'mac_src'") + if not self.reevaluate_csts: + n['.*/mac_dst'].unfreeze(dont_change_state=True) except: print(error_msg.format('mac_dst')) else: print("\n*** 'mac_src' not found in the environment! ***") - if self.service_name: - try: - n['.*/tag_sn/value/v101'] = self.service_name - prev_data.add_info("update 'service_name'") - except: - print(error_msg.format('service_name')) - else: - print("\n*** 'service_name' not found in the environment! ***") + if self.reevaluate_csts: + if self.service_name: + try: + n['.*/tag_sn/value/v101'] = self.service_name + prev_data.add_info("update 'service_name'") + except: + print(error_msg.format('service_name')) + else: + print("\n*** 'service_name' not found in the environment! ***") if self.host_uniq: new_tag = dm.get_data('tag_host_uniq') new_tag['.*/v103'] = self.host_uniq + new_tag.unfreeze(recursive=True, reevaluate_constraints=True) + new_tag.freeze() try: n['.*/host_uniq_stub'].set_contents(new_tag) prev_data.add_info("update 'host_uniq'") @@ -164,12 +157,18 @@ def disrupt_data(self, dm, target, prev_data): else: print("\n*** 'host_uniq_stub' not found in the environment! 
***") - n.unfreeze(recursive=True, reevaluate_constraints=True) + if self.reevaluate_csts: + n.unfreeze(recursive=True, reevaluate_constraints=True) + else: + try: + n['.*/length$'].unfreeze() + except: + print(error_msg.format('length')) n.freeze() + n.show() return prev_data - ### PADI fuzz scenario ### step_wait_padi = NoDataStep(fbk_timeout=1) @@ -179,7 +178,7 @@ def disrupt_data(self, dm, target, prev_data): # step_send_pado = Step('pado') step_end = Step('padt') -step_wait_padi.connect_to(step_send_pado, cbk_after_fbk=retrieve_padi_from_feedback) +step_wait_padi.connect_to(step_send_pado, cbk_after_fbk=retrieve_padi_from_feedback_and_update) step_send_pado.connect_to(step_end) step_end.connect_to(step_wait_padi) @@ -188,21 +187,22 @@ def disrupt_data(self, dm, target, prev_data): ### PADS fuzz scenario ### step_wait_padi = NoDataStep(fbk_timeout=1) -step_send_valid_pado = Step('pado') +step_send_valid_pado = Step(DataProcess(process=[('FIX_FIELDS#2', None, UI(reevaluate_csts=True))], + seed='pado')) dp_pads = DataProcess(process=[('tTYPE#2', UI(init=1), UI(order=True)), 'FIX_FIELDS'], seed='pads') dp_pads.append_new_process([('tSTRUCT#2', UI(init=1), UI(deep=True)), 'FIX_FIELDS']) step_send_fuzzed_pads = Step(dp_pads) step_wait_padr = NoDataStep(fbk_timeout=1) -step_wait_padi.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback_and_update) -step_send_valid_pado.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback) +step_wait_padi.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) +step_send_valid_pado.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback_and_update) step_send_valid_pado.connect_to(step_wait_padr) step_send_fuzzed_pads.connect_to(step_wait_padr) step_wait_padr.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback) -step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback_and_update) +step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) sc2 = Scenario('PADS') sc2.set_anchor(step_wait_padi) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index b122e74..7837491 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -164,8 +164,7 @@ that enables to handle transparently any encoding scheme: Below the different currently defined string types: - :class:`framework.value_types.String`: General purpose ``UTF8`` character string. -- :class:`framework.value_types.BYTES`: General purpose byte string (alias to - :class:`framework.value_types.LATIN_1`). +- :class:`framework.value_types.BYTES`: General purpose byte string. - :class:`framework.value_types.Filename`: Filename. Similar to the type ``String``, but some disruptors like ``tTYPE`` will generate more specific test cases. @@ -932,8 +931,9 @@ specific_fuzzy_vals planned). charset - Used to specify a charset to be used within the node: it is particularly useful - for nodes that contain regular expressions. Accepted attributes are: + Used in the context of a `regular expression` ``contents``. It enables to specify the charset + that will be considered for interpreting the regular expression and for creating the related + nodes. 
Accepted attributes are: - ``MH.Charset.ASCII`` - ``MH.Charset.ASCII_EXT`` (default) diff --git a/framework/data_model.py b/framework/data_model.py index 808486f..156a5d1 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -6566,7 +6566,7 @@ def __copy__(self): # # WARNING: If DJobs need to evolve in the future to support copy, DJobGroup should be updated # during this copy for updating the nodes in its node_list attribute. - assert not self._sorted_jobs and not self._djob_keys and not self._djob_groups + # assert not self._sorted_jobs and not self._djob_keys and not self._djob_groups new_env._sorted_jobs = None new_env._djob_keys = None new_env._djob_groups = None diff --git a/framework/plumbing.py b/framework/plumbing.py index 3bf04ac..c04c1d6 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -2930,8 +2930,12 @@ def show_probes(self): self.lg.print_console('-=[ Probes ]=-', rgb=Color.INFO, style=FontStyle.BOLD) self.lg.print_console('') for p in probes: + try: + status = self.mon.get_probe_status(p).get_status() + except: + status = None msg = "name: %s (status: %s, delay: %f) --> " % \ - (p, repr(self.mon.get_probe_status(p).get_status()), + (p, repr(status), self.mon.get_probe_delay(p)) if self.mon.is_probe_stuck(p): diff --git a/framework/target.py b/framework/target.py index 398e8ea..200675f 100644 --- a/framework/target.py +++ b/framework/target.py @@ -37,12 +37,11 @@ import time import collections import binascii +import uuid import errno from socket import error as socket_error -from uuid import getnode - from libs.external_modules import * from framework.data_model import Data, NodeSemanticsCriteria from framework.value_types import GSMPhoneNum @@ -330,9 +329,10 @@ def __init__(self, host='localhost', port=12345, socket_type=(socket.AF_INET, so Args: host (str): the IP address of the target to connect to, or the IP address on which we will wait for target connecting - to us (if `server_mode` is True). + to us (if `server_mode` is True). For raw socket type, it should contain the name of + the interface. port (int): the port for communicating with the target, or - the port to listen to. + the port to listen to. For raw socket type, it should contain the protocol ID. socket_type (tuple): tuple composed of the socket address family and socket type data_semantics (str): string of characters that will be used for @@ -347,16 +347,45 @@ def __init__(self, host='localhost', port=12345, socket_type=(socket.AF_INET, so hold_connection (bool): If `True`, we will maintain the connection while sending data to the real target. Otherwise, after each data emission, we close the related socket. + mac_src (bytes): Only in conjunction with raw socket. For each data sent through + this interface, and if this data contain nodes with the semantic ``'mac_src'``, + these nodes will be overwritten (through absorption) with this parameter. If nothing + is provided, the MAC address will be retrieved from the interface specified in 'host'. + (works accurately for Linux system). + mac_dst (bytes): Only in conjunction with raw socket. For each data sent through + this interface, and if this data contain nodes with the semantic ``'mac_dst'``, + these nodes will be overwritten (through absorption) with this parameter. 
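          Example (editor's sketch mirroring the raw-socket usage found in
            projects/tuto_proj.py; the interface name and the broadcast
            destination address are illustrative assumptions)::

              tg = NetworkTarget(host='eth0', port=ETH_P_ALL,
                                 socket_type=(socket.AF_PACKET, socket.SOCK_RAW,
                                              socket.htons(ETH_P_ALL)),
                                 hold_connection=True,
                                 mac_dst=b'\xff\xff\xff\xff\xff\xff')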
''' if not self._is_valid_socket_type(socket_type): raise ValueError("Unrecognized socket type") - self._mac_src = struct.pack('>Q', getnode())[2:] if mac_src is None else mac_src - self._mac_dst = mac_dst + if sys.platform == 'linux': + def get_mac_addr(ifname): + if sys.version_info[0] > 2: + ifname = bytes(ifname, 'latin_1') + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + try: + info = fcntl.ioctl(s.fileno(), 0x8927, struct.pack('256s', ifname[:15])) + except OSError: + return b'' + info = bytearray(info) + return bytes(info[18:24]) + else: + def get_mac_addr(ifname): + return struct.pack('>Q', uuid.getnode())[2:] + + self.get_mac_addr = get_mac_addr + self._mac_src_semantic = NodeSemanticsCriteria(mandatory_criteria=['mac_src']) self._mac_dst_semantic = NodeSemanticsCriteria(mandatory_criteria=['mac_dst']) + self._mac_src = {(host, port): None} + self._mac_dst = {(host, port): None} + if socket_type[1] == socket.SOCK_RAW: + self._mac_src[(host, port)] = self.get_mac_addr(host) if mac_src is None else mac_src + self._mac_dst[(host, port)] = mac_dst + self._host = {} self._port = {} self._socket_type = {} @@ -406,7 +435,7 @@ def _is_valid_socket_type(self, socket_type): return True def register_new_interface(self, host, port, socket_type, data_semantics, server_mode=False, - hold_connection=False): + hold_connection=False, mac_src=None, mac_dst=None): if not self._is_valid_socket_type(socket_type): raise ValueError("Unrecognized socket type") @@ -419,6 +448,9 @@ def register_new_interface(self, host, port, socket_type, data_semantics, server self.server_mode[(host,port)] = server_mode self._default_fbk_id[(host, port)] = self._default_fbk_socket_id + ' - {:s}:{:d}'.format(host, port) self.hold_connection[(host, port)] = hold_connection + self._mac_src[(host, port)] = self.get_mac_addr(host) if mac_src is None else mac_src + self._mac_dst[(host, port)] = mac_dst + def set_timeout(self, fbk_timeout, sending_delay): ''' @@ -653,6 +685,16 @@ def start(self): # self.send_multiple_data() is # used. self._connect_to_additional_feedback_sockets() + + for k, mac_src in self._mac_src.items(): + if mac_src is not None: + if mac_src: + self.record_info('*** Detected HW address for {!s}: {!s} ***' + .format(k[0], mac_src)) + else: + self.record_info('*** WARNING: HW Address not detected for {!s}! 
***' + .format(k[0])) + return self.initialize() def stop(self): @@ -1281,16 +1323,19 @@ def _before_sending_data(self, data_list, from_fmk): for data in data_list: if data.node is None: continue - _, _, socket_type, _ = self._get_net_info_from(data) + host, port, socket_type, _ = self._get_net_info_from(data) if socket_type[1] == socket.SOCK_RAW: data.node.freeze() - try: - data.node[self._mac_src_semantic] = self._mac_src - except ValueError: - self._logger.log_comment('WARNING: Unable to set the MAC SOURCE on the packet') - if self._mac_dst is not None: + mac_src = self._mac_src[(host,port)] + mac_dst = self._mac_dst[(host,port)] + if mac_src is not None: + try: + data.node[self._mac_src_semantic] = mac_src + except ValueError: + self._logger.log_comment('WARNING: Unable to set the MAC SOURCE on the packet') + if mac_dst is not None: try: - data.node[self._mac_dst_semantic] = self._mac_dst + data.node[self._mac_dst_semantic] = mac_dst except ValueError: self._logger.log_comment('WARNING: Unable to set the MAC DESTINATION on the packet') diff --git a/framework/value_types.py b/framework/value_types.py index 1cfa2a3..900b4c9 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -955,7 +955,7 @@ def pretty_print(self, max_size=None): if self.drawn_val is None: self.get_value() - if self.encoded_string and not isinstance(self, BYTES): + if self.encoded_string and not isinstance(self, (BYTES,UTF8)): dec = self.drawn_val sz = len(dec) if max_size is not None and sz > max_size: diff --git a/projects/tuto_proj.py b/projects/tuto_proj.py index 39ccf7b..70004f3 100644 --- a/projects/tuto_proj.py +++ b/projects/tuto_proj.py @@ -68,7 +68,9 @@ def _feedback_handling(self, fbk, ref): rawnetsrv_tg = NetworkTarget(host='eth0', port=ETH_P_ALL, socket_type=(socket.AF_PACKET, socket.SOCK_RAW, socket.htons(ETH_P_ALL)), hold_connection=True, server_mode=False) - +rawnetsrv_tg.register_new_interface(host='eth2', port=ETH_P_ALL, + socket_type=(socket.AF_PACKET, socket.SOCK_RAW, socket.htons(ETH_P_ALL)), + data_semantics='TG2') ### PROBE DEFINITION ### From cd78b6ff5fbe3765b80024b1885da60d00a1c423 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 6 Aug 2016 13:58:18 +0200 Subject: [PATCH 48/80] Enhance fuzzing test cases of String-based nodes. --- framework/value_types.py | 49 ++++++++++++++++++---------- test/integration/test_integration.py | 14 ++++++-- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/framework/value_types.py b/framework/value_types.py index 900b4c9..907a1b8 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -693,13 +693,7 @@ def set_description(self, val_list=None, size=None, min_sz=None, elif hasattr(self, 'specific_fuzzing_list'): self.extra_fuzzy_list = self.specific_fuzzing_list else: - self.extra_fuzzy_list = [ - b'', - b'\x00', - b'%s%s%s', - b'%n%n%n', - b'\r\n' - ] + self.extra_fuzzy_list = None if val_list is not None: assert(isinstance(val_list, list)) @@ -869,29 +863,50 @@ def enable_fuzz_mode(self): else: orig_val = random.choice(self.val_list_copy) + sz = len(orig_val) + sz_delta_with_max = self.max_sz - sz + try: val = bp.corrupt_bits(orig_val, n=1, ascii=self.ascii_mode) self.val_list_fuzzy.append(val) except: print("\n*** Value is empty! 
--> skipping bitflip test case ***")
-        sz = len(orig_val)
-        sz_delta = self.max_sz - sz
-
-        val = orig_val + b"A"*(sz_delta + 1)
+        val = orig_val + b"A"*(sz_delta_with_max + 1)
         self.val_list_fuzzy.append(val)
+        self.val_list_fuzzy.append(b'')
         if sz > 0:
-            sz_delta = sz - self.min_sz
-            val = orig_val[:-sz_delta-1]
-            self.val_list_fuzzy.append(val)
+            sz_delta_with_min = sz - self.min_sz
+            val = orig_val[:-sz_delta_with_min-1]
+            if val != b'':
+                self.val_list_fuzzy.append(val)
         val = orig_val + b"X"*(self.max_sz*8)
         self.val_list_fuzzy.append(val)
-        for v in self.extra_fuzzy_list:
-            if v not in self.val_list_fuzzy:
-                self.val_list_fuzzy.append(v)
+        self.val_list_fuzzy.append(b'\x00'*sz if sz>0 else b'\x00')
+
+        if sz > 1:
+            is_even = sz % 2 == 0
+            cpt = sz // 2
+            if is_even:
+                self.val_list_fuzzy.append(b'%n' * cpt)
+                self.val_list_fuzzy.append(b'%s' * cpt)
+                self.val_list_fuzzy.append(b'\r\n' * cpt)
+            else:
+                self.val_list_fuzzy.append(orig_val[:1] + b'%n' * cpt)
+                self.val_list_fuzzy.append(orig_val[:1] + b'%s' * cpt)
+                self.val_list_fuzzy.append(orig_val[:1] + b'\r\n' * cpt)
+        else:
+            self.val_list_fuzzy.append(b'%n%n%n')
+            self.val_list_fuzzy.append(b'%s%s%s')
+            self.val_list_fuzzy.append(b'\r\n')
+
+        if self.extra_fuzzy_list:
+            for v in self.extra_fuzzy_list:
+                if v not in self.val_list_fuzzy:
+                    self.val_list_fuzzy.append(v)
         enc_cases = self.encoding_test_cases(orig_val, self.max_sz, self.min_sz,
                                              self.min_encoded_sz, self.max_encoded_sz)
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index a413f82..74a2d1b 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -1584,6 +1584,10 @@ def test_basics(self):
             b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ',
             b' [!] ++++++++++ [!] ::::AAA::>:: [!] ',
             b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ',
+            b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::>:: [!] ',
+            b' [!] ++++++++++ [!] ::A%n::AAA::>:: [!] ',
+            b' [!] ++++++++++ [!] ::A%s::AAA::>:: [!] ',
+            b' [!] ++++++++++ [!] ::A\r\n::AAA::>:: [!] ',
             b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ',
             b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ',
             b' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::>:: [!] ',
@@ -1594,10 +1598,14 @@ def test_basics(self):
             b' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ',
             b' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ',
             b' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ',
-            b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [22] could change as it is a random corrupt_bit
+            b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [26] could change as it is a random corrupt_bit
             b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ',
             b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ',
             b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ',
+            b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::>:: [!] ',
+            b' [!] >>>>>>>>>> [!] ::A%n::AAA::>:: [!] ',
+            b' [!] >>>>>>>>>> [!] ::A%s::AAA::>:: [!] ',
+            b' [!] >>>>>>>>>> [!] ::A\r\n::AAA::>:: [!] ',
             b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ',
             b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ',
             b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] 
', @@ -1620,10 +1628,10 @@ def test_basics(self): max_steps=100): val = rnode.to_bytes() print(colorize('[%d] ' % idx + repr(val), rgb=Color.INFO)) - if idx not in [8, 22]: + if idx not in [8, 26]: self.assertEqual(val, raw_vals[idx - 1]) - self.assertEqual(idx, 35) + self.assertEqual(idx, 43) def test_TypedNodeDisruption_1(self): nt = self.dm.get_data('Simple') From 5e92e7ea6c6476a498b92e7acf3f46b1d8dcf09b Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 6 Aug 2016 16:18:39 +0200 Subject: [PATCH 49/80] Fix String absorption regarding encoding. (TBC) --- data_models/example.py | 2 +- data_models/tuto.py | 6 +-- docs/source/data_model.rst | 4 +- framework/value_types.py | 64 +++++++++++++++------------- test/integration/test_integration.py | 4 +- 5 files changed, 42 insertions(+), 38 deletions(-) diff --git a/data_models/example.py b/data_models/example.py index eabc765..83952b4 100644 --- a/data_models/example.py +++ b/data_models/example.py @@ -357,7 +357,7 @@ def build_data_model(self): 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp=b'\n+'), + 'contents': String(val_list=['\n'], absorb_regexp='\n+'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'contents': [{ 'section_type': MH.Random, diff --git a/data_models/tuto.py b/data_models/tuto.py index 331ad24..ad260c5 100644 --- a/data_models/tuto.py +++ b/data_models/tuto.py @@ -41,7 +41,7 @@ def build_data_model(self): 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp=b'\n+'), + 'contents': String(val_list=['\n'], absorb_regexp='\n+'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'contents': [{ 'section_type': MH.Random, @@ -150,7 +150,7 @@ def keycode_helper(blob, constraints, node_internals): separator_desc = \ {'name': 'separator', 'separator': {'contents': {'name': 'sep_nl', - 'contents': String(val_list=['\n'], absorb_regexp=b'[\r\n|\n]+'), + 'contents': String(val_list=['\n'], absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, @@ -160,7 +160,7 @@ def keycode_helper(blob, constraints, node_internals): 'contents': [ {'name': 'parameters', 'separator': {'contents': {'name': ('sep',2), - 'contents': String(val_list=[' '], absorb_regexp=b' +'), + 'contents': String(val_list=[' '], absorb_regexp=' +'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'qty': 3, 'contents': [ diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 7837491..b6d6028 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -1053,7 +1053,7 @@ parameters with space characters (line 12-14). {'name': 'separator_test', 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp=b'[\r\n|\n]+'), + 'contents': String(val_list=['\n'], absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, @@ -1063,7 +1063,7 @@ parameters with space characters (line 12-14). 
'contents': [ {'name': 'parameters', 'separator': {'contents': {'name': ('sep',2), - 'contents': String(val_list=[' '], absorb_regexp=b' +'), + 'contents': String(val_list=[' '], absorb_regexp=' +'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'qty': 3, 'contents': [ diff --git a/framework/value_types.py b/framework/value_types.py index 907a1b8..11a8a86 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -442,21 +442,23 @@ def absorb_auto_helper(self, blob, constraints): elif constraints[AbsCsts.Contents] and self.alphabet is not None: size = None - blob = unconvert_from_internal_repr(blob) if self.encoded_string: - blob_dec = unconvert_from_internal_repr(blob_dec) + blob_str = unconvert_from_internal_repr(blob_dec) else: + blob_str = unconvert_from_internal_repr(blob) blob_dec = blob alp = unconvert_from_internal_repr(self.alphabet) for l in alp: - if blob_dec.startswith(l): + if blob_str.startswith(l): break else: sup_sz = len(blob)+1 off = sup_sz for l in alp: if self.encoded_string: - l = self.encode(l) + l = self.encode(convert_to_internal_repr(l)) + else: + l = convert_to_internal_repr(l) new_off = blob.find(l) if new_off < off and new_off > -1: off = new_off @@ -464,15 +466,16 @@ def absorb_auto_helper(self, blob, constraints): off = -1 elif constraints[AbsCsts.Regexp] and self.regexp is not None: - g = re.search(self.regexp, blob_dec, re.S) + g = re.search(self.regexp, unconvert_from_internal_repr(blob_dec), re.S) if g is not None: if self.encoded_string: pattern_enc = self.encode(g.group(0)) off = blob.find(pattern_enc) size = len(pattern_enc) else: - off = g.start() - size = g.end() - off + pattern_enc = convert_to_internal_repr(g.group(0)) + off = blob.find(pattern_enc) + size = len(pattern_enc) else: off = -1 @@ -505,10 +508,7 @@ def do_absorb(self, blob, constraints, off=0, size=None): self.orig_drawn_val = self.drawn_val if constraints[AbsCsts.Size]: - if self.encoded_string: - sz = size if size is not None and size < self.max_encoded_sz else self.max_encoded_sz - else: - sz = size if size is not None and size < self.max_sz else self.max_sz + sz = size if size is not None and size < self.max_encoded_sz else self.max_encoded_sz # if encoded string, val is returned decoded val = self._read_value_from(blob[off:sz+off], constraints) @@ -519,8 +519,8 @@ def do_absorb(self, blob, constraints, off=0, size=None): raise ValueError('min_encoded_sz constraint not respected!') else: val_sz = len(val) # maybe different from sz if blob is smaller - if val_sz < self.min_sz: - raise ValueError('min_sz constraint not respected!') + if val_sz < self.min_encoded_sz: + raise ValueError('min_sz/min_encoded_sz constraint not respected!') else: blob = blob[off:] #blob[off:size+off] if size is not None else blob[off:] val = self._read_value_from(blob, constraints) @@ -653,10 +653,10 @@ def rewind(self): self.drawn_val = None - def _check_sizes(self): - if self.val_list is not None: - for v in self.val_list: - sz = len(unconvert_from_internal_repr(v)) + def _check_sizes(self, val_list): + if val_list is not None: + for v in val_list: + sz = len(v) if self.max_sz is not None: assert(self.max_sz >= sz >= self.min_sz) else: @@ -682,9 +682,9 @@ def set_description(self, val_list=None, size=None, min_sz=None, if absorb_regexp is None: if self.ascii_mode: - self.regexp = b'[\x00-\x7f]*' + self.regexp = '[\x00-\x7f]*' else: - self.regexp = b'.*' + self.regexp = '.*' else: self.regexp = convert_to_internal_repr(absorb_regexp) @@ -696,7 +696,7 @@ def set_description(self, val_list=None, 
size=None, min_sz=None, self.extra_fuzzy_list = None if val_list is not None: - assert(isinstance(val_list, list)) + assert isinstance(val_list, list) self.val_list = VT._str2internal(val_list) for val in self.val_list: if not self._check_compliance(val, force_max_enc_sz=max_encoded_sz is not None, @@ -709,6 +709,18 @@ def set_description(self, val_list=None, size=None, min_sz=None, if l not in self.alphabet: raise ValueError("The value '%s' does not conform to the alphabet!" % val) + if isinstance(self, String): + max_enc_sz = 0 + min_enc_sz = len(self.val_list[0]) + for val in self.val_list: + length = len(val) + if length > max_enc_sz: + max_enc_sz = length + if length < min_enc_sz: + min_enc_sz = length + self.max_encoded_sz = max_enc_sz + self.min_encoded_sz = min_enc_sz + self.val_list_copy = copy.copy(self.val_list) self.is_val_list_provided = True # distinguish cases where # val_list is provided or @@ -721,45 +733,37 @@ def set_description(self, val_list=None, size=None, min_sz=None, if size is not None: self.min_sz = size self.max_sz = size - elif min_sz is not None and max_sz is not None: assert(max_sz >= 0 and min_sz >= 0 and max_sz - min_sz >= 0) self.min_sz = min_sz self.max_sz = max_sz - elif min_sz is not None: self.min_sz = min_sz # for string with no size limit, we set a threshold to # DEFAULT_MAX_SZ chars self.max_sz = self.DEFAULT_MAX_SZ - elif max_sz is not None: self.max_sz = max_sz self.min_sz = 0 - elif val_list is not None: sz = 0 - for v in self.val_list: + for v in val_list: length = len(v) if length > sz: sz = length - self.max_sz = sz self.min_sz = 0 - elif max_encoded_sz is not None: # If we reach this condition, that means no size has been provided, we thus decide # an arbitrary default value for max_sz. Regarding absorption, this arbitrary choice will # have no influence, as only max_encoded_sz will be used. 
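            # (Editor's note, a hedged illustration of the distinction above:
            # the USB string descriptor updated in this series declares
            # max_sz=126 together with max_encoded_sz=253, i.e. character
            # count and encoded byte count are bounded independently, a UTF-16
            # character weighing roughly two bytes once encoded. When only
            # max_encoded_sz is given, the max_sz picked here is just such a
            # placeholder.)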
self.min_sz = 0 self.max_sz = max_encoded_sz - else: self.min_sz = 0 self.max_sz = self.DEFAULT_MAX_SZ - - self._check_sizes() + self._check_sizes(val_list) if val_list is None: self._populate_val_list(force_max_enc_sz=max_encoded_sz is not None, diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 74a2d1b..96fdbd4 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -2195,7 +2195,7 @@ def test_generalized_exist_cond(self): gen_exist_desc = \ {'name': 'gen_exist_cond', 'separator': {'contents': {'name': 'sep_nl', - 'contents': String(val_list=['\n'], max_sz=100, absorb_regexp=b'[\r\n|\n]+'), + 'contents': String(val_list=['\n'], max_sz=100, absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, 'unique': True}, 'contents': [ @@ -2465,7 +2465,7 @@ def test_separator(self): 'determinist': True, 'separator': {'contents': {'name': 'SEP', 'contents': String(val_list=[' ', ' ', ' '], - absorb_regexp=b'\s+', determinist=False), + absorb_regexp='\s+', determinist=False), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': True, 'suffix': True, From 2f09013084a2daf1edc99cafa0453a12589fc309 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 7 Aug 2016 02:47:39 +0200 Subject: [PATCH 50/80] Fix String issues introduced by commit 5e92e7ea6c6476a498b92e7acf3f46b1d8dcf09b --- framework/value_types.py | 37 ++++++++++++++-------------- test/integration/test_integration.py | 3 ++- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/framework/value_types.py b/framework/value_types.py index 11a8a86..4ac870d 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -356,7 +356,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None, extra_fuzzy_list: During data generation, if this parameter is specified with some specific values, they will be part of the test cases generated by the generic disruptor tTYPE. - absorb_regexp: You can specify a regular expression in this parameter as a + absorb_regexp (str): You can specify a regular expression in this parameter as a supplementary constraint for data absorption operation. alphabet: The alphabet to use for generating data, in case no `val_list` is provided. Also use during absorption to validate the contents. It is @@ -418,7 +418,6 @@ def make_random(self): def absorb_auto_helper(self, blob, constraints): off = 0 size = self.max_encoded_sz - # If 'Contents' constraint is set, we seek for string within # val_list or conforming to the alphabet. 
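        # (Editor's gloss, a hedged reading of the alphabet branch below: with
        # alphabet='ab', the offset stays at 0 when the blob starts with 'a'
        # or 'b'; otherwise it becomes the smallest offset at which any letter
        # of the alphabet -- in encoded form for encoded strings -- occurs,
        # or -1 when no letter is found at all.)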
# If 'Regexp' constraint is set, we seek for string matching @@ -469,7 +468,7 @@ def absorb_auto_helper(self, blob, constraints): g = re.search(self.regexp, unconvert_from_internal_repr(blob_dec), re.S) if g is not None: if self.encoded_string: - pattern_enc = self.encode(g.group(0)) + pattern_enc = self.encode(convert_to_internal_repr(g.group(0))) off = blob.find(pattern_enc) size = len(pattern_enc) else: @@ -498,7 +497,6 @@ def do_absorb(self, blob, constraints, off=0, size=None): Returns: value, off, size """ - self.orig_max_sz = self.max_sz self.orig_min_encoded_sz = self.min_encoded_sz self.orig_max_encoded_sz = self.max_encoded_sz @@ -628,11 +626,11 @@ def _read_value_from(self, blob, constraints): if self.encoded_string: blob = self.decode(blob) if constraints[AbsCsts.Regexp]: - g = re.match(self.regexp, blob, re.S) + g = re.match(self.regexp, unconvert_from_internal_repr(blob), re.S) if g is None: raise ValueError('regexp not valid!') else: - return g.group(0) + return convert_to_internal_repr(g.group(0)) else: return blob @@ -686,7 +684,7 @@ def set_description(self, val_list=None, size=None, min_sz=None, else: self.regexp = '.*' else: - self.regexp = convert_to_internal_repr(absorb_regexp) + self.regexp = absorb_regexp if extra_fuzzy_list is not None: self.extra_fuzzy_list = VT._str2internal(extra_fuzzy_list) @@ -709,18 +707,6 @@ def set_description(self, val_list=None, size=None, min_sz=None, if l not in self.alphabet: raise ValueError("The value '%s' does not conform to the alphabet!" % val) - if isinstance(self, String): - max_enc_sz = 0 - min_enc_sz = len(self.val_list[0]) - for val in self.val_list: - length = len(val) - if length > max_enc_sz: - max_enc_sz = length - if length < min_enc_sz: - min_enc_sz = length - self.max_encoded_sz = max_enc_sz - self.min_encoded_sz = min_enc_sz - self.val_list_copy = copy.copy(self.val_list) self.is_val_list_provided = True # distinguish cases where # val_list is provided or @@ -763,6 +749,14 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.min_sz = 0 self.max_sz = self.DEFAULT_MAX_SZ + if not self.encoded_string: + # In the case of String (or every non-Encoding type), the internal represenation + # is UTF8 encoding. Hence the byte string size is still >= to the string size. 
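            # (Editor's illustration of the comment above, hedged: the sample
            # u'où ça' used by the encoding tests is 5 characters long but
            # encodes to 7 bytes -- len(u'où ça'.encode('utf8')) == 7 -- which
            # is why the encoded bounds are clamped below so that they are
            # never smaller than the raw size bounds.)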
+            if self.max_encoded_sz is None or self.max_encoded_sz < self.max_sz:
+                self.max_encoded_sz = self.max_sz
+            if self.min_encoded_sz is None or self.min_encoded_sz < self.min_sz:
+                self.min_encoded_sz = self.min_sz
+
         self._check_sizes(val_list)

         if val_list is None:
@@ -813,6 +807,11 @@ def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_li
             else:
                 return False
         else:
+            val_sz = len(value)
+            if self.max_encoded_sz is None or val_sz > self.max_encoded_sz:
+                self.max_encoded_sz = val_sz
+            if self.min_encoded_sz is None or val_sz < self.min_encoded_sz:
+                self.min_encoded_sz = val_sz
             if update_list:
                 self.val_list.append(value)
             return True
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index 96fdbd4..4cf6e6d 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -1847,7 +1847,7 @@ def test_absorb_nonterm_1(self):
         print('\n ---[message to absorb]---')
         print(repr(msg))
-        print('\n ---[absobed message]---')
+        print('\n ---[absorbed message]---')
         print(top.get_value())
         top.show()
@@ -2669,6 +2669,7 @@ def encode(self, val):
                 return val + b'***'

             def decode(self, val):
+                print('\nDBg: ', val)
                 return val[:-3]

         data = ['Test!', 'Hello World!']

From 6005e864a1ea2ddfd638c25870ca6408453a6531 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Mon, 8 Aug 2016 01:21:51 +0200
Subject: [PATCH 51/80] Revamp String encoding to fix inconsistencies and clumsiness

Python codecs ('latin-1', 'utf8', ...) are now handled directly by the
String class through the parameter 'codec'. Besides, as this handling is
independent from Encoder()s (e.g., GZIP, ...), the Encoder() associated
with an *Encoded* String() applies to the content of the String(), which
is encoded with the Python codec provided as parameter.
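(Editor's illustration of the API change described above -- a minimal sketch;
the values are lifted from the hunks below, and only the import line is an
assumption based on the file paths in this series:)

    from framework.value_types import String

    # The codec is now a String() parameter instead of a dedicated subclass:
    service = String(val_list=[u'my \u00fcber service'], codec='utf8')
    usb_str = String(val_list=[u'\u00fcber string'],
                     max_sz=126, max_encoded_sz=253, codec='utf-16-le')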
--- data_models/example.py | 2 +- data_models/example_strategy.py | 2 +- data_models/file_formats/jpg.py | 6 +- data_models/file_formats/png.py | 14 +- data_models/file_formats/zip.py | 42 +++--- data_models/protocols/pppoe.py | 32 ++--- data_models/protocols/sms.py | 2 +- data_models/protocols/usb.py | 4 +- data_models/tuto.py | 2 +- docs/source/data_model.rst | 22 +-- framework/basic_primitives.py | 2 +- framework/data_model_helpers.py | 15 +- framework/encoders.py | 35 +---- framework/global_resources.py | 7 +- framework/value_types.py | 198 ++++++++++++++------------- test/integration/test_integration.py | 87 ++++++------ 16 files changed, 218 insertions(+), 254 deletions(-) diff --git a/data_models/example.py b/data_models/example.py index 83952b4..537fd39 100644 --- a/data_models/example.py +++ b/data_models/example.py @@ -62,7 +62,7 @@ def build_data_model(self): tux_subparts_4 = [u'[\u00c2]PLIP', u'[\u00c2]GLOUP'] ku.add_conf('ALT') - ku.set_values(tux_subparts_4, conf='ALT') + ku.set_values(value_type=String(val_list=tux_subparts_4, codec='utf8'), conf='ALT') idx = Node('IDX') idx.set_values(value_type=SINT16_be(mini=4,maxi=40)) diff --git a/data_models/example_strategy.py b/data_models/example_strategy.py index 4ab2c35..72d6d4a 100644 --- a/data_models/example_strategy.py +++ b/data_models/example_strategy.py @@ -81,7 +81,7 @@ class t_fuzz_tve_01(Disruptor): def disrupt_data(self, dm, target, prev_data): - val = b"NEW_" + rand_string(mini=5, maxi=10, str_set='XYZRVW') + val = b"NEW_" + rand_string(mini=5, maxi=10, str_set='XYZRVW').encode('latin-1') if prev_data.node: prev_data.node.get_node_by_path('TVE.*EVT1$').set_frozen_value(val) diff --git a/data_models/file_formats/jpg.py b/data_models/file_formats/jpg.py index d045ac6..af9175c 100644 --- a/data_models/file_formats/jpg.py +++ b/data_models/file_formats/jpg.py @@ -75,7 +75,7 @@ def build_data_model(self): {'name': 'jpg', 'contents': [ {'name': 'before_SOF', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'absorb_csts': AbsNoCsts(), 'set_attrs': MH.Attr.Abs_Postpone, 'mutable': False}, @@ -119,7 +119,7 @@ def build_data_model(self): ]}, {'name': 'between_SOF_SOS', - 'contents': BYTES(), + 'contents': String(), 'random': True, 'absorb_csts': AbsNoCsts(), 'set_attrs': MH.Attr.Abs_Postpone, @@ -164,7 +164,7 @@ def build_data_model(self): {'name': 'afterSOS', 'mutable': False, - 'contents': BYTES(min_sz=0), + 'contents': String(min_sz=0), 'absorb_csts': AbsNoCsts()} ]} diff --git a/data_models/file_formats/png.py b/data_models/file_formats/png.py index 133cb15..61d470d 100644 --- a/data_models/file_formats/png.py +++ b/data_models/file_formats/png.py @@ -58,16 +58,16 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': BYTES(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': String(val_list=[u'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ {'name': 'len', 'contents': UINT32_be()}, {'name': 'type', - 'contents': BYTES(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': String(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', - 'contents': lambda x: Node('data', value_type=BYTES(size=x[0].cc.get_raw_value())), + 'contents': lambda x: Node('data', value_type=String(size=x[0].cc.get_raw_value())), 'node_args': ['len']}, {'name': 'crc32_gen', 'contents': MH.CRC(vt=UINT32_be, clear_attrs=[MH.Attr.Mutable]), @@ -80,7 +80,7 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', 
- 'contents': BYTES(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': String(val_list=[u'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ @@ -91,7 +91,7 @@ def build_data_model(self): {'weight': 10, 'contents': [ {'name': 'type1', - 'contents': BYTES(val_list=['IHDR'], size=4), + 'contents': String(val_list=['IHDR'], size=4), 'absorb_csts': AbsFullCsts()}, {'name': 'width', 'contents': UINT32_be()}, @@ -111,9 +111,9 @@ def build_data_model(self): {'weight': 5, 'contents': [ {'name': 'type2', - 'contents': BYTES(val_list=['IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': String(val_list=['IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', - 'contents': lambda x: Node('data', value_type=BYTES(size=x.get_raw_value())), + 'contents': lambda x: Node('data', value_type=String(size=x.get_raw_value())), 'node_args': 'len'} ]} ]}, diff --git a/data_models/file_formats/zip.py b/data_models/file_formats/zip.py index a4286d6..11b50e8 100644 --- a/data_models/file_formats/zip.py +++ b/data_models/file_formats/zip.py @@ -64,12 +64,12 @@ def build_data_model(self): {'name': 'ZIP', 'contents': [ {'name': 'start_padding', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'qty': (0, 1), 'clear_attrs': MH.Attr.Mutable, 'alt': [ {'conf': 'ABS', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'set_attrs': MH.Attr.Abs_Postpone, 'clear_attrs': MH.Attr.Mutable, 'absorb_csts': AbsNoCsts()} @@ -132,20 +132,20 @@ def build_data_model(self): {'name': 'extra_field', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.get_raw_value())), + String(size=x.get_raw_value())), 'node_args': 'extra_field_length'} ]}, {'name': 'data', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - BYTES(val_list=[zlib.compress(b'a'*x.get_raw_value())])), + String(val_list=[zlib.compress(b'a'*x.get_raw_value())])), 'node_args': 'uncompressed_size', 'alt': [ {'conf': 'ABS', 'type': MH.Generator, 'custo_clear': MH.Custo.Gen.ResetOnUnfreeze, 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.get_raw_value())), + String(size=x.get_raw_value())), 'node_args': 'compressed_size'} ]}, {'name': 'data_desc', @@ -160,12 +160,12 @@ def build_data_model(self): ]}, {'name': 'no_data_desc', 'exists_if': (BitFieldCondition(sf=1, val=0), 'gp_bit_flag'), - 'contents': BYTES(size=0)} + 'contents': String(size=0)} ]} ]}, {'name': 'archive_desc_header', 'qty': (0,1), - 'contents': BYTES(size=0), + 'contents': String(size=0), 'alt': [ {'conf': 'ABS', 'contents': [ @@ -178,7 +178,7 @@ def build_data_model(self): {'name': 'extra_enc_field', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.get_raw_value())), + String(size=x.get_raw_value())), 'node_args': 'extra_enc_field_len'} ]} ]}, @@ -189,10 +189,10 @@ def build_data_model(self): 'sync_qty_with': 'file', 'contents': [ {'name': 'unsupported_fields', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'alt': [ {'conf': 'ABS', - 'contents': BYTES(size=10), + 'contents': String(size=10), 'set_attrs': [MH.Attr.Abs_Postpone], 'absorb_csts': AbsNoCsts()} ]}, @@ -231,7 +231,7 @@ def build_data_model(self): 'alt': [ {'conf': 'ABS', 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.cc.generated_node.get_raw_value())), + String(size=x.cc.generated_node.get_raw_value())), 'node_args': ('file_name_length', 2)} ]}, {'name': ('extra_field', 2), 'contents': MH.COPY_VALUE(path='header/extra_field/cts$', depth=1), @@ -239,12 +239,12 @@ def 
build_data_model(self): 'alt': [ {'conf': 'ABS', 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.cc.generated_node.get_raw_value())), + String(size=x.cc.generated_node.get_raw_value())), 'node_args': ('extra_field_length', 2)} ]}, {'name': 'file_comment', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.get_raw_value())), + String(size=x.get_raw_value())), 'node_args': 'file_comment_length'} ]} ]}, @@ -253,12 +253,12 @@ def build_data_model(self): {'weight': 5, 'contents': [ {'name': 'empty', - 'contents': BYTES(size=0)}, + 'contents': String(size=0)}, ]}, {'weight': 1, 'contents': [ {'name': 'full', - 'contents': BYTES(val_list=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, + 'contents': String(val_list=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, ]}, ], 'alt': [ @@ -274,7 +274,7 @@ def build_data_model(self): 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'record_meta_data', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'set_attrs': [MH.Attr.Abs_Postpone], 'absorb_csts': AbsNoCsts()}, {'name': 'zip64_sig_locator', @@ -282,10 +282,10 @@ def build_data_model(self): 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'locator_meta_data', - 'contents': BYTES(size=16)} + 'contents': String(size=16)} ]}, {'name': 'empty_end_of_cdir', - 'contents': BYTES(size=0)} + 'contents': String(size=0)} ]} ]} ]}, @@ -322,16 +322,16 @@ def build_data_model(self): 'contents': UINT32_le(maxi=2**10)}, {'name': 'ZIP_comment', 'contents': lambda x: Node('cts', value_type=\ - BYTES(size=x.get_raw_value())), + String(size=x.get_raw_value())), 'node_args': 'ZIP_comment_len'} ]} ]}, {'name': 'end_padding', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'qty': (0,1), 'alt': [ {'conf': 'ABS', - 'contents': BYTES(size=0), + 'contents': String(size=0), 'absorb_csts': AbsNoCsts()} ]} ]} diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index ded52b9..246280e 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -52,26 +52,26 @@ def build_data_model(self): {'name': 'v000', # Final Tag (optional) 'exists_if': (IntCondition(0), 'type'), 'sync_enc_size_with': 'len', - 'contents': BYTES(size=0)}, + 'contents': String(size=0)}, {'name': 'v101', # Service Name 'exists_if': (IntCondition(0x0101), 'type'), 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=[u'my \u00fcber service']), + 'contents': String(val_list=[u'my \u00fcber service'], codec='utf8'), }, {'name': 'v102', # AC name 'exists_if': (IntCondition(0x0102), 'type'), 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['AC name']), + 'contents': String(val_list=['AC name'], codec='utf8'), }, {'name': 'v103', # Host Identifier 'exists_if': (IntCondition(0x0103), 'type'), 'sync_enc_size_with': 'len', - 'contents': BYTES(val_list=['Host Identifier']), + 'contents': String(val_list=['Host Identifier']), }, {'name': 'v104', # Cookie 'exists_if': (IntCondition(0x0104), 'type'), 'sync_enc_size_with': 'len', - 'contents': BYTES(val_list=['Cookie'], min_sz=0, max_sz=1000), + 'contents': String(val_list=['Cookie'], min_sz=0, max_sz=1000), }, {'name': 'v105', # Vendor Specific 'exists_if': (IntCondition(0x0105), 'type'), @@ -82,29 +82,29 @@ def build_data_model(self): subfield_descs=['type','version']) }, {'name': 'remainder', 'sync_enc_size_with': ('len', 4), - 'contents': BYTES(val_list=['unspecified...'], min_sz=0, max_sz=1000), + 'contents': String(val_list=['unspecified...'], min_sz=0, 
max_sz=1000), }, ]}, {'name': 'v110', # Relay session ID 'exists_if': (IntCondition(0x0110), 'type'), 'sync_enc_size_with': 'len', - 'contents': BYTES(size=12)}, + 'contents': String(size=12)}, {'name': 'v201', 'exists_if': (IntCondition([0x201, 0x202]), 'type'), 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['Service Name Error or AC System Error!']), + 'contents': String(val_list=['Service Name Error or AC System Error!'], codec='utf8'), }, {'name': 'v203', # Generic Error 'exists_if': (IntCondition(0x0203), 'type'), 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['Generic Error!']), + 'contents': String(val_list=['Generic Error!'], codec='utf8'), 'alt': [ {'conf': 'null-terminated', # RFC2516 says it MUST NOT be null terminated 'exists_if': (IntCondition(0x0203), 'type'), 'contents': [ {'name': 'data', 'sync_enc_size_with': ('len', -1), - 'contents': UTF8(val_list=['Generic Error!'])}, + 'contents': String(val_list=['Generic Error!'], codec='utf8')}, {'name': 'null', 'mutable': False, 'contents': UINT8(int_list=[0])} @@ -135,11 +135,11 @@ def build_data_model(self): {'name': 'mac_dst', 'semantics': 'mac_dst', 'mutable': False, - 'contents': BYTES(size=6)}, + 'contents': String(size=6)}, {'name': 'mac_src', 'semantics': 'mac_src', 'mutable': False, - 'contents': BYTES(size=6)}, + 'contents': String(size=6)}, {'name': 'proto', 'mutable': False, 'contents': UINT16_be(int_list=[0x8863])}, @@ -178,7 +178,7 @@ def build_data_model(self): (tag_ac_name, 1), (tag_service_name.get_clone(), 1), {'name': 'host_uniq_stub', - 'contents': BYTES(val_list=[''])}, + 'contents': String(val_list=[''])}, (tag_node.get_clone(), 0, 4) ]}, {'name': '4padr', @@ -199,7 +199,7 @@ def build_data_model(self): 'contents': [ (tag_service_name.get_clone(), 1), {'name': ('host_uniq_stub', 2), - 'contents': BYTES(val_list=[''])}, + 'contents': String(val_list=[''])}, (tag_node_4pads, 0, 4) ]}, # Reject PPPoE session Case @@ -219,7 +219,7 @@ def build_data_model(self): ]} ]}, {'name': 'padding', - 'contents': BYTES(max_sz=0), + 'contents': String(max_sz=0), 'absorb_csts': AbsNoCsts(), 'mutable': False}, ]} @@ -229,7 +229,7 @@ def build_data_model(self): pppoe_msg.make_random(recursive=True) padi = pppoe_msg.get_clone('padi') - padi['.*/mac_dst'].set_values(value_type=BYTES(val_list=['\xff\xff\xff\xff\xff\xff'])) + padi['.*/mac_dst'].set_values(value_type=String(val_list=[u'\xff\xff\xff\xff\xff\xff'])) padi['.*/code'].set_values(value_type=UINT8(int_list=[0x9])) pado = pppoe_msg.get_clone('pado') diff --git a/data_models/protocols/sms.py b/data_models/protocols/sms.py index 6208144..41d650f 100644 --- a/data_models/protocols/sms.py +++ b/data_models/protocols/sms.py @@ -264,7 +264,7 @@ def build_data_model(self): 'node_args': ['SPI_p1','SPI_p2','KIc','KID_RC','TAR','CNTR','PCNTR','SecData']}, {'name': 'SecData', - 'contents': BYTES(min_sz=1, max_sz=100, determinist=False)} + 'contents': String(min_sz=1, max_sz=100, determinist=False)} ]}, ]}, diff --git a/data_models/protocols/usb.py b/data_models/protocols/usb.py index 24e9214..16143a5 100644 --- a/data_models/protocols/usb.py +++ b/data_models/protocols/usb.py @@ -333,8 +333,8 @@ def build_data_model(self): 'contents': UINT8(int_list=[USB_DEFS.DT_STRING])}, {'name': 'contents', 'sync_enc_size_with': ('bLength', 2), - 'contents': UTF16_LE(val_list=['\xfcber string', 'what an interesting string!'], - max_sz=126, max_encoded_sz=253)}, + 'contents': String(val_list=[u'\u00fcber string', u'what an interesting string!'], + max_sz=126, max_encoded_sz=253, 
codec='utf-16-le')}, ]} diff --git a/data_models/tuto.py b/data_models/tuto.py index ad260c5..721f474 100644 --- a/data_models/tuto.py +++ b/data_models/tuto.py @@ -378,7 +378,7 @@ def keycode_helper(blob, constraints, node_internals): 'node_args': 'data1', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': UTF16_LE(val_list=['Test!', 'Hello World!']) }, + 'contents': String(val_list=['Test!', 'Hello World!'], codec='utf-16-le') }, ]}, {'name': 'data2', 'contents': String(val_list=['Red', 'Green', 'Blue']) }, diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index b6d6028..cd265fa 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -112,6 +112,9 @@ following parameters: to do it at the node level by using the data model keyword ``determinist`` (refer to :ref:`dm:node_prop_keywords`). +``codec`` [default value: **'latin-1'**] + Codec to use for encoding the string (e.g., 'latin-1', 'utf8'). + Note that depending on the charset, additional fuzzing cases are defined. ``ascii_mode`` [default value: **False**] If set to ``True``, it will enforce the string to comply with ASCII @@ -163,27 +166,10 @@ that enables to handle transparently any encoding scheme: Below the different currently defined string types: -- :class:`framework.value_types.String`: General purpose ``UTF8`` character string. -- :class:`framework.value_types.BYTES`: General purpose byte string. +- :class:`framework.value_types.String`: General purpose character string. - :class:`framework.value_types.Filename`: Filename. Similar to the type ``String``, but some disruptors like ``tTYPE`` will generate more specific test cases. -- :class:`framework.value_types.ASCII`: ``String`` encoded in ``ASCII``. - Note that additional test cases on the encoding scheme are defined (e.g., set the most - significant bit of a character to 1). -- :class:`framework.value_types.LATIN_1`: ``String`` encoded in ``LATIN_1``. -- :class:`framework.value_types.UTF8`: ``String`` encoded in ``UTF8``. - It provides the same encoding as a ``String``, but using it in a data model for describing UTF8 - fields is preferable because: a disruptor may use that information for playing around UTF8, and you - are agnostic from String encoding choice. -- :class:`framework.value_types.UTF16_LE`: ``String`` encoded in ``UTF16`` little-endian. - Note that some test cases on the encoding scheme are defined. -- :class:`framework.value_types.UTF16_BE`: ``String`` encoded in ``UTF16`` big-endian. - Note that some test cases on the encoding scheme are defined. -- :class:`framework.value_types.Codec`: ``String`` encoded in any standard encoding - supported by Python. You have to provide the parameter ``encoding_arg`` with the - codec you want to use. If no codec is provided, this class will behave the same as the class - :class:`framework.value_types.String`, that is, the ``utf8`` codec will be used. - :class:`framework.value_types.GZIP`: ``String`` compressed with ``zlib``. The parameter ``encoding_arg`` is used to specify the level of compression (0-9). 
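  As a hedged illustration (the payload and the compression level are
  arbitrary)::

      GZIP(val_list=['some data to compress'], encoding_arg=9)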
- :class:`framework.value_types.GSM7bitPacking`: ``String`` encoded in conformity diff --git a/framework/basic_primitives.py b/framework/basic_primitives.py index 00e06f9..065d2c5 100644 --- a/framework/basic_primitives.py +++ b/framework/basic_primitives.py @@ -37,7 +37,7 @@ def rand_string(size=None, mini=1, maxi=10, str_set=string.printable): val = random.choice(str_set) out += val - return convert_to_internal_repr(out) + return out def corrupt_bytes(s, p=0.01, n=None, ctrl_char=False): diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index a31f1d8..180e657 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -193,7 +193,7 @@ def qty(node_name, vt, set_attrs, clear_attrs, node): def TIMESTAMP(time_format="%H%M%S", utc=False, set_attrs=[], clear_attrs=[]): ''' - Return a *generator* that returns the current time (in a BYTES node). + Return a *generator* that returns the current time (in a String node). Args: time_format (str): time format to be used by the generator. @@ -206,7 +206,7 @@ def timestamp(time_format, utc, set_attrs, clear_attrs): else: now = datetime.datetime.now() ts = now.strftime(time_format) - n = Node('cts', value_type=fvt.BYTES(val_list=[ts], size=len(ts))) + n = Node('cts', value_type=fvt.String(val_list=[ts], size=len(ts))) n.set_semantics(NodeSemantics(['timestamp'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -296,7 +296,7 @@ def map_func(vt, func, set_attrs, clear_attrs, nodes): return functools.partial(map_func, vt, func, set_attrs, clear_attrs) @staticmethod - def CYCLE(vals, depth=1, vt=fvt.BYTES, + def CYCLE(vals, depth=1, vt=fvt.String, set_attrs=[], clear_attrs=[]): '''Return a *generator* that iterates other the provided value list and returns at each step a `vt` node corresponding to the @@ -1834,10 +1834,13 @@ def parse(self, inputs, name, charset=MH.Charset.ASCII_EXT): if self.charset == MH.Charset.ASCII: max = 0x7F + self.codec = 'ascii' elif self.charset == MH.Charset.UNICODE: max = 0xFFFF + self.codec = 'utf8' else: max = 0xFF + self.codec = 'latin-1' def get_complement(chars): return ''.join([self.int_to_string(i) for i in range(0, max + 1) if self.int_to_string(i) not in chars]) @@ -1863,10 +1866,12 @@ def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None assert (values is not None or alphabet is not None) if alphabet is not None: - return [Node(name=name, vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1])), 1, 1] + return [Node(name=name, + vt=fvt.String(alphabet=alphabet, min_sz=qty[0], max_sz=qty[1], + codec=self.codec)), 1, 1] else: if type == fvt.String: - node = Node(name=name, vt=fvt.String(val_list=values)) + node = Node(name=name, vt=fvt.String(val_list=values, codec=self.codec)) else: node = Node(name=name, vt=fvt.INT_str(int_list=values)) diff --git a/framework/encoders.py b/framework/encoders.py index 71304be..aa4bfd6 100644 --- a/framework/encoders.py +++ b/framework/encoders.py @@ -80,32 +80,6 @@ def init_encoding_scheme(self, arg): """ pass - @staticmethod - def to_bytes(val): - return convert_to_internal_repr(val) - - -class PythonCodec_Enc(Encoder): - """ - Encoder enabling the usage of every standard encodings supported by Python. 
- """ - def init_encoding_scheme(self, arg=None): - if arg is None: - self._codec = internal_repr_codec - else: - self._codec = arg - - def encode(self, val): - return unconvert_from_internal_repr(val).encode(self._codec) - - def decode(self, val): - try: - dec = val.decode(self._codec, 'strict') - except: - dec = b'' - return Encoder.to_bytes(dec) - - class GZIP_Enc(Encoder): def init_encoding_scheme(self, arg=None): @@ -138,9 +112,12 @@ def init_encoding_scheme(self, arg): arg (list): Prefix and suffix character strings. Can be individually set to None """ - assert(isinstance(arg, (tuple, list))) - self.prefix = Encoder.to_bytes(arg[0]) - self.suffix = Encoder.to_bytes(arg[1]) + assert isinstance(arg, (tuple, list)) + if sys.version_info[0] > 2: + assert arg[0] is None or isinstance(arg[0], bytes) + assert arg[1] is None or isinstance(arg[1], bytes) + self.prefix = b'' if arg[0] is None else arg[0] + self.suffix = b'' if arg[1] is None else arg[1] self.prefix_sz = 0 if self.prefix is None else len(self.prefix) self.suffix_sz = 0 if self.suffix is None else len(self.suffix) diff --git a/framework/global_resources.py b/framework/global_resources.py index c6a143d..216b693 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -90,12 +90,7 @@ def convert_to_internal_repr(val): return val def unconvert_from_internal_repr(val): - # assert isinstance(val, bytes) - try: - dec_val = val.decode(internal_repr_codec, 'strict') - except: - dec_val = val.decode('latin_1') - return dec_val + return val.decode(internal_repr_codec, 'replace') class Error(object): diff --git a/framework/value_types.py b/framework/value_types.py index 4ac870d..6e129d0 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -34,6 +34,7 @@ import string import re import zlib +import codecs import six from six import with_metaclass @@ -41,7 +42,7 @@ sys.path.append('.') import framework.basic_primitives as bp -from framework.data_model import AbsorbStatus, AbsCsts, convert_to_internal_repr, unconvert_from_internal_repr +from framework.data_model import AbsorbStatus, AbsCsts from framework.encoders import * from framework.error_handling import * from framework.global_resources import * @@ -98,10 +99,6 @@ def set_size_from_constraints(self, size=None, encoded_size=None): def pretty_print(self, max_size=None): return None - @staticmethod - def _str2internal(arg): - return convert_to_internal_repr(arg) - class VT_Alt(VT): @@ -322,15 +319,46 @@ def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded__sz, max_ """ return None - def __repr__(self): if DEBUG: return VT_Alt.__repr__(self)[:-1] + ' contents:' + str(self.val_list) + '>' else: return VT_Alt.__repr__(self) + def _str2bytes(self, val): + if val is None: + return b'' + elif isinstance(val, (list, tuple)): + b = [] + for v in val: + b.append(self._str2bytes(v)) + else: + if sys.version_info[0] > 2: + b = val if isinstance(val, bytes) else val.encode(self.codec) + else: + try: + b = val.encode(self.codec) + except: + err_msg = "\n*** WARNING: Encoding issue. 
With python2 'str' or 'bytes' means " \ + "ASCII, prefix the string {:s} with 'u'".format(repr(val[:30])) + print(err_msg) + b = val + return b + + def _bytes2str(self, val): + if isinstance(val, (list, tuple)): + b = [v.decode(self.codec) for v in val] + else: + b = val.decode(self.codec) + return b + + UTF16LE = codecs.lookup('utf-16-le').name + UTF16BE = codecs.lookup('utf-16-be').name + ASCII = codecs.lookup('ascii').name + LATIN_1 = codecs.lookup('latin-1').name + def init_specific(self, val_list=None, size=None, min_sz=None, - max_sz=None, determinist=True, ascii_mode=False, + max_sz=None, determinist=True, codec='latin-1', ascii_mode=False, extra_fuzzy_list=None, absorb_regexp=None, alphabet=None, min_encoded_sz=None, max_encoded_sz=None, encoding_arg=None): @@ -351,6 +379,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None, whether this latter is provided. determinist: If set to ``True`` generated values will be in a deterministic order, otherwise in a random order. + codec: codec to use for encoding the string (e.g., 'latin-1', 'utf8') ascii_mode: If set to ``True``, it will enforce the string to comply with ASCII 7 bits. extra_fuzzy_list: During data generation, if this parameter is specified with some @@ -390,7 +419,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None, self.init_encoding_scheme(self.encoding_arg) self.set_description(val_list=val_list, size=size, min_sz=min_sz, - max_sz=max_sz, determinist=determinist, + max_sz=max_sz, determinist=determinist, codec=codec, ascii_mode=ascii_mode, extra_fuzzy_list=extra_fuzzy_list, absorb_regexp=absorb_regexp, alphabet=alphabet, min_encoded_sz=min_encoded_sz, max_encoded_sz=max_encoded_sz) @@ -441,12 +470,8 @@ def absorb_auto_helper(self, blob, constraints): elif constraints[AbsCsts.Contents] and self.alphabet is not None: size = None - if self.encoded_string: - blob_str = unconvert_from_internal_repr(blob_dec) - else: - blob_str = unconvert_from_internal_repr(blob) - blob_dec = blob - alp = unconvert_from_internal_repr(self.alphabet) + blob_str = self._bytes2str(blob_dec) + alp = self._bytes2str(self.alphabet) for l in alp: if blob_str.startswith(l): break @@ -454,10 +479,7 @@ def absorb_auto_helper(self, blob, constraints): sup_sz = len(blob)+1 off = sup_sz for l in alp: - if self.encoded_string: - l = self.encode(convert_to_internal_repr(l)) - else: - l = convert_to_internal_repr(l) + l = self.encode(self._str2bytes(l)) new_off = blob.find(l) if new_off < off and new_off > -1: off = new_off @@ -465,16 +487,11 @@ def absorb_auto_helper(self, blob, constraints): off = -1 elif constraints[AbsCsts.Regexp] and self.regexp is not None: - g = re.search(self.regexp, unconvert_from_internal_repr(blob_dec), re.S) + g = re.search(self.regexp, self._bytes2str(blob_dec), re.S) if g is not None: - if self.encoded_string: - pattern_enc = self.encode(convert_to_internal_repr(g.group(0))) - off = blob.find(pattern_enc) - size = len(pattern_enc) - else: - pattern_enc = convert_to_internal_repr(g.group(0)) - off = blob.find(pattern_enc) - size = len(pattern_enc) + pattern_enc = self.encode(self._str2bytes(g.group(0))) + off = blob.find(pattern_enc) + size = len(pattern_enc) else: off = -1 @@ -511,14 +528,11 @@ def do_absorb(self, blob, constraints, off=0, size=None): # if encoded string, val is returned decoded val = self._read_value_from(blob[off:sz+off], constraints) - if self.encoded_string: - val_enc_sz = len(self.encode(val)) # maybe different from sz if blob is smaller - if val_enc_sz < self.min_encoded_sz: - 
raise ValueError('min_encoded_sz constraint not respected!') - else: - val_sz = len(val) # maybe different from sz if blob is smaller - if val_sz < self.min_encoded_sz: - raise ValueError('min_sz/min_encoded_sz constraint not respected!') + val_enc_sz = len(self.encode(val)) # maybe different from sz if blob is smaller + if val_enc_sz < self.min_encoded_sz: + raise ValueError('min_encoded_sz constraint not respected!') + if not self.encoded_string: + val_sz = val_enc_sz else: blob = blob[off:] #blob[off:size+off] if size is not None else blob[off:] val = self._read_value_from(blob, constraints) @@ -626,11 +640,11 @@ def _read_value_from(self, blob, constraints): if self.encoded_string: blob = self.decode(blob) if constraints[AbsCsts.Regexp]: - g = re.match(self.regexp, unconvert_from_internal_repr(blob), re.S) + g = re.match(self.regexp, self._bytes2str(blob), re.S) if g is None: raise ValueError('regexp not valid!') else: - return convert_to_internal_repr(g.group(0)) + return self._str2bytes(g.group(0)) else: return blob @@ -662,18 +676,19 @@ def _check_sizes(self, val_list): def set_description(self, val_list=None, size=None, min_sz=None, - max_sz=None, determinist=True, + max_sz=None, determinist=True, codec='latin-1', ascii_mode=False, extra_fuzzy_list=None, absorb_regexp=None, alphabet=None, min_encoded_sz=None, max_encoded_sz=None): ''' @size take precedence over @min_sz and @max_sz ''' + self.codec = codecs.lookup(codec).name # normalize self.max_encoded_sz = max_encoded_sz self.min_encoded_sz = min_encoded_sz if alphabet is not None: - self.alphabet = convert_to_internal_repr(alphabet) + self.alphabet = self._str2bytes(alphabet) else: self.alphabet = None self.ascii_mode = ascii_mode @@ -687,7 +702,7 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.regexp = absorb_regexp if extra_fuzzy_list is not None: - self.extra_fuzzy_list = VT._str2internal(extra_fuzzy_list) + self.extra_fuzzy_list = self._str2bytes(extra_fuzzy_list) elif hasattr(self, 'specific_fuzzing_list'): self.extra_fuzzy_list = self.specific_fuzzing_list else: @@ -695,7 +710,7 @@ def set_description(self, val_list=None, size=None, min_sz=None, if val_list is not None: assert isinstance(val_list, list) - self.val_list = VT._str2internal(val_list) + self.val_list = self._str2bytes(val_list) for val in self.val_list: if not self._check_compliance(val, force_max_enc_sz=max_encoded_sz is not None, force_min_enc_sz=min_encoded_sz is not None, @@ -749,14 +764,6 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.min_sz = 0 self.max_sz = self.DEFAULT_MAX_SZ - if not self.encoded_string: - # In the case of String (or every non-Encoding type), the internal represenation - # is UTF8 encoding. Hence the byte string size is still >= to the string size. - if self.max_encoded_sz is None or self.max_encoded_sz < self.max_sz: - self.max_encoded_sz = self.max_sz - if self.min_encoded_sz is None or self.min_encoded_sz < self.min_sz: - self.min_encoded_sz = self.min_sz - self._check_sizes(val_list) if val_list is None: @@ -765,6 +772,13 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.determinist = determinist + if not self.encoded_string: + # For a non-Encoding type, the size of the string is always lesser or equal than the size + # of the encoded string. Hence the byte string size is still >= to the string size. 
+ if self.max_encoded_sz is None or self.max_encoded_sz < self.max_sz: + self.max_encoded_sz = self.max_sz + if self.min_encoded_sz is None or self.min_encoded_sz < self.min_sz: + self.min_encoded_sz = self.min_sz def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_list=True): if self.encoded_string: @@ -818,14 +832,14 @@ def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_li def _populate_val_list(self, force_max_enc_sz=False, force_min_enc_sz=False): self.val_list = [] - alpbt = string.printable if self.alphabet is None else unconvert_from_internal_repr(self.alphabet) + alpbt = string.printable if self.alphabet is None else self._bytes2str(self.alphabet) if self.min_sz < self.max_sz: - self._check_compliance(bp.rand_string(size=self.max_sz, str_set=alpbt), + self._check_compliance(self._str2bytes(bp.rand_string(size=self.max_sz, str_set=alpbt)), force_max_enc_sz=force_max_enc_sz, force_min_enc_sz=force_min_enc_sz) - self._check_compliance(bp.rand_string(size=self.min_sz, str_set=alpbt), + self._check_compliance(self._str2bytes(bp.rand_string(size=self.min_sz, str_set=alpbt)), force_max_enc_sz=force_max_enc_sz, force_min_enc_sz=force_min_enc_sz) else: - self._check_compliance(bp.rand_string(size=self.max_sz, str_set=alpbt), + self._check_compliance(self._str2bytes(bp.rand_string(size=self.max_sz, str_set=alpbt)), force_max_enc_sz=force_max_enc_sz, force_min_enc_sz=force_min_enc_sz) if self.min_sz+1 < self.max_sz: NB_VALS_MAX = 3 @@ -834,7 +848,7 @@ def _populate_val_list(self, force_max_enc_sz=False, force_min_enc_sz=False): retry_cpt = 0 while nb_vals < NB_VALS_MAX and retry_cpt < 5: val = bp.rand_string(mini=self.min_sz+1, maxi=self.max_sz-1, str_set=alpbt) - if self._check_compliance(val, force_max_enc_sz=force_max_enc_sz, + if self._check_compliance(self._str2bytes(val), force_max_enc_sz=force_max_enc_sz, force_min_enc_sz=force_min_enc_sz): nb_vals += 1 else: @@ -911,6 +925,24 @@ def enable_fuzz_mode(self): if v not in self.val_list_fuzzy: self.val_list_fuzzy.append(v) + if self.codec == self.ASCII: + val = bytearray(orig_val) + if len(val) > 0: + val[0] |= 0x80 + val = bytes(val) + else: + val = b'\xe9' + if val not in self.val_list_fuzzy: + self.val_list_fuzzy.append(val) + elif self.codec == self.UTF16BE or self.codec == self.UTF16LE: + if self.max_sz > 0: + if self.max_encoded_sz % 2 == 1: + nb = self.max_sz // 2 + # euro character at the end that 'fully' use the 2 bytes of utf-16 + val = ('A' * nb).encode(self.codec) + b'\xac\x20' + if val not in self.val_list_fuzzy: + self.val_list_fuzzy.append(val) + enc_cases = self.encoding_test_cases(orig_val, self.max_sz, self.min_sz, self.min_encoded_sz, self.max_encoded_sz) if enc_cases: @@ -957,7 +989,7 @@ def set_size_from_constraints(self, size=None, encoded_size=None): # has to be used for an another purpose. 
assert size is not None or encoded_size is not None - if self.encoded_string and encoded_size is not None: + if encoded_size is not None: if encoded_size == self.max_encoded_sz: return self.max_encoded_sz = encoded_size @@ -973,14 +1005,17 @@ def pretty_print(self, max_size=None): if self.drawn_val is None: self.get_value() - if self.encoded_string and not isinstance(self, (BYTES,UTF8)): + if self.encoded_string or self.codec not in [self.ASCII, self.LATIN_1]: dec = self.drawn_val sz = len(dec) if max_size is not None and sz > max_size: dec = dec[:max_size] - return repr(dec) + ' [decoded, sz=' + str(len(dec)) + ']' + dec = dec.decode(self.codec, 'replace') + if sys.version_info[0] == 2: + dec = dec.encode('ascii', 'replace') + return dec + ' [decoded, sz={!s}, codec={!s}]'.format(len(dec), self.codec) else: - return None + return 'codec={!s}'.format(self.codec) class INT(VT): @@ -1349,50 +1384,12 @@ def new_meth(meth): string_subclass.encode = new_meth(encoder_cls.encode) string_subclass.decode = new_meth(encoder_cls.decode) string_subclass.init_encoding_scheme = new_meth(encoder_cls.init_encoding_scheme) - string_subclass.to_bytes = encoder_cls.to_bytes # static method if encoding_arg is not None: string_subclass.encoding_arg = encoding_arg return string_subclass return internal_func -@from_encoder(PythonCodec_Enc) -class Codec(String): pass - -@from_encoder(PythonCodec_Enc, 'ascii') -class ASCII(String): - def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_encoded_sz): - enc_val = bytearray(self.encode(current_val)) - if len(enc_val) > 0: - enc_val[0] |= 0x80 - enc_val = bytes(enc_val) - else: - enc_val = b'\xe9' - return [enc_val] - -@from_encoder(PythonCodec_Enc, 'latin_1') -class LATIN_1(String): pass - -@from_encoder(PythonCodec_Enc, 'latin_1') -class BYTES(String): pass - -@from_encoder(PythonCodec_Enc, 'utf_16_le') -class UTF16_LE(String): - def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded_sz, max_encoded_sz): - l = None - if max_sz > 0: - if max_encoded_sz % 2 == 1: - nb = max_sz // 2 - # euro character at the end that 'fully' use the 2 bytes of utf-16 - l = [self.encode(b'A'*nb)+b'\xac\x20'] - return l - -@from_encoder(PythonCodec_Enc, 'utf_16_be') -class UTF16_BE(UTF16_LE): pass - -@from_encoder(PythonCodec_Enc, 'utf_8') -class UTF8(String): pass - @from_encoder(GZIP_Enc) class GZIP(String): pass @@ -1460,8 +1457,7 @@ def _unconvert_value(self, val): return int(val) def _convert_value(self, val): - return VT._str2internal(str(val)) - # return str(val) + return self._str2bytes(str(val)) def pretty_print(self, max_size=None): if self.drawn_val is None: @@ -1469,6 +1465,12 @@ def pretty_print(self, max_size=None): return str(self.drawn_val) + def _str2bytes(self, val): + if isinstance(val, (list, tuple)): + b = [v.encode('utf8') for v in val] + else: + b = val.encode('utf8') + return b #class Fuzzy_INT_str(Fuzzy_INT, metaclass=meta_int_str): diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 4cf6e6d..2958939 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1848,7 +1848,7 @@ def test_absorb_nonterm_1(self): print('\n ---[message to absorb]---') print(repr(msg)) print('\n ---[absorbed message]---') - print(top.get_value()) + print(top.to_bytes()) top.show() @@ -2545,14 +2545,14 @@ def test_encoding_attr(self): 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'enc_data', 'encoder': GZIP_Enc(6), - 'set_attrs': [NodeInternals.Abs_Postpone], 
+ 'set_attrs': NodeInternals.Abs_Postpone, 'contents': [ {'name': 'len', 'contents': MH.LEN(vt=UINT8, after_encoding=False), 'node_args': 'data1', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': UTF16_LE(val_list=['Test!', 'Hello World!'])}, + 'contents': String(val_list=['Test!', 'Hello World!'], codec='utf-16-le')}, ]}, {'name': 'data2', 'contents': String(val_list=['Red', 'Green', 'Blue'])}, @@ -2669,10 +2669,9 @@ def encode(self, val): return val + b'***' def decode(self, val): - print('\nDBg: ', val) return val[:-3] - data = ['Test!', 'Hello World!'] + data = ['Test!', u'Hell\u00fc World!'] enc_desc = \ {'name': 'enc', 'contents': [ @@ -2681,7 +2680,7 @@ def decode(self, val): 'node_args': 'user_data', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'user_data', - 'contents': EncodedStr(val_list=data)}, + 'contents': EncodedStr(val_list=data, codec='utf8')}, {'name': 'compressed_data', 'contents': GZIP(val_list=data, encoding_arg=6)} ]} @@ -2697,8 +2696,8 @@ def decode(self, val): self.assertEqual(struct.unpack('B', node['enc/len$'].to_bytes())[0], len(node['enc/user_data$'].get_raw_value())) - raw_data = b'\x0CHello World!***' + \ - b'x\x9c\xf3H\xcd\xc9\xc9W\x08\xcf/\xcaIQ\x04\x00\x1cI\x04>' + raw_data = b'\x0CHell\xC3\xBC World!***' + \ + b'x\x9c\xf3H\xcd\xc9\xf9\xa3\x10\x9e_\x94\x93\xa2\x08\x00 \xb1\x04\xcb' status, off, size, name = node_abs.absorb(raw_data, constraints=AbsFullCsts()) @@ -2728,38 +2727,38 @@ def decode(self, val): gsm_dec = gsm_t.decode(gsm_enc) self.assertEqual(msg, gsm_dec) - msg = u'où ça'.encode(internal_repr_codec) #' b'o\xf9 \xe7a' - vtype = UTF16_LE(max_sz=20) - enc = vtype.encode(msg) - dec = vtype.decode(enc) - self.assertEqual(msg, dec) - - msg = u'où ça'.encode(internal_repr_codec) - vtype = UTF16_BE(max_sz=20) - enc = vtype.encode(msg) - dec = vtype.decode(enc) - self.assertEqual(msg, dec) - - msg = u'où ça'.encode(internal_repr_codec) - vtype = UTF8(max_sz=20) - enc = vtype.encode(msg) - dec = vtype.decode(enc) - self.assertEqual(msg, dec) - - msg = u'où ça'.encode(internal_repr_codec) - vtype = Codec(max_sz=20, encoding_arg=None) - enc = vtype.encode(msg) - dec = vtype.decode(enc) - self.assertEqual(msg, dec) - - msg = u'où ça'.encode(internal_repr_codec) - vtype = Codec(max_sz=20, encoding_arg='utf_32') - enc = vtype.encode(msg) - dec = vtype.decode(enc) - self.assertEqual(msg, dec) - utf32_enc = b"\xff\xfe\x00\x00o\x00\x00\x00\xf9\x00\x00\x00 " \ - b"\x00\x00\x00\xe7\x00\x00\x00a\x00\x00\x00" - self.assertEqual(enc, utf32_enc) + # msg = u'où ça'.encode(internal_repr_codec) #' b'o\xf9 \xe7a' + # vtype = UTF16_LE(max_sz=20) + # enc = vtype.encode(msg) + # dec = vtype.decode(enc) + # self.assertEqual(msg, dec) + # + # msg = u'où ça'.encode(internal_repr_codec) + # vtype = UTF16_BE(max_sz=20) + # enc = vtype.encode(msg) + # dec = vtype.decode(enc) + # self.assertEqual(msg, dec) + # + # msg = u'où ça'.encode(internal_repr_codec) + # vtype = UTF8(max_sz=20) + # enc = vtype.encode(msg) + # dec = vtype.decode(enc) + # self.assertEqual(msg, dec) + # + # msg = u'où ça'.encode(internal_repr_codec) + # vtype = Codec(max_sz=20, encoding_arg=None) + # enc = vtype.encode(msg) + # dec = vtype.decode(enc) + # self.assertEqual(msg, dec) + # + # msg = u'où ça'.encode(internal_repr_codec) + # vtype = Codec(max_sz=20, encoding_arg='utf_32') + # enc = vtype.encode(msg) + # dec = vtype.decode(enc) + # self.assertEqual(msg, dec) + # utf32_enc = b"\xff\xfe\x00\x00o\x00\x00\x00\xf9\x00\x00\x00 " \ + # 
b"\x00\x00\x00\xe7\x00\x00\x00a\x00\x00\x00" + # self.assertEqual(enc, utf32_enc) msg = b'Hello World!' * 10 vtype = GZIP(max_sz=20) @@ -2768,17 +2767,17 @@ def decode(self, val): self.assertEqual(msg, dec) msg = b'Hello World!' - vtype = Wrapper(max_sz=20, encoding_arg=['', '']) + vtype = Wrapper(max_sz=20, encoding_arg=[b'', b'']) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - vtype = Wrapper(max_sz=20, encoding_arg=['', None]) + vtype = Wrapper(max_sz=20, encoding_arg=[b'', None]) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) - vtype = Wrapper(max_sz=20, encoding_arg=[None, '']) + vtype = Wrapper(max_sz=20, encoding_arg=[None, b'']) enc = vtype.encode(msg) dec = vtype.decode(enc) self.assertEqual(msg, dec) @@ -2792,7 +2791,7 @@ def test_encoded_str_2(self): 'contents': UINT8()}, {'name': 'user_data', 'sync_enc_size_with': 'len', - 'contents': UTF8(val_list=['TEST'])}, + 'contents': String(val_list=['TEST'], codec='utf8')}, {'name': 'padding', 'contents': String(max_sz=0), 'absorb_csts': AbsNoCsts()}, From 72c0d23ebf7d805e8aa623e2d2ce2586a9062ae0 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 8 Aug 2016 12:46:24 +0200 Subject: [PATCH 52/80] Fix encoding handling with 'size_sync_with' + some polish. --- data_models/file_formats/jpg.py | 1 + data_models/file_formats/png.py | 4 +- data_models/file_formats/zip.py | 1 + data_models/protocols/pppoe_strategy.py | 2 +- data_models/protocols/usb.py | 10 -- docs/source/data_model.rst | 16 ++- framework/data_model.py | 123 +++++++----------------- framework/data_model_helpers.py | 1 + framework/global_resources.py | 81 +++++++++++++++- framework/value_types.py | 16 +-- 10 files changed, 137 insertions(+), 118 deletions(-) diff --git a/data_models/file_formats/jpg.py b/data_models/file_formats/jpg.py index af9175c..80fd5e4 100644 --- a/data_models/file_formats/jpg.py +++ b/data_models/file_formats/jpg.py @@ -24,6 +24,7 @@ from framework.data_model import * from framework.data_model_helpers import * from framework.value_types import * +from framework.global_resources import * markers = { 'SOF': {0: 0xFFC0, diff --git a/data_models/file_formats/png.py b/data_models/file_formats/png.py index 61d470d..351367f 100644 --- a/data_models/file_formats/png.py +++ b/data_models/file_formats/png.py @@ -26,9 +26,7 @@ from framework.data_model import * from framework.value_types import * from framework.data_model_helpers import * - -import zlib -import struct +from framework.global_resources import * class PNG_DataModel(DataModel): diff --git a/data_models/file_formats/zip.py b/data_models/file_formats/zip.py index 11b50e8..06da657 100644 --- a/data_models/file_formats/zip.py +++ b/data_models/file_formats/zip.py @@ -26,6 +26,7 @@ from framework.data_model import * from framework.value_types import * from framework.data_model_helpers import * +from framework.global_resources import * import zlib import crcmod diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index a460bb9..6949a7b 100644 --- a/data_models/protocols/pppoe_strategy.py +++ b/data_models/protocols/pppoe_strategy.py @@ -23,7 +23,7 @@ from framework.tactics_helpers import * from framework.scenario import * -from framework.data_model import AbsorbStatus, AbsNoCsts +from framework.global_resources import * tactics = Tactics() diff --git a/data_models/protocols/usb.py b/data_models/protocols/usb.py index 16143a5..50f5958 100644 --- a/data_models/protocols/usb.py +++ 
b/data_models/protocols/usb.py
@@ -22,20 +22,10 @@
 ################################################################################

 import sys
-import os
-import copy
-import re
-import functools
-import struct
-import random
-import zlib

-from framework.plumbing import *
 from framework.data_model import *
 from framework.data_model_helpers import *
 from framework.value_types import *
-from framework.fuzzing_primitives import *
-from framework.basic_primitives import *

 class USB_DEFS:
diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst
index cd265fa..fc68e04 100644
--- a/docs/source/data_model.rst
+++ b/docs/source/data_model.rst
@@ -852,12 +852,18 @@ sync_size_with, sync_enc_size_with
 with the *value* of the node specified by reference (which should be an
 :class:`framework.value_types.INT`-based typed-node). These keywords are useful
 for size-variable node types. They are currently supported for typed-nodes which are
- :class:`framework.value_types.String`-based with or without an encoding (e.g.,
- :class:`framework.value_types.UTF8`, ...). Non-terminal nodes are not supported (for absorption).
+ :class:`framework.value_types.String`-based with or without an encoding.
+ Non-terminal nodes are not supported (for absorption).
 The distinction between ``sync_size_with`` and ``sync_enc_size_with`` is that the synchronization
- will be performed either with respect to the length of the data retrieved from the node
- (or the decoded data for encoded node), or with respect to the length of the encoded data
- (only usable in the case of an encoded node).
+ will be performed:
+
+ - either with respect to the length of the data retrieved from the node in a
+ *decoded* form. *Decoded* means that it is agnostic to the *codec* specified
+ (e.g., ``utf-8``, ``latin-1``, ...) in the ``String``, and also, for ``Encoded-String``
+ (e.g., :class:`framework.value_types.GZIP`, ...), that it is agnostic to any
+ :class:`framework.encoders.Encoder` the ``String`` is wrapped with;
+
+ - or with respect to the length of the encoded form of the data.

 Generation and absorption deal with these keywords differently, in order to achieve the
 expected behavior.
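To make the distinction concrete before detailing that behavior, here is a minimal sketch of a node description relying on these keywords (hypothetical node names 'msg', 'len' and 'payload'; it only assumes the ``String`` and ``UINT8`` typed-nodes and the keyword semantics described above)::

    msg_desc = \
    {'name': 'msg',
     'contents': [
         {'name': 'len',
          'contents': UINT8()},
         {'name': 'payload',
          'sync_enc_size_with': 'len',
          'contents': String(val_list=['Hello'], codec='utf-16-le')}
     ]}

With ``sync_enc_size_with``, ``len`` is synchronized with the encoded form of ``payload`` (10 bytes for ``'Hello'`` in ``utf-16-le``), whereas ``sync_size_with`` would synchronize it with the decoded length (5 characters).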
For generation, the synchronization goes from the described node to the referenced node diff --git a/framework/data_model.py b/framework/data_model.py index 156a5d1..ef330d8 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -42,6 +42,7 @@ from libs.external_modules import * from framework.global_resources import * from framework.error_handling import * +import framework.value_types as fvt import libs.debug_facility as dbg @@ -403,77 +404,6 @@ def flatten(nested): nodes_weight_re = re.compile('(.*?)\((.*)\)') -class AbsorbStatus(Enum): - - Accept = 1 - Reject = 2 - Absorbed = 3 - FullyAbsorbed = 4 - -# List of constraints that rules blob absorption -class AbsCsts(object): - - Size = 1 - Contents = 2 - Regexp = 3 - Structure = 4 - - def __init__(self, size=True, contents=True, regexp=True, struct=True): - self.constraints = { - AbsCsts.Size: size, - AbsCsts.Contents: contents, - AbsCsts.Regexp: regexp, - AbsCsts.Structure: struct - } - - def __bool__(self): - return True in self.constraints.values() - - def __nonzero__(self): - return True in self.constraints.values() - - def set(self, cst): - if cst in self.constraints: - self.constraints[cst] = True - else: - raise ValueError - - def clear(self, cst): - if cst in self.constraints: - self.constraints[cst] = False - else: - raise ValueError - - def __copy__(self): - new_csts = type(self)() - new_csts.__dict__.update(self.__dict__) - new_csts.constraints = copy.copy(self.constraints) - - return new_csts - - def __getitem__(self, key): - return self.constraints[key] - - def __repr__(self): - return 'AbsCsts()' - - -class AbsNoCsts(AbsCsts): - - def __init__(self, size=False, contents=False, regexp=False, struct=False): - AbsCsts.__init__(self, size=size, contents=contents, regexp=regexp, struct=struct) - - def __repr__(self): - return 'AbsNoCsts()' - - -class AbsFullCsts(AbsCsts): - - def __init__(self, size=True, contents=True, regexp=True, struct=True): - AbsCsts.__init__(self, size=size, contents=contents, regexp=regexp, struct=struct) - - def __repr__(self): - return 'AbsFullCsts()' ### Materials for Node Synchronization ### @@ -690,8 +620,7 @@ def __init__(self, val=None, neg_val=None): self.val = neg_val def check(self, node): - from framework.value_types import INT - assert(node.is_typed_value(subkind=INT)) + assert(node.is_typed_value(subkind=fvt.INT)) if isinstance(self.val, (tuple, list)): if self.positive_mode: @@ -751,8 +680,7 @@ def __init__(self, sf, val=None, neg_val=None): def check(self, node): - from framework.value_types import BitField - assert(node.is_typed_value(subkind=BitField)) + assert(node.is_typed_value(subkind=fvt.BitField)) for sf, val, neg_val in zip(self.sf, self.val, self.neg_val): if val is not None: @@ -936,6 +864,12 @@ def __init__(self, arg=None): def _init_specific(self, arg): pass + def _get_value(self, conf=None, recursive=True, return_node_internals=False): + raise NotImplementedError + + def get_raw_value(self, **kwargs): + raise NotImplementedError + def customize(self, custo): self.custo = copy.copy(custo) @@ -1460,6 +1394,9 @@ def _get_value(self, conf=None, recursive=True, return_node_internals=False): else: return (b'', True) + def get_raw_value(self, **kwargs): + return b'' + def set_child_env(self, env): print('Empty:', hex(id(self))) raise AttributeError @@ -1705,6 +1642,9 @@ def _get_delayed_value(self, conf=None, recursive=True): ret = self.generated_node._get_value(conf=conf, recursive=recursive) return (ret, False) + def get_raw_value(self, **kwargs): + return 
self.generated_node.get_raw_value(**kwargs) + def absorb(self, blob, constraints, conf, pending_postpone_desc=None): # We make the generator freezable to be sure that _get_value() # won't reset it after absorption @@ -1888,6 +1828,8 @@ def _get_value(self, conf=None, recursive=True, return_node_internals=False): def _get_value_specific(self, conf, recursive): raise NotImplementedError + def get_raw_value(self, **kwargs): + return self._get_value() def absorb(self, blob, constraints, conf, pending_postpone_desc=None): status = None @@ -2053,10 +1995,10 @@ def _get_value_specific(self, conf=None, recursive=True): ret = self.value_type.get_value() return NodeInternals_Term._convert_to_internal_repr(ret) - def get_raw_value(self): + def get_raw_value(self, **kwargs): if not self.is_frozen(): self._get_value() - return self.value_type.get_current_raw_val() + return self.value_type.get_current_raw_val(**kwargs) def absorb_auto_helper(self, blob, constraints): return self.value_type.absorb_auto_helper(blob, constraints) @@ -2993,13 +2935,17 @@ def _sync_size_handling(node): if obj.apply_to_enc_size: sz = len(node.to_bytes()) else: - decoded_val = node.get_raw_value() - if isinstance(decoded_val, bytes): - sz = len(decoded_val) + if node.is_typed_value(subkind=fvt.String): + # We need to get the str form to be agnostic to any low-level encoding + # that may change the size ('utf8', ...). + decoded_val = node.get_raw_value(str_form=True) else: - # In this case, this is a BitField or an INT-based object, which are - # fixed size object - raise DataModelDefinitionError('size sync should not be used for fixed sized object!') + decoded_val = node.get_raw_value() + if not isinstance(decoded_val, bytes): + # In this case, this is a BitField or an INT-based object, which are + # fixed size object + raise DataModelDefinitionError('size sync should not be used for fixed sized object!') + sz = len(decoded_val) sz += obj.base_size obj.set_size_on_source_node(NodeInternals_NonTerm.sizesync_corrupt_hook(node, sz)) @@ -3302,7 +3248,6 @@ def tobytes_helper(node_internals): def handle_encoding(list_to_enc): if self.custo.collapse_padding_mode: - from framework.value_types import BitField list_to_enc = list(flatten(list_to_enc)) if list_to_enc and isinstance(list_to_enc[0], bytes): return list_to_enc @@ -3314,10 +3259,10 @@ def handle_encoding(list_to_enc): item1 = list_to_enc[i] item2 = list_to_enc[i+1] c1 = isinstance(item1, NodeInternals_TypedValue) and \ - item1.get_current_subkind() == BitField and \ + item1.get_current_subkind() == fvt.BitField and \ item1.get_value_type().padding_size != 0 c2 = isinstance(item2, NodeInternals_TypedValue) and \ - item2.get_current_subkind() == BitField + item2.get_current_subkind() == fvt.BitField if c1 and c2: new_item = NodeInternals_TypedValue() new_item1vt = copy.copy(item1.get_value_type()) @@ -3424,7 +3369,7 @@ def handle_encoding(list_to_enc): return (handle_encoding(l), was_not_frozen) - def get_raw_value(self): + def get_raw_value(self, **kwargs): raw_list = self._get_value(after_encoding=False)[0] raw_list = list(flatten(raw_list)) @@ -5383,10 +5328,8 @@ def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglem conf = self.__check_conf(conf) if val_list is not None: - from framework.value_types import String - self.internals[conf] = NodeInternals_TypedValue() - self.internals[conf].import_value_type(value_type=String(val_list=val_list)) + self.internals[conf].import_value_type(value_type=fvt.String(val_list=val_list)) elif value_type is not 
None: self.internals[conf] = NodeInternals_TypedValue() diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 180e657..08a2779 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1205,6 +1205,7 @@ def __get_node_from_db(self, name_desc): +### Helpers for RegExp-based Node ### class State(object): """ diff --git a/framework/global_resources.py b/framework/global_resources.py index 216b693..e663621 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -22,12 +22,15 @@ ################################################################################ import os -import framework import sys +import copy import inspect from enum import Enum + +import framework from libs.utils import ensure_dir, ensure_file + fuddly_version = '0.24.2' framework_folder = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) @@ -92,6 +95,80 @@ def convert_to_internal_repr(val): def unconvert_from_internal_repr(val): return val.decode(internal_repr_codec, 'replace') +### Exports for Node Absorption ### + +class AbsorbStatus(Enum): + Accept = 1 + Reject = 2 + Absorbed = 3 + FullyAbsorbed = 4 + +# List of constraints that rules blob absorption +class AbsCsts(object): + Size = 1 + Contents = 2 + Regexp = 3 + Structure = 4 + + def __init__(self, size=True, contents=True, regexp=True, struct=True): + self.constraints = { + AbsCsts.Size: size, + AbsCsts.Contents: contents, + AbsCsts.Regexp: regexp, + AbsCsts.Structure: struct + } + + def __bool__(self): + return True in self.constraints.values() + + def __nonzero__(self): + return True in self.constraints.values() + + def set(self, cst): + if cst in self.constraints: + self.constraints[cst] = True + else: + raise ValueError + + def clear(self, cst): + if cst in self.constraints: + self.constraints[cst] = False + else: + raise ValueError + + def __copy__(self): + new_csts = type(self)() + new_csts.__dict__.update(self.__dict__) + new_csts.constraints = copy.copy(self.constraints) + + return new_csts + + def __getitem__(self, key): + return self.constraints[key] + + def __repr__(self): + return 'AbsCsts()' + + +class AbsNoCsts(AbsCsts): + + def __init__(self, size=False, contents=False, regexp=False, struct=False): + AbsCsts.__init__(self, size=size, contents=contents, regexp=regexp, struct=struct) + + def __repr__(self): + return 'AbsNoCsts()' + + +class AbsFullCsts(AbsCsts): + + def __init__(self, size=True, contents=True, regexp=True, struct=True): + AbsCsts.__init__(self, size=size, contents=contents, regexp=regexp, struct=struct) + + def __repr__(self): + return 'AbsFullCsts()' + +### Error related resources ### + class Error(object): Reserved = -1 @@ -175,6 +252,8 @@ def __get_color(self): def __str__(self): return self._code_info[self.code]['name'] +### Hook related resources for Data ### + class HOOK(Enum): after_dmaker_production = 1 before_sending = 2 diff --git a/framework/value_types.py b/framework/value_types.py index 6e129d0..64d7ebb 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -42,7 +42,6 @@ sys.path.append('.') import framework.basic_primitives as bp -from framework.data_model import AbsorbStatus, AbsCsts from framework.encoders import * from framework.error_handling import * from framework.global_resources import * @@ -775,10 +774,10 @@ def set_description(self, val_list=None, size=None, min_sz=None, if not self.encoded_string: # For a non-Encoding type, the size of the string is always lesser or equal than the size # of 
the encoded string. Hence the byte string size is still >= to the string size. - if self.max_encoded_sz is None or self.max_encoded_sz < self.max_sz: + # self.max_encoded_sz is used for absorption + if max_encoded_sz is None and (max_sz is not None or size is not None) and \ + self.max_encoded_sz < self.max_sz: self.max_encoded_sz = self.max_sz - if self.min_encoded_sz is None or self.min_encoded_sz < self.min_sz: - self.min_encoded_sz = self.min_sz def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_list=True): if self.encoded_string: @@ -857,11 +856,12 @@ def _populate_val_list(self, force_max_enc_sz=False, force_min_enc_sz=False): if len(self.val_list) == 0: raise DataModelDefinitionError - def get_current_raw_val(self): + def get_current_raw_val(self, str_form=False): if self.drawn_val is None: self.get_value() - return self.drawn_val - + val = self._bytes2str(self.drawn_val) if str_form else self.drawn_val + return val + def enable_normal_mode(self): self.val_list = self.val_list_save self.val_list_copy = copy.copy(self.val_list) @@ -1012,7 +1012,7 @@ def pretty_print(self, max_size=None): dec = dec[:max_size] dec = dec.decode(self.codec, 'replace') if sys.version_info[0] == 2: - dec = dec.encode('ascii', 'replace') + dec = dec.encode('latin-1') return dec + ' [decoded, sz={!s}, codec={!s}]'.format(len(dec), self.codec) else: return 'codec={!s}'.format(self.codec) From 9e8fab9c8d9df6cdc9fb03fdd003eeaa90d11772 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 8 Aug 2016 20:09:06 +0200 Subject: [PATCH 53/80] Remove 'ascii_mode' String parameter + fix String._bytes2str() --- docs/source/data_model.rst | 4 ---- framework/database.py | 3 ++- framework/target.py | 9 ++++++--- framework/value_types.py | 26 +++++++------------------- test/integration/test_integration.py | 6 +++--- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index fc68e04..144b63b 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -116,10 +116,6 @@ following parameters: Codec to use for encoding the string (e.g., 'latin-1', 'utf8'). Note that depending on the charset, additional fuzzing cases are defined. -``ascii_mode`` [default value: **False**] - If set to ``True``, it will enforce the string to comply with ASCII - 7 bits. 
- ``extra_fuzzy_list`` [optional, default value: **None**] During data generation, if this parameter is specified with some specific values, they will be part of the test cases generated by diff --git a/framework/database.py b/framework/database.py index 30769c8..82481a5 100644 --- a/framework/database.py +++ b/framework/database.py @@ -72,7 +72,8 @@ def _is_valid(self, connection, cursor): valid = False with connection: tmp_con = sqlite3.connect(':memory:', detect_types=sqlite3.PARSE_DECLTYPES) - fmk_db_sql = open(gr.fmk_folder + self.DDL_fname).read() + with open(gr.fmk_folder + self.DDL_fname) as fd: + fmk_db_sql = fd.read() with tmp_con: cur = tmp_con.cursor() cur.executescript(fmk_db_sql) diff --git a/framework/target.py b/framework/target.py index 200675f..36cd6bb 100644 --- a/framework/target.py +++ b/framework/target.py @@ -368,9 +368,12 @@ def get_mac_addr(ifname): try: info = fcntl.ioctl(s.fileno(), 0x8927, struct.pack('256s', ifname[:15])) except OSError: - return b'' - info = bytearray(info) - return bytes(info[18:24]) + ret = b'' + else: + info = bytearray(info) + ret = bytes(info[18:24]) + s.close() + return ret else: def get_mac_addr(ifname): return struct.pack('>Q', uuid.getnode())[2:] diff --git a/framework/value_types.py b/framework/value_types.py index 64d7ebb..3c35707 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -346,9 +346,9 @@ def _str2bytes(self, val): def _bytes2str(self, val): if isinstance(val, (list, tuple)): - b = [v.decode(self.codec) for v in val] + b = [v.decode(self.codec, 'replace') for v in val] else: - b = val.decode(self.codec) + b = val.decode(self.codec, 'replace') return b UTF16LE = codecs.lookup('utf-16-le').name @@ -357,7 +357,7 @@ def _bytes2str(self, val): LATIN_1 = codecs.lookup('latin-1').name def init_specific(self, val_list=None, size=None, min_sz=None, - max_sz=None, determinist=True, codec='latin-1', ascii_mode=False, + max_sz=None, determinist=True, codec='latin-1', extra_fuzzy_list=None, absorb_regexp=None, alphabet=None, min_encoded_sz=None, max_encoded_sz=None, encoding_arg=None): @@ -379,8 +379,6 @@ def init_specific(self, val_list=None, size=None, min_sz=None, determinist: If set to ``True`` generated values will be in a deterministic order, otherwise in a random order. codec: codec to use for encoding the string (e.g., 'latin-1', 'utf8') - ascii_mode: If set to ``True``, it will enforce the string to comply with ASCII - 7 bits. extra_fuzzy_list: During data generation, if this parameter is specified with some specific values, they will be part of the test cases generated by the generic disruptor tTYPE. 
@@ -419,7 +417,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None, self.set_description(val_list=val_list, size=size, min_sz=min_sz, max_sz=max_sz, determinist=determinist, codec=codec, - ascii_mode=ascii_mode, extra_fuzzy_list=extra_fuzzy_list, + extra_fuzzy_list=extra_fuzzy_list, absorb_regexp=absorb_regexp, alphabet=alphabet, min_encoded_sz=min_encoded_sz, max_encoded_sz=max_encoded_sz) @@ -676,7 +674,7 @@ def _check_sizes(self, val_list): def set_description(self, val_list=None, size=None, min_sz=None, max_sz=None, determinist=True, codec='latin-1', - ascii_mode=False, extra_fuzzy_list=None, + extra_fuzzy_list=None, absorb_regexp=None, alphabet=None, min_encoded_sz=None, max_encoded_sz=None): ''' @@ -690,10 +688,9 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.alphabet = self._str2bytes(alphabet) else: self.alphabet = None - self.ascii_mode = ascii_mode if absorb_regexp is None: - if self.ascii_mode: + if self.codec == self.ASCII: self.regexp = '[\x00-\x7f]*' else: self.regexp = '.*' @@ -884,7 +881,7 @@ def enable_fuzz_mode(self): sz_delta_with_max = self.max_sz - sz try: - val = bp.corrupt_bits(orig_val, n=1, ascii=self.ascii_mode) + val = bp.corrupt_bits(orig_val, n=1) self.val_list_fuzzy.append(val) except: print("\n*** Value is empty! --> skipping bitflip test case ***") @@ -946,15 +943,6 @@ def enable_fuzz_mode(self): enc_cases = self.encoding_test_cases(orig_val, self.max_sz, self.min_sz, self.min_encoded_sz, self.max_encoded_sz) if enc_cases: - if self.ascii_mode: - new_enc_cases = [] - for v in enc_cases: - s = '' - for i in bytearray(v): - s += chr(i & 0x7f) - new_enc_cases.append(bytes(s)) - enc_cases = new_enc_cases - self.val_list_fuzzy += enc_cases self.val_list_save = self.val_list diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 2958939..6c7132c 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1977,9 +1977,9 @@ def nint_1_alt_helper(blob, constraints, node_internals): nint_2 = Node('nint2', value_type=UINT8(int_list=[0xf, 0xff, 0xee])) nint_3 = Node('nint3', value_type=UINT16_be(int_list=[0xeffe, 0xc1c2, 0x8899])) - nstr_1 = Node('cool', value_type=String(val_list=['TBD1'], size=4, ascii_mode=True)) + nstr_1 = Node('cool', value_type=String(val_list=['TBD1'], size=4, codec='ascii')) nstr_1.enforce_absorb_constraints(AbsNoCsts(regexp=True)) - nstr_2 = Node('str2', value_type=String(val_list=['TBD2TBD2', '12345678'], size=8, ascii_mode=True)) + nstr_2 = Node('str2', value_type=String(val_list=['TBD2TBD2', '12345678'], size=8, codec='ascii')) nint_50 = Node('nint50', value_type=UINT8(int_list=[0xaf, 0xbf, 0xcf])) nint_51 = Node('nint51', value_type=UINT16_be(int_list=[0xcfab, 0xeffe])) @@ -1993,7 +1993,7 @@ def nint_1_alt_helper(blob, constraints, node_internals): 'u=.', [nint_50, 1], [nint_51, 1], [nstr_50, 2, 3]] ]) - yeah = Node('yeah', value_type=String(val_list=['TBD', 'YEAH!'], max_sz=10, ascii_mode=True)) + yeah = Node('yeah', value_type=String(val_list=['TBD', 'YEAH!'], max_sz=10, codec='ascii')) splitter = Node('splitter', value_type=String(val_list=['TBD'], max_sz=10)) splitter.set_attr(NodeInternals.Abs_Postpone) From 707161307c3a1b00d8821db7188b4f1e4c50581d Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 8 Aug 2016 21:04:10 +0200 Subject: [PATCH 54/80] Add Node debugging helper --- framework/data_model.py | 5 ++++- framework/data_model_helpers.py | 21 ++++++++++++--------- 2 files changed, 16 insertions(+), 10 
deletions(-)

diff --git a/framework/data_model.py b/framework/data_model.py
index ef330d8..81fc195 100644
--- a/framework/data_model.py
+++ b/framework/data_model.py
@@ -831,6 +831,8 @@ class NodeInternals(object):
 Abs_Postpone = 6

 Separator = 15

+ DEBUG = 30
+
 DISABLED = 100

@@ -852,7 +854,8 @@ def __init__(self, arg=None):
 NodeInternals.Abs_Postpone: False,
 # Used to distinguish separator
 NodeInternals.Separator: False,
-
+ # Used for debugging purpose
+ NodeInternals.DEBUG: False,
 ### INTERNAL USAGE ###
 NodeInternals.DISABLED: False
 }
diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py
index 08a2779..5bd1d39 100644
--- a/framework/data_model_helpers.py
+++ b/framework/data_model_helpers.py
@@ -138,6 +138,8 @@ class Attr:

 Separator = NodeInternals.Separator

+ DEBUG = NodeInternals.DEBUG
+
 ###########################
 ### Generator Templates ###
 ###########################
@@ -553,7 +555,9 @@ class ModelHelper(object):
 'exists_if', 'exists_if_not', 'exists_if/and', 'exists_if/or',
 'sync_size_with', 'sync_enc_size_with',
- 'post_freeze', 'charset'
+ 'post_freeze', 'charset',
+ # used for debugging purpose
+ 'debug'
 ]

 def __init__(self, dm=None, delayed_jobs=True, add_env=True):
@@ -783,14 +787,7 @@ def _create_non_terminal_node_from_regex(self, desc, node=None):
 else:
 n.set_subnodes_with_csts(nodes, conf=conf)

-
- custo_set = desc.get('custo_set', None)
- custo_clear = desc.get('custo_clear', None)
-
- if custo_set or custo_clear:
- custo = NonTermCusto(items_to_set=custo_set, items_to_clear=custo_clear)
- internals = n.cc if conf is None else n.c[conf]
- internals.customize(custo)
+ self._handle_custo(n, desc, conf)

 sep_desc = desc.get('separator', None)
 if sep_desc is not None:
@@ -1002,6 +999,12 @@ def _handle_common_attr(self, node, desc, conf):
 node.set_attr(MH.Attr.Mutable, conf=conf)
 else:
 node.clear_attr(MH.Attr.Mutable, conf=conf)
+ param = desc.get('debug', None)
+ if param is not None:
+ if param:
+ node.set_attr(MH.Attr.DEBUG, conf=conf)
+ else:
+ node.clear_attr(MH.Attr.DEBUG, conf=conf)
 param = desc.get('determinist', None)
 if param is not None:
 node.make_determinist(conf=conf)

From 9fd25320372191d9f772a8ff8a1ffeb6c2460b22 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Tue, 9 Aug 2016 21:12:55 +0200
Subject: [PATCH 55/80] Minor enhancements

- Update a String() fuzzing test case.
- Improve robustness of monitoring system.
- Force probe status logging in launch_operator().
- Update PPPoE DM for situations where we didn't have time to respond to a
  PADR with a PADS in SC_PADS scenario.
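As a simplified sketch of what the monitoring hardening amounts to (the actual
change is in the framework/monitor.py hunk below), the idea is to guard the
probe callbacks that execute user-provided code:

    def get_probe_status(self):
        try:
            # reset() runs user probe code and may raise anything
            self._probe.reset()
        except:
            # a misbehaving probe must not break the framework loop
            self._handle_exception('during reset()')
        return self._probe.status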
--- data_models/protocols/pppoe_strategy.py | 2 +- framework/monitor.py | 9 +++++++-- framework/plumbing.py | 2 +- framework/value_types.py | 2 +- test/integration/test_integration.py | 10 ++++++++-- 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index 6949a7b..3386db9 100644 --- a/data_models/protocols/pppoe_strategy.py +++ b/data_models/protocols/pppoe_strategy.py @@ -201,7 +201,7 @@ def disrupt_data(self, dm, target, prev_data): step_send_fuzzed_pads.connect_to(step_wait_padr) -step_wait_padr.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback) +step_wait_padr.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback_and_update) step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) sc2 = Scenario('PADS') diff --git a/framework/monitor.py b/framework/monitor.py index a2a9245..2fedf59 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -26,6 +26,7 @@ import datetime import time import traceback +import re from libs.external_modules import * from framework.global_resources import * @@ -90,7 +91,10 @@ def set_probe_delay(self, delay): self._probe.delay = delay def get_probe_status(self): - self._probe.reset() + try: + self._probe.reset() + except: + self._handle_exception('during reset()') return self._probe.status def _notify_probe_started(self): @@ -980,6 +984,7 @@ class ProbeMem(Probe): def __init__(self): assert self.process_name != None assert self.backend != None + self._saved_mem = None Probe.__init__(self) def _get_mem(self): @@ -991,7 +996,7 @@ def _get_mem(self): for entry in proc_list: if entry.find(self.process_name) >= 0: try: - rss = int(entry.split()[0]) + rss = int(re.search('\d+', entry.split()[0]).group(0)) except ValueError: rss = -10 break diff --git a/framework/plumbing.py b/framework/plumbing.py index c04c1d6..506c3be 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -2456,7 +2456,7 @@ def launch_operator(self, name, user_input=UserInputContainer(), use_existing_se # Target fbk is logged only at the end of a burst if self._burst_countdown == self._burst: cont1 = self.log_target_feedback() - cont2 = self.monitor_probes() + cont2 = self.monitor_probes(force_record=True) if not cont1 or not cont2: exit_operator = True self.lg.log_fmk_info("Operator will shutdown because something is going wrong with " diff --git a/framework/value_types.py b/framework/value_types.py index 3c35707..2bd6970 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -896,7 +896,7 @@ def enable_fuzz_mode(self): if val != b'': self.val_list_fuzzy.append(val) - val = orig_val + b"X"*(self.max_sz*8) + val = orig_val + b"X"*(self.max_sz*42) self.val_list_fuzzy.append(val) self.val_list_fuzzy.append(b'\x00'*sz if sz>0 else b'\x00') diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 6c7132c..8f87bc1 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1583,7 +1583,10 @@ def test_basics(self): b' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + + b' [!] ++++++++++ [!] 
::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \
+ b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ',
+
 b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::>:: [!] ',
 b' [!] ++++++++++ [!] ::A%n::AAA::>:: [!] ',
 b' [!] ++++++++++ [!] ::A%s::AAA::>:: [!] ',
@@ -1601,7 +1604,10 @@ def test_basics(self):
 b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [26] could change has it is a random corrupt_bit
 b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ',
 b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ',
- b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ',
+
+ b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \
+ b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ',
+
 b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::>:: [!] ',
 b' [!] >>>>>>>>>> [!] ::A%n::AAA::>:: [!] ',
 b' [!] >>>>>>>>>> [!] ::A%s::AAA::>:: [!] ',

From 537171e9fc23946938ad2d0bbcc9f7291fed4193 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Wed, 10 Aug 2016 12:11:32 +0200
Subject: [PATCH 56/80] String(): Populating in a lazy way + Fix

Previously, any String() of a data model that was not provided with the
'val_list' parameter was automatically populated at data model creation with
some samples consistent with the other parameters provided (like size,
alphabet, ...).

Populating the String()s at that time made data model instantiation (through
DataModel.get_data()) give you the same 'population' from one instance to
another, with the population changing only after a reload of the data model
itself.

While this behavior does not matter if you use a data model in conjunction
with disruptors that want to corrupt stuff, it may be annoying if you need
diversity in the data you instantiate from the data model. Thus, populating
the String()s (without a user-provided 'val_list') is now performed in a lazy
way, which resolves the String diversity problem and also avoids populating a
String() if it is used only for absorption.

Also fix a bug in String() absorption (within absorb_auto_helper()).

---
 framework/data_model.py | 3 +--
 framework/value_types.py | 39 +++++++++++++++++++++++++--------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/framework/data_model.py b/framework/data_model.py
index 81fc195..2f8ad0d 100644
--- a/framework/data_model.py
+++ b/framework/data_model.py
@@ -3767,8 +3767,7 @@ def _try_absorption_with(base_node, min_node, max_node, blob, consumed_size,
 if st == AbsorbStatus.Reject:
 nb_absorbed = node_no-1
 if DEBUG:
- print('REJECT: %s, blob: %r ...' % (node.name, blob[:4]))
- print(blob.find(b'\xFF\xDA'))
+ print('REJECT: %s, size: %d, blob: %r ...' % (node.name, len(blob), blob[:4]))
 if min_node == 0:
 # abort = False
 break
diff --git a/framework/value_types.py b/framework/value_types.py
index 2bd6970..d9ea4f4 100644
--- a/framework/value_types.py
+++ b/framework/value_types.py
@@ -426,7 +426,9 @@ def make_private(self, forget_current_state):
 if self.is_val_list_provided:
 self.val_list = copy.copy(self.val_list)
 else:
- self._populate_val_list()
+ self._populate_val_list(force_max_enc_sz=self.max_enc_sz_provided,
+ force_min_enc_sz=self.min_enc_sz_provided)
+ self._ensure_enc_sizes_consistency()
 self.reset_state()
 else:
 self.val_list = copy.copy(self.val_list)
@@ -452,7 +454,7 @@ def absorb_auto_helper(self, blob, constraints):
 # and let do_absorb() decide if it's OK (via size constraints
 # for instance.
blob_dec = self.decode(blob) - if constraints[AbsCsts.Contents] and self.val_list is not None and self.alphabet is None: + if constraints[AbsCsts.Contents] and self.is_val_list_provided and self.alphabet is None: for v in self.val_list: if blob_dec.startswith(v): break @@ -683,6 +685,8 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.codec = codecs.lookup(codec).name # normalize self.max_encoded_sz = max_encoded_sz self.min_encoded_sz = min_encoded_sz + self.max_enc_sz_provided = max_encoded_sz is not None + self.min_enc_sz_provided = min_encoded_sz is not None if alphabet is not None: self.alphabet = self._str2bytes(alphabet) @@ -708,8 +712,8 @@ def set_description(self, val_list=None, size=None, min_sz=None, assert isinstance(val_list, list) self.val_list = self._str2bytes(val_list) for val in self.val_list: - if not self._check_compliance(val, force_max_enc_sz=max_encoded_sz is not None, - force_min_enc_sz=min_encoded_sz is not None, + if not self._check_compliance(val, force_max_enc_sz=self.max_enc_sz_provided, + force_min_enc_sz=self.min_enc_sz_provided, update_list=False): raise DataModelDefinitionError @@ -720,8 +724,8 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.val_list_copy = copy.copy(self.val_list) self.is_val_list_provided = True # distinguish cases where - # val_list is provided or - # created based on size + # val_list is provided or + # created based on size self.user_provided_list = copy.copy(self.val_list) else: self.is_val_list_provided = False @@ -762,19 +766,22 @@ def set_description(self, val_list=None, size=None, min_sz=None, self._check_sizes(val_list) - if val_list is None: - self._populate_val_list(force_max_enc_sz=max_encoded_sz is not None, - force_min_enc_sz=min_encoded_sz is not None) - self.determinist = determinist + self._ensure_enc_sizes_consistency() + + def _ensure_enc_sizes_consistency(self): if not self.encoded_string: # For a non-Encoding type, the size of the string is always lesser or equal than the size - # of the encoded string. Hence the byte string size is still >= to the string size. - # self.max_encoded_sz is used for absorption - if max_encoded_sz is None and (max_sz is not None or size is not None) and \ - self.max_encoded_sz < self.max_sz: + # of the encoded string (utf8, ...). Hence the byte string size is still >= to the string size. + # As self.max_encoded_sz is needed for absorption, we do the following heuristic (when + # information is missing). 
+ if self.max_encoded_sz is None or \ + (not self.max_enc_sz_provided and self.max_encoded_sz < self.max_sz): self.max_encoded_sz = self.max_sz + if self.min_encoded_sz is None or \ + (not self.min_enc_sz_provided and self.min_encoded_sz > self.min_sz): + self.min_encoded_sz = self.min_sz def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_list=True): if self.encoded_string: @@ -952,6 +959,10 @@ def enable_fuzz_mode(self): self.drawn_val = None def get_value(self): + if not self.val_list: + self._populate_val_list(force_max_enc_sz=self.max_enc_sz_provided, + force_min_enc_sz=self.min_enc_sz_provided) + self._ensure_enc_sizes_consistency() if not self.val_list_copy: self.val_list_copy = copy.copy(self.val_list) if self.determinist: From ec87d0452bf4c72f4d78f1ecbc4e62bae38d2ad0 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Wed, 10 Aug 2016 22:17:16 +0200 Subject: [PATCH 57/80] Add new expanded shapes for Non-terminal node + Fixes - New shapes Non-terminal node: if qty=(min,max) then add: - qty=min - qty=max - qty=(min+1, max-1) if possible - Fix ModelWalker regarding non-terminal walking: because of incorrect 'value_not_yielded_yet', some cases where missing. (impact on tTYPE/... was possible depending on the data model) - Fix NonTermVisitor - Fix BasicVisitor to be agnostic to 'respect_order' (but values ) --- data_models/tuto.py | 9 +- docs/source/scenario.rst | 4 +- framework/data_model.py | 11 ++- framework/fuzzing_primitives.py | 108 +++++++++--------------- framework/generic_data_makers.py | 6 +- test/integration/test_integration.py | 119 +++++++++++++++++++-------- 6 files changed, 142 insertions(+), 115 deletions(-) diff --git a/data_models/tuto.py b/data_models/tuto.py index 721f474..89f7e66 100644 --- a/data_models/tuto.py +++ b/data_models/tuto.py @@ -335,14 +335,17 @@ def keycode_helper(blob, constraints, node_internals): 'contents': String(val_list=['::'])}}, 'shape_type': MH.Random, # ignored in determnist mode 'contents': [ - {'contents': String(val_list=['AAA']), + {'contents': String(val_list=['AAA', 'BBB']), 'qty': (0, 4), 'name': 'str'}, {'contents': UINT8(int_list=[0x3E]), # chr(0x3E) == '>' 'name': 'int'} - ]} - ]} + ]}, + ]}, + + {'contents': String(val_list=['?','!']), + 'name': 'int3'} ]}, {'weight': 20, diff --git a/docs/source/scenario.rst b/docs/source/scenario.rst index 581d287..9587aff 100644 --- a/docs/source/scenario.rst +++ b/docs/source/scenario.rst @@ -226,12 +226,12 @@ service for instance. 
This is illustrated in the following example in the lines periodic2 = Periodic(Data('2nd Periodic (3s)\n'), period=3) step1 = Step('exist_cond', fbk_timeout=2, set_periodic=[periodic1, periodic2]) - step2 = Step('separator', fbk_timeout=5, cbk_after_fbk=feedback_handler) + step2 = Step('separator', fbk_timeout=5) step3 = NoDataStep() step4 = Step(DataProcess(process=[('C',None,UI(nb=1)),'tTYPE'], seed='enc')) step1.connect_to(step2) - step2.connect_to(step3, cbk_after_fbk=cbk_transition2) + step2.connect_to(step3, cbk_after_fbk=feedback_handler) step3.connect_to(step4) step4.connect_to(FinalStep()) diff --git a/framework/data_model.py b/framework/data_model.py index 2f8ad0d..e5ea6d7 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -2892,13 +2892,16 @@ def _generate_expanded_nodelist(self, node_list): if delim[1] == '>' or delim[1:3] == '=.': for i, node_desc in enumerate(sublist): node, mini, maxi = self._handle_node_desc(node_desc) - if mini == 0 and maxi > 0: + if mini < maxi: new_nlist = self._copy_nodelist(node_list) - new_nlist[idx][1][i] = [node, 0] + new_nlist[idx][1][i] = [node, mini] expanded_node_list.insert(0, new_nlist) new_nlist = self._copy_nodelist(node_list) - new_nlist[idx][1][i] = [node, 1, maxi] - # new_nlist[idx][1][i] = [node, _pick_qty(1, maxi)] + new_nlist[idx][1][i] = [node, maxi] + expanded_node_list.insert(0, new_nlist) + if mini+1 < maxi: + new_nlist = self._copy_nodelist(node_list) + new_nlist[idx][1][i] = [node, mini+1, maxi-1] expanded_node_list.insert(0, new_nlist) elif delim[1:3] == '=+': new_delim = delim[0] + '>' diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index 31c5bbe..bf1b849 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -87,9 +87,8 @@ def set_consumer(self, node_consumer): def __iter__(self): self._cpt = 1 - - gen = self.walk_graph_rec([self._root_node], self._consumer.yield_original_val, - structure_has_changed=False, consumed_nodes=set()) + gen = self.walk_graph_rec([self._root_node], structure_has_changed=False, + consumed_nodes=set()) for consumed_node, orig_node_val in gen: self._root_node.freeze() @@ -136,7 +135,7 @@ def _do_reset(self, node): node.unfreeze(recursive=True, dont_change_state=True) self._consumer.do_after_reset(node) - def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed, consumed_nodes): + def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): reset = False guilty = None @@ -145,7 +144,6 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed # in a frozen state (which means that it may have some # children in other states that are not dealt with in this current call) for node in node_list: - perform_second_step = True again = True @@ -158,9 +156,6 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed while again: again = False - if reset or value_not_yielded_yet: - value_not_yielded_yet = self._consumer.yield_original_val - ### STEP 1 ### # We freeze the node before making a research on it, @@ -181,8 +176,7 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed # node is terminal, and we go to Step 2. 
Otherwise, we # call ourselves recursively with the list of subnodes if fnodes: - generator = self.walk_graph_rec(fnodes, value_not_yielded_yet, - structure_has_changed, consumed_nodes) + generator = self.walk_graph_rec(fnodes, structure_has_changed, consumed_nodes) for consumed_node, orig_node_val in generator: yield consumed_node, orig_node_val # YIELD @@ -192,7 +186,6 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed # for possible uses/modifications. This is performed within our # method node_consumer_helper(). if perform_second_step: - consumer_gen = self.node_consumer_helper(node, structure_has_changed, consumed_nodes) for consumed_node, orig_node_val, reset, ignore_node in consumer_gen: @@ -227,17 +220,14 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed perform_second_step = True again = False - if value_not_yielded_yet: - yield consumed_node, orig_node_val # YIELD - else: - value_not_yielded_yet = True + yield consumed_node, orig_node_val # YIELD # We reach this case if the consumer is not interested # with 'node'. Then if the node is not exhausted we # may have new cases where the consumer will find # something (assuming the consumer accepts to reset). - elif self._consumer.need_reset(node) and not node.is_exhausted(): - again = True + elif self._consumer.need_reset(node): # and not node.is_exhausted(): + again = False if node.is_exhausted() else True # Not consumed so we don't unfreeze() with recursive=True self._do_reset(node) else: @@ -247,12 +237,11 @@ def walk_graph_rec(self, node_list, value_not_yielded_yet, structure_has_changed structure_has_changed = node.cc.structure_will_change() if structure_has_changed and self._consumer.need_reset_when_structure_change: - structure_has_changed = False idx = node_list.index(node) - gen = self.walk_graph_rec(node_list[:idx], self._consumer.yield_original_val, False, set()) + gen = self.walk_graph_rec(node_list[:idx], False, set()) for consumed_node, orig_node_val in gen: yield consumed_node, orig_node_val # YIELD @@ -378,9 +367,7 @@ class NodeConsumerStub(object): behave strangely (not the same number of yielded values). 
--> to be investigated (maybe wrong implementation of BasicVisitor and NonTermVisitor) ''' - - def __init__(self, specific_args=None, max_runs_per_node=-1, min_runs_per_node=-1, respect_order=True): - self.yield_original_val = True + def __init__(self, max_runs_per_node=-1, min_runs_per_node=-1, respect_order=True, **kwargs): self.need_reset_when_structure_change = False self._internals_criteria = None @@ -399,10 +386,10 @@ def __init__(self, specific_args=None, max_runs_per_node=-1, min_runs_per_node=- self.__node_backup = None - self.init_specific(specific_args) + self.init_specific(**kwargs) - def init_specific(self, args): + def init_specific(self, **kwargs): self._internals_criteria = dm.NodeInternalsCriteria(negative_node_kinds=[dm.NodeInternals_NonTerm]) @@ -526,27 +513,15 @@ def interested_by(self, node): class BasicVisitor(NodeConsumerStub): - def init_specific(self, args): + def init_specific(self, consume_also_singleton=False): self._internals_criteria = None - self.consumed = False - self.consume_also_singleton = False if args is None else bool(args) + self.consume_also_singleton = consume_also_singleton def consume_node(self, node): - if node.is_nonterm() and self.consumed: - self.consumed = False - if (node.is_exhausted() and not self.consume_also_singleton) or node.is_nonterm(): # in this case we ignore the node return False - else: - if self.consumed: - node.get_value() - node.unfreeze(recursive=False) - node.get_value() - else: - self.consumed = True - return True def save_node(self, node): @@ -554,12 +529,9 @@ def save_node(self, node): def recover_node(self, node): node.reset_state(recursive=False) - node.get_value() + node.freeze() def need_reset(self, node): - if node.is_nonterm() and self.consumed: - self.consumed = False - if node.is_nonterm(): return True else: @@ -574,39 +546,39 @@ def wait_for_exhaustion(self, node): class NonTermVisitor(BasicVisitor): - def init_specific(self, args): - self.consumed = False - self._internals_criteria = None - self._internals_criteria = dm.NodeInternalsCriteria(negative_node_kinds=[dm.NodeInternals_NonTerm]) - self.current_nt_node = None + def init_specific(self, **kwargs): + self._internals_criteria = dm.NodeInternalsCriteria(node_kinds=[dm.NodeInternals_NonTerm]) + self.need_reset_when_structure_change = True + self.last_node = None + self.current_node = None def need_reset(self, node): - # DEBUG_PRINT('--(1)-> Node:' + node.name + ', exhausted:' + repr(node.is_exhausted()), level=0) - if node.is_nonterm() and node is not self.current_nt_node and node.cc.structure_will_change(): - # this case is called outside node_consumer_helper(), - # because we declared to only be interested with other - # kinds of node. 
Thus it will trigger node.unfreeze() + # DEBUG_PRINT('--(RESET)-> Node:' + node.name + ', exhausted:' + repr(node.is_exhausted()), level=0) + if node.is_nonterm() and self.last_node is not None and \ + node is not self.last_node and not node.is_exhausted(): + self.last_node = None + self.current_node = None return True else: - # Here we already have consumed the node, we don't want a reset return False - def do_after_reset(self, node): - self.consumed = False - self.current_nt_node = node - def consume_node(self, node): - if not self.consumed and not node.is_nonterm(): - self.consumed = True - return True - else: + self.last_node = self.current_node + self.current_node = node + + if node.is_exhausted() and self.last_node is not None: return False + else: + # last_name = self.last_node.name if self.last_node else 'None' + # DEBUG_PRINT('--(1)-> Node:' + node.name + ', exhausted:' + repr(node.is_exhausted()) + \ + # ', curr: ' + self.current_node.name + ', last: ' + last_name, level=0) + return True def still_interested_by(self, node): return False def wait_for_exhaustion(self, node): - return 0 + return -1 # wait until exhaustion @@ -618,10 +590,9 @@ class AltConfConsumer(NodeConsumerStub): nodes, that reuse same subnodes over the various confs). ''' - def init_specific(self, args): + def init_specific(self, **kwargs): self.__node_backup = None - self.yield_original_val = True self.need_reset_when_structure_change = True self._internals_criteria = dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable]) @@ -712,7 +683,7 @@ def wait_for_exhaustion(self, node): class TermNodeDisruption(NodeConsumerStub): - def init_specific(self, base_list): + def init_specific(self, base_list=None): self._internals_criteria = dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable], negative_attrs=[dm.NodeInternals.Separator], node_kinds=[dm.NodeInternals_Term]) @@ -731,7 +702,6 @@ def init_specific(self, base_list): self.val_list = list(base_list) self.orig_internals = None - self.yield_original_val = True self.need_reset_when_structure_change = True @@ -778,7 +748,7 @@ def recover_node(self, node): class TypedNodeDisruption(NodeConsumerStub): - def init_specific(self, args): + def init_specific(self, **kwargs): self._internals_criteria = dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable], negative_attrs=[dm.NodeInternals.Separator], node_kinds=[dm.NodeInternals_TypedValue]) @@ -787,7 +757,6 @@ def init_specific(self, args): self.current_node = None self.orig_internal = None - self.yield_original_val = True self.need_reset_when_structure_change = True def consume_node(self, node): @@ -938,7 +907,7 @@ def _extend_fuzzy_vt_list(flist, e): class SeparatorDisruption(NodeConsumerStub): - def init_specific(self, separators): + def init_specific(self, separators=None): self._internals_criteria = \ dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable, dm.NodeInternals.Separator], node_kinds=[dm.NodeInternals_Term]) @@ -947,7 +916,6 @@ def init_specific(self, separators): if separators is not None: self.val_list += list(separators) - self.yield_original_val = False # self.need_reset_when_structure_change = True def consume_node(self, node): diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index e92cbf2..f7ae03f 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -73,7 +73,7 @@ def set_seed(self, prev_data): if self.nt_only: consumer = NonTermVisitor() else: - consumer = 
BasicVisitor(specific_args=self.singleton) + consumer = BasicVisitor(consume_also_singleton=self.singleton) consumer.set_node_interest(path_regexp=self.path) self.modelwalker = ModelWalker(prev_data.node, consumer, max_steps=self.max_steps, initial_step=self.init) self.walker = iter(self.modelwalker) @@ -305,7 +305,7 @@ def set_seed(self, prev_data): self.consumer = SeparatorDisruption(max_runs_per_node=self.max_runs_per_node, min_runs_per_node=self.min_runs_per_node, respect_order=self.order, - specific_args=sep_list) + separators=sep_list) self.consumer.need_reset_when_structure_change = self.deep self.consumer.set_node_interest(path_regexp=self.path) self.modelwalker = ModelWalker(prev_data.node, self.consumer, max_steps=self.max_steps, initial_step=self.init) @@ -1066,7 +1066,7 @@ def set_seed(self, prev_data): self.consumer = TermNodeDisruption(max_runs_per_node=self.max_runs_per_node, min_runs_per_node=self.min_runs_per_node, respect_order=False, - specific_args=self.alt_values) + base_list=self.alt_values) self.consumer.determinist = self.determinist if self.ascii: self.consumer.ascii = True diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 8f87bc1..2decd0a 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -991,7 +991,7 @@ def test_NonTerm_Attr_01(self): nt.make_determinist(all_conf=True, recursive=True) nb = self._loop_nodes(nt, loop_count, criteria_func=crit_func) - self.assertEqual(nb, 6) + self.assertEqual(nb, 18) print('\n -=[ determinist & infinite (loop count: %d) ]=- \n' % loop_count) @@ -1004,15 +1004,17 @@ def test_NonTerm_Attr_01(self): nt = dm.get_data('NonTerm') # nt.make_infinite(all_conf=True, recursive=True) + nt.make_random(all_conf=True, recursive=True) self._loop_nodes(nt, loop_count, criteria_func=crit_func) print('\n -=[ random & finite (loop count: %d) ]=- \n' % loop_count) nt = dm.get_data('NonTerm') nt.make_finite(all_conf=True, recursive=True) + nt.make_random(all_conf=True, recursive=True) nb = self._loop_nodes(nt, loop_count, criteria_func=crit_func) - self.assertEqual(nb, 6) + self.assertEqual(nb, 18) def test_BitField_Attr_01(self): ''' @@ -1468,7 +1470,7 @@ def test_NodeConsumerStub_1(self): nt = self.dm.get_data('Simple') default_consumer = NodeConsumerStub() for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, - max_steps=70): + max_steps=200): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 49) @@ -1476,20 +1478,29 @@ def test_NodeConsumerStub_2(self): nt = self.dm.get_data('Simple') default_consumer = NodeConsumerStub(max_runs_per_node=-1, min_runs_per_node=2) for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, - max_steps=70): + max_steps=200): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) self.assertEqual(idx, 35) def test_BasicVisitor(self): nt = self.dm.get_data('Simple') - default_consumer = BasicVisitor() + default_consumer = BasicVisitor(respect_order=True, consume_also_singleton=False) for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, - max_steps=70): + max_steps=200): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) - self.assertEqual(idx, 37) + self.assertEqual(idx, 49) + + print('***') + nt = self.dm.get_data('Simple') + default_consumer = BasicVisitor(respect_order=False, 
consume_also_singleton=False) + for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, + max_steps=200): + print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) + self.assertEqual(idx, 49) def test_NonTermVisitor(self): print('***') + idx = 0 simple = self.dm.get_data('Simple') nonterm_consumer = NonTermVisitor(respect_order=True) for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, @@ -1498,7 +1509,7 @@ def test_NonTermVisitor(self): self.assertEqual(idx, 4) print('***') - + idx = 0 simple = self.dm.get_data('Simple') nonterm_consumer = NonTermVisitor(respect_order=False) for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, nonterm_consumer, make_determinist=True, @@ -1507,22 +1518,22 @@ def test_NonTermVisitor(self): self.assertEqual(idx, 4) print('***') - - data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 + idx = 0 + data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') nonterm_consumer = NonTermVisitor(respect_order=True) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, - max_steps=10): + max_steps=50): print(colorize('[%d] ' % idx + rnode.to_ascii(), rgb=Color.INFO)) - self.assertEqual(idx, 3) + self.assertEqual(idx, 6) print('***') - - data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') # idx == 3 + idx = 0 + data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') nonterm_consumer = NonTermVisitor(respect_order=False) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, - max_steps=10): + max_steps=50): print(colorize('[%d] ' % idx + rnode.to_ascii(), rgb=Color.INFO)) - self.assertEqual(idx, 3) + self.assertEqual(idx, 6) print('***') @@ -1580,13 +1591,30 @@ def test_basics(self): b' [!] ++++++++++ [!] ::\x01:: [!] ', b' [!] ++++++++++ [!] ::\x80:: [!] ', b' [!] ++++++++++ [!] ::\x7f:: [!] ', - b' [!] ++++++++++ [!] ::AA\xc3::AA\xc3::>:: [!] ', # [8] could change has it is a random corrupt_bit + b' [!] ++++++++++ [!] ::IAA::AAA::AAA::AAA::>:: [!] ', # [8] could change has it is a random corrupt_bit + b' [!] ++++++++++ [!] ::AAAA::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::A%n::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::A%s::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::A\r\n::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::=:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::?:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\xff:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x00:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x01:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x80:: [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x7f:: [!] ', + b' [!] ++++++++++ [!] ::AAQ::AAA::>:: [!] ', # [26] could change has it is a random corrupt_bit b' [!] 
++++++++++ [!] ::AAAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - - b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \ - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - + b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%n::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%s::AAA::>:: [!] ', @@ -1601,13 +1629,38 @@ def test_basics(self): b' [!] ++++++++++ [!] ::AAA::AAA::\x01:: [!] ', b' [!] ++++++++++ [!] ::AAA::AAA::\x80:: [!] ', b' [!] ++++++++++ [!] ::AAA::AAA::\x7f:: [!] ', - b' [!] >>>>>>>>>> [!] ::\xc9AA::\xc9AA::>:: [!] ', # [26] could change has it is a random corrupt_bit + + b' [!] >>>>>>>>>> [!] ::=:: [!] ', + b' [!] >>>>>>>>>> [!] ::?:: [!] ', + b' [!] >>>>>>>>>> [!] ::\xff:: [!] ', + b' [!] >>>>>>>>>> [!] ::\x00:: [!] ', + b' [!] >>>>>>>>>> [!] ::\x01:: [!] ', + b' [!] >>>>>>>>>> [!] ::\x80:: [!] ', + b' [!] >>>>>>>>>> [!] ::\x7f:: [!] ', + b' [!] >>>>>>>>>> [!] ::QAA::AAA::AAA::AAA::>:: [!] ', # [51] could change has it is a random corrupt_bit + b' [!] >>>>>>>>>> [!] ::AAAA::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::A%n::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::A%s::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::A\r\n::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::=:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::?:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\xff:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x00:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x01:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x80:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x7f:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAC::AAA::>:: [!] ', # [69] could change has it is a random corrupt_bit b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', - - b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' \ - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', - + b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%n::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%s::AAA::>:: [!] ', @@ -1621,23 +1674,23 @@ def test_basics(self): b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x00:: [!] ', b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x01:: [!] ', b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x80:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] ' + b' [!] >>>>>>>>>> [!] ::AAA::AAA::\x7f:: [!] 
', ] - tn_consumer = TypedNodeDisruption() + tn_consumer = TypedNodeDisruption(respect_order=True) ic = NodeInternalsCriteria(mandatory_attrs=[NodeInternals.Mutable], negative_attrs=[NodeInternals.Separator], node_kinds=[NodeInternals_TypedValue], negative_node_subkinds=[String]) tn_consumer.set_node_interest(internals_criteria=ic) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, tn_consumer, make_determinist=True, - max_steps=100): + max_steps=200): val = rnode.to_bytes() print(colorize('[%d] ' % idx + repr(val), rgb=Color.INFO)) - if idx not in [8, 26]: + if idx not in [8, 26, 51, 69]: self.assertEqual(val, raw_vals[idx - 1]) - self.assertEqual(idx, 43) + self.assertEqual(idx, 86) # should be even def test_TypedNodeDisruption_1(self): nt = self.dm.get_data('Simple') @@ -1734,7 +1787,7 @@ def test_TermNodeDisruption_2(self): def test_TermNodeDisruption_3(self): simple = self.dm.get_data('Simple') - consumer = TermNodeDisruption(specific_args=['1_BANG_1', '2_PLOUF_2']) + consumer = TermNodeDisruption(base_list=['1_BANG_1', '2_PLOUF_2']) for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True, max_steps=-1): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) @@ -1810,7 +1863,7 @@ def test_USB(self): print(colorize('number of confs: %d' % idx, rgb=Color.INFO)) - self.assertIn(idx, [159]) + self.assertIn(idx, [523]) From a612f60e7cbc7ab2e22460b78fd0ef5e5f69298a Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Wed, 10 Aug 2016 23:26:18 +0200 Subject: [PATCH 58/80] Update BasicVisitor to avoid duplicates Avoid returning a data that has already been returned. This occurred systematically when a new node was consumed. Note that this implementation is agnostic to the parameter 'respect_order'. 
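For reference, the new behaviour can be checked by driving the visitor through a
ModelWalker the way the integration tests do; a minimal sketch reusing the names
from test_integration.py ('Simple' is the data id used by the tests, dm the
loaded data model):

    from framework.fuzzing_primitives import ModelWalker, BasicVisitor

    nt = dm.get_data('Simple')
    consumer = BasicVisitor(respect_order=True, consume_also_singleton=False)
    # each iteration yields the root node with one consumed node moved to
    # its next value; with this fix, switching to a new node no longer
    # re-yields the data that was produced last
    for rnode, consumed_node, orig_node_val, idx in ModelWalker(
            nt, consumer, make_determinist=True, max_steps=200):
        print('[%d] %r' % (idx, rnode.to_bytes()))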
--- framework/fuzzing_primitives.py | 19 +++++++++++++------ test/integration/test_integration.py | 4 ++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index bf1b849..253b9fb 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -514,14 +514,22 @@ def interested_by(self, node): class BasicVisitor(NodeConsumerStub): def init_specific(self, consume_also_singleton=False): - self._internals_criteria = None + self._internals_criteria = dm.NodeInternalsCriteria(negative_node_kinds=[dm.NodeInternals_NonTerm]) self.consume_also_singleton = consume_also_singleton + self.firstcall = True def consume_node(self, node): - if (node.is_exhausted() and not self.consume_also_singleton) or node.is_nonterm(): + if (node.is_exhausted() and not self.consume_also_singleton): # in this case we ignore the node return False else: + if self.firstcall: + self.firstcall = False + return True + if not node.is_exhausted(): + node.freeze() + node.unfreeze(recursive=False) + node.freeze() return True def save_node(self, node): @@ -533,15 +541,14 @@ def recover_node(self, node): def need_reset(self, node): if node.is_nonterm(): + if not node.is_exhausted(): + self.firstcall = True return True else: return False def wait_for_exhaustion(self, node): - if not node.is_nonterm(): - return -1 # wait until exhaustion - else: - return 0 + return -1 class NonTermVisitor(BasicVisitor): diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 2decd0a..160e626 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1488,7 +1488,7 @@ def test_BasicVisitor(self): for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, max_steps=200): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) - self.assertEqual(idx, 49) + self.assertEqual(idx, 37) print('***') nt = self.dm.get_data('Simple') @@ -1496,7 +1496,7 @@ def test_BasicVisitor(self): for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, default_consumer, make_determinist=True, max_steps=200): print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO)) - self.assertEqual(idx, 49) + self.assertEqual(idx, 37) def test_NonTermVisitor(self): print('***') From fd7300696fcb63d0b25cc83fa1ed1000fac46bc7 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 11 Aug 2016 10:20:01 +0200 Subject: [PATCH 59/80] Scenario: accept a DataProcess with a 'None' seed. 
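Before this change, _handle_data_desc() rejected any seed that was not a string
or a Data instance, which made it impossible to describe a step whose process
chain generates its data by itself. A hypothetical variant of the scenario.rst
example (same step and data-maker names; only the 'seed' argument is dropped,
which is what this commit permits):

    # seed is left to its default of None: the framework now hands the
    # process chain over to get_data() with no original data, instead of
    # rejecting the DataProcess with Error.UserCodeError
    step = Step(DataProcess(process=[('C', None, UI(nb=1)), 'tTYPE']))
    step.connect_to(FinalStep())

Whether the chain can actually operate without a seed remains up to the data
makers composing it.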
--- framework/database.py | 3 ++- framework/plumbing.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/framework/database.py b/framework/database.py index 82481a5..0f08a45 100644 --- a/framework/database.py +++ b/framework/database.py @@ -799,7 +799,8 @@ def remove_data(self, data_id, colorized=True): "WHERE ID == {data_id:d};".format(data_id=data_id) ) - print(colorize("*** Data and all related records have been removed ***", rgb=Color.FMKINFO)) + print(colorize("*** Data {:d} and all related records have been removed ***".format(data_id), + rgb=Color.FMKINFO)) def get_project_record(self, prj_name=None): diff --git a/framework/plumbing.py b/framework/plumbing.py index 506c3be..d3eb553 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -1559,12 +1559,12 @@ def _handle_data_desc(self, data_desc): else: seed = Data(seed) seed.set_initial_dmaker([data_desc.seed.upper(), 'g_'+data_desc.seed, None]) - else: - if not isinstance(data_desc.seed, Data): - self.set_error(msg='DataProcess object contains an unrecognized seed type!', - code=Error.UserCodeError) - return None + elif data_desc.seed is not None and not isinstance(data_desc.seed, Data): + self.set_error(msg='DataProcess object contains an unrecognized seed type!', + code=Error.UserCodeError) + return None + else: seed = data_desc.seed data = self.get_data(data_desc.process, data_orig=seed) From dbc33d589a33b83e528ceb262a93376b9c29dd6d Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 11 Aug 2016 11:53:11 +0200 Subject: [PATCH 60/80] Improve robustness of 'unconvert_from_internal_repr' --- framework/global_resources.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/framework/global_resources.py b/framework/global_resources.py index e663621..0c72a69 100644 --- a/framework/global_resources.py +++ b/framework/global_resources.py @@ -93,7 +93,15 @@ def convert_to_internal_repr(val): return val def unconvert_from_internal_repr(val): - return val.decode(internal_repr_codec, 'replace') + if sys.version_info[0] == 2 and isinstance(val, buffer): + # This case occurs when reading from the FmkDB + val = str(val) + else: + try: + val = val.decode(internal_repr_codec, 'strict') + except: + val = val.decode('latin-1') + return val ### Exports for Node Absorption ### From e6625f1cf9d10f730dd7647220f548f754b52d4e Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Thu, 11 Aug 2016 20:37:38 +0200 Subject: [PATCH 61/80] Polish up display stuff (fmkdb.py, ...) 
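Two of the cleanups recur across the touched modules and are worth a sketch
(the flag names are those introduced in libs/debug_facility.py; _shorten is an
illustrative helper name, the actual diffs inline the length test):

    import libs.debug_facility as dbg

    # debug flags are now sourced from one place instead of being defined
    # per module (value_types.py picks VT_DEBUG, the model walker picks
    # MW_DEBUG)
    DEBUG = dbg.VT_DEBUG

    def _shorten(val, max_size=30):
        # long values are truncated before being displayed, so that huge
        # fuzzed strings do not flood warnings or fmkdb output
        return val if len(val) <= max_size else val[:max_size] + ' ...'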
--- framework/database.py | 5 +++-- framework/fuzzing_primitives.py | 6 ++++-- framework/generic_data_makers.py | 18 +++++++++++++++--- framework/target.py | 11 ++++++----- framework/value_types.py | 8 ++++++-- libs/debug_facility.py | 8 +++++++- 6 files changed, 41 insertions(+), 15 deletions(-) diff --git a/framework/database.py b/framework/database.py index 0f08a45..f5ab814 100644 --- a/framework/database.py +++ b/framework/database.py @@ -461,7 +461,8 @@ def display_data_info(self, data_id, with_data=False, with_fbk=False, with_fmkin msg += colorize("{:s}".format(prj), rgb=Color.FMKSUBINFO) msg += colorize(" | Target: ", rgb=Color.FMKINFO) msg += colorize("{:s}".format(tg), rgb=Color.FMKSUBINFO) - msg += colorize("\n Status: ", rgb=Color.FMKINFO) + status_prefix = " Status: " + msg += colorize('\n' + status_prefix, rgb=Color.FMKINFO) src_max_sz = 0 for idx, fbk in enumerate(feedback): src, tstamp, status, _ = fbk @@ -473,7 +474,7 @@ def display_data_info(self, data_id, with_data=False, with_fbk=False, with_fmkin colorize(" by ", rgb=Color.FMKINFO) + \ colorize("{!s}".format(src), rgb=Color.FMKSUBINFO) if idx < len(feedback) - 1: - msg += colorize(", ".format(src), rgb=Color.FMKINFO) + msg += colorize(",\n".format(src) + ' '*len(status_prefix), rgb=Color.FMKINFO) msg += '\n' sentd = sent_date.strftime("%d/%m/%Y - %H:%M:%S") if sent_date else 'None' diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index 253b9fb..311655f 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -33,10 +33,12 @@ import framework.data_model as dm from framework.basic_primitives import * - from libs.external_modules import * -from libs.debug_facility import * +import libs.debug_facility as dbg + +DEBUG = dbg.MW_DEBUG +DEBUG_PRINT = dbg.DEBUG_PRINT class ModelWalker(object): ''' diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index f7ae03f..fa522da 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -46,6 +46,7 @@ # STATEFUL DISRUPTORS # ####################### +MAX_INFO_SIZE = 200 @disruptor(tactics, dtype="tWALK", weight=1, gen_args = GENERIC_ARGS, @@ -159,6 +160,13 @@ def disrupt_data(self, dm, target, data): else: self.run_num +=1 + corrupt_node_bytes = consumed_node.to_bytes() + corrupt_node_bytes_hex = binascii.b2a_hex(corrupt_node_bytes) + if len(corrupt_node_bytes) > MAX_INFO_SIZE: + corrupt_node_bytes = corrupt_node_bytes[:MAX_INFO_SIZE] + b' ...' + if len(corrupt_node_bytes_hex) > MAX_INFO_SIZE: + corrupt_node_bytes_hex = corrupt_node_bytes_hex[:MAX_INFO_SIZE] + b' ...' + data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) data.add_info('current fuzzed node: %s' % self.modelwalker.consumed_node_path) @@ -166,8 +174,8 @@ def disrupt_data(self, dm, target, data): data.add_info(' |_ original node value: %s (ascii: %s)' % \ (binascii.b2a_hex(orig_node_val), orig_node_val)) data.add_info(' |_ corrupt node value: %s (ascii: %s)' % \ - (binascii.b2a_hex(consumed_node.to_bytes()), - consumed_node.to_bytes())) + (corrupt_node_bytes_hex, + corrupt_node_bytes)) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) @@ -1093,11 +1101,15 @@ def disrupt_data(self, dm, target, data): else: self.run_num +=1 + corrupt_node_bytes = consumed_node.to_bytes() + if len(corrupt_node_bytes) > MAX_INFO_SIZE: + corrupt_node_bytes = corrupt_node_bytes[:MAX_INFO_SIZE] + b' ...' 
+ data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) data.add_info('current fuzzed node: %s' % consumed_node.get_path_from(rnode)) data.add_info('original val: %s' % repr(orig_node_val)) - data.add_info('corrupted val: %s' % repr(consumed_node.to_bytes())) + data.add_info('corrupted val: %s' % repr(corrupt_node_bytes)) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) diff --git a/framework/target.py b/framework/target.py index 36cd6bb..1f3c219 100644 --- a/framework/target.py +++ b/framework/target.py @@ -1702,11 +1702,12 @@ def _retrieve_feedback_from_serial(self, timeout=None): return feedback def send_data(self, data, from_fmk=False): - node_list = data.node[NodeSemanticsCriteria(mandatory_criteria=['tel num'])] - if node_list and len(node_list)==1: - node_list[0].set_values(value_type=GSMPhoneNum(val_list=[self.tel_num])) - else: - print('\nWARNING: Data does not contain a mobile number.') + if data.node: + node_list = data.node[NodeSemanticsCriteria(mandatory_criteria=['tel num'])] + if node_list and len(node_list)==1: + node_list[0].set_values(value_type=GSMPhoneNum(val_list=[self.tel_num])) + else: + print('\nWARNING: Data does not contain a mobile number.') pdu = b'' raw_data = data.to_bytes() for c in raw_data: diff --git a/framework/value_types.py b/framework/value_types.py index d9ea4f4..8c3bcaa 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -46,7 +46,9 @@ from framework.error_handling import * from framework.global_resources import * -DEBUG = False +import libs.debug_facility as dbg + +DEBUG = dbg.VT_DEBUG class VT(object): ''' @@ -338,8 +340,10 @@ def _str2bytes(self, val): try: b = val.encode(self.codec) except: + if len(val) > 30: + val = val[:30] + ' ...' err_msg = "\n*** WARNING: Encoding issue. With python2 'str' or 'bytes' means " \ - "ASCII, prefix the string {:s} with 'u'".format(repr(val[:30])) + "ASCII, prefix the string {:s} with 'u'".format(repr(val)) print(err_msg) b = val return b diff --git a/libs/debug_facility.py b/libs/debug_facility.py index 015d20d..6022da7 100644 --- a/libs/debug_facility.py +++ b/libs/debug_facility.py @@ -22,12 +22,18 @@ ################################################################################ DEBUG = False -LEVEL = 0 +LEVEL = 2 # related to data_model.py DM_DEBUG = False ABS_DEBUG = False +# related to value_types.py +VT_DEBUG = False + +# related to fuzzing_primitives.py +MW_DEBUG = True + try: from xtermcolor import colorize except ImportError: From 959c20e9fd0e453f2979581bb4bdc84eaa8cd749 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 12 Aug 2016 19:20:26 +0200 Subject: [PATCH 62/80] Fix Probe synchro issue + some cleanup --- docs/source/tutorial.rst | 6 +-- framework/generic_data_makers.py | 88 +++++++++++++++----------------- framework/monitor.py | 47 ++++++++++------- framework/plumbing.py | 34 ++++++------ framework/target.py | 31 +++++++++++ projects/tuto_proj.py | 4 +- 6 files changed, 125 insertions(+), 85 deletions(-) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 1410cd6..cf69e26 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -2363,8 +2363,8 @@ between two decorators: call to :meth:`framework.monitor.Probe.main()` which is configurable. - ``@blocking_probe`` for probe which will be run just once after each - data emission (default) or after each feedback retrieval. 
The default behaviour can be - changed by giving a ``after_feedback_retrieval`` parameter set to ``True``. + data emission (default) or after each target feedback retrieval. The default behaviour can be + changed by giving a ``after_target_feedback_retrieval`` parameter set to ``True``. These *decorators* have to take the reference of the project as parameter, in order to register them within. A really basic @@ -2390,7 +2390,7 @@ information from the target is given here under: .. code-block:: python :linenos: - @blocking_probe(project, after_feedback_retrieval=False) + @blocking_probe(project, after_target_feedback_retrieval=False) class health_check(Probe): def start(self, dm, target, logger): diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index fa522da..9e113fa 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -46,7 +46,10 @@ # STATEFUL DISRUPTORS # ####################### -MAX_INFO_SIZE = 200 +def truncate_info(info, max_size=60): + if len(info) > max_size: + info = info[:max_size] + b' ...' + return repr(info) @disruptor(tactics, dtype="tWALK", weight=1, gen_args = GENERIC_ARGS, @@ -89,7 +92,7 @@ def disrupt_data(self, dm, target, data): return data data.add_info('model walking index: {:d}'.format(idx)) - data.add_info('current node: %s' % self.modelwalker.consumed_node_path) + data.add_info('current node: {!s}'.format(self.modelwalker.consumed_node_path)) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) @@ -161,21 +164,15 @@ def disrupt_data(self, dm, target, data): self.run_num +=1 corrupt_node_bytes = consumed_node.to_bytes() - corrupt_node_bytes_hex = binascii.b2a_hex(corrupt_node_bytes) - if len(corrupt_node_bytes) > MAX_INFO_SIZE: - corrupt_node_bytes = corrupt_node_bytes[:MAX_INFO_SIZE] + b' ...' - if len(corrupt_node_bytes_hex) > MAX_INFO_SIZE: - corrupt_node_bytes_hex = corrupt_node_bytes_hex[:MAX_INFO_SIZE] + b' ...' 
data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) - data.add_info('current fuzzed node: %s' % self.modelwalker.consumed_node_path) - data.add_info(' |_ value type: %s' % consumed_node.cc.get_value_type()) - data.add_info(' |_ original node value: %s (ascii: %s)' % \ - (binascii.b2a_hex(orig_node_val), orig_node_val)) - data.add_info(' |_ corrupt node value: %s (ascii: %s)' % \ - (corrupt_node_bytes_hex, - corrupt_node_bytes)) + data.add_info('current fuzzed node: {!s}'.format(self.modelwalker.consumed_node_path)) + data.add_info(' |_ value type: {!s}'.format(consumed_node.cc.get_value_type())) + data.add_info(' |_ original node value (hex): {!s}'.format(truncate_info(binascii.b2a_hex(orig_node_val)))) + data.add_info(' | (ascii): {!s}'.format(truncate_info(orig_node_val))) + data.add_info(' |_ corrupt node value (hex): {!s}'.format(truncate_info(binascii.b2a_hex(corrupt_node_bytes)))) + data.add_info(' (ascii): {!s}'.format(truncate_info(corrupt_node_bytes))) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) @@ -264,9 +261,9 @@ def disrupt_data(self, dm, target, data): data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) - data.add_info('current node with alternate conf: %s' % self.modelwalker.consumed_node_path) - data.add_info(' |_ associated value: %s' % repr(consumed_node.to_bytes())) - data.add_info(' |_ original node value: %s' % orig_node_val) + data.add_info('current node with alternate conf: {!s}'.format(self.modelwalker.consumed_node_path)) + data.add_info(' |_ associated value: {!s}'.format(truncate_info(consumed_node.to_bytes()))) + data.add_info(' |_ original node value: {!s}'.format(truncate_info(orig_node_val))) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) @@ -339,15 +336,16 @@ def disrupt_data(self, dm, target, data): else: self.run_num +=1 + corrupt_node_bytes = consumed_node.to_bytes() + data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) - data.add_info('current fuzzed separator: %s' % self.modelwalker.consumed_node_path) - data.add_info(' |_ value type: %s' % consumed_node.cc.get_value_type()) - data.add_info(' |_ original separator: %s (ascii: %s)' % \ - (binascii.b2a_hex(orig_node_val), orig_node_val)) - data.add_info(' |_ replaced by: %s (ascii: %s)' % \ - (binascii.b2a_hex(consumed_node.to_bytes()), - consumed_node.to_bytes())) + data.add_info('current fuzzed separator: {!s}'.format(self.modelwalker.consumed_node_path)) + data.add_info(' |_ value type: {!s}'.format(consumed_node.cc.get_value_type())) + data.add_info(' |_ original separator (hex): {!s}'.format(truncate_info(binascii.b2a_hex(orig_node_val)))) + data.add_info(' | (ascii): {!s}'.format(truncate_info(orig_node_val))) + data.add_info(' |_ replaced by (hex): {!s}'.format(truncate_info(binascii.b2a_hex(corrupt_node_bytes)))) + data.add_info(' (ascii): {!s}'.format(truncate_info(corrupt_node_bytes))) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) @@ -691,14 +689,14 @@ def disrupt_data(self, dm, target, prev_data): self.existing_conf = True if self.provided_alt and not self.existing_conf: - prev_data.add_info("NO ALTERNATE CONF '%s' AVAILABLE" % str(self.conf)) + prev_data.add_info("NO ALTERNATE CONF '{!s}' AVAILABLE".format(self.conf)) return prev_data if 
self.conf_fallback is None: prev_data.add_info("NO ALTERNATE CONF AVAILABLE") return prev_data - prev_data.add_info("ALTERNATE CONF '%s' USED" % str(self.conf)) + prev_data.add_info("ALTERNATE CONF '{!s}' USED".format(self.conf)) prev_data.node.unfreeze_all() prev_data.node.set_current_conf(self.conf, recursive=self.recursive, root_regexp=self.path) @@ -736,7 +734,7 @@ def disrupt_data(self, dm, target, prev_data): val = node.to_bytes() orig_len = len(val) - prev_data.add_info('orig node length: %d' % orig_len) + prev_data.add_info('orig node length: {:d}'.format(orig_len)) if self.sz >= 0: node.set_values([val[:min(self.sz, orig_len)]]) @@ -746,14 +744,14 @@ def disrupt_data(self, dm, target, prev_data): node.set_values([val[orig_len - min(self.sz, orig_len):]]) prev_data.add_info('left truncation') - prev_data.add_info('new node length: %d' % min(self.sz, orig_len)) + prev_data.add_info('new node length: {:d}'.format(min(self.sz, orig_len))) ret = prev_data else: val = prev_data.to_bytes() orig_len = len(val) - prev_data.add_info('orig data length: %d' % orig_len) + prev_data.add_info('orig data length: {:d}'.format(orig_len)) if self.sz >= 0: new_val = val[:min(self.sz, orig_len)] @@ -763,7 +761,7 @@ def disrupt_data(self, dm, target, prev_data): new_val = val[orig_len - min(self.sz, orig_len):] prev_data.add_info('left truncation') - prev_data.add_info('new data length: %d' % len(new_val)) + prev_data.add_info('new data length: {:d}'.format(len(new_val))) prev_data.update_from_str_or_bytes(new_val) ret = prev_data @@ -807,18 +805,18 @@ def disrupt_data(self, dm, target, prev_data): for i in l: val = i.to_bytes() - prev_data.add_info('current fuzzed node: %s' % i.get_path_from(prev_data.node)) - prev_data.add_info('orig data: %s' % repr(val)) + prev_data.add_info('current fuzzed node: {!s}'.format(i.get_path_from(prev_data.node))) + prev_data.add_info('orig data: {!s}'.format(truncate_info(val))) if self.new_val is None: if val != b'': val = corrupt_bits(val, n=1, ascii=self.ascii) - prev_data.add_info('corrupted data: %s' % repr(val)) + prev_data.add_info('corrupt data: {!s}'.format(truncate_info(val))) else: prev_data.add_info('Nothing to corrupt!') else: val = self.new_val - prev_data.add_info('corrupted data: %s' % repr(val)) + prev_data.add_info('corrupt data: {!s}'.format(truncate_info(val))) i.set_values(val_list=[val]) i.get_value() @@ -852,7 +850,7 @@ def disrupt_data(self, dm, target, prev_data): else: val = prev_data.to_bytes() - prev_data.add_info('corrupted bit index: %d' % self.idx) + prev_data.add_info('corrupted bit index: {:d}'.format(self.idx)) new_value = self.new_val if self.new_val is not None \ else corrupt_bits(val[self.idx-1:self.idx], n=1, ascii=self.ascii) @@ -945,8 +943,8 @@ def disrupt_data(self, dm, target, prev_data): for n in l: n.unfreeze(recursive=self.recursive) n.freeze() - prev_data.add_info("unfreeze the node '{:s}'".format(n.get_path_from(prev_data.node))) - prev_data.add_info("new value: '{:s}'".format(n.to_bytes())) + prev_data.add_info("unfreeze the node '{!s}'".format(n.get_path_from(prev_data.node))) + prev_data.add_info("new value: '{!s}'".format(n.to_bytes())) else: prev_data.node.unfreeze(recursive=self.recursive) @@ -1009,9 +1007,9 @@ def disrupt_data(self, dm, target, prev_data): def _add_info(self, prev_data, n, status, size): val_len = len(self.value) - prev_data.add_info("changed node: '{:s}'".format(n.name)) - prev_data.add_info("absorption status: {:s}".format(status)) - prev_data.add_info("value provided: '{:s}'".format(self.value)) 
+ prev_data.add_info("changed node: '{!s}'".format(n.name)) + prev_data.add_info("absorption status: {!s}".format(status)) + prev_data.add_info("value provided: '{!s}'".format(truncate_info(self.value))) prev_data.add_info("__ length: {:d}".format(val_len)) if status != AbsorbStatus.FullyAbsorbed: prev_data.add_info("absorbed size: {:d}".format(size)) @@ -1019,7 +1017,7 @@ def _add_info(self, prev_data, n, status, size): remaining = self.value[size:size+100] + ' ...' else: remaining = self.value[size:] - prev_data.add_info("remaining: '{:s}'".format(remaining)) + prev_data.add_info("remaining: '{!s}'".format(remaining)) @disruptor(tactics, dtype="COPY", weight=4, @@ -1102,14 +1100,12 @@ def disrupt_data(self, dm, target, data): self.run_num +=1 corrupt_node_bytes = consumed_node.to_bytes() - if len(corrupt_node_bytes) > MAX_INFO_SIZE: - corrupt_node_bytes = corrupt_node_bytes[:MAX_INFO_SIZE] + b' ...' data.add_info('model walking index: {:d}'.format(idx)) data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) - data.add_info('current fuzzed node: %s' % consumed_node.get_path_from(rnode)) - data.add_info('original val: %s' % repr(orig_node_val)) - data.add_info('corrupted val: %s' % repr(corrupt_node_bytes)) + data.add_info('current fuzzed node: {!s}'.format(consumed_node.get_path_from(rnode))) + data.add_info('original val: {!s}'.format(truncate_info(orig_node_val))) + data.add_info('corrupted val: {!s}'.format(truncate_info(corrupt_node_bytes))) if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) diff --git a/framework/monitor.py b/framework/monitor.py index 2fedf59..c34b69a 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -162,10 +162,10 @@ def _handle_exception(self, context): class BlockingProbeUser(ProbeUser): - def __init__(self, probe, after_feedback_retrieval): + def __init__(self, probe, after_target_feedback_retrieval): ProbeUser.__init__(self, probe) - self._after_feedback_retrieval = after_feedback_retrieval + self._after_target_feedback_retrieval = after_target_feedback_retrieval self._continue_event = threading.Event() @@ -175,8 +175,8 @@ def __init__(self, probe, after_feedback_retrieval): self._probe_status_event = threading.Event() @property - def after_feedback_retrieval(self): - return self._after_feedback_retrieval + def after_target_feedback_retrieval(self): + return self._after_target_feedback_retrieval def stop(self): ProbeUser.stop(self) @@ -194,6 +194,7 @@ def wait_until_armed(self, timeout=None): raise finally: self._armed_event.clear() + # if error before wait_until_ready, we need to clear its event self._probe_status_event.clear() def wait_until_ready(self, timeout=None): @@ -202,6 +203,8 @@ def wait_until_ready(self, timeout=None): except ProbeTimeoutError as e: e.blocking_methods = ["main()"] raise + finally: + self._probe_status_event.clear() def notify_blocking(self): self._blocking_event.set() @@ -232,6 +235,7 @@ def _wait_for_data_ready(self): self._arm_event.wait(1) self._arm_event.clear() + self._continue_event.clear() return True def _notify_armed(self): @@ -248,7 +252,6 @@ def _wait_for_fmk_sync(self): timeout_appended = True while not self._blocking_event.is_set(): if self._continue_event.is_set() or not self._go_on(): - self._continue_event.clear() self._notify_status_retrieved() timeout_appended = False break @@ -325,12 +328,12 @@ def set_target(self, target): def set_data_model(self, dm): self._dm = dm - def add_probe(self, probe, blocking=False, 
after_feedback_retrieval=False): + def add_probe(self, probe, blocking=False, after_target_feedback_retrieval=False): if probe.__class__.__name__ in self.probe_users: raise AddExistingProbeToMonitorError(probe.__class__.__name__) if blocking: - self.probe_users[probe.__class__.__name__] = BlockingProbeUser(probe, after_feedback_retrieval) + self.probe_users[probe.__class__.__name__] = BlockingProbeUser(probe, after_target_feedback_retrieval) else: self.probe_users[probe.__class__.__name__] = ProbeUser(probe) @@ -433,10 +436,10 @@ def _wait_for_specific_probes(self, probe_user_class, probe_user_wait_method, pr .format(e.probe_name, e.blocking_methods), code=Error.OperationCancelled) - def do_after_probes_init(self): + def wait_for_probe_initialization(self): self._wait_for_specific_probes(ProbeUser, ProbeUser.wait_for_probe_init) - def do_before_sending_data(self): + def notify_imminent_data_sending(self): if not self.__enable: return self._target_status = None @@ -448,31 +451,33 @@ def do_before_sending_data(self): self._wait_for_specific_probes(BlockingProbeUser, BlockingProbeUser.wait_until_armed) - def do_after_sending_data(self): + def notify_data_sending_event(self): if not self.__enable: return for _, probe_user in self.probe_users.items(): - if isinstance(probe_user, BlockingProbeUser) and not probe_user.after_feedback_retrieval: + if isinstance(probe_user, BlockingProbeUser) and not probe_user.after_target_feedback_retrieval: probe_user.notify_blocking() - def do_after_timeout(self): + def notify_target_feedback_retrieval(self): if not self.__enable: return for _, probe_user in self.probe_users.items(): - if isinstance(probe_user, BlockingProbeUser) and probe_user.after_feedback_retrieval: + if isinstance(probe_user, BlockingProbeUser) and probe_user.after_target_feedback_retrieval: probe_user.notify_blocking() - def do_before_feedback_retrieval(self): + def wait_for_probe_status_retrieval(self): if not self.__enable: return self._wait_for_specific_probes(BlockingProbeUser, BlockingProbeUser.wait_until_ready) - def do_on_error(self): + def notify_error(self): + # WARNING: do not use between BlockingProbeUser.notify_data_ready and + # BlockingProbeUser.wait_until_armed if not self.__enable: return @@ -797,13 +802,16 @@ def _start(self): pass_prompt = b''.join(chunks) time.sleep(0.1) self.ser.write(self.password+b'\r\n') - time.sleep(self.slowness_factor*0.6) + time.sleep(self.slowness_factor*0.7) def _stop(self): self.ser.write(b'\x04\r\n') # we send an EOT (Ctrl+D) self.ser.close() def _exec_command(self, cmd): + if not self.ser.is_open: + raise BackendError('Serial port not open') + if sys.version_info[0] > 2: cmd = bytes(cmd, self.codec) cmd += b'\r\n' @@ -997,7 +1005,7 @@ def _get_mem(self): if entry.find(self.process_name) >= 0: try: rss = int(re.search('\d+', entry.split()[0]).group(0)) - except ValueError: + except: rss = -10 break else: @@ -1066,9 +1074,10 @@ def internal_func(probe_cls): return internal_func -def blocking_probe(project, after_feedback_retrieval=False): +def blocking_probe(project, after_target_feedback_retrieval=False): def internal_func(probe_cls): - project.monitor.add_probe(probe_cls(), blocking=True, after_feedback_retrieval=after_feedback_retrieval) + project.monitor.add_probe(probe_cls(), blocking=True, + after_target_feedback_retrieval=after_target_feedback_retrieval) return probe_cls return internal_func diff --git a/framework/plumbing.py b/framework/plumbing.py index d3eb553..d3040e5 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ 
-926,7 +926,7 @@ def __start_fmk_plumbing(self): if delay is not None: self.mon.set_probe_delay(pname, delay) self.mon.start_probe(pname) - self.mon.do_after_probes_init() + self.mon.wait_for_probe_initialization() self.prj.start() if self.tg.probes: time.sleep(0.5) @@ -943,8 +943,6 @@ def __start_fmk_plumbing(self): def __stop_fmk_plumbing(self): if self.__is_started(): - signal.signal(signal.SIGINT, sig_int_handler) - if self.is_target_enabled(): self.log_target_residual_feedback() @@ -965,6 +963,8 @@ def __stop_fmk_plumbing(self): self.__stop() + signal.signal(signal.SIGINT, sig_int_handler) + @EnforceOrder(accepted_states=['20_load_prj','25_load_dm','S1','S2']) def exit_fmk(self): @@ -1473,13 +1473,12 @@ def __delay_fuzzing(self): else: self._burst_countdown -= 1 - self.mon.do_after_timeout() return ret def _do_before_sending_data(self, data_list): # Monitor hook function before sending - self.mon.do_before_sending_data() + self.mon.notify_imminent_data_sending() data_list = self._handle_data_callbacks(data_list, hook=HOOK.before_sending) return data_list @@ -1756,16 +1755,17 @@ def send_data_and_log(self, data_list, original_data=None, verbose=False): if cont0: cont0 = self.__delay_fuzzing() - else: - self.mon.do_on_error() - - self.mon.do_before_feedback_retrieval() cont1 = True cont2 = True # That means this is the end of a burst if self._burst_countdown == self._burst: cont1 = self.log_target_feedback() + + self.mon.notify_target_feedback_retrieval() + self.mon.wait_for_probe_status_retrieval() + + if self._burst_countdown == self._burst: # We handle probe feedback if any cont2 = self.monitor_probes(force_record=True) self.tg.cleanup() @@ -1812,12 +1812,12 @@ def send_data(self, data_list, add_preamble=False): raise ValueError except TargetStuck as e: self.lg.log_comment("*** WARNING: Unable to send data to the target! [reason: %s]" % str(e)) - self.mon.do_on_error() + self.mon.notify_error() except: self._handle_user_code_exception() - self.mon.do_on_error() + self.mon.notify_error() else: - self.mon.do_after_sending_data() + self.mon.notify_data_sending_event() self._do_after_sending_data(data_list) @@ -2326,7 +2326,7 @@ def launch_operator(self, name, user_input=UserInputContainer(), use_existing_se self._handle_user_code_exception('Operator has crashed during its start() method') return False finally: - self.mon.do_after_probes_init() # operator.start() can start probes. + self.mon.wait_for_probe_initialization() # operator.start() can start probes. if not ok: self.set_error("The _start() method of Operator '%s' has returned an error!" 
% name, @@ -2451,11 +2451,15 @@ def launch_operator(self, name, user_input=UserInputContainer(), use_existing_se exit_operator = True self.lg.log_fmk_info("Operator will shutdown because waiting has been cancelled by the user") - self.mon.do_before_feedback_retrieval() # Target fbk is logged only at the end of a burst if self._burst_countdown == self._burst: cont1 = self.log_target_feedback() + + self.mon.notify_target_feedback_retrieval() + self.mon.wait_for_probe_status_retrieval() + + if self._burst_countdown == self._burst: cont2 = self.monitor_probes(force_record=True) if not cont1 or not cont2: exit_operator = True @@ -2955,7 +2959,7 @@ def launch_probe(self, name): if not ok: self.set_error('Probe does not exist (or already launched)', code=Error.CommandError) - self.mon.do_after_probes_init() + self.mon.wait_for_probe_initialization() return ok diff --git a/framework/target.py b/framework/target.py index 1f3c219..38f0505 100644 --- a/framework/target.py +++ b/framework/target.py @@ -312,6 +312,37 @@ def send_multiple_data(self, data_list, from_fmk=False): pass +class TestTarget(Target): + + def __init__(self, recover_ratio=100): + self._cpt = None + self._recover_ratio = recover_ratio + + def start(self): + self._cpt = 0 + return True + + def send_data(self, data, from_fmk=False): + pass + + def send_multiple_data(self, data_list, from_fmk=False): + pass + + def is_target_ready_for_new_data(self): + self._cpt += 1 + if self._cpt > 5 and random.choice([True, False]): + self._cpt = 0 + return True + else: + return False + + def recover_target(self): + if random.randint(1, 100) > (100 - self._recover_ratio): + return True + else: + return False + + class NetworkTarget(Target): '''Generic target class for interacting with a network resource. Can be used directly, but some methods may require to be overloaded to diff --git a/projects/tuto_proj.py b/projects/tuto_proj.py index 70004f3..b228fd6 100644 --- a/projects/tuto_proj.py +++ b/projects/tuto_proj.py @@ -117,7 +117,7 @@ def main(self, dm, target, logger): return ProbeStatus(status) -serial_backend = Serial_Backend('/dev/ttyUSB0', username='test', password='test', slowness_factor=4) +serial_backend = Serial_Backend('/dev/ttyUSB0', username='test', password='test', slowness_factor=8) @blocking_probe(project) class probe_pid(ProbePID): @@ -135,7 +135,7 @@ class probe_mem(ProbeMem): targets = [(EmptyTarget(), (P1, 2), (P2, 1.4), health_check), tuto_tg, net_tg, udpnet_tg, udpnetsrv_tg, rawnetsrv_tg, - (EmptyTarget(), probe_pid, (probe_mem, 0.2))] + (TestTarget(), probe_pid, (probe_mem, 0.2))] ### OPERATOR DEFINITION ### From 930126d82b074e05be4d90b612f2683ef81c81c8 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 12 Aug 2016 19:27:55 +0200 Subject: [PATCH 63/80] Update PPPoE DM regarding PADT --- data_models/protocols/pppoe.py | 2 ++ data_models/protocols/pppoe_strategy.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index 246280e..dd277b2 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -215,6 +215,8 @@ def build_data_model(self): 'custo_clear': MH.Custo.NTerm.FrozenCopy, 'exists_if': (IntCondition(0xa7), 'code'), 'contents': [ + {'name': ('host_uniq_stub', 3), + 'contents': String(val_list=[''])}, (tag_node.get_clone(), 0, 4) ]} ]}, diff --git a/data_models/protocols/pppoe_strategy.py b/data_models/protocols/pppoe_strategy.py index 3386db9..84e11e1 100644 --- a/data_models/protocols/pppoe_strategy.py +++ 
b/data_models/protocols/pppoe_strategy.py @@ -189,6 +189,8 @@ def disrupt_data(self, dm, target, prev_data): step_wait_padi = NoDataStep(fbk_timeout=1) step_send_valid_pado = Step(DataProcess(process=[('FIX_FIELDS#2', None, UI(reevaluate_csts=True))], seed='pado')) +step_send_padt = Step(DataProcess(process=[('FIX_FIELDS#3', None, UI(reevaluate_csts=True))], + seed='padt'), fbk_timeout=0.1) dp_pads = DataProcess(process=[('tTYPE#2', UI(init=1), UI(order=True)), 'FIX_FIELDS'], seed='pads') dp_pads.append_new_process([('tSTRUCT#2', UI(init=1), UI(deep=True)), 'FIX_FIELDS']) @@ -199,7 +201,9 @@ def disrupt_data(self, dm, target, prev_data): step_send_valid_pado.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback_and_update) step_send_valid_pado.connect_to(step_wait_padr) -step_send_fuzzed_pads.connect_to(step_wait_padr) +# step_send_fuzzed_pads.connect_to(step_wait_padr) +step_send_fuzzed_pads.connect_to(step_send_padt) +step_send_padt.connect_to(step_wait_padr) step_wait_padr.connect_to(step_send_fuzzed_pads, cbk_after_fbk=retrieve_padr_from_feedback_and_update) step_wait_padr.connect_to(step_send_valid_pado, cbk_after_fbk=retrieve_padi_from_feedback) From 431405e5252b6b640e641ce4562573207e9cd4dc Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Fri, 12 Aug 2016 21:18:12 +0200 Subject: [PATCH 64/80] Fix Probe synchro in case of a sending error --- framework/plumbing.py | 16 +++++++++++----- framework/target.py | 9 ++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/framework/plumbing.py b/framework/plumbing.py index d3040e5..6990b84 100644 --- a/framework/plumbing.py +++ b/framework/plumbing.py @@ -212,6 +212,7 @@ def __init__(self): self.error = False self.fmk_error = [] + self._sending_error = None self.__tg_enabled = False self.__prj_to_be_reloaded = False @@ -1721,7 +1722,7 @@ def send_data_and_log(self, data_list, original_data=None, verbose=False): return True data_list = self.send_data(data_list, add_preamble=True) - if data_list is None: + if data_list is None or self._sending_error: return False if self._wkspace_enabled: @@ -1800,9 +1801,7 @@ def send_data(self, data_list, add_preamble=False): code=Error.DataInvalid) return None - if add_preamble: - self.new_transfer_preamble() - + self._sending_error = False try: if len(data_list) == 1: self.tg.send_data_sync(data_list[0], from_fmk=True) @@ -1813,10 +1812,14 @@ def send_data(self, data_list, add_preamble=False): except TargetStuck as e: self.lg.log_comment("*** WARNING: Unable to send data to the target! 
[reason: %s]" % str(e)) self.mon.notify_error() + self._sending_error = True except: self._handle_user_code_exception() self.mon.notify_error() + self._sending_error = True else: + if add_preamble: + self.new_transfer_preamble() self.mon.notify_data_sending_event() self._do_after_sending_data(data_list) @@ -2414,7 +2417,10 @@ def launch_operator(self, name, user_input=UserInputContainer(), use_existing_se continue data_list = self.send_data(data_list, add_preamble=True) - if data_list is None: + if self._sending_error: + self.lg.log_fmk_info("Operator will shutdown because of a sending error") + break + elif data_list is None: self.lg.log_fmk_info("Operator will shutdown because there is no data to send") break diff --git a/framework/target.py b/framework/target.py index 38f0505..3a48e03 100644 --- a/framework/target.py +++ b/framework/target.py @@ -482,9 +482,12 @@ def register_new_interface(self, host, port, socket_type, data_semantics, server self.server_mode[(host,port)] = server_mode self._default_fbk_id[(host, port)] = self._default_fbk_socket_id + ' - {:s}:{:d}'.format(host, port) self.hold_connection[(host, port)] = hold_connection - self._mac_src[(host, port)] = self.get_mac_addr(host) if mac_src is None else mac_src - self._mac_dst[(host, port)] = mac_dst - + if socket_type[1] == socket.SOCK_RAW: + self._mac_src[(host, port)] = self.get_mac_addr(host) if mac_src is None else mac_src + self._mac_dst[(host, port)] = mac_dst + else: + self._mac_src = {(host, port): None} + self._mac_dst = {(host, port): None} def set_timeout(self, fbk_timeout, sending_delay): ''' From 2969f006ef7396bb99c3e614bfbd123dab0b630a Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 13 Aug 2016 13:58:02 +0200 Subject: [PATCH 65/80] Monitor: differentiate probe initialization timeout from nominal timeout --- framework/monitor.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/framework/monitor.py b/framework/monitor.py index c34b69a..a2df486 100644 --- a/framework/monitor.py +++ b/framework/monitor.py @@ -34,7 +34,8 @@ class ProbeUser(object): - timeout = 20.0 + timeout = 10.0 + probe_init_timeout = 20.0 def __init__(self, probe): self._probe = probe @@ -412,18 +413,22 @@ def get_probes_names(self): probes_names.append(probe_name) return probes_names - def _wait_for_specific_probes(self, probe_user_class, probe_user_wait_method, probes=None): + def _wait_for_specific_probes(self, probe_user_class, probe_user_wait_method, probes=None, + timeout=None): """ Wait for probes to trigger a specific event Args: - probe_user_class (ProbeUser): probe_user class that defines the method - probe_user_wait_method (method): name of the probe_user's method that will be used to wait - probes (list of :class:`ProbeUser`): probes to wait for. If None all probes will be concerned + probe_user_class (ProbeUser): probe_user class that defines the method. + probe_user_wait_method (method): name of the probe_user's method that will be used to wait. + probes (list of :class:`ProbeUser`): probes to wait for. If None all probes will be concerned. + timeout (float): maximum time to wait for in seconds. 
""" probes = self.probe_users.items() if probes is None else probes - timeout = datetime.timedelta(seconds=ProbeUser.timeout) + if timeout is None: + timeout = ProbeUser.timeout + timeout = datetime.timedelta(seconds=timeout) start = datetime.datetime.now() for _, probe_user in probes: @@ -437,7 +442,8 @@ def _wait_for_specific_probes(self, probe_user_class, probe_user_wait_method, pr code=Error.OperationCancelled) def wait_for_probe_initialization(self): - self._wait_for_specific_probes(ProbeUser, ProbeUser.wait_for_probe_init) + self._wait_for_specific_probes(ProbeUser, ProbeUser.wait_for_probe_init, + timeout=ProbeUser.probe_init_timeout) def notify_imminent_data_sending(self): if not self.__enable: From 8ea99d8b3eda5384172f35d32ec50a5485eaa534 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 13 Aug 2016 19:40:29 +0200 Subject: [PATCH 66/80] Update documentation regarding Regex Parser --- docs/source/data_model.rst | 48 ++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 144b63b..5f5d65a 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -543,6 +543,8 @@ custo_set, custo_clear Refer to the description of the corresponding *generator node* mode. +.. _dm:nt-keywords: + Keywords to Describe Non Terminal Node -------------------------------------- @@ -1562,19 +1564,20 @@ How to Describe a Data Format That Contains Complex Strings Parts of the data that only contain strings can easily be described using python's regular expressions. Here are some rules to respect: -- The characters couple (``[``, ``]``), ``.`` and meta-sequences, such as ``\s``, ``\S``, ``\w``, ``\W``, - ``\d`` and ``\D``, are the only ways to define a :class:`framework.value_types.String` terminal node that - contains an alphabet. +- Using square brackets ``[ ]`` to indicate a set of characters will result in the creation of a + :class:`framework.value_types.String` terminal node that contains an *alphabet*. Likewise, the usage of + ``.`` or meta-sequences such as ``\s``, ``\S``, ``\w``, ``\W``, ``\d`` or ``\D`` will lead to the + creation of such type of nodes. - Anything else will be translated into a :class:`framework.value_types.String` terminal node that - declares a list of values. The characters couple (``(``, ``)``) can be used to delimit a portion of + declares a list of values. ``( )`` can be used to delimit a portion of the regular expression that need to be translated into a terminal node on its own. .. note:: If each item in a list of values are integers an :class:`framework.value_types.INT_Str` will be created instead of a :class:`framework.value_types.String`. - ``(``, ``)``, ``[``, ``]``, ``?``, ``*``, ``+``, ``{``, ``}``, ``|``, ``\``, ``-``, ``.`` are the only - recognised special chars. They can not be used in an unsuitable context without been escaped + recognised special characters. They cannot be used in an unsuitable context without being escaped (exceptions are made for ``|``, ``.`` and ``-``). - Are only allowed regular expressions that can be translated into one terminal node or into one non-terminal @@ -1584,13 +1587,13 @@ Here are some rules to respect: - An inconsistency between the charset and the characters that compose the regular expression will result in an :class:`framework.error_handling.CharsetError`. -.. note:: The default charset used by Fuddly is ``MH.Charset.ASCII_EXT``. 
To alter this behaviour, it is
-  necessary to use the ``charset`` keyword.
+.. note:: The default charset used by Fuddly is ``MH.Charset.ASCII_EXT``. To change this behaviour,
+  use the keyword ``charset`` (refer to :ref:`dm:node_prop_keywords`).
 To illustrate these rules, let's take some examples:
-Example 1: the basics
+Example 1: The basics.
 .. code-block:: python
     :linenos:
@@ -1603,12 +1606,12 @@ Example 1: the basics
     {'name': 'HTTP_version_1', 'contents': String(val_list=["HTTP"])},
     {'name': 'HTTP_version_2', 'contents': String(val_list=["/"])},
     {'name': 'HTTP_version_3',
-     'contents': String(alphabet="0123456789", size=[1])},
+     'contents': String(alphabet="0123456789", size=1)},
     {'name': 'HTTP_version_4', 'contents': String(val_list=["."])},
-    {'name': 'HTTP_version_5', 'contents': INT_Str(mini=0, maxi=9)}]}
+    {'name': 'HTTP_version_5', 'contents': INT_Str(mini=0, maxi=9)} ]}
-Example 2: introducing shapes
+Example 2: Introducing shapes. (Refer to :ref:`dm:nt-keywords`)
 .. code-block:: python
     :linenos:
@@ -1618,13 +1621,22 @@ Example 2: introducing shapes
     # is equivalent to
     classic = {'name': 'something',
      'contents': [
-       {'weight': 1, 'contents': INT_Str(int_list=[333, 444])},
-       {'weight': 1, 'contents': String(val_list=["foo-bar"])},
-       {'weight': 1, 'contents': String(alphabet="0123456789", size=[1])},
-       {'weight': 1, 'contents': String(alphabet="th|is", size=[1])}]}
+       {'weight': 1,
+        'contents': [{'name': 'something_1',
+                      'contents': INT_Str(int_list=[333, 444])}]},
+       {'weight': 1,
+        'contents': [{'name': 'something_1',
+                      'contents': String(val_list=["foo-bar"])}]},
+       {'weight': 1,
+        'contents': [{'name': 'something_1',
+                      'contents': String(alphabet="0123456789", size=1)}]},
+       {'weight': 1,
+        'contents': [{'name': 'something_1',
+                      'contents': String(alphabet="th|is", size=1)}]}
+     ]}
-Example 3: using quantifiers and the escape character ``\``
+Example 3: Using quantifiers and the escape character ``\``.
 .. code-block:: python
     :linenos:
@@ -1640,9 +1652,9 @@ Example 3: using quantifiers and the escape character ``\``
     {'name': 'something_3', 'contents': String(val_list=["th"])},
     {'name': 'something_4', 'qty': (1, -1),
      'contents': String(val_list=["e"])},
-    {'name': 'something_5', 'contents': String(val_list=["end]"])},
+    {'name': 'something_5', 'contents': String(val_list=["end]"])}
  ]}
-Example 4: invalid regular expressions
+Example 4: Invalid regular expressions.
 .. code-block:: python
     :linenos:
From 8758939f4b4252b930fb89f5e76d7b818b8a9f30 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sat, 13 Aug 2016 22:40:19 +0200 Subject: [PATCH 67/80] Regex Parser update and DataModel Fix - Fix Node() cloning that was resetting some meta-data in non-terminal nodes even when the parameter 'ignore_frozen_state' was False. - Change the way a Regex Parser creates a non-terminal with choices and update the documentation accordingly.
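A minimal sketch (outside this patch) of what the new choice semantics look like from user code; it assumes the ModelHelper.create_graph_from_desc() API exercised elsewhere in the framework, so names may need adjusting:

    # Hedged sketch: exercising a choice-based regex description.
    from framework.data_model_helpers import ModelHelper

    regex_desc = {'name': 'regex',
                  'contents': '(333|444)|(foo|bar)|[\d]|[th|is]'}

    node = ModelHelper().create_graph_from_desc(regex_desc)
    node.make_determinist()

    # With MH.Pick semantics, each unfreeze()/to_bytes() cycle selects one
    # of the four alternatives (an INT_Str or String terminal node) instead
    # of walking nested one-node shapes.
    for _ in range(4):
        node.unfreeze()
        print(node.to_bytes())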
--- data_models/tuto.py | 5 ++++- docs/source/data_model.rst | 25 +++++++++--------------- framework/data_model.py | 29 +++++++++++++++++----------- framework/data_model_helpers.py | 9 ++++----- framework/fuzzing_primitives.py | 2 +- framework/generic_data_makers.py | 12 ++++++------ libs/debug_facility.py | 2 +- test/integration/test_integration.py | 3 ++- 8 files changed, 45 insertions(+), 42 deletions(-) diff --git a/data_models/tuto.py b/data_models/tuto.py index 89f7e66..edc2a44 100644 --- a/data_models/tuto.py +++ b/data_models/tuto.py @@ -430,11 +430,14 @@ def keycode_helper(blob, constraints, node_internals): 'contents': String(val_list=['Red', 'Green', 'Blue']) } ]} + regex_desc = {'name': 'regex', + 'contents': '(333|444)|(foo|bar)|[\d]|[th|is]'} self.register(test_node_desc, abstest_desc, abstest2_desc, separator_desc, sync_desc, len_gen_desc, misc_gen_desc, offset_gen_desc, - shape_desc, for_network_tg1, for_network_tg2, enc_desc, example_desc) + shape_desc, for_network_tg1, for_network_tg2, enc_desc, example_desc, + regex_desc) data_model = MyDF_DataModel() diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 5f5d65a..5e39efc 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -1599,7 +1599,7 @@ Example 1: The basics. :linenos: regex = {'name': 'HTTP_version', - 'contents': '(HTTP)/[0-9]\.(0|1|2|\x33|4|5|6|7|8|9)' + 'contents': '(HTTP)/[0-9]\.(0|1|2|\x33|4|5|6|7|8|9)'} # is equivalent to classic = {'name': 'HTTP_version', 'contents': [ @@ -1611,28 +1611,21 @@ Example 1: The basics. {'name': 'HTTP_version_5', 'contents': INT_Str(mini=0, maxi=9)} ]} -Example 2: Introducing shapes. (Refer to :ref:`dm:nt-keywords`) +Example 2: Introducing choice. (Refer to :ref:`dm:nt-keywords`) .. code-block:: python :linenos: regex = {'name': 'something', - 'contents': '(333|444)|foo-bar|\d|[th|is]' + 'contents': '(333|444)|(foo|bar)|[\d]|[th|is]'} # is equivalent to classic = {'name': 'something', + 'shape_type': MH.Pick, 'contents': [ - {'weight': 1, - 'contents': [{'name': 'something_1', - 'contents': INT_Str(int_list=[333, 444])}]}, - {'weight': 1, - 'contents': [{'name': 'something_1', - 'contents': String(val_list=["foo-bar"])}]}, - {'weight': 1, - 'contents': [{'name': 'something_1', - 'contents': String(alphabet="0123456789", size=1)}]}, - {'weight': 1, - 'contents': [{'name': 'something_1', - 'contents': String(alphabet="th|is", size=1)}]} + {'name':'something_1', 'contents':INT_Str(int_list=[333, 444])}, + {'name':'something_1', 'contents':String(val_list=["foo", "bar"])}, + {'name':'something_1', 'contents':String(alphabet="0123456789",size=1)}, + {'name':'something_1', 'contents':String(alphabet="th|is", size=1)} ]} @@ -1642,7 +1635,7 @@ Example 3: Using quantifiers and the escape character ``\``. 
:linenos: regex = {'name': 'something', - 'contents': '\(this[is]{3,4}the+end\]' + 'contents': '\(this[is]{3,4}the+end\]'} # is equivalent to classic = {'name': 'something', 'contents': [ diff --git a/framework/data_model.py b/framework/data_model.py index e5ea6d7..9c8b81e 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -2502,8 +2502,6 @@ def import_subnodes_with_csts(self, wlnode_list, separator=None): def import_subnodes_full_format(self, subnodes_csts=None, frozen_node_list=None, internals=None, nodes_drawn_qty=None, custo=None, exhaust_info=None, separator=None): - self.reset(nodes_drawn_qty=nodes_drawn_qty, custo=custo, exhaust_info=exhaust_info) - if internals is not None: # This case is only for Node.set_contents() usage @@ -2516,6 +2514,10 @@ def import_subnodes_full_format(self, subnodes_csts=None, frozen_node_list=None, elif subnodes_csts is not None: # This case is used by self.make_private_subnodes() + # In this case, we can call reset() as self.make_private_subnodes() provide us with + # the parameters we need. + self.reset(nodes_drawn_qty=nodes_drawn_qty, custo=custo, exhaust_info=exhaust_info) + self.subnodes_csts = subnodes_csts self.frozen_node_list = frozen_node_list if separator is not None: @@ -3119,8 +3121,10 @@ def get_subnodes_with_csts(self): self.expanded_nodelist = self._generate_expanded_nodelist(node_list) self.expanded_nodelist_origsz = len(self.expanded_nodelist) - self.expanded_nodelist = self.expanded_nodelist[:self.expanded_nodelist_sz] - + if self.expanded_nodelist_sz > 0: + self.expanded_nodelist = self.expanded_nodelist[:self.expanded_nodelist_sz] + else: + self.expanded_nodelist = self.expanded_nodelist[:1] elif not self.expanded_nodelist: # that is == [] self.expanded_nodelist = self._generate_expanded_nodelist(node_list) self.expanded_nodelist_origsz = len(self.expanded_nodelist) @@ -4249,19 +4253,21 @@ def unfreeze(self, conf=None, recursive=True, dont_change_state=False, ignore_en node_list, idx, self.component_seed = self._get_next_random_component(self.subnodes_csts, excluded_idx=self.excluded_components, seed=self.component_seed) + + fresh_expanded_nodelist = self._generate_expanded_nodelist(node_list) if self.expanded_nodelist is None: - self.expanded_nodelist = self._generate_expanded_nodelist(node_list) - self.expanded_nodelist_origsz = len(self.expanded_nodelist) + self.expanded_nodelist_origsz = len(fresh_expanded_nodelist) if self.expanded_nodelist_sz is not None: # In this case we need to go back to the previous state, thus +1 self.expanded_nodelist_sz += 1 - self.expanded_nodelist = self.expanded_nodelist[:self.expanded_nodelist_sz] + self.expanded_nodelist = fresh_expanded_nodelist[:self.expanded_nodelist_sz] else: # This case should not exist, a priori - self.expanded_nodelist_sz = len(self.expanded_nodelist) + self.expanded_nodelist_sz = len(fresh_expanded_nodelist) + self.expanded_nodelist = fresh_expanded_nodelist else: - assert(node_list is not None) - self.expanded_nodelist.append(node_list) + # assert self.expanded_nodelist_origsz > self.expanded_nodelist_sz + self.expanded_nodelist.append(fresh_expanded_nodelist[self.expanded_nodelist_sz]) self.expanded_nodelist_sz += 1 else: # In this case the states are random, thus we @@ -4804,7 +4810,8 @@ def __init__(self, name, base_node=None, copy_dico=None, ignore_frozen_state=Fal copy_dico (dict): [If `base_node` provided] It is used internally during the cloning process, and should not be used for any functional purpose. 
new_env (bool): [If `base_node` provided] If True, the `base_node` attached :class:`Env()` - will be copied. Otherwise, the same will be used. + will be copied. Otherwise, the same will be used. If `ignore_frozen_state` is True, a + new :class:`Env()` will be used. ''' assert '/' not in name # '/' is a reserved character diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 5bd1d39..ad5544a 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1882,15 +1882,14 @@ def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None return [node, qty[0], -1 if qty[1] is None else qty[1]] def _create_non_terminal_node(self): - non_terminal = [1, [MH.Copy + MH.Ordered]] + if self.choice: + non_terminal = [1, [MH.Copy + MH.Pick]] + else: + non_terminal = [1, [MH.Copy + MH.Ordered]] formatted_terminal = non_terminal[1] for terminal in self.nodes: formatted_terminal.append(terminal) - if self.choice and len(self.nodes) > 1: - non_terminal.append(1) - formatted_terminal = [MH.Copy + MH.Ordered] - non_terminal.append(formatted_terminal) return non_terminal diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index 311655f..daa849e 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -521,7 +521,7 @@ def init_specific(self, consume_also_singleton=False): self.firstcall = True def consume_node(self, node): - if (node.is_exhausted() and not self.consume_also_singleton): + if node.is_exhausted() and not self.consume_also_singleton: # in this case we ignore the node return False else: diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index 9e113fa..cab6606 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -56,7 +56,7 @@ def truncate_info(info, max_size=60): args={'path': ('graph path regexp to select nodes on which' \ ' the disruptor should apply', None, str), 'nt_only': ('walk through non-terminal nodes only', False, bool), - 'singleton': ('consume also terminal nodes with only one possible value', False, bool), + 'singleton': ('consume also terminal nodes with only one possible value', True, bool), 'fix': ('fix constraints while walking', True, bool)}) class sd_iter_over_data(StatefulDisruptor): ''' @@ -100,7 +100,7 @@ def disrupt_data(self, dm, target, data): exported_node = rnode if self.fix: - exported_node.unfreeze(recursive=True, reevaluate_constraints=True) + exported_node.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) exported_node.freeze() data.add_info('fix constraints (if any)') @@ -518,7 +518,7 @@ def disrupt_data(self, dm, target, data): corrupted_seed = Node(self.seed.name, base_node=self.seed, ignore_frozen_state=False, new_env=True) self.seed.env.remove_node_to_corrupt(consumed_node) - corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True) + corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) corrupted_seed.freeze() data.add_info('sample index: {:d}'.format(self.idx)) @@ -894,11 +894,11 @@ def disrupt_data(self, dm, target, prev_data): return prev_data for n in l: - n.unfreeze(recursive=True, reevaluate_constraints=True) - prev_data.add_info("release constraints from the node '%s'" % n.name) + n.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) + prev_data.add_info("release constraints from the node '{!s}'".format(n.name)) else: - prev_data.node.unfreeze(recursive=True, 
reevaluate_constraints=True)
+            prev_data.node.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True)
             prev_data.add_info('release constraints from the root')
         prev_data.node.freeze()
diff --git a/libs/debug_facility.py index 6022da7..6953d8b 100644 --- a/libs/debug_facility.py +++ b/libs/debug_facility.py @@ -32,7 +32,7 @@
 VT_DEBUG = False
 # related to fuzzing_primitives.py
-MW_DEBUG = True
+MW_DEBUG = False
 try:
     from xtermcolor import colorize
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 160e626..0e3d1f1 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -3102,7 +3102,8 @@ def test_tuto_specifics(self):
         dm = fmk.get_data_model_by_name('mydf')
         dm.load_data_model(fmk._name2dm)
-        data_id_list = ['misc_gen', 'len_gen', 'exist_cond', 'separator', 'AbsTest', 'AbsTest2']
+        data_id_list = ['misc_gen', 'len_gen', 'exist_cond', 'separator', 'AbsTest', 'AbsTest2',
+                        'regex']
         loop_cpt = 5
         for data_id in data_id_list:
From 6c7e849c937a4ece72e7042e8ddbb2dd49e49f50 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 01:37:46 +0200 Subject: [PATCH 68/80] Fix tTYPE disruptor + Add new fuzzing test cases for String-based nodes - Fix tTYPE disruptor. Previously, the terminal node that was currently fuzzed by tTYPE did not respect constraints associated with it, even with the FIX disruptor called on it. The issue came from the fact that the terminal node internals were replaced during the fuzzing process. Hence, all associated constraints (SyncObjs...) were lost temporarily, until tTYPE switched to another node (as the recovery process set back the original internals). - Fix the disruptor FIX when the parameter 'path' was provided. - Add the new parameter 'fix' to tTYPE and apply it by default. - Add new fuzzing test cases for String-based nodes. - Remove obsolete tTERM disruptor. --- docs/source/disruptors.rst | 46 +------------ framework/fuzzing_primitives.py | 24 +++---- framework/generic_data_makers.py | 96 +++++----------------------- framework/value_types.py | 14 ++-- libs/debug_facility.py | 2 +- test/integration/test_integration.py | 78 ++++++++++------------ 6 files changed, 67 insertions(+), 193 deletions(-) diff --git a/docs/source/disruptors.rst b/docs/source/disruptors.rst index 6cdc3c8..4bb7c92 100644 --- a/docs/source/disruptors.rst +++ b/docs/source/disruptors.rst @@ -363,49 +363,6 @@ Parameters:
       | | default: None [type: str, list, tuple]
-tTERM (OBSOLETE) - Basic Alteration of Terminal Node
-----------------------------------------------------
-
-Description:
-  Perform alterations on terminal nodes (one at a time), without
-  considering its type.
-
-Reference:
-  :class:`framework.generic_data_makers.sd_fuzz_terminal_nodes`
-
-Parameters:
-  .. code-block:: none
-
-        generic args:
-        |_ clone_node
-        |      | desc: if True the dmaker will always return a copy of the node.
(for - | | stateless diruptors dealing with big data it can be usefull - | | to it to False) - | | default: True [type: bool] - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - |_ runs_per_node - | | desc: maximum number of test cases for a single node (-1 means until - | | the end) - | | default: -1 [type: int] - specific args: - |_ determinist - | | desc: make the disruptor determinist - | | default: True [type: bool] - |_ alt_values - | | desc: list of alternative values to be tested (replace the current - | | base list used by the disruptor) - | | default: None [type: list] - |_ ascii - | | desc: enforce all outputs to be ascii 7bits - | | default: False [type: bool] - - .. _dis:ttype: tTYPE - Advanced Alteration of Terminal Typed Node @@ -449,6 +406,9 @@ Parameters: | | data structure. Otherwise, fuzz weight (if specified in the | | data model) is used for ordering | | default: False [type: bool] + |_ fix + | | desc: fix constraints while walking + | | default: True [type: bool] |_ deep | | desc: when set to True, if a node structure has changed, the modelwalker | | will reset its walk through the children nodes diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index daa849e..f215726 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -774,7 +774,7 @@ def consume_node(self, node): self.current_fuzz_vt_list = None if not self.current_fuzz_vt_list: - self.orig_internal = node.cc + self.orig_internal_vt = node.cc.value_type self.orig_value = node.to_bytes() self.current_fuzz_vt_list = self._create_fuzzy_vt_list(node) @@ -785,7 +785,8 @@ def consume_node(self, node): if self.current_fuzz_vt_list: vt_obj = self.current_fuzz_vt_list.pop(0) - node.set_values(value_type=vt_obj, ignore_entanglement=True) + # node.set_values(value_type=vt_obj, ignore_entanglement=True) + node.cc.value_type=vt_obj node.make_finite() node.make_determinist() node.unfreeze(ignore_entanglement=True) @@ -800,20 +801,11 @@ def save_node(self, node): pass def recover_node(self, node): - node.cc = self.orig_internal - # if node.entangled_nodes is None: - # return - # - # for n in node.entangled_nodes: - # if n is node: - # continue - # if isinstance(n.cc, dm.NodeInternals_TypedValue): - # n.cc.import_value_type(self.orig_internal.value_type) - # else: - # raise ValueError - # - # node.unfreeze(recursive=False, ignore_entanglement=False) - + # We avoid changing the node internals because of specific attributes (that may exist) + # regarding node synchronization, and so on. Thus, we only modify what we need, + # namely the value_type. + node.cc.value_type = self.orig_internal_vt + node.set_frozen_value(self.orig_value) def need_reset(self, node): if node.is_nonterm(): diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index cab6606..57dc971 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -100,7 +100,7 @@ def disrupt_data(self, dm, target, data): exported_node = rnode if self.fix: - exported_node.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) + exported_node.unfreeze(recursive=True, reevaluate_constraints=True) exported_node.freeze() data.add_info('fix constraints (if any)') @@ -118,7 +118,8 @@ def disrupt_data(self, dm, target, data): 'by the data structure. 
Otherwise, fuzz weight (if specified ' \ 'in the data model) is used for ordering', False, bool), 'deep': ('when set to True, if a node structure has changed, the modelwalker ' \ - 'will reset its walk through the children nodes', True, bool)}) + 'will reset its walk through the children nodes', True, bool), + 'fix': ('fix constraints while walking', True, bool)}) class sd_fuzz_typed_nodes(StatefulDisruptor): ''' Perform alterations on typed nodes (one at a time) according to @@ -176,9 +177,15 @@ def disrupt_data(self, dm, target, data): if self.clone_node: exported_node = Node(rnode.name, base_node=rnode, new_env=True) - data.update_from_node(exported_node) else: - data.update_from_node(rnode) + exported_node = rnode + + if self.fix: + exported_node.unfreeze(recursive=True, reevaluate_constraints=True) + exported_node.freeze() + data.add_info('fix constraints (if any)') + + data.update_from_node(exported_node) return data @@ -518,7 +525,7 @@ def disrupt_data(self, dm, target, data): corrupted_seed = Node(self.seed.name, base_node=self.seed, ignore_frozen_state=False, new_env=True) self.seed.env.remove_node_to_corrupt(consumed_node) - corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) + corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True) corrupted_seed.freeze() data.add_info('sample index: {:d}'.format(self.idx)) @@ -894,14 +901,16 @@ def disrupt_data(self, dm, target, prev_data): return prev_data for n in l: - n.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) + n.unfreeze(recursive=True, reevaluate_constraints=True) prev_data.add_info("release constraints from the node '{!s}'".format(n.name)) + n.freeze() else: - prev_data.node.unfreeze(recursive=True, reevaluate_constraints=True, dont_change_state=True) + prev_data.node.unfreeze(recursive=True, reevaluate_constraints=True) prev_data.add_info('release constraints from the root') prev_data.node.freeze() + prev_data.node.show() if self.clone_node: exported_node = Node(prev_data.node.name, base_node=prev_data.node, new_env=True) @@ -1042,76 +1051,3 @@ def disrupt_data(self, dm, target, prev_data): return prev_data - -####################### -# OBSOLETE DISRUPTORS # -####################### - - -@disruptor(tactics, dtype="tTERM", weight=1, - gen_args = GENERIC_ARGS, - args={'ascii': ('enforce all outputs to be ascii 7bits', False, bool), - 'determinist': ('make the disruptor determinist', True, bool), - 'alt_values': ('list of alternative values to be tested ' \ - '(replace the current base list used by the disruptor)', None, list)}) -class sd_fuzz_terminal_nodes(StatefulDisruptor): - ''' - [OBSOLETE] Perform alterations on terminal nodes (one at a time), - without considering its type. 
- ''' - def setup(self, dm, user_input): - return True - - def set_seed(self, prev_data): - if prev_data.node is None: - prev_data.add_info('DONT_PROCESS_THIS_KIND_OF_DATA') - return prev_data - - prev_data.node.make_finite(all_conf=True, recursive=True) - - self.consumer = TermNodeDisruption(max_runs_per_node=self.max_runs_per_node, - min_runs_per_node=self.min_runs_per_node, - respect_order=False, - base_list=self.alt_values) - self.consumer.determinist = self.determinist - if self.ascii: - self.consumer.ascii = True - - self.walker = iter(ModelWalker(prev_data.node, self.consumer, max_steps=self.max_steps, initial_step=self.init)) - - self.max_runs = None - self.current_node = None - self.run_num = None - - def disrupt_data(self, dm, target, data): - try: - rnode, consumed_node, orig_node_val, idx = next(self.walker) - except StopIteration: - data.make_unusable() - self.handover() - return data - - new_max_runs = self.consumer.max_nb_runs_for(consumed_node) - if self.max_runs != new_max_runs or self.current_node != consumed_node: - self.current_node = consumed_node - self.max_runs = new_max_runs - self.run_num = 1 - else: - self.run_num +=1 - - corrupt_node_bytes = consumed_node.to_bytes() - - data.add_info('model walking index: {:d}'.format(idx)) - data.add_info(' |_ run: {:d} / {:d} (max)'.format(self.run_num, self.max_runs)) - data.add_info('current fuzzed node: {!s}'.format(consumed_node.get_path_from(rnode))) - data.add_info('original val: {!s}'.format(truncate_info(orig_node_val))) - data.add_info('corrupted val: {!s}'.format(truncate_info(corrupt_node_bytes))) - - if self.clone_node: - exported_node = Node(rnode.name, base_node=rnode, new_env=True) - data.update_from_node(exported_node) - else: - data.update_from_node(rnode) - - return data - diff --git a/framework/value_types.py b/framework/value_types.py index 8c3bcaa..90c48b8 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -907,7 +907,7 @@ def enable_fuzz_mode(self): if val != b'': self.val_list_fuzzy.append(val) - val = orig_val + b"X"*(self.max_sz*42) + val = orig_val + b"X"*(self.max_sz*100) self.val_list_fuzzy.append(val) self.val_list_fuzzy.append(b'\x00'*sz if sz>0 else b'\x00') @@ -918,15 +918,15 @@ def enable_fuzz_mode(self): if is_even: self.val_list_fuzzy.append(b'%n' * cpt) self.val_list_fuzzy.append(b'%s' * cpt) - self.val_list_fuzzy.append(b'\r\n' * cpt) else: self.val_list_fuzzy.append(orig_val[:1] + b'%n' * cpt) self.val_list_fuzzy.append(orig_val[:1] + b'%s' * cpt) - self.val_list_fuzzy.append(orig_val[:1] + b'\r\n' * cpt) - else: - self.val_list_fuzzy.append(b'%n%n%n') - self.val_list_fuzzy.append(b'%s%s%s') - self.val_list_fuzzy.append(b'\r\n') + + self.val_list_fuzzy.append(orig_val + b'%n'*400) + self.val_list_fuzzy.append(orig_val + b'%s'*400) + self.val_list_fuzzy.append(orig_val + b'\"%n\"'*400) + self.val_list_fuzzy.append(orig_val + b'\"%s\"'*400) + self.val_list_fuzzy.append(orig_val + b'\r\n'*100) if self.extra_fuzzy_list: for v in self.extra_fuzzy_list: diff --git a/libs/debug_facility.py b/libs/debug_facility.py index 6953d8b..8061cb1 100644 --- a/libs/debug_facility.py +++ b/libs/debug_facility.py @@ -22,7 +22,7 @@ ################################################################################ DEBUG = False -LEVEL = 2 +LEVEL = 0 # related to data_model.py DM_DEBUG = False diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 0e3d1f1..026f8e9 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py 
@@ -946,7 +946,7 @@ def test_TypedNode_1(self): break print('\nTurn number when Node has changed: %r, number of test cases: %d' % (turn_nb_list, i)) - good_list = [1, 9, 17, 25, 33, 42, 51, 59, 67, 75, 83, 91, 99, 107, 115, 124, 132, 140, 148, 157, 166, 172, 187] + good_list = [1, 13, 23, 33, 43, 52, 61, 71, 81, 91, 103, 113, 123, 133, 143, 152, 162, 172, 182, 191, 200, 206, 221] msg = "If Fuzzy_.int_list have been modified in size, the good_list should be updated.\n" \ "If BitField are in random mode [currently put in determinist mode], the fuzzy_mode can produce more" \ " or less value depending on drawn value when .get_value() is called (if the drawn value is" \ @@ -1594,12 +1594,15 @@ def test_basics(self): b' [!] ++++++++++ [!] ::IAA::AAA::AAA::AAA::>:: [!] ', # [8] could change has it is a random corrupt_bit b' [!] ++++++++++ [!] ::AAAA::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::::AAA::AAA::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'XXX'*100 + b'::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%n::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%s::AAA::AAA::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::A\r\n::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'%n' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'%s' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\"%n\"' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\"%s\"' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\r\n' * 100 + b'::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::AAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::file%n%n%n%nname.txt::AAA::AAA::AAA::>:: [!] ', @@ -1610,15 +1613,18 @@ def test_basics(self): b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x01:: [!] ', b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x80:: [!] ', b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::\x7f:: [!] ', - b' [!] ++++++++++ [!] ::AAQ::AAA::>:: [!] ', # [26] could change has it is a random corrupt_bit + b' [!] ++++++++++ [!] ::AAQ::AAA::>:: [!] ', # [30] could change has it is a random corrupt_bit b' [!] ++++++++++ [!] ::AAAA::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'XXX'*100 + b'::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::\x00\x00\x00::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%n::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::A%s::AAA::>:: [!] ', - b' [!] ++++++++++ [!] ::A\r\n::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'%n' * 400 + b'::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'%s' * 400 + b'::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\"%n\"' * 400 + b'::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\"%s\"' * 400 + b'::AAA::>:: [!] ', + b' [!] ++++++++++ [!] ::AAA' + b'\r\n' * 100 + b'::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::../../../../../../etc/password::AAA::>:: [!] ', b' [!] ++++++++++ [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', b' [!] ++++++++++ [!] 
::file%n%n%n%nname.txt::AAA::>:: [!] ', @@ -1637,15 +1643,18 @@ def test_basics(self): b' [!] >>>>>>>>>> [!] ::\x01:: [!] ', b' [!] >>>>>>>>>> [!] ::\x80:: [!] ', b' [!] >>>>>>>>>> [!] ::\x7f:: [!] ', - b' [!] >>>>>>>>>> [!] ::QAA::AAA::AAA::AAA::>:: [!] ', # [51] could change has it is a random corrupt_bit + b' [!] >>>>>>>>>> [!] ::QAA::AAA::AAA::AAA::>:: [!] ', # [59] could change has it is a random corrupt_bit b' [!] >>>>>>>>>> [!] ::AAAA::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::::AAA::AAA::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'XXX'*100 + b'::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%n::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%s::AAA::AAA::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::A\r\n::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'%n' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'%s' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\"%n\"' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\"%s\"' * 400 + b'::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\r\n' * 100 + b'::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::AAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::AAA::AAA::>:: [!] ', @@ -1656,15 +1665,18 @@ def test_basics(self): b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x01:: [!] ', b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x80:: [!] ', b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::\x7f:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAC::AAA::>:: [!] ', # [69] could change has it is a random corrupt_bit + b' [!] >>>>>>>>>> [!] ::AAC::AAA::>:: [!] ', # [81] could change has it is a random corrupt_bit b' [!] >>>>>>>>>> [!] ::AAAA::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::AAAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' - b'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'XXX'*100 + b'::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::\x00\x00\x00::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%n::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::A%s::AAA::>:: [!] ', - b' [!] >>>>>>>>>> [!] ::A\r\n::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'%n' * 400 + b'::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'%s' * 400 + b'::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\"%n\"' * 400 + b'::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\"%s\"' * 400 + b'::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA' + b'\r\n' * 100 + b'::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::../../../../../../etc/password::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::../../../../../../Windows/system.ini::AAA::>:: [!] ', b' [!] >>>>>>>>>> [!] ::file%n%n%n%nname.txt::AAA::>:: [!] 
', @@ -1687,10 +1699,10 @@ def test_basics(self):
                                                              max_steps=200):
             val = rnode.to_bytes()
             print(colorize('[%d] ' % idx + repr(val), rgb=Color.INFO))
-            if idx not in [8, 26, 51, 69]:
+            if idx not in [8, 30, 59, 81]:
                 self.assertEqual(val, raw_vals[idx - 1])
-        self.assertEqual(idx, 86)  # should be even
+        self.assertEqual(idx, 102)  # should be even
     def test_TypedNodeDisruption_1(self):
         nt = self.dm.get_data('Simple')
@@ -1724,7 +1736,7 @@ def test_TypedNodeDisruption_3(self):
         for rnode, consumed_node, orig_node_val, idx in ModelWalker(nt, tn_consumer, make_determinist=True,
                                                                     max_steps=-1):
             print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO))
-        self.assertEqual(idx, 310)
+        self.assertEqual(idx, 450)
     def test_TypedNodeDisruption_BitfieldCollapse(self):
         '''
@@ -1767,32 +1779,6 @@ def test_TypedNodeDisruption_BitfieldCollapse(self):
             rnode['smscmd/TP-DCS$'].show()
             self.assertEqual(rnode['smscmd/TP-DCS'].to_bytes(), corrupt_table[idx])
-    def test_TermNodeDisruption_1(self):
-        simple = self.dm.get_data('Simple')
-        consumer = TermNodeDisruption()
-        for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True,
-                                                                    max_steps=-1):
-            print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO))
-            # print('original val: %s' % repr(orig_node_val))
-            # print('corrupted val: %s' % repr(consumed_node.to_bytes()))
-        self.assertEqual(idx, 266)
-
-    def test_TermNodeDisruption_2(self):
-        simple = self.dm.get_data('Simple')
-        consumer = TermNodeDisruption(max_runs_per_node=-1, min_runs_per_node=2)
-        for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True,
-                                                                    max_steps=-1):
-            print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO))
-        self.assertEqual(idx, 91)
-
-    def test_TermNodeDisruption_3(self):
-        simple = self.dm.get_data('Simple')
-        consumer = TermNodeDisruption(base_list=['1_BANG_1', '2_PLOUF_2'])
-        for rnode, consumed_node, orig_node_val, idx in ModelWalker(simple, consumer, make_determinist=True,
-                                                                    max_steps=-1):
-            print(colorize('[%d] ' % idx + repr(rnode.to_bytes()), rgb=Color.INFO))
-        self.assertEqual(idx, 152)
-
     def test_AltConfConsumer_1(self):
         simple = self.dm.get_data('Simple')
         consumer = AltConfConsumer(max_runs_per_node=-1, min_runs_per_node=-1)
@@ -3432,7 +3418,7 @@ def test_scenario_infra(self):
         self.assertEqual(code_vector, ['DataUnusable', 'HandOver', 'DataUnusable', 'HandOver',
                                        'DPHandOver', 'NoMoreData'])
-        self.assertEqual(base_qty, 37)
+        self.assertEqual(base_qty, 53)
         print('\n*** test scenario SC_AUTO_REGEN')
From 2220381162214a478e426daa693295ba6f0caa2e Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 12:02:35 +0200 Subject: [PATCH 69/80] Update/Fix counter-intuitive Node.set_values()/... behavior A Node on which one of the following methods was called: - set_subnodes_basic() - set_subnodes_with_csts() - set_values() - set_generator_func() - set_func() lost all its NodeInternals attributes, such as its possible constraints with the other nodes in the graph. These methods take a new parameter 'preserve_node' (defaulting to True) which preserves the previous non-specific NodeInternals attributes when they are called. This enables simpler modification of a graph. Note on disruptors: - tTYPE leverages that behavior change. - SIZE and C will now avoid breaking a graph by calling such methods, which enables the user to call the disruptor FIX after them and obtain the desired result.
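To make the behavior change concrete, a hedged sketch (outside this patch); 'nt' stands for any pre-existing node carrying synchronization attributes, and the set_values() signature is the one introduced below:

    from framework.value_types import String

    # New default: the non-specific NodeInternals attributes of 'nt'
    # (SyncObjs, generic attributes, ...) survive the contents swap,
    # so the FIX disruptor can still re-evaluate the constraints.
    nt.set_values(value_type=String(val_list=['A', 'B']))

    # The former behavior (everything reset) remains reachable:
    nt.set_values(value_type=String(val_list=['A', 'B']), preserve_node=False)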
--- framework/data_model.py | 87 +++++++++++++++++++++++++-------- framework/fuzzing_primitives.py | 76 ++-------------------------- 2 files changed, 69 insertions(+), 94 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py index 9c8b81e..9913b78 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -407,6 +407,9 @@ def flatten(nested): ### Materials for Node Synchronization ### +# WARNING: If new SyncObj are created or evolve, don't forget to update +# NodeInternals.set_contents_from() accordingly. + class SyncScope(Enum): Qty = 1 QtyFrom = 2 @@ -911,6 +914,24 @@ def make_private(self, ignore_frozen_state, accept_external_entanglement, delaye self._make_private_specific(ignore_frozen_state, accept_external_entanglement) self.custo = copy.copy(self.custo) + def set_contents_from(self, node_internals): + if node_internals is None or node_internals.__class__ == NodeInternals_Empty: + return + + self.private = node_internals.private + self.__attrs = node_internals.__attrs + self._sync_with = node_internals._sync_with + + if self.__class__ == node_internals.__class__: + self.custo = node_internals.custo + self.absorb_constraints = node_internals.absorb_constraints + self.absorb_helper = node_internals.absorb_helper + else: + if self._sync_with is not None and SyncScope.Size in self._sync_with: + # This SyncScope is currently only supported by String-based + # NodeInternals_TypedValue + del self._sync_with[SyncScope.Size] + # Called near the end of Node copy (Node.set_contents) to update # node references inside the NodeInternals def _update_node_refs(self, node_dico, debug): @@ -2372,7 +2393,7 @@ def _init_specific(self, arg): self.encoder = None self.reset() - def reset(self, nodes_drawn_qty=None, custo=None, exhaust_info=None): + def reset(self, nodes_drawn_qty=None, custo=None, exhaust_info=None, preserve_node=False): self.frozen_node_list = None self.subnodes_set = set() self.subnodes_csts = [] @@ -2402,7 +2423,9 @@ def reset(self, nodes_drawn_qty=None, custo=None, exhaust_info=None): self.component_seed = exhaust_info[5] self._perform_first_step = exhaust_info[6] - if custo is None: + if preserve_node: + pass + elif custo is None: self.customize(self.default_custo) else: self.customize(custo) @@ -2425,8 +2448,8 @@ def __iter_csts_verbose(self, node_list): yield idx, delim, sublist idx += 1 - def import_subnodes_basic(self, node_list, separator=None): - self.reset() + def import_subnodes_basic(self, node_list, separator=None, preserve_node=False): + self.reset(preserve_node=preserve_node) self.separator = separator @@ -2445,8 +2468,8 @@ def import_subnodes_basic(self, node_list, separator=None): self.subnodes_set.add(e) - def import_subnodes_with_csts(self, wlnode_list, separator=None): - self.reset() + def import_subnodes_with_csts(self, wlnode_list, separator=None, preserve_node=False): + self.reset(preserve_node=preserve_node) self.separator = separator @@ -5303,11 +5326,15 @@ def _finalize_nonterm_node(self, conf, depth=None): "of this non-terminal node: %s in conf : '%s'." 
% (self.name, conf)) raise ValueError - def set_subnodes_basic(self, node_list, conf=None, ignore_entanglement=False, separator=None): + def set_subnodes_basic(self, node_list, conf=None, ignore_entanglement=False, separator=None, + preserve_node=True): conf = self.__check_conf(conf) - self.internals[conf] = NodeInternals_NonTerm() - self.internals[conf].import_subnodes_basic(node_list, separator=separator) + new_internals = NodeInternals_NonTerm() + if preserve_node: + new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals + self.internals[conf].import_subnodes_basic(node_list, separator=separator, preserve_node=preserve_node) self._finalize_nonterm_node(conf) if not ignore_entanglement and self.entangled_nodes is not None: @@ -5316,11 +5343,15 @@ def set_subnodes_basic(self, node_list, conf=None, ignore_entanglement=False, se - def set_subnodes_with_csts(self, wlnode_list, conf=None, ignore_entanglement=False, separator=None): + def set_subnodes_with_csts(self, wlnode_list, conf=None, ignore_entanglement=False, separator=None, + preserve_node=True): conf = self.__check_conf(conf) - self.internals[conf] = NodeInternals_NonTerm() - self.internals[conf].import_subnodes_with_csts(wlnode_list, separator=separator) + new_internals = NodeInternals_NonTerm() + if preserve_node: + new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals + self.internals[conf].import_subnodes_with_csts(wlnode_list, separator=separator, preserve_node=preserve_node) self._finalize_nonterm_node(conf) if not ignore_entanglement and self.entangled_nodes is not None: @@ -5328,23 +5359,30 @@ def set_subnodes_with_csts(self, wlnode_list, conf=None, ignore_entanglement=Fal e.set_subnodes_basic(wlnode_list=wlnode_list, conf=conf, ignore_entanglement=True, separator=separator) - def set_subnodes_full_format(self, full_list, conf=None, separator=None): + def set_subnodes_full_format(self, full_list, conf=None, separator=None, preserve_node=True): conf = self.__check_conf(conf) - self.internals[conf] = NodeInternals_NonTerm() + new_internals = NodeInternals_NonTerm() + if preserve_node: + new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals self.internals[conf].import_subnodes_full_format(subnodes_csts=full_list, separator=separator) self._finalize_nonterm_node(conf) - def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglement=False): + def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglement=False, + preserve_node=True): conf = self.__check_conf(conf) + new_internals = NodeInternals_TypedValue() + if preserve_node: + new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals + if val_list is not None: - self.internals[conf] = NodeInternals_TypedValue() self.internals[conf].import_value_type(value_type=fvt.String(val_list=val_list)) elif value_type is not None: - self.internals[conf] = NodeInternals_TypedValue() self.internals[conf].import_value_type(value_type) else: @@ -5359,10 +5397,14 @@ def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglem def set_func(self, func, func_node_arg=None, func_arg=None, - conf=None, ignore_entanglement=False, provide_helpers=False): + conf=None, ignore_entanglement=False, provide_helpers=False, + preserve_node=True): conf = self.__check_conf(conf) - self.internals[conf] = NodeInternals_Func() + new_internals = NodeInternals_Func() + if preserve_node: + 
new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals self.internals[conf].import_func(func, fct_node_arg=func_node_arg, fct_arg=func_arg, provide_helpers=provide_helpers) @@ -5376,10 +5418,13 @@ def set_func(self, func, func_node_arg=None, func_arg=None, def set_generator_func(self, gen_func, func_node_arg=None, func_arg=None, conf=None, ignore_entanglement=False, - provide_helpers=False): + provide_helpers=False, preserve_node=True): conf = self.__check_conf(conf) - self.internals[conf] = NodeInternals_GenFunc() + new_internals = NodeInternals_GenFunc() + if preserve_node: + new_internals.set_contents_from(self.internals[conf]) + self.internals[conf] = new_internals self.internals[conf].import_generator_func(gen_func, generator_node_arg=func_node_arg, generator_arg=func_arg, provide_helpers=provide_helpers) diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index f215726..52492f0 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -690,71 +690,6 @@ def wait_for_exhaustion(self, node): return 0 -class TermNodeDisruption(NodeConsumerStub): - - def init_specific(self, base_list=None): - self._internals_criteria = dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable], - negative_attrs=[dm.NodeInternals.Separator], - node_kinds=[dm.NodeInternals_Term]) - self.enforce_ascii = False - self.determinist = True - - if base_list is None: - self.val_list = [ - b'', - b'\x00', - b'AhAh%s%s%s', - b'BBB%n%n%n%n%n', - b'\r\n' - ] - else: - self.val_list = list(base_list) - - self.orig_internals = None - self.need_reset_when_structure_change = True - - - def consume_node(self, node): - self.orig_internal = node.cc - orig_val = node.to_bytes() - new_val_list = copy.copy(self.val_list) - - try: - val = corrupt_bits(orig_val, n=1, ascii=self.enforce_ascii) - new_val_list.insert(0, val) - except: - print("Problematic (empty) node '%s'" % node.name) - - val = orig_val + b"A"*30 - new_val_list.insert(0, val) - - node.set_values(val_list=new_val_list) - node.make_finite() - if self.determinist: - node.make_determinist() - else: - node.make_random() - - return True - - def save_node(self, node): - pass - - def recover_node(self, node): - node.cc = self.orig_internal - if node.entangled_nodes is None: - return - - for n in node.entangled_nodes: - if n is node: - continue - if isinstance(n.cc, dm.NodeInternals_TypedValue): - n.cc.import_value_type(self.orig_internal.value_type) - else: - raise ValueError - node.unfreeze(recursive=False) - - class TypedNodeDisruption(NodeConsumerStub): def init_specific(self, **kwargs): @@ -774,7 +709,7 @@ def consume_node(self, node): self.current_fuzz_vt_list = None if not self.current_fuzz_vt_list: - self.orig_internal_vt = node.cc.value_type + self.orig_internal = node.cc self.orig_value = node.to_bytes() self.current_fuzz_vt_list = self._create_fuzzy_vt_list(node) @@ -785,8 +720,7 @@ def consume_node(self, node): if self.current_fuzz_vt_list: vt_obj = self.current_fuzz_vt_list.pop(0) - # node.set_values(value_type=vt_obj, ignore_entanglement=True) - node.cc.value_type=vt_obj + node.set_values(value_type=vt_obj, ignore_entanglement=True, preserve_node=True) node.make_finite() node.make_determinist() node.unfreeze(ignore_entanglement=True) @@ -801,11 +735,7 @@ def save_node(self, node): pass def recover_node(self, node): - # We avoid changing the node internals because of specific attributes (that may exist) - # regarding node synchronization, and so on. 
Thus, we only modify what we need, - # namely the value_type. - node.cc.value_type = self.orig_internal_vt - node.set_frozen_value(self.orig_value) + node.cc = self.orig_internal def need_reset(self, node): if node.is_nonterm(): From 274c9ae85386d5c4ae42a62ea427114891c3190c Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 12:51:14 +0200 Subject: [PATCH 70/80] Update/Fix counter-intuitive Node.set_contents() behavior Similar change as the one performed in commit 2220381162214a478e426daa693295ba6f0caa2e --- framework/data_model.py | 39 +++++++++++++++++++++++---------- framework/data_model_helpers.py | 2 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py index 9913b78..fe9fb68 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -901,15 +901,19 @@ def get_node_sync(self, scope): else: return self._sync_with.get(scope, None) - def make_private(self, ignore_frozen_state, accept_external_entanglement, delayed_node_internals): + def make_private(self, ignore_frozen_state, accept_external_entanglement, delayed_node_internals, + forget_original_sync_objs=False): if self.private is not None: self.private = copy.copy(self.private) self.absorb_constraints = copy.copy(self.absorb_constraints) self.__attrs = copy.copy(self.__attrs) - if self._sync_with: - delayed_node_internals.add(self) - self._sync_with = copy.copy(self._sync_with) + if forget_original_sync_objs: + self._sync_with = None + else: + if self._sync_with: + delayed_node_internals.add(self) + self._sync_with = copy.copy(self._sync_with) self._make_private_specific(ignore_frozen_state, accept_external_entanglement) self.custo = copy.copy(self.custo) @@ -921,10 +925,10 @@ def set_contents_from(self, node_internals): self.private = node_internals.private self.__attrs = node_internals.__attrs self._sync_with = node_internals._sync_with + self.absorb_constraints = node_internals.absorb_constraints if self.__class__ == node_internals.__class__: self.custo = node_internals.custo - self.absorb_constraints = node_internals.absorb_constraints self.absorb_helper = node_internals.absorb_helper else: if self._sync_with is not None and SyncScope.Size in self._sync_with: @@ -4868,7 +4872,7 @@ def __init__(self, name, base_node=None, copy_dico=None, ignore_frozen_state=Fal node_dico = self.set_contents(base_node, copy_dico=copy_dico, ignore_frozen_state=ignore_frozen_state, accept_external_entanglement=accept_external_entanglement, - acceptance_set=acceptance_set) + acceptance_set=acceptance_set, preserve_node=False) if new_env and self.env is not None: self.env.update_node_refs(node_dico, ignore_frozen_state=ignore_frozen_state) @@ -4923,7 +4927,8 @@ def get_clone(self, name=None, ignore_frozen_state=False, new_env=True): def set_contents(self, base_node, copy_dico=None, ignore_frozen_state=False, - accept_external_entanglement=False, acceptance_set=None): + accept_external_entanglement=False, acceptance_set=None, + preserve_node=True): '''Set the contents of the node based on the one provided within `base_node`. This method performs a deep copy of `base_node`, but some parameters can change the behavior of the copy. @@ -4935,6 +4940,8 @@ def set_contents(self, base_node, base_node (Node): (Optional) Used as a template to create the new node. ignore_frozen_state (bool): If True, the clone process of base_node will ignore its current state. 
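A minimal sketch of what the new ``preserve_node`` default changes in practice, assuming two hypothetical nodes built with the pre-rename API (illustration only, not part of the patch):

    from framework.data_model import Node
    from framework.value_types import String

    src = Node('src', value_type=String(val_list=['Hello']))
    dst = Node('dst', value_type=String(val_list=['World']))

    # New default: dst takes over src's contents but keeps its own
    # attributes and synchronization objects, via
    # NodeInternals.set_contents_from().
    dst.set_contents(src)

    # Former behavior, now opt-in: a plain deep copy that discards
    # dst's previous internals, as _clone_from_dict() in
    # data_model_helpers.py now requests explicitly.
    dst.set_contents(src, preserve_node=False)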
+ preserve_node (bool): preserve the :class:`NodeInternals` attributes that are worth keeping + from the NodeInternals possibly being overwritten. accept_external_entanglement (bool): If True, during the cloning process of base_node, every entangled node outside the current graph will be referenced within the new node without being copied. Otherwise, a *Warning* message will be raised. @@ -4973,10 +4980,20 @@ def set_contents(self, base_node, for conf in base_node.internals: self.add_conf(conf) - self.internals[conf] = copy.copy(base_node.internals[conf]) - self.internals[conf].make_private(ignore_frozen_state=ignore_frozen_state, - accept_external_entanglement=accept_external_entanglement, - delayed_node_internals=delayed_node_internals) + new_internals = copy.copy(base_node.internals[conf]) + if preserve_node: + new_internals.make_private(ignore_frozen_state=ignore_frozen_state, + accept_external_entanglement=accept_external_entanglement, + delayed_node_internals=delayed_node_internals, + forget_original_sync_objs=True) + new_internals.set_contents_from(self.internals[conf]) + else: + new_internals.make_private(ignore_frozen_state=ignore_frozen_state, + accept_external_entanglement=accept_external_entanglement, + delayed_node_internals=delayed_node_internals, + forget_original_sync_objs=False) + + self.internals[conf] = new_internals if base_node.is_nonterm(conf): self.internals[conf].import_subnodes_full_format(internals=base_node.internals[conf]) diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index ad5544a..e934f5a 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -1112,7 +1112,7 @@ def _create_todo_list(self): def _clone_from_dict(self, node, ref, desc): if ref not in self.node_dico: raise ValueError("arguments refer to an inexistent node ({:s}, {!s})!".format(ref[0], ref[1])) - node.set_contents(self.node_dico[ref]) + node.set_contents(self.node_dico[ref], preserve_node=False) self._handle_custo(node, desc, conf=None) self._handle_common_attr(node, desc, conf=None) From 6bf12aca35ef09e10ecf83075e5b7d8f38002c34 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 17:38:32 +0200 Subject: [PATCH 71/80] Change interface of String-based and INT-based node types + WRAP update - the String constructor parameter 'val_list' is renamed 'values' - the INT constructor parameter 'int_list' is renamed 'values' - the WRAP template generator can now also use a String class for its output (previously limited to INT-based classes) --- data_models/example.py | 26 ++-- data_models/file_formats/jpg.py | 6 +- data_models/file_formats/pdf.py | 24 ++-- data_models/file_formats/png.py | 20 +-- data_models/file_formats/zip.py | 22 ++-- data_models/protocols/pppoe.py | 50 ++++---- data_models/protocols/sms.py | 16 +-- data_models/protocols/usb.py | 82 ++++++------- data_models/tuto.py | 98 +++++++-------- docs/source/data_manip.rst | 12 +- docs/source/data_model.rst | 98 +++++++-------- docs/source/tutorial.rst | 30 ++--- framework/data_model.py | 10 +- framework/data_model_helpers.py | 43 ++++--- framework/fuzzing_primitives.py | 2 +- framework/generic_data_makers.py | 4 +- framework/target.py | 2 +- framework/value_types.py | 46 +++---- test/integration/test_integration.py | 176 +++++++++++++-------------- 19 files changed, 388 insertions(+), 379 deletions(-) diff --git a/data_models/example.py b/data_models/example.py index 537fd39..f6b7de0 100644 --- a/data_models/example.py +++ b/data_models/example.py @@ -62,7 +62,7 @@ def
build_data_model(self): tux_subparts_4 = [u'[\u00c2]PLIP', u'[\u00c2]GLOUP'] ku.add_conf('ALT') - ku.set_values(value_type=String(val_list=tux_subparts_4, codec='utf8'), conf='ALT') + ku.set_values(value_type=String(values=tux_subparts_4, codec='utf8'), conf='ALT') idx = Node('IDX') idx.set_values(value_type=SINT16_be(mini=4,maxi=40)) @@ -146,7 +146,7 @@ def build_data_model(self): evt1 = Node('EVT1') - evt1.set_values(value_type=SINT16_be(int_list=[-4])) + evt1.set_values(value_type=SINT16_be(values=[-4])) evt1.set_fuzz_weight(10) evt2 = Node('EVT2') @@ -187,7 +187,7 @@ def build_data_model(self): # te4.make_determinist() te5 = Node('EVT5') - te5.set_values(value_type=INT_str(int_list=[9])) + te5.set_values(value_type=INT_str(values=[9])) te5.cc.set_specific_fuzzy_values([666]) te5.set_fuzz_weight(6) @@ -227,7 +227,7 @@ def build_data_model(self): # Simple tval1_bottom = Node('TV1_bottom') - vt = UINT16_be(int_list=[1,2,3,4,5,6]) + vt = UINT16_be(values=[1,2,3,4,5,6]) # vt = BitField(subfield_sizes=[4,4,4], # subfield_val_lists=[[4,2,1], None, [10,12,13]], @@ -241,7 +241,7 @@ def build_data_model(self): sep_bottom_alt = Node('sep_bottom_alt', values=[' ;; ']) tval2_bottom = Node('TV2_bottom') - vt = UINT16_be(int_list=[0x42,0x43,0x44]) + vt = UINT16_be(values=[0x42,0x43,0x44]) tval2_bottom.set_values(value_type=vt) alt_tag = Node('AltTag', values=[' |AltTag| ', ' +AltTag+ ']) @@ -270,7 +270,7 @@ def build_data_model(self): ], conf='ALT') tval2_bottom3 = Node('TV2_bottom3') - vt = UINT32_be(int_list=[0xF, 0x7]) + vt = UINT32_be(values=[0xF, 0x7]) tval2_bottom3.set_values(value_type=vt) bottom3 = Node('Bottom_3_NT') bottom3.set_subnodes_with_csts([ @@ -307,7 +307,7 @@ def build_data_model(self): ### NonTerm e = Node('TV2') - vt = UINT16_be(int_list=[1,2,3,4,5,6]) + vt = UINT16_be(values=[1,2,3,4,5,6]) e.set_values(value_type=vt) sep3 = Node('sep3', values=[' # ']) nt = Node('Bottom_NT') @@ -319,7 +319,7 @@ def build_data_model(self): sep2 = Node('sep2', values=[' -|#|- ']) e_val1 = Node('V1', values=['A', 'B', 'C']) - e_typedval1 = Node('TV1', value_type=UINT16_be(int_list=[1,2,3,4,5,6])) + e_typedval1 = Node('TV1', value_type=UINT16_be(values=[1,2,3,4,5,6])) e_val2 = Node('V2', values=['X', 'Y', 'Z']) e_val3 = Node('V3', values=['<', '>']) @@ -357,13 +357,13 @@ def build_data_model(self): 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp='\n+'), + 'contents': String(values=['\n'], absorb_regexp='\n+'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'contents': [{ 'section_type': MH.Random, 'contents': [ - {'contents': String(val_list=['OK', 'KO'], size=2), + {'contents': String(values=['OK', 'KO'], size=2), 'name': 'val2'}, {'name': 'val21', @@ -385,7 +385,7 @@ def build_data_model(self): 'sync_qty_with': 'val1', 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True}]} @@ -395,10 +395,10 @@ def build_data_model(self): {'section_type': MH.Pick, 'weights': (10,5), 'contents': [ - {'contents': String(val_list=['PLIP', 'PLOP'], size=4), + {'contents': String(values=['PLIP', 'PLOP'], size=4), 'name': ('val21', 2)}, - {'contents': SINT16_be(int_list=[-1, -3, -5, 7]), + {'contents': SINT16_be(values=[-1, -3, -5, 7]), 'name': ('val22', 2)} ]} ]} diff --git a/data_models/file_formats/jpg.py b/data_models/file_formats/jpg.py index 
80fd5e4..5a89e40 100644 --- a/data_models/file_formats/jpg.py +++ b/data_models/file_formats/jpg.py @@ -84,7 +84,7 @@ def build_data_model(self): {'name': 'SOF_hdr', 'contents': [ {'name': 'F_marker', - 'contents': UINT16_be(int_list=[m for m in markers['SOF'].values()])}, + 'contents': UINT16_be(values=[m for m in markers['SOF'].values()])}, {'name': 'Lf', 'contents': MH.LEN(vt=UINT16_be, base_len=8), 'node_args': 'F_CompGroup', @@ -93,7 +93,7 @@ def build_data_model(self): 'contents': UINT16_be()} ]}, {'name': 'P', - 'contents': UINT8(int_list=[8,12])}, + 'contents': UINT8(values=[8,12])}, {'name': 'Y', 'contents': UINT16_be(maxi=65535), 'specific_fuzzy_vals': [65500]}, @@ -129,7 +129,7 @@ def build_data_model(self): {'name': 'SOS_hdr', 'contents': [ {'name': 'S_marker', - 'contents': UINT16_be(int_list=[markers['SOS']])}, + 'contents': UINT16_be(values=[markers['SOS']])}, {'name': 'Ls', 'contents': MH.LEN(vt=UINT16_be, base_len=6), 'node_args': 'S_CompGroup', diff --git a/data_models/file_formats/pdf.py b/data_models/file_formats/pdf.py index 7789cb3..002413a 100644 --- a/data_models/file_formats/pdf.py +++ b/data_models/file_formats/pdf.py @@ -157,7 +157,7 @@ def get_number(name, int_m=0, int_M=2**40, dec_m=0, dec_M=2**20, enforce_unsigne int_part = Node('int_part', value_type=INT_str(mini=int_m, maxi=int_M, determinist=False)) int_part.add_conf('ALT') - int_part.set_values(value_type=INT_str(int_list=[20000000]), conf='ALT') + int_part.set_values(value_type=INT_str(values=[20000000]), conf='ALT') dot = Node('dot', values=['.']) val = Node('val', value_type=INT_str(mini=dec_m, maxi=dec_M, determinist=False)) @@ -425,14 +425,14 @@ def _encode_stream_ascii(stream, enc_stream): prefix = ["/Filter "]) def gen_length_func(e_stream): - return Node('length', value_type=INT_str(int_list=[len(e_stream.to_bytes())])) + return Node('length', value_type=INT_str(values=[len(e_stream.to_bytes())])) if use_generator_func: e_length = Node('length_wrapper') e_length.set_generator_func(gen_length_func, func_node_arg=e_stream) e_length.customize(GenFuncCusto(items_to_set=GenFuncCusto.CloneExtNodeArgs)) else: - e_length = Node('length', value_type=INT_str(int_list=[len(e_stream.to_bytes())])) + e_length = Node('length', value_type=INT_str(values=[len(e_stream.to_bytes())])) e_length_entry = make_wrapped_node('E_Length', node = e_length, @@ -487,7 +487,7 @@ def get_jpg(name): xobj_id = e_jpg_xobj.get_private() e_resources_internals = make_wrapped_node('IMG_XObj_resource_' + name, - node=Node("xobj_id", value_type=INT_str(int_list=[xobj_id])), + node=Node("xobj_id", value_type=INT_str(values=[xobj_id])), prefix=["<< /ProcSet [/PDF /ImageC]\n /XObject << /Im1 "], suffix=[" 0 R >> >>"]) e_resources = PDFObj.create_wrapped_obj('IMG_XObj_resource_' + name, e_resources_internals) @@ -522,14 +522,14 @@ def make_page_node(name, page_node_id, kids_id=[4444], parent_id=None, count=Non cpt = count if count is not None else len(l) - e_count_nb = Node("count", value_type=INT_str(int_list=[cpt])) + e_count_nb = Node("count", value_type=INT_str(values=[cpt])) e_count = make_wrapped_node("Count_E", node=e_count_nb, prefix=["/Count "], suffix=["\n"]) if parent_id is not None: - e_parent_id = Node("parent_id", value_type=INT_str(int_list=[parent_id])) + e_parent_id = Node("parent_id", value_type=INT_str(values=[parent_id])) e_parent = make_wrapped_node("Parent_E", node=e_parent_id, prefix=["/Parent "], @@ -554,7 +554,7 @@ def make_page_leaf(name, parent_id=4444, resources_id=4444, contents_id=4444, e_prefix = Node('prefix', 
values=["<<\n"]) - e_parent_id = Node("parent_id", value_type=INT_str(int_list=[parent_id])) + e_parent_id = Node("parent_id", value_type=INT_str(values=[parent_id])) e_parent = make_wrapped_node("Parent_E", node=e_parent_id, prefix=["/Parent "], @@ -576,13 +576,13 @@ def make_page_leaf(name, parent_id=4444, resources_id=4444, contents_id=4444, prefix=["/MediaBox "], suffix=["\n"]) - e_resources_id = Node("resource_id", value_type=INT_str(int_list=[resources_id])) + e_resources_id = Node("resource_id", value_type=INT_str(values=[resources_id])) e_resources = make_wrapped_node("Resources_E", node=e_resources_id, prefix=["/Resources "], suffix=[" 0 R\n"]) - e_contents_id = Node("contents_id", value_type=INT_str(int_list=[contents_id])) + e_contents_id = Node("contents_id", value_type=INT_str(values=[contents_id])) e_contents = make_wrapped_node("Contents_E", node=e_contents_id, prefix=["/Contents "], @@ -759,7 +759,7 @@ def _generate_pdf_body(pdf_contents, args): node_list = obj_list + pagetree_objs e_raw_catalog = make_wrapped_node("Catalog", - node=Node("pagetree_id", value_type=INT_str(int_list=[pagetree_id])), + node=Node("pagetree_id", value_type=INT_str(values=[pagetree_id])), prefix=["<<\n/Pages "], suffix=[" 0 R\n/Type /Catalog\n>>"]) e_catalog = PDFObj.create_wrapped_obj("Catalog", e_raw_catalog) @@ -1081,7 +1081,7 @@ def change_kids_id(self, kids_id, count_update=None): self.e_kids_id.set_subnodes_basic(rawlist) if count_update is not None: - self.e_count_nb.set_values(value_type=INT_str(int_list=[count_update])) + self.e_count_nb.set_values(value_type=INT_str(values=[count_update])) class PageLeaf(object): @@ -1096,7 +1096,7 @@ def get_id(self): return self.e_leaf.get_private() def set_parent_id(self, pid): - self.e_parent_id.set_values(value_type=INT_str(int_list=[pid])) + self.e_parent_id.set_values(value_type=INT_str(values=[pid])) def set_actions(self, subnodes=None, vals=None): if subnodes is not None: diff --git a/data_models/file_formats/png.py b/data_models/file_formats/png.py index 351367f..4661780 100644 --- a/data_models/file_formats/png.py +++ b/data_models/file_formats/png.py @@ -56,14 +56,14 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': String(val_list=[u'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': String(values=[u'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ {'name': 'len', 'contents': UINT32_be()}, {'name': 'type', - 'contents': String(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': String(values=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', 'contents': lambda x: Node('data', value_type=String(size=x[0].cc.get_raw_value())), 'node_args': ['len']}, @@ -78,7 +78,7 @@ def build_data_model(self): {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': String(val_list=[u'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': String(values=[u'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,200), 'contents': [ @@ -89,27 +89,27 @@ def build_data_model(self): {'weight': 10, 'contents': [ {'name': 'type1', - 'contents': String(val_list=['IHDR'], size=4), + 'contents': String(values=['IHDR'], size=4), 'absorb_csts': AbsFullCsts()}, {'name': 'width', 'contents': UINT32_be()}, {'name': 'height', 'contents': UINT32_be()}, {'name': 'bit_depth', - 'contents': UINT8(int_list=[1,2,4,8,16])}, + 'contents': UINT8(values=[1,2,4,8,16])}, {'name': 'color_type', - 'contents': UINT8(int_list=[0,2,3,4,6])}, + 'contents': UINT8(values=[0,2,3,4,6])}, 
{'name': 'compression_method', - 'contents': UINT8(int_list=[0])}, + 'contents': UINT8(values=[0])}, {'name': 'filter_method', - 'contents': UINT8(int_list=[0])}, + 'contents': UINT8(values=[0])}, {'name': 'interlace_method', - 'contents': UINT8(int_list=[0,1])} + 'contents': UINT8(values=[0,1])} ]}, {'weight': 5, 'contents': [ {'name': 'type2', - 'contents': String(val_list=['IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': String(values=['IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', 'contents': lambda x: Node('data', value_type=String(size=x.get_raw_value())), 'node_args': 'len'} diff --git a/data_models/file_formats/zip.py b/data_models/file_formats/zip.py index 06da657..d58615a 100644 --- a/data_models/file_formats/zip.py +++ b/data_models/file_formats/zip.py @@ -83,7 +83,7 @@ def build_data_model(self): {'name': 'header', 'contents': [ {'name': 'sig', - 'contents': UINT32_le(int_list=[0x04034b50]), + 'contents': UINT32_le(values=[0x04034b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'common_attrs', @@ -111,7 +111,7 @@ def build_data_model(self): {'name': 'compressed_size', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - UINT32_le(int_list=[len(x.to_bytes())])), + UINT32_le(values=[len(x.to_bytes())])), 'node_args': 'data', 'alt': [ {'conf': 'ABS', @@ -139,7 +139,7 @@ def build_data_model(self): {'name': 'data', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - String(val_list=[zlib.compress(b'a'*x.get_raw_value())])), + String(values=[zlib.compress(b'a'*x.get_raw_value())])), 'node_args': 'uncompressed_size', 'alt': [ {'conf': 'ABS', @@ -171,7 +171,7 @@ def build_data_model(self): {'conf': 'ABS', 'contents': [ {'name': 'archive_extra_data_sig', - 'contents': UINT32_le(int_list=[0x08064b50]), + 'contents': UINT32_le(values=[0x08064b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'extra_enc_field_len', @@ -198,7 +198,7 @@ def build_data_model(self): 'absorb_csts': AbsNoCsts()} ]}, {'name': ('sig', 2), - 'contents': UINT32_le(int_list=[0x02014b50]), + 'contents': UINT32_le(values=[0x02014b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'version_made_by', @@ -259,7 +259,7 @@ def build_data_model(self): {'weight': 1, 'contents': [ {'name': 'full', - 'contents': String(val_list=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, + 'contents': String(values=['PK\x06\x06'+'A'*20+'PK\x06\x07'+'B'*16])}, ]}, ], 'alt': [ @@ -271,7 +271,7 @@ def build_data_model(self): {'name': 'end_of_cdir', 'contents': [ {'name': 'zip64_sig_record', - 'contents': UINT32_le(int_list=[0x06064b50]), + 'contents': UINT32_le(values=[0x06064b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'record_meta_data', @@ -279,7 +279,7 @@ def build_data_model(self): 'set_attrs': [MH.Attr.Abs_Postpone], 'absorb_csts': AbsNoCsts()}, {'name': 'zip64_sig_locator', - 'contents': UINT32_le(int_list=[0x07064b50]), + 'contents': UINT32_le(values=[0x07064b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'locator_meta_data', @@ -294,7 +294,7 @@ def build_data_model(self): 'exists_if_not': 'ZIP64_specifics', 'contents': [ {'name': ('ecd_sig', 3), - 'contents': UINT32_le(int_list=[0x06054b50]), + 'contents': UINT32_le(values=[0x06054b50]), 'absorb_csts': AbsFullCsts(), 'clear_attrs': [MH.Attr.Mutable]}, {'name': 'disk_number', @@ -303,7 +303,7 @@ def build_data_model(self): 'contents': UINT16_le()}, {'name': 
'total_nb_of_cdir_entries_in_this_disk', 'contents': lambda x: Node('cts', value_type=\ - UINT16_le(int_list=[x.get_subnode_qty()])), + UINT16_le(values=[x.get_subnode_qty()])), 'node_args': 'cdir'}, {'name': 'total_nb_of_cdir_entries', 'clone': 'total_nb_of_cdir_entries_in_this_disk'}, @@ -312,7 +312,7 @@ def build_data_model(self): {'name': 'off_of_cdir', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type=\ - UINT32_le(int_list=[len(x[0].to_bytes()) \ + UINT32_le(values=[len(x[0].to_bytes()) \ + len(x[1].to_bytes()) #])), + len(x[2].to_bytes())])), 'node_args': ['start_padding', 'file_list', 'archive_desc_header']}, diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index dd277b2..1bdd1de 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -40,7 +40,7 @@ def build_data_model(self): {'name': 'tag', 'contents': [ {'name': 'type', - 'contents': UINT16_be(int_list=[0,0x0101,0x0102,0x0103,0x0104,0x0105, + 'contents': UINT16_be(values=[0,0x0101,0x0102,0x0103,0x0104,0x0105, 0x0110,0x201,0x0202,0x0203]), 'absorb_csts': AbsFullCsts()}, {'name': 'len', @@ -56,22 +56,22 @@ def build_data_model(self): {'name': 'v101', # Service Name 'exists_if': (IntCondition(0x0101), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=[u'my \u00fcber service'], codec='utf8'), + 'contents': String(values=[u'my \u00fcber service'], codec='utf8'), }, {'name': 'v102', # AC name 'exists_if': (IntCondition(0x0102), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=['AC name'], codec='utf8'), + 'contents': String(values=['AC name'], codec='utf8'), }, {'name': 'v103', # Host Identifier 'exists_if': (IntCondition(0x0103), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=['Host Identifier']), + 'contents': String(values=['Host Identifier']), }, {'name': 'v104', # Cookie 'exists_if': (IntCondition(0x0104), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=['Cookie'], min_sz=0, max_sz=1000), + 'contents': String(values=['Cookie'], min_sz=0, max_sz=1000), }, {'name': 'v105', # Vendor Specific 'exists_if': (IntCondition(0x0105), 'type'), @@ -82,7 +82,7 @@ def build_data_model(self): subfield_descs=['type','version']) }, {'name': 'remainder', 'sync_enc_size_with': ('len', 4), - 'contents': String(val_list=['unspecified...'], min_sz=0, max_sz=1000), + 'contents': String(values=['unspecified...'], min_sz=0, max_sz=1000), }, ]}, {'name': 'v110', # Relay session ID @@ -92,22 +92,22 @@ def build_data_model(self): {'name': 'v201', 'exists_if': (IntCondition([0x201, 0x202]), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=['Service Name Error or AC System Error!'], codec='utf8'), + 'contents': String(values=['Service Name Error or AC System Error!'], codec='utf8'), }, {'name': 'v203', # Generic Error 'exists_if': (IntCondition(0x0203), 'type'), 'sync_enc_size_with': 'len', - 'contents': String(val_list=['Generic Error!'], codec='utf8'), + 'contents': String(values=['Generic Error!'], codec='utf8'), 'alt': [ {'conf': 'null-terminated', # RFC2516 says it MUST NOT be null terminated 'exists_if': (IntCondition(0x0203), 'type'), 'contents': [ {'name': 'data', 'sync_enc_size_with': ('len', -1), - 'contents': String(val_list=['Generic Error!'], codec='utf8')}, + 'contents': String(values=['Generic Error!'], codec='utf8')}, {'name': 'null', 'mutable': False, - 'contents': UINT8(int_list=[0])} + 'contents': UINT8(values=[0])} ]} ]}, ]} @@ -118,16 +118,16 @@ def 
build_data_model(self): tag_node_4pads = tag_node.get_clone() tag_service_name = tag_node.get_clone('tag_sn') - tag_service_name['.*/type'].set_values(value_type=UINT16_be(int_list=[0x0101])) + tag_service_name['.*/type'].set_values(value_type=UINT16_be(values=[0x0101])) tag_host_uniq = tag_node.get_clone('tag_host_uniq') - tag_host_uniq['.*/type'].set_values(value_type=UINT16_be(int_list=[0x0103])) + tag_host_uniq['.*/type'].set_values(value_type=UINT16_be(values=[0x0103])) tag_ac_name = tag_node.get_clone('tag_ac_name') # Access Concentrator Name - tag_ac_name['.*/type'].set_values(value_type=UINT16_be(int_list=[0x0102])) + tag_ac_name['.*/type'].set_values(value_type=UINT16_be(values=[0x0102])) tag_sn_error = tag_node.get_clone('tag_sn_error') # Service Name Error - tag_sn_error['.*/type'].set_values(value_type=UINT16_be(int_list=[0x0202])) + tag_sn_error['.*/type'].set_values(value_type=UINT16_be(values=[0x0202])) pppoe_desc = \ {'name': 'pppoe', @@ -142,14 +142,14 @@ def build_data_model(self): 'contents': String(size=6)}, {'name': 'proto', 'mutable': False, - 'contents': UINT16_be(int_list=[0x8863])}, + 'contents': UINT16_be(values=[0x8863])}, {'name': 'version-type', 'contents': BitField(subfield_sizes=[4,4], endian=VT.BigEndian, subfield_val_lists=[[1],[1]], subfield_descs=['type','version'])}, {'name': 'code', 'mutable': False, - 'contents': UINT8(int_list=[0x9,0x7,0x19,0x65,0xa7]), + 'contents': UINT8(values=[0x9,0x7,0x19,0x65,0xa7]), 'absorb_csts': AbsFullCsts()}, {'name': 'session_id', 'contents': UINT16_be()}, @@ -178,7 +178,7 @@ def build_data_model(self): (tag_ac_name, 1), (tag_service_name.get_clone(), 1), {'name': 'host_uniq_stub', - 'contents': String(val_list=[''])}, + 'contents': String(values=[''])}, (tag_node.get_clone(), 0, 4) ]}, {'name': '4padr', @@ -199,7 +199,7 @@ def build_data_model(self): 'contents': [ (tag_service_name.get_clone(), 1), {'name': ('host_uniq_stub', 2), - 'contents': String(val_list=[''])}, + 'contents': String(values=[''])}, (tag_node_4pads, 0, 4) ]}, # Reject PPPoE session Case @@ -216,7 +216,7 @@ def build_data_model(self): 'exists_if': (IntCondition(0xa7), 'code'), 'contents': [ {'name': ('host_uniq_stub', 3), - 'contents': String(val_list=[''])}, + 'contents': String(values=[''])}, (tag_node.get_clone(), 0, 4) ]} ]}, @@ -231,21 +231,21 @@ def build_data_model(self): pppoe_msg.make_random(recursive=True) padi = pppoe_msg.get_clone('padi') - padi['.*/mac_dst'].set_values(value_type=String(val_list=[u'\xff\xff\xff\xff\xff\xff'])) - padi['.*/code'].set_values(value_type=UINT8(int_list=[0x9])) + padi['.*/mac_dst'].set_values(value_type=String(values=[u'\xff\xff\xff\xff\xff\xff'])) + padi['.*/code'].set_values(value_type=UINT8(values=[0x9])) pado = pppoe_msg.get_clone('pado') - pado['.*/code'].set_values(value_type=UINT8(int_list=[0x7])) + pado['.*/code'].set_values(value_type=UINT8(values=[0x7])) pado['.*/code'].clear_attr(MH.Attr.Mutable) padr = pppoe_msg.get_clone('padr') - padr['.*/code'].set_values(value_type=UINT8(int_list=[0x19])) + padr['.*/code'].set_values(value_type=UINT8(values=[0x19])) pads = pppoe_msg.get_clone('pads') - pads['.*/code'].set_values(value_type=UINT8(int_list=[0x65])) + pads['.*/code'].set_values(value_type=UINT8(values=[0x65])) padt = pppoe_msg.get_clone('padt') - padt['.*/code'].set_values(value_type=UINT8(int_list=[0xa7])) + padt['.*/code'].set_values(value_type=UINT8(values=[0xa7])) self.register(pppoe_msg, padi, pado, padr, pads, padt, tag_host_uniq) diff --git a/data_models/protocols/sms.py 
b/data_models/protocols/sms.py index 41d650f..75eba87 100644 --- a/data_models/protocols/sms.py +++ b/data_models/protocols/sms.py @@ -54,7 +54,7 @@ def build_data_model(self): ) }, {'name': 'TP-MR', # Message Reference (refer to TS 100 901) 'mutable': False, - 'contents': UINT8(int_list=[0])}, + 'contents': UINT8(values=[0])}, {'name': 'TP-DA', # Destination Address (refer to TS 100 901 - chapter 9.1.2.5) 'mutable': False, 'contents': [ @@ -73,7 +73,7 @@ def build_data_model(self): ) }, {'name': 'tel_num', 'semantics': ['tel num'], - 'contents': GSMPhoneNum(val_list=['33612345678'])} + 'contents': GSMPhoneNum(values=['33612345678'])} ]}, {'name': 'TP-PID', # Protocol Identifier (refer to TS 100 901) 'determinist': True, @@ -92,7 +92,7 @@ def build_data_model(self): 'contents': MH.LEN(vt=UINT8, after_encoding=False), 'node_args': 'user_data'}, {'name': 'user_data', - 'contents': GSM7bitPacking(val_list=['Hello World!'], max_sz=160) + 'contents': GSM7bitPacking(values=['Hello World!'], max_sz=160) } ] } @@ -116,7 +116,7 @@ def build_data_model(self): ) }, {'name': 'TP-MR', # Message Reference (refer to TS 100 901) 'mutable': False, - 'contents': UINT8(int_list=[0])}, + 'contents': UINT8(values=[0])}, {'name': 'TP-DA', # Destination Address (refer to TS 100 901 - chapter 9.1.2.5) 'mutable': False, 'contents': [ @@ -135,7 +135,7 @@ def build_data_model(self): ) }, {'name': 'tel_num', 'semantics': ['tel num'], - 'contents': GSMPhoneNum(val_list=['33612345678'])} + 'contents': GSMPhoneNum(values=['33612345678'])} ]}, {'name': 'TP-PID', # Protocol Identifier (refer to TS 100 901) 'determinist': True, @@ -183,11 +183,11 @@ def build_data_model(self): {'name': 'user_data', 'contents': [ {'name': 'UDHL', - 'contents': UINT8(int_list=[2])}, + 'contents': UINT8(values=[2])}, {'name': 'IEIa', # 0x70 = command packet identifier - 'contents': UINT8(int_list=[0x70], mini=0x70, maxi=0x7F)}, + 'contents': UINT8(values=[0x70], mini=0x70, maxi=0x7F)}, {'name': 'IEDLa', - 'contents': UINT8(int_list=[0])}, + 'contents': UINT8(values=[0])}, {'name': 'CPL', # command packet length 'contents': MH.LEN(vt=UINT16_be), 'node_args': 'cmd'}, diff --git a/data_models/protocols/usb.py b/data_models/protocols/usb.py index 50f5958..f938917 100644 --- a/data_models/protocols/usb.py +++ b/data_models/protocols/usb.py @@ -75,9 +75,9 @@ def build_data_model(self): {'name': 'EP_desc', 'contents': [ {'name': 'bLength', - 'contents': UINT8(int_list=[7])}, + 'contents': UINT8(values=[7])}, {'name': 'bDescType', - 'contents': UINT8(int_list=[USB_DEFS.DT_ENDPOINT])}, + 'contents': UINT8(values=[USB_DEFS.DT_ENDPOINT])}, {'name': 'bEndpointAddr', 'contents': BitField(subfield_limits=[4,7,8], subfield_val_extremums=[[0,0b1111],None,[0,1]], @@ -116,10 +116,10 @@ def build_data_model(self): subfield_val_lists=[[0x8, 0x10, 0x20, 0x40],[0],[0]], endian=VT.LittleEndian)}]}, {'name': 'bInterval', - 'contents': UINT8(int_list=[4]), + 'contents': UINT8(values=[4]), 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[0])}]} + 'contents': UINT8(values=[0])}]} ]} mh = ModelHelper(add_env=False) @@ -138,22 +138,22 @@ def build_data_model(self): {'name': ('Ihdr', 2), 'contents': [ {'name': ('bLength', 2), - 'contents': UINT8(int_list=[9])}, + 'contents': UINT8(values=[9])}, {'name': ('bDescType', 2), - 'contents': UINT8(int_list=[USB_DEFS.DT_INTERFACE])}, + 'contents': UINT8(values=[USB_DEFS.DT_INTERFACE])}, {'name': 'bInterfaceNum', 'contents': UINT8(mini=0, maxi=10)}, {'name': 'bAlternateSetting', - 'contents': UINT8(int_list=[0, 1, 2, 3, 
4])}, + 'contents': UINT8(values=[0, 1, 2, 3, 4])}, {'name': 'bNumEndpoints', # 'random': True, 'contents': UINT8(mini=1, maxi=8, default=4), 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[2])} + 'contents': UINT8(values=[2])} ]}, {'name': 'bInterfaceClass', - 'contents': UINT8(int_list=[ + 'contents': UINT8(values=[ USB_DEFS.USB_CLASS_MASS_STORAGE, USB_DEFS.USB_CLASS_PRINTER, USB_DEFS.USB_CLASS_HID, @@ -164,23 +164,23 @@ def build_data_model(self): ), 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[0x8])} + 'contents': UINT8(values=[0x8])} ] }, {'name': 'bInterfaceSubClass', - 'contents': UINT8(int_list=[0x06, 0, 1, 2, 3, 4, 5, 7, 8]), + 'contents': UINT8(values=[0x06, 0, 1, 2, 3, 4, 5, 7, 8]), 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[0x6])} + 'contents': UINT8(values=[0x6])} ]}, {'name': 'bInterfaceProtocol', - 'contents': UINT8(int_list=[0x80, 0x06, 0, 1, 2]), + 'contents': UINT8(values=[0x80, 0x06, 0, 1, 2]), 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[0x50])} + 'contents': UINT8(values=[0x50])} ]}, {'name': 'iInterface', - 'contents': UINT8(int_list=[USB_DEFS.STRINGID_INTERFACE])}, + 'contents': UINT8(values=[USB_DEFS.STRINGID_INTERFACE])}, ]}, {'name': 'EP_Group', 'custo_clear': MH.Custo.NTerm.MutableClone, @@ -207,9 +207,9 @@ def build_data_model(self): {'name': 'hdr', 'contents': [ {'name': 'bLength', - 'contents': UINT8(int_list=[9])}, + 'contents': UINT8(values=[9])}, {'name': 'bDescType', - 'contents': UINT8(int_list=[USB_DEFS.DT_CONFIGURATION])}, + 'contents': UINT8(values=[USB_DEFS.DT_CONFIGURATION])}, {'name': 'wTotalLength', 'contents': MH.LEN(vt=UINT16_le, base_len=9), 'node_args': 'Intf_Group'}, @@ -218,18 +218,18 @@ def build_data_model(self): 'node_args': 'Intf_Group', 'alt': [ {'conf': 'MSD', - 'contents': UINT8(int_list=[1])} + 'contents': UINT8(values=[1])} ]}, {'name': 'bConfValue', 'contents': UINT8(mini=1, maxi=50)}, {'name': 'iConf', - 'contents': UINT8(int_list=[USB_DEFS.STRINGID_CONFIG])}, + 'contents': UINT8(values=[USB_DEFS.STRINGID_CONFIG])}, {'name': 'bmAttributes', 'contents': BitField(subfield_limits=[5,6,7,8], subfield_val_lists=[[0],[1],[1],[1]], endian=VT.LittleEndian)}, {'name': 'bMaxPower', - 'contents': UINT8(int_list=[50])}, + 'contents': UINT8(values=[50])}, ]}, {'name': 'Intf_Group', 'custo_clear': MH.Custo.NTerm.MutableClone, @@ -255,45 +255,45 @@ def build_data_model(self): 'semantics': 'DEV_DESC', 'contents': [ {'name': 'bLength', - 'contents': UINT8(int_list=[18])}, + 'contents': UINT8(values=[18])}, {'name': 'bDescType', - 'contents': UINT8(int_list=[USB_DEFS.DT_DEVICE])}, + 'contents': UINT8(values=[USB_DEFS.DT_DEVICE])}, {'name': 'bcdUSB', - 'contents': UINT16_le(int_list=[0x200, 0x100])}, + 'contents': UINT16_le(values=[0x200, 0x100])}, {'name': 'bDeviceClass', - 'contents': UINT8(int_list=[0]), + 'contents': UINT8(values=[0]), 'alt': [ {'conf': 'MS', # mass-storage - 'contents': UINT8(int_list=[0])} + 'contents': UINT8(values=[0])} ]}, {'name': 'bDeviceSubClass', - 'contents': UINT8(int_list=[0]), + 'contents': UINT8(values=[0]), 'alt': [ {'conf': 'MS', # mass-storage - 'contents': UINT8(int_list=[0])} + 'contents': UINT8(values=[0])} ]}, {'name': 'bDeviceProto', - 'contents': UINT8(int_list=[0]), + 'contents': UINT8(values=[0]), 'alt': [ {'conf': 'MS', # mass-storage - 'contents': UINT8(int_list=[0])} + 'contents': UINT8(values=[0])} ]}, {'name': 'bMaxPacketSize0', - 'contents': UINT8(int_list=[64])}, + 'contents': UINT8(values=[64])}, {'name': 'idVendor', - 'contents': 
UINT16_le(int_list=[0x1307])}, + 'contents': UINT16_le(values=[0x1307])}, {'name': 'idProduct', - 'contents': UINT16_le(int_list=[0x0165])}, + 'contents': UINT16_le(values=[0x0165])}, {'name': 'bcdDevice', - 'contents': UINT16_le(int_list=[0x100])}, + 'contents': UINT16_le(values=[0x100])}, {'name': 'iManufacturer', - 'contents': UINT8(int_list=[USB_DEFS.STRINGID_MFR])}, + 'contents': UINT8(values=[USB_DEFS.STRINGID_MFR])}, {'name': 'iProduct', - 'contents': UINT8(int_list=[USB_DEFS.STRINGID_PRODUCT])}, + 'contents': UINT8(values=[USB_DEFS.STRINGID_PRODUCT])}, {'name': 'iSerialNumber', - 'contents': UINT8(int_list=[USB_DEFS.STRINGID_SERIAL])}, + 'contents': UINT8(values=[USB_DEFS.STRINGID_SERIAL])}, {'name': 'bNumConfigs', - 'contents': UINT8(int_list=[1])} + 'contents': UINT8(values=[1])} ]} langid_desc = \ @@ -304,12 +304,12 @@ def build_data_model(self): 'contents': MH.LEN(vt=UINT8,base_len=2), 'node_args': 'contents'}, {'name': 'bDescType', - 'contents': UINT8(int_list=[USB_DEFS.DT_STRING])}, + 'contents': UINT8(values=[USB_DEFS.DT_STRING])}, {'name': 'contents', 'contents': [ {'name': 'LangID', 'qty': (0,30), - 'contents': UINT16_le(int_list=[0x040c, 0x0409])} + 'contents': UINT16_le(values=[0x040c, 0x0409])} ]}, ]} @@ -320,10 +320,10 @@ def build_data_model(self): {'name': 'bLength', 'contents': UINT8()}, {'name': 'bDescType', - 'contents': UINT8(int_list=[USB_DEFS.DT_STRING])}, + 'contents': UINT8(values=[USB_DEFS.DT_STRING])}, {'name': 'contents', 'sync_enc_size_with': ('bLength', 2), - 'contents': String(val_list=[u'\u00fcber string', u'what an interesting string!'], + 'contents': String(values=[u'\u00fcber string', u'what an interesting string!'], max_sz=126, max_encoded_sz=253, codec='utf-16-le')}, ]} diff --git a/data_models/tuto.py b/data_models/tuto.py index edc2a44..0be6265 100644 --- a/data_models/tuto.py +++ b/data_models/tuto.py @@ -35,19 +35,19 @@ def build_data_model(self): {'name': 'val22'}, {'name': 'val21-qty', - 'contents': UINT16_be(int_list=[2,4])}, + 'contents': UINT16_be(values=[2,4])}, {'name': 'middle', 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp='\n+'), + 'contents': String(values=['\n'], absorb_regexp='\n+'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'contents': [{ 'section_type': MH.Random, 'contents': [ - {'contents': String(val_list=['OK', 'KO'], size=2), + {'contents': String(values=['OK', 'KO'], size=2), 'name': 'val2', 'qty': (1, 3)}, @@ -71,7 +71,7 @@ def build_data_model(self): 'sync_qty_with': 'val1', 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True}]} @@ -81,20 +81,20 @@ def build_data_model(self): {'section_type': MH.Pick, 'weights': (10,5), 'contents': [ - {'contents': String(val_list=['PLIP', 'PLOP'], size=4), + {'contents': String(values=['PLIP', 'PLOP'], size=4), 'name': 'val4'}, - {'contents': SINT16_be(int_list=[-1, -3, -5, 7]), + {'contents': SINT16_be(values=[-1, -3, -5, 7]), 'name': 'val5'} ]}, # block 3 {'section_type': MH.FullyRandom, 'contents': [ - {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + {'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'name': ('val21', 2)}, - {'contents': UINT8(int_list=[2, 4, 6, 8]), + {'contents': UINT8(values=[2, 4, 6, 8]), 'qty': (2, 3), 'name': 'val7'} ]} @@ -113,18 +113,18 @@ def keycode_helper(blob, constraints, 
node_internals): 'contents': [ {'name': 'prefix', - 'contents': UINT8(int_list=[0xcc, 0xff, 0xee])}, + 'contents': UINT8(values=[0xcc, 0xff, 0xee])}, {'name': 'variable_string', 'contents': String(max_sz=20), 'set_attrs': [NodeInternals.Abs_Postpone]}, {'name': 'keycode', - 'contents': UINT16_be(int_list=[0xd2d3, 0xd2fe, 0xd2aa]), + 'contents': UINT16_be(values=[0xd2d3, 0xd2fe, 0xd2aa]), 'absorb_helper': keycode_helper}, {'name': 'variable_suffix', - 'contents': String(val_list=['END', 'THE_END'])} + 'contents': String(values=['END', 'THE_END'])} ]} @@ -133,24 +133,24 @@ def keycode_helper(blob, constraints, node_internals): 'contents': [ {'name': 'prefix', - 'contents': UINT8(int_list=[0xcc, 0xff, 0xee])}, + 'contents': UINT8(values=[0xcc, 0xff, 0xee])}, {'name': 'variable_string', 'contents': String(max_sz=20), 'set_attrs': [NodeInternals.Abs_Postpone]}, {'name': 'keycode', - 'contents': UINT16_be(int_list=[0xd2d3, 0xd2fe, 0xd2aa])}, + 'contents': UINT16_be(values=[0xd2d3, 0xd2fe, 0xd2aa])}, {'name': 'variable_suffix', - 'contents': String(val_list=['END', 'THE_END'])} + 'contents': String(values=['END', 'THE_END'])} ]} separator_desc = \ {'name': 'separator', 'separator': {'contents': {'name': 'sep_nl', - 'contents': String(val_list=['\n'], absorb_regexp='[\r\n|\n]+'), + 'contents': String(values=['\n'], absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, @@ -160,7 +160,7 @@ def keycode_helper(blob, constraints, node_internals): 'contents': [ {'name': 'parameters', 'separator': {'contents': {'name': ('sep',2), - 'contents': String(val_list=[' '], absorb_regexp=' +'), + 'contents': String(values=[' '], absorb_regexp=' +'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'qty': 3, 'contents': [ @@ -170,19 +170,19 @@ def keycode_helper(blob, constraints, node_internals): 'determinist': True, # used only for test purpose 'contents': [ {'name': 'id', - 'contents': String(val_list=['color='])}, + 'contents': String(values=['color='])}, {'name': 'val', - 'contents': String(val_list=['red', 'black'])} + 'contents': String(values=['red', 'black'])} ]}, {'name': 'type', 'contents': [ {'name': ('id', 2), - 'contents': String(val_list=['type='])}, + 'contents': String(values=['type='])}, {'name': ('val', 2), - 'contents': String(val_list=['circle', 'cube', 'rectangle'], determinist=False)} + 'contents': String(values=['circle', 'cube', 'rectangle'], determinist=False)} ]}, ]}]}, - {'contents': String(val_list=['AAAA', 'BBBB', 'CCCC'], determinist=False), + {'contents': String(values=['AAAA', 'BBBB', 'CCCC'], determinist=False), 'qty': (4, 6), 'name': 'str'} ]} @@ -194,15 +194,15 @@ def keycode_helper(blob, constraints, node_internals): 'shape_type': MH.Ordered, 'contents': [ {'name': 'opcode', - 'contents': String(val_list=['A1', 'A2', 'A3'], determinist=True)}, + 'contents': String(values=['A1', 'A2', 'A3'], determinist=True)}, {'name': 'command_A1', - 'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + 'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'exists_if': (RawCondition('A1'), 'opcode'), 'qty': 3}, {'name': 'command_A2', - 'contents': UINT32_be(int_list=[0xDEAD, 0xBEEF]), + 'contents': UINT32_be(values=[0xDEAD, 0xBEEF]), 'exists_if': (RawCondition('A2'), 'opcode')}, {'name': 'command_A3', @@ -215,23 +215,23 @@ def keycode_helper(blob, constraints, node_internals): determinist=False)}, {'name': 'A3_int', - 'contents': UINT16_be(int_list=[10, 20, 30], determinist=False)}, + 'contents': UINT16_be(values=[10, 20, 30], determinist=False)}, 
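# Before/after sketch of the constructor interface change performed by this
# patch (editorial illustration, values taken from the surrounding diffs):
#
#   String(val_list=['OK', 'KO'], size=2)  ->  String(values=['OK', 'KO'], size=2)
#   UINT16_be(int_list=[10, 20, 30])       ->  UINT16_be(values=[10, 20, 30])
#   INT_str(int_list=[9])                  ->  INT_str(values=[9])
#
# 'values' is now the single keyword shared by String-based and INT-based types.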
{'name': 'A3_deco1', 'exists_if': (IntCondition(10), 'A3_int'), - 'contents': String(val_list=['*1*0*'])}, + 'contents': String(values=['*1*0*'])}, {'name': 'A3_deco2', 'exists_if': (IntCondition(neg_val=[10]), 'A3_int'), - 'contents': String(val_list=['+2+0+3+0+'])} + 'contents': String(values=['+2+0+3+0+'])} ]}, {'name': 'A31_payload', - 'contents': String(val_list=['$ A31_OK $', '$ A31_KO $'], determinist=False), + 'contents': String(values=['$ A31_OK $', '$ A31_KO $'], determinist=False), 'exists_if': (BitFieldCondition(sf=2, val=[6,12]), 'A3_subopcode')}, {'name': 'A32_payload', - 'contents': String(val_list=['$ A32_VALID $', '$ A32_INVALID $'], determinist=False), + 'contents': String(values=['$ A32_VALID $', '$ A32_INVALID $'], determinist=False), 'exists_if': (BitFieldCondition(sf=[0, 1, 2], val=[[500, 501], [1, 2], 5]), 'A3_subopcode')} ]} @@ -263,10 +263,10 @@ def keycode_helper(blob, constraints, node_internals): 'shape_type': MH.FullyRandom, 'random': True, 'contents': [ - {'contents': String(val_list=['AAA']), + {'contents': String(values=['AAA']), 'qty': 10, 'name': 'str'}, - {'contents': UINT8(int_list=[0x3F]), + {'contents': UINT8(values=[0x3F]), 'name': 'int'} ]}, @@ -284,11 +284,11 @@ def keycode_helper(blob, constraints, node_internals): 'contents': [ {'name': 'int16', 'qty': (2, 10), - 'contents': UINT16_be(int_list=[16, 1, 6], determinist=False)}, + 'contents': UINT16_be(values=[16, 1, 6], determinist=False)}, {'name': 'int32', 'qty': (3, 8), - 'contents': UINT32_be(int_list=[32, 3, 2], determinist=False)} + 'contents': UINT32_be(values=[32, 3, 2], determinist=False)} ]}, {'name': 'int16_qty', @@ -319,7 +319,7 @@ def keycode_helper(blob, constraints, node_internals): shape_desc = \ {'name': 'shape', 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=[' [!] '])}}, + 'contents': String(values=[' [!] 
'])}}, 'contents': [ {'weight': 20, @@ -332,19 +332,19 @@ def keycode_helper(blob, constraints, node_internals): {'name': 'body', 'separator': {'contents': {'name': 'sep2', - 'contents': String(val_list=['::'])}}, + 'contents': String(values=['::'])}}, 'shape_type': MH.Random, # ignored in determnist mode 'contents': [ - {'contents': String(val_list=['AAA', 'BBB']), + {'contents': String(values=['AAA', 'BBB']), 'qty': (0, 4), 'name': 'str'}, - {'contents': UINT8(int_list=[0x3E]), # chr(0x3E) == '>' + {'contents': UINT8(values=[0x3E]), # chr(0x3E) == '>' 'name': 'int'} ]}, ]}, - {'contents': String(val_list=['?','!']), + {'contents': String(values=['?','!']), 'name': 'int3'} ]}, @@ -357,17 +357,17 @@ def keycode_helper(blob, constraints, node_internals): ]} ]} - for_network_tg1 = Node('4tg1', vt=String(val_list=['FOR_TARGET_1'])) + for_network_tg1 = Node('4tg1', vt=String(values=['FOR_TARGET_1'])) for_network_tg1.set_semantics(['TG1']) - for_network_tg2 = Node('4tg2', vt=String(val_list=['FOR_TARGET_2'])) + for_network_tg2 = Node('4tg2', vt=String(values=['FOR_TARGET_2'])) for_network_tg2.set_semantics(['TG2']) enc_desc = \ {'name': 'enc', 'contents': [ {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop']) }, + 'contents': String(values=['Plip', 'Plop']) }, {'name': 'crc', 'contents': MH.CRC(vt=UINT32_be, after_encoding=False), 'node_args': ['enc_data', 'data2'], @@ -381,10 +381,10 @@ def keycode_helper(blob, constraints, node_internals): 'node_args': 'data1', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': String(val_list=['Test!', 'Hello World!'], codec='utf-16-le') }, + 'contents': String(values=['Test!', 'Hello World!'], codec='utf-16-le') }, ]}, {'name': 'data2', - 'contents': String(val_list=['Red', 'Green', 'Blue']) }, + 'contents': String(values=['Red', 'Green', 'Blue']) }, ]} @@ -393,7 +393,7 @@ def keycode_helper(blob, constraints, node_internals): {'name': 'ex', 'contents': [ {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop']) }, + 'contents': String(values=['Plip', 'Plop']) }, {'name': 'data_group', 'contents': [ @@ -405,7 +405,7 @@ def keycode_helper(blob, constraints, node_internals): 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': String(val_list=['Test!', 'Hello World!']) }, + 'contents': String(values=['Test!', 'Hello World!']) }, {'name': 'data2', 'qty': (1,3), @@ -413,21 +413,21 @@ def keycode_helper(blob, constraints, node_internals): 'contents': UINT16_be(mini=10, maxi=0xa0ff), 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56)} ]}, {'name': 'data3', 'semantics': ['sem2'], 'sync_qty_with': 'data2', - 'contents': UINT8(int_list=[30,40,50]), + 'contents': UINT8(values=[30,40,50]), 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}]}, + 'contents': SINT8(values=[1,4,8])}]}, ]}, {'name': 'data4', - 'contents': String(val_list=['Red', 'Green', 'Blue']) } + 'contents': String(values=['Red', 'Green', 'Blue']) } ]} regex_desc = {'name': 'regex', diff --git a/docs/source/data_manip.rst b/docs/source/data_manip.rst index 7d9c155..b6f877e 100644 --- a/docs/source/data_manip.rst +++ b/docs/source/data_manip.rst @@ -26,7 +26,7 @@ To guide you over what is possible to perform, let's consider the following data {'name': 'ex', 'contents': [ {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop']) }, + 'contents': String(values=['Plip', 'Plop']) }, {'name': 
'data_group', 'contents': [ @@ -38,7 +38,7 @@ To guide you over what is possible to perform, let's consider the following data 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': String(val_list=['Test!', 'Hello World!']) }, + 'contents': String(values=['Test!', 'Hello World!']) }, {'name': 'data2', 'qty': (1,3), @@ -46,21 +46,21 @@ To guide you over what is possible to perform, let's consider the following data 'contents': UINT16_be(mini=10, maxi=0xa0ff), 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56)} ]}, {'name': 'data3', 'semantics': ['sem2'], 'sync_qty_with': 'data2', - 'contents': UINT8(int_list=[30,40,50]), + 'contents': UINT8(values=[30,40,50]), 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}]}, + 'contents': SINT8(values=[1,4,8])}]}, ]}, {'name': 'data4', - 'contents': String(val_list=['Red', 'Green', 'Blue']) } + 'contents': String(values=['Red', 'Green', 'Blue']) } ]} diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst index 5e39efc..833e7b6 100644 --- a/docs/source/data_model.rst +++ b/docs/source/data_model.rst @@ -27,7 +27,7 @@ All integer types listed below provide the same interface (:class:`framework.value_types.INT`). Their constructors take the following parameters: -``int_list`` [optional, default value: **None**] +``values`` [optional, default value: **None**] List of the integers that are considered valid for the node backed by this *Integer object*. The default value is the first element of the list. @@ -38,7 +38,7 @@ following parameters: Maximum valid value for the node backed by this *Integer object*. ``default`` [optional, default value: **None**] - If ``int_list`` is not provided, this value if provided will be used as the default one. + If ``values`` is not provided, this value, if provided, will be used as the default one. ``determinist`` [default value: **True**] If set to ``True``, generated values will be in a deterministic @@ -83,7 +83,7 @@ All string types listed below provide the same interface (:class:`framework.value_types.String`). Their constructors take the following parameters: -``val_list`` [optional, default value: **None**] +``values`` [optional, default value: **None**] List of the character strings that are considered valid for the node backed by this *String object*. The default string is the first element of the list. @@ -94,13 +94,13 @@ following parameters: ``min_sz`` [optional, default value: **None**] Minimum valid size for the character strings for the node backed by this *String object*. If not set, this parameter will be - automatically inferred by looking at the parameter ``val_list`` + automatically inferred by looking at the parameter ``values`` if the latter is provided. ``max_sz`` [optional, default value: **None**] Maximum valid size for the character strings for the node backed by this *String object*. If not set, this parameter will be - automatically inferred by looking at the parameter ``val_list`` + automatically inferred by looking at the parameter ``values`` if the latter is provided. ``determinist`` [default value: **True**] @@ -127,9 +127,9 @@ following parameters: :ref:`tuto:dm-absorption` for more information on that topic). ``alphabet`` [optional, default value: **string.printable**] - The alphabet to use for generating data, in case no ``val_list`` is + The alphabet to use for generating data, in case no ``values`` is provided.
Also use during absorption to validate the contents. It is - checked if there is no ``val_list``. + checked if there is no ``values``. ``max_encoded_sz`` [optional, default value: **None**] Only relevant for subclasses that leverage the encoding infrastructure. @@ -451,7 +451,7 @@ alt .. code-block:: python 'alt': [ {'conf': 'config_n1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'config_n2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True} ] @@ -612,25 +612,25 @@ section_type 'contents': [ {'name': 'val1', - 'contents': String(val_list=['OK', 'KO']), + 'contents': String(values=['OK', 'KO']), 'qty': (0, 5)}, {'section_type': MH.Ordered, 'contents': [ {'name': 'val2', - 'contents': UINT16_be(int_list=[10, 20, 30])}, + 'contents': UINT16_be(values=[10, 20, 30])}, {'name': 'val3', 'contents': String(min_sz=2, max_sz=10, alphabet='XYZ')}, {'name': 'val4', - 'contents': UINT32_le(int_list=[0xDEAD, 0xBEEF])}, + 'contents': UINT32_le(values=[0xDEAD, 0xBEEF])}, ]} {'name': 'val5', - 'contents': String(val_list=['OPEN', 'CLOSE']), + 'contents': String(values=['OPEN', 'CLOSE']), 'qty': 3} ]} @@ -659,7 +659,7 @@ separator .. code-block:: python 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'])}, + 'contents': String(values=['\n'])}, 'prefix': False, 'suffix': False, 'unique': True}, @@ -895,14 +895,14 @@ exists_if/and, exists_if/or {'name': 'test', 'contents': [ {'name': 'opcode', - 'contents': String(val_list=['A3', 'A2'])}, + 'contents': String(values=['A3', 'A2'])}, {'name': 'subopcode', 'contents': BitField(subfield_sizes=[15,2,4], subfield_val_lists=[[500], [1,2], [5,6,12]])}, {'name': 'and_condition', 'exists_if/and': [(RawCondition('A2'), 'opcode'), (BitFieldCondition(sf=2, val=[5]), 'subopcode')], - 'contents': String(val_list=['and_condition_true'])} + 'contents': String(values=['and_condition_true'])} ]} exists_if_not @@ -948,7 +948,7 @@ it in terms of shapes like illustrated by the example below: {'name': 'shape', 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=[' [!] '])}}, + 'contents': String(values=[' [!] '])}}, 'contents': [ ### SHAPE 1 #### @@ -962,13 +962,13 @@ it in terms of shapes like illustrated by the example below: {'name': 'body', 'separator': {'contents': {'name': 'sep2', - 'contents': String(val_list=['::'])}}, + 'contents': String(values=['::'])}}, 'shape_type': MH.Random, 'contents': [ - {'contents': String(val_list=['AAA']), + {'contents': String(values=['AAA']), 'qty': (0, 4), 'name': 'str1'}, - {'contents': String(val_list=['42']), + {'contents': String(values=['42']), 'name': 'str2'} ]} ]} @@ -1043,7 +1043,7 @@ parameters with space characters (line 12-14). {'name': 'separator_test', 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=['\n'], absorb_regexp='[\r\n|\n]+'), + 'contents': String(values=['\n'], absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, @@ -1053,7 +1053,7 @@ parameters with space characters (line 12-14). 'contents': [ {'name': 'parameters', 'separator': {'contents': {'name': ('sep',2), - 'contents': String(val_list=[' '], absorb_regexp=' +'), + 'contents': String(values=[' '], absorb_regexp=' +'), 'absorb_csts': AbsNoCsts(regexp=True)}}, 'qty': 3, 'contents': [ @@ -1062,19 +1062,19 @@ parameters with space characters (line 12-14). 
{'name': 'color', 'contents': [ {'name': 'id', - 'contents': String(val_list=['color='])}, + 'contents': String(values=['color='])}, {'name': 'val', - 'contents': String(val_list=['red', 'black'])} + 'contents': String(values=['red', 'black'])} ]}, {'name': 'type', 'contents': [ {'name': ('id', 2), - 'contents': String(val_list=['type='])}, + 'contents': String(values=['type='])}, {'name': ('val', 2), - 'contents': String(val_list=['circle', 'cube', 'rectangle'], determinist=False)} + 'contents': String(values=['circle', 'cube', 'rectangle'], determinist=False)} ]}, ]}]}, - {'contents': String(val_list=['AAAA', 'BBBB', 'CCCC'], determinist=False), + {'contents': String(values=['AAAA', 'BBBB', 'CCCC'], determinist=False), 'qty': (4, 6), 'name': 'str'} ]} @@ -1129,15 +1129,15 @@ that purpose the keyword ``exists_if`` with some subclasses of 'shape_type': MH.Ordered, 'contents': [ {'name': 'opcode', - 'contents': String(val_list=['A1', 'A2', 'A3'], determinist=True)}, + 'contents': String(values=['A1', 'A2', 'A3'], determinist=True)}, {'name': 'command_A1', - 'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + 'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'exists_if': (RawCondition('A1'), 'opcode'), 'qty': 3}, {'name': 'command_A2', - 'contents': UINT32_be(int_list=[0xDEAD, 0xBEEF]), + 'contents': UINT32_be(values=[0xDEAD, 0xBEEF]), 'exists_if': (RawCondition('A2'), 'opcode')}, {'name': 'command_A3', @@ -1150,23 +1150,23 @@ that purpose the keyword ``exists_if`` with some subclasses of determinist=False)}, {'name': 'A3_int', - 'contents': UINT16_be(int_list=[10, 20, 30], determinist=False)}, + 'contents': UINT16_be(values=[10, 20, 30], determinist=False)}, {'name': 'A3_deco1', 'exists_if': (IntCondition(10), 'A3_int'), - 'contents': String(val_list=['*1*0*'])}, + 'contents': String(values=['*1*0*'])}, {'name': 'A3_deco2', 'exists_if': (IntCondition([20, 30]), 'A3_int'), - 'contents': String(val_list=['+2+0+3+0+'])} + 'contents': String(values=['+2+0+3+0+'])} ]}, {'name': 'A31_payload', - 'contents': String(val_list=['$ A31_OK $', '$ A31_KO $'], determinist=False), + 'contents': String(values=['$ A31_OK $', '$ A31_KO $'], determinist=False), 'exists_if': (BitFieldCondition(sf=2, val=[6,12]), 'A3_subopcode')}, {'name': 'A32_payload', - 'contents': String(val_list=['$ A32_VALID $', '$ A32_INVALID $'], determinist=False), + 'contents': String(values=['$ A32_VALID $', '$ A32_INVALID $'], determinist=False), 'exists_if': (BitFieldCondition(sf=[0, 1, 2], val=[[500, 501], [1, 2], 5]), 'A3_subopcode')} ]} @@ -1235,7 +1235,7 @@ character string in our case. {'name': 'len', 'type': MH.Generator, 'contents': lambda x: Node('cts', value_type= \ - UINT32_be(int_list=[len(x.to_bytes())])), + UINT32_be(values=[len(x.to_bytes())])), 'node_args': 'payload'}, {'name': 'payload', @@ -1356,11 +1356,11 @@ Finally, let's take the following example that illustrates other 'contents': [ {'name': 'int16', 'qty': (2, 10), - 'contents': UINT16_be(int_list=[16, 1, 6], determinist=False)}, + 'contents': UINT16_be(values=[16, 1, 6], determinist=False)}, {'name': 'int32', 'qty': (3, 8), - 'contents': UINT32_be(int_list=[32, 3, 2], determinist=False)} + 'contents': UINT32_be(values=[32, 3, 2], determinist=False)} ]}, {'name': 'int16_qty', @@ -1482,7 +1482,7 @@ on the encoded form or the decoded form of their node parameters. 
{'name': 'enc', 'contents': [ {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop']) }, + 'contents': String(values=['Plip', 'Plop']) }, {'name': 'crc', 'contents': MH.CRC(vt=UINT32_be, after_encoding=False), 'node_args': ['enc_data', 'data2'], @@ -1496,10 +1496,10 @@ on the encoded form or the decoded form of their node parameters. 'node_args': 'data1', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': UTF16_LE(val_list=['Test!', 'Hello World!']) }, + 'contents': UTF16_LE(values=['Test!', 'Hello World!']) }, ]}, {'name': 'data2', - 'contents': String(val_list=['Red', 'Green', 'Blue']) } + 'contents': String(values=['Red', 'Green', 'Blue']) } ]} This data description will enable you to produce data compliant to the specified encoding schemes @@ -1603,11 +1603,11 @@ Example 1: The basics. # is equivalent to classic = {'name': 'HTTP_version', 'contents': [ - {'name': 'HTTP_version_1', 'contents': String(val_list=["HTTP"])}, - {'name': 'HTTP_version_2', 'contents': String(val_list=["/"])}, + {'name': 'HTTP_version_1', 'contents': String(values=["HTTP"])}, + {'name': 'HTTP_version_2', 'contents': String(values=["/"])}, {'name': 'HTTP_version_3', 'contents': String(alphabet="0123456789", size=1)}, - {'name': 'HTTP_version_4', 'contents': String(val_list=["."])}, + {'name': 'HTTP_version_4', 'contents': String(values=["."])}, {'name': 'HTTP_version_5', 'contents': INT_Str(mini=0, maxi=9)} ]} @@ -1622,8 +1622,8 @@ Example 2: Introducing choice. (Refer to :ref:`dm:nt-keywords`) classic = {'name': 'something', 'shape_type': MH.Pick, 'contents': [ - {'name':'something_1', 'contents':INT_Str(int_list=[333, 444])}, - {'name':'something_1', 'contents':String(val_list=["foo", "bar"])}, + {'name':'something_1', 'contents':INT_Str(values=[333, 444])}, + {'name':'something_1', 'contents':String(values=["foo", "bar"])}, {'name':'something_1', 'contents':String(alphabet="0123456789",size=1)}, {'name':'something_1', 'contents':String(alphabet="th|is", size=1)} ]} @@ -1639,13 +1639,13 @@ Example 3: Using quantifiers and the escape character ``\``. # is equivalent to classic = {'name': 'something', 'contents': [ - {'name': 'something_1', 'contents': String(val_list=["(this"])}, + {'name': 'something_1', 'contents': String(values=["(this"])}, {'name': 'something_2', 'contents': String(alphabet="is", min_sz=3, max_sz=4)}, - {'name': 'something_3', 'contents': String(val_list=["th"])}, + {'name': 'something_3', 'contents': String(values=["th"])}, {'name': 'something_4', 'qty': (1, -1), - 'contents': String(val_list=["e"])}, - {'name': 'something_5', 'contents': String(val_list=["end]"])} ]} + 'contents': String(values=["e"])}, + {'name': 'something_5', 'contents': String(values=["end]"])} ]} Example 4: Invalid regular expressions. 
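
To make the rename concrete before moving on to the tutorial changes: throughout this series, the list-based value-type constructors take ``values`` where they previously took ``val_list`` (strings) or ``int_list`` (integers). Below is a minimal sketch of the post-rename calls, assuming the patched ``framework`` package is importable; the literals are illustrative only:

.. code-block:: python

   from framework.value_types import String, UINT16_be

   # 'values' replaces both the old 'val_list' (strings) and 'int_list' (integers)
   s = String(values=['Test!', 'Hello World!'], max_sz=20)
   i = UINT16_be(values=[10, 20, 30])

   # The range-based form of the integer constructors is unchanged by this series
   r = UINT16_be(mini=10, maxi=0xa0ff)

   # get_value() draws the next value (deterministically by default)
   print(s.get_value(), i.get_value(), r.get_value())

Only the keyword changes: the sets of accepted values and the deterministic/random drawing behaviour are untouched.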
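
The same rename also reaches the node-level API, as the ``framework/data_model.py`` hunks further below show for ``Node.set_values()``. A small hypothetical sketch, assuming ``Node`` and ``Env`` are importable from ``framework.data_model`` as in the test code of this series:

.. code-block:: python

   from framework.data_model import Node, Env

   node = Node('greeting')
   node.set_values(values=['Hello!', 'Bye.'])  # formerly: set_values(val_list=[...])
   node.set_env(Env())           # an Env is needed before the node can be frozen
   print(node.to_bytes())        # serializes the first (default) value: b'Hello!'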
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index cf69e26..bbf58e9 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -1151,14 +1151,14 @@ the PNG data format: {'name': 'PNG_model', 'contents': [ {'name': 'sig', - 'contents': String(val_list=[b'\x89PNG\r\n\x1a\n'], size=8)}, + 'contents': String(values=[b'\x89PNG\r\n\x1a\n'], size=8)}, {'name': 'chunks', 'qty': (2,-1), 'contents': [ {'name': 'len', 'contents': UINT32_be()}, {'name': 'type', - 'contents': String(val_list=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, + 'contents': String(values=['IHDR', 'IEND', 'IDAT', 'PLTE'], size=4)}, {'name': 'data_gen', 'type': MH.Generator, 'contents': lambda x: Node('data', value_type= \ @@ -1330,7 +1330,7 @@ various constructions, and value types. 'section_type': MH.Random, 'contents': [ - {'contents': String(val_list=['OK', 'KO'], size=2), + {'contents': String(values=['OK', 'KO'], size=2), 'name': 'val2', 'qty': (1, -1)}, @@ -1353,7 +1353,7 @@ various constructions, and value types. 'sync_qty_with': 'val1', 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1,4,8])}, + 'contents': SINT8(values=[1,4,8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True}]} @@ -1363,20 +1363,20 @@ various constructions, and value types. {'section_type': MH.Pick, 'weights': (10,5), 'contents': [ - {'contents': String(val_list=['PLIP', 'PLOP'], size=4), + {'contents': String(values=['PLIP', 'PLOP'], size=4), 'name': 'val4'}, - {'contents': SINT16_be(int_list=[-1, -3, -5, 7]), + {'contents': SINT16_be(values=[-1, -3, -5, 7]), 'name': 'val5'} ]}, # block 3 {'section_type': MH.FullyRandom, 'contents': [ - {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + {'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'name': ('val21', 2)}, - {'contents': UINT8(int_list=[2, 4, 6, 8]), + {'contents': UINT8(values=[2, 4, 6, 8]), 'qty': (2, 3), 'name': ('val22', 2)} ]} @@ -1636,16 +1636,16 @@ like that: 'contents': [ {'name': 'prefix', - 'contents': UINT8(int_list=[0xcc, 0xff, 0xee])}, + 'contents': UINT8(values=[0xcc, 0xff, 0xee])}, {'name': 'variable_string', 'contents': String(max_sz=20)}, {'name': 'keycode', - 'contents': UINT16_be(int_list=[0xd2d3, 0xd2fe, 0xd2aa])}, + 'contents': UINT16_be(values=[0xd2d3, 0xd2fe, 0xd2aa])}, {'name': 'variable_suffix', - 'contents': String(val_list=['END', 'THE_END'])} + 'contents': String(values=['END', 'THE_END'])} ]} It works as intended for data generation, but if you want to absorb a @@ -1677,18 +1677,18 @@ what follows: 'contents': [ {'name': 'prefix', - 'contents': UINT8(int_list=[0xcc, 0xff, 0xee])}, + 'contents': UINT8(values=[0xcc, 0xff, 0xee])}, {'name': 'variable_string', 'contents': String(max_sz=20), 'set_attrs': [NodeInternals.Abs_Postpone]}, {'name': 'keycode', - 'contents': UINT16_be(int_list=[0xd2d3, 0xd2fe, 0xd2aa]), + 'contents': UINT16_be(values=[0xd2d3, 0xd2fe, 0xd2aa]), 'absorb_helper': keycode_helper}, {'name': 'variable_suffix', - 'contents': String(val_list=['END', 'THE_END'])} + 'contents': String(values=['END', 'THE_END'])} ]} Notice that we also add a specific attribute to the node @@ -1921,7 +1921,7 @@ another inappropriate separator. 
if orig_val in new_val_list: new_val_list.remove(orig_val) - node.import_value_type(value_type=vtype.String(val_list=new_val_list)) + node.import_value_type(value_type=vtype.String(values=new_val_list)) node.make_finite() node.make_determinist() diff --git a/framework/data_model.py b/framework/data_model.py index fe9fb68..105ebd9 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -4895,7 +4895,7 @@ def __init__(self, name, base_node=None, copy_dico=None, ignore_frozen_state=Fal self.set_subnodes_basic(subnodes) elif values is not None: - self.set_values(val_list=values) + self.set_values(values=values) elif value_type is not None: self.set_values(value_type=value_type) @@ -5387,7 +5387,7 @@ def set_subnodes_full_format(self, full_list, conf=None, separator=None, preserv self._finalize_nonterm_node(conf) - def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglement=False, + def set_values(self, values=None, value_type=None, conf=None, ignore_entanglement=False, preserve_node=True): conf = self.__check_conf(conf) @@ -5396,8 +5396,8 @@ def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglem new_internals.set_contents_from(self.internals[conf]) self.internals[conf] = new_internals - if val_list is not None: - self.internals[conf].import_value_type(value_type=fvt.String(val_list=val_list)) + if values is not None: + self.internals[conf].import_value_type(value_type=fvt.String(values=values)) elif value_type is not None: self.internals[conf].import_value_type(value_type) @@ -5410,7 +5410,7 @@ def set_values(self, val_list=None, value_type=None, conf=None, ignore_entanglem if value_type is not None: value_type = copy.copy(value_type) value_type.make_private(forget_current_state=True) - e.set_values(val_list=copy.copy(val_list), value_type=value_type, conf=conf, ignore_entanglement=True) + e.set_values(values=copy.copy(values), value_type=value_type, conf=conf, ignore_entanglement=True) def set_func(self, func, func_node_arg=None, func_arg=None, diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index e934f5a..6209d12 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -160,7 +160,7 @@ def LEN(vt=fvt.INT_str, base_len=0, ''' def length(vt, set_attrs, clear_attrs, node): blob = node.to_bytes() if after_encoding else node.get_raw_value() - n = Node('cts', value_type=vt(int_list=[len(blob)+base_len])) + n = Node('cts', value_type=vt(values=[len(blob)+base_len])) n.set_semantics(NodeSemantics(['len'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -183,7 +183,7 @@ def QTY(node_name, vt=fvt.INT_str, ''' def qty(node_name, vt, set_attrs, clear_attrs, node): nb = node.cc.get_drawn_node_qty(node_name) - n = Node('cts', value_type=vt(int_list=[nb])) + n = Node('cts', value_type=vt(values=[nb])) n.set_semantics(NodeSemantics(['qty'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -208,7 +208,7 @@ def timestamp(time_format, utc, set_attrs, clear_attrs): else: now = datetime.datetime.now() ts = now.strftime(time_format) - n = Node('cts', value_type=fvt.String(val_list=[ts], size=len(ts))) + n = Node('cts', value_type=fvt.String(values=[ts], size=len(ts))) n.set_semantics(NodeSemantics(['timestamp'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -248,7 +248,7 @@ def crc(vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs, nodes): result = crc_func(s) - n = Node('cts', value_type=vt(int_list=[result])) + n = Node('cts', 
value_type=vt(values=[result])) n.set_semantics(NodeSemantics(['crc'])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -261,7 +261,7 @@ def crc(vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs, nodes): @staticmethod - def WRAP(func, vt=fvt.INT_str, + def WRAP(func, vt=fvt.String, set_attrs=[], clear_attrs=[], after_encoding=True): '''Return a *generator* that returns the result (in the chosen type) of the provided function applied on the concatenation of all @@ -290,11 +290,16 @@ def map_func(vt, func, set_attrs, clear_attrs, nodes): result = func(s) - n = Node('cts', value_type=vt(int_list=[result])) + if issubclass(vt, fvt.String): + result = convert_to_internal_repr(result) + else: + assert isinstance(result, int) + + n = Node('cts', value_type=vt(values=[result])) MH._handle_attrs(n, set_attrs, clear_attrs) return n - vt = MH._validate_int_vt(vt) + vt = MH._validate_vt(vt) return functools.partial(map_func, vt, func, set_attrs, clear_attrs) @staticmethod @@ -335,9 +340,9 @@ def __call__(self, helper): idx = 0 idx = idx % self.vals_sz if issubclass(self.vt, fvt.INT): - vtype = self.vt(int_list=[self.vals[idx]]) + vtype = self.vt(values=[self.vals[idx]]) elif issubclass(self.vt, fvt.String): - vtype = self.vt(val_list=[self.vals[idx]]) + vtype = self.vt(values=[self.vals[idx]]) else: raise NotImplementedError('Value type not supported') @@ -422,7 +427,7 @@ def __call__(self, nodes, helper): base = len(s) off = nodes[-1].get_subnode_off(idx) - n = Node('cts_off', value_type=self.vt(int_list=[base+off])) + n = Node('cts_off', value_type=self.vt(values=[base+off])) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -493,9 +498,9 @@ def __call__(self, node, helper): self.vt = tg_node.get_current_subkind() if issubclass(self.vt, fvt.INT): - vtype = self.vt(int_list=[tg_node.get_raw_value()]) + vtype = self.vt(values=[tg_node.get_raw_value()]) elif issubclass(self.vt, fvt.String): - vtype = self.vt(val_list=[blob]) + vtype = self.vt(values=[blob]) else: raise NotImplementedError('Value type not supported') n = Node('cts', value_type=vtype) @@ -512,9 +517,13 @@ def __call__(self, node, helper): @staticmethod def _validate_int_vt(vt): if not issubclass(vt, fvt.INT): - print("*** WARNING: the value type of typed node requested is not supported!" \ - " Use of 'INT_str' instead.") - vt = fvt.INT_str + raise DataModelDefinitionError("The value type requested is not supported! 
(expected a subclass of INT)")
+        return vt
+
+    @staticmethod
+    def _validate_vt(vt):
+        if not issubclass(vt, fvt.INT) and not issubclass(vt, fvt.String):
+            raise DataModelDefinitionError("The value type requested is not supported!")
         return vt
 
     @staticmethod
@@ -1875,9 +1884,9 @@ def _create_terminal_node(self, name, type, values=None, alphabet=None, qty=None
                            codec=self.codec)),
                 1, 1]
         else:
             if type == fvt.String:
-                node = Node(name=name, vt=fvt.String(val_list=values, codec=self.codec))
+                node = Node(name=name, vt=fvt.String(values=values, codec=self.codec))
             else:
-                node = Node(name=name, vt=fvt.INT_str(int_list=values))
+                node = Node(name=name, vt=fvt.INT_str(values=values))
 
             return [node, qty[0], -1 if qty[1] is None else qty[1]]
 
diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py
index 52492f0..babeb46 100644
--- a/framework/fuzzing_primitives.py
+++ b/framework/fuzzing_primitives.py
@@ -856,7 +856,7 @@ def consume_node(self, node):
         if orig_val in new_val_list:
             new_val_list.remove(orig_val)
 
-        node.cc.import_value_type(value_type=vtype.String(val_list=new_val_list))
+        node.cc.import_value_type(value_type=vtype.String(values=new_val_list))
         # Note that node attributes are not altered by this
         # operation, especially useful in our case, because we have
         # to preserve dm.NodeInternals.Separator
diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py
index 57dc971..beb1432 100644
--- a/framework/generic_data_makers.py
+++ b/framework/generic_data_makers.py
@@ -622,7 +622,7 @@ def disrupt_data(self, dm, target, prev_data):
         if node is None:
             prev_data.update_from_str_or_bytes(out_val)
         else:
-            node.set_values(val_list=[out_val])
+            node.set_values(values=[out_val])
             node.get_value()
 
         return prev_data
@@ -825,7 +825,7 @@ def disrupt_data(self, dm, target, prev_data):
             val = self.new_val
 
         prev_data.add_info('corrupt data: {!s}'.format(truncate_info(val)))
-        i.set_values(val_list=[val])
+        i.set_values(values=[val])
         i.get_value()
 
         ret = prev_data
diff --git a/framework/target.py b/framework/target.py
index 3a48e03..9dca050 100644
--- a/framework/target.py
+++ b/framework/target.py
@@ -1739,7 +1739,7 @@ def send_data(self, data, from_fmk=False):
         if data.node:
             node_list = data.node[NodeSemanticsCriteria(mandatory_criteria=['tel num'])]
             if node_list and len(node_list)==1:
-                node_list[0].set_values(value_type=GSMPhoneNum(val_list=[self.tel_num]))
+                node_list[0].set_values(value_type=GSMPhoneNum(values=[self.tel_num]))
             else:
                 print('\nWARNING: Data does not contain a mobile number.')
             pdu = b''
diff --git a/framework/value_types.py b/framework/value_types.py
index 90c48b8..1b031e3 100644
--- a/framework/value_types.py
+++ b/framework/value_types.py
@@ -107,7 +107,7 @@ def __init__(self, *args, **kargs):
         self._fuzzy_mode = False
         self.init_specific(*args, **kargs)
 
-    def init_specific(self):
+    def init_specific(self, *args, **kargs):
         raise NotImplementedError
 
     def switch_mode(self):
@@ -360,7 +360,7 @@ def _bytes2str(self, val):
     ASCII = codecs.lookup('ascii').name
     LATIN_1 = codecs.lookup('latin-1').name
 
-    def init_specific(self, val_list=None, size=None, min_sz=None,
+    def init_specific(self, values=None, size=None, min_sz=None,
                       max_sz=None, determinist=True, codec='latin-1',
                       extra_fuzzy_list=None, absorb_regexp=None,
                       alphabet=None, min_encoded_sz=None, max_encoded_sz=None, encoding_arg=None):
 
@@ -369,7 +369,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None,
        Initialize the String
 
        Args:
-          val_list: List of the character strings that are considered valid
for the node + values: List of the character strings that are considered valid for the node backed by this *String object*. size: Valid character string size for the node backed by this *String object*. min_sz: Minimum valid size for the character strings for the node backed by @@ -419,7 +419,7 @@ def init_specific(self, val_list=None, size=None, min_sz=None, self.encoding_arg = encoding_arg self.init_encoding_scheme(self.encoding_arg) - self.set_description(val_list=val_list, size=size, min_sz=min_sz, + self.set_description(values=values, size=size, min_sz=min_sz, max_sz=max_sz, determinist=determinist, codec=codec, extra_fuzzy_list=extra_fuzzy_list, absorb_regexp=absorb_regexp, alphabet=alphabet, @@ -668,9 +668,9 @@ def rewind(self): self.drawn_val = None - def _check_sizes(self, val_list): - if val_list is not None: - for v in val_list: + def _check_sizes(self, values): + if values is not None: + for v in values: sz = len(v) if self.max_sz is not None: assert(self.max_sz >= sz >= self.min_sz) @@ -678,7 +678,7 @@ def _check_sizes(self, val_list): assert(sz >= self.min_sz) - def set_description(self, val_list=None, size=None, min_sz=None, + def set_description(self, values=None, size=None, min_sz=None, max_sz=None, determinist=True, codec='latin-1', extra_fuzzy_list=None, absorb_regexp=None, alphabet=None, @@ -712,9 +712,9 @@ def set_description(self, val_list=None, size=None, min_sz=None, else: self.extra_fuzzy_list = None - if val_list is not None: - assert isinstance(val_list, list) - self.val_list = self._str2bytes(val_list) + if values is not None: + assert isinstance(values, list) + self.val_list = self._str2bytes(values) for val in self.val_list: if not self._check_compliance(val, force_max_enc_sz=self.max_enc_sz_provided, force_min_enc_sz=self.min_enc_sz_provided, @@ -750,9 +750,9 @@ def set_description(self, val_list=None, size=None, min_sz=None, elif max_sz is not None: self.max_sz = max_sz self.min_sz = 0 - elif val_list is not None: + elif values is not None: sz = 0 - for v in val_list: + for v in values: length = len(v) if length > sz: sz = length @@ -768,7 +768,7 @@ def set_description(self, val_list=None, size=None, min_sz=None, self.min_sz = 0 self.max_sz = self.DEFAULT_MAX_SZ - self._check_sizes(val_list) + self._check_sizes(values) self.determinist = determinist @@ -1039,16 +1039,16 @@ class INT(VT): # and that mini is not specified by the user - def __init__(self, int_list=None, mini=None, maxi=None, default=None, determinist=True): + def __init__(self, values=None, mini=None, maxi=None, default=None, determinist=True): self.idx = 0 self.determinist = determinist self.exhausted = False self.drawn_val = None self.default = None - if int_list: + if values: assert default is None - self.int_list = list(int_list) + self.int_list = list(values) self.int_list_copy = list(self.int_list) else: @@ -1419,7 +1419,7 @@ def __init__(self, endian=VT.BigEndian, supp_list=None): self.extend_value_list(supp_list) assert(self.int_list is not None) - INT.__init__(self, int_list=self.int_list, determinist=True) + INT.__init__(self, values=self.int_list, determinist=True) def make_private(self, forget_current_state): self.int_list = copy.copy(self.int_list) @@ -2456,7 +2456,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): obj[k].get_value() try: - obj[k] = v(int_list=[0x11,0x12,0x13]) + obj[k] = v(values=[0x11,0x12,0x13]) except TypeError: obj[k] = v() @@ -2466,7 +2466,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): print('\n********\n') try: - obj[k] = 
v(int_list=[0x11,0x12,0x13], determinist=False) + obj[k] = v(values=[0x11,0x12,0x13], determinist=False) except TypeError: print(v().__class__) obj[k] = v() @@ -2537,7 +2537,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): print('\n***** [ String ] *****\n') - t = String(val_list=['AA', 'BBB', 'CCCC'], min_sz=1, max_sz=10, + t = String(values=['AA', 'BBB', 'CCCC'], min_sz=1, max_sz=10, extra_fuzzy_list=['XTRA_1', '', 'XTRA_2']) for i in range(30): @@ -2564,7 +2564,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): print('\n====> New String\n') - t = String(val_list=['AAA', 'BBBB', 'CCCCC'], min_sz=3, max_sz=10) + t = String(values=['AAA', 'BBBB', 'CCCCC'], min_sz=3, max_sz=10) for i in range(30): print(t.get_value()) @@ -2603,7 +2603,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): print('\n====> New String\n') - t = String(val_list=['AAA', 'BBBB', 'CCCCC'], max_sz=10) + t = String(values=['AAA', 'BBBB', 'CCCCC'], max_sz=10) print(t.get_value()) print(t.get_value()) diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 026f8e9..83ff395 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -60,7 +60,7 @@ class TEST_Fuzzy_INT16(Fuzzy_INT16): def __init__(self, endian=None, supp_list=None): self.endian = endian self.idx = 0 - INT.__init__(self, int_list=self.int_list, determinist=True) + INT.__init__(self, values=self.int_list, determinist=True) def is_compatible(self, integer): return False @@ -1408,7 +1408,7 @@ def test_MISC(self): loop_count = 20 e = Node('VT1') - vt = UINT16_be(int_list=[1, 2, 3, 4, 5, 6]) + vt = UINT16_be(values=[1, 2, 3, 4, 5, 6]) e.set_values(value_type=vt) e.set_env(Env()) e.make_determinist(all_conf=True, recursive=True) @@ -1544,7 +1544,7 @@ def test_basics(self): 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep', - 'contents': String(val_list=[' [!] '])}}, + 'contents': String(values=[' [!] 
'])}}, 'contents': [ {'weight': 20, @@ -1559,13 +1559,13 @@ def test_basics(self): 'custo_set': MH.Custo.NTerm.FrozenCopy, 'custo_clear': MH.Custo.NTerm.MutableClone, 'separator': {'contents': {'name': 'sep2', - 'contents': String(val_list=['::'])}}, + 'contents': String(values=['::'])}}, 'shape_type': MH.Random, # ignored in determnist mode 'contents': [ - {'contents': Filename(val_list=['AAA']), + {'contents': Filename(values=['AAA']), 'qty': (0, 4), 'name': 'str'}, - {'contents': UINT8(int_list=[0x3E]), # chr(0x3E) == '>' + {'contents': UINT8(values=[0x3E]), # chr(0x3E) == '>' 'name': 'int'} ]} ]} @@ -1862,12 +1862,12 @@ def setUp(self): pass def test_absorb_nonterm_1(self): - nint_1 = Node('nint1', value_type=UINT16_le(int_list=[0xabcd])) - nint_2 = Node('nint2', value_type=UINT8(int_list=[0xf])) - nint_3 = Node('nint3', value_type=UINT16_be(int_list=[0xeffe])) + nint_1 = Node('nint1', value_type=UINT16_le(values=[0xabcd])) + nint_2 = Node('nint2', value_type=UINT8(values=[0xf])) + nint_3 = Node('nint3', value_type=UINT16_be(values=[0xeffe])) - nstr_1 = Node('str1', value_type=String(val_list=['TBD1'], max_sz=5)) - nstr_2 = Node('str2', value_type=String(val_list=['TBD2'], max_sz=8)) + nstr_1 = Node('str1', value_type=String(values=['TBD1'], max_sz=5)) + nstr_2 = Node('str2', value_type=String(values=['TBD2'], max_sz=8)) vt = BitField(subfield_sizes=[4, 4, 4], subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]], @@ -1901,12 +1901,12 @@ def test_absorb_nonterm_1(self): self.assertEqual(size, len(msg)) def test_absorb_nonterm_2(self): - nint_1 = Node('nint1', value_type=UINT16_le(int_list=[0xcdab, 0xffee])) - nint_2 = Node('nint2', value_type=UINT8(int_list=[0xaf, 0xbf, 0xcf])) - nint_3 = Node('nint3', value_type=UINT16_be(int_list=[0xcfab, 0xeffe])) + nint_1 = Node('nint1', value_type=UINT16_le(values=[0xcdab, 0xffee])) + nint_2 = Node('nint2', value_type=UINT8(values=[0xaf, 0xbf, 0xcf])) + nint_3 = Node('nint3', value_type=UINT16_be(values=[0xcfab, 0xeffe])) - nstr_1 = Node('str1', value_type=String(val_list=['STR1', 'str1'], max_sz=5)) - nstr_2 = Node('str2', value_type=String(val_list=['STR22', 'str222'], max_sz=8)) + nstr_1 = Node('str1', value_type=String(values=['STR1', 'str1'], max_sz=5)) + nstr_2 = Node('str2', value_type=String(values=['STR22', 'str222'], max_sz=8)) top = Node('top') top.set_subnodes_with_csts([ @@ -1930,12 +1930,12 @@ def test_absorb_nonterm_2(self): self.assertEqual(size, len(msg)) def test_absorb_nonterm_3(self): - nint_1 = Node('nint1', value_type=UINT16_le(int_list=[0xcdab, 0xffee])) - nint_2 = Node('nint2', value_type=UINT8(int_list=[0xaf, 0xbf, 0xcf])) - nint_3 = Node('nint3', value_type=UINT16_be(int_list=[0xcfab, 0xeffe])) + nint_1 = Node('nint1', value_type=UINT16_le(values=[0xcdab, 0xffee])) + nint_2 = Node('nint2', value_type=UINT8(values=[0xaf, 0xbf, 0xcf])) + nint_3 = Node('nint3', value_type=UINT16_be(values=[0xcfab, 0xeffe])) - nstr_1 = Node('str1', value_type=String(val_list=['STR1', 'str1'], max_sz=5)) - nstr_2 = Node('str2', value_type=String(val_list=['STR22', 'str222'], max_sz=8)) + nstr_1 = Node('str1', value_type=String(values=['STR1', 'str1'], max_sz=5)) + nstr_2 = Node('str2', value_type=String(values=['STR22', 'str222'], max_sz=8)) top = Node('top') top.set_subnodes_with_csts([ @@ -1964,11 +1964,11 @@ def test_absorb_nonterm_fullyrandom(self): 'contents': [ {'section_type': MH.FullyRandom, 'contents': [ - {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + {'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'qty': (2, 3), 
'name': 'str'}, - {'contents': UINT8(int_list=[2, 4, 6, 8]), + {'contents': UINT8(values=[2, 4, 6, 8]), 'qty': (3, 6), 'name': 'int'} ]} @@ -2013,22 +2013,22 @@ def nint_1_alt_helper(blob, constraints, node_internals): self.helper1_called = True return AbsorbStatus.Reject, 0, None - nint_1 = Node('nint1', value_type=UINT16_le(int_list=[0xabcd, 0xe2e1])) + nint_1 = Node('nint1', value_type=UINT16_le(values=[0xabcd, 0xe2e1])) nint_1.set_absorb_helper(nint_1_helper) - nint_1_alt = Node('nint1_alt', value_type=UINT16_le(int_list=[0xabff, 0xe2ff])) + nint_1_alt = Node('nint1_alt', value_type=UINT16_le(values=[0xabff, 0xe2ff])) nint_1_alt.set_absorb_helper(nint_1_alt_helper) - nint_2 = Node('nint2', value_type=UINT8(int_list=[0xf, 0xff, 0xee])) - nint_3 = Node('nint3', value_type=UINT16_be(int_list=[0xeffe, 0xc1c2, 0x8899])) + nint_2 = Node('nint2', value_type=UINT8(values=[0xf, 0xff, 0xee])) + nint_3 = Node('nint3', value_type=UINT16_be(values=[0xeffe, 0xc1c2, 0x8899])) - nstr_1 = Node('cool', value_type=String(val_list=['TBD1'], size=4, codec='ascii')) + nstr_1 = Node('cool', value_type=String(values=['TBD1'], size=4, codec='ascii')) nstr_1.enforce_absorb_constraints(AbsNoCsts(regexp=True)) - nstr_2 = Node('str2', value_type=String(val_list=['TBD2TBD2', '12345678'], size=8, codec='ascii')) + nstr_2 = Node('str2', value_type=String(values=['TBD2TBD2', '12345678'], size=8, codec='ascii')) - nint_50 = Node('nint50', value_type=UINT8(int_list=[0xaf, 0xbf, 0xcf])) - nint_51 = Node('nint51', value_type=UINT16_be(int_list=[0xcfab, 0xeffe])) - nstr_50 = Node('str50', value_type=String(val_list=['HERE', 'IAM'], max_sz=7)) + nint_50 = Node('nint50', value_type=UINT8(values=[0xaf, 0xbf, 0xcf])) + nint_51 = Node('nint51', value_type=UINT16_be(values=[0xcfab, 0xeffe])) + nstr_50 = Node('str50', value_type=String(values=['HERE', 'IAM'], max_sz=7)) middle1 = Node('middle1') middle1.set_subnodes_with_csts([ @@ -2038,9 +2038,9 @@ def nint_1_alt_helper(blob, constraints, node_internals): 'u=.', [nint_50, 1], [nint_51, 1], [nstr_50, 2, 3]] ]) - yeah = Node('yeah', value_type=String(val_list=['TBD', 'YEAH!'], max_sz=10, codec='ascii')) + yeah = Node('yeah', value_type=String(values=['TBD', 'YEAH!'], max_sz=10, codec='ascii')) - splitter = Node('splitter', value_type=String(val_list=['TBD'], max_sz=10)) + splitter = Node('splitter', value_type=String(values=['TBD'], max_sz=10)) splitter.set_attr(NodeInternals.Abs_Postpone) splitter.enforce_absorb_constraints(AbsNoCsts()) @@ -2052,13 +2052,13 @@ def nint_10_helper(blob, constraints, node_internals): else: return AbsorbStatus.Reject, 0, None - nint_10 = Node('nint10', value_type=UINT16_be(int_list=[0xcbbc, 0xd2d3])) + nint_10 = Node('nint10', value_type=UINT16_be(values=[0xcbbc, 0xd2d3])) nint_10.set_absorb_helper(nint_10_helper) - nstr_10 = Node('str10', value_type=String(val_list=['TBD', 'THE_END'], max_sz=7)) + nstr_10 = Node('str10', value_type=String(values=['TBD', 'THE_END'], max_sz=7)) - delim = Node('delim', value_type=String(val_list=[','], size=1)) - nint_20 = Node('nint20', value_type=INT_str(int_list=[1, 2, 3])) - nint_21 = Node('nint21', value_type=UINT8(int_list=[0xbb])) + delim = Node('delim', value_type=String(values=[','], size=1)) + nint_20 = Node('nint20', value_type=INT_str(values=[1, 2, 3])) + nint_21 = Node('nint21', value_type=UINT8(values=[0xbb])) bottom = Node('bottom', subnodes=[delim, nint_20, nint_21]) bottom2 = Node('bottom2', base_node=bottom) @@ -2183,7 +2183,7 @@ def test_exist_condition_02(self): 'contents': [ {'name': 'opcode', 
'determinist': True, - 'contents': String(val_list=['A3', 'A2'])}, + 'contents': String(values=['A3', 'A2'])}, {'name': 'command_A3', 'exists_if': (RawCondition('A3'), 'opcode'), @@ -2196,27 +2196,27 @@ def test_exist_condition_02(self): {'name': 'A3_int', 'determinist': True, - 'contents': UINT16_be(int_list=[10, 20, 30])}, + 'contents': UINT16_be(values=[10, 20, 30])}, {'name': 'A3_deco1', 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], - 'contents': String(val_list=['$ and_OK $'])}, + 'contents': String(values=['$ and_OK $'])}, {'name': 'A3_deco2', 'exists_if/and': [(IntCondition(val=[10]), 'A3_int'), (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], - 'contents': String(val_list=['! and_KO !'])} + 'contents': String(values=['! and_KO !'])} ]}, {'name': 'A31_payload1', - 'contents': String(val_list=['$ or_OK $']), + 'contents': String(values=['$ or_OK $']), 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), (BitFieldCondition(sf=2, val=[5]), 'A3_subopcode')], }, {'name': 'A31_payload2', - 'contents': String(val_list=['! or_KO !']), + 'contents': String(values=['! or_KO !']), 'exists_if/or': [(IntCondition(val=[20]), 'A3_int'), (BitFieldCondition(sf=2, val=[6]), 'A3_subopcode')], }, @@ -2240,44 +2240,44 @@ def test_generalized_exist_cond(self): gen_exist_desc = \ {'name': 'gen_exist_cond', 'separator': {'contents': {'name': 'sep_nl', - 'contents': String(val_list=['\n'], max_sz=100, absorb_regexp='[\r\n|\n]+'), + 'contents': String(values=['\n'], max_sz=100, absorb_regexp='[\r\n|\n]+'), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': False, 'suffix': False, 'unique': True}, 'contents': [ {'name': 'body', 'qty': 7, 'separator': {'contents': {'name': 'sep_space', - 'contents': String(val_list=[' '], max_sz=100, absorb_regexp=b'\s+'), + 'contents': String(values=[' '], max_sz=100, absorb_regexp=b'\s+'), 'absorb_csts': AbsNoCsts(size=True, regexp=True)}, 'prefix': False, 'suffix': False, 'unique': True}, 'contents': [ {'name': 'val_blk', 'separator': {'contents': {'name': 'sep_quote', - 'contents': String(val_list=['"'])}, + 'contents': String(values=['"'])}, 'prefix': False, 'suffix': True, 'unique': True}, 'contents': [ {'name': 'key', - 'contents': String(val_list=['value='])}, + 'contents': String(values=['value='])}, {'name': 'val1', - 'contents': String(val_list=['Toulouse', 'Paris', 'Lyon']), + 'contents': String(values=['Toulouse', 'Paris', 'Lyon']), 'exists_if': (RawCondition('Location'), 'param')}, {'name': 'val2', - 'contents': String(val_list=['2015/10/08']), + 'contents': String(values=['2015/10/08']), 'exists_if': (RawCondition('Date'), 'param')}, {'name': 'val3', - 'contents': String(val_list=['10:40:42']), + 'contents': String(values=['10:40:42']), 'exists_if': (RawCondition('Time'), 'param')}, {'name': 'val4', - 'contents': String(val_list=['NOT_SUPPORTED']), + 'contents': String(values=['NOT_SUPPORTED']), 'exists_if': (RawCondition(['NOTSUP1', 'NOTSUP2', 'NOTSUP3']), 'param')} ]}, {'name': 'name_blk', 'separator': {'contents': {'name': ('sep_quote', 2), - 'contents': String(val_list=['"'])}, + 'contents': String(values=['"'])}, 'prefix': False, 'suffix': True, 'unique': True}, 'contents': [ {'name': ('key', 2), - 'contents': String(val_list=['name='])}, + 'contents': String(values=['name='])}, {'name': 'param', 'contents': MH.CYCLE(['NOTSUP1', 'Date', 'Time', 'NOTSUP2', 'NOTSUP3', 'Location'], depth=2)} @@ -2314,20 +2314,20 @@ def test_pick_and_cond(self): 'contents': [ {'name': 'opcode', 'determinist': True, 
- 'contents': String(val_list=['A1', 'A2', 'A3'])}, + 'contents': String(values=['A1', 'A2', 'A3'])}, {'name': 'part1', 'determinist': True, 'shape_type': MH.Pick, 'contents': [ {'name': 'option2', 'exists_if': (RawCondition('A2'), 'opcode'), - 'contents': String(val_list=[' 1_KO_A2'])}, + 'contents': String(values=[' 1_KO_A2'])}, {'name': 'option3', 'exists_if': (RawCondition('A3'), 'opcode'), - 'contents': String(val_list=[' 1_KO_A3'])}, + 'contents': String(values=[' 1_KO_A3'])}, {'name': 'option1', 'exists_if': (RawCondition('A1'), 'opcode'), - 'contents': String(val_list=[' 1_OK_A1'])}, + 'contents': String(values=[' 1_OK_A1'])}, ]}, {'name': 'part2', 'determinist': False, @@ -2336,13 +2336,13 @@ def test_pick_and_cond(self): 'contents': [ {'name': 'optionB', 'exists_if': (RawCondition('A2'), 'opcode'), - 'contents': String(val_list=[' 2_KO_A2'])}, + 'contents': String(values=[' 2_KO_A2'])}, {'name': 'optionC', 'exists_if': (RawCondition('A3'), 'opcode'), - 'contents': String(val_list=[' 2_KO_A3'])}, + 'contents': String(values=[' 2_KO_A3'])}, {'name': 'optionA', 'exists_if': (RawCondition('A1'), 'opcode'), - 'contents': String(val_list=[' 2_OK_A1'])}, + 'contents': String(values=[' 2_OK_A1'])}, ]}, ]} @@ -2451,13 +2451,13 @@ def test_infinity(self): {'name': 'infinity', 'contents': [ {'name': 'prefix', - 'contents': String(val_list=['A']), + 'contents': String(values=['A']), 'qty': (2, -1)}, {'name': 'mid', - 'contents': String(val_list=['H']), + 'contents': String(values=['H']), 'qty': -1}, {'name': 'suffix', - 'contents': String(val_list=['Z']), + 'contents': String(values=['Z']), 'qty': (2, -1)}, ]} @@ -2509,7 +2509,7 @@ def test_separator(self): {'name': 'test', 'determinist': True, 'separator': {'contents': {'name': 'SEP', - 'contents': String(val_list=[' ', ' ', ' '], + 'contents': String(values=[' ', ' ', ' '], absorb_regexp='\s+', determinist=False), 'absorb_csts': AbsNoCsts(regexp=True)}, 'prefix': True, @@ -2518,32 +2518,32 @@ def test_separator(self): 'contents': [ {'section_type': MH.FullyRandom, 'contents': [ - {'contents': String(val_list=['AAA', 'BBBB', 'CCCCC']), + {'contents': String(values=['AAA', 'BBBB', 'CCCCC']), 'qty': (3, 5), 'name': 'str'}, - {'contents': String(val_list=['1', '22', '333']), + {'contents': String(values=['1', '22', '333']), 'qty': (3, 5), 'name': 'int'} ]}, {'section_type': MH.Random, 'contents': [ - {'contents': String(val_list=['WW', 'YYY', 'ZZZZ']), + {'contents': String(values=['WW', 'YYY', 'ZZZZ']), 'qty': (2, 2), 'name': 'str2'}, - {'contents': UINT16_be(int_list=[0xFFFF, 0xAAAA, 0xCCCC]), + {'contents': UINT16_be(values=[0xFFFF, 0xAAAA, 0xCCCC]), 'qty': (3, 3), 'name': 'int2'} ]}, {'section_type': MH.Pick, 'contents': [ - {'contents': String(val_list=['LAST', 'END']), + {'contents': String(values=['LAST', 'END']), 'qty': (2, 2), 'name': 'str3'}, - {'contents': UINT16_be(int_list=[0xDEAD, 0xBEEF]), + {'contents': UINT16_be(values=[0xDEAD, 0xBEEF]), 'qty': (2, 2), 'name': 'int3'} ]} @@ -2583,7 +2583,7 @@ def test_encoding_attr(self): {'name': 'enc', 'contents': [ {'name': 'data0', - 'contents': String(val_list=['Plip', 'Plop'])}, + 'contents': String(values=['Plip', 'Plop'])}, {'name': 'crc', 'contents': MH.CRC(vt=UINT32_be, after_encoding=False), 'node_args': ['enc_data', 'data2'], @@ -2597,10 +2597,10 @@ def test_encoding_attr(self): 'node_args': 'data1', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'data1', - 'contents': String(val_list=['Test!', 'Hello World!'], codec='utf-16-le')}, + 'contents': String(values=['Test!', 
'Hello World!'], codec='utf-16-le')}, ]}, {'name': 'data2', - 'contents': String(val_list=['Red', 'Green', 'Blue'])}, + 'contents': String(values=['Red', 'Green', 'Blue'])}, ]} mh = ModelHelper() @@ -2647,12 +2647,12 @@ def test_str_alphabet(self): {'name': 'top', 'contents': [ {'name': 'alpha1', - 'contents': String(min_sz=10, max_sz=100, val_list=['A' * 10], alphabet=alphabet1), + 'contents': String(min_sz=10, max_sz=100, values=['A' * 10], alphabet=alphabet1), 'set_attrs': [NodeInternals.Abs_Postpone]}, {'name': 'alpha2', 'contents': String(min_sz=10, max_sz=100, alphabet=alphabet2)}, {'name': 'end', - 'contents': String(val_list=['END'])}, + 'contents': String(values=['END'])}, ]} mh = ModelHelper() @@ -2725,9 +2725,9 @@ def decode(self, val): 'node_args': 'user_data', 'absorb_csts': AbsFullCsts(contents=False)}, {'name': 'user_data', - 'contents': EncodedStr(val_list=data, codec='utf8')}, + 'contents': EncodedStr(values=data, codec='utf8')}, {'name': 'compressed_data', - 'contents': GZIP(val_list=data, encoding_arg=6)} + 'contents': GZIP(values=data, encoding_arg=6)} ]} mh = ModelHelper() @@ -2836,7 +2836,7 @@ def test_encoded_str_2(self): 'contents': UINT8()}, {'name': 'user_data', 'sync_enc_size_with': 'len', - 'contents': String(val_list=['TEST'], codec='utf8')}, + 'contents': String(values=['TEST'], codec='utf8')}, {'name': 'padding', 'contents': String(max_sz=0), 'absorb_csts': AbsNoCsts()}, @@ -2902,7 +2902,7 @@ def test_create_graph(self): 'section_type': MH.Ordered, 'contents': [ - {'contents': String(val_list=['OK', 'KO'], size=2), + {'contents': String(values=['OK', 'KO'], size=2), 'name': 'val2'}, {'name': 'val21', @@ -2924,7 +2924,7 @@ def test_create_graph(self): 'sync_qty_with': 'val1', 'alt': [ {'conf': 'alt1', - 'contents': SINT8(int_list=[1, 4, 8])}, + 'contents': SINT8(values=[1, 4, 8])}, {'conf': 'alt2', 'contents': UINT16_be(mini=0xeeee, maxi=0xff56), 'determinist': True}]} @@ -2933,10 +2933,10 @@ def test_create_graph(self): # block 2 {'section_type': MH.Pick, 'contents': [ - {'contents': String(val_list=['PLIP', 'PLOP'], size=4), + {'contents': String(values=['PLIP', 'PLOP'], size=4), 'name': ('val21', 2)}, - {'contents': SINT16_be(int_list=[-1, -3, -5, 7]), + {'contents': SINT16_be(values=[-1, -3, -5, 7]), 'name': ('val22', 2)} ]} ]} @@ -3228,12 +3228,12 @@ def test_regex(self, regex_node_name): HTTP_version_classic = \ {'name': 'HTTP_version_classic', 'contents': [ - {'name': 'HTTP_name', 'contents': String(val_list=["HTTP"])}, - {'name': 'slash', 'contents': String(val_list=["/"])}, - {'name': 'major_version_digit', 'contents': String(size=1, val_list=["0", "1", "2", "3", "4", + {'name': 'HTTP_name', 'contents': String(values=["HTTP"])}, + {'name': 'slash', 'contents': String(values=["/"])}, + {'name': 'major_version_digit', 'contents': String(size=1, values=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])}, - {'name': '.', 'contents': String(val_list=["."])}, + {'name': '.', 'contents': String(values=["."])}, {'name': 'minor_version_digit', 'clone': 'major_version_digit'}, ]} From 950b62aa8b5d154c6814797d13a30ea875339766 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 19:57:01 +0200 Subject: [PATCH 72/80] Rename INT class internal attributes --- framework/data_model.py | 35 +++---- framework/fuzzing_primitives.py | 4 +- framework/value_types.py | 140 +++++++++++++-------------- test/integration/test_integration.py | 6 +- 4 files changed, 91 insertions(+), 94 deletions(-) diff --git a/framework/data_model.py b/framework/data_model.py 
index 105ebd9..11a5bb7 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -1201,9 +1201,6 @@ def is_frozen(self): def pretty_print(self, max_size=None): return None - def _get_value(self, conf=None, recursive=True, return_node_internals=False): - raise NotImplementedError - def reset_depth_specific(self, depth): pass @@ -5885,17 +5882,17 @@ def tobytes_helper(node_internals): return node_internals._get_value(conf=conf, recursive=recursive, return_node_internals=False)[0] - node_int_list = self.freeze(conf=conf, recursive=recursive) - if isinstance(node_int_list, list): - node_int_list = list(flatten(node_int_list)) - if node_int_list: - if issubclass(node_int_list[0].__class__, NodeInternals): - node_int_list = list(map(tobytes_helper, node_int_list)) - val = b''.join(node_int_list) + node_internals_list = self.freeze(conf=conf, recursive=recursive) + if isinstance(node_internals_list, list): + node_internals_list = list(flatten(node_internals_list)) + if node_internals_list: + if issubclass(node_internals_list[0].__class__, NodeInternals): + node_internals_list = list(map(tobytes_helper, node_internals_list)) + val = b''.join(node_internals_list) else: val = b'' else: - val = node_int_list + val = node_internals_list return val @@ -5924,17 +5921,17 @@ def tobytes_helper(node_internals): return node_internals._get_value(conf=conf, recursive=recursive, return_node_internals=False)[0] - node_int_list = self._get_value(conf=conf, recursive=recursive) - if isinstance(node_int_list, list): - node_int_list = list(flatten(node_int_list)) - if node_int_list: - if issubclass(node_int_list[0].__class__, NodeInternals): - node_int_list = list(map(tobytes_helper, node_int_list)) - val = b''.join(node_int_list) + node_internals_list = self._get_value(conf=conf, recursive=recursive) + if isinstance(node_internals_list, list): + node_internals_list = list(flatten(node_internals_list)) + if node_internals_list: + if issubclass(node_internals_list[0].__class__, NodeInternals): + node_internals_list = list(map(tobytes_helper, node_internals_list)) + val = b''.join(node_internals_list) else: val = b'' else: - val = node_int_list + val = node_internals_list return val diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index babeb46..319f7db 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -781,8 +781,8 @@ def _extend_fuzzy_vt_list(flist, e): # don't use a set to preserve determinism if needed supp_list = [val + 1, val - 1] - if vt.int_list is not None: - orig_set = set(vt.int_list) + if vt.values is not None: + orig_set = set(vt.values) max_oset = max(orig_set) min_oset = min(orig_set) if min_oset != max_oset: diff --git a/framework/value_types.py b/framework/value_types.py index 1b031e3..f9368ae 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -1048,29 +1048,29 @@ def __init__(self, values=None, mini=None, maxi=None, default=None, determinist= if values: assert default is None - self.int_list = list(values) - self.int_list_copy = list(self.int_list) + self.values = list(values) + self.values_copy = list(self.values) else: if mini is not None and maxi is not None: assert maxi >= mini if mini is not None and maxi is not None and abs(maxi - mini) < 200: - self.int_list = list(range(mini, maxi+1)) + self.values = list(range(mini, maxi + 1)) # we keep min/max information as it may be valuable for fuzzing self.mini = self.mini_gen = mini self.maxi = self.maxi_gen = maxi if default is not None: assert mini <= 
default <= maxi - self.int_list.remove(default) - self.int_list.insert(0,default) + self.values.remove(default) + self.values.insert(0, default) # Once inserted at this place, its position is preserved, especially with reset_state() # (assuming do_absorb() is not called), so we do not save 'default' value in this case - self.int_list_copy = copy.copy(self.int_list) + self.values_copy = copy.copy(self.values) else: - self.int_list = None - self.int_list_copy = None + self.values = None + self.values_copy = None if self.mini is not None: self.mini = max(mini, self.mini) if mini is not None else self.mini self.mini_gen = self.mini @@ -1101,26 +1101,26 @@ def __init__(self, values=None, mini=None, maxi=None, default=None, determinist= def make_private(self, forget_current_state): # no need to copy self.default (that should not be modified) if forget_current_state: - self.int_list_copy = copy.copy(self.int_list) + self.values_copy = copy.copy(self.values) self.idx = 0 self.exhausted = False self.drawn_val = None else: - self.int_list_copy = copy.copy(self.int_list_copy) + self.values_copy = copy.copy(self.values_copy) def absorb_auto_helper(self, blob, constraints): off = 0 # If 'Contents' constraint is set, we seek for int within - # int_list. - # If INT() does not have int_list, we assume off==0 + # values. + # If INT() does not have values, we assume off==0 # and let do_absorb() decide if it's OK. - if constraints[AbsCsts.Contents] and self.int_list is not None: - for v in self.int_list: + if constraints[AbsCsts.Contents] and self.values is not None: + for v in self.values: if blob.startswith(self._convert_value(v)): break else: - for v in self.int_list: + for v in self.values: off = blob.find(self._convert_value(v)) if off > -1: break @@ -1133,8 +1133,8 @@ def absorb_auto_helper(self, blob, constraints): def do_absorb(self, blob, constraints, off=0, size=None): - self.orig_int_list = copy.copy(self.int_list) - self.orig_int_list_copy = copy.copy(self.int_list_copy) + self.orig_values = copy.copy(self.values) + self.orig_values_copy = copy.copy(self.values_copy) self.orig_drawn_val = self.drawn_val blob = blob[off:] @@ -1142,19 +1142,19 @@ def do_absorb(self, blob, constraints, off=0, size=None): val, sz = self._read_value_from(blob, size) orig_val = self._unconvert_value(val) - if self.int_list is not None: + if self.values is not None: if constraints[AbsCsts.Contents]: - if orig_val not in self.int_list: + if orig_val not in self.values: raise ValueError('contents not valid!') - self.int_list.insert(0, orig_val) - self.int_list_copy = copy.copy(self.int_list) + self.values.insert(0, orig_val) + self.values_copy = copy.copy(self.values) else: if constraints[AbsCsts.Contents]: if self.maxi is not None and orig_val > self.maxi: raise ValueError('contents not valid! (max limit)') if self.mini is not None and orig_val < self.mini: raise ValueError('contents not valid! (min limit)') - # self.int_list = [orig_val] + # self.values = [orig_val] self.idx = orig_val - self.mini # self.reset_state() @@ -1169,14 +1169,14 @@ def do_revert_absorb(self): If needed should be called just after self.do_absorb(). 
''' if hasattr(self, 'orig_drawn_val'): - self.int_list = self.orig_int_list - self.int_list_copy = self.orig_int_list_copy + self.values = self.orig_values + self.values_copy = self.orig_values_copy self.drawn_val = self.orig_drawn_val def do_cleanup_absorb(self): if hasattr(self, 'orig_drawn_val'): - del self.orig_int_list - del self.orig_int_list_copy + del self.orig_values + del self.orig_values_copy del self.orig_drawn_val def make_determinist(self): @@ -1186,7 +1186,7 @@ def make_random(self): self.determinist = False def get_value_list(self): - return self.int_list + return self.values def get_current_raw_val(self): if self.drawn_val is None: @@ -1201,69 +1201,69 @@ def is_compatible(self, integer): def set_value_list(self, new_list): ret = False - if self.int_list: + if self.values: l = list(filter(self.is_compatible, new_list)) if l: - self.int_list = l - self.int_list_copy = copy.copy(self.int_list) + self.values = l + self.values_copy = copy.copy(self.values) self.idx = 0 ret = True return ret def extend_value_list(self, new_list): - if self.int_list is not None: + if self.values is not None: l = list(filter(self.is_compatible, new_list)) if l: - int_list_enc = list(map(self._convert_value, self.int_list)) + values_enc = list(map(self._convert_value, self.values)) # We copy the list as it is a class attribute in # Fuzzy_* classes, and we don't want to change the classes # (as we modify the list contents and not the list itself) - self.int_list = list(self.int_list) + self.values = list(self.values) # we don't use a set to preserve the order for v in l: # we check the converted value to avoid duplicated # values (negative and positive value coded the # same) --> especially usefull for the Fuzzy_INT class - if self._convert_value(v) not in int_list_enc: - self.int_list.insert(0, v) + if self._convert_value(v) not in values_enc: + self.values.insert(0, v) self.idx = 0 - self.int_list_copy = copy.copy(self.int_list) + self.values_copy = copy.copy(self.values) def remove_value_list(self, value_list): - if self.int_list is not None: + if self.values is not None: l = list(filter(self.is_compatible, value_list)) if l: # We copy the list as it is a class attribute in # Fuzzy_* classes, and we don't want to change the classes # (as we modify the list contents and not the list itself) - self.int_list = list(self.int_list) + self.values = list(self.values) for v in l: try: - self.int_list.remove(v) + self.values.remove(v) except ValueError: pass self.idx = 0 - self.int_list_copy = copy.copy(self.int_list) + self.values_copy = copy.copy(self.values) def get_value(self): - if self.int_list is not None: - if not self.int_list_copy: - self.int_list_copy = copy.copy(self.int_list) + if self.values is not None: + if not self.values_copy: + self.values_copy = copy.copy(self.values) if self.determinist: - val = self.int_list_copy.pop(0) + val = self.values_copy.pop(0) else: - val = random.choice(self.int_list_copy) - self.int_list_copy.remove(val) - if not self.int_list_copy: - self.int_list_copy = copy.copy(self.int_list) + val = random.choice(self.values_copy) + self.values_copy.remove(val) + if not self.values_copy: + self.values_copy = copy.copy(self.values) self.exhausted = True else: self.exhausted = False @@ -1279,7 +1279,7 @@ def get_value(self): else: # Finite mode is implemented in this way when 'max - # min' is considered too big to be transformed as an - # 'int_list'. It avoids cunsuming too much memory and + # 'values'. 
It avoids cunsuming too much memory and # provide an end result that seems sufficient for such # situation val = random.randint(self.mini_gen, self.maxi_gen) @@ -1317,9 +1317,9 @@ def rewind(self): if self.exhausted: self.exhausted = False - if self.int_list is not None: - if self.int_list_copy is not None and self.drawn_val is not None: - self.int_list_copy.insert(0, self.drawn_val) + if self.values is not None: + if self.values_copy is not None and self.drawn_val is not None: + self.values_copy.insert(0, self.drawn_val) else: if self.idx > 0: self.idx -= 1 @@ -1349,16 +1349,16 @@ def reset_state(self): self.idx = self.default - self.mini_gen else: self.idx = 0 - if self.int_list is not None: - self.int_list_copy = copy.copy(self.int_list) + if self.values is not None: + self.values_copy = copy.copy(self.values) self.exhausted = False self.drawn_val = None def update_raw_value(self, val): if isinstance(val, int): - if self.int_list is not None: - self.int_list.append(val) - self.int_list_copy = copy.copy(self.int_list) + if self.values is not None: + self.values.append(val) + self.values_copy = copy.copy(self.values) else: self.idx = val - self.mini else: @@ -1410,7 +1410,7 @@ class Fuzzy_INT(INT): ''' Base class to be inherited and not used directly ''' - int_list = None + values = None short_cformat = None def __init__(self, endian=VT.BigEndian, supp_list=None): @@ -1418,11 +1418,11 @@ def __init__(self, endian=VT.BigEndian, supp_list=None): if supp_list: self.extend_value_list(supp_list) - assert(self.int_list is not None) - INT.__init__(self, values=self.int_list, determinist=True) + assert(self.values is not None) + INT.__init__(self, values=self.values, determinist=True) def make_private(self, forget_current_state): - self.int_list = copy.copy(self.int_list) + self.values = copy.copy(self.values) def is_compatible(self, integer): if self.mini <= integer <= self.maxi: @@ -1478,7 +1478,7 @@ def _str2bytes(self, val): #class Fuzzy_INT_str(Fuzzy_INT, metaclass=meta_int_str): class Fuzzy_INT_str(with_metaclass(meta_int_str, Fuzzy_INT)): - int_list = [0, 2**32-1, 2**32] + values = [0, 2 ** 32 - 1, 2 ** 32] def is_compatible(self, integer): return True @@ -2291,7 +2291,7 @@ class UINT8(INT8): class Fuzzy_INT8(with_metaclass(meta_8b, Fuzzy_INT)): mini = 0 maxi = 2**8-1 - int_list = [0xFF, 0, 0x01, 0x80, 0x7F] + values = [0xFF, 0, 0x01, 0x80, 0x7F] short_cformat = 'B' alt_short_cformat = 'b' @@ -2330,14 +2330,14 @@ class UINT16_le(INT16): class Fuzzy_INT16(with_metaclass(meta_16b, Fuzzy_INT)): mini = 0 maxi = 2**16-1 - int_list = [0xFFFF, 0, 0x8000, 0x7FFF] + values = [0xFFFF, 0, 0x8000, 0x7FFF] short_cformat = 'H' alt_short_cformat = 'h' # class Other_Fuzzy_INT16(Fuzzy_INT16): # mini = 0 # maxi = 2**16-1 -# int_list = [0xDEAD, 0xBEEF, 0xCAFE] +# values = [0xDEAD, 0xBEEF, 0xCAFE] # short_cformat = 'H' # alt_short_cformat = 'h' @@ -2375,14 +2375,14 @@ class UINT32_le(INT32): class Fuzzy_INT32(with_metaclass(meta_32b, Fuzzy_INT)): mini = 0 maxi = 2**32-1 - int_list = [0xFFFFFFFF, 0, 0x80000000, 0x7FFFFFFF] + values = [0xFFFFFFFF, 0, 0x80000000, 0x7FFFFFFF] short_cformat = 'L' alt_short_cformat = 'l' # class Other_Fuzzy_INT32(Fuzzy_INT32): # mini = 0 # maxi = 2**32-1 -# int_list = [0xDEADBEEF, 0xAAAAAAAA] +# values = [0xDEADBEEF, 0xAAAAAAAA] # short_cformat = 'L' # alt_short_cformat = 'l' @@ -2420,14 +2420,14 @@ class UINT64_le(INT64): class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): mini = 0 maxi = 2**64-1 - int_list = [0xFFFFFFFFFFFFFFFF, 0, 0x8000000000000000, 0x7FFFFFFFFFFFFFFF, 
0x1111111111111111] + values = [0xFFFFFFFFFFFFFFFF, 0, 0x8000000000000000, 0x7FFFFFFFFFFFFFFF, 0x1111111111111111] short_cformat = 'Q' alt_short_cformat = 'q' # class Other_Fuzzy_INT64(Fuzzy_INT64): # mini = 0 # maxi = 2**64-1 -# int_list = [0xDEADBEEFDEADBEEF, 0xAAAAAAAAAAAAAAAA] +# values = [0xDEADBEEFDEADBEEF, 0xAAAAAAAAAAAAAAAA] # short_cformat = 'Q' # alt_short_cformat = 'q' @@ -2507,7 +2507,7 @@ class Fuzzy_INT64(with_metaclass(meta_64b, Fuzzy_INT)): print('\n***\n') - t = UINT16_le(int_list = range(100,400,4)) + t = UINT16_le(values=range(100,400,4)) print('size: ', t.size) print('class: ', t.__class__) print('compatible classes: ') diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 83ff395..d1a6f67 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -55,12 +55,12 @@ def tearDownModule(): class TEST_Fuzzy_INT16(Fuzzy_INT16): - int_list = ['TEST_OK', 'BLABLA', 'PLOP'] + values = ['TEST_OK', 'BLABLA', 'PLOP'] def __init__(self, endian=None, supp_list=None): self.endian = endian self.idx = 0 - INT.__init__(self, values=self.int_list, determinist=True) + INT.__init__(self, values=self.values, determinist=True) def is_compatible(self, integer): return False @@ -947,7 +947,7 @@ def test_TypedNode_1(self): print('\nTurn number when Node has changed: %r, number of test cases: %d' % (turn_nb_list, i)) good_list = [1, 13, 23, 33, 43, 52, 61, 71, 81, 91, 103, 113, 123, 133, 143, 152, 162, 172, 182, 191, 200, 206, 221] - msg = "If Fuzzy_.int_list have been modified in size, the good_list should be updated.\n" \ + msg = "If Fuzzy_.values have been modified in size, the good_list should be updated.\n" \ "If BitField are in random mode [currently put in determinist mode], the fuzzy_mode can produce more" \ " or less value depending on drawn value when .get_value() is called (if the drawn value is" \ " the max for instance, drawn_value+1 will not be produced)" From 0d35325f70791fb481d71cecae2425e58ce5db62 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Sun, 14 Aug 2016 20:24:52 +0200 Subject: [PATCH 73/80] Rename a BitField constructor parameter + rename String and BitField internal attributes --- data_models/example.py | 10 +- data_models/file_formats/pdf.py | 8 +- data_models/file_formats/zip.py | 2 +- data_models/protocols/pppoe.py | 4 +- data_models/protocols/sms.py | 32 +-- data_models/protocols/usb.py | 16 +- data_models/tuto.py | 4 +- docs/source/data_model.rst | 12 +- docs/source/tutorial.rst | 14 +- framework/fuzzing_primitives.py | 12 +- framework/value_types.py | 300 +++++++++++++-------------- test/integration/test_integration.py | 48 ++--- 12 files changed, 231 insertions(+), 231 deletions(-) diff --git a/data_models/example.py b/data_models/example.py index f6b7de0..d7b9ac1 100644 --- a/data_models/example.py +++ b/data_models/example.py @@ -177,7 +177,7 @@ def build_data_model(self): prefix.make_determinist() te3 = Node('EVT3') - te3.set_values(value_type=BitField(subfield_sizes=[4,4], subfield_val_lists=[[0x5, 0x6], [0xF, 0xC]])) + te3.set_values(value_type=BitField(subfield_sizes=[4,4], subfield_values=[[0x5, 0x6], [0xF, 0xC]])) te3.set_fuzz_weight(8) # te3.make_determinist() @@ -192,7 +192,7 @@ def build_data_model(self): te5.set_fuzz_weight(6) te6 = Node('EVT6') - vt = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[4,2,1],[2,15,16,3],[2,3,0],[1]], + vt = BitField(subfield_limits=[2,6,8,10], subfield_values=[[4,2,1],[2,15,16,3],[2,3,0],[1]], padding=0, lsb_padding=True, 
diff --git a/data_models/example.py b/data_models/example.py
index f6b7de0..d7b9ac1 100644
--- a/data_models/example.py
+++ b/data_models/example.py
@@ -177,7 +177,7 @@ def build_data_model(self):
         prefix.make_determinist()
 
         te3 = Node('EVT3')
-        te3.set_values(value_type=BitField(subfield_sizes=[4,4], subfield_val_lists=[[0x5, 0x6], [0xF, 0xC]]))
+        te3.set_values(value_type=BitField(subfield_sizes=[4,4], subfield_values=[[0x5, 0x6], [0xF, 0xC]]))
         te3.set_fuzz_weight(8)
         # te3.make_determinist()
@@ -192,7 +192,7 @@ def build_data_model(self):
         te5.set_fuzz_weight(6)
 
         te6 = Node('EVT6')
-        vt = BitField(subfield_limits=[2,6,8,10], subfield_val_lists=[[4,2,1],[2,15,16,3],[2,3,0],[1]],
+        vt = BitField(subfield_limits=[2,6,8,10], subfield_values=[[4,2,1],[2,15,16,3],[2,3,0],[1]],
                       padding=0, lsb_padding=True, endian=VT.LittleEndian)
         te6.set_values(value_type=vt)
         te6.set_fuzz_weight(5)
         # te6.make_determinist()
@@ -201,7 +201,7 @@ def build_data_model(self):
 
         te7 = Node('EVT7')
         vt = BitField(subfield_sizes=[4,4,4],
-                      subfield_val_lists=[[4,2,1], None, [2,3,0]],
+                      subfield_values=[[4,2,1], None, [2,3,0]],
                       subfield_val_extremums=[None, [3, 15], None],
                       padding=0, lsb_padding=False, endian=VT.BigEndian)
         te7.set_values(value_type=vt)
@@ -230,7 +230,7 @@ def build_data_model(self):
         vt = UINT16_be(values=[1,2,3,4,5,6])
 
         # vt = BitField(subfield_sizes=[4,4,4],
-        #               subfield_val_lists=[[4,2,1], None, [10,12,13]],
+        #               subfield_values=[[4,2,1], None, [10,12,13]],
         #               subfield_val_extremums=[None, [14, 15], None],
         #               padding=0, lsb_padding=False, endian=VT.BigEndian)
@@ -346,7 +346,7 @@ def build_data_model(self):
              'contents': [
                  {'contents': BitField(subfield_sizes=[21,2,1], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [0b10], [0,1]],
+                                       subfield_values=[None, [0b10], [0,1]],
                                        subfield_val_extremums=[[500, 600], None, None]),
                   'name': 'val1',
                   'qty': (1, 5)},
diff --git a/data_models/file_formats/pdf.py b/data_models/file_formats/pdf.py
index 002413a..0367d0c 100644
--- a/data_models/file_formats/pdf.py
+++ b/data_models/file_formats/pdf.py
@@ -794,7 +794,7 @@ def _generate_xref(objs):
 
     # node_list last Node is the catalog
     catalog_id = catalog.get_private()
-    val_list = list(map(lambda x: x.to_bytes(), node_list))
+    values = list(map(lambda x: x.to_bytes(), node_list))
     sorted_node_list = sorted(node_list, key=lambda x: x.get_private())
 
     nb_objs = len(node_list) + 1 # we have to count the object 0
@@ -802,7 +802,7 @@ def _generate_xref(objs):
     off = header_len
     objs_offset = {}
 
-    for v, e in zip(val_list, node_list):
+    for v, e in zip(values, node_list):
         obj_len = len(v)
         objs_offset[e] = off
         off += obj_len
@@ -845,7 +845,7 @@ def _generate_xref_loop(objs):
 
     # node_list last Node is the catalog
     catalog_id = catalog.get_private()
-    val_list = list(map(lambda x: x.to_bytes(), node_list))
+    values = list(map(lambda x: x.to_bytes(), node_list))
     sorted_node_list = sorted(node_list, key=lambda x: x.get_private())
 
     nb_objs = len(node_list) + 1 # we have to count the object 0
@@ -853,7 +853,7 @@ def _generate_xref_loop(objs):
     off = header_len
     objs_offset = {}
 
-    for v, e in zip(val_list, node_list):
+    for v, e in zip(values, node_list):
         obj_len = len(v)
         objs_offset[e] = off
         off += obj_len
diff --git a/data_models/file_formats/zip.py b/data_models/file_formats/zip.py
index d58615a..917f93a 100644
--- a/data_models/file_formats/zip.py
+++ b/data_models/file_formats/zip.py
@@ -92,7 +92,7 @@ def build_data_model(self):
               'contents': UINT16_le()},
              {'name': 'gp_bit_flag',
               'contents': BitField(subfield_sizes=[2,1,13], endian=VT.LittleEndian,
-                                   subfield_val_lists=[None, [0,1], None],
+                                   subfield_values=[None, [0,1], None],
                                    subfield_val_extremums=[[0,3], None, [0, 8191]])},
              {'name': 'compression_method',
               'contents': UINT16_le()},
diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py
index 1bdd1de..11a7ddb 100644
--- a/data_models/protocols/pppoe.py
+++ b/data_models/protocols/pppoe.py
@@ -78,7 +78,7 @@ def build_data_model(self):
                'contents': [
                    {'name': 'vendorID',
                     'contents': BitField(subfield_sizes=[24,8], endian=VT.BigEndian,
-                                         subfield_val_lists=[None,[0]],
+                                         subfield_values=[None,[0]],
                                          subfield_descs=['type','version']) },
                    {'name': 'remainder',
                     'sync_enc_size_with': ('len', 4),
@@ -145,7 +145,7 @@ def build_data_model(self):
              'contents': UINT16_be(values=[0x8863])},
             {'name': 'version-type',
              'contents': BitField(subfield_sizes=[4,4], endian=VT.BigEndian,
-                                  subfield_val_lists=[[1],[1]],
+                                  subfield_values=[[1],[1]],
                                   subfield_descs=['type','version'])},
             {'name': 'code',
              'mutable': False,
diff --git a/data_models/protocols/sms.py b/data_models/protocols/sms.py
index 75eba87..8d2a251 100644
--- a/data_models/protocols/sms.py
+++ b/data_models/protocols/sms.py
@@ -42,7 +42,7 @@ def build_data_model(self):
         {'name': 'SMS-SUBMIT',  # refer to TS 100 901 (chapter 9.2.3)
          'mutable': False,
          'contents': BitField(subfield_sizes=[2,1,2,1,1,1], endian=VT.BigEndian,
-                              subfield_val_lists=[
+                              subfield_values=[
                                   [0b01],  # message type indicator,
                                   [0,1],  # reject duplicates
                                   [0b00,0b10,0b01,0b11],  # validity period format
@@ -63,7 +63,7 @@ def build_data_model(self):
          'node_args': 'tel_num'},
         {'name': 'addr_type',
          'contents': BitField(subfield_sizes=[4,3,1], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b0001],  # numbering-plan-identification
+                              subfield_values=[[0b0001],  # numbering-plan-identification
                                                  [0b001],   # type of number
                                                  [1]],      # always set to 1
                               subfield_val_extremums=[None,
@@ -78,14 +78,14 @@ def build_data_model(self):
         {'name': 'TP-PID',  # Protocol Identifier (refer to TS 100 901)
          'determinist': True,
          'contents': BitField(subfield_sizes=[5,1,2], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b00000],  # implicit
+                              subfield_values=[[0b00000],  # implicit
                                                  [0, 1],     # no interworking (default)
                                                  [0b00]]     # kind of opcode
                               ) },
         {'name': 'TP-DCS',  # Data Coding Scheme (refer to GSM 03.38)
          'determinist': True,
          'contents': BitField(subfield_sizes=[4,4], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b0000],  # default alphabet
+                              subfield_values=[[0b0000],  # default alphabet
                                                  [0b0000]]  # first coding group
                               ) },
         {'name': 'UDL',
@@ -104,7 +104,7 @@ def build_data_model(self):
         {'name': 'SMS-SUBMIT',  # refer to TS 100 901 (chapter 9.2.3)
          'mutable': False,
          'contents': BitField(subfield_sizes=[2,1,2,1,1,1], endian=VT.BigEndian,
-                              subfield_val_lists=[
+                              subfield_values=[
                                   [0b01],  # message type indicator,
                                   [0,1],  # reject duplicates
                                   [0b00,0b10,0b01,0b11],  # validity period format
@@ -125,7 +125,7 @@ def build_data_model(self):
          'node_args': 'tel_num'},
         {'name': 'addr_type',
          'contents': BitField(subfield_sizes=[4,3,1], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b0001],  # numbering-plan-identification
+                              subfield_values=[[0b0001],  # numbering-plan-identification
                                                  [0b001],   # type of number
                                                  [1]],      # always set to 1
                               subfield_val_extremums=[None,
@@ -140,7 +140,7 @@ def build_data_model(self):
         {'name': 'TP-PID',  # Protocol Identifier (refer to TS 100 901)
          'determinist': True,
          'contents': BitField(subfield_sizes=[6,2], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b111111],  # SIM Data Download
+                              subfield_values=[[0b111111],  # SIM Data Download
                                                  [0b01]],     # kind of opcode
                               ) },
         {'name': 'TP-DCS',  # Data Coding Scheme (refer to GSM 03.38)
@@ -150,7 +150,7 @@ def build_data_model(self):
          'determinist': True,
          'exists_if': (BitFieldCondition(sf=0, val=[0b1111]), 'msb'),
          'contents': BitField(subfield_sizes=[2,1,1], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b10,0b11,0b00,0b01],  # class 2 (default)
+                              subfield_values=[[0b10,0b11,0b00,0b01],  # class 2 (default)
                                                  [1,0],  # 8-bit data (default)
                                                  [0]]    # reserved
                               ) },
@@ -158,7 +158,7 @@ def build_data_model(self):
          'determinist': True,
          'exists_if': (BitFieldCondition(sf=0, val=[0b1101,0b1100]), 'msb'),
          'contents': BitField(subfield_sizes=[2,1,1], endian=VT.BigEndian,
-                              subfield_val_lists=[[0b10,0b11,0b00,0b01],  # indication type
+                              subfield_values=[[0b10,0b11,0b00,0b01],  # indication type
                                                  [0],    # reserved
                                                  [0,1]]  # set indication Active/Inactive
                               ) },
@@ -166,14 +166,14 @@ def build_data_model(self):
          'determinist': True,
          'exists_if': (BitFieldCondition(sf=0, val=[0]), 'msb'),
          'contents': BitField(subfield_sizes=[4], endian=VT.BigEndian,
-                              subfield_val_lists=[
+                              subfield_values=[
                                   [0b0000]  # Default alphabet
                               ] ) },
         {'name': 'msb',
          'determinist': True,
          'contents': BitField(subfield_sizes=[4], endian=VT.BigEndian,
-                              subfield_val_lists=[
+                              subfield_values=[
                                   [0b1111,0b1101,0b1100,0b0000]],  # last coding group
                               ) },
         ]},
@@ -200,7 +200,7 @@ def build_data_model(self):
       'contents': [
           {'name': 'SPI_p1',  # Security Parameter Indicator (part 1)
            'contents': BitField(subfield_sizes=[2,1,2,3], endian=VT.BigEndian,
-                                subfield_val_lists=[None,None,None,[0b000]],
+                                subfield_values=[None,None,None,[0b000]],
                                 subfield_val_extremums=[[0,3],[0,1],[0,3],None],
                                 defaults = [1,  # redundancy check
                                             0,  # no ciphering
@@ -211,7 +211,7 @@ def build_data_model(self):
 
           {'name': 'SPI_p2',  # Security Parameter Indicator (part 2)
            'contents': BitField(subfield_sizes=[2,2,1,1,2], endian=VT.BigEndian,
-                                subfield_val_lists=[None,None,None,None,[0b00]],
+                                subfield_values=[None,None,None,None,[0b00]],
                                 defaults = [1,  # PoR required
                                             3,  # PoR Digital Signature required
                                             0,  # PoR not ciphered
@@ -224,7 +224,7 @@ def build_data_model(self):
 
           {'name': 'KIc',  # Key and algo ID for ciphering
            'contents': BitField(subfield_sizes=[2,2,4], endian=VT.BigEndian,
-                                subfield_val_lists=[[1,0,3],  # 1 = DES (default)
+                                subfield_values=[[1,0,3],  # 1 = DES (default)
                                                     [3],      # ECB mode
                                                     [0b1010]],
                                 subfield_val_extremums=[None,[0,3],None],
@@ -233,7 +233,7 @@ def build_data_model(self):
           {'name': 'KID_RC',  # Key and algo ID for CRC # TS 102 225 (5.1.3.2)
            'contents': BitField(subfield_sizes=[2,2,4], endian=VT.BigEndian,
-                                subfield_val_lists=[[1,0,3],      # 1 = CRC (default)
+                                subfield_values=[[1,0,3],      # 1 = CRC (default)
                                                     [0b01,0b00],  # 0b01 = CRC 32
                                                     [0b1010]],
                                 subfield_val_extremums=[None,None,
@@ -243,7 +243,7 @@ def build_data_model(self):
 
           {'name': 'TAR',  # Toolkit Application Reference
            'contents': BitField(subfield_sizes=[24],
-                                subfield_val_lists=[[0]],  # Card Manager
+                                subfield_values=[[0]],  # Card Manager
                                 subfield_val_extremums=[[0,2**24-1]])},
 
           {'name': 'CNTR',  # Counter (replay detection and sequence integrity counter)
diff --git a/data_models/protocols/usb.py b/data_models/protocols/usb.py
index f938917..efb1540 100644
--- a/data_models/protocols/usb.py
+++ b/data_models/protocols/usb.py
@@ -81,39 +81,39 @@ def build_data_model(self):
             {'name': 'bEndpointAddr',
              'contents': BitField(subfield_limits=[4,7,8],
                                   subfield_val_extremums=[[0,0b1111],None,[0,1]],
-                                  subfield_val_lists=[None,[0],None],
+                                  subfield_values=[None,[0],None],
                                   endian=VT.LittleEndian),
              'alt': [
                  {'conf': 'BULK-IN',
                   'contents': BitField(subfield_limits=[4,7,8],
-                                       subfield_val_lists=[[1],[0],[1]],
+                                       subfield_values=[[1],[0],[1]],
                                        endian=VT.LittleEndian)},
                  {'conf': 'BULK-OUT',
                   'contents': BitField(subfield_limits=[4,7,8],
-                                       subfield_val_lists=[[2],[0],[0]],
+                                       subfield_values=[[2],[0],[0]],
                                        endian=VT.LittleEndian)}]},
             {'name': 'bmAttributes',
-             'contents': BitField(subfield_limits=[2,6,8], subfield_val_lists=[[0,2,3],[0],[0]],
+             'contents': BitField(subfield_limits=[2,6,8], subfield_values=[[0,2,3],[0],[0]],
                                   endian=VT.LittleEndian),
              'fuzz_weight': 5,
              'alt': [
                  {'conf': 'ISO',
                   'contents': BitField(subfield_limits=[2,4,6,8],
                                        subfield_val_extremums=[None,[0,3],[0,2],None],
-                                       subfield_val_lists=[[1],None,None,[0]],
+                                       subfield_values=[[1],None,None,[0]],
                                        endian=VT.LittleEndian)} ]},
            {'name': 'wMaxPacketSize',
              'contents': BitField(subfield_limits=[11,13,16],
                                   subfield_val_extremums=[None,[0,2],[0,0]],
-                                  subfield_val_lists=[[2**x for x in range(1,12)],None,[0]],
+                                  subfield_values=[[2**x for x in range(1,12)],None,[0]],
                                   endian=VT.LittleEndian),
              'random': True,
              'alt': [
                  {'conf': 'MSD',
                   'contents': BitField(subfield_limits=[11,13,16],
                                        subfield_val_extremums=[None,[0,2],[0,0]],
-                                       subfield_val_lists=[[0x8, 0x10, 0x20, 0x40],[0],[0]],
+                                       subfield_values=[[0x8, 0x10, 0x20, 0x40],[0],[0]],
                                        endian=VT.LittleEndian)}]},
             {'name': 'bInterval',
              'contents': UINT8(values=[4]),
@@ -226,7 +226,7 @@ def build_data_model(self):
              'contents': UINT8(values=[USB_DEFS.STRINGID_CONFIG])},
             {'name': 'bmAttributes',
              'contents': BitField(subfield_limits=[5,6,7,8],
-                                  subfield_val_lists=[[0],[1],[1],[1]],
+                                  subfield_values=[[0],[1],[1],[1]],
                                   endian=VT.LittleEndian)},
             {'name': 'bMaxPower',
              'contents': UINT8(values=[50])},
diff --git a/data_models/tuto.py b/data_models/tuto.py
index 0be6265..7a3aeac 100644
--- a/data_models/tuto.py
+++ b/data_models/tuto.py
@@ -27,7 +27,7 @@ def build_data_model(self):
              'contents': [
                  {'contents': BitField(subfield_sizes=[21,2,1], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [0b10], [0,1]],
+                                       subfield_values=[None, [0b10], [0,1]],
                                        subfield_val_extremums=[[500, 600], None, None]),
                   'name': 'val1',
                   'qty': (1, 5)},
@@ -210,7 +210,7 @@ def keycode_helper(blob, constraints, node_internals):
              'contents': [
                  {'name': 'A3_subopcode',
                   'contents': BitField(subfield_sizes=[15,2,4], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [1,2], [5,6,12]],
+                                       subfield_values=[None, [1,2], [5,6,12]],
                                        subfield_val_extremums=[[500, 600], None, None],
                                        determinist=False)},
diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst
index 833e7b6..86de3fd 100644
--- a/docs/source/data_model.rst
+++ b/docs/source/data_model.rst
@@ -195,7 +195,7 @@ parameters:
   ``subfield_limits``), beginning from the least significant sub-field to the
   more significant sub-field.
 
-``subfield_val_lists`` [optional, default value: **None**]
+``subfield_values`` [optional, default value: **None**]
   List of valid values for each sub-field. Look at the following examples
   for usage. For each sub-field value list, the first value is the default.
@@ -241,7 +241,7 @@ parameters:
 
 ``defaults`` [optional, default value: **None**]
   List of default values for each sub-field. Used only when the related sub-field is
-  not described through ``subfield_val_lists``. If ``subfield_val_lists`` describes the related
+  not described through ``subfield_values``. If ``subfield_values`` describes the related
   sub-field, then a ``None`` item should be inserted at the corresponding position in the list.
 
 ``subfield_descs`` [optional, default value: **None**]
@@ -264,7 +264,7 @@ going through the definition of a data model (for this topic refer to
    :emphasize-lines: 8-10
 
     t = BitField(subfield_limits=[2,6,10,12],
-                 subfield_val_lists=[[4,2,1], [2,15,16,3], None, [1]],
+                 subfield_values=[[4,2,1], [2,15,16,3], None, [1]],
                  subfield_val_extremums=[None, None, [3,11], None],
                  padding=0, lsb_padding=True, endian=VT.LittleEndian)
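Since ``subfield_limits`` and ``subfield_sizes`` describe the same layout in two ways (cumulative bit positions versus individual widths), the conversion can be checked with a few lines of plain Python (editor's illustration, not from the documentation)::

    limits = [2, 6, 10, 12]     # sub-fields end at bits 2, 6, 10 and 12
    sizes = [limits[0]] + [b - a for a, b in zip(limits, limits[1:])]
    assert sizes == [2, 4, 4, 2]  # the equivalent 'subfield_sizes' description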
@@ -292,7 +292,7 @@ the first example. We additionally specify the parameter
    :emphasize-lines: 9-11
 
     t = BitField(subfield_sizes=[4,4,4],
-                 subfield_val_lists=[[4,2,1], None, [10,13]],
+                 subfield_values=[[4,2,1], None, [10,13]],
                  subfield_val_extremums=[None, [14, 15], None],
                  padding=0, lsb_padding=False, endian=VT.BigEndian,
                  subfield_descs=['first', None, 'last'])
@@ -898,7 +898,7 @@ exists_if/and, exists_if/or
           'contents': String(values=['A3', 'A2'])},
          {'name': 'subopcode',
           'contents': BitField(subfield_sizes=[15,2,4],
-                               subfield_val_lists=[[500], [1,2], [5,6,12]])},
+                               subfield_values=[[500], [1,2], [5,6,12]])},
          {'name': 'and_condition',
           'exists_if/and': [(RawCondition('A2'), 'opcode'),
                             (BitFieldCondition(sf=2, val=[5]), 'subopcode')],
@@ -1145,7 +1145,7 @@ that purpose the keyword ``exists_if`` with some subclasses of
              'contents': [
                  {'name': 'A3_subopcode',
                   'contents': BitField(subfield_sizes=[15,2,4], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [1,2], [5,6,12]],
+                                       subfield_values=[None, [1,2], [5,6,12]],
                                        subfield_val_extremums=[[500, 600], None, None],
                                        determinist=False)},
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index bbf58e9..e1b7e3b 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -1316,7 +1316,7 @@ various constructions, and value types.
              'contents': [
                  {'contents': BitField(subfield_sizes=[21,2,1], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [0b10], [0,1]],
+                                       subfield_values=[None, [0b10], [0,1]],
                                        subfield_val_extremums=[[500, 600], None, None]),
                   'name': 'val1',
                   'qty': (1, 5)},
@@ -1910,18 +1910,18 @@ another inappropriate separator.
             dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable,
                                                       dm.NodeInternals.Separator],
                                      node_kinds=[dm.NodeInternals_Term])
 
-        self.val_list = [b'']
+        self.values = [b'']
         if separators is not None:
-            self.val_list += list(separators)
+            self.values += list(separators)
 
     def consume_node(self, node):
         orig_val = node.to_bytes()
-        new_val_list = copy.copy(self.val_list)
+        new_values = copy.copy(self.values)
 
-        if orig_val in new_val_list:
-            new_val_list.remove(orig_val)
+        if orig_val in new_values:
+            new_values.remove(orig_val)
 
-        node.import_value_type(value_type=vtype.String(values=new_val_list))
+        node.import_value_type(value_type=vtype.String(values=new_values))
 
         node.make_finite()
         node.make_determinist()
diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py
index 319f7db..626f944 100644
--- a/framework/fuzzing_primitives.py
+++ b/framework/fuzzing_primitives.py
@@ -843,20 +843,20 @@ def init_specific(self, separators=None):
             dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable,
                                                       dm.NodeInternals.Separator],
                                      node_kinds=[dm.NodeInternals_Term])
 
-        self.val_list = [b'']
+        self.values = [b'']
         if separators is not None:
-            self.val_list += list(separators)
+            self.values += list(separators)
 
         # self.need_reset_when_structure_change = True
 
     def consume_node(self, node):
         orig_val = node.to_bytes()
-        new_val_list = copy.copy(self.val_list)
+        new_values = copy.copy(self.values)
 
-        if orig_val in new_val_list:
-            new_val_list.remove(orig_val)
+        if orig_val in new_values:
+            new_values.remove(orig_val)
 
-        node.cc.import_value_type(value_type=vtype.String(values=new_val_list))
+        node.cc.import_value_type(value_type=vtype.String(values=new_values))
         # Note, that node attributes are not altered by this
         # operation, especially useful in our case, because we have
         # to preserve dm.NodeInternals.Separator
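The consumer logic renamed above is small enough to restate in isolation: the node's current separator is removed from the candidate list before the list is re-imported as a ``String`` (hypothetical standalone values, outside any data model)::

    import copy

    values = [b'', b';', b',']      # candidate separators; b'' is always included
    orig_val = b';'                 # separator currently carried by the node
    new_values = copy.copy(values)
    if orig_val in new_values:
        new_values.remove(orig_val) # never replay the separator already in place
    assert new_values == [b'', b',']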
diff --git a/framework/value_types.py b/framework/value_types.py
index f9368ae..3bc5111 100644
--- a/framework/value_types.py
+++ b/framework/value_types.py
@@ -322,7 +322,7 @@ def encoding_test_cases(self, current_val, max_sz, min_sz, min_encoded__sz, max_
 
     def __repr__(self):
         if DEBUG:
-            return VT_Alt.__repr__(self)[:-1] + ' contents:' + str(self.val_list) + '>'
+            return VT_Alt.__repr__(self)[:-1] + ' contents:' + str(self.values) + '>'
         else:
             return VT_Alt.__repr__(self)
@@ -374,11 +374,11 @@ def init_specific(self, values=None, size=None, min_sz=None,
           size: Valid character string size for the node backed by this *String object*.
           min_sz: Minimum valid size for the character strings for the node backed by
             this *String object*. If not set, this parameter will be
-            automatically inferred by looking at the parameter ``val_list``
+            automatically inferred by looking at the parameter ``values``
             whether this latter is provided.
           max_sz: Maximum valid size for the character strings for the node backed by this
             *String object*. If not set, this parameter will be
-            automatically inferred by looking at the parameter ``val_list``
+            automatically inferred by looking at the parameter ``values``
             whether this latter is provided.
           determinist: If set to ``True`` generated values will be in a deterministic
             order, otherwise in a random order.
@@ -388,9 +388,9 @@ def init_specific(self, values=None, size=None, min_sz=None,
             the generic disruptor tTYPE.
           absorb_regexp (str): You can specify a regular expression in this parameter as a
             supplementary constraint for data absorption operation.
-          alphabet: The alphabet to use for generating data, in case no `val_list` is
+          alphabet: The alphabet to use for generating data, in case no ``values`` is
             provided. Also use during absorption to validate the contents. It is
-            checked if there is no `val_list`.
+            checked if there is no ``values``.
           min_encoded_sz: Only relevant for subclasses that leverage the encoding infrastructure.
             Enable to provide the minimum legitimate size for an encoded string.
           max_encoded_sz: Only relevant for subclasses that leverage the encoding infrastructure.
@@ -403,12 +403,12 @@ def init_specific(self, values=None, size=None, min_sz=None,
 
         self.drawn_val = None
 
-        self.val_list = None
-        self.val_list_copy = None
-        self.val_list_fuzzy = None
-        self.val_list_save = None
+        self.values = None
+        self.values_copy = None
+        self.values_fuzzy = None
+        self.values_save = None
 
-        self.is_val_list_provided = None
+        self.is_values_provided = None
 
         self.min_sz = None
         self.max_sz = None
@@ -427,16 +427,16 @@ def init_specific(self, values=None, size=None, min_sz=None,
 
     def make_private(self, forget_current_state):
         if forget_current_state:
-            if self.is_val_list_provided:
-                self.val_list = copy.copy(self.val_list)
+            if self.is_values_provided:
+                self.values = copy.copy(self.values)
             else:
-                self._populate_val_list(force_max_enc_sz=self.max_enc_sz_provided,
-                                        force_min_enc_sz=self.min_enc_sz_provided)
+                self._populate_values(force_max_enc_sz=self.max_enc_sz_provided,
+                                      force_min_enc_sz=self.min_enc_sz_provided)
             self._ensure_enc_sizes_consistency()
             self.reset_state()
         else:
-            self.val_list = copy.copy(self.val_list)
-            self.val_list_copy = copy.copy(self.val_list_copy)
+            self.values = copy.copy(self.values)
+            self.values_copy = copy.copy(self.values_copy)
             if self.encoded_string:
                 self.encoding_arg = copy.copy(self.encoding_arg)
@@ -451,19 +451,19 @@ def absorb_auto_helper(self, blob, constraints):
             off = 0
             size = self.max_encoded_sz
             # If 'Contents' constraint is set, we seek for string within
-            # val_list or conforming to the alphabet.
+            # values or conforming to the alphabet.
             # If 'Regexp' constraint is set, we seek for string matching
             # the regexp.
             # If no such constraints are provided, we assume off==0
             # and let do_absorb() decide if it's OK (via size constraints
             # for instance).
             blob_dec = self.decode(blob)
-            if constraints[AbsCsts.Contents] and self.is_val_list_provided and self.alphabet is None:
-                for v in self.val_list:
+            if constraints[AbsCsts.Contents] and self.is_values_provided and self.alphabet is None:
+                for v in self.values:
                     if blob_dec.startswith(v):
                         break
                 else:
-                    for v in self.val_list:
+                    for v in self.values:
                         if self.encoded_string:
                             v = self.encode(v)
                         off = blob.find(v)
@@ -521,8 +521,8 @@ def do_absorb(self, blob, constraints, off=0, size=None):
         self.orig_min_encoded_sz = self.min_encoded_sz
         self.orig_max_encoded_sz = self.max_encoded_sz
         self.orig_min_sz = self.min_sz
-        self.orig_val_list = copy.copy(self.val_list)
-        self.orig_val_list_copy = copy.copy(self.val_list_copy)
+        self.orig_values = copy.copy(self.values)
+        self.orig_values_copy = copy.copy(self.values_copy)
         self.orig_drawn_val = self.drawn_val
 
         if constraints[AbsCsts.Size]:
@@ -541,8 +541,8 @@ def do_absorb(self, blob, constraints, off=0, size=None):
             val = self._read_value_from(blob, constraints)
             val_sz = len(val)
 
-            if constraints[AbsCsts.Contents] and self.is_val_list_provided:
-                for v in self.val_list:
+            if constraints[AbsCsts.Contents] and self.is_values_provided:
+                for v in self.values:
                     if val.startswith(v):
                         val = v
                         val_sz = len(val)
@@ -572,10 +572,10 @@ def do_absorb(self, blob, constraints, off=0, size=None):
             elif val_enc_sz < self.min_encoded_sz:
                 self.min_encoded_sz = val_enc_sz
 
-        if self.val_list is None:
-            self.val_list = []
+        if self.values is None:
+            self.values = []
 
-        self.val_list.insert(0, val)
+        self.values.insert(0, val)
 
         self.reset_state()
 
@@ -619,8 +619,8 @@ def do_revert_absorb(self):
         (safe to recall it more than once)
         '''
         if hasattr(self, 'orig_drawn_val'):
-            self.val_list = self.orig_val_list
-            self.val_list_copy = self.orig_val_list_copy
+            self.values = self.orig_values
+            self.values_copy = self.orig_values_copy
             self.min_sz = self.orig_min_sz
             self.max_sz = self.orig_max_sz
             self.min_encoded_sz = self.orig_min_encoded_sz
@@ -632,8 +632,8 @@ def do_cleanup_absorb(self):
         To be called after self.do_absorb() or self.do_revert_absorb()
         '''
         if hasattr(self, 'orig_drawn_val'):
-            del self.orig_val_list
-            del self.orig_val_list_copy
+            del self.orig_values
+            del self.orig_values_copy
             del self.orig_min_sz
             del self.orig_max_sz
             del self.orig_max_encoded_sz
@@ -652,19 +652,19 @@ def _read_value_from(self, blob, constraints):
             return blob
 
     def reset_state(self):
-        self.val_list_copy = copy.copy(self.val_list)
+        self.values_copy = copy.copy(self.values)
         self.drawn_val = None
         if self.encoded_string:
             self.encoding_arg = copy.copy(self.encoding_arg)
             self.init_encoding_scheme(self.encoding_arg)
 
     def rewind(self):
-        sz_vlist_copy = len(self.val_list_copy)
-        sz_vlist = len(self.val_list)
-        if self.val_list_copy is not None and \
+        sz_vlist_copy = len(self.values_copy)
+        sz_vlist = len(self.values)
+        if self.values_copy is not None and \
            sz_vlist_copy < sz_vlist:
-            val = self.val_list[sz_vlist - sz_vlist_copy - 1]
-            self.val_list_copy.insert(0, val)
+            val = self.values[sz_vlist - sz_vlist_copy - 1]
+            self.values_copy.insert(0, val)
 
         self.drawn_val = None
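As the hunks above show, a successful absorption records the absorbed value at the head of ``values``, and the ``orig_*`` attributes make the operation reversible. A hedged usage sketch (assuming fuddly's ``String`` type and its absorption-constraint objects such as ``AbsNoCsts``)::

    from framework.value_types import String

    s = String(values=['foo'], max_sz=8)
    s.do_absorb(b'foobar', constraints=AbsNoCsts())  # performs s.values.insert(0, val)
    s.do_revert_absorb()                             # restores the saved orig_values
    s.do_cleanup_absorb()                            # drops the orig_* attributes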
@@ -714,8 +714,8 @@ def set_description(self, values=None, size=None, min_sz=None,
         if values is not None:
             assert isinstance(values, list)
-            self.val_list = self._str2bytes(values)
-            for val in self.val_list:
+            self.values = self._str2bytes(values)
+            for val in self.values:
                 if not self._check_compliance(val, force_max_enc_sz=self.max_enc_sz_provided,
                                               force_min_enc_sz=self.min_enc_sz_provided,
                                               update_list=False):
@@ -726,13 +726,13 @@ def set_description(self, values=None, size=None, min_sz=None,
                         if l not in self.alphabet:
                             raise ValueError("The value '%s' does not conform to the alphabet!" % val)
 
-            self.val_list_copy = copy.copy(self.val_list)
-            self.is_val_list_provided = True  # distinguish cases where
-                                              # val_list is provided or
+            self.values_copy = copy.copy(self.values)
+            self.is_values_provided = True  # distinguish cases where
+                                            # values is provided or
                                             # created based on size
-            self.user_provided_list = copy.copy(self.val_list)
+            self.user_provided_list = copy.copy(self.values)
         else:
-            self.is_val_list_provided = False
+            self.is_values_provided = False
             self.user_provided_list = None
 
         if size is not None:
@@ -800,14 +800,14 @@ def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_li
                 if self.min_encoded_sz is None or val_sz < self.min_encoded_sz:
                     self.min_encoded_sz = val_sz
                 if update_list:
-                    self.val_list.append(value)
+                    self.values.append(value)
                 return True
             elif force_max_enc_sz and not force_min_enc_sz:
                 if val_sz <= self.max_encoded_sz:
                     if self.min_encoded_sz is None or val_sz < self.min_encoded_sz:
                         self.min_encoded_sz = val_sz
                     if update_list:
-                        self.val_list.append(value)
+                        self.values.append(value)
                     return True
                 else:
                     return False
@@ -816,14 +816,14 @@ def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_li
                     if self.max_encoded_sz is None or val_sz > self.max_encoded_sz:
                         self.max_encoded_sz = val_sz
                     if update_list:
-                        self.val_list.append(value)
+                        self.values.append(value)
                     return True
                 else:
                     return False
             else:
                 if val_sz <= self.max_encoded_sz and val_sz >= self.min_encoded_sz:
                     if update_list:
-                        self.val_list.append(value)
+                        self.values.append(value)
                     return True
                 else:
                     return False
@@ -834,11 +834,11 @@ def _check_compliance(self, value, force_max_enc_sz, force_min_enc_sz, update_li
             if self.min_encoded_sz is None or val_sz < self.min_encoded_sz:
                 self.min_encoded_sz = val_sz
             if update_list:
-                self.val_list.append(value)
+                self.values.append(value)
             return True
 
-    def _populate_val_list(self, force_max_enc_sz=False, force_min_enc_sz=False):
-        self.val_list = []
+    def _populate_values(self, force_max_enc_sz=False, force_min_enc_sz=False):
+        self.values = []
         alpbt = string.printable if self.alphabet is None else self._bytes2str(self.alphabet)
         if self.min_sz < self.max_sz:
             self._check_compliance(self._str2bytes(bp.rand_string(size=self.max_sz, str_set=alpbt)),
@@ -861,7 +861,7 @@ def _populate_values(self, force_max_enc_sz=False, force_min_enc_sz=False):
             else:
                 retry_cpt += 1
 
-        if len(self.val_list) == 0:
+        if len(self.values) == 0:
             raise DataModelDefinitionError
 
     def get_current_raw_val(self, str_form=False):
@@ -871,67 +871,67 @@ def get_current_raw_val(self, str_form=False):
         val = self._bytes2str(self.drawn_val) if str_form else self.drawn_val
         return val
 
     def enable_normal_mode(self):
-        self.val_list = self.val_list_save
-        self.val_list_copy = copy.copy(self.val_list)
-        self.val_list_fuzzy = None
+        self.values = self.values_save
+        self.values_copy = copy.copy(self.values)
+        self.values_fuzzy = None
 
         self.drawn_val = None
 
     def enable_fuzz_mode(self):
-        self.val_list_fuzzy = []
+        self.values_fuzzy = []
 
         if self.drawn_val is not None:
             orig_val = self.drawn_val
         else:
             if self.determinist:
-                orig_val = self.val_list_copy[0]
+                orig_val = self.values_copy[0]
             else:
-                orig_val = random.choice(self.val_list_copy)
+                orig_val = random.choice(self.values_copy)
 
         sz = len(orig_val)
         sz_delta_with_max = self.max_sz - sz
 
         try:
             val = bp.corrupt_bits(orig_val, n=1)
-            self.val_list_fuzzy.append(val)
+            self.values_fuzzy.append(val)
         except:
             print("\n*** Value is empty! --> skipping bitflip test case ***")
 
         val = orig_val + b"A"*(sz_delta_with_max + 1)
-        self.val_list_fuzzy.append(val)
+        self.values_fuzzy.append(val)
 
-        self.val_list_fuzzy.append(b'')
+        self.values_fuzzy.append(b'')
         if sz > 0:
             sz_delta_with_min = sz - self.min_sz
             val = orig_val[:-sz_delta_with_min-1]
             if val != b'':
-                self.val_list_fuzzy.append(val)
+                self.values_fuzzy.append(val)
 
         val = orig_val + b"X"*(self.max_sz*100)
-        self.val_list_fuzzy.append(val)
+        self.values_fuzzy.append(val)
 
-        self.val_list_fuzzy.append(b'\x00'*sz if sz>0 else b'\x00')
+        self.values_fuzzy.append(b'\x00' * sz if sz > 0 else b'\x00')
 
         if sz > 1:
             is_even = sz % 2 == 0
             cpt = sz // 2
             if is_even:
-                self.val_list_fuzzy.append(b'%n' * cpt)
-                self.val_list_fuzzy.append(b'%s' * cpt)
+                self.values_fuzzy.append(b'%n' * cpt)
+                self.values_fuzzy.append(b'%s' * cpt)
             else:
-                self.val_list_fuzzy.append(orig_val[:1] + b'%n' * cpt)
-                self.val_list_fuzzy.append(orig_val[:1] + b'%s' * cpt)
+                self.values_fuzzy.append(orig_val[:1] + b'%n' * cpt)
+                self.values_fuzzy.append(orig_val[:1] + b'%s' * cpt)
 
-        self.val_list_fuzzy.append(orig_val + b'%n'*400)
-        self.val_list_fuzzy.append(orig_val + b'%s'*400)
-        self.val_list_fuzzy.append(orig_val + b'\"%n\"'*400)
-        self.val_list_fuzzy.append(orig_val + b'\"%s\"'*400)
-        self.val_list_fuzzy.append(orig_val + b'\r\n'*100)
+        self.values_fuzzy.append(orig_val + b'%n' * 400)
+        self.values_fuzzy.append(orig_val + b'%s' * 400)
+        self.values_fuzzy.append(orig_val + b'\"%n\"' * 400)
+        self.values_fuzzy.append(orig_val + b'\"%s\"' * 400)
+        self.values_fuzzy.append(orig_val + b'\r\n' * 100)
 
         if self.extra_fuzzy_list:
             for v in self.extra_fuzzy_list:
-                if v not in self.val_list_fuzzy:
-                    self.val_list_fuzzy.append(v)
+                if v not in self.values_fuzzy:
+                    self.values_fuzzy.append(v)
 
         if self.codec == self.ASCII:
             val = bytearray(orig_val)
@@ -940,40 +940,40 @@ def enable_fuzz_mode(self):
                 val = bytes(val)
             else:
                 val = b'\xe9'
-            if val not in self.val_list_fuzzy:
-                self.val_list_fuzzy.append(val)
+            if val not in self.values_fuzzy:
+                self.values_fuzzy.append(val)
         elif self.codec == self.UTF16BE or self.codec == self.UTF16LE:
             if self.max_sz > 0:
                 if self.max_encoded_sz % 2 == 1:
                     nb = self.max_sz // 2
                     # euro character at the end that 'fully' use the 2 bytes of utf-16
                     val = ('A' * nb).encode(self.codec) + b'\xac\x20'
-                    if val not in self.val_list_fuzzy:
-                        self.val_list_fuzzy.append(val)
+                    if val not in self.values_fuzzy:
+                        self.values_fuzzy.append(val)
 
         enc_cases = self.encoding_test_cases(orig_val, self.max_sz, self.min_sz,
                                              self.min_encoded_sz, self.max_encoded_sz)
         if enc_cases:
-            self.val_list_fuzzy += enc_cases
+            self.values_fuzzy += enc_cases
 
-        self.val_list_save = self.val_list
-        self.val_list = self.val_list_fuzzy
-        self.val_list_copy = copy.copy(self.val_list)
+        self.values_save = self.values
+        self.values = self.values_fuzzy
+        self.values_copy = copy.copy(self.values)
 
         self.drawn_val = None
 
     def get_value(self):
-        if not self.val_list:
-            self._populate_val_list(force_max_enc_sz=self.max_enc_sz_provided,
-                                    force_min_enc_sz=self.min_enc_sz_provided)
+        if not self.values:
+            self._populate_values(force_max_enc_sz=self.max_enc_sz_provided,
+                                  force_min_enc_sz=self.min_enc_sz_provided)
             self._ensure_enc_sizes_consistency()
-        if not self.val_list_copy:
-            self.val_list_copy = copy.copy(self.val_list)
+        if not self.values_copy:
+            self.values_copy = copy.copy(self.values)
 
         if self.determinist:
-            ret = self.val_list_copy.pop(0)
+            ret = self.values_copy.pop(0)
         else:
-            ret = random.choice(self.val_list_copy)
-            self.val_list_copy.remove(ret)
+            ret = random.choice(self.values_copy)
+            self.values_copy.remove(ret)
 
         self.drawn_val = ret
         if self.encoded_string:
@@ -981,14 +981,14 @@ def get_value(self):
         return ret
 
     def is_exhausted(self):
-        if self.val_list_copy:
+        if self.values_copy:
             return False
         else:
             return True
 
     def set_size_from_constraints(self, size=None, encoded_size=None):
         # This method is used only for absorption purpose, thus no modification
-        # is performed on self.val_list. To be reconsidered in the case the method
+        # is performed on self.values. To be reconsidered in the case the method
         # has to be used for another purpose.
         assert size is not None or encoded_size is not None
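Condensed, the fuzz mode above turns one nominal value into a deterministic batch of malformed neighbours. For a current value of ``b'ABC'`` with ``min_sz=3`` and ``max_sz=8``, the list assembled in ``values_fuzzy`` contains, among others (editor's recap of the code above)::

    orig_val = b'ABC'
    expected_cases = [
        b'',                      # empty value
        orig_val + b'X' * 800,    # max_sz * 100 oversized value
        b'\x00' * 3,              # NUL filler of the same size
        b'A%n',                   # format-string probes (odd size: cpt = 3 // 2)
        b'A%s',
        orig_val + b'%n' * 400,
        orig_val + b'%s' * 400,
        orig_val + b'\r\n' * 100,
    ]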
@@ -1492,13 +1492,13 @@ class BitField(VT_Alt):
     '''
     Provide:
     - either @subfield_limits or @subfield_sizes
-    - either @subfield_val_lists or @subfield_val_extremums
+    - either @subfield_values or @subfield_val_extremums
     '''
 
     padding_one = [0, 1, 0b11, 0b111, 0b1111, 0b11111, 0b111111, 0b1111111]
 
     def init_specific(self, subfield_limits=None, subfield_sizes=None,
-                      subfield_val_lists=None, subfield_val_extremums=None,
+                      subfield_values=None, subfield_val_extremums=None,
                       padding=0, lsb_padding=True,
                       endian=VT.LittleEndian, determinist=True,
                       subfield_descs=None, defaults=None):
@@ -1527,7 +1527,7 @@ def init_specific(self, subfield_limits=None, subfield_sizes=None,
         self.current_idx = None
         self.idx = None
         self.idx_inuse = None
-        self.set_bitfield(sf_val_lists=subfield_val_lists, sf_val_extremums=subfield_val_extremums,
+        self.set_bitfield(sf_valuess=subfield_values, sf_val_extremums=subfield_val_extremums,
                           sf_limits=subfield_limits, sf_sizes=subfield_sizes, sf_descs=subfield_descs,
                           sf_defaults=defaults)
@@ -1591,8 +1591,8 @@ def set_subfield(self, idx, val):
         else:
             # Note that the case "self.idx[idx]==1" has not to be
             # specifically handled here (for preventing overflow),
-            # because even if len(val_list)==1, we add a new element
-            # within, making a val_list always >= 2.
+            # because even if len(subfield_vals)==1, we add a new element
+            # within, making a subfield_vals always >= 2.
             self.subfield_vals[idx].insert(self.idx[idx], val)
 
         self.idx_inuse[idx] = self.idx[idx]
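A short sketch of the accessors touched above (editor's illustration; the behaviour follows the comment in ``set_subfield``)::

    vt = BitField(subfield_sizes=[4, 4], subfield_values=[[0x3], [0xF]])
    vt.set_subfield(0, 0x7)    # injects 0x7 into the least significant sub-field
    assert vt.get_subfield(0) == 0x7
    # the first sub-field's value list now holds two entries, hence ">= 2" above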
@@ -1606,14 +1606,14 @@ def get_subfield(self, idx):
             mini, maxi = self.subfield_extrems[idx]
             ret = mini + self.idx_inuse[idx]
         else:
-            val_list = self.subfield_vals[idx]
-            index = 0 if len(val_list) == 1 else self.idx_inuse[idx]
-            ret = val_list[index]
+            values = self.subfield_vals[idx]
+            index = 0 if len(values) == 1 else self.idx_inuse[idx]
+            ret = values[index]
 
         return ret
 
-    def set_bitfield(self, sf_val_lists=None, sf_val_extremums=None, sf_limits=None, sf_sizes=None,
+    def set_bitfield(self, sf_valuess=None, sf_val_extremums=None, sf_limits=None, sf_sizes=None,
                      sf_descs=None, sf_defaults=None):
 
         if sf_limits is not None:
@@ -1626,9 +1626,9 @@ def set_bitfield(self, sf_valuess=None, sf_val_extremums=None, sf_limits=None,
         else:
             raise DataModelDefinitionError
 
-        if sf_val_lists is None:
-            sf_val_lists = [None for i in range(len(self.subfield_limits))]
-        elif len(sf_val_lists) != len(self.subfield_limits):
+        if sf_valuess is None:
+            sf_valuess = [None for i in range(len(self.subfield_limits))]
+        elif len(sf_valuess) != len(self.subfield_limits):
             raise DataModelDefinitionError
 
         if sf_val_extremums is None:
@@ -1663,17 +1663,17 @@ def set_bitfield(self, sf_valuess=None, sf_val_extremums=None, sf_limits=None,
         # provided limits are not included in the subfields
         for idx, lim in enumerate(self.subfield_limits):
 
-            val_list = sf_val_lists[idx]
+            values = sf_valuess[idx]
             extrems = sf_val_extremums[idx]
 
             size = lim - prev_lim
             self.subfield_sizes.append(size)
 
-            if val_list is not None:
+            if values is not None:
                 default = self.subfield_defaults[idx]
                 assert default is None
                 l = []
-                for v in val_list:
+                for v in values:
                     if self.is_compatible(v, size):
                         l.append(v)
                 self.subfield_vals.append(l)
@@ -1779,7 +1779,7 @@ def set_size_from_constraints(self, size=None, encoded_size=None):
     def pretty_print(self, max_size=None):
 
         first_pass = True
-        for lim, sz, val_list, extrems, i in zip(self.subfield_limits[::-1],
+        for lim, sz, values, extrems, i in zip(self.subfield_limits[::-1],
                                                  self.subfield_sizes[::-1],
                                                  self.subfield_vals[::-1],
                                                  self.subfield_extrems[::-1],
@@ -1796,12 +1796,12 @@ def pretty_print(self, max_size=None):
             else:
                 string += ' ' + prefix
 
-            if val_list is None:
+            if values is None:
                 mini, maxi = extrems
                 string += bin(mini+self.idx_inuse[i])[2:].zfill(sz)
             else:
-                index = 0 if len(val_list) == 1 else self.idx_inuse[i]
-                string += bin(val_list[index])[2:].zfill(sz)
+                index = 0 if len(values) == 1 else self.idx_inuse[i]
+                string += bin(values[index])[2:].zfill(sz)
 
         if self.padding_size != 0:
             if self.padding == 1:
@@ -1849,9 +1849,9 @@ def enable_fuzz_mode(self):
                 # max is needed because self.idx[0] is equal to 0 in this case
                 curr_idx = max(self.idx[idx]-1, 0)
 
-                curr_val_list = self.subfield_vals[idx]
-                if curr_val_list is not None:
-                    current = curr_val_list[curr_idx]
+                curr_values = self.subfield_vals[idx]
+                if curr_values is not None:
+                    current = curr_values[curr_idx]
                 else:
                     mini, maxi = self.subfield_extrems[idx]
                     current = mini + curr_idx
@@ -1882,8 +1882,8 @@ def enable_fuzz_mode(self):
                 if b not in l and self.is_compatible(b, sz):
                     l.append(b)
 
-            if curr_val_list is not None:
-                orig_set = set(curr_val_list)
+            if curr_values is not None:
+                orig_set = set(curr_values)
                 max_oset = max(orig_set)
                 min_oset = min(orig_set)
                 if min_oset != max_oset:
@@ -1930,12 +1930,12 @@ def __compute_total_possible_values(self):
             return self.__count_of_possible_values
 
         s = 1
-        for val_list, extrems in zip(self.subfield_vals, self.subfield_extrems):
-            if val_list is None:
+        for values, extrems in zip(self.subfield_vals, self.subfield_extrems):
+            if values is None:
                 mini, maxi = extrems
                 s += maxi - mini
             else:
-                s += len(val_list) - 1
+                s += len(values) - 1
         self.__count_of_possible_values = s
         return self.__count_of_possible_values
@@ -1982,32 +1982,32 @@ def rewind(self):
 
     def _read_value_from(self, blob, size, endian, constraints):
-        val_list = list(struct.unpack('B'*size, blob))
+        values = list(struct.unpack('B'*size, blob))
         if endian == VT.BigEndian:
-            val_list = val_list[::-1]
+            values = values[::-1]
 
-        # val_list from LSB to MSB
+        # values from LSB to MSB
 
         if self.padding_size != 0:
             if self.lsb_padding:
                 if constraints[AbsCsts.Contents]:
                     mask = self.padding_one[self.padding_size]
-                    if self.padding == 1 and val_list[0] & mask != mask:
+                    if self.padding == 1 and values[0] & mask != mask:
                         raise ValueError('contents not valid! (padding should be 1s)')
-                    elif self.padding == 0 and val_list[0] & self.padding_one[self.padding_size] != 0:
+                    elif self.padding == 0 and values[0] & self.padding_one[self.padding_size] != 0:
                         raise ValueError('contents not valid! (padding should be 0s)')
             else:
                 if constraints[AbsCsts.Contents]:
                     mask = self.padding_one[self.padding_size]<<(8-self.padding_size)
-                    if self.padding == 1 and val_list[-1] & mask != mask:
+                    if self.padding == 1 and values[-1] & mask != mask:
                         raise ValueError('contents not valid! (padding should be 1s)')
-                    elif self.padding == 0 and val_list[-1] & mask != 0:
+                    elif self.padding == 0 and values[-1] & mask != 0:
                         raise ValueError('contents not valid! (padding should be 0s)')
 
-        val_list_sz = len(val_list)
+        values_sz = len(values)
         result = 0
-        for v, i in zip(val_list,range(val_list_sz)):
+        for v, i in zip(values,range(values_sz)):
             result += v<<(i*8)
 
         decoded_val = result
@@ -2016,7 +2016,7 @@ def _read_value_from(self, blob, size, endian, constraints):
         if self.padding_size != 0:
             if self.lsb_padding:
                 result >>= self.padding_size
             else:
-                shift = (val_list_sz-1)*8
+                shift = (values_sz-1)*8
                 result &= (((1<<(8-self.padding_size))-1)<<shift) + ((1<<shift)-1)
 
             val = (result >> lim) & ((1<<sz)-1)
-            if val_list is None:
+            if values is None:
                 mini, maxi = extrems
                 if constraints[AbsCsts.Contents] and (mini > val or maxi < val):
                     raise ValueError("Value for subfield number {:d} does not match the constraints!".format(i+1))
@@ -2060,9 +2060,9 @@ def do_absorb(self, blob, constraints, off=0, size=None):
                     extrems[0] = min(extrems[0], val)
                     extrems[1] = max(extrems[1], val)
             else:
-                if constraints[AbsCsts.Contents] and val not in val_list:
+                if constraints[AbsCsts.Contents] and val not in values:
                     raise ValueError("Value for subfield number {:d} does not match the constraints!".format(i+1))
-                val_list.insert(insert_idx, val)
+                values.insert(insert_idx, val)
 
             if first_pass:
                 first_pass = False
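The padding checks above are what absorption exercises in the integration tests; a sketch of the kind of value type they guard (mirroring ``test_BitField_absorb`` further below)::

    vt = BitField(subfield_sizes=[4, 4, 4],
                  subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                  subfield_val_extremums=[None, [14, 15], None],
                  padding=1, endian=VT.BigEndian, lsb_padding=True)
    # With AbsCsts.Contents enforced, a blob whose low-order padding bits are
    # not all 1s is rejected: "contents not valid! (padding should be 1s)"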
@@ -2113,11 +2113,11 @@ def get_value(self):
 
         self.idx_inuse = copy.copy(self.idx)
 
-        for lim, val_list, extrems, i in zip(self.subfield_limits, self.subfield_vals, self.subfield_extrems,
+        for lim, values, extrems, i in zip(self.subfield_limits, self.subfield_vals, self.subfield_extrems,
                                              range(len(self.subfield_limits))):
             if self.determinist:
                 if i == self.current_idx:
-                    if val_list is None:
+                    if values is None:
                         mini, maxi = extrems
                         v = mini + self.idx[self.current_idx]
                         if v >= maxi:
@@ -2126,45 +2126,45 @@ def get_value(self):
                         else:
                             self.idx[self.current_idx] += 1
                         val += v << prev_lim
                     else:
-                        if len(val_list) == 1:
+                        if len(values) == 1:
                             index = 0
                         else:
                             index = self.idx[self.current_idx]
-                        if index >= len(val_list) - 1:
+                        if index >= len(values) - 1:
                             update_current_idx = True
                         else:
                             self.idx[self.current_idx] += 1
                         self.idx_inuse[self.current_idx] = index
-                        val += val_list[index] << prev_lim
+                        val += values[index] << prev_lim
                 else:
                     if self._fuzzy_mode:
                         cursor = 0
                     else:
-                        if val_list is not None and len(val_list) == 1:
+                        if values is not None and len(values) == 1:
                             cursor = 0
                         else:
                             if i > self.current_idx and self.subfield_defaults[i] is None:
                                 # Note on the use of max(): in the
-                                # case of val_list, idx is always > 1,
+                                # case of values, idx is always > 1,
                                 # whereas when it is extrems, idx can
                                 # be 0.
                                 cursor = max(self.idx[i] - 1, 0)
                             else:
                                 cursor = self.idx[i]
                     self.idx_inuse[i] = cursor
-                    if val_list is None:
+                    if values is None:
                         mini, maxi = extrems
                         val += (mini + cursor) << prev_lim
                     else:
-                        val += (val_list[cursor]) << prev_lim
+                        val += (values[cursor]) << prev_lim
             else:
-                if val_list is None:
+                if values is None:
                     mini, maxi = extrems
                     drawn_val = random.randint(mini, maxi)
                     self.idx[i] = self.idx_inuse[i] = drawn_val - mini
                 else:
-                    drawn_val = random.choice(val_list)
-                    self.idx[i] = self.idx_inuse[i] = val_list.index(drawn_val)
+                    drawn_val = random.choice(values)
+                    self.idx[i] = self.idx_inuse[i] = values.index(drawn_val)
 
                 val += drawn_val << prev_lim
@@ -2216,18 +2216,18 @@ def get_current_value(self):
         val = 0
         prev_lim = 0
 
-        for lim, val_list, extrems, i in zip(self.subfield_limits, self.subfield_vals, self.subfield_extrems,
+        for lim, values, extrems, i in zip(self.subfield_limits, self.subfield_vals, self.subfield_extrems,
                                              range(len(self.subfield_limits))):
-            if val_list is None:
+            if values is None:
                 mini, maxi = extrems
                 v = mini + self.idx_inuse[i]
                 val += v << prev_lim
             else:
-                if len(val_list) == 1:
+                if len(values) == 1:
                     index = 0
                 else:
                     index = self.idx_inuse[i]
-                val += val_list[index] << prev_lim
+                val += values[index] << prev_lim
 
             prev_lim = lim
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index d1a6f67..293ef0f 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -1027,7 +1027,7 @@ def test_BitField_Attr_01(self):
         print('\n -=[ random & infinite (loop count: %d) ]=- \n' % loop_count)
 
         t = BitField(subfield_limits=[2, 6, 10, 12],
-                     subfield_val_lists=[[4, 2, 1], [2, 15, 16, 3], None, [1]],
+                     subfield_values=[[4, 2, 1], [2, 15, 16, 3], None, [1]],
                      subfield_val_extremums=[None, None, [3, 11], None],
                      padding=0, lsb_padding=True, endian=VT.LittleEndian)
         node = Node('BF', value_type=t)
@@ -1063,7 +1063,7 @@ def test_BitField(self):
         loop_count = 20
         e_bf = Node('BF')
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[4, 2, 1], None, [10, 13]],
+                      subfield_values=[[4, 2, 1], None, [10, 13]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=0, lsb_padding=False, endian=VT.BigEndian)
         e_bf.set_values(value_type=vt)
@@ -1095,7 +1095,7 @@ def test_BitField(self):
         print('\n***')
         print('Random & finite: (should result in only 1 possible values)')
 
-        vt = BitField(subfield_sizes=[4, 4], subfield_val_lists=[[0x3], [0xF]])
+        vt = BitField(subfield_sizes=[4, 4], subfield_values=[[0x3], [0xF]])
         e = Node('bf_test', value_type=vt)
         e.set_env(Env())
         e.make_finite()
@@ -1110,7 +1110,7 @@ def test_BitField_basic_features(self):
         i = 0
         ok = True
 
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[1], [1], [1], [1]],
                      padding=0, lsb_padding=False, endian=VT.LittleEndian)
         val = binascii.b2a_hex(t.get_value())
         print(t.pretty_print(), t.drawn_val)
         i += 1
         self.assertEqual(val, b'4501')
 
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[1], [1], [1], [1]],
                      padding=0, lsb_padding=True, endian=VT.BigEndian)
         val = binascii.b2a_hex(t.get_value())
         print('*** [%d] ' % i, val)
         i += 1
         self.assertEqual(val, b'5140')
 
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[1], [1], [1], [1]],
                      padding=1, lsb_padding=True, endian=VT.BigEndian)
         val = binascii.b2a_hex(t.get_value())
         print('*** [%d] ' % i, val)
         i += 1
         self.assertEqual(val, b'517f')
 
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[1], [1], [1], [1]],
                      padding=0, lsb_padding=False, endian=VT.BigEndian)
         val = binascii.b2a_hex(t.get_value())
         print('*** [%d] ' % i, val)
         i += 1
         self.assertEqual(val, b'0145')
 
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[1], [1], [1], [1]],
                      padding=1, lsb_padding=False, endian=VT.BigEndian)
         val = binascii.b2a_hex(t.get_value())
         print('*** [%d] ' % i, val)
         i += 1
         self.assertEqual(val, b'fd45')
 
-        t = BitField(subfield_sizes=[2, 4, 2, 2], subfield_val_lists=[[1], [1], [1], [1]],
+        t = BitField(subfield_sizes=[2, 4, 2, 2], subfield_values=[[1], [1], [1], [1]],
                      padding=1, lsb_padding=False, endian=VT.BigEndian)
         val = binascii.b2a_hex(t.get_value())
         print('*** [%d] ' % i, val)
         i += 1
         self.assertEqual(val, b'fd45')
 
-        print('\n******** subfield_val_list\n')
+        print('\n******** subfield_values\n')
 
         # Note that 4 in subfield 1 and 16 in subfield 2 are ignored
         # --> 6 different values are output before looping
-        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_val_lists=[[4, 2, 1], [2, 15, 16, 3], [2, 3, 0], [1]],
+        t = BitField(subfield_limits=[2, 6, 8, 10], subfield_values=[[4, 2, 1], [2, 15, 16, 3], [2, 3, 0], [1]],
                      padding=0, lsb_padding=True, endian=VT.LittleEndian, determinist=True)
         for i in range(30):
             val = binascii.b2a_hex(t.get_value())
@@ -1211,7 +1211,7 @@ def test_BitField_basic_features(self):
 
         t = BitField(subfield_limits=[2, 6, 8, 10],
                      subfield_val_extremums=[[1, 2], [4, 12], [0, 3], None],
-                     subfield_val_lists=[None, None, None, [3]],
+                     subfield_values=[None, None, None, [3]],
                      padding=0, lsb_padding=False, endian=VT.BigEndian, determinist=True)
 
         val = {}
@@ -1295,7 +1295,7 @@ def test_BitField_various_features(self):
 
         bf = Node('BF')
         vt1 = BitField(subfield_sizes=[3, 5, 7],
-                       subfield_val_lists=[[2, 1], None, [10, 120]],
+                       subfield_values=[[2, 1], None, [10, 120]],
                        subfield_val_extremums=[None, [6, 15], None],
                        padding=0, lsb_padding=True, endian=VT.BigEndian)
         bf.set_values(value_type=vt1)
@@ -1311,7 +1311,7 @@ def test_BitField_various_features(self):
         # bf.show()
 
         vt2 = BitField(subfield_sizes=[4, 3, 4, 4, 2],
-                       subfield_val_lists=[None, [3, 5], [15], [14], [2]],
+                       subfield_values=[None, [3, 5], [15], [14], [2]],
                        subfield_val_extremums=[[8, 12], None, None, None, None],
                        padding=0, lsb_padding=False, endian=VT.BigEndian)
@@ -1340,26 +1340,26 @@ def test_BitField_absorb(self):
 
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]],
+                      subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=1, endian=VT.BigEndian, lsb_padding=True)
         bfield_1 = Node('bfield_1', value_type=vt)
         # bfield.set_env(Env())
 
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]],
+                      subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=0, endian=VT.BigEndian, lsb_padding=True)
         bfield_2 = Node('bfield_2', value_type=vt)
 
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]],
+                      subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=1, endian=VT.BigEndian, lsb_padding=False)
         bfield_3 = Node('bfield_3', value_type=vt)
 
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]],
+                      subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=0, endian=VT.BigEndian, lsb_padding=False)
         bfield_4 = Node('bfield_4', value_type=vt)
@@ -1870,7 +1870,7 @@ def test_absorb_nonterm_1(self):
         nstr_2 = Node('str2', value_type=String(values=['TBD2'], max_sz=8))
 
         vt = BitField(subfield_sizes=[4, 4, 4],
-                      subfield_val_lists=[[3, 2, 0xe, 1], None, [10, 13, 3]],
+                      subfield_values=[[3, 2, 0xe, 1], None, [10, 13, 3]],
                       subfield_val_extremums=[None, [14, 15], None],
                       padding=1, endian=VT.BigEndian, lsb_padding=True)
 
@@ -2190,7 +2190,7 @@ def test_exist_condition_02(self):
              'contents': [
                  {'name': 'A3_subopcode',
                   'contents': BitField(subfield_sizes=[15, 2, 4], endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [1, 2], [5, 6, 12]],
+                                       subfield_values=[None, [1, 2], [5, 6, 12]],
                                        subfield_val_extremums=[[500, 600], None, None],
                                        determinist=False)},
 
@@ -2367,7 +2367,7 @@ def test_collapse_padding(self):
                  {'name': 'part1',
                   'determinist': True,
                   'contents': BitField(subfield_sizes=[3, 1], padding=0, endian=VT.BigEndian,
-                                       subfield_val_lists=[None, [1]],
+                                       subfield_values=[None, [1]],
                                        subfield_val_extremums=[[1, 3], None])
                   },
                  {'name': 'sublevel',
                   'contents': [
                      {'name': 'part2_o1',
                       'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'),
                       'contents': BitField(subfield_sizes=[2, 2, 1], endian=VT.BigEndian,
-                                           subfield_val_lists=[[1, 2], [3], [0]])
+                                           subfield_values=[[1, 2], [3], [0]])
                       },
                      {'name': 'part2_o2',
                       'exists_if': (BitFieldCondition(sf=0, val=[1]), 'part1'),
                       'contents': BitField(subfield_sizes=[2, 2], endian=VT.BigEndian,
-                                           subfield_val_lists=[[3], [3]])
+                                           subfield_values=[[3], [3]])
                       },
                      {'name': 'part2_KO',
                       'exists_if': (BitFieldCondition(sf=0, val=[2]), 'part1'),
                       'contents': BitField(subfield_sizes=[2, 2], endian=VT.BigEndian,
-                                           subfield_val_lists=[[1], [1]])
+                                           subfield_values=[[1], [1]])
                       }
                  ]}
             ]}

From 870ebb9eef53c370f246d2b947caf807bf9e168f Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Sun, 14 Aug 2016 23:20:07 +0200
Subject: [PATCH 74/80] Add a 'fuzz magnitude' parameter to tTYPE disruptor +
 Fix infinite test cases situation

- The 'fuzz_magnitude' parameter has been added to the NodeConsumerStub
  constructor. TypedNodeDisruption provides this parameter to compatible types
  (currently only String-based types). String-based types use this magnitude
  to compute the length of some fuzzing test cases.
- Fix the infinite test case situation of some disruptors, which was triggered
  by node entanglement in conjunction with the 'fix' parameter set to True.
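Before the diff itself, a note on how the new knob is meant to be used: it surfaces on the ``tTYPE`` disruptor as ``fuzz_mag`` and scales the size of the oversized string test cases. A hypothetical back-of-the-envelope (the exact scaling lives in the String code, which this excerpt only partially shows)::

    # with max_sz == 8 and fuzz_mag == 0.5, an oversized test case built from a
    # 'max_sz * 100'-style factor shrinks accordingly:
    filler_len = int(8 * 100 * 0.5)   # 400 filler bytes instead of 800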
---
 data_models/protocols/pppoe.py       |  3 ++-
 docs/source/data_model.rst           |  4 +--
 docs/source/disruptors.rst           |  9 ++++---
 framework/fuzzing_primitives.py      | 16 ++++++-----
 framework/generic_data_makers.py     | 15 ++++++-----
 framework/value_types.py             | 40 ++++++++++++++++++----------
 test/integration/test_integration.py | 11 ++++++++
 7 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py
index 11a7ddb..74881ca 100644
--- a/data_models/protocols/pppoe.py
+++ b/data_models/protocols/pppoe.py
@@ -40,6 +40,7 @@ def build_data_model(self):
          {'name': 'tag',
           'contents': [
              {'name': 'type',
+              'random': True,
               'contents': UINT16_be(values=[0,0x0101,0x0102,0x0103,0x0104,0x0105,
                                             0x0110,0x201,0x0202,0x0203]),
               'absorb_csts': AbsFullCsts()},
@@ -228,7 +229,7 @@ def build_data_model(self):
         mh = ModelHelper(delayed_jobs=True, add_env=False)
         pppoe_msg = mh.create_graph_from_desc(pppoe_desc)
-        pppoe_msg.make_random(recursive=True)
+        # pppoe_msg.make_random(recursive=True)
 
         padi = pppoe_msg.get_clone('padi')
         padi['.*/mac_dst'].set_values(value_type=String(values=[u'\xff\xff\xff\xff\xff\xff']))
diff --git a/docs/source/data_model.rst b/docs/source/data_model.rst
index 86de3fd..c6b5364 100644
--- a/docs/source/data_model.rst
+++ b/docs/source/data_model.rst
@@ -232,7 +232,7 @@ parameters:
   purpose. Additionally, note that such nominal generation are not the
   one used by the generic disruptor ``tTYPE`` which rely on
   ``BitField`` *fuzzy mode* (reachable through
-  :func:`framework.value_types.VT_Alt.switch_mode`).
+  :func:`framework.value_types.VT_Alt.enable_fuzz_mode`).
 
   This parameter is for internal usage and will always follow the
   *hosting* node instructions. If you want to change the deterministic order you have
@@ -312,7 +312,7 @@ the first example. We additionally specify the parameter
   - :func:`framework.value_types.BitField.set_subfield`, :func:`framework.value_types.BitField.get_subfield`
   - :func:`framework.value_types.BitField.extend_right`
   - :func:`framework.value_types.BitField.reset_state`, :func:`framework.value_types.BitField.rewind`
-  - :func:`framework.value_types.VT_Alt.switch_mode` (used currently by the disruptor ``tTYPE``)
+  - :func:`framework.value_types.VT_Alt.enable_fuzz_mode` (used currently by the disruptor ``tTYPE``)
 
 
 .. _dm:generators:
diff --git a/docs/source/disruptors.rst b/docs/source/disruptors.rst
index 4bb7c92..b4f5550 100644
--- a/docs/source/disruptors.rst
+++ b/docs/source/disruptors.rst
@@ -406,13 +406,16 @@ Parameters:
   |      | data structure. Otherwise, fuzz weight (if specified in the
   |      | data model) is used for ordering
   |      | default: False [type: bool]
-  |_ fix
-  |      | desc: fix constraints while walking
-  |      | default: True [type: bool]
   |_ deep
   |      | desc: when set to True, if a node structure has changed, the modelwalker
   |      | will reset its walk through the children nodes
   |      | default: True [type: bool]
+  |_ fuzz_mag
+  |      | desc: order of magnitude for maximum size of some fuzzing test cases.
+  |      | default: 1.0 [type: float]
+  |_ fix
+  |      | desc: fix constraints while walking
+  |      | default: True [type: bool]
 
 
 tSEP - Alteration of Separator Node
diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py
index 626f944..23d53e6 100644
--- a/framework/fuzzing_primitives.py
+++ b/framework/fuzzing_primitives.py
@@ -131,10 +131,10 @@ def __iter__(self):
     def _do_reset(self, node):
         last_gen = self._root_node.get_reachable_nodes(internals_criteria=self.triglast_ic)
         for n in last_gen:
-            n.unfreeze()
+            n.unfreeze(ignore_entanglement=True)
         node.unfreeze(recursive=False)
         # self._root_node.unfreeze(recursive=True, dont_change_state=True)
-        node.unfreeze(recursive=True, dont_change_state=True)
+        node.unfreeze(recursive=True, dont_change_state=True, ignore_entanglement=True)
         self._consumer.do_after_reset(node)
 
     def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes):
@@ -369,8 +369,10 @@ class NodeConsumerStub(object):
     behave strangely (not the same number of yielded values).
     --> to be investigated (maybe wrong implementation of BasicVisitor and NonTermVisitor)
     '''
-    def __init__(self, max_runs_per_node=-1, min_runs_per_node=-1, respect_order=True, **kwargs):
+    def __init__(self, max_runs_per_node=-1, min_runs_per_node=-1, respect_order=True,
+                 fuzz_magnitude=1.0, **kwargs):
         self.need_reset_when_structure_change = False
+        self.fuzz_magnitude = fuzz_magnitude
 
         self._internals_criteria = None
         self._semantics_criteria = None
@@ -530,7 +532,7 @@ def consume_node(self, node):
             return True
         if not node.is_exhausted():
             node.freeze()
-            node.unfreeze(recursive=False)
+            node.unfreeze(recursive=False, ignore_entanglement=True)
             node.freeze()
             return True
@@ -712,7 +714,7 @@ def consume_node(self, node):
             self.orig_internal = node.cc
             self.orig_value = node.to_bytes()
 
-            self.current_fuzz_vt_list = self._create_fuzzy_vt_list(node)
+            self.current_fuzz_vt_list = self._create_fuzzy_vt_list(node, self.fuzz_magnitude)
             self._extend_fuzzy_vt_list(self.current_fuzz_vt_list, node)
 
         DEBUG_PRINT(' *** CONSUME: ' + node.name + ', ' + repr(self.current_fuzz_vt_list), level=0)
@@ -750,13 +752,13 @@ def still_interested_by(self, node):
             return False
 
     @staticmethod
-    def _create_fuzzy_vt_list(e):
+    def _create_fuzzy_vt_list(e, fuzz_magnitude):
         vt = e.cc.get_value_type()
 
         if issubclass(vt.__class__, vtype.VT_Alt):
             new_vt = copy.copy(vt)
             new_vt.make_private(forget_current_state=False)
-            new_vt.switch_mode()
+            new_vt.enable_fuzz_mode(fuzz_magnitude=fuzz_magnitude)
             fuzzy_vt_list = [new_vt]
 
         else:
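Putting the two sides together, the consumer now carries the magnitude down to the value type; a condensed rendition of the call chain added above (editor's sketch, not executable in isolation)::

    consumer = TypedNodeDisruption(max_runs_per_node=-1, min_runs_per_node=-1,
                                   fuzz_magnitude=0.5)   # stored by NodeConsumerStub
    # per consumed node, _create_fuzzy_vt_list() then does:
    #   new_vt = copy.copy(vt)
    #   new_vt.make_private(forget_current_state=False)
    #   new_vt.enable_fuzz_mode(fuzz_magnitude=consumer.fuzz_magnitude)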
sd_fuzz_typed_nodes(StatefulDisruptor): ''' Perform alterations on typed nodes (one at a time) according to @@ -138,6 +140,7 @@ def set_seed(self, prev_data): self.consumer = TypedNodeDisruption(max_runs_per_node=self.max_runs_per_node, min_runs_per_node=self.min_runs_per_node, + fuzz_magnitude=self.fuzz_mag, respect_order=self.order) self.consumer.need_reset_when_structure_change = self.deep self.consumer.set_node_interest(path_regexp=self.path) @@ -181,7 +184,7 @@ def disrupt_data(self, dm, target, data): exported_node = rnode if self.fix: - exported_node.unfreeze(recursive=True, reevaluate_constraints=True) + exported_node.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) exported_node.freeze() data.add_info('fix constraints (if any)') @@ -525,7 +528,7 @@ def disrupt_data(self, dm, target, data): corrupted_seed = Node(self.seed.name, base_node=self.seed, ignore_frozen_state=False, new_env=True) self.seed.env.remove_node_to_corrupt(consumed_node) - corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True) + corrupted_seed.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) corrupted_seed.freeze() data.add_info('sample index: {:d}'.format(self.idx)) @@ -901,12 +904,12 @@ def disrupt_data(self, dm, target, prev_data): return prev_data for n in l: - n.unfreeze(recursive=True, reevaluate_constraints=True) + n.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) prev_data.add_info("release constraints from the node '{!s}'".format(n.name)) n.freeze() else: - prev_data.node.unfreeze(recursive=True, reevaluate_constraints=True) + prev_data.node.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) prev_data.add_info('release constraints from the root') prev_data.node.freeze() diff --git a/framework/value_types.py b/framework/value_types.py index 3bc5111..313f600 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -112,9 +112,9 @@ def init_specific(self, *args, **kargs): def switch_mode(self): if self._fuzzy_mode: - self.enable_normal_mode() + self._enable_normal_mode() else: - self.enable_fuzz_mode() + self._enable_fuzz_mode() self._fuzzy_mode = not self._fuzzy_mode self.after_enabling_mode() @@ -122,10 +122,22 @@ def switch_mode(self): def after_enabling_mode(self): pass + def enable_fuzz_mode(self, fuzz_magnitude=1.0): + if not self._fuzzy_mode: + self._enable_fuzz_mode(fuzz_magnitude=fuzz_magnitude) + self._fuzzy_mode = True + self.after_enabling_mode() + def enable_normal_mode(self): + if self._fuzzy_mode: + self._enable_normal_mode() + self._fuzzy_mode = False + self.after_enabling_mode() + + def _enable_normal_mode(self): raise NotImplementedError - def enable_fuzz_mode(self): + def _enable_fuzz_mode(self, fuzz_magnitude=1.0): raise NotImplementedError @@ -870,14 +882,14 @@ def get_current_raw_val(self, str_form=False): val = self._bytes2str(self.drawn_val) if str_form else self.drawn_val return val - def enable_normal_mode(self): + def _enable_normal_mode(self): self.values = self.values_save self.values_copy = copy.copy(self.values) self.values_fuzzy = None self.drawn_val = None - def enable_fuzz_mode(self): + def _enable_fuzz_mode(self, fuzz_magnitude=1.0): self.values_fuzzy = [] if self.drawn_val is not None: @@ -889,7 +901,7 @@ def enable_fuzz_mode(self): orig_val = random.choice(self.values_copy) sz = len(orig_val) - sz_delta_with_max = self.max_sz - sz + sz_delta_with_max = self.max_encoded_sz - sz try: val = bp.corrupt_bits(orig_val, n=1) @@ 
-907,7 +919,7 @@ enable_fuzz_mode(self): if val != b'': self.values_fuzzy.append(val) - val = orig_val + b"X"*(self.max_sz*100) + val = orig_val + b"X"*(self.max_sz*int(100*fuzz_magnitude)) self.values_fuzzy.append(val) self.values_fuzzy.append(b'\x00' * sz if sz > 0 else b'\x00') @@ -922,11 +934,11 @@ enable_fuzz_mode(self): self.values_fuzzy.append(orig_val[:1] + b'%n' * cpt) self.values_fuzzy.append(orig_val[:1] + b'%s' * cpt) - self.values_fuzzy.append(orig_val + b'%n' * 400) - self.values_fuzzy.append(orig_val + b'%s' * 400) - self.values_fuzzy.append(orig_val + b'\"%n\"' * 400) - self.values_fuzzy.append(orig_val + b'\"%s\"' * 400) - self.values_fuzzy.append(orig_val + b'\r\n' * 100) + self.values_fuzzy.append(orig_val + b'%n' * int(400*fuzz_magnitude)) + self.values_fuzzy.append(orig_val + b'%s' * int(400*fuzz_magnitude)) + self.values_fuzzy.append(orig_val + b'\"%n\"' * int(400*fuzz_magnitude)) + self.values_fuzzy.append(orig_val + b'\"%s\"' * int(400*fuzz_magnitude)) + self.values_fuzzy.append(orig_val + b'\r\n' * int(100*fuzz_magnitude)) if self.extra_fuzzy_list: for v in self.extra_fuzzy_list: @@ -1827,7 +1839,7 @@ def after_enabling_mode(self): self.__count_of_possible_values = None self._reset_idx() - def enable_normal_mode(self): + def _enable_normal_mode(self): if self.determinist_save is not None: self.determinist = self.determinist_save @@ -1838,7 +1850,7 @@ def enable_normal_mode(self): self.subfield_fuzzy_vals = [None for i in range(len(self.subfield_sizes))] self.exhausted = False - def enable_fuzz_mode(self): + def _enable_fuzz_mode(self, fuzz_magnitude=1.0): for idx in range(len(self.subfield_fuzzy_vals)): sz = self.subfield_sizes[idx] diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 293ef0f..5f05c45 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -1518,12 +1518,23 @@ def test_NonTermVisitor(self): self.assertEqual(idx, 4) print('***') + + results = [ + b' [!] ++++++++++ [!] ::>:: [!] ? [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::AAA::AAA::>:: [!] ? [!] ', + b' [!] ++++++++++ [!] ::AAA::AAA::>:: [!] ? [!] ', + b' [!] >>>>>>>>>> [!] ::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::AAA::AAA::>:: [!] ', + b' [!] >>>>>>>>>> [!] ::AAA::AAA::>:: [!] ', + ] + idx = 0 data = fmk.dm.get_external_node(dm_name='mydf', data_id='shape') nonterm_consumer = NonTermVisitor(respect_order=True) for rnode, consumed_node, orig_node_val, idx in ModelWalker(data, nonterm_consumer, make_determinist=True, max_steps=50): print(colorize('[%d] ' % idx + rnode.to_ascii(), rgb=Color.INFO)) + self.assertEqual(rnode.to_bytes(), results[idx-1]) self.assertEqual(idx, 6) print('***') From 20dcee2fe314d7fa21b36091e46a1e95c4bd10f5 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 15 Aug 2016 02:27:54 +0200 Subject: [PATCH 75/80] Improve tTYPE disruptor - Non-freezable generator nodes are now taken into account if they generate a typed node. This enables tTYPE to also fuzz generator nodes specified to always be recomputed in order to make the data valid (like CRC, payload length, etc.). - If the 'fix' parameter is set, the disruptor will handle some constraints related to the currently fuzzed node (within the graph), unlike the previous behavior (now renamed 'fix_all') where the constraints on the whole graph were reevaluated. This makes it possible to preserve some subtle discrepancies. Note that only 'sync_size_with' and 'sync_enc_size_with' are currently handled by 'fix' (see the sketch below).
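A minimal sketch of the two strategies (illustration only, not part of the patch; `fuzzed_node` and `root_node` stand for `framework.data_model.Node` instances taken from a data model):

.. code-block:: python

    # 'fix': resynchronize only the nodes tied to the currently fuzzed one,
    # e.g. through 'sync_size_with' or 'sync_enc_size_with'.
    fuzzed_node.fix_synchronized_nodes()

    # 'fix_all' (the previous 'fix' behavior): reevaluate the constraints
    # over the whole graph before the data is sent.
    root_node.unfreeze(recursive=True, reevaluate_constraints=True,
                       ignore_entanglement=True)
    root_node.freeze()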
- PPPoE DM is an example of DM that leverages these enhancements with tTYPE. --- data_models/protocols/pppoe.py | 3 +- docs/source/disruptors.rst | 548 ++++++++++++++++--------------- framework/data_model.py | 57 +++- framework/fuzzing_primitives.py | 47 ++- framework/generic_data_makers.py | 17 +- 5 files changed, 359 insertions(+), 313 deletions(-) diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index 74881ca..e639e41 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -155,6 +155,7 @@ def build_data_model(self): {'name': 'session_id', 'contents': UINT16_be()}, {'name': 'length', + 'clear_attrs': MH.Attr.Freezable, 'contents': MH.LEN(vt=UINT16_be), 'node_args': 'payload', 'alt': [ @@ -169,7 +170,7 @@ def build_data_model(self): 'exists_if': (IntCondition(0x9), 'code'), 'contents': [ (tag_service_name, 1), - (tag_node, 0, 4) + # (tag_node, 0, 4) ]}, {'name': '4pado', 'shape_type': MH.FullyRandom, diff --git a/docs/source/disruptors.rst b/docs/source/disruptors.rst index b4f5550..7ec1317 100644 --- a/docs/source/disruptors.rst +++ b/docs/source/disruptors.rst @@ -6,6 +6,255 @@ Generic Disruptors The current generic disruptors are presented within the following sections. +Stateful Disruptors +=================== + +.. _dis:ttype: + +tTYPE - Advanced Alteration of Terminal Typed Node +-------------------------------------------------- + +Description: + Perform alterations on typed nodes (one at a time) according to + their type and various complementary information (such as size, + allowed values, ...). + +Reference: + :class:`framework.generic_data_makers.sd_fuzz_typed_nodes` + +Parameters: + .. code-block:: none + + generic args: + |_ init + | | desc: make the model walker ignore all the steps until the provided + | | one + | | default: 1 [type: int] + |_ runs_per_node + | | desc: maximum number of test cases for a single node (-1 means until + | | the end) + | | default: -1 [type: int] + |_ max_steps + | | desc: maximum number of steps (-1 means until the end) + | | default: -1 [type: int] + |_ clone_node + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless disruptors dealing with big data it can be useful + | | to set it to False) + | | default: True [type: bool] + + specific args: + |_ path + | | desc: graph path regexp to select nodes on which the disruptor should + | | apply + | | default: None [type: str] + |_ deep + | | desc: when set to True, if a node structure has changed, the modelwalker + | | will reset its walk through the children nodes + | | default: True [type: bool] + |_ fix + | | desc: limit constraints fixing to the nodes related to the currently + | | fuzzed one (only implemented for 'sync_size_with' and + | | 'sync_enc_size_with') + | | default: True [type: bool] + |_ fix_all + | | desc: for each produced data, reevaluate the constraints on the whole + | | graph + | | default: False [type: bool] + |_ order + | | desc: when set to True, the fuzzing order is strictly guided by the + | | data structure. Otherwise, fuzz weight (if specified in the + | | data model) is used for ordering + | | default: False [type: bool] + |_ fuzz_mag + | | desc: order of magnitude for maximum size of some fuzzing test cases.
+ | | default: 1.0 [type: float] + +tSTRUCT - Alter Data Structure +------------------------------ + +Description: + For each node associated to existence constraints or quantity + constraints or size constraints, alter the constraint, one at a time, after each call + to this disruptor. If `deep` is set, enable new structure corruption cases, based on + the minimum and maximum amount of non-terminal nodes (within the + input data) specified in the data model. + +Reference: + :class:`framework.generic_data_makers.sd_struct_constraints` + +Parameters: + .. code-block:: none + + generic args: + |_ init + | | desc: make the model walker ignore all the steps until the provided + | | one + | | default: 1 [type: int] + |_ max_steps + | | desc: maximum number of steps (-1 means until the end) + | | default: -1 [type: int] + specific args: + |_ path + | | desc: graph path regexp to select nodes on which the disruptor should + | | apply + | | default: None [type: str] + |_ deep + | | desc: if True, enable corruption of minimum and maxium amount of non-terminal + | | nodes + | | default: False [type: bool] + +Usage Example: + A typical *disruptor chain* for leveraging this disruptor could be: + + .. code-block:: none + + tWALK(path='path/to/some/node') tSTRUCT + + .. note:: Test this chain with the data example found at + :ref:`dm:pattern:existence-cond`, and set the path to the + ``opcode`` node path. + + .. seealso:: Refer to :ref:`tuto:dmaker-chain` for insight + into *disruptor chains*. + + + +tALT - Walk Through Alternative Node Configurations +--------------------------------------------------- + +Description: + Switch the configuration of each node, one by one, with the provided + alternate configuration. + +Reference: + :class:`framework.generic_data_makers.sd_switch_to_alternate_conf` + +Parameters: + .. code-block:: none + + generic args: + |_ clone_node + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless diruptors dealing with big data it can be usefull + | | to it to False) + | | default: True [type: bool] + |_ init + | | desc: make the model walker ignore all the steps until the provided + | | one + | | default: 1 [type: int] + |_ max_steps + | | desc: maximum number of steps (-1 means until the end) + | | default: -1 [type: int] + |_ runs_per_node + | | desc: maximum number of test cases for a single node (-1 means until + | | the end) + | | default: -1 [type: int] + specific args: + |_ conf + | | desc: change the configuration, with the one provided (by name), of + | | all subnodes fetched by @path, one-by-one. [default value is + | | set dynamically with the first-found existing alternate configuration] + | | default: None [type: str, list, tuple] + + +tSEP - Alteration of Separator Node +----------------------------------- + +Description: + Perform alterations on separators (one at a time). Each time a + separator is encountered in the provided data, it will be replaced + by another separator picked from the ones existing within the + provided data. + +Reference: + :class:`framework.generic_data_makers.sd_fuzz_separator_nodes` + +Parameters: + .. code-block:: none + + generic args: + |_ clone_node + | | desc: if True the dmaker will always return a copy of the node. 
(for + | | stateless diruptors dealing with big data it can be usefull + | | to it to False) + | | default: True [type: bool] + |_ init + | | desc: make the model walker ignore all the steps until the provided + | | one + | | default: 1 [type: int] + |_ max_steps + | | desc: maximum number of steps (-1 means until the end) + | | default: -1 [type: int] + |_ runs_per_node + | | desc: maximum number of test cases for a single node (-1 means until + | | the end) + | | default: -1 [type: int] + specific args: + |_ path + | | desc: graph path regexp to select nodes on which the disruptor should + | | apply + | | default: None [type: str] + |_ order + | | desc: when set to True, the fuzzing order is strictly guided by the + | | data structure. Otherwise, fuzz weight (if specified in the + | | data model) is used for ordering + | | default: False [type: bool] + |_ deep + | | desc: when set to True, if a node structure has changed, the modelwalker + | | will reset its walk through the children nodes + | | default: True [type: bool] + + + +tWALK - Walk Through a Data Model +--------------------------------- + +Description: + Walk through the provided data and for each visited node, iterates + over the allowed values (with respect to the data model). Note: *no + alteration* is performed by this disruptor. + +Reference: + :class:`framework.generic_data_makers.sd_iter_over_data` + +Parameters: + .. code-block:: none + + generic args: + |_ clone_node + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless diruptors dealing with big data it can be usefull + | | to it to False) + | | default: True [type: bool] + |_ init + | | desc: make the model walker ignore all the steps until the provided + | | one + | | default: 1 [type: int] + |_ max_steps + | | desc: maximum number of steps (-1 means until the end) + | | default: -1 [type: int] + |_ runs_per_node + | | desc: maximum number of test cases for a single node (-1 means until + | | the end) + | | default: -1 [type: int] + specific args: + |_ path + | | desc: graph path regexp to select nodes on which the disruptor should + | | apply + | | default: None [type: str] + |_ singleton + | | desc: consume also terminal nodes with only one possible value + | | default: False [type: bool] + |_ nt_only + | | desc: walk through non-terminal nodes only + | | default: False [type: bool] + |_ fix_all + | | desc: for each produced data, reevaluate the constraints on the whole + | | graph + | | default: True [type: bool] + + Stateless Disruptors ==================== @@ -25,14 +274,14 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. (for - | | stateless diruptors dealing with big data it can be usefull + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless diruptors dealing with big data it can be usefull | | to it to False) | | default: False [type: bool] |_ value @@ -59,14 +308,14 @@ Reference: Parameters: .. 
code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. (for - | | stateless diruptors dealing with big data it can be usefull + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless diruptors dealing with big data it can be usefull | | to it to False) | | default: False [type: bool] |_ recursive @@ -94,14 +343,14 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. (for - | | stateless diruptors dealing with big data it can be usefull + | | desc: if True the dmaker will always return a copy of the node. (for + | | stateless diruptors dealing with big data it can be usefull | | to it to False) | | default: False [type: bool] @@ -118,18 +367,18 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ recursive - | | desc: does the reachable nodes from the selected ones need also to + | | desc: does the reachable nodes from the selected ones need also to | | be changed? | | default: True [type: bool] |_ conf - | | desc: change the configuration, with the one provided (by name), of - | | all subnodes fetched by @path, one-by-one. [default value is + | | desc: change the configuration, with the one provided (by name), of + | | all subnodes fetched by @path, one-by-one. [default value is | | set dynamically with the first-found existing alternate configuration] | | default: None [type: str] @@ -146,13 +395,13 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ nb - | | desc: apply corruption on @nb Nodes fetched randomly within the data + | | desc: apply corruption on @nb Nodes fetched randomly within the data | | model | | default: 2 [type: int] |_ ascii @@ -175,7 +424,7 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ new_val | | desc: if provided change the selected byte with the new one | | default: None [type: str] @@ -199,16 +448,16 @@ Reference: Parameters: .. 
code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] |_ cmd | | desc: the command | | default: None [type: list, tuple, str] |_ file_mode - | | desc: if True the data will be provided through a file to the external + | | desc: if True the data will be provided through a file to the external | | program, otherwise it will be provided on the command line directly | | default: True [type: bool] @@ -225,12 +474,12 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ sz | | desc: truncate the data (or part of the data) to the provided size | | default: 10 [type: int] |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] @@ -248,9 +497,9 @@ Reference: Parameters: .. code-block:: none - specific args: + specific args: |_ path - | | desc: graph path regexp to select nodes on which the disruptor should + | | desc: graph path regexp to select nodes on which the disruptor should | | apply | | default: None [type: str] @@ -269,246 +518,3 @@ Reference: .. note:: Random seeds are generally set while loading the data model. This disruptor enables you to reset the seeds for the input data. - - -Stateful Disruptors -=================== - - -tSTRUCT - Alter Data Structure ------------------------------- - -Description: - For each node associated to existence constraints or quantity - constraints or size constraints, alter the constraint, one at a time, after each call - to this disruptor. If `deep` is set, enable new structure corruption cases, based on - the minimum and maximum amount of non-terminal nodes (within the - input data) specified in the data model. - -Reference: - :class:`framework.generic_data_makers.sd_struct_constraints` - -Parameters: - .. code-block:: none - - generic args: - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - specific args: - |_ path - | | desc: graph path regexp to select nodes on which the disruptor should - | | apply - | | default: None [type: str] - |_ deep - | | desc: if True, enable corruption of minimum and maxium amount of non-terminal - | | nodes - | | default: False [type: bool] - -Usage Example: - A typical *disruptor chain* for leveraging this disruptor could be: - - .. code-block:: none - - tWALK(path='path/to/some/node') tSTRUCT - - .. note:: Test this chain with the data example found at - :ref:`dm:pattern:existence-cond`, and set the path to the - ``opcode`` node path. - - .. seealso:: Refer to :ref:`tuto:dmaker-chain` for insight - into *disruptor chains*. - - - -tALT - Walk Through Alternative Node Configurations ---------------------------------------------------- - -Description: - Switch the configuration of each node, one by one, with the provided - alternate configuration. - -Reference: - :class:`framework.generic_data_makers.sd_switch_to_alternate_conf` - -Parameters: - .. code-block:: none - - generic args: - |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. 
(for - | | stateless diruptors dealing with big data it can be usefull - | | to it to False) - | | default: True [type: bool] - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - |_ runs_per_node - | | desc: maximum number of test cases for a single node (-1 means until - | | the end) - | | default: -1 [type: int] - specific args: - |_ conf - | | desc: change the configuration, with the one provided (by name), of - | | all subnodes fetched by @path, one-by-one. [default value is - | | set dynamically with the first-found existing alternate configuration] - | | default: None [type: str, list, tuple] - - -.. _dis:ttype: - -tTYPE - Advanced Alteration of Terminal Typed Node --------------------------------------------------- - -Description: - Perform alterations on typed nodes (one at a time) according to - its type and various complementary information (such as size, - allowed values, ...). - -Reference: - :class:`framework.generic_data_makers.sd_fuzz_typed_nodes` - -Parameters: - .. code-block:: none - - generic args: - |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. (for - | | stateless diruptors dealing with big data it can be usefull - | | to it to False) - | | default: True [type: bool] - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - |_ runs_per_node - | | desc: maximum number of test cases for a single node (-1 means until - | | the end) - | | default: -1 [type: int] - specific args: - |_ path - | | desc: graph path regexp to select nodes on which the disruptor should - | | apply - | | default: None [type: str] - |_ order - | | desc: when set to True, the fuzzing order is strictly guided by the - | | data structure. Otherwise, fuzz weight (if specified in the - | | data model) is used for ordering - | | default: False [type: bool] - |_ deep - | | desc: when set to True, if a node structure has changed, the modelwalker - | | will reset its walk through the children nodes - | | default: True [type: bool] - |_ fuzz_mag - | | desc: order of magnitude for maximum size of some fuzzing test cases. - | | default: 1.0 [type: float] - |_ fix - | | desc: fix constraints while walking - | | default: True [type: bool] - - -tSEP - Alteration of Separator Node ------------------------------------ - -Description: - Perform alterations on separators (one at a time). Each time a - separator is encountered in the provided data, it will be replaced - by another separator picked from the ones existing within the - provided data. - -Reference: - :class:`framework.generic_data_makers.sd_fuzz_separator_nodes` - -Parameters: - .. code-block:: none - - generic args: - |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. 
(for - | | stateless diruptors dealing with big data it can be usefull - | | to it to False) - | | default: True [type: bool] - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - |_ runs_per_node - | | desc: maximum number of test cases for a single node (-1 means until - | | the end) - | | default: -1 [type: int] - specific args: - |_ path - | | desc: graph path regexp to select nodes on which the disruptor should - | | apply - | | default: None [type: str] - |_ order - | | desc: when set to True, the fuzzing order is strictly guided by the - | | data structure. Otherwise, fuzz weight (if specified in the - | | data model) is used for ordering - | | default: False [type: bool] - |_ deep - | | desc: when set to True, if a node structure has changed, the modelwalker - | | will reset its walk through the children nodes - | | default: True [type: bool] - - - -tWALK - Walk Through a Data Model ---------------------------------- - -Description: - Walk through the provided data and for each visited node, iterates - over the allowed values (with respect to the data model). Note: *no - alteration* is performed by this disruptor. - -Reference: - :class:`framework.generic_data_makers.sd_iter_over_data` - -Parameters: - .. code-block:: none - - generic args: - |_ clone_node - | | desc: if True the dmaker will always return a copy of the node. (for - | | stateless diruptors dealing with big data it can be usefull - | | to it to False) - | | default: True [type: bool] - |_ init - | | desc: make the model walker ignore all the steps until the provided - | | one - | | default: 1 [type: int] - |_ max_steps - | | desc: maximum number of steps (-1 means until the end) - | | default: -1 [type: int] - |_ runs_per_node - | | desc: maximum number of test cases for a single node (-1 means until - | | the end) - | | default: -1 [type: int] - specific args: - |_ path - | | desc: graph path regexp to select nodes on which the disruptor should - | | apply - | | default: None [type: str] - |_ singleton - | | desc: consume also terminal nodes with only one possible value - | | default: False [type: bool] - |_ nt_only - | | desc: walk through non-terminal nodes only - | | default: False [type: bool] - |_ fix - | | desc: fix constraints while walking - | | default: True [type: bool] diff --git a/framework/data_model.py b/framework/data_model.py index 11a5bb7..c556527 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -468,6 +468,12 @@ def make_private(self, node_dico): else: raise TypeError + def synchronize_nodes(self, src_node): + self._sync_nodes_specific(src_node) + + def _sync_nodes_specific(self, src_node): + pass + class SyncQtyFromObj(SyncObj): @@ -512,6 +518,25 @@ def set_size_on_source_node(self, size): except: raise DataModelDefinitionError("The node '{:s}' is not compatible with integer absorption".format(self._node.name)) + def _sync_nodes_specific(self, src_node): + if self.apply_to_enc_size: + sz = len(src_node.to_bytes()) + else: + if src_node.is_typed_value(subkind=fvt.String): + # We need to get the str form to be agnostic to any low-level encoding + # that may change the size ('utf8', ...). 
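+ # (e.g., u'\xe9' is one character but two bytes once 'utf8'-encoded, + # so the size computed below is a number of characters, not of bytes)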
+ decoded_val = src_node.get_raw_value(str_form=True) + else: + decoded_val = src_node.get_raw_value() + if not isinstance(decoded_val, bytes): + # In this case, this is a BitField or an INT-based object, which are + # fixed size object + raise DataModelDefinitionError('size sync should not be used for fixed sized object!') + sz = len(decoded_val) + sz += self.base_size + self.set_size_on_source_node(NodeInternals_NonTerm.sizesync_corrupt_hook(src_node, sz)) + + class SyncExistenceObj(SyncObj): def __init__(self, sync_list, and_junction=True): @@ -901,6 +926,15 @@ def get_node_sync(self, scope): else: return self._sync_with.get(scope, None) + def synchronize_nodes(self, src_node): + if self._sync_with is None: + return + + for scope, obj in self._sync_with.items(): + if isinstance(obj, SyncObj): + obj.synchronize_nodes(src_node) + + def make_private(self, ignore_frozen_state, accept_external_entanglement, delayed_node_internals, forget_original_sync_objs=False): if self.private is not None: @@ -2964,22 +2998,7 @@ def _construct_subnodes(self, node_desc, subnode_list, mode, ignore_sep_fstate, def _sync_size_handling(node): obj = node.synchronized_with(SyncScope.Size) if obj is not None: - if obj.apply_to_enc_size: - sz = len(node.to_bytes()) - else: - if node.is_typed_value(subkind=fvt.String): - # We need to get the str form to be agnostic to any low-level encoding - # that may change the size ('utf8', ...). - decoded_val = node.get_raw_value(str_form=True) - else: - decoded_val = node.get_raw_value() - if not isinstance(decoded_val, bytes): - # In this case, this is a BitField or an INT-based object, which are - # fixed size object - raise DataModelDefinitionError('size sync should not be used for fixed sized object!') - sz = len(decoded_val) - sz += obj.base_size - obj.set_size_on_source_node(NodeInternals_NonTerm.sizesync_corrupt_hook(node, sz)) + obj.synchronize_nodes(node) node_attrs = node_desc[1:] # node = node_desc[0] @@ -5944,8 +5963,12 @@ def set_frozen_value(self, value, conf=None): else: raise ValueError + def fix_synchronized_nodes(self, conf=None): + conf = self.__check_conf(conf) + self.internals[conf].synchronize_nodes(self) - def unfreeze(self, conf=None, recursive=True, dont_change_state=False, ignore_entanglement=False, only_generators=False, + def unfreeze(self, conf=None, recursive=True, dont_change_state=False, + ignore_entanglement=False, only_generators=False, reevaluate_constraints=False): self._delayed_jobs_called = False diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index 23d53e6..40833e2 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -90,7 +90,7 @@ def __iter__(self): self._cpt = 1 gen = self.walk_graph_rec([self._root_node], structure_has_changed=False, - consumed_nodes=set()) + consumed_nodes=set(), parent_node=self._root_node) for consumed_node, orig_node_val in gen: self._root_node.freeze() @@ -137,7 +137,7 @@ def _do_reset(self, node): node.unfreeze(recursive=True, dont_change_state=True, ignore_entanglement=True) self._consumer.do_after_reset(node) - def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): + def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes, parent_node): reset = False guilty = None @@ -168,7 +168,6 @@ def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): # For each node we look for direct subnodes fnodes = node.get_reachable_nodes(internals_criteria=self.ic, exclude_self=True, 
respect_order=self._consumer.respect_order, relative_depth=1) - if DEBUG: DEBUG_PRINT('--(2)-> Node:' + node.name + ', exhausted:' + repr(node.is_exhausted()), level=2) for e in fnodes: @@ -178,7 +177,8 @@ def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): # node is terminal, and we go to Step 2. Otherwise, we # call ourselves recursively with the list of subnodes if fnodes: - generator = self.walk_graph_rec(fnodes, structure_has_changed, consumed_nodes) + generator = self.walk_graph_rec(fnodes, structure_has_changed, consumed_nodes, + parent_node=node) for consumed_node, orig_node_val in generator: yield consumed_node, orig_node_val # YIELD @@ -188,7 +188,8 @@ def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): # for possible uses/modifications. This is performed within our # method node_consumer_helper(). if perform_second_step: - consumer_gen = self.node_consumer_helper(node, structure_has_changed, consumed_nodes) + consumer_gen = self.node_consumer_helper(node, structure_has_changed, consumed_nodes, + parent_node=parent_node) for consumed_node, orig_node_val, reset, ignore_node in consumer_gen: DEBUG_PRINT(" [ reset: {!r:s} | ignore_node: {!r:s} | " \ @@ -243,7 +244,7 @@ def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): idx = node_list.index(node) - gen = self.walk_graph_rec(node_list[:idx], False, set()) + gen = self.walk_graph_rec(node_list[:idx], False, set(), parent_node=parent_node) for consumed_node, orig_node_val in gen: yield consumed_node, orig_node_val # YIELD @@ -267,7 +268,7 @@ def walk_graph_rec(self, node_list, structure_has_changed, consumed_nodes): return - def node_consumer_helper(self, node, structure_has_changed, consumed_nodes): + def node_consumer_helper(self, node, structure_has_changed, consumed_nodes, parent_node): def _do_if_not_interested(node, orig_node_val): reset = self._consumer.need_reset(node) @@ -328,6 +329,8 @@ def _do_if_not_interested(node, orig_node_val): self._consumer.consume_node(node) else: self._consumer.recover_node(node) + if self._consumer.fix_constraints: + node.fix_synchronized_nodes() yield _do_if_not_interested(node, orig_node_val) raise ValueError # We should never return here, otherwise its a bug we want to alert on @@ -338,6 +341,8 @@ def _do_if_not_interested(node, orig_node_val): else: if node in consumed_nodes: self._consumer.recover_node(node) + if self._consumer.fix_constraints: + node.fix_synchronized_nodes() not_recovered = False return @@ -349,9 +354,13 @@ def _do_if_not_interested(node, orig_node_val): # In this case we iterate only on the current node node.unfreeze(recursive=False, ignore_entanglement=True) node.freeze() + if self._consumer.fix_constraints: + node.fix_synchronized_nodes() elif not consume_called_again: if not_recovered and (self._consumer.interested_by(node) or node in consumed_nodes): self._consumer.recover_node(node) + if self._consumer.fix_constraints: + node.fix_synchronized_nodes() if not node.is_exhausted() and self._consumer.need_reset(node): yield None, None, True, True again = False @@ -364,15 +373,11 @@ def _do_if_not_interested(node, orig_node_val): class NodeConsumerStub(object): - ''' - TOFIX (TBC since last cleanup): when respect_order=False, BasicVisitor - behave strangely (not the same number of yielded values). 
- --> to be investigated (maybe wrong implementation of BasicVisitor and NonTermVisitor) - ''' def __init__(self, max_runs_per_node=-1, min_runs_per_node=-1, respect_order=True, - fuzz_magnitude=1.0, **kwargs): + fuzz_magnitude=1.0, fix_constraints=False, **kwargs): self.need_reset_when_structure_change = False self.fuzz_magnitude = fuzz_magnitude + self.fix_constraints = fix_constraints self._internals_criteria = None self._semantics_criteria = None @@ -697,7 +702,8 @@ class TypedNodeDisruption(NodeConsumerStub): def init_specific(self, **kwargs): self._internals_criteria = dm.NodeInternalsCriteria(mandatory_attrs=[dm.NodeInternals.Mutable], negative_attrs=[dm.NodeInternals.Separator], - node_kinds=[dm.NodeInternals_TypedValue]) + node_kinds=[dm.NodeInternals_TypedValue, + dm.NodeInternals_GenFunc]) self.orig_value = None self.current_fuzz_vt_list = None self.current_node = None @@ -706,6 +712,10 @@ def init_specific(self, **kwargs): self.need_reset_when_structure_change = True def consume_node(self, node): + if node.is_genfunc() and (node.is_attr_set(dm.NodeInternals.Freezable) or + not node.generated_node.is_typed_value()): + return False + if node is not self.current_node: self.current_node = node self.current_fuzz_vt_list = None @@ -714,8 +724,9 @@ def consume_node(self, node): self.orig_internal = node.cc self.orig_value = node.to_bytes() - self.current_fuzz_vt_list = self._create_fuzzy_vt_list(node, self.fuzz_magnitude) - self._extend_fuzzy_vt_list(self.current_fuzz_vt_list, node) + vt_node = node.generated_node if node.is_genfunc() else node + self.current_fuzz_vt_list = self._create_fuzzy_vt_list(vt_node, self.fuzz_magnitude) + self._extend_fuzzy_vt_list(self.current_fuzz_vt_list, vt_node) DEBUG_PRINT(' *** CONSUME: ' + node.name + ', ' + repr(self.current_fuzz_vt_list), level=0) @@ -726,8 +737,8 @@ def consume_node(self, node): node.make_finite() node.make_determinist() node.unfreeze(ignore_entanglement=True) - # we need to be sure that the current node is freezable (always the case by default) - # node.set_attr(dm.NodeInternals.Freezable) + # we need to be sure that the current node is freezable + node.set_attr(dm.NodeInternals.Freezable) return True else: diff --git a/framework/generic_data_makers.py b/framework/generic_data_makers.py index 5589567..bbd058a 100644 --- a/framework/generic_data_makers.py +++ b/framework/generic_data_makers.py @@ -57,7 +57,8 @@ def truncate_info(info, max_size=60): ' the disruptor should apply', None, str), 'nt_only': ('walk through non-terminal nodes only', False, bool), 'singleton': ('consume also terminal nodes with only one possible value', True, bool), - 'fix': ('fix constraints while walking', True, bool)}) + 'fix_all': ('for each produced data, reevaluate the constraints on the whole graph', + True, bool)}) class sd_iter_over_data(StatefulDisruptor): ''' Walk through the provided data and for each visited node, iterates @@ -99,10 +100,10 @@ def disrupt_data(self, dm, target, data): else: exported_node = rnode - if self.fix: + if self.fix_all: exported_node.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) exported_node.freeze() - data.add_info('fix constraints (if any)') + data.add_info('reevaluate all the constraints (if any)') data.update_from_node(exported_node) @@ -119,7 +120,10 @@ def disrupt_data(self, dm, target, data): 'in the data model) is used for ordering', False, bool), 'deep': ('when set to True, if a node structure has changed, the modelwalker ' \ 'will reset its walk through the children nodes', 
True, bool), - 'fix': ('fix constraints while walking', True, bool), + 'fix_all': ('for each produced data, reevaluate the constraints on the whole graph', + False, bool), + 'fix': ("limit constraints fixing to the nodes related to the currently fuzzed one" + " (only implemented for 'sync_size_with' and 'sync_enc_size_with')", True, bool), 'fuzz_mag': ('order of magnitude for maximum size of some fuzzing test cases.', 1.0, float)}) class sd_fuzz_typed_nodes(StatefulDisruptor): @@ -141,6 +145,7 @@ def set_seed(self, prev_data): self.consumer = TypedNodeDisruption(max_runs_per_node=self.max_runs_per_node, min_runs_per_node=self.min_runs_per_node, fuzz_magnitude=self.fuzz_mag, + fix_constraints=self.fix, respect_order=self.order) self.consumer.need_reset_when_structure_change = self.deep self.consumer.set_node_interest(path_regexp=self.path) @@ -183,10 +188,10 @@ def disrupt_data(self, dm, target, data): else: exported_node = rnode - if self.fix: + if self.fix_all: exported_node.unfreeze(recursive=True, reevaluate_constraints=True, ignore_entanglement=True) exported_node.freeze() - data.add_info('fix constraints (if any)') + data.add_info('reevaluate all the constraints (if any)') data.update_from_node(exported_node) From 4451e1aef053759cc75a139db1c273c16509c73b Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 15 Aug 2016 02:33:14 +0200 Subject: [PATCH 76/80] PPPoE DM: uncomment a line in PADI definition --- data_models/protocols/pppoe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index e639e41..0852428 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -170,7 +170,7 @@ def build_data_model(self): 'exists_if': (IntCondition(0x9), 'code'), 'contents': [ (tag_service_name, 1), - # (tag_node, 0, 4) + (tag_node, 0, 4) ]}, {'name': '4pado', 'shape_type': MH.FullyRandom, From 19a4adbc5e9a0399c81dee4c32a7d9b116941818 Mon Sep 17 00:00:00 2001 From: Eric Lacombe Date: Mon, 15 Aug 2016 16:29:31 +0200 Subject: [PATCH 77/80] Make some Generator templates not freezable + bug fixes - MH.LEN, MH.QTY, MH.CRC, MH.WRAP, MH.OFFSET are now by default not freezable - Make these generators more robust when they produce their Node - Data validation added to INT class constructor. - Fix TypedNodeDisruption.recover_node() regarding original node attributes - Avoid raising an exception in SyncSizeObj.set_size_on_source_node() when the size is not compatible with the typed Node. Handle the situation by changing the value. 
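A minimal sketch of how these changes combine (not part of the patch; the import paths are assumed from the file layout used in this series, and the values are illustrative):

.. code-block:: python

    from framework.value_types import UINT16_be
    from framework.data_model_helpers import MH

    # With force_mode=True an out-of-range value is now clamped to the type
    # maximum instead of raising DataModelDefinitionError.
    vt = UINT16_be(values=[0x1FFFF], force_mode=True)  # stored as 0xFFFF

    # Generator templates (MH.LEN, MH.QTY, MH.CRC, MH.WRAP, MH.OFFSET) are
    # now unfreezable by default, hence recomputed whenever the data they
    # depend on changes; pass freezable=True to restore the old behavior.
    length_gen = MH.LEN(vt=UINT16_be, freezable=True)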
--- data_models/file_formats/png.py | 6 +- data_models/protocols/pppoe.py | 1 - framework/data_model.py | 17 ++- framework/data_model_helpers.py | 187 ++++++++++++++++++--------- framework/fuzzing_primitives.py | 7 +- framework/value_types.py | 31 ++++- test/integration/test_integration.py | 16 +-- 7 files changed, 168 insertions(+), 97 deletions(-) diff --git a/data_models/file_formats/png.py b/data_models/file_formats/png.py index 4661780..a9c257a 100644 --- a/data_models/file_formats/png.py +++ b/data_models/file_formats/png.py @@ -69,8 +69,7 @@ def build_data_model(self): 'node_args': ['len']}, {'name': 'crc32_gen', 'contents': MH.CRC(vt=UINT32_be, clear_attrs=[MH.Attr.Mutable]), - 'node_args': ['type', 'data_gen'], - 'clear_attrs': MH.Attr.Freezable} + 'node_args': ['type', 'data_gen']} ]} ]} @@ -117,8 +116,7 @@ def build_data_model(self): ]}, {'name': 'crc32_gen', 'contents': MH.CRC(vt=UINT32_be, clear_attrs=[MH.Attr.Mutable]), - 'node_args': ['chk'], - 'clear_attrs': MH.Attr.Freezable} + 'node_args': ['chk']} ]} ]} diff --git a/data_models/protocols/pppoe.py b/data_models/protocols/pppoe.py index 0852428..74881ca 100644 --- a/data_models/protocols/pppoe.py +++ b/data_models/protocols/pppoe.py @@ -155,7 +155,6 @@ def build_data_model(self): {'name': 'session_id', 'contents': UINT16_be()}, {'name': 'length', - 'clear_attrs': MH.Attr.Freezable, 'contents': MH.LEN(vt=UINT16_be), 'node_args': 'payload', 'alt': [ diff --git a/framework/data_model.py b/framework/data_model.py index c556527..eb04506 100644 --- a/framework/data_model.py +++ b/framework/data_model.py @@ -512,11 +512,11 @@ def size_for_absorption(self): return max(0, self._node.get_raw_value() - self.base_size) def set_size_on_source_node(self, size): - try: - self._node.update_raw_value(size) - self._node.set_frozen_value(self._node.get_current_encoded_value()) - except: - raise DataModelDefinitionError("The node '{:s}' is not compatible with integer absorption".format(self._node.name)) + ok = self._node.update_raw_value(size) + if not ok: + print("\n*** WARNING: The node '{:s}' is not compatible with the integer" + " '{:d}'".format(self._node.name, size)) + self._node.set_frozen_value(self._node.get_current_encoded_value()) def _sync_nodes_specific(self, src_node): if self.apply_to_enc_size: @@ -970,6 +970,13 @@ def set_contents_from(self, node_internals): # NodeInternals_TypedValue del self._sync_with[SyncScope.Size] + def get_attrs_copy(self): + return (copy.copy(self.__attrs), copy.copy(self.custo)) + + def set_attrs_from(self, all_attrs): + self.__attrs = all_attrs[0] + self.custo = all_attrs[1] + # Called near the end of Node copy (Node.set_contents) to update # node references inside the NodeInternals def _update_node_refs(self, node_dico, debug): diff --git a/framework/data_model_helpers.py b/framework/data_model_helpers.py index 6209d12..7ecaa60 100644 --- a/framework/data_model_helpers.py +++ b/framework/data_model_helpers.py @@ -146,7 +146,7 @@ class Attr: @staticmethod def LEN(vt=fvt.INT_str, base_len=0, - set_attrs=[], clear_attrs=[], after_encoding=True): + set_attrs=[], clear_attrs=[], after_encoding=True, freezable=False): ''' Return a *generator* that returns the length of a node parameter. @@ -157,20 +157,30 @@ def LEN(vt=fvt.INT_str, base_len=0, clear_attrs (list): attributes that will be cleared on the generated node. after_encoding (bool): if False compute the length before any encoding. Can be set to False only if node arguments support encoding. 
+ freezable (bool): If ``False`` make the generator unfreezable in order to always provide + the right value. (Note that tTYPE will still be able to corrupt the generator.) ''' - def length(vt, set_attrs, clear_attrs, node): - blob = node.to_bytes() if after_encoding else node.get_raw_value() - n = Node('cts', value_type=vt(values=[len(blob)+base_len])) - n.set_semantics(NodeSemantics(['len'])) - MH._handle_attrs(n, set_attrs, clear_attrs) - return n + class Length(object): + unfreezable = not freezable + + def __init__(self, vt, set_attrs, clear_attrs): + self.vt = vt + self.set_attrs = set_attrs + self.clear_attrs = clear_attrs + + def __call__(self, node): + blob = node.to_bytes() if after_encoding else node.get_raw_value() + n = Node('cts', value_type=self.vt(values=[len(blob)+base_len], force_mode=True)) + n.set_semantics(NodeSemantics(['len'])) + MH._handle_attrs(n, self.set_attrs, self.clear_attrs) + return n vt = MH._validate_int_vt(vt) - return functools.partial(length, vt, set_attrs, clear_attrs) + return Length(vt, set_attrs, clear_attrs) @staticmethod def QTY(node_name, vt=fvt.INT_str, - set_attrs=[], clear_attrs=[]): + set_attrs=[], clear_attrs=[], freezable=False): '''Return a *generator* that returns the quantity of child node instances (referenced by name) of the node parameter provided to the *generator*. @@ -180,16 +190,27 @@ def QTY(node_name, vt=fvt.INT_str, by the generator set_attrs (list): attributes that will be set on the generated node. clear_attrs (list): attributes that will be cleared on the generated node. + freezable (bool): If ``False`` make the generator unfreezable in order to always provide + the right value. (Note that tTYPE will still be able to corrupt the generator.) ''' - def qty(node_name, vt, set_attrs, clear_attrs, node): - nb = node.cc.get_drawn_node_qty(node_name) - n = Node('cts', value_type=vt(values=[nb])) - n.set_semantics(NodeSemantics(['qty'])) - MH._handle_attrs(n, set_attrs, clear_attrs) - return n + class Qty(object): + unfreezable = not freezable + + def __init__(self, node_name, vt, set_attrs, clear_attrs): + self.node_name = node_name + self.vt = vt + self.set_attrs = set_attrs + self.clear_attrs = clear_attrs + + def __call__(self, node): + nb = node.cc.get_drawn_node_qty(self.node_name) + n = Node('cts', value_type=self.vt(values=[nb], force_mode=True)) + n.set_semantics(NodeSemantics(['qty'])) + MH._handle_attrs(n, self.set_attrs, self.clear_attrs) + return n vt = MH._validate_int_vt(vt) - return functools.partial(qty, node_name, vt, set_attrs, clear_attrs) + return Qty(node_name, vt, set_attrs, clear_attrs) @staticmethod def TIMESTAMP(time_format="%H%M%S", utc=False, @@ -217,7 +238,7 @@ def timestamp(time_format, utc, set_attrs, clear_attrs): @staticmethod def CRC(vt=fvt.INT_str, poly=0x104c11db7, init_crc=0, xor_out=0xFFFFFFFF, rev=True, - set_attrs=[], clear_attrs=[], after_encoding=True): + set_attrs=[], clear_attrs=[], after_encoding=True, freezable=False): '''Return a *generator* that returns the CRC (in the chosen type) of all the node parameters. (Default CRC is PKZIP CRC32) @@ -231,38 +252,53 @@ def CRC(vt=fvt.INT_str, poly=0x104c11db7, init_crc=0, xor_out=0xFFFFFFFF, rev=Tr clear_attrs (list): attributes that will be cleared on the generated node. after_encoding (bool): if False compute the CRC before any encoding. Can be set to False only if node arguments support encoding. + freezable (bool): if ``False`` make the generator unfreezable in order to always provide + the right value. 
(Note that tTYPE will still be able to corrupt the generator.) ''' - def crc(vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs, nodes): - crc_func = crcmod.mkCrcFun(poly, initCrc=init_crc, xorOut=xor_out, rev=rev) - if isinstance(nodes, Node): - s = nodes.to_bytes() if after_encoding else nodes.get_raw_value() - else: - if issubclass(nodes.__class__, NodeAbstraction): - nodes = nodes.get_concrete_nodes() - elif not isinstance(nodes, (tuple, list)): - raise TypeError("Contents of 'nodes' parameter is incorrect!") - s = b'' - for n in nodes: - blob = n.to_bytes() if after_encoding else n.get_raw_value() - s += blob - - result = crc_func(s) - - n = Node('cts', value_type=vt(values=[result])) - n.set_semantics(NodeSemantics(['crc'])) - MH._handle_attrs(n, set_attrs, clear_attrs) - return n + class Crc(object): + unfreezable = not freezable + + def __init__(self, vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs): + self.vt = vt + self.poly = poly + self.init_crc = init_crc + self.xor_out = xor_out + self.rev = rev + self.set_attrs = set_attrs + self.clear_attrs = clear_attrs + + def __call__(self, nodes): + crc_func = crcmod.mkCrcFun(self.poly, initCrc=self.init_crc, + xorOut=self.xor_out, rev=self.rev) + if isinstance(nodes, Node): + s = nodes.to_bytes() if after_encoding else nodes.get_raw_value() + else: + if issubclass(nodes.__class__, NodeAbstraction): + nodes = nodes.get_concrete_nodes() + elif not isinstance(nodes, (tuple, list)): + raise TypeError("Contents of 'nodes' parameter is incorrect!") + s = b'' + for n in nodes: + blob = n.to_bytes() if after_encoding else n.get_raw_value() + s += blob + + result = crc_func(s) + + n = Node('cts', value_type=self.vt(values=[result], force_mode=True)) + n.set_semantics(NodeSemantics(['crc'])) + MH._handle_attrs(n, self.set_attrs, self.clear_attrs) + return n if not crcmod_module: raise NotImplementedError('the CRC template has been disabled because python-crcmod module is not installed!') vt = MH._validate_int_vt(vt) - return functools.partial(crc, vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs) + return Crc(vt, poly, init_crc, xor_out, rev, set_attrs, clear_attrs) @staticmethod def WRAP(func, vt=fvt.String, - set_attrs=[], clear_attrs=[], after_encoding=True): + set_attrs=[], clear_attrs=[], after_encoding=True, freezable=False): '''Return a *generator* that returns the result (in the chosen type) of the provided function applied on the concatenation of all the node parameters. @@ -274,33 +310,49 @@ def WRAP(func, vt=fvt.String, clear_attrs (list): attributes that will be cleared on the generated node. after_encoding (bool): if False, execute `func` on node arguments before any encoding. Can be set to False only if node arguments support encoding. + freezable (bool): If ``False`` make the generator unfreezable in order to always provide + the right value. (Note that tTYPE will still be able to corrupt the generator.) 
''' - def map_func(vt, func, set_attrs, clear_attrs, nodes): - if isinstance(nodes, Node): - s = nodes.to_bytes() if after_encoding else nodes.get_raw_value() - else: - if issubclass(nodes.__class__, NodeAbstraction): - nodes = nodes.get_concrete_nodes() - elif not isinstance(nodes, (tuple, list)): - raise TypeError("Contents of 'nodes' parameter is incorrect!") - s = b'' - for n in nodes: - blob = n.to_bytes() if after_encoding else n.get_raw_value() - s += blob - - result = func(s) - - if issubclass(vt, fvt.String): - result = convert_to_internal_repr(result) - else: - assert isinstance(result, int) + class WrapFunc(object): + unfreezable = not freezable - n = Node('cts', value_type=vt(values=[result])) - MH._handle_attrs(n, set_attrs, clear_attrs) - return n + def __init__(self, vt, func, set_attrs, clear_attrs): + self.vt = vt + self.func = func + self.set_attrs = set_attrs + self.clear_attrs = clear_attrs + + def __call__(self, nodes): + if isinstance(nodes, Node): + s = nodes.to_bytes() if after_encoding else nodes.get_raw_value() + else: + if issubclass(nodes.__class__, NodeAbstraction): + nodes = nodes.get_concrete_nodes() + elif not isinstance(nodes, (tuple, list)): + raise TypeError("Contents of 'nodes' parameter is incorrect!") + s = b'' + for n in nodes: + blob = n.to_bytes() if after_encoding else n.get_raw_value() + s += blob + + result = self.func(s) + + if issubclass(self.vt, fvt.String): + result = convert_to_internal_repr(result) + else: + assert isinstance(result, int) + + if issubclass(vt, fvt.INT): + vt_obj = self.vt(values=[result], force_mode=True) + else: + vt_obj = self.vt(values=[result]) + n = Node('cts', value_type=vt_obj) + MH._handle_attrs(n, self.set_attrs, self.clear_attrs) + return n vt = MH._validate_vt(vt) - return functools.partial(map_func, vt, func, set_attrs, clear_attrs) + return WrapFunc(vt, func, set_attrs, clear_attrs) + @staticmethod def CYCLE(vals, depth=1, vt=fvt.String, @@ -356,7 +408,7 @@ def __call__(self, helper): @staticmethod def OFFSET(use_current_position=True, depth=1, vt=fvt.INT_str, - set_attrs=[], clear_attrs=[], after_encoding=True): + set_attrs=[], clear_attrs=[], after_encoding=True, freezable=False): '''Return a *generator* that computes the offset of a child node within its parent node. @@ -383,9 +435,12 @@ def OFFSET(use_current_position=True, depth=1, vt=fvt.INT_str, clear_attrs (list): attributes that will be cleared on the generated node. after_encoding (bool): if False compute the fixed amount part of the offset before any encoding. Can be set to False only if node arguments support encoding. + freezable (bool): If ``False`` make the generator unfreezable in order to always provide + the right value. (Note that tTYPE will still be able to corrupt the generator.) 
''' class Offset(object): provide_helpers = True + unfreezable = not freezable def __init__(self, use_current_position, depth, vt, set_attrs, clear_attrs): self.vt = vt @@ -427,7 +482,7 @@ def __call__(self, nodes, helper): base = len(s) off = nodes[-1].get_subnode_off(idx) - n = Node('cts_off', value_type=self.vt(values=[base+off])) + n = Node('cts_off', value_type=self.vt(values=[base+off], force_mode=True)) MH._handle_attrs(n, set_attrs, clear_attrs) return n @@ -765,6 +820,10 @@ def _create_generator_node(self, desc, node=None): node_args = desc.get('node_args', None) n.set_generator_func(contents, func_arg=other_args, provide_helpers=provide_helpers, conf=conf) + + if hasattr(contents, 'unfreezable') and contents.unfreezable: + n.clear_attr(MH.Attr.Freezable, conf=conf) + if node_args is not None: # node_args interpretation is postponed after all nodes has been created self._register_todo(n, self._complete_generator, args=(node_args, conf), unpack_args=True, diff --git a/framework/fuzzing_primitives.py b/framework/fuzzing_primitives.py index 40833e2..801fb97 100644 --- a/framework/fuzzing_primitives.py +++ b/framework/fuzzing_primitives.py @@ -704,7 +704,7 @@ def init_specific(self, **kwargs): negative_attrs=[dm.NodeInternals.Separator], node_kinds=[dm.NodeInternals_TypedValue, dm.NodeInternals_GenFunc]) - self.orig_value = None + # self.orig_value = None self.current_fuzz_vt_list = None self.current_node = None self.orig_internal = None @@ -722,7 +722,9 @@ def consume_node(self, node): if not self.current_fuzz_vt_list: self.orig_internal = node.cc - self.orig_value = node.to_bytes() + self.orig_all_attrs = node.cc.get_attrs_copy() + # self.orig_value = node.to_bytes() + vt_node = node.generated_node if node.is_genfunc() else node self.current_fuzz_vt_list = self._create_fuzzy_vt_list(vt_node, self.fuzz_magnitude) @@ -749,6 +751,7 @@ def save_node(self, node): def recover_node(self, node): node.cc = self.orig_internal + node.cc.set_attrs_from(self.orig_all_attrs) def need_reset(self, node): if node.is_nonterm(): diff --git a/framework/value_types.py b/framework/value_types.py index 313f600..4c8fb80 100644 --- a/framework/value_types.py +++ b/framework/value_types.py @@ -1051,7 +1051,8 @@ class INT(VT): # and that mini is not specified by the user - def __init__(self, values=None, mini=None, maxi=None, default=None, determinist=True): + def __init__(self, values=None, mini=None, maxi=None, default=None, determinist=True, + force_mode=False): self.idx = 0 self.determinist = determinist self.exhausted = False @@ -1060,7 +1061,20 @@ def __init__(self, values=None, mini=None, maxi=None, default=None, determinist= if values: assert default is None - self.values = list(values) + if force_mode: + new_values = [] + for v in values: + if not self.is_compatible(v): + if v > self.__class__.maxi: + v = self.__class__.maxi + new_values.append(v) + self.values = new_values + else: + for v in values: + if not self.is_compatible(v): + raise DataModelDefinitionError("Incompatible value ({!r}) with {!s}".format(v, self.__class__)) + self.values = list(values) + self.values_copy = list(self.values) else: @@ -1206,10 +1220,7 @@ def get_current_raw_val(self): return self.drawn_val def is_compatible(self, integer): - if self.mini <= integer <= self.maxi: - return True - else: - return False + return self.mini <= integer <= self.maxi def set_value_list(self, new_list): ret = False @@ -1367,18 +1378,24 @@ def reset_state(self): self.drawn_val = None def update_raw_value(self, val): + ok = True if 
+            if val > self.__class__.maxi:
+                val = self.__class__.maxi
+                ok = False
             if self.values is not None:
                 self.values.append(val)
                 self.values_copy = copy.copy(self.values)
             else:
-                self.idx = val - self.mini
+                self.idx = val - self.mini_gen
         else:
             raise TypeError
 
         self.drawn_val = val
         self.exhausted = False
 
+        return ok
+
     # To be used after calling get_value()
     def is_exhausted(self):
         return self.exhausted
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index 5f05c45..ad2b107 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -55,19 +55,7 @@ def tearDownModule():
 
 class TEST_Fuzzy_INT16(Fuzzy_INT16):
-    values = ['TEST_OK', 'BLABLA', 'PLOP']
-
-    def __init__(self, endian=None, supp_list=None):
-        self.endian = endian
-        self.idx = 0
-        INT.__init__(self, values=self.values, determinist=True)
-
-    def is_compatible(self, integer):
-        return False
-
-    def _convert_value(self, val):
-        return val
-
+    values = [0xDEAD, 0xBEEF, 0xCAFE]
 
 ######## Tests cases begins Here ########
 
@@ -1860,7 +1848,7 @@ def test_USB(self):
 
         print(colorize('number of confs: %d' % idx, rgb=Color.INFO))
 
-        self.assertIn(idx, [523])
+        self.assertIn(idx, [527])

From 8795bbe8568710cb277dbe76c67571cda9b2432e Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Mon, 15 Aug 2016 18:10:24 +0200
Subject: [PATCH 78/80] Fix inconsistent NT 'expanded_nodelist' in a specific situation

---
 framework/data_model.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/framework/data_model.py b/framework/data_model.py
index eb04506..af70999 100644
--- a/framework/data_model.py
+++ b/framework/data_model.py
@@ -2450,18 +2450,24 @@ def reset(self, nodes_drawn_qty=None, custo=None, exhaust_info=None, preserve_no
             self.exhausted = False
             self.excluded_components = []
             self.subcomp_exhausted = True
-            self.expanded_nodelist = []
             self.expanded_nodelist_sz = None
             self.expanded_nodelist_origsz = None
+            self.expanded_nodelist = []
             self.component_seed = None
             self._perform_first_step = True
         else:
             self.exhausted = exhaust_info[0]
             self.excluded_components = exhaust_info[1]
             self.subcomp_exhausted = exhaust_info[2]
-            self.expanded_nodelist = None
             self.expanded_nodelist_sz = exhaust_info[3]
             self.expanded_nodelist_origsz = exhaust_info[4]
+            if self.expanded_nodelist_sz is None:
+                # this case may exist if a node has been created (sz/origsz == None) and copied
+                # without being frozen first. (e.g., node absorption during a data model construction)
+                assert self.expanded_nodelist_origsz is None
+                self.expanded_nodelist = []
+            else:
+                self.expanded_nodelist = None
             self.component_seed = exhaust_info[5]
             self._perform_first_step = exhaust_info[6]

From 07205ea4bf5a2ab7cffff913047041bc96945204 Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Mon, 15 Aug 2016 20:32:54 +0200
Subject: [PATCH 79/80] Update TODO list

---
 TODO | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/TODO b/TODO
index 0685572..41b4310 100644
--- a/TODO
+++ b/TODO
@@ -5,9 +5,16 @@
 - Add GDB/PIN/QEMU probes/managers
 - Add support for evolutionary fuzzing
 - Add FmkDB visualization tools
+- Add support for automatic adaptation of fuzz test cases depending on
+  specific Target meta-data (HW architecture, programming language, ...)
 - Implement new node types that leverage python-constraint, or more powerfull
   constraint programming library [ENHANCEMENT]
-- Clean up test.py
+- Add support for absorption of nodes leveraging the 'Bitfield collapse'
+  customization (i.e., absorption of bit-oriented nodes without a byte boundary).
+  (Counterpart of the feature already supported for generation.)
+- Add support for absorption of nodes whose existence has not been resolved yet.
+  (Counterpart of the feature already supported for generation.)
+- Clean up test/test_integration.py

From 46cfc3ff3595eb1a8f0860652ce5e0a6ef2d8c6a Mon Sep 17 00:00:00 2001
From: Eric Lacombe
Date: Mon, 15 Aug 2016 20:38:57 +0200
Subject: [PATCH 80/80] Bumped version number to 0.25

---
 docs/source/conf.py           | 4 ++--
 framework/global_resources.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8e407d1..453a636 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -55,9 +55,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.24'
+version = '0.25'
 # The full version, including alpha/beta/rc tags.
-release = '0.24.2'
+release = '0.25.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/framework/global_resources.py b/framework/global_resources.py
index 0c72a69..ac4e78b 100644
--- a/framework/global_resources.py
+++ b/framework/global_resources.py
@@ -31,7 +31,7 @@
 from libs.utils import ensure_dir, ensure_file
 
-fuddly_version = '0.24.2'
+fuddly_version = '0.25'
 
 framework_folder = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 # framework_folder = os.path.dirname(framework.__file__)
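
A quick illustration of the 'force_mode' semantics that runs through this series: with
force_mode=True, INT.__init__ clamps a value above the type maximum to 'maxi' instead of
raising DataModelDefinitionError, which is what lets generators such as MH.OFFSET (the
'cts_off' node above) and the WrapFunc-based generator always produce a node, even when
the computed value overflows the chosen value type. The following minimal sketch mirrors
that clamping rule in isolation; it is not fuddly code, and the UInt8Like class with its
8-bit bounds is purely hypothetical, chosen only to keep the example runnable on its own:

    class UInt8Like(object):
        # Hypothetical stand-in for an INT subclass with 8-bit unsigned bounds.
        mini = 0
        maxi = 0xFF

        def __init__(self, values, force_mode=False):
            if force_mode:
                # Mirror the patched INT.__init__: a value above 'maxi' is clamped
                # down to 'maxi'; every other value is kept unchanged.
                self.values = [self.maxi if v > self.maxi else v for v in values]
            else:
                # Default behaviour: out-of-range values are rejected.
                for v in values:
                    if not self.is_compatible(v):
                        raise ValueError('Incompatible value ({!r})'.format(v))
                self.values = list(values)

        def is_compatible(self, integer):
            return self.mini <= integer <= self.maxi

    assert UInt8Like([0x1FF], force_mode=True).values == [0xFF]  # clamped to maxi
    assert UInt8Like([0x42], force_mode=True).values == [0x42]   # compatible, untouched

Note that only the upper bound is guarded, exactly as in the patched INT.__init__: a value
below 'mini' passes through unchanged even with force_mode=True.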