diff --git a/examples/linux/basic_state_merging.c b/examples/linux/basic_state_merging.c new file mode 100644 index 000000000..07033030c --- /dev/null +++ b/examples/linux/basic_state_merging.c @@ -0,0 +1,63 @@ +/** + * Symbolic values are read from stdin using standard libc calls. + * Program compares if two binary packed integers at the input with 0x41. + * + * Compile with : + * $ gcc -static -Os basic_statemerging.c -o basic_statemerging + * + * Analyze it with: + * $ python examples/script/basic_statemerging.py examples/linux/basic_statemerging + * + * The Merger plugin used in basic_statemerging.py will find two states with state IDs 2, 4 to be at the same program + * location (0x40060d) and merge their CPU states which should only require the value for RDI to be merged. + * + * Expected output: + * $ python /Users/vaibhav/git_repos/manticore/examples/script/basic_statemerging.py examples/linux/basic_statemerging-Os + about to load state_id = 0 + loaded state_id = 0 at cpu = 0x4008e0 + about to load state_id = 1 + loaded state_id = 1 at cpu = 0x400604 + about to load state_id = 2 + Merged registers: + RDI + at PC = 0x40060d, merge succeeded for state id = 2 and 4 + loaded state_id = 2 at cpu = 0x40060d + about to load state_id = 3 + loaded state_id = 3 at cpu = 0x400612 + * +*/ + +#include +#include +#include +#include + + int main(int argc, char* argv[], char* envp[]){ + unsigned int cmd1, cmd2; + unsigned int cmdChanged = 0; + + if (read(0, &cmd1, sizeof(cmd1)) != sizeof(cmd1)) + { + printf("Error reading stdin!"); + exit(-1); + } + if (read(0, &cmd2, sizeof(cmd2)) != sizeof(cmd2)) + { + printf("Error reading stdin!"); + exit(-1); + } + + if (cmd1 > 0x41) + { + cmdChanged = cmd1 - 0x42; + } + if (cmd2 < 0x41) + { + cmdChanged = cmd2 + 0x42; + } + + if (cmdChanged == 0) printf("equal\n"); + else printf("not equal\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/script/basic_statemerging.py b/examples/script/basic_statemerging.py new file mode 100644 index 000000000..c6854bb87 --- /dev/null +++ b/examples/script/basic_statemerging.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +import sys + +from manticore.native.plugins import Merger +from manticore.utils import config + +from manticore.native import Manticore +from manticore import set_verbosity + +""" +Demonstrates the ability to do state merging on a simple program by merging states with id 2, 4 that happen to be +at the same program location 0x40060d. This script uses the Merger plugin to apply opportunistic state merging. +""" +if __name__ == "__main__": + config.get_group("core").seed = 2 + config.get_group("core").mprocessing = config.get_group("core").mprocessing.single + path = sys.argv[1] + m = Manticore(path, policy="random") + + def will_load_state_callback(_mc, state_id): + print("about to load state_id = " + str(state_id)) + + def did_load_state_callback(_mc, state): + print("loaded state_id = " + str(state.id) + " at cpu = " + hex(state.cpu.PC)) + + m.subscribe("will_load_state", will_load_state_callback) + m.subscribe("did_load_state", did_load_state_callback) + m.register_plugin(Merger()) + m.run() diff --git a/manticore/core/manticore.py b/manticore/core/manticore.py index 2776fe09b..dfb4cd3d0 100644 --- a/manticore/core/manticore.py +++ b/manticore/core/manticore.py @@ -63,6 +63,11 @@ def to_class(self): default=MProcessingType.multiprocessing, description="single: No multiprocessing at all. Single process.\n threading: use threads\m multiprocessing: use forked processes", ) +consts.add( + "seed", + default=random.getrandbits(32), + description="The seed to use when randomly selecting states", +) class ManticoreBase(Eventful): @@ -71,13 +76,11 @@ def __new__(cls, *args, **kwargs): raise Exception("Should not instantiate this") cl = consts.mprocessing.to_class() - if cl not in cls.__bases__: - # change ManticoreBase for the more specific class - idx = cls.__bases__.index(ManticoreBase) - bases = list(cls.__bases__) - bases[idx] = cl - cls.__bases__ = tuple(bases) + # change ManticoreBase for the more specific class + bases = {cl if issubclass(base, ManticoreBase) else base for base in cls.__bases__} + cls.__bases__ = tuple(bases) + random.seed(consts.seed) return super().__new__(cls) # Decorators added first for convenience. @@ -112,6 +115,7 @@ def at_not_running(func): @functools.wraps(func) def newFunction(self, *args, **kw): if self.is_running(): + logger.error("Calling at running not allowed") raise Exception(f"{func.__name__} only allowed while NOT exploring states") return func(self, *args, **kw) @@ -350,12 +354,8 @@ def setstate(x, y): setstate(new_state, new_value) # enqueue new_state, assign new state id - new_state_id = self._save(new_state, state_id=None) - with self._lock: - self._ready_states.append(new_state_id) - self._lock.notify_all() # Must notify one! + new_state_id = self._put_state(new_state) - self._publish("did_fork_state", new_state, expression, new_value, policy) # maintain a list of children for logging purpose children.append(new_state_id) @@ -365,6 +365,8 @@ def setstate(x, y): state._id = None self._lock.notify_all() + self._publish("did_fork_state", new_state, expression, new_value, policy) + logger.debug("Forking current state %r into states %r", state.id, children) @staticmethod @@ -377,6 +379,7 @@ def verbosity(level): set_verbosity(level) # State storage + @Eventful.will_did("save_state") def _save(self, state, state_id=None): """ Store or update a state in secondary storage under state_id. Use a fresh id is None is provided. @@ -389,6 +392,7 @@ def _save(self, state, state_id=None): state._id = self._workspace.save_state(state, state_id=state_id) return state.id + @Eventful.will_did("load_state") def _load(self, state_id): """ Load the state from the secondary storage @@ -396,19 +400,17 @@ def _load(self, state_id): :type state_id: int :returns: the state id used """ - if not hasattr(self, "stcache"): self.stcache = weakref.WeakValueDictionary() if state_id in self.stcache: return self.stcache[state_id] - self._publish("will_load_state", state_id) state = self._workspace.load_state(state_id, delete=False) state._id = state_id self.forward_events_from(state, True) - self._publish("did_load_state", state, state_id) self.stcache[state_id] = state return state + @Eventful.will_did("remove_state") def _remove(self, state_id): """ Remove a state from secondary storage diff --git a/manticore/ethereum/account.py b/manticore/ethereum/account.py index 7cab833a0..a9fc16779 100644 --- a/manticore/ethereum/account.py +++ b/manticore/ethereum/account.py @@ -46,6 +46,9 @@ def __int__(self): def __str__(self): return str(self._address) + def __hash__(self): + return self._address + class EVMContract(EVMAccount): """ diff --git a/manticore/native/plugins.py b/manticore/native/plugins.py new file mode 100644 index 000000000..d28454efb --- /dev/null +++ b/manticore/native/plugins.py @@ -0,0 +1,129 @@ +from ..core.plugin import Plugin +from .state_merging import merge_constraints, is_merge_possible, merge +import logging + +logger = logging.getLogger(__name__) + + +class Merger(Plugin): + def on_register(self): + p = self.manticore._publish + self.manticore._publish = lambda *args, **kwargs: None + for state in self.manticore.ready_states: + self.did_fork_state_callback(state, None, None, None) + + self.manticore._publish = p + + def load_state(self, state_id, delete=False): + """ + Loads a state in Manticore with state-id = `state_id` by using the corresponding API in Executor + :param state_id: state-id for state that is being loaded + :param delete: If set to True, deletes the state with state-id = `state_id` + :return: None + """ + state = self.manticore._workspace.load_state(state_id, delete=delete) + return state + + def delete_state(self, state_id): + """ + Deletes a state in Manticore with state-id = `state_id` by using the corresponding API in Executor + :param state_id: state-id for state that is being deleted + :return: None + """ + with self.manticore._lock: + self.manticore._ready_states.remove(state_id) + self.manticore._remove(state_id) + + def replace_state(self, state_id, state): + """ + Replaces a state in Manticore with state-id = `state_id` by using the corresponding API in Executor + :param state_id: state-id for state that is being replaced + :param state: State object that is replacing the existing state with `state_id` + :return: None + """ + return self.manticore._save(state, state_id=state_id) + + def did_fork_state_callback(self, state, expression, new_value, policy): + state_id = state.id + """That means no one else has to offer one Message Input + + Maintains a `cpu_stateid_dict` in context that keeps maps program counter values to a list of state-id values + for states that will execute the instruction that that program counter as the next instruction + :param state_id: id for state that was just enqueued + :param state: State object that was just enqueued + :return: None + """ + # when a new state is addded to the list we save it so we do not have + # to repload all states when try to merges last PC + with self.locked_context("cpu_stateid_dict", dict) as cpu_stateid_dict: + # as we may be running in a different process we need to access this + # on a lock and over shared memory like this + state_id_list = cpu_stateid_dict.get(state.cpu.PC, list()) + state_id_list.append(state_id) + cpu_stateid_dict[state.cpu.PC] = state_id_list + + def will_load_state_callback(self, current_state_id): + """ + Checks if the state to be loaded (referenced by `current_state_id` can be merged with another currently enqueued + state and replaced by the merged state + :param current_state_id: state about to be loaded + :return: None + """ + # When a state is loaded for exploration lets check if we can find it a + # mate for merging + with self.locked_context("cpu_stateid_dict", dict) as cpu_stateid_dict: + # we get the lock and get a copy of the shared context + merged_state = self.load_state(current_state_id) + states_at_pc = cpu_stateid_dict.get(merged_state.cpu.PC, []) + + # lets remove ourself from the list of waiting states + # assert current_state_id in states_at_pc #??? + if current_state_id in states_at_pc: + states_at_pc.remove(current_state_id) + + # Iterate over all remaining states that are waiting for exploration + # at the same PC + merged_ids = [] + for new_state_id in states_at_pc: + new_state = self.load_state(new_state_id) + (exp_merged_state, exp_new_state, merged_constraint) = merge_constraints( + merged_state.constraints, new_state.constraints + ) + is_mergeable, reason = is_merge_possible(merged_state, new_state, merged_constraint) + + if is_mergeable: + # Ok we'll merge it! + merged_state = merge( + merged_state, new_state, exp_merged_state, merged_constraint + ) + + # lets remove the vestigial links to the old state + self.delete_state(new_state_id) + + merged_ids.append(new_state_id) + is_mergeable = "succeeded" + else: + is_mergeable = "failed because of " + reason + debug_string = ( + "at PC = " + + hex(merged_state.cpu.PC) + + ", merge " + + is_mergeable + + " for state id = " + + str(current_state_id) + + " and " + + str(new_state_id) + ) + logger.debug(debug_string) + + for i in merged_ids: + states_at_pc.remove(i) + + cpu_stateid_dict[merged_state.cpu.PC] = states_at_pc + + # Ok so we have merged current_state_id with {merged_ids} + # And removed all merged_ids from everywhere + + # UGLY we are replacing a state_id. This may be breaking caches in + # the future + self.replace_state(current_state_id, merged_state) diff --git a/manticore/native/state_merging.py b/manticore/native/state_merging.py new file mode 100644 index 000000000..51836c764 --- /dev/null +++ b/manticore/native/state_merging.py @@ -0,0 +1,218 @@ +from ..core.smtlib import Z3Solver, ConstraintSet, Operators, issymbolic, BitVec + + +def compare_sockets(cs, socket1, socket2): + """ + This method compares Socket objects for equality using the buffer and peer attributes. + It uses `compare_buffers` for checking buffer attributes for equality. + It calls itself for comparing peer Socket objects. + Returns True if the Socket objects are equal, false otherwise. + :param cs: ConstraintSet to be used for checking Socket.buffer for semantic equality using `Z3Solver.instance().must_be_true()` + :param socket1: one of two Socket objects to be compared for equality against socket2 + :param socket2: one of two Socket objects to be compared for equality against socket1 + :return: True, if the Socket objects are found to be equal, False otherwise + """ + if socket1 is None: + return socket2 is None + if socket2 is None: + return socket1 is None + if not compare_buffers(cs, socket1.buffer, socket2.buffer): + return False + return compare_sockets(cs, socket1.peer, socket2.peer) + + +def compare_buffers(cs, buffer1, buffer2): + """ + This method compares the two List objects for equality using the `Z3Solver.instance().must_be_true()` call. + :param cs: ConstraintSet to be used for checking buffer1 for semantic equality with buffer2 using `Z3Solver.instance().must_be_true()` + :param buffer1: one of two List objects to be compared for equality against buffer2 + :param buffer2: one of two List objects to be compared for equality against buffer1 + :return: True, if the List objects are equal, False otherwise + """ + if len(buffer1) != len(buffer2): + return False + for b1, b2 in zip(buffer1, buffer2): + if not Z3Solver.instance().must_be_true(cs, b1 == b2): + return False + return True + + +def merge_constraints(constraints1, constraints2): + """ + :param constraints1: one of two ConstraintSet objects to be merged + :param constraints2: second of two ConstraintSet objects to be merged + :return: (Expression, Expression, ConstraintSet) where the first and second Expression objects are conjunctions of + of all the constraints in constraints1 and constraints2 respectively. The ConstraintSet is an object that contains + a single constraint that is a logical OR of these two Expression objects. + """ + exp1 = constraints1.constraints[0] + for i in range(1, len(constraints1.constraints)): + exp1 = exp1 & constraints1.constraints[i] + exp2 = constraints2.constraints[0] + for i in range(1, len(constraints2.constraints)): + exp2 = exp2 & constraints2.constraints[i] + merged_constraint = ConstraintSet() + merged_constraint.add(exp1 | exp2) + return exp1, exp2, merged_constraint + + +def compare_byte_vals(mem1, mem2, addr, merged_constraint): + """ + Compares values in memory at address `addr`, returns True if they are semantically equal, False otherwise + :param mem1: first of two memory objects we want to use for comparison + :param mem2: second of two memory objects we want to use for comparison + :param addr: address at which bytes values are to be compared + :param merged_constraint: ConstraintSet to be used when using the call to `Z3Solver.instance().must_be_true()` + :return: returns True if 1 byte values at address `addr` in `mem1` and `mem2` are semantically equal, False otherwise + """ + val1 = mem1.read(addr, 1) + val2 = mem2.read(addr, 1) + # since we only read a single byte value, these lists should only have one entry in them + assert len(val1) == 1 and len(val2) == 1 + cond_to_check = val1[0] == val2[0] + if not Z3Solver.instance().must_be_true(merged_constraint, cond_to_check): + return False + else: + return True + + +# TODO move this comparison into an Executor API that uses an internal State API +def compare_mem(mem1, mem2, merged_constraint): + """ + This method compares the number of maps, and then their names, permissions, start, and end values. + If they all match, then it compares the concrete byte values for equality. + If those match too, it then compares _symbols attribute values for equality if the two memory objects are of + type SMemory. + :param mem1: one of two memory objects to be compared + :param mem2: second of two memory objects to be compared + :param merged_constraint: ConstraintSet object that is to be used with `Z3Solver.instance().must_be_true()` calls to check the + memory objects for semantic equality + :return: True, if the memory objects are equal, False otherwise + """ + maps1 = sorted(list(mem1.maps)) + maps2 = sorted(list(mem2.maps)) + if len(maps1) != len(maps2): + return False + ret_val = None + for m1, m2 in zip(maps1, maps2): + if m1 != m2: # compares the maps' names, permissions, starts, and ends + ret_val = False + break + # Compare concrete byte values in the data in these memory maps for equality + bytes1 = m1[m1.start : m1.end] + bytes2 = m2[m2.start : m2.end] + if bytes1 != bytes2: + ret_val = False + break + checked_addrs = [] + # compare symbolic byte values in memory + # hack to avoid importing SMemory because that import introduces a circular dependency on ManticoreBase + if mem1.__class__.__name__ == "SMemory" and ret_val is not None: + for addr1, _ in mem1._symbols.items(): + checked_addrs.append(addr1) + if not compare_byte_vals(mem1, mem2, addr1, merged_constraint): + ret_val = False + break + # hack to avoid importing SMemory because that import introduces a circular dependency on ManticoreBase + if mem2.__class__.__name__ == "SMemory" and ret_val is not None: + for addr2, _ in mem2._symbols.items(): + if addr2 not in checked_addrs: + if not compare_byte_vals(mem1, mem2, addr2, merged_constraint): + ret_val = False + break + if ret_val is not None: + return ret_val + else: + return True + + +def is_merge_possible(state1, state2, merged_constraint): + """ + Checks if a merge is possible by checking Input, Output sockets, symbolic_files, syscall_trace, and memory + for equality. + :param state1: one of two possible states we want to check for mergeability + :param state2: second of two possible states we want to check for mergeability + :param merged_constraint: ConstraintSet of merged constraints for state1 and state2 + :return: True, if state1 and state2 can be merged, False if otherwise + """ + platform1 = state1.platform + platform2 = state2.platform + + ret_val = None + + # compare input and output sockets of the states + if not compare_sockets( + merged_constraint, platform1.input, platform2.input + ) or not compare_sockets(merged_constraint, platform1.output, platform2.output): + ret_val = False, "inequivalent socket operations" + + # compare symbolic files opened by the two states + if ret_val is None and platform1.symbolic_files != platform2.symbolic_files: + ret_val = False, "inequivalent symbolic files" + + # compare system call traces of the two states + if ret_val is None and len(platform1.syscall_trace) != len(platform2.syscall_trace): + ret_val = False, "inequivalent syscall trace lengths" + if ret_val is None: + for i, (name1, fd1, data1) in enumerate(platform1.syscall_trace): + (name2, fd2, data2) = platform2.syscall_trace[i] + if not ( + name1 == name2 and fd1 == fd2 and compare_buffers(merged_constraint, data1, data2) + ): + ret_val = False, "inequivalent syscall traces" + break + + # compare memory of the two states + if ret_val is None and not compare_mem(state1.mem, state2.mem, merged_constraint): + ret_val = False, "inequivalent memory" + if ret_val is not None: + return ret_val + else: + return True, None + + +def merge_cpu(cpu1, cpu2, state, exp1, merged_constraint): + """ + Merge CPU objects into the state.CPU + :param cpu1: one of two CPU objects that we wish to merge + :param cpu2: second of two CPU objects that we wish to merge + :param state: the state whose CPU attribute we will be updating + :param exp1: the expression that if satisfiable will cause the CPU registers to take corresponding values from + `cpu1`, else they will take corresponding values from `cpu2` + :param merged_constraint: ConstraintSet under which we would want inequality between CPU register values to be + satisfiable as checked using `Z3Solver.instance().must_be_true()` + :return: List of registers that were merged + """ + merged_regs = [] + for reg in cpu1.canonical_registers: + val1 = cpu1.read_register(reg) + val2 = cpu2.read_register(reg) + if isinstance(val1, BitVec) and isinstance(val2, BitVec): + assert val1.size == val2.size + if issymbolic(val1) or issymbolic(val2) or val1 != val2: + if Z3Solver.instance().must_be_true(merged_constraint, val1 != val2): + merged_regs.append(reg) + if cpu1.regfile.sizeof(reg) == 1: + state.cpu.write_register(reg, Operators.ITE(exp1, val1, val2)) + else: + state.cpu.write_register( + reg, Operators.ITEBV(cpu1.regfile.sizeof(reg), exp1, val1, val2) + ) + return merged_regs + + +def merge(state1, state2, exp1, merged_constraint): + """ + Merge state1 and state2 into a single state + :param state1: + :param state2: + :param exp1: + :param merged_constraint: + :return: the state that is the result of the merging of `state1` and `state2` + """ + merged_state = state1 + merged_reg_list = merge_cpu(state1.cpu, state2.cpu, merged_state, exp1, merged_constraint) + print("Merged registers: ") + print(*merged_reg_list, sep=",") + merged_state.constraints = merged_constraint + return merged_state diff --git a/manticore/utils/event.py b/manticore/utils/event.py index 02d1f0eb8..d3374a96e 100644 --- a/manticore/utils/event.py +++ b/manticore/utils/event.py @@ -1,6 +1,7 @@ import copy import inspect import logging +import functools from itertools import takewhile from weakref import WeakKeyDictionary, ref @@ -72,6 +73,22 @@ def all_events(cls): all_evts.update(evts) return all_evts + @staticmethod + def will_did(name): + """Pre/pos emiting signal""" + + def deco(func): + @functools.wraps(func) + def newFunction(self, *args, **kw): + self._publish(f"will_{name}", *args, **kw) + result = func(self, *args, **kw) + self._publish(f"did_{name}", result) + return result + + return newFunction + + return deco + def __init__(self, *args, **kwargs): # A dictionary from "event name" -> callback methods # Note that several methods can be associated with the same object diff --git a/tests/native/binaries/basic_state_merging b/tests/native/binaries/basic_state_merging new file mode 100755 index 000000000..f9e4a37a5 Binary files /dev/null and b/tests/native/binaries/basic_state_merging differ diff --git a/tests/native/test_state.py b/tests/native/test_state.py index e476d9fc3..b6accd5ff 100644 --- a/tests/native/test_state.py +++ b/tests/native/test_state.py @@ -5,6 +5,10 @@ from manticore.platforms import linux from manticore.native.state import State from manticore.core.smtlib import BitVecVariable, ConstraintSet +from manticore.native import Manticore +from manticore.native.plugins import Merger +from manticore.core.plugin import Plugin +from manticore.utils import config class FakeMemory: @@ -257,3 +261,62 @@ def testContextSerialization(self): self.assertEqual(new_state.context["step"], 20) new_new_state = pickle.loads(new_new_file) self.assertEqual(new_new_state.context["step"], 30) + + +class StateMergeTest(unittest.TestCase): + + # Need to add a plugin that counts the number of states in did_fork_state, and records the max + # Then, when we hit + + class StateCounter(Plugin): + def did_fork_state_callback(self, *_args, **_kwargs): + self.max_states = max( + self.max_states, + self.manticore.count_busy_states() + + self.manticore.count_ready_states() + + self.manticore.count_killed_states() + + self.manticore.count_terminated_states(), + ) + + @property + def max_states(self): + with self.manticore.locked_context() as ctx: + return ctx.setdefault("max_states", 0) + + @max_states.setter + def max_states(self, new_val): + with self.manticore.locked_context() as ctx: + ctx["max_states"] = new_val + + def setUp(self): + core = config.get_group("core") + core.seed = 61 + core.mprocessing = core.mprocessing.single + + dirname = os.path.dirname(__file__) + self.m = Manticore( + os.path.join(dirname, "binaries", "basic_state_merging"), policy="random" + ) + self.plugin = self.StateCounter() + + self.m.register_plugin(Merger()) + self.m.register_plugin(self.plugin) + + def test_state_merging(self): + @self.m.hook(0x40065D) + def hook_post_merge(*_args, **_kwargs): + with self.m.locked_context() as ctx: + ctx["state_count"] = ( + self.m.count_busy_states() + + self.m.count_ready_states() + + self.m.count_killed_states() + + self.m.count_terminated_states() + ) + + self.m.run() + s = config.get_group("core").seed + self.assertLess( + self.m.context["state_count"], + self.plugin.max_states, + f"State merging failed with seed: {s}", + )