Initial master merge into experimental state merging for native (#1482)

* Initial master merge into experimental st merging for native * black * Add C file and fix arguments * Push (randomly failing) rudimentary state merging test Fails nondeterministically, so like, that's a problem. The issue I think lies in the fact that we pull states from the disk in a random order. Fix that so it's the right order, and this will hopefully work. * Blacken * Add basic_state_merging binary * Restore seed parameter to random number generator * Handle issues with instantiating Manitcore when multiple instances run simultaneously * Replace print with logging
trailofbits · Aug 5, 2019 · 0f1a4f9 · 0f1a4f9
1 parent f2099bd
commit 0f1a4f9
Show file tree

Hide file tree

Showing 9 changed files with 539 additions and 14 deletions.
diff --git a/examples/linux/basic_state_merging.c b/examples/linux/basic_state_merging.c
@@ -0,0 +1,63 @@
+/**
+ * Symbolic values are read from stdin using standard libc calls.
+ * Program compares if two binary packed integers at the input with 0x41.
+ *
+ * Compile with :
+ *   $ gcc -static -Os basic_statemerging.c -o basic_statemerging
+ *
+ * Analyze it with:
+ *   $ python examples/script/basic_statemerging.py examples/linux/basic_statemerging
+ *
+ *   The Merger plugin used in basic_statemerging.py will find two states with state IDs 2, 4 to be at the same program
+ *   location (0x40060d) and merge their CPU states which should only require the value for RDI to be merged.
+ *
+ * Expected output:
+ *  $ python /Users/vaibhav/git_repos/manticore/examples/script/basic_statemerging.py examples/linux/basic_statemerging-Os
+      about to load state_id = 0
+      loaded state_id = 0 at cpu = 0x4008e0
+      about to load state_id = 1
+      loaded state_id = 1 at cpu = 0x400604
+      about to load state_id = 2
+      Merged registers:
+      RDI
+      at PC = 0x40060d, merge succeeded for state id = 2 and 4
+      loaded state_id = 2 at cpu = 0x40060d
+      about to load state_id = 3
+      loaded state_id = 3 at cpu = 0x400612
+ *
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+ int main(int argc, char* argv[], char* envp[]){
+    unsigned int cmd1, cmd2;
+    unsigned int cmdChanged = 0;
+
+     if (read(0, &cmd1, sizeof(cmd1)) != sizeof(cmd1))
+    {
+        printf("Error reading stdin!");
+        exit(-1);
+    }
+    if (read(0, &cmd2, sizeof(cmd2)) != sizeof(cmd2))
+    {
+        printf("Error reading stdin!");
+        exit(-1);
+    }
+
+     if (cmd1 > 0x41)
+    {
+        cmdChanged = cmd1 - 0x42;
+    }
+    if (cmd2 < 0x41)
+    {
+        cmdChanged = cmd2 + 0x42;
+    }
+
+     if (cmdChanged == 0) printf("equal\n");
+    else printf("not equal\n");
+
+     return 0;
+}
diff --git a/examples/script/basic_statemerging.py b/examples/script/basic_statemerging.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+import sys
+
+from manticore.native.plugins import Merger
+from manticore.utils import config
+
+from manticore.native import Manticore
+from manticore import set_verbosity
+
+"""
+Demonstrates the ability to do state merging on a simple program by merging states with id 2, 4 that happen to be 
+at the same program location 0x40060d. This script uses the Merger plugin to apply opportunistic state merging.
+"""
+if __name__ == "__main__":
+    config.get_group("core").seed = 2
+    config.get_group("core").mprocessing = config.get_group("core").mprocessing.single
+    path = sys.argv[1]
+    m = Manticore(path, policy="random")
+
+    def will_load_state_callback(_mc, state_id):
+        print("about to load state_id = " + str(state_id))
+
+    def did_load_state_callback(_mc, state):
+        print("loaded state_id = " + str(state.id) + " at cpu = " + hex(state.cpu.PC))
+
+    m.subscribe("will_load_state", will_load_state_callback)
+    m.subscribe("did_load_state", did_load_state_callback)
+    m.register_plugin(Merger())
+    m.run()
diff --git a/manticore/core/manticore.py b/manticore/core/manticore.py
@@ -64,6 +64,11 @@ def to_class(self):
     default=MProcessingType.multiprocessing,
     description="single: No multiprocessing at all. Single process.\n threading: use threads\m multiprocessing: use forked processes",
 )
+consts.add(
+    "seed",
+    default=random.getrandbits(32),
+    description="The seed to use when randomly selecting states",
+)
 
 
 class ManticoreBase(Eventful):
@@ -72,13 +77,11 @@ def __new__(cls, *args, **kwargs):
             raise Exception("Should not instantiate this")
 
         cl = consts.mprocessing.to_class()
-        if cl not in cls.__bases__:
-            # change ManticoreBase for the more specific class
-            idx = cls.__bases__.index(ManticoreBase)
-            bases = list(cls.__bases__)
-            bases[idx] = cl
-            cls.__bases__ = tuple(bases)
+        # change ManticoreBase for the more specific class
+        bases = {cl if issubclass(base, ManticoreBase) else base for base in cls.__bases__}
+        cls.__bases__ = tuple(bases)
 
+        random.seed(consts.seed)
         return super().__new__(cls)
 
     # Decorators added first for convenience.
@@ -113,6 +116,7 @@ def at_not_running(func):
         @functools.wraps(func)
         def newFunction(self, *args, **kw):
             if self.is_running():
+                logger.error("Calling at running not allowed")
                 raise Exception(f"{func.__name__} only allowed while NOT exploring states")
             return func(self, *args, **kw)
 
@@ -351,12 +355,8 @@ def setstate(x, y):
                 setstate(new_state, new_value)
 
                 # enqueue new_state, assign new state id
-                new_state_id = self._save(new_state, state_id=None)
-                with self._lock:
-                    self._ready_states.append(new_state_id)
-                    self._lock.notify_all()  # Must notify one!
+                new_state_id = self._put_state(new_state)
 
-                self._publish("did_fork_state", new_state, expression, new_value, policy)
                 # maintain a list of children for logging purpose
                 children.append(new_state_id)
 
@@ -366,6 +366,8 @@ def setstate(x, y):
             state._id = None
             self._lock.notify_all()
 
+        self._publish("did_fork_state", new_state, expression, new_value, policy)
+
         logger.debug("Forking current state %r into states %r", state.id, children)
 
     @staticmethod
@@ -378,6 +380,7 @@ def verbosity(level):
         set_verbosity(level)
 
     # State storage
+    @Eventful.will_did("save_state")
     def _save(self, state, state_id=None):
         """ Store or update a state in secondary storage under state_id.
             Use a fresh id is None is provided.
@@ -390,26 +393,25 @@ def _save(self, state, state_id=None):
         state._id = self._workspace.save_state(state, state_id=state_id)
         return state.id
 
+    @Eventful.will_did("load_state")
     def _load(self, state_id):
         """ Load the state from the secondary storage
 
             :param state_id: a estate id
             :type state_id: int
             :returns: the state id used
         """
-
         if not hasattr(self, "stcache"):
             self.stcache = weakref.WeakValueDictionary()
         if state_id in self.stcache:
             return self.stcache[state_id]
-        self._publish("will_load_state", state_id)
         state = self._workspace.load_state(state_id, delete=False)
         state._id = state_id
         self.forward_events_from(state, True)
-        self._publish("did_load_state", state, state_id)
         self.stcache[state_id] = state
         return state
 
+    @Eventful.will_did("remove_state")
     def _remove(self, state_id):
         """ Remove a state from secondary storage
 

diff --git a/manticore/ethereum/account.py b/manticore/ethereum/account.py
@@ -46,6 +46,9 @@ def __int__(self):
     def __str__(self):
         return str(self._address)
 
+    def __hash__(self):
+        return self._address
+
 
 class EVMContract(EVMAccount):
     """

diff --git a/manticore/native/plugins.py b/manticore/native/plugins.py
@@ -0,0 +1,129 @@
+from ..core.plugin import Plugin
+from .state_merging import merge_constraints, is_merge_possible, merge
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class Merger(Plugin):
+    def on_register(self):
+        p = self.manticore._publish
+        self.manticore._publish = lambda *args, **kwargs: None
+        for state in self.manticore.ready_states:
+            self.did_fork_state_callback(state, None, None, None)
+
+        self.manticore._publish = p
+
+    def load_state(self, state_id, delete=False):
+        """
+        Loads a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
+        :param state_id: state-id for state that is being loaded
+        :param delete: If set to True, deletes the state with state-id = `state_id`
+        :return: None
+        """
+        state = self.manticore._workspace.load_state(state_id, delete=delete)
+        return state
+
+    def delete_state(self, state_id):
+        """
+        Deletes a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
+        :param state_id: state-id for state that is being deleted
+        :return: None
+        """
+        with self.manticore._lock:
+            self.manticore._ready_states.remove(state_id)
+            self.manticore._remove(state_id)
+
+    def replace_state(self, state_id, state):
+        """
+        Replaces a state in Manticore with state-id = `state_id` by using the corresponding API in Executor
+        :param state_id: state-id for state that is being replaced
+        :param state: State object that is replacing the existing state with `state_id`
+        :return: None
+        """
+        return self.manticore._save(state, state_id=state_id)
+
+    def did_fork_state_callback(self, state, expression, new_value, policy):
+        state_id = state.id
+        """That means no one else has to offer one Message Input
+
+        Maintains a `cpu_stateid_dict` in context that keeps maps program counter values to a list of state-id values
+        for states that will execute the instruction that that program counter as the next instruction
+        :param state_id: id for state that was just enqueued
+        :param state: State object that was just enqueued
+        :return: None
+        """
+        # when a new state is addded to the list we save it so we do not have
+        # to repload all states when try to merges last PC
+        with self.locked_context("cpu_stateid_dict", dict) as cpu_stateid_dict:
+            # as we may be running in a different process we need to access this
+            # on a lock and over shared memory like this
+            state_id_list = cpu_stateid_dict.get(state.cpu.PC, list())
+            state_id_list.append(state_id)
+            cpu_stateid_dict[state.cpu.PC] = state_id_list
+
+    def will_load_state_callback(self, current_state_id):
+        """
+        Checks if the state to be loaded (referenced by `current_state_id` can be merged with another currently enqueued
+        state and replaced by the merged state
+        :param current_state_id: state about to be loaded
+        :return: None
+        """
+        # When a state is loaded for exploration lets check if we can find it a
+        # mate for merging
+        with self.locked_context("cpu_stateid_dict", dict) as cpu_stateid_dict:
+            # we get the lock and get a copy of the shared context
+            merged_state = self.load_state(current_state_id)
+            states_at_pc = cpu_stateid_dict.get(merged_state.cpu.PC, [])
+
+            # lets remove ourself from the list of waiting states
+            # assert current_state_id in states_at_pc #???
+            if current_state_id in states_at_pc:
+                states_at_pc.remove(current_state_id)
+
+            # Iterate over all remaining states that are waiting for exploration
+            # at the same PC
+            merged_ids = []
+            for new_state_id in states_at_pc:
+                new_state = self.load_state(new_state_id)
+                (exp_merged_state, exp_new_state, merged_constraint) = merge_constraints(
+                    merged_state.constraints, new_state.constraints
+                )
+                is_mergeable, reason = is_merge_possible(merged_state, new_state, merged_constraint)
+
+                if is_mergeable:
+                    # Ok we'll merge it!
+                    merged_state = merge(
+                        merged_state, new_state, exp_merged_state, merged_constraint
+                    )
+
+                    # lets remove the vestigial links to the old state
+                    self.delete_state(new_state_id)
+
+                    merged_ids.append(new_state_id)
+                    is_mergeable = "succeeded"
+                else:
+                    is_mergeable = "failed because of " + reason
+                debug_string = (
+                    "at PC = "
+                    + hex(merged_state.cpu.PC)
+                    + ", merge "
+                    + is_mergeable
+                    + " for state id = "
+                    + str(current_state_id)
+                    + " and "
+                    + str(new_state_id)
+                )
+                logger.debug(debug_string)
+
+            for i in merged_ids:
+                states_at_pc.remove(i)
+
+            cpu_stateid_dict[merged_state.cpu.PC] = states_at_pc
+
+            # Ok so we have merged current_state_id with {merged_ids}
+            # And removed all merged_ids from everywhere
+
+            # UGLY we are replacing a state_id. This may be breaking caches in
+            # the future
+            self.replace_state(current_state_id, merged_state)