From e59b06e3db9ce1e7a00d94d8646f02b9f7874595 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Tue, 24 Oct 2017 22:16:09 -0500 Subject: [PATCH 01/65] Push to github Partially working implementation of unicorn for instruction emulation. Still not working: Mapping symbolic memory Global Descriptor tables --- manticore/core/cpu/abstractcpu.py | 46 +++-- manticore/core/cpu/x86.py | 12 +- manticore/core/memory.py | 3 +- manticore/core/smtlib/expression.py | 8 +- manticore/core/state.py | 2 +- manticore/platforms/linux.py | 42 ++--- manticore/utils/emulate.py | 260 ++++++++++++++++++++-------- 7 files changed, 255 insertions(+), 118 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index aaf2bf939..e8890911b 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -123,6 +123,7 @@ def _reg_name(self, reg_id): :param int reg_id: Register ID ''' + if reg_id > 234: return None #https://github.com/aquynh/capstone/blob/master/bindings/python/capstone/x86_const.py#L239 cs_reg_name = self.cpu.instruction.reg_name(reg_id) if cs_reg_name is None or cs_reg_name.lower() == '(invalid)': return None @@ -372,6 +373,7 @@ def __init__(self, regfile, memory, **kwargs): self._instruction_cache = {} self._icount = 0 self._last_pc = None + self._non_unicorn_instrs = 0 if not hasattr(self, "disasm"): self.disasm = init_disassembler(self._disasm, self.arch, self.mode) # Ensure that regfile created STACK/PC aliases @@ -561,12 +563,12 @@ def write_string(self, where, string, max_length=None): The size in bytes to cap the string at, or None [default] for no limit. This includes the NULL terminator. ''' - + if max_length is not None: string = string[:max_length-1] - + self.write_bytes(where, string + '\x00') - + def read_string(self, where, max_length=None): ''' Read a NUL-terminated concrete buffer from memory. 
@@ -742,15 +744,30 @@ def execute(self): def fallback_to_emulate(*operands): text_bytes = ' '.join('%02x'%x for x in insn.bytes) - logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - insn.address, text_bytes, insn.mnemonic, insn.op_str) - - self.publish('will_emulate_instruction', insn) - self.emulate(insn) - self.publish('did_emulate_instruction', insn) - - implementation = getattr(self, name, fallback_to_emulate) + # logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", + # insn.address, text_bytes, insn.mnemonic, insn.op_str) + if 'SYSCALL' in name: + self.emu.sync_unicorn_to_manticore() + implementation = getattr(self, name) + implementation(*insn.operands) + else: + self.publish('will_emulate_instruction', insn) + self.emulate(insn) + self.publish('did_emulate_instruction', insn) + + def determine_implementation(instruction): + implementation = fallback_to_emulate + for op in instruction.operands: + if op.mem.segment is not None and 'FS' in op.mem.segment: + print("%s uses the %s segment" % (name, op.mem.segment)) + implementation = getattr(self, name, fallback_to_emulate) + self._non_unicorn_instrs += 1 + + return implementation + + #implementation = getattr(self, name, fallback_to_emulate) + implementation = determine_implementation(insn) if logger.level == logging.DEBUG : logger.debug(self.render_instruction(insn)) for l in self.render_registers(): @@ -769,14 +786,15 @@ def emulate(self, insn): :param capstone.CsInsn instruction: The instruction object to emulate ''' - emu = UnicornEmulator(self) - emu.emulate(insn) + if not hasattr(self, 'emu'): + self.emu = UnicornEmulator(self) + self.emu.emulate(insn) # We have been seeing occasional Unicorn issues with it not clearing # the backing unicorn instance. Saw fewer issues with the following # line present. 
- del emu + # del emu def render_instruction(self, insn=None): try: diff --git a/manticore/core/cpu/x86.py b/manticore/core/cpu/x86.py index 7fe636654..1097334a7 100644 --- a/manticore/core/cpu/x86.py +++ b/manticore/core/cpu/x86.py @@ -620,6 +620,7 @@ def address(self): address = 0 if self.mem.segment is not None: seg = self.mem.segment + print("Accessing %s" % seg) base, size, ty = cpu.get_descriptor(cpu.read_register(seg)) address += base #todo check limits and perms else: @@ -702,14 +703,19 @@ def __setstate__(self, state): #################### # Segments def set_descriptor(self, selector, base, limit, perms): - assert selector>0 and selector < 0xffff + assert selector>=0 and selector < 0xffff assert base>=0 and base < (1<=0 and limit < 0xffff or limit&0xfff == 0 #perms ? not used yet Also is not really perms but rather a bunch of attributes + self.publish('will_set_descriptor', selector, base, limit, perms) self._segments[selector] = (base, limit, perms) + self.publish('did_set_descriptor', selector, base, limit, perms) def get_descriptor(self, selector): - return self._segments.setdefault(selector, (0, 0xfffff000, 'rwx')) + if selector in self._segments: + return self._segments[selector] + self.set_descriptor(selector, 0, 0xfffff000, 'rwx') + return self._segments[selector] def _wrap_operands(self, operands): @@ -6023,5 +6029,3 @@ def XLATB(cpu): :param dest: destination operand. ''' cpu.AL = cpu.read_int(cpu.EBX + Operators.ZEXTEND(cpu.AL, 32), 8) - - diff --git a/manticore/core/memory.py b/manticore/core/memory.py index 6d6e0ffad..9194ad260 100644 --- a/manticore/core/memory.py +++ b/manticore/core/memory.py @@ -84,6 +84,7 @@ def __init__(self, start, size, perms, name=None): :param perms: the access permissions of the map (rwx). 
''' assert isinstance(start, (int, long)) and start >= 0, 'Invalid start address' + print("(M) Mapping %s bytes from %s to %s" % (size, hex(start), hex(start+size))) assert isinstance(size, (int, long)) and size > 0, 'Invalid end address' super(Map, self).__init__() @@ -146,7 +147,7 @@ def __cmp__(self, other): result = cmp(self.end, other.end) if result != 0: return result - # go by each char permission + # go by each char permission result = cmp(self.perms, other.perms) if result != 0: return result diff --git a/manticore/core/smtlib/expression.py b/manticore/core/smtlib/expression.py index 07794ed03..937c0599f 100644 --- a/manticore/core/smtlib/expression.py +++ b/manticore/core/smtlib/expression.py @@ -612,7 +612,7 @@ def __init__(self, array): @property def operands(self): return self._array.operands - + @property def taint(self): return self._array.taint @@ -673,6 +673,9 @@ def array(self): def index(self): return self.operands[1] + def __repr__(self): + return "smtlib.expression.ArraySelect Object with index %s:\n%s" % (self.index, self.array) + class BitVecSignExtend(BitVecOperation): def __init__(self, operand, size_dest, *args, **kwargs): @@ -715,5 +718,4 @@ def __init__(self, size, condition, true_value, false_value, *args, **kwargs): assert isinstance(true_value, BitVec) assert isinstance(false_value, BitVec) assert true_value.size == false_value.size - super(BitVecITE, self).__init__(size, condition, true_value, false_value, *args, **kwargs) - + super(BitVecITE, self).__init__(size, condition, true_value, false_value, *args, **kwargs) diff --git a/manticore/core/state.py b/manticore/core/state.py index 43e7da347..22c366a28 100644 --- a/manticore/core/state.py +++ b/manticore/core/state.py @@ -134,7 +134,7 @@ def setstate(state, value): except ConcretizeMemory as e: expression = self.cpu.read_int(e.address, e.size) def setstate(state, value): - state.cpu.write_int(e.reg_name, value, e.size) + state.cpu.write_int(e.address, value, e.size) raise 
Concretize(e.message, expression=expression, setstate=setstate, diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index abe93d624..d3243f0e9 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1049,27 +1049,27 @@ def sys_getcwd(self, buf, size): ''' getcwd - Get the current working directory :param int buf: Pointer to dest array - :param size: size in bytes of the array pointed to by the buf + :param size: size in bytes of the array pointed to by the buf :return: buf (Success), or 0 ''' - + try: - current_dir = os.getcwd() + current_dir = os.getcwd() length = len(current_dir) + 1 - + if size > 0 and size < length: - logger.info("GETCWD: size is greater than 0, but is smaller than the length" + logger.info("GETCWD: size is greater than 0, but is smaller than the length" "of the path + 1. Returning ERANGE") return -errno.ERANGE - + if not self.current.memory.access_ok(slice(buf, buf+length), 'w'): logger.info("GETCWD: buf within invalid memory. Returning EFAULT") return -errno.EFAULT - + self.current.write_string(buf, current_dir) logger.debug("getcwd(0x%08x, %u) -> <%s> (Size %d)", buf, size, current_dir, length) return length - + except OSError as e: return -e.errno @@ -1106,6 +1106,8 @@ def sys_lseek(self, fd, offset, whence): return 0 def sys_read(self, fd, buf, count): + + print("Reading %s bytes from FD %s into %02x" % (count, fd, buf)) data = '' if count != 0: # TODO check count bytes from buf @@ -1375,7 +1377,7 @@ def sys_rt_sigprocmask(self, cpu, how, newset, oldset): def sys_sigprocmask(self, cpu, how, newset, oldset): logger.debug("SIGACTION, Ignoring changing signal mask set cmd:%d", how) return 0 - + def sys_dup(self, fd): ''' Duplicates an open file descriptor @@ -1383,15 +1385,15 @@ def sys_dup(self, fd): :param fd: the open file descriptor to duplicate. :return: the new file descriptor. ''' - + if not self._is_fd_open(fd): logger.info("DUP: Passed fd is not open. 
Returning EBADF") return -errno.EBADF - + newfd = self._dup(fd) logger.debug('sys_dup(%d) -> %d', fd, newfd) return newfd - + def sys_dup2(self, fd, newfd): ''' Duplicates an open fd to newfd. If newfd is open, it is first closed @@ -1410,18 +1412,18 @@ def sys_dup2(self, fd, newfd): if newfd >= soft_max: logger.info("DUP2: newfd is above max descriptor table size") return -errno.EBADF - + if self._is_fd_open(newfd): self.sys_close(newfd) - + if newfd >= len(self.files): self.files.extend([None]*(newfd+1-len(self.files))) - + self.files[newfd] = self.files[fd] - + logger.debug('sys_dup2(%d,%d) -> %d', fd, newfd, newfd) return newfd - + def sys_close(self, fd): ''' Closes a file descriptor @@ -1875,6 +1877,7 @@ def syscall(self): except (AttributeError, KeyError): raise Exception("SyscallNotImplemented %d %d"%(self.current.address_bit_size, index)) + print("(M) Invoking %s syscall" % name) return self._syscall_abi.invoke(implementation) def sys_clock_gettime(self, clock_id, timespec): @@ -2377,7 +2380,7 @@ def sys_recv(self, sockfd, buf, count, flags): return super(SLinux, self).sys_recv(sockfd, buf, count, flags) def sys_accept(self, sockfd, addr, addrlen, flags): - #TODO(yan): Transmit some symbolic bytes as soon as we start. + #TODO(yan): Transmit some symbolic bytes as soon as we start. # Remove this hack once no longer needed. fd = super(SLinux, self).sys_accept(sockfd, addr, addrlen, flags) @@ -2393,7 +2396,7 @@ def sys_accept(self, sockfd, addr, addrlen, flags): def sys_open(self, buf, flags, mode): ''' A version of open(2) that includes a special case for a symbolic path. - When given a symbolic path, it will create a temporary file with + When given a symbolic path, it will create a temporary file with 64 bytes of symbolic bytes as contents and return that instead. 
:param buf: address of zero-terminated pathname @@ -2416,4 +2419,3 @@ def sys_open(self, buf, flags, mode): self.current.memory.munmap(buf, 1024) return rv - diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 762595031..82cf3b13e 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -2,6 +2,7 @@ import inspect from ..core.memory import MemoryException, FileMap, AnonMap +from ..core.smtlib import Operators from .helpers import issymbolic ###################################################################### @@ -15,6 +16,9 @@ from capstone.arm import * from capstone.x86 import * +import pprint as pp +import struct + logger = logging.getLogger("EMULATOR") class UnicornEmulator(object): @@ -23,17 +27,80 @@ class UnicornEmulator(object): ''' def __init__(self, cpu): self._cpu = cpu + self.flag_registers = set(['CF','PF','AF','ZF','SF','IF','DF','OF']) text = cpu.memory.map_containing(cpu.PC) + cpu.subscribe('did_write_memory', self.write_back_memory) + cpu.subscribe('did_write_register', self.write_back_register) + cpu.subscribe('did_set_descriptor', self.update_segment) # Keep track of all memory mappings. 
We start with just the text section - self._should_be_mapped = { - text.start: (len(text), UC_PROT_READ | UC_PROT_EXEC) - } + self.mem_map = {} + for m in cpu.memory.maps: + if True:#type(m) is FileMap: + permissions = UC_PROT_NONE + if 'r' in m.perms: + permissions |= UC_PROT_READ + if 'w' in m.perms: + permissions |= UC_PROT_WRITE + if 'x' in m.perms: + permissions |= UC_PROT_EXEC + self.mem_map[m.start] = (len(m), permissions) # Keep track of all the memory Unicorn needs while executing this # instruction self._should_be_written = {} + # Establish Manticore state, potentially from past emulation + # attempts + self.reset() + for base in self.mem_map: + size, perms = self.mem_map[base] + print("About to map %s bytes from %02x to %02x" % (size, base, base + size)) + self._emu.mem_map(base, size, perms) + + self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped) + self._emu.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED, self._hook_unmapped) + self._emu.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, self._hook_unmapped) + # self._emu.hook_add(UC_HOOK_MEM_READ, self._hook_xfer_mem) + self._emu.hook_add(UC_HOOK_MEM_WRITE, self._hook_xfer_mem) + self._emu.hook_add(UC_HOOK_INTR, self._interrupt) + + self.registers = set(self._cpu.canonical_registers) + + # Refer to EFLAGS instead of individual flags for x86 + if self._cpu.arch == CS_ARCH_X86: + # The last 8 canonical registers of x86 are individual flags; replace + # with the eflags + self.registers -= self.flag_registers + self.registers.add('EFLAGS') + + # TODO(mark): Unicorn 1.0.1 does not support reading YMM registers, + # and simply returns back zero. If a unicorn emulated instruction writes to an + # XMM reg, we will read back the corresponding YMM register, resulting in an + # incorrect zero value being actually written to the XMM register. This is + # fixed in Unicorn PR #819, so when that is included in a release, delete + # these two lines. 
+ self.registers -= set(['YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15']) + self.registers |= set(['XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15']) + + print("Setting initial register state") + for reg in self.registers: + val = self._cpu.read_register(reg) + if issymbolic(val): + from ..core.cpu.abstractcpu import ConcretizeRegister + raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", + policy='ONE') + self._emu.reg_write(self._to_unicorn_id(reg), val) + + self.create_GDT() + for index, m in enumerate(self.mem_map): + size = self.mem_map[m][0] + print("Reading map %s (%s kb)" % (index, size / 1024)) + map_bytes = self._cpu.read_bytes(m, size) + print("Writing map %s" % index) + self._emu.mem_write(m, ''.join(map_bytes)) + print("Unicorn init complete") + def reset(self): self._emu = self._unicorn() self._to_raise = None @@ -53,6 +120,12 @@ def _unicorn(self): raise RuntimeError("Unsupported architecture") + def in_map(self, addr): + for m in self.mem_map: + if addr >= m and addr <= (m + self.mem_map[m][0]): + return True + return False + def _create_emulated_mapping(self, uc, address): ''' Create a mapping in Unicorn and note that we'll need it if we retry. 
@@ -63,18 +136,18 @@ def _create_emulated_mapping(self, uc, address): ''' m = self._cpu.memory.map_containing(address) - - permissions = UC_PROT_NONE - if 'r' in m.perms: - permissions |= UC_PROT_READ - if 'w' in m.perms: - permissions |= UC_PROT_WRITE - if 'x' in m.perms: - permissions |= UC_PROT_EXEC - - uc.mem_map(m.start, len(m), permissions) - - self._should_be_mapped[m.start] = (len(m), permissions) + if m.start not in self.mem_map.keys(): + permissions = UC_PROT_NONE + if 'r' in m.perms: + permissions |= UC_PROT_READ + if 'w' in m.perms: + permissions |= UC_PROT_WRITE + if 'x' in m.perms: + permissions |= UC_PROT_EXEC + print("About to map %s bytes from %02x to %02x" % (len(m), m.start, m.start + len(m))) + uc.mem_map(m.start, len(m), permissions) + + self.mem_map[m.start] = (len(m), permissions) return m @@ -95,22 +168,24 @@ def _hook_xfer_mem(self, uc, access, address, size, value, data): assert access in (UC_MEM_WRITE, UC_MEM_READ, UC_MEM_FETCH) if access == UC_MEM_WRITE: + print("Writing %s bytes to %02x: %02x" % (size, address, value)) self._cpu.write_int(address, value, size*8) # If client code is attempting to read a value, we need to bring it # in from Manticore state. If we try to mem_write it here, Unicorn # will segfault. We add the value to a list of things that need to # be written, and ask to restart the emulation. 
- elif access == UC_MEM_READ: - value = self._cpu.read_bytes(address, size) - - if address in self._should_be_written: - return True - - self._should_be_written[address] = value - - self._should_try_again = True - return False + # elif access == UC_MEM_READ: + # print("Reading %s bytes from %02x: %02x" % (size, address, value)) + # value = self._cpu.read_bytes(address, size) + # + # if address in self._should_be_written: + # return True + # + # self._should_be_written[address] = value + # + # self._should_try_again = True + # return False return True @@ -121,8 +196,10 @@ def _hook_unmapped(self, uc, access, address, size, value, data): ''' try: + print("Mapping memory at " + hex(address)) m = self._create_emulated_mapping(uc, address) except MemoryException as e: + print("Failed to map memory") self._to_raise = e self._should_try_again = False return False @@ -134,7 +211,7 @@ def _interrupt(self, uc, number, data): ''' Handle software interrupt (SVC/INT) ''' - + print("Caught interrupt: %s" % number) from ..core.cpu.abstractcpu import Interruption self._to_raise = Interruption(number) return True @@ -149,7 +226,17 @@ def _to_unicorn_id(self, reg_name): return globals()['UC_ARM_REG_' + reg_name] elif self._cpu.arch == CS_ARCH_X86: # TODO(yan): This needs to handle AF register - return globals()['UC_X86_REG_' + reg_name] + custom_mapping = {'PC':'RIP'} + try: + return globals()['UC_X86_REG_' + reg_name] + except KeyError: + try: + return globals()['UC_X86_REG_' + custom_mapping[reg_name]] + except: + print 'UC_X86_REG_' + str(reg_name) + ' not in ' + pp.pprint([k for k in globals() if 'UC_X86_REG' in k]) + raise + else: # TODO(yan): raise a more appropriate exception raise TypeError @@ -162,15 +249,6 @@ def emulate(self, instruction): # The emulation might restart if Unicorn needs to bring in a memory map # or bring a value from Manticore state. 
while True: - - self.reset() - - # Establish Manticore state, potentially from past emulation - # attempts - for base in self._should_be_mapped: - size, perms = self._should_be_mapped[base] - self._emu.mem_map(base, size, perms) - for address, values in self._should_be_written.items(): for offset, byte in enumerate(values, start=address): if issymbolic(byte): @@ -193,48 +271,20 @@ def _step(self, instruction): ''' A single attempt at executing an instruction. ''' - - registers = set(self._cpu.canonical_registers) - - # Refer to EFLAGS instead of individual flags for x86 - if self._cpu.arch == CS_ARCH_X86: - # The last 8 canonical registers of x86 are individual flags; replace - # with the eflags - registers -= set(['CF','PF','AF','ZF','SF','IF','DF','OF']) - registers.add('EFLAGS') - - # TODO(mark): Unicorn 1.0.1 does not support reading YMM registers, - # and simply returns back zero. If a unicorn emulated instruction writes to an - # XMM reg, we will read back the corresponding YMM register, resulting in an - # incorrect zero value being actually written to the XMM register. This is - # fixed in Unicorn PR #819, so when that is included in a release, delete - # these two lines. - registers -= set(['YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15']) - registers |= set(['XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15']) - # XXX(yan): This concretizes the entire register state. This is overly # aggressive. Once capstone adds consistent support for accessing # referred registers, make this only concretize those registers being # read from. 
- for reg in registers: - val = self._cpu.read_register(reg) - if issymbolic(val): - from ..core.cpu.abstractcpu import ConcretizeRegister - raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", - policy='ONE') - self._emu.reg_write(self._to_unicorn_id(reg), val) + # for reg in self.registers: + # val = self._cpu.read_register(reg) + # if issymbolic(val): + # from ..core.cpu.abstractcpu import ConcretizeRegister + # raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", + # policy='ONE') + # self._emu.reg_write(self._to_unicorn_id(reg), val) # Bring in the instruction itself instruction = self._cpu.decode_instruction(self._cpu.PC) - text_bytes = self._cpu.read_bytes(self._cpu.PC, instruction.size) - self._emu.mem_write(self._cpu.PC, ''.join(text_bytes)) - - self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped) - self._emu.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED, self._hook_unmapped) - self._emu.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, self._hook_unmapped) - self._emu.hook_add(UC_HOOK_MEM_READ, self._hook_xfer_mem) - self._emu.hook_add(UC_HOOK_MEM_WRITE, self._hook_xfer_mem) - self._emu.hook_add(UC_HOOK_INTR, self._interrupt) saved_PC = self._cpu.PC @@ -258,18 +308,78 @@ def _step(self, instruction): logger.debug(">"*10) # Bring back Unicorn registers to Manticore - for reg in registers: + for reg in self.registers: val = self._emu.reg_read(self._to_unicorn_id(reg)) self._cpu.write_register(reg, val) #Unicorn hack. 
On single step unicorn wont advance the PC register - mu_pc = self.get_unicorn_pc() - if saved_PC == mu_pc: - self._cpu.PC = saved_PC + instruction.size + # mu_pc = self.get_unicorn_pc() + # if saved_PC == mu_pc: + # self._cpu.PC = saved_PC + instruction.size # Raise the exception from a hook that Unicorn would have eaten if self._to_raise: + print("Raising %s" % self._to_raise) raise self._to_raise return + def sync_unicorn_to_manticore(self): + for reg in self.registers: + oldval = self._cpu.read_register(reg) + if issymbolic(oldval): + from ..core.cpu.abstractcpu import ConcretizeRegister + raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", + policy='ONE') + val = self._emu.reg_read(self._to_unicorn_id(reg)) + if val != oldval: + print("(M) %s: %s -> %s" % (reg, oldval, val)) + self._cpu.write_register(reg, val) + + def write_back_memory(self, where, expr, size): + if issymbolic(expr): + print("Concretizing memory") + from ..core.memory import ConcretizeMemory + raise ConcretizeMemory(self._cpu.memory, where, size, policy='ONE') + # data = '+'*(size/8) + # else: + data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] + # print(data) + # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + if not self.in_map(where): + self._create_emulated_mapping(self._emu, where) + self._emu.mem_write(where, ''.join(data)) + + def write_back_register(self, reg, val): + if reg in self.flag_registers: + self._emu.reg_write(self._to_unicorn_id('EFLAGS'), self._cpu.read_register('EFLAGS')) + return + oldval = self._emu.reg_read(self._to_unicorn_id(reg)) + if oldval != val: + print("(U) %s: %s -> %s" % (reg, oldval, val)) + self._emu.reg_write(self._to_unicorn_id(reg), val) + + def update_segment(self, selector, base, size, perms): + print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) + dest = self.gdt_base + (selector*8) + entry = self.make_table_entry(base, 
size) + self._emu.mem_write(dest, entry) + + + def make_table_entry(self, base, limit, access_byte=0xff, flags=0xf0): + # http://wiki.osdev.org/Global_Descriptor_Table#Structure + out = 0 + out |= (limit & 0xffff) + out |= ((base & 0xffffff) << 16) + out |= ((access_byte & 0xff) << 40) + out |= (((limit >> 16) & 0xf) << 48) + out |= ((flags & 0xff) << 52) + out |= (((base >> 24) & 0xff) << 56) + return struct.pack(' Date: Tue, 24 Oct 2017 23:55:33 -0500 Subject: [PATCH 02/65] Bypass smtlib when initializing memory --- manticore/core/cpu/abstractcpu.py | 19 +++++++++++++++--- manticore/utils/emulate.py | 5 +++-- tests/binaries/arguments | Bin tests/binaries/arguments_linux_amd64 | Bin tests/binaries/arguments_linux_armv7 | Bin tests/binaries/basic_linux_amd64 | Bin tests/binaries/cadet_decree_x86 | Bin .../api_interception/api_interception.cpp | 0 .../api_interception/api_interception.dmp | Bin .../memdumps/ignore_an_api/ignore_an_api.dmp | Bin tests/memdumps/index_code/index_code.dmp | Bin tests/memdumps/index_data/index_data.dmp | Bin tests/memdumps/linux_palindrome/Palindrome | Bin tests/memdumps/many_ifs/many_ifs.dmp | Bin tests/memdumps/many_ifs/many_ifs.exe | Bin .../simple_bad_deref/simple_bad_deref.dmp | Bin tests/memdumps/simple_fpu/simple_fpu.dmp | Bin tests/memdumps/simple_parse/simple_parse.dmp | Bin 18 files changed, 19 insertions(+), 5 deletions(-) mode change 100755 => 100644 tests/binaries/arguments mode change 100755 => 100644 tests/binaries/arguments_linux_amd64 mode change 100755 => 100644 tests/binaries/arguments_linux_armv7 mode change 100755 => 100644 tests/binaries/basic_linux_amd64 mode change 100755 => 100644 tests/binaries/cadet_decree_x86 mode change 100755 => 100644 tests/memdumps/api_interception/api_interception.cpp mode change 100755 => 100644 tests/memdumps/api_interception/api_interception.dmp mode change 100755 => 100644 tests/memdumps/ignore_an_api/ignore_an_api.dmp mode change 100755 => 100644 
tests/memdumps/index_code/index_code.dmp mode change 100755 => 100644 tests/memdumps/index_data/index_data.dmp mode change 100755 => 100644 tests/memdumps/linux_palindrome/Palindrome mode change 100755 => 100644 tests/memdumps/many_ifs/many_ifs.dmp mode change 100755 => 100644 tests/memdumps/many_ifs/many_ifs.exe mode change 100755 => 100644 tests/memdumps/simple_bad_deref/simple_bad_deref.dmp mode change 100755 => 100644 tests/memdumps/simple_fpu/simple_fpu.dmp mode change 100755 => 100644 tests/memdumps/simple_parse/simple_parse.dmp diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index e8890911b..096628a18 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -507,6 +507,19 @@ def write_int(self, where, expression, size=None): self.publish('did_write_memory', where, expression, size) + def _raw_read(self, where, size=None): + ''' + Selects bytes from memory + + :param int where: address to read from + :param size: number of bits to read + :return: the value read + :rtype: int or BitVec + ''' + data = self.memory[where:where + size] + assert (len(data)) == size + return data + def read_int(self, where, size=None): ''' Reads int from memory @@ -521,8 +534,8 @@ def read_int(self, where, size=None): assert size in SANE_SIZES self.publish('will_read_memory', where, size) - data = self.memory[where:where + size / 8] - assert (8 * len(data)) == size + data = self._raw_read(where, size/8) + value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) self.publish('did_read_memory', where, value, size) @@ -769,7 +782,7 @@ def determine_implementation(instruction): #implementation = getattr(self, name, fallback_to_emulate) implementation = determine_implementation(insn) if logger.level == logging.DEBUG : - logger.debug(self.render_instruction(insn)) + logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) for l in self.render_registers(): register_logger.debug(l) diff --git 
a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 82cf3b13e..fb8de2103 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -96,7 +96,8 @@ def __init__(self, cpu): for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] print("Reading map %s (%s kb)" % (index, size / 1024)) - map_bytes = self._cpu.read_bytes(m, size) + # map_bytes = self._cpu.read_bytes(m, size) + map_bytes = self._cpu._raw_read(m,size) print("Writing map %s" % index) self._emu.mem_write(m, ''.join(map_bytes)) print("Unicorn init complete") @@ -345,7 +346,7 @@ def write_back_memory(self, where, expr, size): # else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] # print(data) - # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) diff --git a/tests/binaries/arguments b/tests/binaries/arguments old mode 100755 new mode 100644 diff --git a/tests/binaries/arguments_linux_amd64 b/tests/binaries/arguments_linux_amd64 old mode 100755 new mode 100644 diff --git a/tests/binaries/arguments_linux_armv7 b/tests/binaries/arguments_linux_armv7 old mode 100755 new mode 100644 diff --git a/tests/binaries/basic_linux_amd64 b/tests/binaries/basic_linux_amd64 old mode 100755 new mode 100644 diff --git a/tests/binaries/cadet_decree_x86 b/tests/binaries/cadet_decree_x86 old mode 100755 new mode 100644 diff --git a/tests/memdumps/api_interception/api_interception.cpp b/tests/memdumps/api_interception/api_interception.cpp old mode 100755 new mode 100644 diff --git a/tests/memdumps/api_interception/api_interception.dmp b/tests/memdumps/api_interception/api_interception.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/ignore_an_api/ignore_an_api.dmp b/tests/memdumps/ignore_an_api/ignore_an_api.dmp old mode 
100755 new mode 100644 diff --git a/tests/memdumps/index_code/index_code.dmp b/tests/memdumps/index_code/index_code.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/index_data/index_data.dmp b/tests/memdumps/index_data/index_data.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/linux_palindrome/Palindrome b/tests/memdumps/linux_palindrome/Palindrome old mode 100755 new mode 100644 diff --git a/tests/memdumps/many_ifs/many_ifs.dmp b/tests/memdumps/many_ifs/many_ifs.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/many_ifs/many_ifs.exe b/tests/memdumps/many_ifs/many_ifs.exe old mode 100755 new mode 100644 diff --git a/tests/memdumps/simple_bad_deref/simple_bad_deref.dmp b/tests/memdumps/simple_bad_deref/simple_bad_deref.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/simple_fpu/simple_fpu.dmp b/tests/memdumps/simple_fpu/simple_fpu.dmp old mode 100755 new mode 100644 diff --git a/tests/memdumps/simple_parse/simple_parse.dmp b/tests/memdumps/simple_parse/simple_parse.dmp old mode 100755 new mode 100644 From f40577b53781df1c0c0997f11b7f576d21dd9d13 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Tue, 24 Oct 2017 23:55:57 -0500 Subject: [PATCH 03/65] Revert "Bypass smtlib when initializing memory" This reverts commit 60e6266fcf923295c8520ff2573026c8fd7937be. 
--- manticore/core/cpu/abstractcpu.py | 19 +++--------------- manticore/utils/emulate.py | 5 ++--- tests/binaries/arguments | Bin tests/binaries/arguments_linux_amd64 | Bin tests/binaries/arguments_linux_armv7 | Bin tests/binaries/basic_linux_amd64 | Bin tests/binaries/cadet_decree_x86 | Bin .../api_interception/api_interception.cpp | 0 .../api_interception/api_interception.dmp | Bin .../memdumps/ignore_an_api/ignore_an_api.dmp | Bin tests/memdumps/index_code/index_code.dmp | Bin tests/memdumps/index_data/index_data.dmp | Bin tests/memdumps/linux_palindrome/Palindrome | Bin tests/memdumps/many_ifs/many_ifs.dmp | Bin tests/memdumps/many_ifs/many_ifs.exe | Bin .../simple_bad_deref/simple_bad_deref.dmp | Bin tests/memdumps/simple_fpu/simple_fpu.dmp | Bin tests/memdumps/simple_parse/simple_parse.dmp | Bin 18 files changed, 5 insertions(+), 19 deletions(-) mode change 100644 => 100755 tests/binaries/arguments mode change 100644 => 100755 tests/binaries/arguments_linux_amd64 mode change 100644 => 100755 tests/binaries/arguments_linux_armv7 mode change 100644 => 100755 tests/binaries/basic_linux_amd64 mode change 100644 => 100755 tests/binaries/cadet_decree_x86 mode change 100644 => 100755 tests/memdumps/api_interception/api_interception.cpp mode change 100644 => 100755 tests/memdumps/api_interception/api_interception.dmp mode change 100644 => 100755 tests/memdumps/ignore_an_api/ignore_an_api.dmp mode change 100644 => 100755 tests/memdumps/index_code/index_code.dmp mode change 100644 => 100755 tests/memdumps/index_data/index_data.dmp mode change 100644 => 100755 tests/memdumps/linux_palindrome/Palindrome mode change 100644 => 100755 tests/memdumps/many_ifs/many_ifs.dmp mode change 100644 => 100755 tests/memdumps/many_ifs/many_ifs.exe mode change 100644 => 100755 tests/memdumps/simple_bad_deref/simple_bad_deref.dmp mode change 100644 => 100755 tests/memdumps/simple_fpu/simple_fpu.dmp mode change 100644 => 100755 tests/memdumps/simple_parse/simple_parse.dmp diff --git 
a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 096628a18..e8890911b 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -507,19 +507,6 @@ def write_int(self, where, expression, size=None): self.publish('did_write_memory', where, expression, size) - def _raw_read(self, where, size=None): - ''' - Selects bytes from memory - - :param int where: address to read from - :param size: number of bits to read - :return: the value read - :rtype: int or BitVec - ''' - data = self.memory[where:where + size] - assert (len(data)) == size - return data - def read_int(self, where, size=None): ''' Reads int from memory @@ -534,8 +521,8 @@ def read_int(self, where, size=None): assert size in SANE_SIZES self.publish('will_read_memory', where, size) - data = self._raw_read(where, size/8) - + data = self.memory[where:where + size / 8] + assert (8 * len(data)) == size value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) self.publish('did_read_memory', where, value, size) @@ -782,7 +769,7 @@ def determine_implementation(instruction): #implementation = getattr(self, name, fallback_to_emulate) implementation = determine_implementation(insn) if logger.level == logging.DEBUG : - logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) + logger.debug(self.render_instruction(insn)) for l in self.render_registers(): register_logger.debug(l) diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index fb8de2103..82cf3b13e 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -96,8 +96,7 @@ def __init__(self, cpu): for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] print("Reading map %s (%s kb)" % (index, size / 1024)) - # map_bytes = self._cpu.read_bytes(m, size) - map_bytes = self._cpu._raw_read(m,size) + map_bytes = self._cpu.read_bytes(m, size) print("Writing map %s" % index) self._emu.mem_write(m, ''.join(map_bytes)) print("Unicorn init complete") @@ 
-346,7 +345,7 @@ def write_back_memory(self, where, expr, size): # else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] # print(data) - print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) diff --git a/tests/binaries/arguments b/tests/binaries/arguments old mode 100644 new mode 100755 diff --git a/tests/binaries/arguments_linux_amd64 b/tests/binaries/arguments_linux_amd64 old mode 100644 new mode 100755 diff --git a/tests/binaries/arguments_linux_armv7 b/tests/binaries/arguments_linux_armv7 old mode 100644 new mode 100755 diff --git a/tests/binaries/basic_linux_amd64 b/tests/binaries/basic_linux_amd64 old mode 100644 new mode 100755 diff --git a/tests/binaries/cadet_decree_x86 b/tests/binaries/cadet_decree_x86 old mode 100644 new mode 100755 diff --git a/tests/memdumps/api_interception/api_interception.cpp b/tests/memdumps/api_interception/api_interception.cpp old mode 100644 new mode 100755 diff --git a/tests/memdumps/api_interception/api_interception.dmp b/tests/memdumps/api_interception/api_interception.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/ignore_an_api/ignore_an_api.dmp b/tests/memdumps/ignore_an_api/ignore_an_api.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/index_code/index_code.dmp b/tests/memdumps/index_code/index_code.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/index_data/index_data.dmp b/tests/memdumps/index_data/index_data.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/linux_palindrome/Palindrome b/tests/memdumps/linux_palindrome/Palindrome old mode 100644 new mode 100755 diff --git a/tests/memdumps/many_ifs/many_ifs.dmp b/tests/memdumps/many_ifs/many_ifs.dmp old mode 100644 new mode 100755 diff --git 
a/tests/memdumps/many_ifs/many_ifs.exe b/tests/memdumps/many_ifs/many_ifs.exe old mode 100644 new mode 100755 diff --git a/tests/memdumps/simple_bad_deref/simple_bad_deref.dmp b/tests/memdumps/simple_bad_deref/simple_bad_deref.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/simple_fpu/simple_fpu.dmp b/tests/memdumps/simple_fpu/simple_fpu.dmp old mode 100644 new mode 100755 diff --git a/tests/memdumps/simple_parse/simple_parse.dmp b/tests/memdumps/simple_parse/simple_parse.dmp old mode 100644 new mode 100755 From cb3b7338a74135eff0d79db8fc34028a6350e6e6 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Tue, 24 Oct 2017 23:58:36 -0500 Subject: [PATCH 04/65] Fixed extra boxes checked... --- manticore/core/cpu/abstractcpu.py | 19 ++++++++++++++++--- manticore/utils/emulate.py | 5 +++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index e8890911b..096628a18 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -507,6 +507,19 @@ def write_int(self, where, expression, size=None): self.publish('did_write_memory', where, expression, size) + def _raw_read(self, where, size=None): + ''' + Selects bytes from memory + + :param int where: address to read from + :param size: number of bits to read + :return: the value read + :rtype: int or BitVec + ''' + data = self.memory[where:where + size] + assert (len(data)) == size + return data + def read_int(self, where, size=None): ''' Reads int from memory @@ -521,8 +534,8 @@ def read_int(self, where, size=None): assert size in SANE_SIZES self.publish('will_read_memory', where, size) - data = self.memory[where:where + size / 8] - assert (8 * len(data)) == size + data = self._raw_read(where, size/8) + value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) self.publish('did_read_memory', where, value, size) @@ -769,7 +782,7 @@ def determine_implementation(instruction): #implementation 
= getattr(self, name, fallback_to_emulate) implementation = determine_implementation(insn) if logger.level == logging.DEBUG : - logger.debug(self.render_instruction(insn)) + logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) for l in self.render_registers(): register_logger.debug(l) diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 82cf3b13e..fb8de2103 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -96,7 +96,8 @@ def __init__(self, cpu): for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] print("Reading map %s (%s kb)" % (index, size / 1024)) - map_bytes = self._cpu.read_bytes(m, size) + # map_bytes = self._cpu.read_bytes(m, size) + map_bytes = self._cpu._raw_read(m,size) print("Writing map %s" % index) self._emu.mem_write(m, ''.join(map_bytes)) print("Unicorn init complete") @@ -345,7 +346,7 @@ def write_back_memory(self, where, expr, size): # else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] # print(data) - # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) From bf266e6233ab1ab8a1cf7c9c183632fe22a0861d Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Tue, 31 Oct 2017 00:14:52 -0400 Subject: [PATCH 05/65] Save changes to emulator before implementing other concretization techniques --- manticore/core/cpu/abstractcpu.py | 19 ++++++++++++++++--- manticore/utils/emulate.py | 11 +++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index e8890911b..096628a18 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -507,6 +507,19 @@ def write_int(self, where, expression, size=None): 
self.publish('did_write_memory', where, expression, size) + def _raw_read(self, where, size=None): + ''' + Selects bytes from memory + + :param int where: address to read from + :param size: number of bits to read + :return: the value read + :rtype: int or BitVec + ''' + data = self.memory[where:where + size] + assert (len(data)) == size + return data + def read_int(self, where, size=None): ''' Reads int from memory @@ -521,8 +534,8 @@ def read_int(self, where, size=None): assert size in SANE_SIZES self.publish('will_read_memory', where, size) - data = self.memory[where:where + size / 8] - assert (8 * len(data)) == size + data = self._raw_read(where, size/8) + value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) self.publish('did_read_memory', where, value, size) @@ -769,7 +782,7 @@ def determine_implementation(instruction): #implementation = getattr(self, name, fallback_to_emulate) implementation = determine_implementation(insn) if logger.level == logging.DEBUG : - logger.debug(self.render_instruction(insn)) + logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) for l in self.render_registers(): register_logger.debug(l) diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 82cf3b13e..a25c0ad7a 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -96,7 +96,8 @@ def __init__(self, cpu): for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] print("Reading map %s (%s kb)" % (index, size / 1024)) - map_bytes = self._cpu.read_bytes(m, size) + # map_bytes = self._cpu.read_bytes(m, size) + map_bytes = self._cpu._raw_read(m,size) print("Writing map %s" % index) self._emu.mem_write(m, ''.join(map_bytes)) print("Unicorn init complete") @@ -168,7 +169,7 @@ def _hook_xfer_mem(self, uc, access, address, size, value, data): assert access in (UC_MEM_WRITE, UC_MEM_READ, UC_MEM_FETCH) if access == UC_MEM_WRITE: - print("Writing %s bytes to %02x: %02x" % (size, address, value)) + # print("Writing %s 
bytes to %02x: %02x" % (size, address, value)) self._cpu.write_int(address, value, size*8) # If client code is attempting to read a value, we need to bring it @@ -338,14 +339,16 @@ def sync_unicorn_to_manticore(self): def write_back_memory(self, where, expr, size): if issymbolic(expr): - print("Concretizing memory") + print("Concretizing memory. Original Contents:") + data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] + print("%02x, %s" % (where, data)) from ..core.memory import ConcretizeMemory raise ConcretizeMemory(self._cpu.memory, where, size, policy='ONE') # data = '+'*(size/8) # else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] # print(data) - # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) From 15fee00a838ca7467c468f91d28064f20efaed79 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Thu, 2 Nov 2017 00:34:13 -0400 Subject: [PATCH 06/65] Cleaned up miscellaneous print statements --- manticore/core/cpu/abstractcpu.py | 2 +- manticore/core/cpu/x86.py | 1 - manticore/core/memory.py | 2 +- manticore/platforms/linux.py | 2 +- manticore/utils/emulate.py | 47 ++++++++++++++++--------------- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 096628a18..245199add 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -773,7 +773,7 @@ def determine_implementation(instruction): implementation = fallback_to_emulate for op in instruction.operands: if op.mem.segment is not None and 'FS' in op.mem.segment: - print("%s uses the %s segment" % (name, op.mem.segment)) + print("(U) Falling back to Manticore for %s" % 
self.render_instruction(instruction)) implementation = getattr(self, name, fallback_to_emulate) self._non_unicorn_instrs += 1 diff --git a/manticore/core/cpu/x86.py b/manticore/core/cpu/x86.py index 1097334a7..dc9ec7b3c 100644 --- a/manticore/core/cpu/x86.py +++ b/manticore/core/cpu/x86.py @@ -620,7 +620,6 @@ def address(self): address = 0 if self.mem.segment is not None: seg = self.mem.segment - print("Accessing %s" % seg) base, size, ty = cpu.get_descriptor(cpu.read_register(seg)) address += base #todo check limits and perms else: diff --git a/manticore/core/memory.py b/manticore/core/memory.py index 9194ad260..af0e2d638 100644 --- a/manticore/core/memory.py +++ b/manticore/core/memory.py @@ -84,7 +84,7 @@ def __init__(self, start, size, perms, name=None): :param perms: the access permissions of the map (rwx). ''' assert isinstance(start, (int, long)) and start >= 0, 'Invalid start address' - print("(M) Mapping %s bytes from %s to %s" % (size, hex(start), hex(start+size))) + # print("(M) Mapping %s kb from %s to %s" % (size / 1024, hex(start), hex(start+size))) assert isinstance(size, (int, long)) and size > 0, 'Invalid end address' super(Map, self).__init__() diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index d3243f0e9..59a9a103f 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1107,7 +1107,7 @@ def sys_lseek(self, fd, offset, whence): def sys_read(self, fd, buf, count): - print("Reading %s bytes from FD %s into %02x" % (count, fd, buf)) + print("(M) Reading %s bytes from FD %s into %02x" % (count, fd, buf)) data = '' if count != 0: # TODO check count bytes from buf diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index a25c0ad7a..19325f060 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -2,7 +2,7 @@ import inspect from ..core.memory import MemoryException, FileMap, AnonMap -from ..core.smtlib import Operators +from ..core.smtlib import Operators, solver 
from .helpers import issymbolic ###################################################################### @@ -18,6 +18,7 @@ import pprint as pp import struct +from binascii import hexlify logger = logging.getLogger("EMULATOR") @@ -55,7 +56,7 @@ def __init__(self, cpu): self.reset() for base in self.mem_map: size, perms = self.mem_map[base] - print("About to map %s bytes from %02x to %02x" % (size, base, base + size)) + # print("About to map %s bytes from %02x to %02x" % (size, base, base + size)) self._emu.mem_map(base, size, perms) self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped) @@ -83,7 +84,7 @@ def __init__(self, cpu): self.registers -= set(['YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15']) self.registers |= set(['XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15']) - print("Setting initial register state") + # print("Setting initial register state") for reg in self.registers: val = self._cpu.read_register(reg) if issymbolic(val): @@ -95,12 +96,9 @@ def __init__(self, cpu): self.create_GDT() for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] - print("Reading map %s (%s kb)" % (index, size / 1024)) - # map_bytes = self._cpu.read_bytes(m, size) map_bytes = self._cpu._raw_read(m,size) - print("Writing map %s" % index) self._emu.mem_write(m, ''.join(map_bytes)) - print("Unicorn init complete") + print("(U) Unicorn init complete") def reset(self): self._emu = self._unicorn() @@ -145,7 +143,7 @@ def _create_emulated_mapping(self, uc, address): permissions |= UC_PROT_WRITE if 'x' in m.perms: permissions |= UC_PROT_EXEC - print("About to map %s bytes from %02x to %02x" % (len(m), m.start, m.start + len(m))) + # print("(U) Mapping %s kb from %s to %s" % (len(m) / 1024, hex(m.start), hex(m.start+len(m)))) uc.mem_map(m.start, len(m), permissions) self.mem_map[m.start] = (len(m), 
permissions) @@ -197,7 +195,7 @@ def _hook_unmapped(self, uc, access, address, size, value, data): ''' try: - print("Mapping memory at " + hex(address)) + # print("Mapping memory at " + hex(address)) m = self._create_emulated_mapping(uc, address) except MemoryException as e: print("Failed to map memory") @@ -333,22 +331,25 @@ def sync_unicorn_to_manticore(self): raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", policy='ONE') val = self._emu.reg_read(self._to_unicorn_id(reg)) - if val != oldval: - print("(M) %s: %s -> %s" % (reg, oldval, val)) + # if val != oldval: + # print("(M) %s: %s -> %s" % (reg, oldval, val)) self._cpu.write_register(reg, val) def write_back_memory(self, where, expr, size): if issymbolic(expr): - print("Concretizing memory. Original Contents:") + print("Concretizing memory: ") + # print("Constraint set: %s" % self._cpu.memory.constraints) data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] - print("%02x, %s" % (where, data)) - from ..core.memory import ConcretizeMemory - raise ConcretizeMemory(self._cpu.memory, where, size, policy='ONE') - # data = '+'*(size/8) - # else: - data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] - # print(data) - print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) + concrete_data = [] + for c in data: + if issymbolic(c): + c = chr(solver.get_value(self._cpu.memory.constraints, c)) + print("Solved: %s" % hexlify(c)) + concrete_data.append(c) + data = concrete_data + else: + data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] + # print("Writing back %s bits to %02x: %s" % (size, where, ''.join(data))) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) @@ -358,12 +359,12 @@ def write_back_register(self, reg, val): self._emu.reg_write(self._to_unicorn_id('EFLAGS'), 
self._cpu.read_register('EFLAGS')) return oldval = self._emu.reg_read(self._to_unicorn_id(reg)) - if oldval != val: - print("(U) %s: %s -> %s" % (reg, oldval, val)) + # if oldval != val: + # print("(U) %s: %s -> %s" % (reg, oldval, val)) self._emu.reg_write(self._to_unicorn_id(reg), val) def update_segment(self, selector, base, size, perms): - print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) + # print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) dest = self.gdt_base + (selector*8) entry = self.make_table_entry(base, size) self._emu.mem_write(dest, entry) From 9f603601ab3c0647eb2a2a48a4a40b513a0af8bc Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Thu, 2 Nov 2017 01:31:11 -0400 Subject: [PATCH 07/65] Cleaned up implementation selection --- manticore/core/cpu/abstractcpu.py | 22 +++++++++------------- manticore/manticore.py | 6 +++--- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 245199add..f0a67141e 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -756,30 +756,26 @@ def execute(self): name = self.canonicalize_instruction_name(insn) def fallback_to_emulate(*operands): - text_bytes = ' '.join('%02x'%x for x in insn.bytes) - # logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - # insn.address, text_bytes, insn.mnemonic, insn.op_str) - - if 'SYSCALL' in name: - self.emu.sync_unicorn_to_manticore() - implementation = getattr(self, name) - implementation(*insn.operands) - else: - self.publish('will_emulate_instruction', insn) - self.emulate(insn) - self.publish('did_emulate_instruction', insn) + self.publish('will_emulate_instruction', insn) + self.emulate(insn) + self.publish('did_emulate_instruction', insn) def determine_implementation(instruction): implementation = fallback_to_emulate + for op in instruction.operands: if op.mem.segment 
is not None and 'FS' in op.mem.segment: print("(U) Falling back to Manticore for %s" % self.render_instruction(instruction)) implementation = getattr(self, name, fallback_to_emulate) self._non_unicorn_instrs += 1 + + if 'SYSCALL' in name: + self.emu.sync_unicorn_to_manticore() + implementation = getattr(self, name, fallback_to_emulate) + self._non_unicorn_instrs += 1 return implementation - #implementation = getattr(self, name, fallback_to_emulate) implementation = determine_implementation(insn) if logger.level == logging.DEBUG : logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) diff --git a/manticore/manticore.py b/manticore/manticore.py index b0024d137..519913ce2 100644 --- a/manticore/manticore.py +++ b/manticore/manticore.py @@ -573,7 +573,7 @@ def create_stats(self): def _start_run(self): assert not self.running - #FIXME this will be self.publish + #FIXME this will be self.publish self._executor.publish('will_start_run', self._initial_state) self.enqueue(self._initial_state) self._initial_state = None @@ -588,7 +588,7 @@ def _finish_run(self, profiling=False): if profiling: self._produce_profiling_data() - #FIXME this will be self.publish + #FIXME this will be self.publish self._executor.publish('did_finish_run') def run(self, procs=1, timeout=0, should_profile=False): @@ -651,8 +651,8 @@ def _get_symbol_address(self, symbol): if len(symbols) == 0: continue - return symbols[0].entry['st_value'] + return symbols[0].entry['st_value'] @property def coverage_file(self): From ab48f25c0757d4686bdce7fb6858275fb35404e1 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Fri, 3 Nov 2017 19:55:11 -0400 Subject: [PATCH 08/65] Working FS register in Unicorn --- manticore/core/cpu/abstractcpu.py | 7 +++-- manticore/utils/emulate.py | 48 ++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index f0a67141e..2a774fb00 100644 --- 
a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -765,9 +765,10 @@ def determine_implementation(instruction): for op in instruction.operands: if op.mem.segment is not None and 'FS' in op.mem.segment: - print("(U) Falling back to Manticore for %s" % self.render_instruction(instruction)) - implementation = getattr(self, name, fallback_to_emulate) - self._non_unicorn_instrs += 1 + pass + # print("(U) Falling back to Manticore for %s" % self.render_instruction(instruction)) + # implementation = getattr(self, name, fallback_to_emulate) + # self._non_unicorn_instrs += 1 if 'SYSCALL' in name: self.emu.sync_unicorn_to_manticore() diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 19325f060..6d63fafe0 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -93,7 +93,10 @@ def __init__(self, cpu): policy='ONE') self._emu.reg_write(self._to_unicorn_id(reg), val) - self.create_GDT() + self.scratch_mem = 0x1000 + self._emu.mem_map(self.scratch_mem, 4096) + + # self.create_GDT() for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] map_bytes = self._cpu._raw_read(m,size) @@ -365,9 +368,11 @@ def write_back_register(self, reg, val): def update_segment(self, selector, base, size, perms): # print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) - dest = self.gdt_base + (selector*8) - entry = self.make_table_entry(base, size) - self._emu.mem_write(dest, entry) + # dest = self.gdt_base + (selector*8) + # entry = self.make_table_entry(base, size) + # self._emu.mem_write(dest, entry) + if selector == 99: + self.set_fs(base) def make_table_entry(self, base, limit, access_byte=0xff, flags=0xf0): @@ -387,3 +392,38 @@ def create_GDT(self, base=0x1000, size=8192): self._emu.mem_map(base, size) self._emu.reg_write(UC_X86_REG_GDTR, (0, base, size, 0)) + + + def set_msr(self, msr, value): + ''' + set the given model-specific register (MSR) to the given value. 
+ this will clobber some memory at the given scratch address, as it emits some code. + ''' + # save clobbered registers + orax = self._emu.reg_read(UC_X86_REG_RAX) + ordx = self._emu.reg_read(UC_X86_REG_RDX) + orcx = self._emu.reg_read(UC_X86_REG_RCX) + orip = self._emu.reg_read(UC_X86_REG_RIP) + + # x86: wrmsr + buf = '\x0f\x30' + self._emu.mem_write(self.scratch_mem, buf) + self._emu.reg_write(UC_X86_REG_RAX, value & 0xFFFFFFFF) + self._emu.reg_write(UC_X86_REG_RDX, (value >> 32) & 0xFFFFFFFF) + self._emu.reg_write(UC_X86_REG_RCX, msr & 0xFFFFFFFF) + self._emu.emu_start(self.scratch_mem, self.scratch_mem+len(buf), count=1) + + # restore clobbered registers + self._emu.reg_write(UC_X86_REG_RAX, orax) + self._emu.reg_write(UC_X86_REG_RDX, ordx) + self._emu.reg_write(UC_X86_REG_RCX, orcx) + self._emu.reg_write(UC_X86_REG_RIP, orip) + + + def set_fs(self, addr): + ''' + set the FS.base hidden descriptor-register field to the given address. + this enables referencing the fs segment on x86-64. 
+ ''' + FSMSR = 0xC0000100 + return self.set_msr(FSMSR, addr) \ No newline at end of file From b0ed8f4400fd4d1936028e0733f02507ab9c715b Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Sun, 5 Nov 2017 00:51:02 -0400 Subject: [PATCH 09/65] Improved register sync performance --- manticore/core/cpu/abstractcpu.py | 5 ++-- manticore/utils/emulate.py | 41 +++---------------------------- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 2a774fb00..6f1dbf96f 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -763,9 +763,8 @@ def fallback_to_emulate(*operands): def determine_implementation(instruction): implementation = fallback_to_emulate - for op in instruction.operands: - if op.mem.segment is not None and 'FS' in op.mem.segment: - pass + # for op in instruction.operands: + # if op.mem.segment is not None and 'FS' in op.mem.segment: # print("(U) Falling back to Manticore for %s" % self.render_instruction(instruction)) # implementation = getattr(self, name, fallback_to_emulate) # self._non_unicorn_instrs += 1 diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 6d63fafe0..06c042016 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -96,7 +96,6 @@ def __init__(self, cpu): self.scratch_mem = 0x1000 self._emu.mem_map(self.scratch_mem, 4096) - # self.create_GDT() for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] map_bytes = self._cpu._raw_read(m,size) @@ -308,16 +307,9 @@ def _step(self, instruction): register, self._cpu.read_register(register), self._emu.reg_read(self._to_unicorn_id(register)) ) logger.debug(">"*10) - - # Bring back Unicorn registers to Manticore - for reg in self.registers: - val = self._emu.reg_read(self._to_unicorn_id(reg)) - self._cpu.write_register(reg, val) - - #Unicorn hack. 
On single step unicorn wont advance the PC register - # mu_pc = self.get_unicorn_pc() - # if saved_PC == mu_pc: - # self._cpu.PC = saved_PC + instruction.size + + # self.sync_unicorn_to_manticore() + self._cpu.PC = self._emu.reg_read(self._to_unicorn_id('PC')) # Raise the exception from a hook that Unicorn would have eaten if self._to_raise: @@ -328,14 +320,7 @@ def _step(self, instruction): def sync_unicorn_to_manticore(self): for reg in self.registers: - oldval = self._cpu.read_register(reg) - if issymbolic(oldval): - from ..core.cpu.abstractcpu import ConcretizeRegister - raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", - policy='ONE') val = self._emu.reg_read(self._to_unicorn_id(reg)) - # if val != oldval: - # print("(M) %s: %s -> %s" % (reg, oldval, val)) self._cpu.write_register(reg, val) def write_back_memory(self, where, expr, size): @@ -374,26 +359,6 @@ def update_segment(self, selector, base, size, perms): if selector == 99: self.set_fs(base) - - def make_table_entry(self, base, limit, access_byte=0xff, flags=0xf0): - # http://wiki.osdev.org/Global_Descriptor_Table#Structure - out = 0 - out |= (limit & 0xffff) - out |= ((base & 0xffffff) << 16) - out |= ((access_byte & 0xff) << 40) - out |= (((limit >> 16) & 0xf) << 48) - out |= ((flags & 0xff) << 52) - out |= (((base >> 24) & 0xff) << 56) - return struct.pack(' Date: Tue, 7 Nov 2017 23:09:35 -0500 Subject: [PATCH 10/65] Switched to memory delta model --- manticore/utils/emulate.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 06c042016..2a4a9ce15 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -28,6 +28,7 @@ class UnicornEmulator(object): ''' def __init__(self, cpu): self._cpu = cpu + self._mem_delta = {} self.flag_registers = set(['CF','PF','AF','ZF','SF','IF','DF','OF']) text = cpu.memory.map_containing(cpu.PC) @@ -169,8 +170,7 @@ def 
_hook_xfer_mem(self, uc, access, address, size, value, data): assert access in (UC_MEM_WRITE, UC_MEM_READ, UC_MEM_FETCH) if access == UC_MEM_WRITE: - # print("Writing %s bytes to %02x: %02x" % (size, address, value)) - self._cpu.write_int(address, value, size*8) + self._mem_delta[address] = (value, size) # If client code is attempting to read a value, we need to bring it # in from Manticore state. If we try to mem_write it here, Unicorn @@ -272,17 +272,6 @@ def _step(self, instruction): ''' A single attempt at executing an instruction. ''' - # XXX(yan): This concretizes the entire register state. This is overly - # aggressive. Once capstone adds consistent support for accessing - # referred registers, make this only concretize those registers being - # read from. - # for reg in self.registers: - # val = self._cpu.read_register(reg) - # if issymbolic(val): - # from ..core.cpu.abstractcpu import ConcretizeRegister - # raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", - # policy='ONE') - # self._emu.reg_write(self._to_unicorn_id(reg), val) # Bring in the instruction itself instruction = self._cpu.decode_instruction(self._cpu.PC) @@ -322,8 +311,15 @@ def sync_unicorn_to_manticore(self): for reg in self.registers: val = self._emu.reg_read(self._to_unicorn_id(reg)) self._cpu.write_register(reg, val) + for location in self._mem_delta: + value, size = self._mem_delta[location] + # print("Writing %s bytes to 0x%02x" % (size, location)) + self._cpu.write_int(location, value, size*8) + self._mem_delta = {} def write_back_memory(self, where, expr, size): + if where in self._mem_delta.keys(): + return if issymbolic(expr): print("Concretizing memory: ") # print("Constraint set: %s" % self._cpu.memory.constraints) @@ -337,7 +333,7 @@ def write_back_memory(self, where, expr, size): data = concrete_data else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] - # print("Writing back %s bits to %02x: %s" % (size, where, 
''.join(data))) + # print("Writing back %s bits to 0x%02x" % (size, where)) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) From dbb9e497ae0abfaa8c05ce78ae3eb73b32dda272 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Wed, 8 Nov 2017 00:12:35 -0500 Subject: [PATCH 11/65] Added timing information --- manticore/core/cpu/abstractcpu.py | 6 ---- manticore/platforms/linux.py | 8 +++-- manticore/utils/emulate.py | 56 +++++++++++-------------------- 3 files changed, 25 insertions(+), 45 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 6f1dbf96f..331b6ffa3 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -762,12 +762,6 @@ def fallback_to_emulate(*operands): def determine_implementation(instruction): implementation = fallback_to_emulate - - # for op in instruction.operands: - # if op.mem.segment is not None and 'FS' in op.mem.segment: - # print("(U) Falling back to Manticore for %s" % self.render_instruction(instruction)) - # implementation = getattr(self, name, fallback_to_emulate) - # self._non_unicorn_instrs += 1 if 'SYSCALL' in name: self.emu.sync_unicorn_to_manticore() diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 59a9a103f..4809eac0d 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -6,6 +6,7 @@ import struct import ctypes import socket +import time #Remove in favor of binary.py from elftools.elf.elffile import ELFFile @@ -1877,8 +1878,11 @@ def syscall(self): except (AttributeError, KeyError): raise Exception("SyscallNotImplemented %d %d"%(self.current.address_bit_size, index)) - print("(M) Invoking %s syscall" % name) - return self._syscall_abi.invoke(implementation) + start = time.time() + out = self._syscall_abi.invoke(implementation) + + print("(M) Invoked %s syscall (%s seconds)" % (name, time.time() - start)) + return out def 
sys_clock_gettime(self, clock_id, timespec): logger.info("sys_clock_time not really implemented") diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 2a4a9ce15..1e92917a0 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -19,6 +19,7 @@ import pprint as pp import struct from binascii import hexlify +import time logger = logging.getLogger("EMULATOR") @@ -27,11 +28,15 @@ class UnicornEmulator(object): Helper class to emulate a single instruction via Unicorn. ''' def __init__(self, cpu): + self.init_time = time.time() + self.out_of_step_time = self.init_time - self.init_time + self.in_step_time = self.out_of_step_time + self.sync_time = self.in_step_time + self._cpu = cpu self._mem_delta = {} self.flag_registers = set(['CF','PF','AF','ZF','SF','IF','DF','OF']) - text = cpu.memory.map_containing(cpu.PC) cpu.subscribe('did_write_memory', self.write_back_memory) cpu.subscribe('did_write_register', self.write_back_register) cpu.subscribe('did_set_descriptor', self.update_segment) @@ -48,10 +53,6 @@ def __init__(self, cpu): permissions |= UC_PROT_EXEC self.mem_map[m.start] = (len(m), permissions) - # Keep track of all the memory Unicorn needs while executing this - # instruction - self._should_be_written = {} - # Establish Manticore state, potentially from past emulation # attempts self.reset() @@ -85,7 +86,6 @@ def __init__(self, cpu): self.registers -= set(['YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15']) self.registers |= set(['XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15']) - # print("Setting initial register state") for reg in self.registers: val = self._cpu.read_register(reg) if issymbolic(val): @@ -101,7 +101,10 @@ def __init__(self, cpu): size = self.mem_map[m][0] map_bytes = self._cpu._raw_read(m,size) self._emu.mem_write(m, ''.join(map_bytes)) - 
print("(U) Unicorn init complete") + + self.init_time = time.time() - self.init_time + print("(U) Unicorn init complete (%s seconds)" % self.init_time) + self._last_step_time = time.time() def reset(self): self._emu = self._unicorn() @@ -172,22 +175,6 @@ def _hook_xfer_mem(self, uc, access, address, size, value, data): if access == UC_MEM_WRITE: self._mem_delta[address] = (value, size) - # If client code is attempting to read a value, we need to bring it - # in from Manticore state. If we try to mem_write it here, Unicorn - # will segfault. We add the value to a list of things that need to - # be written, and ask to restart the emulation. - # elif access == UC_MEM_READ: - # print("Reading %s bytes from %02x: %02x" % (size, address, value)) - # value = self._cpu.read_bytes(address, size) - # - # if address in self._should_be_written: - # return True - # - # self._should_be_written[address] = value - # - # self._should_try_again = True - # return False - return True @@ -250,19 +237,17 @@ def emulate(self, instruction): # The emulation might restart if Unicorn needs to bring in a memory map # or bring a value from Manticore state. 
while True: - for address, values in self._should_be_written.items(): - for offset, byte in enumerate(values, start=address): - if issymbolic(byte): - from ..core.cpu.abstractcpu import ConcretizeMemory - raise ConcretizeMemory(self._cpu.memory, offset, 8, - "Concretizing for emulation") - - self._emu.mem_write(address, ''.join(values)) # Try emulation self._should_try_again = False + starttime = time.time() + self.out_of_step_time += (starttime - self._last_step_time) + self._last_step_time = starttime + self._step(instruction) + + self.in_step_time += (time.time() - starttime) if not self._should_try_again: break @@ -276,8 +261,6 @@ def _step(self, instruction): # Bring in the instruction itself instruction = self._cpu.decode_instruction(self._cpu.PC) - saved_PC = self._cpu.PC - try: self._emu.emu_start(self._cpu.PC, self._cpu.PC+instruction.size, count=1) except UcError as e: @@ -298,7 +281,7 @@ def _step(self, instruction): logger.debug(">"*10) # self.sync_unicorn_to_manticore() - self._cpu.PC = self._emu.reg_read(self._to_unicorn_id('PC')) + self._cpu.PC = self.get_unicorn_pc() # Raise the exception from a hook that Unicorn would have eaten if self._to_raise: @@ -308,6 +291,7 @@ def _step(self, instruction): return def sync_unicorn_to_manticore(self): + start = time.time() for reg in self.registers: val = self._emu.reg_read(self._to_unicorn_id(reg)) self._cpu.write_register(reg, val) @@ -316,6 +300,7 @@ def sync_unicorn_to_manticore(self): # print("Writing %s bytes to 0x%02x" % (size, location)) self._cpu.write_int(location, value, size*8) self._mem_delta = {} + self.sync_time += (time.time() - start) def write_back_memory(self, where, expr, size): if where in self._mem_delta.keys(): @@ -349,9 +334,6 @@ def write_back_register(self, reg, val): def update_segment(self, selector, base, size, perms): # print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) - # dest = self.gdt_base + (selector*8) - # entry = 
self.make_table_entry(base, size) - # self._emu.mem_write(dest, entry) if selector == 99: self.set_fs(base) From dc55c78eb9c571b88b37fb65a5b3d59698d084eb Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Sat, 25 Nov 2017 02:31:42 -0500 Subject: [PATCH 12/65] Stripped print statements --- manticore/core/cpu/abstractcpu.py | 8 ++--- manticore/core/executor.py | 9 +++-- manticore/core/memory.py | 1 - manticore/platforms/linux.py | 3 -- manticore/utils/emulate.py | 59 +++++++++++++++---------------- 5 files changed, 37 insertions(+), 43 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index 331b6ffa3..f028ebd6b 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -507,14 +507,14 @@ def write_int(self, where, expression, size=None): self.publish('did_write_memory', where, expression, size) - def _raw_read(self, where, size=None): + def _raw_read(self, where, size=1): ''' Selects bytes from memory :param int where: address to read from - :param size: number of bits to read - :return: the value read - :rtype: int or BitVec + :param size: number of bytes to read + :return: the bytes in memory + :rtype: list ''' data = self.memory[where:where + size] assert (len(data)) == size diff --git a/manticore/core/executor.py b/manticore/core/executor.py index d4dbdb270..16d1264d3 100644 --- a/manticore/core/executor.py +++ b/manticore/core/executor.py @@ -54,7 +54,7 @@ def locked_context(self, key=None, default=dict): yield policy_context def _add_state_callback(self, state_id, state): - ''' Save summarize(state) on policy shared context before + ''' Save summarize(state) on policy shared context before the state is stored ''' summary = self.summarize(state) @@ -137,7 +137,7 @@ def summarize(self, state): def choice(self, state_ids): interesting = set(state_ids) - with self.locked_context() as policy_ctx: + with self.locked_context() as policy_ctx: visited = policy_ctx.get('visited', dict()) 
summaries = policy_ctx.get('summaries', dict()) lst = [] @@ -193,7 +193,7 @@ def __init__(self, initial=None, workspace=None, policy='random', context=None, #scheduling priority policy (wip) #Set policy - policies = {'random': Random, + policies = {'random': Random, 'uncovered': Uncovered, 'branchlimited': BranchLimited, } @@ -209,7 +209,7 @@ def __init__(self, initial=None, workspace=None, policy='random', context=None, @contextmanager def locked_context(self, key=None, default=dict): - ''' Executor context is a shared memory object. All workers share this. + ''' Executor context is a shared memory object. All workers share this. It needs a lock. Its used like this: with executor.context() as context: @@ -494,7 +494,6 @@ def run(self): except (Exception, AssertionError) as e: import traceback trace = traceback.format_exc() - print str(e), trace logger.error("Exception: %s\n%s", str(e), trace) #Notify this worker is done self.publish('will_terminate_state', current_state, current_state_id, 'Exception') diff --git a/manticore/core/memory.py b/manticore/core/memory.py index af0e2d638..447f06f37 100644 --- a/manticore/core/memory.py +++ b/manticore/core/memory.py @@ -84,7 +84,6 @@ def __init__(self, start, size, perms, name=None): :param perms: the access permissions of the map (rwx). 
''' assert isinstance(start, (int, long)) and start >= 0, 'Invalid start address' - # print("(M) Mapping %s kb from %s to %s" % (size / 1024, hex(start), hex(start+size))) assert isinstance(size, (int, long)) and size > 0, 'Invalid end address' super(Map, self).__init__() diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 4809eac0d..cd06d3451 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -1108,7 +1108,6 @@ def sys_lseek(self, fd, offset, whence): def sys_read(self, fd, buf, count): - print("(M) Reading %s bytes from FD %s into %02x" % (count, fd, buf)) data = '' if count != 0: # TODO check count bytes from buf @@ -1878,10 +1877,8 @@ def syscall(self): except (AttributeError, KeyError): raise Exception("SyscallNotImplemented %d %d"%(self.current.address_bit_size, index)) - start = time.time() out = self._syscall_abi.invoke(implementation) - print("(M) Invoked %s syscall (%s seconds)" % (name, time.time() - start)) return out def sys_clock_gettime(self, clock_id, timespec): diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 1e92917a0..6ea43089c 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -16,9 +16,6 @@ from capstone.arm import * from capstone.x86 import * -import pprint as pp -import struct -from binascii import hexlify import time logger = logging.getLogger("EMULATOR") @@ -40,6 +37,11 @@ def __init__(self, cpu): cpu.subscribe('did_write_memory', self.write_back_memory) cpu.subscribe('did_write_register', self.write_back_register) cpu.subscribe('did_set_descriptor', self.update_segment) + cpu.subscribe('will_execute_instruction', self.pre_execute_callback) + cpu.subscribe('did_execute_instruction', self.post_execute_callback) + + self.reset() + # Keep track of all memory mappings. 
We start with just the text section self.mem_map = {} for m in cpu.memory.maps: @@ -53,12 +55,11 @@ def __init__(self, cpu): permissions |= UC_PROT_EXEC self.mem_map[m.start] = (len(m), permissions) + # Establish Manticore state, potentially from past emulation # attempts - self.reset() for base in self.mem_map: size, perms = self.mem_map[base] - # print("About to map %s bytes from %02x to %02x" % (size, base, base + size)) self._emu.mem_map(base, size, perms) self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped) @@ -92,6 +93,7 @@ def __init__(self, cpu): from ..core.cpu.abstractcpu import ConcretizeRegister raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", policy='ONE') + logger.debug("Writing %s into %s", val, reg) self._emu.reg_write(self._to_unicorn_id(reg), val) self.scratch_mem = 0x1000 @@ -99,11 +101,13 @@ def __init__(self, cpu): for index, m in enumerate(self.mem_map): size = self.mem_map[m][0] + + start_time = time.time() map_bytes = self._cpu._raw_read(m,size) + logger.info("Reading %s kb map at 0x%02x took %s seconds", size / 1024, m, time.time() - start_time) self._emu.mem_write(m, ''.join(map_bytes)) - + self.init_time = time.time() - self.init_time - print("(U) Unicorn init complete (%s seconds)" % self.init_time) self._last_step_time = time.time() def reset(self): @@ -149,7 +153,6 @@ def _create_emulated_mapping(self, uc, address): permissions |= UC_PROT_WRITE if 'x' in m.perms: permissions |= UC_PROT_EXEC - # print("(U) Mapping %s kb from %s to %s" % (len(m) / 1024, hex(m.start), hex(m.start+len(m)))) uc.mem_map(m.start, len(m), permissions) self.mem_map[m.start] = (len(m), permissions) @@ -184,10 +187,9 @@ def _hook_unmapped(self, uc, access, address, size, value, data): ''' try: - # print("Mapping memory at " + hex(address)) m = self._create_emulated_mapping(uc, address) except MemoryException as e: - print("Failed to map memory") + logger.error("Failed to map memory") self._to_raise = e self._should_try_again = 
False return False @@ -199,7 +201,7 @@ def _interrupt(self, uc, number, data): ''' Handle software interrupt (SVC/INT) ''' - print("Caught interrupt: %s" % number) + logger.info("Caught interrupt: %s" % number) from ..core.cpu.abstractcpu import Interruption self._to_raise = Interruption(number) return True @@ -221,8 +223,7 @@ def _to_unicorn_id(self, reg_name): try: return globals()['UC_X86_REG_' + custom_mapping[reg_name]] except: - print 'UC_X86_REG_' + str(reg_name) + ' not in ' - pp.pprint([k for k in globals() if 'UC_X86_REG' in k]) + logger.error("Can't find register UC_X86_REG_%s",str(reg_name)) raise else: @@ -240,14 +241,8 @@ def emulate(self, instruction): # Try emulation self._should_try_again = False - - starttime = time.time() - self.out_of_step_time += (starttime - self._last_step_time) - self._last_step_time = starttime self._step(instruction) - - self.in_step_time += (time.time() - starttime) if not self._should_try_again: break @@ -285,7 +280,7 @@ def _step(self, instruction): # Raise the exception from a hook that Unicorn would have eaten if self._to_raise: - print("Raising %s" % self._to_raise) + logger.info("Raising %s", self._to_raise) raise self._to_raise return @@ -297,7 +292,7 @@ def sync_unicorn_to_manticore(self): self._cpu.write_register(reg, val) for location in self._mem_delta: value, size = self._mem_delta[location] - # print("Writing %s bytes to 0x%02x" % (size, location)) + logger.debug("Writing %s bytes to 0x%02x", size, location) self._cpu.write_int(location, value, size*8) self._mem_delta = {} self.sync_time += (time.time() - start) @@ -306,19 +301,16 @@ def write_back_memory(self, where, expr, size): if where in self._mem_delta.keys(): return if issymbolic(expr): - print("Concretizing memory: ") - # print("Constraint set: %s" % self._cpu.memory.constraints) data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] concrete_data = [] for c in data: if issymbolic(c): c = 
chr(solver.get_value(self._cpu.memory.constraints, c)) - print("Solved: %s" % hexlify(c)) concrete_data.append(c) data = concrete_data else: data = [Operators.CHR(Operators.EXTRACT(expr, offset, 8)) for offset in xrange(0, size, 8)] - # print("Writing back %s bits to 0x%02x" % (size, where)) + logger.debug("Writing back %s bits to 0x%02x", size, where) if not self.in_map(where): self._create_emulated_mapping(self._emu, where) self._emu.mem_write(where, ''.join(data)) @@ -327,13 +319,10 @@ def write_back_register(self, reg, val): if reg in self.flag_registers: self._emu.reg_write(self._to_unicorn_id('EFLAGS'), self._cpu.read_register('EFLAGS')) return - oldval = self._emu.reg_read(self._to_unicorn_id(reg)) - # if oldval != val: - # print("(U) %s: %s -> %s" % (reg, oldval, val)) self._emu.reg_write(self._to_unicorn_id(reg), val) def update_segment(self, selector, base, size, perms): - # print("(U) Updating selector %s to 0x%02x (%s bytes) (%s)" % (selector, base, size, perms)) + logger.info("Updating selector %s to 0x%02x (%s bytes) (%s)", selector, base, size, perms) if selector == 99: self.set_fs(base) @@ -369,4 +358,14 @@ def set_fs(self, addr): this enables referencing the fs segment on x86-64. 
''' FSMSR = 0xC0000100 - return self.set_msr(FSMSR, addr) \ No newline at end of file + return self.set_msr(FSMSR, addr) + + def pre_execute_callback(self, _insn): + start_time = time.time() + self.out_of_step_time += (start_time - self._last_step_time) + self._last_step_time = start_time + + def post_execute_callback(self, _insn): + start_time = time.time() + self.in_step_time += (start_time - self._last_step_time) + self._last_step_time = start_time \ No newline at end of file From d5f3f991f5b02760447c5016d3be10f28a3bbf31 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Sat, 25 Nov 2017 04:13:22 -0500 Subject: [PATCH 13/65] Hid concrete mode behind kwarg --- manticore/core/cpu/abstractcpu.py | 31 ++- manticore/core/cpu/cpufactory.py | 4 +- manticore/manticore.py | 4 +- manticore/platforms/linux.py | 9 +- manticore/utils/emulate.py | 2 +- manticore/utils/event.py | 4 +- manticore/utils/fallback_emulator.py | 274 +++++++++++++++++++++++++++ 7 files changed, 308 insertions(+), 20 deletions(-) create mode 100644 manticore/utils/fallback_emulator.py diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index f028ebd6b..544c2cd88 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -15,7 +15,8 @@ ConcretizeMemory, InvalidMemoryAccess, MemoryException, FileMap, AnonMap ) from ...utils.helpers import issymbolic -from ...utils.emulate import UnicornEmulator +from ...utils.emulate import ConcreteUnicornEmulator +from ...utils.fallback_emulator import UnicornEmulator from ...utils.event import Eventful logger = logging.getLogger("CPU") @@ -364,9 +365,10 @@ class Cpu(Eventful): - stack_alias ''' - def __init__(self, regfile, memory, **kwargs): + def __init__(self, regfile, memory, *args, **kwargs): assert isinstance(regfile, RegisterFile) self._disasm = kwargs.pop("disasm", 'capstone') + self._concrete = kwargs.pop("concrete", False) super(Cpu, self).__init__(**kwargs) self._regfile = regfile self._memory 
= memory @@ -387,15 +389,17 @@ def __getstate__(self): state['icount'] = self._icount state['last_pc'] = self._last_pc state['disassembler'] = self._disasm + state['concrete'] = self._concrete return state def __setstate__(self, state): Cpu.__init__(self, state['regfile'], state['memory'], - disasm=state['disassembler']) + disasm=state['disassembler'], concrete=state['concrete']) self._icount = state['icount'] self._last_pc = state['last_pc'] self._disasm = state['disassembler'] + self._concrete = state['concrete'] super(Cpu, self).__setstate__(state) @property @@ -761,11 +765,16 @@ def fallback_to_emulate(*operands): self.publish('did_emulate_instruction', insn) def determine_implementation(instruction): - implementation = fallback_to_emulate - - if 'SYSCALL' in name: - self.emu.sync_unicorn_to_manticore() + if self._concrete: + implementation = fallback_to_emulate + + if 'SYSCALL' in name: + self.emu.sync_unicorn_to_manticore() + implementation = getattr(self, name, fallback_to_emulate) + else: implementation = getattr(self, name, fallback_to_emulate) + + if implementation != fallback_to_emulate: self._non_unicorn_instrs += 1 return implementation @@ -790,14 +799,18 @@ def emulate(self, insn): ''' if not hasattr(self, 'emu'): - self.emu = UnicornEmulator(self) + if self._concrete: + self.emu = ConcreteUnicornEmulator(self) + else: + self.emu = UnicornEmulator(self) self.emu.emulate(insn) # We have been seeing occasional Unicorn issues with it not clearing # the backing unicorn instance. Saw fewer issues with the following # line present. 
- # del emu + if not self._concrete: + del emu def render_instruction(self, insn=None): try: diff --git a/manticore/core/cpu/cpufactory.py b/manticore/core/cpu/cpufactory.py index 5a02db475..54e7ea579 100644 --- a/manticore/core/cpu/cpufactory.py +++ b/manticore/core/cpu/cpufactory.py @@ -9,8 +9,8 @@ class CpuFactory(object): } @staticmethod - def get_cpu(mem, machine): - return CpuFactory._cpus[machine](mem) + def get_cpu(mem, machine, **kwargs): + return CpuFactory._cpus[machine](mem, **kwargs) @staticmethod def get_function_abi(cpu, os, machine): diff --git a/manticore/manticore.py b/manticore/manticore.py index 519913ce2..4ebd70b4e 100644 --- a/manticore/manticore.py +++ b/manticore/manticore.py @@ -65,7 +65,7 @@ def make_decree(program, concrete_data='', **kwargs): platform.input.transmit(initial_state.symbolicate_buffer('+'*14, label='RECEIVE')) return initial_state -def make_linux(program, argv=None, env=None, symbolic_files=None, concrete_start = ''): +def make_linux(program, argv=None, env=None, symbolic_files=None, concrete_start = '', **kwargs): env = {} if env is None else env argv = [] if argv is None else argv env = ['%s=%s'%(k,v) for k,v in env.items()] @@ -74,7 +74,7 @@ def make_linux(program, argv=None, env=None, symbolic_files=None, concrete_start constraints = ConstraintSet() platform = linux.SLinux(program, argv=argv, envp=env, - symbolic_files=symbolic_files) + symbolic_files=symbolic_files, **kwargs) initial_state = State(constraints, platform) diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index cd06d3451..45b974e9d 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -315,6 +315,7 @@ def __init__(self, program, argv=None, envp=None, disasm='capstone', **kwargs): # Many programs to support SLinux self.programs = program self.disasm = disasm + self._concrete = kwargs.pop('concrete', False) # dict of [int -> (int, int)] where tuple is (soft, hard) limits self._rlimits = { @@ -410,7 +411,7 @@ 
def _execve(self, program, argv, envp): def _mk_proc(self, arch): mem = Memory32() if arch in {'i386', 'armv7'} else Memory64() - cpu = CpuFactory.get_cpu(mem, arch) + cpu = CpuFactory.get_cpu(mem, arch, concrete=self._concrete) return cpu @@ -2256,7 +2257,7 @@ class SLinux(Linux): :param tuple[str] symbolic_files: files to consider symbolic """ def __init__(self, programs, argv=None, envp=None, symbolic_files=None, - disasm='capstone'): + disasm='capstone', **kwargs): argv = [] if argv is None else argv envp = [] if envp is None else envp symbolic_files = [] if symbolic_files is None else symbolic_files @@ -2267,7 +2268,7 @@ def __init__(self, programs, argv=None, envp=None, symbolic_files=None, super(SLinux, self).__init__(programs, argv=argv, envp=envp, - disasm=disasm) + disasm=disasm, **kwargs) def _mk_proc(self, arch): @@ -2280,7 +2281,7 @@ def _mk_proc(self, arch): from ..core.cpu.binja import BinjaCpu return BinjaCpu(mem) - cpu = CpuFactory.get_cpu(mem, arch) + cpu = CpuFactory.get_cpu(mem, arch, concrete=self._concrete) return cpu def add_symbolic_file(self, symbolic_file): diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index 6ea43089c..d097acbc3 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -20,7 +20,7 @@ logger = logging.getLogger("EMULATOR") -class UnicornEmulator(object): +class ConcreteUnicornEmulator(object): ''' Helper class to emulate a single instruction via Unicorn. 
''' diff --git a/manticore/utils/event.py b/manticore/utils/event.py index 98f365bd0..31504f1b8 100644 --- a/manticore/utils/event.py +++ b/manticore/utils/event.py @@ -16,7 +16,7 @@ def __init__(self, *args, **kwargs): self._signals = dict() # a set of sink eventful objects (see forward_events_from()) self._forwards = WeakKeyDictionary() - super(Eventful, self).__init__(*args, **kwargs) + super(Eventful, self).__init__() def __setstate__(self, state): ''' It wont get serialized by design, user is responsible to reconnect''' @@ -47,7 +47,7 @@ def _get_signal_bucket(self, name): return self._signals.setdefault(name, dict()) # The underscore _name is to avoid naming collisions with callback params - def publish(self, _name, *args, **kwargs): + def publish(self, _name, *args, **kwargs): bucket = self._get_signal_bucket(_name) for robj, methods in bucket.iteritems(): for callback in methods: diff --git a/manticore/utils/fallback_emulator.py b/manticore/utils/fallback_emulator.py new file mode 100644 index 000000000..bca6b5cbf --- /dev/null +++ b/manticore/utils/fallback_emulator.py @@ -0,0 +1,274 @@ +import logging +import inspect + +from ..core.memory import MemoryException, FileMap, AnonMap + +from .helpers import issymbolic +###################################################################### +# Abstract classes for capstone/unicorn based cpus +# no emulator by default +from unicorn import * +from unicorn.x86_const import * +from unicorn.arm_const import * + +from capstone import * +from capstone.arm import * +from capstone.x86 import * + +logger = logging.getLogger(__name__) + +class UnicornEmulator(object): + ''' + Helper class to emulate a single instruction via Unicorn. + ''' + def __init__(self, cpu): + self._cpu = cpu + + text = cpu.memory.map_containing(cpu.PC) + # Keep track of all memory mappings. 
We start with just the text section + self._should_be_mapped = { + text.start: (len(text), UC_PROT_READ | UC_PROT_EXEC) + } + + # Keep track of all the memory Unicorn needs while executing this + # instruction + self._should_be_written = {} + + def reset(self): + self._emu = self._unicorn() + self._to_raise = None + + def _unicorn(self): + if self._cpu.arch == CS_ARCH_ARM: + if self._cpu.mode == CS_MODE_ARM: + return Uc(UC_ARCH_ARM, UC_MODE_ARM) + elif self._cpu.mode == CS_MODE_THUMB: + return Uc(UC_ARCH_ARM, UC_MODE_THUMB) + elif self._cpu.arch == CS_ARCH_X86: + if self._cpu.mode == CS_MODE_32: + return Uc(UC_ARCH_X86, UC_MODE_32) + elif self._cpu.mode == CS_MODE_64: + return Uc(UC_ARCH_X86, UC_MODE_64) + + raise RuntimeError("Unsupported architecture") + + + def _create_emulated_mapping(self, uc, address): + ''' + Create a mapping in Unicorn and note that we'll need it if we retry. + :param uc: The Unicorn instance. + :param address: The address which is contained by the mapping. + :rtype Map + ''' + + m = self._cpu.memory.map_containing(address) + + permissions = UC_PROT_NONE + if 'r' in m.perms: + permissions |= UC_PROT_READ + if 'w' in m.perms: + permissions |= UC_PROT_WRITE + if 'x' in m.perms: + permissions |= UC_PROT_EXEC + + uc.mem_map(m.start, len(m), permissions) + + self._should_be_mapped[m.start] = (len(m), permissions) + + return m + + def get_unicorn_pc(self): + if self._cpu.arch == CS_ARCH_ARM: + return self._emu.reg_read(UC_ARM_REG_R15) + elif self._cpu.arch == CS_ARCH_X86: + if self._cpu.mode == CS_MODE_32: + return self._emu.reg_read(UC_X86_REG_EIP) + elif self._cpu.mode == CS_MODE_64: + return self._emu.reg_read(UC_X86_REG_RIP) + + + def _hook_xfer_mem(self, uc, access, address, size, value, data): + ''' + Handle memory operations from unicorn. 
+ ''' + assert access in (UC_MEM_WRITE, UC_MEM_READ, UC_MEM_FETCH) + + if access == UC_MEM_WRITE: + self._cpu.write_int(address, value, size*8) + + # If client code is attempting to read a value, we need to bring it + # in from Manticore state. If we try to mem_write it here, Unicorn + # will segfault. We add the value to a list of things that need to + # be written, and ask to restart the emulation. + elif access == UC_MEM_READ: + value = self._cpu.read_bytes(address, size) + + if address in self._should_be_written: + return True + + self._should_be_written[address] = value + + self._should_try_again = True + return False + + return True + + + def _hook_unmapped(self, uc, access, address, size, value, data): + ''' + We hit an unmapped region; map it into unicorn. + ''' + + try: + m = self._create_emulated_mapping(uc, address) + except MemoryException as e: + self._to_raise = e + self._should_try_again = False + return False + + self._should_try_again = True + return False + + def _interrupt(self, uc, number, data): + ''' + Handle software interrupt (SVC/INT) + ''' + + from ..core.cpu.abstractcpu import Interruption + self._to_raise = Interruption(number) + return True + + def _to_unicorn_id(self, reg_name): + # TODO(felipe, yan): Register naming is broken in current unicorn + # packages, but works on unicorn git's master. We leave this hack + # in until unicorn gets updated. + if unicorn.__version__ <= '1.0.0' and reg_name == 'APSR': + reg_name = 'CPSR' + if self._cpu.arch == CS_ARCH_ARM: + return globals()['UC_ARM_REG_' + reg_name] + elif self._cpu.arch == CS_ARCH_X86: + # TODO(yan): This needs to handle AF register + return globals()['UC_X86_REG_' + reg_name] + else: + # TODO(yan): raise a more appropriate exception + raise TypeError + + def emulate(self, instruction): + ''' + Emulate a single instruction. + ''' + + # The emulation might restart if Unicorn needs to bring in a memory map + # or bring a value from Manticore state. 
+ while True: + + self.reset() + + # Establish Manticore state, potentially from past emulation + # attempts + for base in self._should_be_mapped: + size, perms = self._should_be_mapped[base] + self._emu.mem_map(base, size, perms) + + for address, values in self._should_be_written.items(): + for offset, byte in enumerate(values, start=address): + if issymbolic(byte): + from ..core.cpu.abstractcpu import ConcretizeMemory + raise ConcretizeMemory(self._cpu.memory, offset, 8, + "Concretizing for emulation") + + self._emu.mem_write(address, ''.join(values)) + + # Try emulation + self._should_try_again = False + + self._step(instruction) + + if not self._should_try_again: + break + + + def _step(self, instruction): + ''' + A single attempt at executing an instruction. + ''' + + registers = set(self._cpu.canonical_registers) + + # Refer to EFLAGS instead of individual flags for x86 + if self._cpu.arch == CS_ARCH_X86: + # The last 8 canonical registers of x86 are individual flags; replace + # with the eflags + registers -= set(['CF','PF','AF','ZF','SF','IF','DF','OF']) + registers.add('EFLAGS') + + # TODO(mark): Unicorn 1.0.1 does not support reading YMM registers, + # and simply returns back zero. If a unicorn emulated instruction writes to an + # XMM reg, we will read back the corresponding YMM register, resulting in an + # incorrect zero value being actually written to the XMM register. This is + # fixed in Unicorn PR #819, so when that is included in a release, delete + # these two lines. + registers -= set(['YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15']) + registers |= set(['XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15']) + + # XXX(yan): This concretizes the entire register state. This is overly + # aggressive. 
Once capstone adds consistent support for accessing + # referred registers, make this only concretize those registers being + # read from. + for reg in registers: + val = self._cpu.read_register(reg) + if issymbolic(val): + from ..core.cpu.abstractcpu import ConcretizeRegister + raise ConcretizeRegister(self._cpu, reg, "Concretizing for emulation.", + policy='ONE') + self._emu.reg_write(self._to_unicorn_id(reg), val) + + # Bring in the instruction itself + instruction = self._cpu.decode_instruction(self._cpu.PC) + text_bytes = self._cpu.read_bytes(self._cpu.PC, instruction.size) + self._emu.mem_write(self._cpu.PC, ''.join(text_bytes)) + + self._emu.hook_add(UC_HOOK_MEM_READ_UNMAPPED, self._hook_unmapped) + self._emu.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED, self._hook_unmapped) + self._emu.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, self._hook_unmapped) + self._emu.hook_add(UC_HOOK_MEM_READ, self._hook_xfer_mem) + self._emu.hook_add(UC_HOOK_MEM_WRITE, self._hook_xfer_mem) + self._emu.hook_add(UC_HOOK_INTR, self._interrupt) + + saved_PC = self._cpu.PC + + try: + self._emu.emu_start(self._cpu.PC, self._cpu.PC+instruction.size, count=1) + except UcError as e: + # We request re-execution by signaling error; if we we didn't set + # _should_try_again, it was likely an actual error + if not self._should_try_again: + raise + + if self._should_try_again: + return + + if logger.isEnabledFor(logging.DEBUG): + logger.debug("="*10) + for register in self._cpu.canonical_registers: + logger.debug("Register % 3s Manticore: %08x, Unicorn %08x", + register, self._cpu.read_register(register), + self._emu.reg_read(self._to_unicorn_id(register)) ) + logger.debug(">"*10) + + # Bring back Unicorn registers to Manticore + for reg in registers: + val = self._emu.reg_read(self._to_unicorn_id(reg)) + self._cpu.write_register(reg, val) + + #Unicorn hack. 
On single step unicorn wont advance the PC register + mu_pc = self.get_unicorn_pc() + if saved_PC == mu_pc: + self._cpu.PC = saved_PC + instruction.size + + # Raise the exception from a hook that Unicorn would have eaten + if self._to_raise: + raise self._to_raise + + return + From 267c825e03400cd25afd3f837b35b9699cf0faa5 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Mon, 27 Nov 2017 18:55:12 -0500 Subject: [PATCH 14/65] Propagated missing kwargs --- manticore/core/cpu/abstractcpu.py | 2 +- manticore/core/cpu/arm.py | 4 ++-- manticore/core/cpu/binja.py | 6 +++--- manticore/core/cpu/x86.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index cea80c6f9..c945a0ac3 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -366,7 +366,7 @@ class Cpu(Eventful): ''' _published_events = {'write_register', 'read_register', 'write_memory', 'read_memory', 'decode_instruction', - 'execute_instruction'} + 'execute_instruction', 'set_descriptor'} def __init__(self, regfile, memory, **kwargs): assert isinstance(regfile, RegisterFile) diff --git a/manticore/core/cpu/arm.py b/manticore/core/cpu/arm.py index ca14c4f00..2e74cd745 100644 --- a/manticore/core/cpu/arm.py +++ b/manticore/core/cpu/arm.py @@ -324,8 +324,8 @@ class Armv7Cpu(Cpu): arch = cs.CS_ARCH_ARM mode = cs.CS_MODE_ARM - def __init__(self, memory): - super(Armv7Cpu, self).__init__(Armv7RegisterFile(), memory) + def __init__(self, memory, *args, **kwargs): + super(Armv7Cpu, self).__init__(Armv7RegisterFile(), memory, *args, **kwargs) self._it_conditional = list() self._last_flags = {'C': 0, 'V': 0, 'N': 0, 'Z': 0, 'GE': 0} self._at_symbolic_conditional = False diff --git a/manticore/core/cpu/binja.py b/manticore/core/cpu/binja.py index a38c7f2cd..7d953511b 100644 --- a/manticore/core/cpu/binja.py +++ b/manticore/core/cpu/binja.py @@ -272,7 +272,7 @@ class BinjaCpu(Cpu): ''' A Virtual CPU 
model for Binary Ninja's IL ''' - def __init__(self, memory): + def __init__(self, memory, *args, **kwargs): ''' Builds a CPU model. :param arch: BinaryNinja arch. @@ -285,7 +285,7 @@ def __init__(self, memory): # get a platform specific CPU # and mark it as non-virtual so as to not increment the PC in the # @instruction decorator - self.platform_cpu = CpuFactory.get_cpu(memory, 'amd64') + self.platform_cpu = CpuFactory.get_cpu(memory, 'amd64', *args, **kwargs) self.platform_cpu.real_cpu = False platform_regs = self.platform_cpu.all_registers self.max_instr_width = arch.max_instr_length @@ -301,7 +301,7 @@ def __init__(self, memory): super(BinjaCpu, self).__init__(BinjaRegisterFile('x86_64', platform_regs), memory, - disasm="binja-il") + disasm="binja-il", *args, **kwargs) def initialize_disassembler(self, program_path): import binaryninja as bn diff --git a/manticore/core/cpu/x86.py b/manticore/core/cpu/x86.py index 4ad52b16e..a48b7abac 100644 --- a/manticore/core/cpu/x86.py +++ b/manticore/core/cpu/x86.py @@ -705,9 +705,9 @@ def set_descriptor(self, selector, base, limit, perms): assert base>=0 and base < (1<=0 and limit < 0xffff or limit&0xfff == 0 #perms ? 
not used yet Also is not really perms but rather a bunch of attributes - self.publish('will_set_descriptor', selector, base, limit, perms) + self._publish('will_set_descriptor', selector, base, limit, perms) self._segments[selector] = (base, limit, perms) - self.publish('did_set_descriptor', selector, base, limit, perms) + self._publish('did_set_descriptor', selector, base, limit, perms) def get_descriptor(self, selector): if selector in self._segments: From 968fca698e1625909d46407bc013bc84c9f350fb Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Mon, 27 Nov 2017 19:29:38 -0500 Subject: [PATCH 15/65] Re-implemented abstractcpu changes from master --- manticore/core/cpu/abstractcpu.py | 58 +++++++++++++------------------ manticore/utils/emulate.py | 4 +-- tests/test_unicorn.py | 2 +- 3 files changed, 28 insertions(+), 36 deletions(-) diff --git a/manticore/core/cpu/abstractcpu.py b/manticore/core/cpu/abstractcpu.py index c945a0ac3..85441d8d0 100644 --- a/manticore/core/cpu/abstractcpu.py +++ b/manticore/core/cpu/abstractcpu.py @@ -762,48 +762,40 @@ def execute(self): name = self.canonicalize_instruction_name(insn) - def fallback_to_emulate(*operands): - if self._concrete: - text_bytes = ' '.join('%02x'%x for x in insn.bytes) - logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - insn.address, text_bytes, insn.mnemonic, insn.op_str) - self.emulate(insn) - - def determine_implementation(instruction): - if self._concrete: - implementation = fallback_to_emulate - - if 'SYSCALL' in name: - self.emu.sync_unicorn_to_manticore() - implementation = getattr(self, name, fallback_to_emulate) - else: - implementation = getattr(self, name, fallback_to_emulate) - - if implementation != fallback_to_emulate: - self._non_unicorn_instrs += 1 - - return implementation - - implementation = determine_implementation(insn) if logger.level == logging.DEBUG : logger.debug(self.render_instruction(insn) + " (%s)" % insn.size) for l in self.render_registers(): 
register_logger.debug(l) - #FIXME(yan): In the case the instruction implementation invokes a system call, we would not be able to - # publish the did_execute_instruction event from here, so we capture and attach it to the syscall - # exception for the platform to emit it for us once the syscall has successfully been executed. - def did_exec(): - self._icount += 1 - self._publish('did_execute_instruction', self._last_pc, self.PC, insn) - try: - implementation(*insn.operands) + if self._concrete and 'SYSCALL' in name: + self.emu.sync_unicorn_to_manticore() + if self._concrete and 'SYSCALL' not in name: + self.emulate(insn) + else: + try: + self._non_unicorn_instrs += 1 + getattr(self, name)(*insn.operands) + except AttributeError: + text_bytes = ' '.join('%02x'%x for x in insn.bytes) + logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", + insn.address, text_bytes, insn.mnemonic, insn.op_str) + self.emulate(insn) except (Interruption, Syscall) as e: - e.on_handled = did_exec + e.on_handled = lambda: self._publish_instruction_as_executed(insn) raise e else: - did_exec() + self._publish_instruction_as_executed(insn) + + #FIXME(yan): In the case the instruction implementation invokes a system call, we would not be able to + # publish the did_execute_instruction event from here, so we capture and attach it to the syscall + # exception for the platform to emit it for us once the syscall has successfully been executed. + def _publish_instruction_as_executed(self, insn): + ''' + Notify listeners that an instruction has been executed. 
+ ''' + self._icount += 1 + self._publish('did_execute_instruction', self._last_pc, self.PC, insn) def emulate(self, insn): diff --git a/manticore/utils/emulate.py b/manticore/utils/emulate.py index d097acbc3..ee939481f 100644 --- a/manticore/utils/emulate.py +++ b/manticore/utils/emulate.py @@ -360,12 +360,12 @@ def set_fs(self, addr): FSMSR = 0xC0000100 return self.set_msr(FSMSR, addr) - def pre_execute_callback(self, _insn): + def pre_execute_callback(self, _pc, _insn): start_time = time.time() self.out_of_step_time += (start_time - self._last_step_time) self._last_step_time = start_time - def post_execute_callback(self, _insn): + def post_execute_callback(self, _last_pc, _pc, _insn): start_time = time.time() self.in_step_time += (start_time - self._last_step_time) self._last_step_time = start_time \ No newline at end of file diff --git a/tests/test_unicorn.py b/tests/test_unicorn.py index 702294b35..8b1a23e88 100644 --- a/tests/test_unicorn.py +++ b/tests/test_unicorn.py @@ -8,7 +8,7 @@ from manticore.core.state import State from manticore.core.smtlib import BitVecVariable, ConstraintSet from manticore.platforms import linux -from manticore.utils.emulate import UnicornEmulator +from manticore.utils.fallback_emulator import UnicornEmulator from capstone.arm import * from keystone import Ks, KS_ARCH_ARM, KS_MODE_ARM From f9c81ca7d44e52c5d2863fb4d2d2d8355eeb1f32 Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Sat, 9 Dec 2017 19:41:53 -0500 Subject: [PATCH 16/65] Added transition doc to repo --- TRANSITION.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 TRANSITION.md diff --git a/TRANSITION.md b/TRANSITION.md new file mode 100644 index 000000000..2fa002f98 --- /dev/null +++ b/TRANSITION.md @@ -0,0 +1,33 @@ +# Big Bad Manticore/Unicorn Transition Doc +It looks like we'll be shelving the Manticore's Unicorn-backed concrete mode (Manticorn?) for a while in favor of more pressing features. 
This document will serve as a record of all the issues and to-dos from this branch so that we can revive it more easily in the future. + +### Original goals +Create a fast, purely-concrete execution mode with analysis for Manticore. The idea was to replace Pin in Sienna Locomotive with an in-house tool that was easier to use than Pin, but faster than Manticore. We decided that the best way to do this was to use Unicorn's fast QEMU-based CPU emulation as a replacement for Manticore's relatively slow Python-based instruction emulation. + +### Structure +On `master`, whenever an `abstractcpu` can't find an implementation for an instruction, it spins up a Unicorn instance, executes the instruction, and replicates any reads to Unicorn's memory or registers into Manticore's CPU. This branch modifies `abstractcpu` to instead send every instruction to the Unicorn instance (and preserve the instance across instructions). In order to make this more efficient, we try to let Unicorn run independently from Manticore as much as possible. When we initialize the emulator, we clone Manticore's image of the program memory into Unicorn, then tell Unicorn to execute each instruction in the text segment based on wherever Manticore thinks the program counter is pointing. With the exception of the program counter, we don't sync changes to the memory or the registers back to Manticore after every instruction, instead choosing to do it lazily whenever we need to hand control back to Manticore. Currently, that only happens whenever we need to execute a syscall, since Unicorn can't emulate those. When that happens, we sync Unicorn's state to Manticore's, then invoke Manticore's syscall emulation, and write any changes to the memory or registers back into the Unicorn instance. We implement the write-back by subscribing callbacks in the emulator to the `did_write_memory` and `did_set_register` events.
+ +### Issues +* The sync-up from Unicorn to Manticore that occurs immediately before a syscall should probably occur any time Manticore hits a hook as well, but that's currently not implemented. +* The taint-tracking system doesn't work yet with this branch, since the emulator makes no effort to track taint. +* Manticore's default way of implementing file reads seems to be creating a symbolic file, and treating anything read from it as symbolic input. This doesn't make a lot of sense for a pure concrete mode - when a binary wants to read flag.txt, we should just let it read flag.txt. Implementing this will mean modifying all the syscalls that read from symbolic files to add a check for whether we're executing on the Unicorn emulator, in which case, they should read from the actual filesystem. + * For example, in `multiple-styles`, we get the read from stdin to work by shoving some bytes at the binary via `concrete_start`. If we don't do this, it reads symbolic memory, which gets concretized to null bytes. This is probably not the ideal behavior. While we still want concrete initialization data to work, it would make more sense for this to read from stdin like a real binary would. +* The initial write to the `DS` register causes Unicorn to segfault when running CGC binaries. No idea why this is happening. Currently, the segmentation code only handles FS (and relies on some Unicorn fakery, not a real GDT), so it's possible that fixing this would make the segfault go away. +* It takes an extremely long time to clone Manticore's memory image into Unicorn. This is almost entirely because Manticore implements memory using a dict, and we can't just take a big slice of the dict nearly as efficiently as we can do so with a list. Options for fixing this include creating a flat copy of the program memory within Manticore during initialization, or writing/finding a separate loader just for Unicorn.
+* Performance analysis of the in-step time (the total time spent waiting for Unicorn to execute instructions) and the out-of-step time (Out of step time = -