Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Experiment with flattening graphs for textual edit distance based match #188

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion idaplugin/rematch/collectors/vectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from .assembly_hash import AssemblyHashVector
from .mnemonic_hash import MnemonicHashVector
from .mnemonic_hist import MnemonicHistVector
from .flatgraph_editdistance import FlatGraphEditDistanceVector


__all__ = ["Vector", "InstructionHashVector", "IdentityHashVector",
"NameHashVector", "AssemblyHashVector", "MnemonicHashVector",
"MnemonicHistVector"]
"MnemonicHistVector", "FlatGraphEditDistanceVector"]
63 changes: 63 additions & 0 deletions idaplugin/rematch/collectors/vectors/flatgraph_editdistance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import idaapi

from .vector import Vector


class FlatGraphEditDistanceVector(Vector):
    """Flatten a function's control-flow graph into a list of numbers.

    The resulting list starts with the number of basic blocks in the
    function's flow chart, followed by one value per visited edge target:
    starting from the single head block, the successors of each block are
    sorted by size, their values are appended, and then each successor is
    walked in that same order. The flat list is meant to be compared with a
    textual edit distance.
    """
    type = 'flatgraph_editdistance'
    type_version = 0

    def __init__(self, *args, **kwargs):
        super(FlatGraphEditDistanceVector, self).__init__(*args, **kwargs)
        self.flow_chart = None
        # ids of the basic blocks whose successors were already emitted
        self.visited = set()
        # the flattened representation built by data()
        self.items = list()

    def _bb_size(self, bb):
        """Return the size in bytes of basic block `bb`.

        Raises:
            ValueError: if the block does not end after it starts.
        """
        if bb.endEA > bb.startEA:
            return bb.endEA - bb.startEA

        raise ValueError("while flattening graph, a basicblock that ends "
                         "before it starts encountered at "
                         "{:x}".format(self.offset))

    def _bb_value(self, bb):
        """Return the value that represents `bb` in the flattened list."""
        # TODO: this should be something that's uncorrelated with the order
        # of basic blocks and describes basic blocks well
        # Some kind of hash for mnemonics could be used
        return self._bb_size(bb)

    def _append_bbs(self, *bbs):
        """Append the value of every given basic block to `self.items`."""
        self.items.extend(self._bb_value(bb) for bb in bbs)

    def _find_head(self):
        """Return the only basic block that has no predecessors.

        Raises:
            ValueError: if the flow chart does not have exactly one such
                block.
        """
        def is_head(bb):
            # preds() is an iterator in IDAPython rather than a sized
            # container, so probe it for a first element instead of len().
            return not any(True for _ in bb.preds())

        # Build a real list: filter() is lazy on Python 3 and has no len().
        heads = [bb for bb in self.flow_chart if is_head(bb)]
        if len(heads) == 1:
            return heads[0]

        msg = ("flattening graphs with head count other than 1 is not "
               "supported, got {} head-count for "
               "{:x}".format(len(heads), self.offset))
        raise ValueError(msg)

    def _sort_siblings(self, siblings):
        """Return `siblings` as a list ordered by ascending block size."""
        return sorted(siblings, key=self._bb_size)

    def _recurse_bb(self, bb):
        """Emit the successors of `bb`, then walk each of them in turn.

        Blocks are tracked by their `id`: IDAPython builds a new BasicBlock
        wrapper on every lookup, so the wrapper objects themselves cannot be
        used to detect a block that was already visited (e.g. in a loop).
        """
        if bb.id in self.visited:
            return

        self.visited.add(bb.id)
        siblings = self._sort_siblings(bb.succs())
        self._append_bbs(*siblings)

        for sibling in siblings:
            self._recurse_bb(sibling)

    def data(self, offset):
        """Build and return the flattened graph of the function at `offset`.

        Returns:
            A list whose first element is the flow chart's block count,
            followed by the values appended while walking the graph from
            its head block.

        Raises:
            ValueError: if the function does not have exactly one head
                block, or a basic block has a non-positive size.
        """
        # FC_PREDS asks IDA to compute predecessor lists; _find_head relies
        # on preds() to tell the head block apart from the others.
        self.flow_chart = idaapi.FlowChart(idaapi.get_func(offset),
                                           flags=idaapi.FC_PREDS)
        # Start from a clean state so repeated calls do not accumulate.
        self.visited = set()
        self.items = [self.flow_chart.size]
        head = self._find_head()
        self._recurse_bb(head)
        return self.items
3 changes: 2 additions & 1 deletion idaplugin/rematch/instances/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def __init__(self, *args, **kwargs):
collectors.vectors.IdentityHashVector,
collectors.vectors.AssemblyHashVector,
collectors.vectors.MnemonicHashVector,
collectors.vectors.MnemonicHistVector}
collectors.vectors.MnemonicHistVector,
collectors.vectors.FlatGraphVector}
self.annotations |= {collectors.annotations.AssemblyAnnotation}

def size(self):
Expand Down