In [1]:
import angr
import pyvex
import archinfo
from angrutils import *
import networkx as nx

In [2]:
class stmtInfo:
    def __init__(self,stmt):
        self.init()
        self.__extract_stmt_info(stmt)
    
    def init(self):
        self.tempAss = []
        self.guestAss = []
        self.memAss = False
        self.usedTemps = []
        self.usedguest = []
        self.usedMem = []
        self.addr = 0
        self.memRead = False
        self.condWrite = []
        self.InstEnd = False
        self.BlockEnd = False
        self.exit = False

        
    def __extract_stmt_info(self,stmt):
        if stmt.tag == 'Ist_NoOp':
            self.__NoOp(stmt)
        if stmt.tag == 'Ist_IMark':
            self.__IMark(stmt)
        if stmt.tag == 'Ist_AbiHint':
            self.__AbiHint(stmt)
        if stmt.tag == 'Ist_Put':
            self.__Put(stmt)
        if stmt.tag == 'Ist_PutI':
            self.__PutI(stmt)
        if stmt.tag == 'Ist_WrTmp':
            self.__WrTmp(stmt)
        if stmt.tag == 'Ist_Store':
            self.__Store(stmt)
        if stmt.tag == 'Ist_CAS':
            self.__CAS(stmt)
        if stmt.tag == 'Ist_LLSC':
            self.__LLSC(stmt)
        if stmt.tag == 'Ist_MBE':
            self.__MBE(stmt)
        if stmt.tag == 'Ist_Dirty':
            self.__Dirty(stmt)
        if stmt.tag == 'Ist_Exit':
            self.__Exit(stmt)
        if stmt.tag == 'Ist_LoadG':
            self.__LoadG(stmt)
        if stmt.tag == 'Ist_StoreG':
            self.__StoreG(stmt)

    def __NoOp(self,stmt):
        print("NoOp found")

    def __IMark(self,stmt):
        self.addr = stmt.addr
        self.InstEnd = True

    def __AbiHint(self,stmt):
        # raise Exception('Not Implimented AbiHint')
        self.InstEnd = True
        self.BlockEnd = True

    def __Put(self,stmt):
        self.guestAss.append(stmt.offset)
        self.__parseData(stmt.data)
        # raise Exception('Not Implimented Put')

    def __PutI(self,stmt):
        raise Exception('Not Implimented PutI')

    def __WrTmp(self,stmt):
        self.tempAss.append(stmt.tmp)
        self.__parseData(stmt.data)
        # raise Exception('Not Implimented WrTmp')

    def __Store(self,stmt):
        self.memAss = True
        self.__parseData(stmt.addr)
        self.__parseData(stmt.data)
        # raise Exception('Not Implimented Store')

    def __CAS(self,stmt):  # compare and swap
        raise Exception('Not Implimented CAS')

    def __LLSC(self,stmt): # Either Load-Linked or Store-Conditional
        raise Exception('Not Implimented LLSC')

    def __MBE(self,stmt):  # memory bus event
        raise Exception('Not Implimented MBE')

    def __Dirty(self,stmt):
        raise Exception('Not Implimented Dirty')

    def __Exit(self,stmt):
        self.__parseData(stmt.guard)
        self.condWrite.append(stmt.offsIP)
        self.InstEnd = True
        self.BlockEnd = True
        self.exit = True
        
        # raise Exception('Not Implimented Exit')

    def __LoadG(self,stmt):
        raise Exception('Not Implimented LoadG')

    def __StoreG(self,stmt):
        raise Exception('Not Implimented StoreG')

    def __parseData(self,expr):
        # print(expr.tag)
        if expr.tag == 'Iex_Binder':
            self.__Binder(expr)
        if expr.tag == 'Iex_VECRET':
            self.__VECRET(expr)
        if expr.tag == 'Iex_GSPTR':
            self.__GSPTR(expr)
        if expr.tag == 'Iex_GetI':
            self.__GetI(expr)
        if expr.tag == 'Iex_RdTmp':
            self.__RdTmp(expr)
        if expr.tag == 'Iex_Get':
            self.__Get(expr)
        if expr.tag == 'Iex_Qop':
            self.__Qop(expr)
        if expr.tag == 'Iex_Triop':
            self.__Triop(expr)
        if expr.tag == 'Iex_Binop':
            self.__Binop(expr)
        if expr.tag == 'Iex_Unop':
            self.__Unop(expr)
        if expr.tag == 'Iex_Load':
            self.__Load(expr)
        if expr.tag == 'Iex_Const':
            self. __Const(expr)
        if expr.tag == 'Iex_ITE':
            self.__ITE(expr)
        if expr.tag == 'Iex_CCall':
            self.__CCall(expr)

    def __Binder(self,expr):
    	raise Exception('Not Implimented Binder')

    def __VECRET(self,expr):
        raise Exception('Not Implimented VECRET')

    def __GSPTR(self,expr):
        raise Exception('Not Implimented GSPTR')

    def __GetI(self,expr):
        raise Exception('Not Implimented GetI')

    def __RdTmp(self,expr):
        # expr.pp()
        self.usedTemps.append(expr.tmp)
        # raise Exception('Not Implimented RdTmp')

    def __Get(self,expr):
        self.usedguest.append(expr.offset)
        # raise Exception('Not Implimented Get')

    def __Qop(self,expr):
        for arg in expr.args:
            self.__parseData(arg)
        raise Exception('Not Implimented Qop')

    def __Triop(self,expr):
        for arg in expr.args:
            self.__parseData(arg)
        raise Exception('Not Implimented Triop')

    def __Binop(self,expr):
        print(expr.op)
        for arg in expr.args:
            self.__parseData(arg)
        # raise Exception('Not Implimented Binop')

    def __Unop(self,expr):
        self.__parseData(expr.args[0])
        # raise Exception('Not Implimented Unop')

    def __Load(self,expr):
        self.memRead = True
        self.__parseData(expr.addr)
        # raise Exception('Not Implimented Load')

    def __Const(self,expr):
        pass
        # raise Exception('Not Implimented Const')

    def __ITE(self,expr):
        raise Exception('Not Implimented ITE')

    def __CCall(self,expr):
        for arg in expr.args:
            self.__parseData(arg)
        # raise Exception('Not Implimented CCall')

In [3]:
def dfs(n,cfg,dfg,g,vis,addr):
    if n in vis:
        return
    vis.append(n)
    try:
        stmts = n.block.vex.statements
    except:
        stmts = []
    addr1 = 0
    for stmt in stmts:
        if stmt.tag == 'Ist_IMark':
            addr1 = stmt.addr
        info = stmtInfo(stmt)
        if g in info.usedguest:
            dfg.add_edge(hex(addr),hex(addr1))
            ob = dfg.adj[hex(addr)][hex(addr1)]
            ob[len(ob)] = {"kind" : "dep","passby" : "guest", "num" : g}
        if g in info.guestAss:
            dfg.add_edge(hex(addr),hex(addr1))
            ob = dfg.adj[hex(addr)][hex(addr1)]
            ob[len(ob)] = {"kind" : "kill","passby" : "guest", "num" : g}
            return
    for nei in cfg.graph.successors(n):
        dfs(nei,cfg,dfg,g,vis,addr) 

In [4]:
def dfs2(n,cfg,dfg,vis,addr):
    if n in vis:
        return
    vis.append(n)
    try:
        stmts = n.block.vex.statements
    except:
        stmts = []
    addr1 = 0
    for stmt in stmts:
        if stmt.tag == 'Ist_IMark':
            addr1 = stmt.addr
            dfg.add_edge(hex(addr),hex(addr1))
            ob = dfg.adj[hex(addr)][hex(addr1)]
            ob[len(ob)] = {"kind" : "dep","passby" : "cond", "num" : 0}
    for nei in cfg.graph.successors(n):
        dfs2(nei,cfg,dfg,vis,addr) 

In [5]:
def dfs3(n,cfg,dfg,vis,addr):
    if n in vis:
        return
    vis.append(n)
    try:
        stmts = n.block.vex.statements
    except:
        stmts = []
    addr1 = 0
    for stmt in stmts:
        if stmt.tag == 'Ist_IMark':
            addr1 = stmt.addr
        info = stmtInfo(stmt)
        if info.memRead:
            dfg.add_edge(hex(addr),hex(addr1))
            ob = dfg.adj[hex(addr)][hex(addr1)]
            ob[len(ob)] = {"kind" : "dep","passby" : "mem", "num" : 0}
        if info.memAss:
            dfg.add_edge(hex(addr),hex(addr1))
            ob = dfg.adj[hex(addr)][hex(addr1)]
            ob[len(ob)] = {"kind" : "kill","passby" : "mem", "num" : 0}
            return
    for nei in cfg.graph.successors(n):
        dfs3(nei,cfg,dfg,vis,addr) 

In [6]:
def add_edges(dfg,cfg,nodes,stmts,node_ind,stmt_ind,f):
    addr = stmts[stmt_ind].addr
    dfg.add_node(hex(addr))
    addr1 = 0
    stmt_ind+=1
    guest = []
    temp = []
    memRight = False
    memRead = False
    inst_comp = False
    for ind in range(stmt_ind,len(stmts)):
        if not(inst_comp) :
            info = stmtInfo(stmts[ind])
            guest.extend(info.guestAss)
            temp.extend(info.tempAss)
            memRight |= info.memAss
            if(info.exit == True):
                for n in cfg.graph.successors(nodes[node_ind]):
                    dfs2(n,cfg,dfg,[],addr)       
            if stmts[ind].tag == 'Ist_IMark':
                inst_comp = True
                addr1 = info.addr
        else:
            info = stmtInfo(stmts[ind])
            if stmts[ind].tag == 'Ist_IMark':
                addr1 = info.addr
            for g in guest:
                if g in info.usedguest:
                    dfg.add_edge(hex(addr),hex(addr1))
                    ob = dfg.adj[hex(addr)][hex(addr1)]
                    ob[len(ob)] = {"kind" : "dep","passby": "guest", "num" : g}
                if g in info.guestAss:
                    guest.remove(g)
                    dfg.add_edge(hex(addr),hex(addr1))
                    ob = dfg.adj[hex(addr)][hex(addr1)]
                    ob[len(ob)] = {"kind" : "kill","passby" : "guest", "num" : g}
            for t in temp:
                if t in info.usedTemps:
                    dfg.add_edge(hex(addr),hex(addr1))
                    ob = dfg.adj[hex(addr)][hex(addr1)]
                    ob[len(ob)] = {"kind" : "dep","passby" : "temp", "num" : t}
                if t in info.tempAss:
                    temp.remove(t)
                    dfg.add_edge(hex(addr),hex(addr1))
                    ob = dfg.adj[hex(addr)][hex(addr1)]
                    ob[len(ob)] = {"kind" : "kill","passby" : "temp", "num" : t}
                    
            if info.memRead and memRight and f:
                dfg.add_edge(hex(addr),hex(addr1))
                ob = dfg.adj[hex(addr)][hex(addr1)]
                ob[len(ob)] = {"kind" : "dep","passby" : "mem", "num" : 0}
                
            if info.memAss and memRight and f:
                dfg.add_edge(hex(addr),hex(addr1))
                ob = dfg.adj[hex(addr)][hex(addr1)]
                memRight = False
                ob[len(ob)] = {"kind" : "kill","passby" : "mem", "num" : 0}
            
    for g in guest:
        vis = []
        for n in cfg.graph.successors(nodes[node_ind]):
            dfs(n,cfg,dfg,g,vis,addr)
            
    if f and memRight:
        vis = []
        for n in cfg.graph.successors(nodes[node_ind]):
            dfs3(n,cfg,dfg,vis,addr)

In [7]:
def get_dfg(cfg,f):
    nodes = list(cfg.graph.nodes)
    dfg = nx.DiGraph()
    for node_ind in range(0,len(nodes)):
        try:
            stmts = list(nodes[node_ind].block.vex.statements)
        except:
            continue
        for stmt_ind in range(0,len(stmts)):
            if stmts[stmt_ind].tag == 'Ist_IMark':
                add_edges(dfg,cfg,nodes,stmts,node_ind,stmt_ind,f)
    return dfg

In [8]:
def getPath(dfg,add1,add2,vis):
    if add1 in vis:
        return(None)
    vis.append(add1)
    if add1 == add2:
        return(add1)
    for e in dfg.adj[add1]:
        for ind in dfg.adj[add1][e]:
            if dfg.adj[add1][e][ind]['kind'] == 'dep':
                s1 = dfg.adj[add1][e][ind]['passby']
                s2 = str(dfg.adj[add1][e][ind]['num'])
                s = getPath(dfg,e,add2,vis)
                if s!= None:
                    return(add1+'\n'+s1+s2+" "+s)
    return None

In [9]:
def printedges(dfg,addr):
    x = dfg.adj[addr];
    for a in x:
        for o in x[a]:
            print(a,x[a][o]['kind'],x[a][o]['passby'],x[a][o]['num'])

In [10]:
# basic
p = angr.Project("../test/basicasm/basic.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/basicasm/basicvex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/basicasm/basicasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)

Iop_Add64
Iop_Add64


In [11]:
print(getPath(dfg,'0x40100a','0x40101f',[]))

0x40100a
temp0 0x40101f


In [12]:
print(getPath(dfg,'0x40100a','0x401012',[]))

0x40100a
temp0 0x401012


In [13]:
printedges(dfg,'0x40100a')

0x401012 dep temp 0
0x40101f dep temp 0
0x40102e kill guest 144
0x40102e kill guest 152
0x40102e kill guest 160


In [14]:
# if-else
p = angr.Project("../test/ifelseasm/ifelse.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/ifelseasm/ifelsevex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/ifelseasm/ifelseasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)
# print(fun(dfg,'0x40100a','0x40101f',[]))
# print(fun(dfg,'0x40100a','0x401012',[]))

Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64


In [15]:
dfg.nodes

NodeView(('0x401000', '0x401008', '0x40102a', '0x40102d', '0x401038', '0x401010', '0x40102f', '0x401018', '0x401020', '0x401034', '0x40104a', '0x40103d', '0x401042', '0x401045', '0x40104c', '0x401051', '0x401056', '0x40105b', '0x40105e', '0x401032'))

In [16]:
print(getPath(dfg,'0x401000','0x401042',[]))  # a->out

0x401000
temp7 0x40102d
cond0 0x401034
temp0 0x401042


In [17]:
print(getPath(dfg,'0x401010','0x401042',[]))  # c->out

0x401010
guest48 0x40102f
guest24 0x401042


In [18]:
print(getPath(dfg,'0x401018','0x401042',[]))  # d->out

None


In [19]:
print(getPath(dfg,'0x401020','0x401042',[]))  # mess-> out

0x401020
guest24 0x401034
temp0 0x401042


In [20]:
print(getPath(dfg,'0x40102f','0x401034',[]))  # if -> else

None


In [21]:
printedges(dfg,'0x401038')

0x401056 kill guest 16
0x401056 kill guest 16


In [22]:
# loop
p = angr.Project("../test/loopasm/loop.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/loopasm/loopvex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/loopasm/loopasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)
# print(fun(dfg,'0x40100a','0x40101f',[]))
# print(fun(dfg,'0x40100a','0x401012',[]))

Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S


In [28]:
# if-else
p = angr.Project("../test/memasm/mem.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/memasm/memvex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/memasm/memasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)
# print(fun(dfg,'0x40100a','0x40101f',[]))
# print(fun(dfg,'0x40100a','0x401012',[]))print(getPath(dfg,'0x401010','0x401036',[])) # msg -> output



Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64


In [29]:
# if-else
p = angr.Project("../test/memasm/mem.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/memasm/memvex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/memasm/memasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)
# print(fun(dfg,'0x40100a','0x40101f',[]))
# print(fun(dfg,'0x40100a','0x401012',[]))

Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpLT64S
Iop_Add64
Iop_Add64


In [30]:
print(getPath(dfg,'0x401000','0x401051',[]))  # a->out

0x401000
temp7 0x40102d
cond0 0x40103f
temp6 0x401051


In [31]:
print(getPath(dfg,'0x401010','0x401051',[]))  # c->out

0x401010
guest48 0x40102f
guest24 0x401051


In [32]:
print(getPath(dfg,'0x401018','0x401051',[]))  # d->out

None


In [33]:
print(getPath(dfg,'0x401020','0x401051',[]))  # mess-> out

0x401020
guest24 0x40102f
guest24 0x401051


In [34]:
# if-else
p = angr.Project("../test/onecpp/one.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
plot_cfg(cfg, "../test/onecpp/onevex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/onecpp/oneasm", asminst=True, remove_imports=True, remove_path_terminator=True)
dfg = get_dfg(cfg,False)
# print(fun(dfg,'0x40100a','0x40101f',[]))
# print(fun(dfg,'0x40100a','0x401012',[]))



Iop_Add64
Iop_And64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_And64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_And64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_CmpEQ64
Iop_Add64
Iop_Add64
Iop_Sub64
Iop_Sar64
Iop_Sar64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_Sub64
Iop_Add64
Iop_CmpEQ64
Iop_Shl64
Iop_Add64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_Add64
Iop_Add64
Iop_CmpEQ32
Iop_Add64
Iop_CmpEQ32
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_Add64
Iop_Sub64
Iop_Add64
Iop_Add64
Iop_Sub64
Iop_CmpEQ64
Iop_Sub64
Iop_Sub64
Iop_Add64
Iop_And64
Iop_Sub64
Iop_Sub64
Iop_Sub64
Iop_Sub64


In [35]:
main = p.loader.main_object.get_symbol('main')
main


<Symbol "main" in one.exe at 0x401189>