In [17]:
import angr
import pyvex
import archinfo
from angrutils import *
import networkx as nx

In [18]:
class stmtInfo:
    """Record what a single VEX IR statement reads and writes.

    Constructing an instance inspects ``stmt`` once, dispatching on its
    ``tag`` string, and fills in the attributes below. Statements or
    expressions whose tag is not in the dispatch tables are ignored.

    Attributes:
        tempAss: temporaries written by the statement (``WrTmp``).
        guestAss: guest-state offsets written by the statement (``Put``).
        memAss: True when the statement is a ``Store``.
        memchng: 8-byte-aligned addresses touched by a ``Store`` whose
            address is a constant.
        usedTemps: temporaries read anywhere in the statement's expressions.
        usedguest: guest-state offsets read through ``Get``.
        usedMem: 8-byte-aligned addresses touched by a ``Load`` whose
            address is a constant.
        addr: instruction address; only set by ``IMark`` statements.
        memRead: True when the statement contains a ``Load``.
        condWrite: ``offsIP`` values of conditional ``Exit`` statements.
        InstEnd, BlockEnd, exit: marker flags set by ``IMark``, ``AbiHint``
            and ``Exit`` statements.

    Raises:
        NotImplementedError: for kinds that are not modelled yet (PutI, CAS,
            LLSC, MBE, Dirty, LoadG, StoreG, Binder, VECRET, GSPTR, GetI).
    """

    def __init__(self,stmt):
        self.init()
        self.__extract_stmt_info(stmt)

    def init(self):
        """Reset every collected field to its empty value."""
        self.tempAss = []
        self.guestAss = []
        self.memAss = False
        self.memchng = []
        self.usedTemps = []
        self.usedguest = []
        self.usedMem = []
        self.addr = 0
        self.memRead = False
        self.condWrite = []
        self.InstEnd = False
        self.BlockEnd = False
        self.exit = False

    def __extract_stmt_info(self,stmt):
        """Run the handler registered for ``stmt.tag``, if there is one."""
        handler = self._STMT_HANDLERS.get(stmt.tag)
        if handler is not None:
            handler(self, stmt)

    @staticmethod
    def __aligned_words(value):
        """Return the 8-byte-aligned address(es) covering ``value``.

        An unaligned address is assumed to spill into the following word as
        well. NOTE(review): the access width is never consulted, so this is
        a heuristic rather than an exact footprint.
        """
        base = (value // 8) * 8
        if value % 8 != 0:
            return [base, base + 8]
        return [base]

    # ---- statement handlers -------------------------------------------

    def __NoOp(self,stmt):
        print("NoOp found")

    def __IMark(self,stmt):
        self.addr = stmt.addr
        self.InstEnd = True

    def __AbiHint(self,stmt):
        self.InstEnd = True
        self.BlockEnd = True

    def __Put(self,stmt):
        self.guestAss.append(stmt.offset)
        self.__parseData(stmt.data)

    def __PutI(self,stmt):
        raise NotImplementedError('Not Implimented PutI')

    def __WrTmp(self,stmt):
        self.tempAss.append(stmt.tmp)
        self.__parseData(stmt.data)

    def __Store(self,stmt):
        self.memAss = True
        self.__parseData(stmt.addr)
        self.__parseData(stmt.data)
        # Only constant addresses can be tracked precisely.
        if stmt.addr.tag == 'Iex_Const':
            self.memchng.extend(self.__aligned_words(stmt.addr.con.value))

    def __CAS(self,stmt):  # compare and swap
        raise NotImplementedError('Not Implimented CAS')

    def __LLSC(self,stmt): # Either Load-Linked or Store-Conditional
        raise NotImplementedError('Not Implimented LLSC')

    def __MBE(self,stmt):  # memory bus event
        raise NotImplementedError('Not Implimented MBE')

    def __Dirty(self,stmt):
        raise NotImplementedError('Not Implimented Dirty')

    def __Exit(self,stmt):
        self.__parseData(stmt.guard)
        self.condWrite.append(stmt.offsIP)
        self.InstEnd = True
        self.BlockEnd = True
        self.exit = True

    def __LoadG(self,stmt):
        raise NotImplementedError('Not Implimented LoadG')

    def __StoreG(self,stmt):
        raise NotImplementedError('Not Implimented StoreG')

    # ---- expression handlers ------------------------------------------

    def __parseData(self,expr):
        """Run the handler registered for ``expr.tag``, if there is one."""
        handler = self._EXPR_HANDLERS.get(expr.tag)
        if handler is not None:
            handler(self, expr)

    def __Binder(self,expr):
        raise NotImplementedError('Not Implimented Binder')

    def __VECRET(self,expr):
        raise NotImplementedError('Not Implimented VECRET')

    def __GSPTR(self,expr):
        raise NotImplementedError('Not Implimented GSPTR')

    def __GetI(self,expr):
        raise NotImplementedError('Not Implimented GetI')

    def __RdTmp(self,expr):
        self.usedTemps.append(expr.tmp)

    def __Get(self,expr):
        self.usedguest.append(expr.offset)

    def __parse_args(self,expr):
        """Visit every operand of an n-ary operation or helper call."""
        for arg in expr.args:
            self.__parseData(arg)

    def __Qop(self,expr):
        # Operands are fully handled; the former unconditional raise made
        # every four-operand operation abort the analysis.
        self.__parse_args(expr)

    def __Triop(self,expr):
        # Same as __Qop: the operands are all that needs recording.
        self.__parse_args(expr)

    def __Binop(self,expr):
        self.__parse_args(expr)

    def __Unop(self,expr):
        self.__parseData(expr.args[0])

    def __Load(self,expr):
        self.memRead = True
        self.__parseData(expr.addr)
        # Only constant addresses can be tracked precisely.
        if expr.addr.tag == 'Iex_Const':
            self.usedMem.extend(self.__aligned_words(expr.addr.con.value))

    def __Const(self,expr):
        # Constants read neither temporaries, guest state nor memory.
        pass

    def __ITE(self,expr):
        # An if-then-else reads its condition and both alternatives.
        self.__parseData(expr.cond)
        self.__parseData(expr.iftrue)
        self.__parseData(expr.iffalse)

    def __CCall(self,expr):
        self.__parse_args(expr)

    # Dispatch tables. They sit at the end of the class body so that the
    # (name-mangled) handler functions above are already defined; entries are
    # plain functions and are called as handler(self, node).
    _STMT_HANDLERS = {
        'Ist_NoOp': __NoOp,
        'Ist_IMark': __IMark,
        'Ist_AbiHint': __AbiHint,
        'Ist_Put': __Put,
        'Ist_PutI': __PutI,
        'Ist_WrTmp': __WrTmp,
        'Ist_Store': __Store,
        'Ist_CAS': __CAS,
        'Ist_LLSC': __LLSC,
        'Ist_MBE': __MBE,
        'Ist_Dirty': __Dirty,
        'Ist_Exit': __Exit,
        'Ist_LoadG': __LoadG,
        'Ist_StoreG': __StoreG,
    }

    _EXPR_HANDLERS = {
        'Iex_Binder': __Binder,
        'Iex_VECRET': __VECRET,
        'Iex_GSPTR': __GSPTR,
        'Iex_GetI': __GetI,
        'Iex_RdTmp': __RdTmp,
        'Iex_Get': __Get,
        'Iex_Qop': __Qop,
        'Iex_Triop': __Triop,
        'Iex_Binop': __Binop,
        'Iex_Unop': __Unop,
        'Iex_Load': __Load,
        'Iex_Const': __Const,
        'Iex_ITE': __ITE,
        'Iex_CCall': __CCall,
    }

In [19]:
def dfsguest(n,cfg,dfg,g,vis,addr):
    """Follow guest register ``g`` from ``n`` through the CFG.

    Every statement that reads ``g`` gets a "dep" entry on the DFG edge
    ``hex(addr) -> hex(<instruction of that statement>)``. The first
    statement in a block that writes ``g`` gets a "kill" entry and ends the
    search along that path (the block's successors are not explored).

    Args:
        n: CFG node to start from.
        cfg: object exposing ``cfg.graph.successors(node)``.
        dfg: graph receiving the edges (``add_edge`` / ``adj`` are used).
        g: guest-state offset being tracked.
        vis: list of already visited nodes; visited nodes are appended to it
            in depth-first pre-order.
        addr: integer address of the defining instruction (edge source).
    """
    src = hex(addr)
    # Mirror of ``vis`` for O(1) membership tests; ``vis`` itself stays a
    # list because callers share it between calls.
    seen = set(vis)
    # Explicit stack instead of recursion so deep CFGs cannot hit the
    # interpreter recursion limit. Successors are pushed reversed to keep the
    # same visiting order a recursive walk would produce.
    pending = [n]
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        vis.append(node)
        try:
            stmts = node.block.vex.statements
        except Exception:
            # Nodes without a liftable block contribute no statements.
            stmts = []
        addr1 = 0
        killed = False
        for stmt in stmts:
            if stmt.tag == 'Ist_IMark':
                addr1 = stmt.addr
            info = stmtInfo(stmt)
            if g in info.usedguest:
                dfg.add_edge(src,hex(addr1))
                ob = dfg.adj[src][hex(addr1)]
                ob[len(ob)] = {"kind" : "dep","passby" : "guest", "num" : g}
            if g in info.guestAss:
                dfg.add_edge(src,hex(addr1))
                ob = dfg.adj[src][hex(addr1)]
                ob[len(ob)] = {"kind" : "kill","passby" : "guest", "num" : g}
                killed = True
                break
        if not killed:
            pending.extend(reversed(list(cfg.graph.successors(node))))

In [20]:
def dfscontrolflow(n,cfg,dfg,vis,addr):
    """Mark every instruction reachable from ``n`` as control-dependent.

    For each ``IMark`` statement found in ``n`` and in every node reachable
    from it, a "dep"/"cond" entry is added on the DFG edge
    ``hex(addr) -> hex(<instruction address>)``.

    Args:
        n: CFG node to start from.
        cfg: object exposing ``cfg.graph.successors(node)``.
        dfg: graph receiving the edges (``add_edge`` / ``adj`` are used).
        vis: list of already visited nodes; visited nodes are appended to it
            in depth-first pre-order.
        addr: integer address of the branching instruction (edge source).
    """
    src = hex(addr)
    # Mirror of ``vis`` for O(1) membership tests.
    seen = set(vis)
    # Explicit stack instead of recursion so deep CFGs cannot hit the
    # interpreter recursion limit; reversed pushes keep the recursive order.
    pending = [n]
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        vis.append(node)
        try:
            stmts = node.block.vex.statements
        except Exception:
            # Nodes without a liftable block contribute no statements.
            stmts = []
        for stmt in stmts:
            if stmt.tag == 'Ist_IMark':
                dst = hex(stmt.addr)
                dfg.add_edge(src,dst)
                ob = dfg.adj[src][dst]
                ob[len(ob)] = {"kind" : "dep","passby" : "cond", "num" : 0}
        pending.extend(reversed(list(cfg.graph.successors(node))))

In [21]:
def dfs3(n,cfg,dfg,vis,addr):
    """Add weak memory dependencies for every load reachable from ``n``.

    Each statement containing a memory read, in ``n`` or any node reachable
    from it, gets a "dep"/"memweak" entry on the DFG edge
    ``hex(addr) -> hex(<instruction of that statement>)``. Stores never end
    the search.

    Args:
        n: CFG node to start from.
        cfg: object exposing ``cfg.graph.successors(node)``.
        dfg: graph receiving the edges (``add_edge`` / ``adj`` are used).
        vis: list of already visited nodes; visited nodes are appended to it
            in depth-first pre-order.
        addr: integer address of the storing instruction (edge source).
    """
    src = hex(addr)
    # Mirror of ``vis`` for O(1) membership tests.
    seen = set(vis)
    # Explicit stack instead of recursion so deep CFGs cannot hit the
    # interpreter recursion limit; reversed pushes keep the recursive order.
    pending = [n]
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        vis.append(node)
        try:
            stmts = node.block.vex.statements
        except Exception:
            # Nodes without a liftable block contribute no statements.
            stmts = []
        addr1 = 0
        for stmt in stmts:
            if stmt.tag == 'Ist_IMark':
                addr1 = stmt.addr
            info = stmtInfo(stmt)
            if info.memRead:
                dfg.add_edge(src,hex(addr1))
                ob = dfg.adj[src][hex(addr1)]
                # Label spelled "memweak" to match the in-block edges that
                # add_edges records for the same kind of dependency.
                ob[len(ob)] = {"kind" : "dep","passby" : "memweak", "num" : 0}
        pending.extend(reversed(list(cfg.graph.successors(node))))

In [22]:
def dfsglb(n,cfg,dfg,add,vis,addr):
    """Follow the constant memory address ``add`` from ``n`` through the CFG.

    Every statement that loads from ``add`` gets a "dep"/"mem" entry on the
    DFG edge ``hex(addr) -> hex(<instruction of that statement>)``. The first
    statement in a block that stores to ``add`` gets a "kill" entry and ends
    the search along that path (the block's successors are not explored).

    Args:
        n: CFG node to start from.
        cfg: object exposing ``cfg.graph.successors(node)``.
        dfg: graph receiving the edges (``add_edge`` / ``adj`` are used).
        add: integer memory address being tracked.
        vis: list of already visited nodes; visited nodes are appended to it
            in depth-first pre-order.
        addr: integer address of the storing instruction (edge source).
    """
    src = hex(addr)
    # Mirror of ``vis`` for O(1) membership tests.
    seen = set(vis)
    # Explicit stack instead of recursion so deep CFGs cannot hit the
    # interpreter recursion limit; reversed pushes keep the recursive order.
    pending = [n]
    while pending:
        node = pending.pop()
        if node in seen:
            continue
        seen.add(node)
        vis.append(node)
        try:
            stmts = node.block.vex.statements
        except Exception:
            # Nodes without a liftable block contribute no statements.
            stmts = []
        addr1 = 0
        killed = False
        for stmt in stmts:
            if stmt.tag == 'Ist_IMark':
                addr1 = stmt.addr
            info = stmtInfo(stmt)
            if add in info.usedMem:
                dfg.add_edge(src,hex(addr1))
                ob = dfg.adj[src][hex(addr1)]
                ob[len(ob)] = {"kind" : "dep","passby" : "mem", "num" : hex(add)}
            if add in info.memchng:
                dfg.add_edge(src,hex(addr1))
                ob = dfg.adj[src][hex(addr1)]
                ob[len(ob)] = {"kind" : "kill","passby" : "mem", "num" : hex(add)}
                killed = True
                break
        if not killed:
            pending.extend(reversed(list(cfg.graph.successors(node))))

In [23]:
def add_edges(dfg,cfg,nodes,stmts,node_ind,stmt_ind,f):
    """Add all outgoing DFG edges of one instruction.

    The instruction is the one whose ``IMark`` sits at ``stmts[stmt_ind]``.
    The function works in three steps:

    1. Scan the instruction's own statements (up to the next ``IMark``) and
       collect what it defines: guest offsets, temporaries and constant
       memory addresses. A conditional exit additionally makes everything
       reachable from the block's successors control-dependent.
    2. Scan the remaining statements of the block and record a "dep" entry
       for every use and a "kill" entry for every redefinition; a killed
       definition is no longer tracked.
    3. Continue the search for still-live guest offsets and memory addresses
       in the successor blocks. Temporaries are not followed across blocks.

    Args:
        dfg: graph receiving nodes and edges.
        cfg: object exposing ``cfg.graph.successors(node)``.
        nodes: list of CFG nodes; ``nodes[node_ind]`` owns ``stmts``.
        stmts: statement list of the block.
        node_ind: index of the block's node in ``nodes``.
        stmt_ind: index of the instruction's ``IMark`` in ``stmts``.
        f: when true, also record imprecise "memweak" edges between any
            store of this instruction and later loads/stores.
    """
    addr = stmts[stmt_ind].addr
    src = hex(addr)
    dfg.add_node(src)

    def record(dst_addr, kind, passby, num):
        # Append one entry to the attribute dict of edge src -> dst.
        dst = hex(dst_addr)
        dfg.add_edge(src, dst)
        entries = dfg.adj[src][dst]
        entries[len(entries)] = {"kind" : kind, "passby" : passby, "num" : num}

    addr1 = 0
    guest = []
    temp = []
    memRight = False
    memChng = []
    inst_comp = False
    for stmt in stmts[stmt_ind + 1:]:
        info = stmtInfo(stmt)
        if not inst_comp:
            # Still inside the defining instruction: collect definitions.
            guest.extend(info.guestAss)
            temp.extend(info.tempAss)
            memRight |= info.memAss
            memChng.extend(info.memchng)
            if info.exit:
                for n in cfg.graph.successors(nodes[node_ind]):
                    dfscontrolflow(n,cfg,dfg,[],addr)
            if stmt.tag == 'Ist_IMark':
                inst_comp = True
                addr1 = info.addr
            continue

        if stmt.tag == 'Ist_IMark':
            addr1 = info.addr

        # Each loop walks a snapshot: removing from the list being iterated
        # would silently skip the element following a killed one.
        for g in list(guest):
            if g in info.usedguest:
                record(addr1, "dep", "guest", g)
            if g in info.guestAss:
                guest.remove(g)
                record(addr1, "kill", "guest", g)
        for t in list(temp):
            if t in info.usedTemps:
                record(addr1, "dep", "temp", t)
            if t in info.tempAss:
                temp.remove(t)
                record(addr1, "kill", "temp", t)
        for add in list(memChng):
            if add in info.usedMem:
                record(addr1, "dep", "mem", hex(add))
            if add in info.memchng:
                memChng.remove(add)
                record(addr1, "kill", "mem", hex(add))

        if info.memRead and memRight and f:
            record(addr1, "dep", "memweak", 0)

        if info.memAss and memRight and f:
            record(addr1, "kill", "memweak", 0)
            memRight = False

    # Definitions that survived the block are followed into the successors.
    for g in guest:
        vis = []
        for n in cfg.graph.successors(nodes[node_ind]):
            dfsguest(n,cfg,dfg,g,vis,addr)

    for add in memChng:
        vis = []
        for n in cfg.graph.successors(nodes[node_ind]):
            dfsglb(n,cfg,dfg,add,vis,addr)

    if f and memRight:
        vis = []
        for n in cfg.graph.successors(nodes[node_ind]):
            dfs3(n,cfg,dfg,vis,addr)

In [24]:
def get_dfg(cfg,f):
    """Build the instruction-level dependency graph for ``cfg``.

    Every ``IMark`` statement of every CFG node whose block can be lifted is
    handed to ``add_edges``; nodes whose block or VEX IR cannot be obtained
    are skipped.

    Args:
        cfg: CFG whose ``graph`` holds the nodes.
        f: forwarded to ``add_edges``.

    Returns:
        The populated ``nx.DiGraph``; nodes are hex address strings.
    """
    dfg = nx.DiGraph()
    nodes = list(cfg.graph.nodes)
    for node_ind, node in enumerate(nodes):
        try:
            stmts = list(node.block.vex.statements)
        except Exception:
            # Not a bare except: Ctrl-C must still be able to stop the run.
            continue
        for stmt_ind, stmt in enumerate(stmts):
            if stmt.tag == 'Ist_IMark':
                add_edges(dfg,cfg,nodes,stmts,node_ind,stmt_ind,f)
    return dfg

In [25]:
def getPath(dfg,add1,add2,vis=None):
    """Return a textual chain of "dep" edges from ``add1`` to ``add2``.

    The search is depth-first and only follows edge entries whose ``kind``
    is ``'dep'``. The result lists one hop per line in the form
    ``<from>\\n<passby><num> <to>``.

    Args:
        dfg: graph whose ``adj[u][v]`` maps entry indices to dicts with the
            keys ``kind``, ``passby`` and ``num``.
        add1: start node (hex address string).
        add2: target node (hex address string).
        vis: list of nodes already tried; it is extended in place. Defaults
            to a fresh list.

    Returns:
        The chain as a string, or ``None`` when no chain exists or ``add1``
        is not a node of ``dfg``.
    """
    if vis is None:
        vis = []
    if add1 in vis:
        return None
    vis.append(add1)
    if add1 == add2:
        return add1
    try:
        neighbours = dfg.adj[add1]
    except KeyError:
        # An address that never became a node has no outgoing chain.
        return None
    for nxt, entries in neighbours.items():
        for entry in entries.values():
            if entry['kind'] != 'dep':
                continue
            rest = getPath(dfg,nxt,add2,vis)
            if rest is not None:
                return "{}\n{}{} {}".format(add1, entry['passby'], entry['num'], rest)
    return None

In [26]:
def printedges(dfg,addr):
    """Print every entry recorded on the edges leaving ``addr``.

    One line is written per entry: target node, kind, passby and num,
    separated by single spaces.
    """
    for target, entries in dfg.adj[addr].items():
        for entry in entries.values():
            print(target, entry['kind'], entry['passby'], entry['num'])

In [27]:
# Load the first test binary (shared libraries are not loaded) and build an
# emulated CFG starting at the program entry point.
p = angr.Project("../test/onlyglobal/onlyglobal.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
# Render the CFG twice: once annotated with VEX IR, once with assembly.
plot_cfg(cfg, "../test/onlyglobal/onlyglobalvex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/onlyglobal/onlyglobalasm", asminst=True, remove_imports=True, remove_path_terminator=True)
# f=False: the imprecise "memweak" edges are not generated.
dfg = get_dfg(cfg,False)

In [28]:
dfg.nodes

NodeView(('0x401000', '0x401008', '0x401032', '0x401035', '0x401045', '0x401010', '0x401020', '0x40102a', '0x401018', '0x401041', '0x401057', '0x401037', '0x40104a', '0x40104f', '0x401052', '0x401059', '0x40105e', '0x401063', '0x401068', '0x40106b', '0x40103f'))

In [29]:
# printedges(dfg,'0x401014')

In [30]:
# Print one chain of "dep" edges from 0x401008 to 0x40104f (None if there is none).
print(getPath(dfg,'0x401008','0x40104f',[]))

0x401008
temp12 0x401035
cond0 0x401041
temp0 0x40104f


In [31]:
# basic
# Same pipeline for the second test binary; p, start_state, cfg and dfg from
# the previous binary are overwritten.
p = angr.Project("../test/onecpp/one.exe",auto_load_libs=False)
start_state = p.factory.entry_state()
cfg = p.analyses.CFGEmulated(fail_fast=True, starts=[p.entry], initial_state=start_state)
# Render the CFG twice: once annotated with VEX IR, once with assembly.
plot_cfg(cfg, "../test/onecpp/onevex", vexinst=True, remove_imports=True, remove_path_terminator=True)
plot_cfg(cfg, "../test/onecpp/oneasm", asminst=True, remove_imports=True, remove_path_terminator=True)
# f=False: the imprecise "memweak" edges are not generated.
dfg = get_dfg(cfg,False)



In [32]:
# Fetch the basic block that starts at address 0x4011c9.
b = p.factory.block(int('0x4011c9',0))

In [33]:
# Pretty-print the block (the output below is its disassembly).
b.pp()

        _Z3funi:
4011c9  endbr64 
4011cd  push    rbp
4011ce  mov     rbp, rsp
4011d1  sub     rsp, 0x20
4011d5  mov     dword ptr [rbp-0x14], edi
4011d8  mov     rax, qword ptr  fs:[0x28]
4011e1  mov     qword ptr [rbp-0x8], rax
4011e5  xor     eax, eax
4011e7  lea     rax, [rbp-0xc]
4011eb  mov     rsi, rax
4011ee  lea     rdi, [_ZSt3cin@@GLIBCXX_3.4]
4011f5  call    std::istream::operator>>

        _Z3funi:
4011c9  endbr64 
4011cd  push    rbp
4011ce  mov     rbp, rsp
4011d1  sub     rsp, 0x20
4011d5  mov     dword ptr [rbp-0x14], edi
4011d8  mov     rax, qword ptr  fs:[0x28]
4011e1  mov     qword ptr [rbp-0x8], rax
4011e5  xor     eax, eax
4011e7  lea     rax, [rbp-0xc]
4011eb  mov     rsi, rax
4011ee  lea     rdi, [_ZSt3cin@@GLIBCXX_3.4]
4011f5  call    std::istream::operator>>


In [34]:
# Keep the block's VEX IR around for inspection.
bvex = b.vex

In [35]:
# Pretty-print the VEX IR (temporaries with their types, then the statements).
bvex.pp()

IRSB {
   t0:Ity_I64 t1:Ity_I64 t2:Ity_I64 t3:Ity_I64 t4:Ity_I64 t5:Ity_I64 t6:Ity_I64 t7:Ity_I64 t8:Ity_I32 t9:Ity_I32 t10:Ity_I32 t11:Ity_I64 t12:Ity_I64 t13:Ity_I64 t14:Ity_I64 t15:Ity_I64 t16:Ity_I64 t17:Ity_I64 t18:Ity_I64 t19:Ity_I64 t20:Ity_I32 t21:Ity_I64 t22:Ity_I64 t23:Ity_I64 t24:Ity_I64 t25:Ity_I64 t26:Ity_I64 t27:Ity_I64 t28:Ity_I64 t29:Ity_I32 t30:Ity_I64 t31:Ity_I32 t32:Ity_I64 t33:Ity_I64 t34:Ity_I64 t35:Ity_I64 t36:Ity_I64 t37:Ity_I64 t38:Ity_I64 t39:Ity_I64 t40:Ity_I64 t41:Ity_I64

   00 | ------ IMark(0x4011c9, 4, 0) ------
   01 | PUT(rip) = 0x00000000004011cd
   02 | ------ IMark(0x4011cd, 1, 0) ------
   03 | t0 = GET:I64(rbp)
   04 | t16 = GET:I64(rsp)
   05 | t15 = Sub64(t16,0x0000000000000008)
   06 | PUT(rsp) = t15
   07 | STle(t15) = t0
   08 | ------ IMark(0x4011ce, 3, 0) ------
   09 | PUT(rbp) = t15
   10 | ------ IMark(0x4011d1, 4, 0) ------
   11 | t2 = Sub64(t15,0x0000000000000020)
   12 | PUT(rsp) = t2
   13 | PUT(rip) = 0x00000000004011d5
   14 | ----

In [36]:
# Collect every CFG node whose block begins at 0x4011c9; the same address can
# appear in several nodes of the emulated CFG.
l = []
nodes = list(cfg.graph.nodes)
# NOTE(review): this rebinds dfg to an empty graph and discards the one built
# by get_dfg above; nothing in this cell uses it - confirm it is intentional.
dfg = nx.DiGraph()
for node_ind, node in enumerate(nodes):
    try:
        stmts = list(node.block.vex.statements)
    except Exception:
        # Nodes without a liftable block are skipped; Ctrl-C still works.
        continue
    first = stmts[0] if stmts else None
    # Only an IMark carries an integer instruction address in .addr.
    if first is not None and first.tag == 'Ist_IMark' and first.addr == 0x4011c9:
        l.append(node)

In [37]:
l

[<CFGENode _Z3funi 0x4011c9[49]>, <CFGENode _Z3funi 0x4011c9[49]>]

In [38]:
# Successors of the second node found at 0x4011c9.
list(cfg.graph.successors(l[1]))

[<CFGENode  0x401090[11]>, <CFGENode _Z3funi+0x31 0x4011fa[23]>]

In [39]:
# Successors of the first node found at 0x4011c9; the following cells walk
# the CFG one step at a time, always following the first successor.
l2 = list(cfg.graph.successors(l[0]))

In [40]:
l2

[<CFGENode  0x401090[11]>, <CFGENode _Z3funi+0x31 0x4011fa[23]>]

In [41]:
# Step 2: successors of the first entry of l2.
l3 = list(cfg.graph.successors(l2[0]))

In [42]:
l3

[<CFGENode std::istream::operator>>(int&) 0x500020>]

In [43]:
# Step 3: successors of the first entry of l3.
l4 = list(cfg.graph.successors(l3[0]))

In [44]:
l4

[<CFGENode _Z3funi+0x31 0x4011fa[23]>]

In [45]:
# Step 4: successors of the first entry of l4.
l5 = list(cfg.graph.successors(l4[0]))

In [46]:
l5

[<CFGENode _Z3funi+0x4d 0x401216[2]>, <CFGENode _Z3funi+0x48 0x401211[5]>]

In [47]:
# Step 5: successors of the first entry of l5.
l6 = list(cfg.graph.successors(l5[0]))

In [48]:
# Outgoing CFG edges of l5[0] together with their edge attributes.
cfg.graph.adj[l5[0]]

AtlasView({<CFGENode main+0x38 0x4012a6[14]>: {'jumpkind': 'Ijk_Ret', 'stmt_idx': -2, 'ins_addr': 4198935}})

In [49]:
l6

[<CFGENode main+0x38 0x4012a6[14]>]

In [50]:
# Outgoing CFG edges of the second 0x4011c9 node together with their edge attributes.
cfg.graph.adj[l[1]]

AtlasView({<CFGENode  0x401090[11]>: {'jumpkind': 'Ijk_Call', 'stmt_idx': -2, 'ins_addr': 4198901}, <CFGENode _Z3funi+0x31 0x4011fa[23]>: {'jumpkind': 'Ijk_FakeRet', 'stmt_idx': -2, 'ins_addr': 4198901}})

In [16]:
# import pyvex
# import archinfo

# # translate an AMD64 basic block (of nops) at 0x400400 into VEX
# irsb = pyvex.lift(b"\x90\x90\x90\x90\x90", 0x400400, archinfo.ArchAMD64())

# # pretty-print the basic block
# irsb.pp()

# # this is the IR Expression of the jump target of the unconditional exit at the end of the basic block
# print(irsb.next)

# # this is the type of the unconditional exit (i.e., a call, ret, syscall, etc)
# print(irsb.jumpkind)

# # you can also pretty-print it
# irsb.next.pp()

# # iterate through each statement and print all the statements
# for stmt in irsb.statements:
#     stmt.pp()

# # pretty-print the IR expression representing the data, and the *type* of that IR expression written by every store statement
# import pyvex
# for stmt in irsb.statements:
#     if isinstance(stmt, pyvex.IRStmt.Store):
#         print("Data:", end="")
#         stmt.data.pp()
#         print("")

#         print("Type:", end="")
#         print(stmt.data.result_type)
#         print("")

# # pretty-print the condition and jump target of every conditional exit from the basic block
# for stmt in irsb.statements:
#     if isinstance(stmt, pyvex.IRStmt.Exit):
#         print("Condition:", end="")
#         stmt.guard.pp()
#         print("")

#         print("Target:", end="")
#         stmt.dst.pp()
#         print("")

# # these are the types of every temp in the IRSB
# print(irsb.tyenv.types)

# # here is one way to get the type of temp 0
# print(irsb.tyenv.types[0])

IRSB {
   t0:Ity_I64

   00 | ------ IMark(0x400400, 1, 0) ------
   01 | ------ IMark(0x400401, 1, 0) ------
   02 | ------ IMark(0x400402, 1, 0) ------
   03 | ------ IMark(0x400403, 1, 0) ------
   04 | ------ IMark(0x400404, 1, 0) ------
   NEXT: PUT(rip) = 0x0000000000400405; Ijk_Boring
}
0x0000000000400405
Ijk_Boring
0x0000000000400405
------ IMark(0x400400, 1, 0) ------
------ IMark(0x400401, 1, 0) ------
------ IMark(0x400402, 1, 0) ------
------ IMark(0x400403, 1, 0) ------
------ IMark(0x400404, 1, 0) ------
['Ity_I64']
Ity_I64
