In [1]:
import angr
import networkx as nx
import matplotlib.pyplot as plt
import os
import claripy
import pandas as pd
import math
import sys

In [2]:
def generate_call_graph(project):

    # Set up the call graph analysis
    cfg = project.analyses.CFGEmulated(keep_state=True)

    # Retrieve the call graph
    call_graph = cfg.functions.callgraph
    
    # Filter out internal functions and keep only the explicitly defined functions
    defined_functions = project.kb.functions.values()
    program_functions = []
    program_functions_addr=[]
    program_functions_name=[]
    
    for function in defined_functions:
        if not function.is_simprocedure:
            program_functions_addr.append(function.addr)
            program_functions.append(function)
            program_functions_name.append(function.name)

    d={'name': program_functions_name,'address': program_functions_addr,'distance':[math.inf]*len(program_functions_addr), 'solver': [[None]]*len(program_functions_addr),  'values': [[None]]*len(program_functions_addr)}
    function_data=pd.DataFrame(data=d)

    # Create a subgraph for the program functions
    sub_graph = call_graph.subgraph(program_functions_addr)

    return (sub_graph, program_functions,function_data,cfg)

In [4]:
def find_func_address(target,func_addr):
    target_address = None

    #TODO without for loop
    for function in func_addr:
        if function.name == target:
                target_address = function.addr

    # Check if the function is found in the call graph
    if target_address is None:
        print(f"Error: '{target}' not found in the call graph.")
        return None

    return target_address

In [5]:
def nodes_distance(graph, trg):

    shortest_paths = nx.shortest_path_length(graph, target=trg)
    addresses=list(shortest_paths)
    addresses.reverse()

    return (addresses,shortest_paths)

In [13]:
def get_type(project, functions,cfg):

    types=[]
    # Set up the calling convention analysis for each function
    for f in functions:
        # Vriable recovery
        vr = project.analyses.VariableRecoveryFast(f)
        
        cca = project.analyses.CallingConvention(f,cfg=cfg,analyze_callsites=True)
        types.append(cca.prototype)
        
    return types

In [14]:
def find_succ(source,graph,addr,distance):
    
    elems_in_both_lists = set(addr) & set(list(graph.successors(source)))
    target_addr=[x for x in elems_in_both_lists if distance[source] > distance[x]]
    
    
    return target_addr

In [33]:
def get_solver(source,target,project,n,binary_path):
    

    # Symbolic input variables
    y = claripy.BVS("y", 100*8) # 100 bytes

    initial_state= project.factory.entry_state(addr=source,args=[binary_path,y])
    sm = project.factory.simgr(initial_state)
    sm.explore(find=target)

    # Get constraints leading to reaching the api_address
    constraints = []
    for path in sm.found:
        constraints.extend(path.solver.constraints)
        print(path.solver.eval(y, cast_to=byte)


    # Create a solver with all the constraints combined using the logical OR operator
    if constraints:
        combined_constraints = claripy.Or(*constraints)
        solver = claripy.Solver()
        solver.add(combined_constraints)
        solutions=solver.eval(y,n)
        #sol=solver.eval(y,n)
        #solutions=[[s] for s in solutions]
        print(solutions)
        #print(sol)
    else:
        solver=True
        solutions=[]

    # Convert solutions from bytes to strings
    solutions_as_strings = [sol.decode('utf-8') for sol in solutions]
    print(solutions_as_strings)
    return solver, solutions

In [34]:
def functions_dataframe(binary_path, api_call,n):

    # Check if the binary file exists
    if not os.path.isfile(binary_path):
        print(f"Error: File '{binary_path}' does not exist.")
        return 

    # Create an angr project
    project = angr.Project(binary_path, auto_load_libs=False)

    # Generate the call graph
    (call_graph, func_addr,function_data, cfg)=generate_call_graph(project)

    # Find the address of the function
    api_address=find_func_address(api_call,func_addr) 
    # Check if the function is found in the call graph
    if api_address is None:
        return 
    
    # Find minimum distance between nodes and target
    (nodes,distance)=nodes_distance(call_graph,api_address)

    # Get functions' type inputs
    type_inputs=get_type(project, func_addr,cfg)
    function_data['type']=type_inputs

    addr=nodes.copy() #non necessario
    #TODO in parallel
    for starting_address in nodes:
        i=function_data.index[function_data['address']==starting_address].item()
        function_data.loc[i,'distance']=distance[starting_address]
        if distance[starting_address]==0:
            continue
        addr.remove(starting_address)
        
        # Find for each node successors with smaller distance
        target_func=find_succ(starting_address,call_graph,addr,distance) #forse conviene non definire la funzione e mettere tutto nel main
        
        # Get the solver with constraints leading to reaching the target_func, and values to solve them
        s,v=get_solver(starting_address,target_func,project,n,binary_path)
        function_data.loc[i,'solver']=s
        function_data.at[i,'values']=v
    print(function_data.values.tolist())

    # Visualize the call graph
    #visualize(cfg,call_graph) #se eliminamo questa funzione possiamo togliere cfg da funzione generate_call-graph

    return function_data

In [35]:
binary_path='./test_char'
api_call='printf'
n=2
functions_dataframe(binary_path,api_call,n)

(0, 23689546086131422960647270026588478931532074235789438036179382904450240366918592625898413220651954314430049601701829119967217713075482397330387684250506304003974227539380644310764545984368872754291732775783027172102246808682496)


AttributeError: 'int' object has no attribute 'decode'