In [1]:
#!ls ~/trained_models/
import pandas as pd
# JSON columns make things look weird in notebook without this, so cap the rendered column width
pd.options.display.max_colwidth = 20

In [2]:
from itertools import chain
from pathlib import Path
from datatype_recovery.models.dataset import load_dataset_from_path, max_typesequence_len_in_dataset
from datatype_recovery.models.dataset.encoding import *

# imported explicitly, and after the wildcard import above, so that `torch` does not
# depend on whichever names the wildcard import happens to export
import torch
import torch_geometric.transforms as T
from torch_geometric.loader import DataLoader
from tqdm import tqdm

from datatype_recovery.models.metrics import acc_heuristic_numcorrect

def eval_model_on_dataset(model_path:Path, device:str, dataset_path:Path) -> float:
    '''
    Loads the dataset stored at dataset_path and scores the saved model against it.

    model_path: saved model file (forwarded unchanged to eval_model_on_subset)
    device: device string (forwarded unchanged to eval_model_on_subset)
    dataset_path: location handed to load_dataset_from_path and
                  max_typesequence_len_in_dataset

    Returns the accuracy value produced by eval_model_on_subset
    '''
    # arguments are evaluated left to right, so the dataset is loaded before
    # its maximum type sequence length is looked up
    return eval_model_on_subset(
        model_path,
        device,
        load_dataset_from_path(dataset_path),
        max_typesequence_len_in_dataset(dataset_path),
    )

def eval_model_on_subset(model_path:Path, device:str, dataset, max_true_seq_len:int) -> float:
    '''
    Evaluates the model on the given subset and returns the accuracy of the corrected
    model output against the dataset labels

    model_path: file passed to torch.load to obtain the model
    device: device the model and every batch are moved to
    dataset: dataset to evaluate (NOTE: its transform attribute is overwritten here)
    max_true_seq_len: max type sequence length of the labels in dataset

    Returns num_correct/len(dataset) as reported by acc_heuristic_numcorrect, or 0.0
    when the dataset is empty
    '''
    # NOTE: torch.load unpickles the file - only load models from trusted sources.
    # map_location restores the weights directly onto the requested device instead of
    # the device the model happened to be saved from
    model = torch.load(model_path, map_location=device)
    print(model)

    # take the max of model seq length and max seq length of dataset so we
    # calculate accuracy correctly (without truncating something)
    max_len = max(model.max_seq_len, max_true_seq_len)

    # prepare the data loaders
    batch_size = 64
    dataset.transform = T.Compose([ToBatchTensors(), ToFixedLengthTypeSeq(max_len)])

    # split the dataset into the part divisible by batch size and the leftovers
    # we can chain these together for performance - our metrics simply iterate
    # through all elements in the batch
    total = len(dataset)
    batched_total = total-(total%batch_size)
    batch_loader = DataLoader(dataset[:batched_total], batch_size=batch_size)
    leftovers_loader = DataLoader(dataset[batched_total:], batch_size=1)

    print('Running eval...')

    model.to(device)
    model.eval()
    num_correct = 0

    # inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used
    with torch.no_grad():
        for data in tqdm(chain(batch_loader, leftovers_loader), total=len(batch_loader)+len(leftovers_loader)):
            data = data.to(device)
            out = model(data.x, data.edge_index, data.batch)
            num_correct += acc_heuristic_numcorrect(data.y, out)

    # an empty dataset has nothing to get right; avoid dividing by zero
    accuracy = num_correct/total if total else 0.0
    print(f'Accuracy = {accuracy*100:,.2f}%')

    return accuracy

In [3]:
# TODO: eval on an interesting subset of the data
# from torch.utils.data import Subset
# Subset()

In [4]:
# locations of the test dataset and the trained model, both under the home directory
home_dir = Path.home()
dataset_path = home_dir / 'datasets' / 'test_ds'
model_path = home_dir / 'trained_models' / 'structural_3out_3hops_nocomp.pt'
del home_dir    # helper name only; keep the notebook namespace unchanged

# eval_model_on_dataset(model_path, 'cuda:3', dataset_path)

In [5]:
# load the dataset once at notebook scope; the cells below read its variables
# table and pass it to make_predictions_on_dataset
dataset = load_dataset_from_path(dataset_path)

In [6]:
# variables table of the dataset; as the output shows, each row carries both the
# *_Strip and the *_Debug columns plus the BinaryId/FunctionStart/Signature/Vartype
# columns that are later used as merge keys
df = dataset.read_vars_csv()
df

Unnamed: 0,FunctionStart,Signature,Name_Strip,Type_Strip,LocType_Strip,LocRegName_Strip,LocOffset_Strip,TypeCategory_Strip,TypeSeq_Strip,Name_Debug,...,LocRegName_Debug,LocOffset_Debug,TypeCategory_Debug,TypeSeq_Debug,HasDWARF,TypeJson_Debug,BinaryId,Vartype,IsReturnType_Debug,IsReturnType_Strip
0,1323424,17,auStack_8,uchar[8],stack,,-8.0,ARR,"ARR,uchar",auStack_8,...,,-8.0,ARR,"ARR,uchar",False,"{""kind"": ""ArrayT...",1000,l,False,False
1,1323657,"16,28,52,76,101,...",local_c,int32,stack,,-12.0,BUILTIN,int32,i,...,,-12.0,BUILTIN,int32,True,"{""kind"": ""Builti...",1000,l,False,False
2,1323772,"16,28,52,76,101,...",local_c,int32,stack,,-12.0,BUILTIN,int32,i,...,,-12.0,BUILTIN,int32,True,"{""kind"": ""Builti...",1000,l,False,False
3,1323887,172953797987,local_c,int32,stack,,-12.0,BUILTIN,int32,i,...,,-12.0,BUILTIN,int32,True,"{""kind"": ""Builti...",1000,l,False,False
4,1323980,219696104,local_c,int32,stack,,-12.0,BUILTIN,int32,,...,,,COMP,COMP,False,,1000,l,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49086,1055260,-1,,uint64,,,,BUILTIN,uint64,,...,,,BUILTIN,int32,True,"{""kind"": ""Builti...",2002,p,True,True
49087,1055568,-1,,void,,,,BUILTIN,void,,...,,,BUILTIN,void,False,"{""kind"": ""Builti...",2002,p,True,True
49088,1055680,-1,,void,,,,BUILTIN,void,,...,,,BUILTIN,void,False,"{""kind"": ""Builti...",2002,p,True,True
49089,1055688,-1,,void,,,,BUILTIN,void,,...,,,BUILTIN,int32,False,"{""kind"": ""Builti...",2002,p,True,True


In [7]:
# TODO: predict data type with model for each variable in dataset
# --> save results in a new DF using varid
# --> join (merge) predictions into var_df so we can analyze the results however we wish

# NOTE: we can join right away, but then don't forget to filter out variables we
# discarded (COMP and return types are dropped) when we compute METRICS

In [8]:
def make_predictions_on_dataset(model_path:Path, device:str, dataset, max_true_seq_len:int) -> pd.DataFrame:
    '''
    Runs the model on every element of the given dataset and returns a DataFrame containing
    the varid columns of each variable along with its predicted type sequence (raw and corrected)

    model_path: file passed to torch.load to obtain the model
    device: device the model and every sample are moved to
    dataset: dataset to predict on (NOTE: its transform attribute is overwritten here)
    max_true_seq_len: max type sequence length of the labels in dataset

    Returns a DataFrame with one row per dataset element and the columns
    BinaryId, FunctionStart, Signature, Vartype, RawPred, Pred
    '''
    # NOTE: torch.load unpickles the file - only load models from trusted sources.
    # map_location restores the weights directly onto the requested device instead of
    # the device the model happened to be saved from
    model = torch.load(model_path, map_location=device)
    print(model)

    # take the max of model seq length and max seq length of dataset so the
    # fixed-length transform below is sized without truncating something
    max_len = max(model.max_seq_len, max_true_seq_len)

    dataset.transform = T.Compose([ToBatchTensors(), ToFixedLengthTypeSeq(max_len)])

    # one variable per batch: each model output is decoded on its own and paired
    # with the single varid of that batch, so no batched/leftover split is needed
    loader = DataLoader(dataset, batch_size=1)

    print('Running eval...')

    model.to(device)
    model.eval()

    model_outputs = []

    # inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used
    with torch.no_grad():
        for data in tqdm(loader, total=len(loader)):
            data = data.to(device)
            out = model(data.x, data.edge_index, data.batch)
            raw = decode_typeseq(out, drop_empty_elems=True)
            corrected = decode_typeseq(out, force_valid_seq=True)

            # batch size is 1, so the first varid is the only one in the batch
            binid, funcstart, sig, vartype = data.varid[0]

            model_outputs.append((
                binid, funcstart, sig, vartype,
                ','.join(raw),
                ','.join(corrected),
            ))

    return pd.DataFrame.from_records(model_outputs, columns=['BinaryId','FunctionStart','Signature','Vartype','RawPred','Pred'])

In [9]:
# longest TypeSeq_Debug entry, measured in comma-separated elements
# (the element count of a sequence is always its number of commas plus one)
max_true_len = df['TypeSeq_Debug'].map(lambda typeseq: typeseq.count(',') + 1).max()
model_pred = make_predictions_on_dataset(
    model_path=model_path,
    device='cuda:1',
    dataset=dataset,
    max_true_seq_len=max_true_len,
)
model_pred

StructuralTypeSeqModel(
  (gat_layers): ModuleList(
    (0): GATConv(31, 128, heads=1)
    (1-2): 2 x GATConv(128, 128, heads=1)
  )
  (pred_head): Linear(128, 66, bias=True)
)
Running eval...


100%|██████████| 26785/26785 [02:18<00:00, 194.05it/s]


Unnamed: 0,BinaryId,FunctionStart,Signature,Vartype,RawPred,Pred
0,1000,1323424,17,l,"uint32,COMP,double",uint32
1,1000,1323657,"16,28,52,76,101,...",l,"PTR,COMP,double","PTR,COMP"
2,1000,1323657,66,p,"int32,int128,short",int32
3,1000,1323772,"16,28,52,76,101,...",l,"PTR,COMP,double","PTR,COMP"
4,1000,1323772,66,p,"int32,int128,short",int32
...,...,...,...,...,...,...
26780,2002,1055119,5358,l,"double,COMP,short",double
26781,2002,1055119,105,p,"int32,int128,short",int32
26782,2002,1055119,2688132,p,"int32,uchar,short",int32
26783,2002,1055260,3252268288,l,"PTR,COMP,short","PTR,COMP"


In [22]:
# attach the variable info to each prediction; the right join keeps one result
# row for every prediction row that has at most one match in df
mdf = pd.merge(
    left=df,
    right=model_pred,
    how='right',
    on=['BinaryId','FunctionStart','Signature','Vartype'],
)
mdf

Unnamed: 0,FunctionStart,Signature,Name_Strip,Type_Strip,LocType_Strip,LocRegName_Strip,LocOffset_Strip,TypeCategory_Strip,TypeSeq_Strip,Name_Debug,...,TypeCategory_Debug,TypeSeq_Debug,HasDWARF,TypeJson_Debug,BinaryId,Vartype,IsReturnType_Debug,IsReturnType_Strip,RawPred,Pred
0,1323424,17,auStack_8,uchar[8],stack,,-8.0,ARR,"ARR,uchar",auStack_8,...,ARR,"ARR,uchar",False,"{""kind"": ""ArrayT...",1000,l,False,False,"uint32,COMP,double",uint32
1,1323657,"16,28,52,76,101,...",local_c,int32,stack,,-12.0,BUILTIN,int32,i,...,BUILTIN,int32,True,"{""kind"": ""Builti...",1000,l,False,False,"PTR,COMP,double","PTR,COMP"
2,1323657,66,param_3,int64,register,rdx,16.0,BUILTIN,int64,b,...,PTR,"PTR,float",True,"{""kind"": ""Pointe...",1000,p,False,False,"int32,int128,short",int32
3,1323772,"16,28,52,76,101,...",local_c,int32,stack,,-12.0,BUILTIN,int32,i,...,BUILTIN,int32,True,"{""kind"": ""Builti...",1000,l,False,False,"PTR,COMP,double","PTR,COMP"
4,1323772,66,param_3,int64,register,rdx,16.0,BUILTIN,int64,b,...,PTR,"PTR,float",True,"{""kind"": ""Pointe...",1000,p,False,False,"int32,int128,short",int32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26780,1055119,5358,pcVar2,char*,register,rax,0.0,PTR,"PTR,char",pcVar2,...,PTR,"PTR,char",False,"{""kind"": ""Pointe...",2002,l,False,False,"double,COMP,short",double
26781,1055119,105,param_1,int32,register,edi,56.0,BUILTIN,int32,category,...,BUILTIN,int32,True,"{""kind"": ""Builti...",2002,p,False,False,"int32,int128,short",int32
26782,1055119,2688132,param_2,uint64,register,rsi,48.0,BUILTIN,uint64,variable,...,PTR,"PTR,char",True,"{""kind"": ""Pointe...",2002,p,False,False,"int32,uchar,short",int32
26783,1055260,3252268288,uVar1,uint64,register,rax,0.0,BUILTIN,uint64,pcVar1,...,PTR,"PTR,char",False,"{""kind"": ""Pointe...",2002,l,False,False,"PTR,COMP,short","PTR,COMP"


In [38]:
# percentage of merged rows whose corrected prediction equals TypeSeq_Debug exactly
acc_corrected = int((mdf['TypeSeq_Debug'] == mdf['Pred']).sum()) / len(mdf) * 100
acc_corrected

2.1056561508306886

In [40]:
# percentage of merged rows whose raw (uncorrected) prediction equals TypeSeq_Debug exactly
acc_raw = int((mdf['TypeSeq_Debug'] == mdf['RawPred']).sum()) / len(mdf) * 100
acc_raw

0.0

In [18]:
# number of variables that are neither COMP-typed nor return types
int(((df['TypeSeq_Debug'] != 'COMP') & ~df['IsReturnType_Debug']).sum())

26785

In [9]:
# inspect the varid of the first dataset element; make_predictions_on_dataset
# unpacks this tuple as (binid, funcstart, sig, vartype)
dataset[0].varid

(1000, 1323424, '17', 'l')

In [8]:
# NOTE(review): pandas is already imported in the first cell; this repeat only
# matters if this cell is run on its own
import pandas as pd

# pd.read_csv(dataset.root/dataset.raw_file_names[0])
from datatype_recovery.models.dataset import TypeSequenceDataset

# build the dataset class directly from the same dataset_path used earlier
ds = TypeSequenceDataset(dataset_path)

In [9]:
# table of experiment runs, read from the CSV path exposed by the dataset object
exp_runs = pd.read_csv(ds.exp_runs_path)
exp_runs

Unnamed: 0,RunGid,RunFolder,BinariesCsv,FuncsCsv,ParamsCsv,LocalsCsv
0,0,/home/cls0027/exp_builds/astera.exp/rundata/run1,/home/cls0027/exp_builds/astera.exp/rundata/ru...,/home/cls0027/exp_builds/astera.exp/rundata/ru...,/home/cls0027/exp_builds/astera.exp/rundata/ru...,/home/cls0027/exp_builds/astera.exp/rundata/ru...
