# Approach Evaluation

In this notebook, we evaluate the performance of our approach.

In [None]:
import rich.pretty

# Install rich's pretty-printer so that cell results are rendered by rich.
rich.pretty.install()

In [None]:
import IPython.display

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np
import sklearn.metrics
from sklearn.metrics import RocCurveDisplay
import sqlalchemy as sa
from sklearn.metrics import roc_curve
import msgspec
import itertools
import pathlib as pl
import networkx as nx
import tqdm

In [None]:
from evaluatie import models as m
from evaluatie import utils
from evaluatie.data import FunctionDataset, DatasetOptions

In [None]:
# Set the default font size to 12 for every figure drawn in this notebook.
mpl.rc("font", size=12)

In [None]:
def create_table(dataset: FunctionDataset, score_col: str) -> pd.DataFrame:
    """Build a table of ROC AUC values for every size / neighborhood-size pair.

    For each combination of the categories ``low``, ``medium``, ``high`` and
    ``all``, the rows of ``dataset.frame`` selected by the matching
    ``DatasetOptions`` indexer are turned into a ROC curve, and the area under
    that curve is written into the table.

    Args:
        dataset: Dataset whose ``frame`` provides a ``label`` column and the
            column named by ``score_col``.
        score_col: Name of the column holding the similarity scores.

    Returns:
        A float ``DataFrame`` whose rows are indexed by ``neighborhood_size``
        and whose columns are indexed by ``size``; each cell is the AUC of the
        corresponding subset.
    """
    categories = ["low", "medium", "high", "all"]

    # Allocate the table as float up front. Without an explicit dtype the
    # empty frame is object-typed and the AUC values would be stored as
    # generic Python objects rather than numbers.
    tbl = pd.DataFrame(
        index=pd.Index(categories, name="neighborhood_size"),
        columns=pd.Index(categories, name="size"),
        dtype=float,
    )

    for size, neighborhood_size in itertools.product(categories, categories):
        options = DatasetOptions(
            size=size,
            neighborhood_size=neighborhood_size,
        )
        subset_df = dataset.frame[options.indexer(dataset.frame)]

        # NOTE(review): roc_curve appears to reject NaN scores -- confirm that
        # callers only pass datasets without missing values in score_col.
        # The thresholds are not needed for the AUC, so they are discarded.
        fpr, tpr, _ = roc_curve(
            y_true=subset_df["label"],
            y_score=subset_df[score_col],
        )

        tbl.loc[neighborhood_size, size] = sklearn.metrics.auc(fpr, tpr)

    return tbl

In [None]:
def dataset_subsample(d: FunctionDataset) -> FunctionDataset:
    """Return a new dataset holding 1,000 sampled rows per query bin.

    The rows of ``d.frame`` are grouped by ``qsize`` and
    ``qneighborhood_size``; 1,000 rows are drawn from every group with a fixed
    random state, so repeated calls pick the same rows. The dataset name is
    carried over unchanged.
    """
    grouped = d.frame.groupby(by=["qsize", "qneighborhood_size"])
    sampled_frame = grouped.sample(n=1_000, random_state=0)
    return FunctionDataset(name=d.name, frame=sampled_frame)

## BSim

In [None]:
# BSim ROC AUC table for the "f:o0Xo2" dataset.
d = FunctionDataset.from_name("f:o0Xo2")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:o0Xo3" dataset.
d = FunctionDataset.from_name("f:o0Xo3")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:osXo0" dataset.
d = FunctionDataset.from_name("f:osXo0")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:osXo2" dataset.
d = FunctionDataset.from_name("f:osXo2")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:noinlineXinline" dataset.
d = FunctionDataset.from_name("f:noinlineXinline")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:x86Xarm" dataset.
d = FunctionDataset.from_name("f:x86Xarm")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:armXmips" dataset.
d = FunctionDataset.from_name("f:armXmips")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:x86Xmips" dataset.
d = FunctionDataset.from_name("f:x86Xmips")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:malware-analysis" dataset.
d = FunctionDataset.from_name("f:malware-analysis")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:firmware-analysis" dataset.
d = FunctionDataset.from_name("f:firmware-analysis")
create_table(d, score_col="bsim")

In [None]:
# BSim ROC AUC table for the "f:random" dataset.
d = FunctionDataset.from_name("f:random")
create_table(d, score_col="bsim")

## NeighBSim Evaluation

In [None]:
# Loaded datasets keyed by dataset name; filled by the loading loop further down.
name2dataset: dict[str, FunctionDataset] = {}

In [None]:
# Names of the datasets to load and evaluate. Entries that are commented out
# are not part of the current run.
names = [
    # Usecases
    # "f:firmware-analysis",
    # "f:malware-analysis",
    # "f:random",
    # Optimisation
    "f:o0Xo2",
    "f:o0Xo3",
    # "f:osXo0",
    # "f:osXo2",
    # "f:osXo3",
    # Architecture
    # "f:armXmips",
    # "f:x86Xarm",
    # "f:x86Xmips",
    # Inlining
    # "f:noinlineXinline",
]

In [None]:
# Load every selected dataset, print the fraction of missing "bsim" scores
# (before load_pickle) and of missing "neighbsim" scores (after load_pickle),
# and register the loaded dataset under its name.
for name in names:
    d = FunctionDataset.from_name(name)
    print(name)
    # Single quotes inside the replacement field keep these f-strings valid on
    # Python versions before 3.12, where reusing the enclosing quote character
    # is a syntax error.
    print(f"{d.frame['bsim'].isna().sum() / len(d.frame)}")
    d = d.load_pickle()
    print(f"{d.frame['neighbsim'].isna().sum() / len(d.frame)}")
    # d = d.dropna()

    name2dataset[name] = d

In [None]:
# For every loaded dataset show the NeighBSim AUC table followed by the BSim
# AUC table, each preceded by a heading naming the dataset and the score.
for name, d in name2dataset.items():
    sections = (
        ("neighbsim", f"{name} -- NeighBSim"),
        ("bsim", f"{name} -- BSim"),
    )
    for score_col, heading in sections:
        table = create_table(d, score_col=score_col)
        IPython.display.display(heading)
        IPython.display.display(table)

In [None]:
# Bins shown in the plot; the aggregate "all" category is dropped before plotting.
bins = ["low", "medium", "high"]

# One base x position per bin, spaced len(name2dataset) apart so that the
# markers of all datasets can be placed next to each other inside a group.
x = np.arange(len(bins) * len(name2dataset), step=len(name2dataset))
# Horizontal spacing used by the plotting loop: extra gap between consecutive
# datasets, and step between the size bins of one dataset.
inter_dataset_offset = 0.4
intra_dataset_offset = 0.2

In [None]:
# Matplotlib marker shapes; the plotting loop takes one per dataset, in order.
markers = ["o", "x", "1", "<", "D"]

In [None]:
# Line style of the vertical connector drawn for each size bin.
bin2line = {
    "low": "dotted",
    "medium": (0, (3, 1, 1, 1)),  # (offset, on/off dash sequence)
    "high": "solid",
}

In [None]:
# Create the 8 x 12 figure and the axes that the plotting loop draws into.
fig, ax = plt.subplots(figsize=(8, 12))

In [None]:
# For every dataset and size bin, plot the BSim ("mediumseagreen") and
# NeighBSim ("tomato") AUC of each neighborhood-size bin and join the two
# points with a grey vertical line.

# Cycle through the markers instead of using a plain iterator: with more
# datasets than marker shapes, next() on iter(markers) raises StopIteration.
marker_cycle = itertools.cycle(markers)

for i, dataset in enumerate(name2dataset.values()):
    marker = next(marker_cycle)
    # Drop the aggregate "all" row and column; only the individual bins are plotted.
    bsim_table = create_table(dataset, "bsim").drop(columns="all").drop(labels="all")
    neighbsim_table = create_table(dataset, "neighbsim").drop(columns="all").drop(labels="all")

    # Shift each dataset to the right of the previous one.
    dataset_offset = i * (inter_dataset_offset + 2 * intra_dataset_offset)

    for j, size_bin in enumerate(bins):
        offset = dataset_offset + j * intra_dataset_offset

        # Cast to float so that comparison and plotting work on numeric data
        # even when the table columns are object-typed.
        bsim_auc = bsim_table[size_bin].astype(float)
        neighbsim_auc = neighbsim_table[size_bin].astype(float)

        # The connector spans from the lower to the higher of the two AUCs.
        ymin = np.where(bsim_auc < neighbsim_auc, bsim_auc, neighbsim_auc)
        ymax = np.where(bsim_auc > neighbsim_auc, bsim_auc, neighbsim_auc)
        ax.vlines(x + offset, ymin=ymin, ymax=ymax, colors="grey", linestyles=bin2line[size_bin])

        ax.scatter(
            x=x + offset,
            y=bsim_auc,
            label=size_bin,
            color="mediumseagreen",
            alpha=1.0,
            marker=marker,
        )

        ax.scatter(
            x=x + offset,
            y=neighbsim_auc,
            label=size_bin,
            color="tomato",
            alpha=1.0,
            marker=marker,
        )

In [None]:
# Show the finished figure as the cell result.
fig

In [1]:
from evaluatie.neighbsim.neighbsim import neighbsim_lazy, NeighBSimLazyArgs, NeighBSimArgs
from evaluatie import models as m

In [None]:
# Pick the loaded "f:o0Xo2" dataset for a closer look at individual rows.
d = name2dataset["f:o0Xo2"]

In [None]:
# Identifier and BSim columns of the rows that have no NeighBSim score.
d.frame.loc[
    d.frame["neighbsim"].isna(),
    ["query_binary_id", "target_binary_id", "query_function_id", "target_function_id", "bsim"],
]

In [2]:
# Run neighbsim_lazy for one hard-coded pair inside an m.Session context.
# The arguments are built from binary ids 14223 and 14231; the two leading
# numbers passed to neighbsim_lazy are presumably the query and target
# function ids -- TODO confirm against the neighbsim_lazy signature.
with m.Session() as session:
    args = NeighBSimLazyArgs.from_binary_ids(14223, 14231, session)
    result = neighbsim_lazy(7609331, 7668160, args, session=session)

In [3]:
# Show the value returned by the neighbsim_lazy call.
result

NeighBSimResult(args=NeighBSimArgs(similarity_graph=<networkx.classes.graph.Graph object at 0x72ebc433faa0>, query_binary_id=14223, query_call_graph=<networkx.classes.digraph.DiGraph object at 0x72ebbe7b4830>, target_binary_id=14231, target_call_graph=<networkx.classes.digraph.DiGraph object at 0x72ebbeaeea80>), qcallers=[7602256], tcallers=[], qcallees=[7609331], tcallees=[7668160, 7671693], caller_matching=<networkx.classes.graph.Graph object at 0x72ebbe7b7cb0>, callee_matching=<networkx.classes.graph.Graph object at 0x72ebbdddc050>, score=0.10432955940183293)