# NeighBSim Evaluation

In [1]:
import rich.pretty

rich.pretty.install()

In [2]:
import msgspec
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
import sqlalchemy as sa
import networkx as nx
import sklearn.metrics
from sklearn.metrics import roc_curve
from evaluatie.neighbsim.neighbsim import neighbsim, NeighBSimArgs
import itertools

In [3]:
from evaluatie import models as m
from evaluatie.utils import Dataset, Pair, DatasetSpec

In [6]:
def neighbsim_score_from_dataset(dataset: Dataset):
    """Run NeighBSim on every row of *dataset*'s frame.

    Each row must provide the columns ``qb_id``, ``qf_id``, ``tb_id`` and
    ``tf_id``. The return value is whatever ``DataFrame.apply(..., axis=1)``
    produces: one entry per row holding the object returned by ``neighbsim``,
    or ``None`` for rows where ``neighbsim`` raised ``ValueError``.
    """

    def _score_row(pair):
        query_binary = pair["qb_id"]
        query_function = pair["qf_id"]
        target_binary = pair["tb_id"]
        target_function = pair["tf_id"]

        # Every row is expected to list the binary with the smaller id as
        # the query side.
        assert query_binary < target_binary
        pair_args = NeighBSimArgs.from_binary_ids(
            query_binary_id=query_binary,
            target_binary_id=target_binary,
            dataset=dataset,
        )

        try:
            return neighbsim(
                query_function_id=query_function,
                target_function_id=target_function,
                args=pair_args,
            )
        except ValueError:
            # Root cause still unknown; the message observed is
            # "cost matrix is infeasible".
            # NOTE(review): presumably raised by the assignment solver used
            # inside neighbsim — confirm. Such rows get no result.
            return None

    # Apply row by row on a private copy of the dataset's frame.
    frame = dataset.to_frame().copy()
    return frame.apply(_score_row, axis=1)

In [None]:
# Build the evaluation frame and attach one column per scoring method.
# NOTE(review): `ds` and `bsim_score_from_dataset` are not defined in the
# visible cells (In [4] and In [5] are missing from this export) — confirm
# they are defined before this cell runs.
df = ds.to_frame()
df["bsim"] = bsim_score_from_dataset(ds)
# One entry per row: the object returned by neighbsim(), or None for rows
# where it raised ValueError.
df["neighbsim-result"] = neighbsim_score_from_dataset(ds)

In [None]:
# Unpack the per-row NeighBSim results into plain columns. Rows without a
# result are dropped before unpacking, so they receive no value in the
# derived columns.
valid_results = df["neighbsim-result"].dropna()

df["neighbsim"] = valid_results.apply(lambda res: res.score)
# Neighbourhood size = number of callers plus number of callees.
df["qneighbor-len"] = valid_results.apply(
    lambda res: len(res.qcallers) + len(res.qcallees)
)
df["tneighbor-len"] = valid_results.apply(
    lambda res: len(res.tcallers) + len(res.tcallees)
)

## Visualisation

In [None]:
import graphviz

In [None]:
def get_name(function_id: int, result):
    """Look up the ``"name"`` node attribute of *function_id*.

    The query call graph of ``result.args`` is searched first, then the
    target call graph. Returns ``None`` when the id is in neither graph.
    """
    call_graphs = (
        result.args.query_call_graph,
        result.args.target_call_graph,
    )
    for call_graph in call_graphs:
        if function_id in call_graph:
            return call_graph.nodes[function_id]["name"]
    return None

In [None]:
def get_sim(edge, result):
    """Return the matching weight of *edge* formatted with two decimals.

    The caller matching of *result* is consulted first, then the callee
    matching; the ``"weight"`` attribute of the first matching that contains
    *edge* is used.

    Raises:
        ValueError: if *edge* is in neither matching.
    """
    for matching in (result.caller_matching, result.callee_matching):
        if edge in matching.edges:
            weight = matching.edges[edge]["weight"]
            return f"{weight:.2f}"

    # Name the offending edge so a failure can be traced to its node pair.
    raise ValueError(
        f"edge {edge!r} is in neither the caller nor the callee matching"
    )

In [None]:
# Draw one NeighBSim result as two fixed columns: query-side functions on the
# left (x = queryx), target-side functions on the right (x = targetx). Callers
# start at y = callery, the compared functions sit at y = funy, and callees
# start one row further at y = calleey.
# NOTE(review): `result`, `qf_id` and `tf_id` are not assigned in any visible
# cell — they must be set (e.g. picked from `df`) before running this cell.
queryx = 0
targetx = 500

ydist = 50
callery = 0
funy = ydist * max(len(result.qcallers), len(result.tcallers))
calleey = funy + ydist

dot = graphviz.Graph()

# (function ids, column x, y of the first node) for each neighbour group.
neighbor_columns = (
    (result.qcallers, queryx, callery),
    (result.tcallers, targetx, callery),
    (result.qcallees, queryx, calleey),
    (result.tcallees, targetx, calleey),
)
for neighbors, column_x, first_y in neighbor_columns:
    for i, neighbor in enumerate(neighbors):
        dot.node(
            str(neighbor),
            pos=f"{column_x},{first_y + i * ydist}!",
            label=get_name(neighbor, result),
        )

# The two functions under comparison. Their positions carry the same "!"
# suffix as every other node so that all positions are specified uniformly.
for function_id, column_x in ((qf_id, queryx), (tf_id, targetx)):
    dot.node(
        str(function_id),
        pos=f"{column_x},{funy}!",
        label=get_name(function_id, result),
    )
dot.edge(
    str(qf_id),
    str(tf_id),
)

# One labelled edge per matched pair, first for callers, then for callees.
for matching in (result.caller_matching, result.callee_matching):
    for query_neighbor, target_neighbor in matching.edges:
        dot.edge(
            str(query_neighbor),
            str(target_neighbor),
            label=get_sim((query_neighbor, target_neighbor), result),
            headport="w",
            tailport="e",
        )

In [None]:
# Show the edges of the callee matching for manual inspection.
result.callee_matching.edges

In [None]:
# Render the graph with the neato engine under the name "matching".
# NOTE(review): neato_no_op=2 presumably corresponds to `neato -n2`, i.e. the
# `pos` attributes set on the nodes are used as-is instead of computing a
# layout — confirm against the graphviz package documentation.
dot.render(
    "matching",
    engine="neato",
    neato_no_op=2,
)