# Call-Graph Evaluation
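This notebook analyzes how the call graphs of the two binaries in each pair (`qb` and `tb`) differ in their in-degree distributions. For the `xo`, `xaxb`, `xo_o0xo3`, and `xo_o2xo3` pair sets it computes the energy distance and the difference of mean in-degrees, and plots a histogram of each.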

In [1]:
import rich.pretty

# Turn on rich's pretty-printing for values echoed by the interactive session,
# so bare expressions at the end of a cell render with rich formatting.
rich.pretty.install()

In [2]:
import msgspec
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
import sqlalchemy as sa
import networkx as nx
import sklearn.metrics
from sklearn.metrics import roc_curve
from evaluatie.neighbsim.neighbsim import neighbsim, NeighBSimArgs
import itertools
from tqdm import tqdm
import scipy
import scipy.stats
import numpy as np

In [3]:
from evaluatie import models as m
from evaluatie.data import Dataset, Pair, DatasetSpec

In [4]:
# Register tqdm's pandas integration; the `progress_apply` calls in the cells
# below depend on it being active.
tqdm.pandas()

In [8]:
def in_degree_from_graph(graph: nx.DiGraph) -> np.ndarray:
    """Return the in-degree of every node in ``graph`` as a 1-D integer array.

    Args:
        graph: Directed graph whose ``in_degree`` view yields
            ``(node, degree)`` pairs.

    Returns:
        An ``int64`` array with one entry per node, in the iteration order of
        ``graph.in_degree``. A graph without nodes yields an empty ``int64``
        array.
    """
    # Build the array directly instead of going through a pandas Series: the
    # Series round trip produced a non-integer dtype (object or float,
    # depending on the pandas version) when the graph had no nodes.
    return np.fromiter(
        (degree for _, degree in graph.in_degree),
        dtype=np.int64,
    )

In [None]:
# NOTE(review): this cell relies on `xo_df`, `xaxb_df`, `xo_o0xo3_df`,
# `xo_o2xo3_df` and `call_graph_from_binary_id` being defined by cells that are
# not visible here — confirm they run before this one on a fresh kernel.
with m.Session() as session:
    # A single session is shared by every lookup. Frames are processed in the
    # order xo, xaxb, xo_o0xo3, xo_o2xo3, and within each frame the `qb`
    # column is filled before the `tb` column.
    for pairs_df in (xo_df, xaxb_df, xo_o0xo3_df, xo_o2xo3_df):
        for id_col, cg_col in (("qb_id", "qb_cg"), ("tb_id", "tb_cg")):
            pairs_df[cg_col] = pairs_df[id_col].progress_apply(
                call_graph_from_binary_id,
                args=(session,),
            )

In [None]:
# Derive the in-degree array of every call graph. Frames are handled in the
# order xo, xaxb, xo_o0xo3, xo_o2xo3; for each one the `qb` column is computed
# before the `tb` column.
for pairs_df in (xo_df, xaxb_df, xo_o0xo3_df, xo_o2xo3_df):
    for cg_col, indeg_col in (("qb_cg", "qb_indeg"), ("tb_cg", "tb_indeg")):
        pairs_df[indeg_col] = pairs_df[cg_col].progress_apply(in_degree_from_graph)

In [None]:
def mean_distance_from_row(row):
    """Return the signed difference between the two mean in-degrees of a row.

    The result is ``mean(row["qb_indeg"]) - mean(row["tb_indeg"])``. It is not
    an absolute distance: the value is negative whenever the ``tb`` side has
    the larger mean.
    """
    qb_mean = np.mean(row["qb_indeg"])
    tb_mean = np.mean(row["tb_indeg"])
    return qb_mean - tb_mean

In [None]:
def energy_distance_from_row(row):
    """Return the energy distance between a row's two in-degree samples.

    ``row["qb_indeg"]`` and ``row["tb_indeg"]`` are passed, in that order, to
    ``scipy.stats.energy_distance`` and its result is returned unchanged.
    """
    # The original kept scipy.stats.wasserstein_distance as a commented-out
    # alternative taking the same two arguments; energy distance is the
    # metric actually in use.
    samples = (row["qb_indeg"], row["tb_indeg"])
    return scipy.stats.energy_distance(*samples)

In [None]:
# Row-wise energy distance for each pair set, evaluated in the order
# xo, xaxb, xo_o0xo3, xo_o2xo3.
(
    xo_energy_dist,
    xaxb_energy_dist,
    xo_o0xo3_energy_dist,
    xo_o2xo3_energy_dist,
) = (
    pairs_df.progress_apply(energy_distance_from_row, axis=1)
    for pairs_df in (xo_df, xaxb_df, xo_o0xo3_df, xo_o2xo3_df)
)

In [None]:
# 2x2 grid of energy-distance histograms, one panel per pair set. The x and y
# axes are shared so the panels can be compared directly.
(
    fig,
    ((xo_ax, xaxb_ax), (xo_o0xo3_ax, xo_o2xo3_ax)),
) = plt.subplots(
    nrows=2,
    ncols=2,
    sharex=True,
    sharey=True,
    figsize=(12, 6),
    dpi=120,
)

# Histogram settings common to all four panels.
kwargs = {
    "binwidth": 0.01,
    "stat": "probability",
    "kde": True,
}

# One (axes, data, title) triple per panel replaces four copy-pasted stanzas.
# Ending the cell on the loop also keeps the last `set_title` return value
# from being echoed as cell output.
for panel_ax, panel_data, panel_title in (
    (xo_ax, xo_energy_dist, "xo"),
    (xaxb_ax, xaxb_energy_dist, "xaxb"),
    (xo_o0xo3_ax, xo_o0xo3_energy_dist, "xo_o0xo3"),
    (xo_o2xo3_ax, xo_o2xo3_energy_dist, "xo_o2xo3"),
):
    sns.histplot(data=panel_data, ax=panel_ax, **kwargs)
    panel_ax.set_title(panel_title)

In [None]:
# Row-wise difference of mean in-degrees for each pair set, evaluated in the
# order xo, xaxb, xo_o0xo3, xo_o2xo3.
(
    xo_mean_dist,
    xaxb_mean_dist,
    xo_o0xo3_mean_dist,
    xo_o2xo3_mean_dist,
) = (
    pairs_df.progress_apply(mean_distance_from_row, axis=1)
    for pairs_df in (xo_df, xaxb_df, xo_o0xo3_df, xo_o2xo3_df)
)

In [None]:
# 2x2 grid of mean-in-degree-difference histograms, one panel per pair set.
# The x and y axes are shared so the panels can be compared directly.
(
    fig,
    ((xo_ax, xaxb_ax), (xo_o0xo3_ax, xo_o2xo3_ax)),
) = plt.subplots(
    nrows=2,
    ncols=2,
    sharex=True,
    sharey=True,
    figsize=(12, 6),
    dpi=120,
)

# Histogram settings common to all four panels.
kwargs = {
    "binwidth": 0.025,
    "stat": "probability",
    "kde": True,
    "cumulative": False,
}

# One (axes, data, title) triple per panel replaces four copy-pasted stanzas.
# The third title previously read "xo_o0x03" (digit zero instead of the letter
# "o"); it now matches the variable name and the energy-distance figure.
for panel_ax, panel_data, panel_title in (
    (xo_ax, xo_mean_dist, "xo"),
    (xaxb_ax, xaxb_mean_dist, "xaxb"),
    (xo_o0xo3_ax, xo_o0xo3_mean_dist, "xo_o0xo3"),
    (xo_o2xo3_ax, xo_o2xo3_mean_dist, "xo_o2xo3"),
):
    sns.histplot(data=panel_data, ax=panel_ax, **kwargs)
    panel_ax.set_title(panel_title)