# Call-Graph Evaluation
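This notebook analyzes how the call graphs of paired query and target binaries differ (across optimization levels, architectures, and inlining settings) by comparing their in-/out-degree distributions and their node and edge counts.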

In [None]:
import rich.pretty

# Turn on rich's pretty-printer for values echoed in this session.
rich.pretty.install()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
import sqlalchemy as sa
import networkx as nx
import scipy
import scipy.stats
import numpy as np
import pathlib as pl
import networkx as nx

In [None]:
from tqdm import tqdm

# Register tqdm with pandas; the `progress_apply` calls further down rely on it.
tqdm.pandas()

## Creating pickles

In [None]:
from evaluatie.utils import call_graph_from_binary_id
from evaluatie import models as m

In [None]:
# NOTE(review): `tqdm.pandas()` is already called in an earlier cell; this repeat
# only matters when this section is executed on its own.
tqdm.pandas()

In [None]:
# Maps dataset name -> frame read from its CSV; populated by the loading cell below.
name2frame: dict[str, pd.DataFrame] = {}

In [None]:
# Every dataset a pickle can be created for.
names = [
    # Optimization
    "cg:o0Xo2",
    "cg:o0Xo3",
    "cg:osXo0",
    "cg:osXo2",
    "cg:osXo3",
    # Architecture
    "cg:armXmips",
    "cg:x86Xarm",
    "cg:x86Xmips",
    # Others
    "cg:random",
    "cg:noinlineXinline",
]

# Only datasets WITHOUT a pickle on disk are read (from their CSV) here, so the
# pickling cell below works on just the ones that are still missing.
for name in names:
    pickle_path = pl.Path(f"datasets/{name}.pickle")
    if not pickle_path.exists():
        name2frame[name] = pd.read_csv(f"datasets/{name}.csv")

In [None]:
# For every loaded frame: attach the query/target call graphs, persist the
# result as a pickle, then keep only the graph-free columns in memory.
with m.Session() as session:
    for name, frame in list(name2frame.items()):
        print(name)
        if len(frame) > 4_000:
            print(f"Sampling from {name}")
            # NOTE(review): this keeps the FIRST 3000 rows (no random draw), and
            # the 4000-row threshold differs from the 3000-row cut — confirm.
            frame = frame.iloc[0:3000].copy()

        # Query side first, then target side: binary id column -> graph column.
        for id_col, graph_col in (("qb_id", "qcg"), ("tb_id", "tcg")):
            frame[graph_col] = frame[id_col].progress_apply(
                call_graph_from_binary_id,
                args=(session,),
            )

        frame.to_pickle(f"datasets/{name}.pickle")

        # The graphs now live in the pickle; hold on to the light columns only.
        name2frame[name] = frame.drop(columns=["qcg", "tcg"]).copy()
        del frame
        # Presumably discards session state accumulated for this dataset
        # before the next one is processed — confirm.
        session.rollback()

## Evaluating

In [None]:
# Start from an empty mapping so that only the pickles selected below are evaluated.
name2frame: dict[str, pd.DataFrame] = {}

# Entries that are commented out are skipped in this run.
names = [
    # Optimization
    # "cg:o0Xo2",
    # "cg:o0Xo3",
    "cg:osXo0",
    "cg:osXo2",
    "cg:osXo3",
    # Architecture
    # "cg:armXmips",
    # "cg:x86Xarm",
    # "cg:x86Xmips",
    # Others
    # "cg:random",
    # "cg:noinlineXinline",
]
for name in names:
    pickle_path = pl.Path(f"datasets/{name}.pickle")
    # These files are expected to carry the `qcg`/`tcg` graph columns written
    # by the pickling cell above.
    name2frame[name] = pd.read_pickle(pickle_path)

In [None]:
# NOTE(review): `qcg` and `tcg` are not assigned in any cell visible here, so they
# must be defined beforehand (presumably one query/target call-graph pair — confirm).
nx.graph_edit_distance(qcg, tcg)

In [None]:
def energy_distance_from_distributions(query_indegs, target_indegs):
    """Return the SciPy energy distance between two samples.

    Args:
        query_indegs: Observed values for the query side.
        target_indegs: Observed values for the target side.

    Returns:
        The value of ``scipy.stats.energy_distance`` for the two samples.
    """
    distance = scipy.stats.energy_distance(query_indegs, target_indegs)
    return distance


def mean_distance_from_arrays(query_indeg, target_indegs):
    """Return the signed difference between the sample means (target - query).

    The sign convention matches ``distance_from_scalars``: a positive result
    means the target mean is larger than the query mean.

    Args:
        query_indeg: Values observed on the query side.
        target_indegs: Values observed on the target side.

    Returns:
        ``mean(target_indegs) - mean(query_indeg)``.
    """
    return np.mean(target_indegs) - np.mean(query_indeg)


def distance_from_scalars(query_value, target_value):
    """Return the signed difference ``target_value - query_value``.

    A positive result means the target value is the larger one.
    """
    delta = target_value - query_value
    return delta

In [None]:
def node_count_from_graph(graph: nx.DiGraph):
    """Return how many nodes ``graph`` contains."""
    return graph.number_of_nodes()


def edge_count_from_graph(graph: nx.DiGraph):
    """Return how many edges ``graph`` contains."""
    return graph.number_of_edges()


def in_degree_from_graph(graph: nx.DiGraph) -> np.ndarray:
    """Return an array holding the in-degree of every node in ``graph``.

    The values appear in the order in which ``graph.in_degree`` yields them.
    """
    # `graph.in_degree` yields (node, degree) pairs; only the degree is kept.
    degrees = [degree for _node, degree in graph.in_degree]
    return pd.Series(degrees).to_numpy()


def out_degree_from_graph(graph: nx.DiGraph) -> np.ndarray:
    """Return an array holding the out-degree of every node in ``graph``.

    The values appear in the order in which ``graph.out_degree`` yields them.
    """
    # `graph.out_degree` yields (node, degree) pairs; only the degree is kept.
    degrees = [degree for _node, degree in graph.out_degree]
    return pd.Series(degrees).to_numpy()

In [None]:
# Histogram bin width per plotted feature (looked up by `plot_distance`).
feature2binwidth = {
    # Degree features share one fine-grained width.
    **dict.fromkeys(("indegree", "outdegree"), 0.01),
    # Count features use a width of one.
    **dict.fromkeys(("#nodes", "#edges"), 1),
}

In [None]:
def prepare_figure(names=None, features=None, figsize=(14, 8)):
    """Create a figure with one subplot row per feature and one column per dataset.

    Within a row, every subplot shares its x and y axis with the subplot to
    its left, so all plots of one feature end up on a common scale.

    Args:
        names: Dataset names, one per column. Defaults to
            ``["cg:osXo0", "cg:osXo2", "cg:osXo3"]``.
        features: Feature keys, one per row. Defaults to
            ``["indegree", "outdegree", "#nodes", "#edges"]``.
        figsize: Figure size forwarded to ``plt.figure``.

    Returns:
        A ``(fig, feature2axs)`` tuple, where ``feature2axs`` maps each feature
        key to the list of axes of its row, ordered left to right.
    """
    if names is None:
        names = [
            "cg:osXo0",
            "cg:osXo2",
            "cg:osXo3",
        ]
    if features is None:
        features = [
            "indegree",
            "outdegree",
            "#nodes",
            "#edges",
        ]

    nrows = len(features)
    ncols = len(names)

    fig = plt.figure(figsize=figsize)

    feature2axs: dict[str, list[mpl.axes.Axes]] = {feature: [] for feature in features}

    for row, feature in enumerate(features):
        # Sharing restarts on every row: features are not on comparable scales.
        prev_ax = None
        for col in range(ncols):
            # Subplot indices are 1-based and run row by row.
            index = row * ncols + col + 1
            ax = fig.add_subplot(
                nrows,
                ncols,
                index,
                sharey=prev_ax,
                sharex=prev_ax,
            )

            feature2axs[feature].append(ax)

            prev_ax = ax

    return fig, feature2axs

In [None]:
def plot_distance(frame: pd.DataFrame, *, feature, ax, distance_from_feature, feature_from_graph):
    """Draw a histogram of per-row query/target feature distances onto ``ax``.

    Args:
        frame: Rows to plot; its ``qcg`` and ``tcg`` columns are read.
        feature: Key used to look up the bin width in ``feature2binwidth``.
        ax: Axes handed to ``sns.histplot``.
        distance_from_feature: Called per row as
            ``distance_from_feature(query_feature, target_feature)``.
        feature_from_graph: Applied to every ``qcg`` and every ``tcg`` entry.
    """
    query_col = "qfeature"
    target_col = "tfeature"

    # Column-less copy of `frame` (same index) extended by the two feature columns.
    features_df = frame[[]].assign(
        **{
            query_col: frame["qcg"].apply(feature_from_graph),
            target_col: frame["tcg"].apply(feature_from_graph),
        }
    )

    features_df["dist"] = features_df.apply(
        lambda row: distance_from_feature(row[query_col], row[target_col]),
        axis=1,
    )

    print("Plotting")
    sns.histplot(
        data=features_df,
        x="dist",
        ax=ax,
        stat="probability",
        binwidth=feature2binwidth[feature],
        kde=True,
        kde_kws={"cut": 0},
    )

In [None]:
# Datasets plotted left to right. The same list drives both the column titles
# and the plotted frames, so a title can never drift away from its data.
dataset_names = ["cg:osXo0", "cg:osXo2", "cg:osXo3"]

# One figure row per entry:
# (feature key, graph -> feature, (query feature, target feature) -> distance)
feature_specs = [
    ("indegree", in_degree_from_graph, energy_distance_from_distributions),
    ("outdegree", out_degree_from_graph, energy_distance_from_distributions),
    ("#nodes", node_count_from_graph, distance_from_scalars),
    ("#edges", edge_count_from_graph, distance_from_scalars),
]

fig, feature2axs = prepare_figure()

# Column titles are placed on the "indegree" row only.
for title_ax, dataset_name in zip(feature2axs["indegree"], dataset_names):
    title_ax.set_title(dataset_name)

# Axes of one row share their x axis, so limiting the first one limits the row.
feature2axs["#nodes"][0].set_xlim(-50, 50)
feature2axs["#edges"][0].set_xlim(-50, 50)

# Plot row by row, left to right.
for feature, feature_from_graph, distance_from_feature in feature_specs:
    for ax, dataset_name in zip(feature2axs[feature], dataset_names):
        plot_distance(
            name2frame[dataset_name],
            ax=ax,
            distance_from_feature=distance_from_feature,
            feature_from_graph=feature_from_graph,
            feature=feature,
        )

In [None]:
# Bare expression as the cell's last statement so the notebook displays the figure.
fig

### Node count