# BSim Evaluation

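In this notebook, we evaluate the performance of BSim by measuring the similarity it reports for the same source function compiled at different optimisation levels (O0 vs. O3).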

In [None]:
import rich.pretty

# Install rich's pretty-printer for values echoed by this interactive session.
rich.pretty.install()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Global plot styling: base font size of 12 for every figure in this notebook.
# No figure-size override is applied (a (10, 10) "figure"/figsize rc setting
# was tried previously and is disabled).
mpl.rc("font", size=12)

In [None]:
# Bare `%matplotlib` IPython magic -- presumably selects the default
# interactive GUI backend so figures open in their own windows; TODO confirm.
%matplotlib

In [None]:
import sqlalchemy as sa

In [None]:
from evaluatie import models as m

In [None]:
# Create a session from the project's models module (presumably a SQLAlchemy
# sessionmaker -- TODO confirm).
# NOTE(review): the query cells visible in this notebook read through
# `m.engine` rather than this session, and it is never closed here.
session = m.Session()

In [None]:
# Two independent aliases of every mapped class, so that a function can be
# paired with its counterpart from a different binary within one query.
LeftFun = sa.orm.aliased(m.Function)
LeftBin = sa.orm.aliased(m.Binary)
LeftBuildParams = sa.orm.aliased(m.BuildParameters)

RightFun = sa.orm.aliased(m.Function)
RightBin = sa.orm.aliased(m.Binary)
RightBuildParams = sa.orm.aliased(m.BuildParameters)

# Database-side call to `lshvector_compare` on the two functions' vectors
# (presumably provided by the BSim database extension -- TODO confirm).
compare_result = sa.func.lshvector_compare(
    LeftFun.vector,
    RightFun.vector,
)

# Treat the call as table-valued and pick its "sim" column as a scalar.
sim = compare_result.scalar_table_valued("sim")

# One row per (left, right) function pair; the only output column is "sim".
stmt = (
    sa.select(
        sim.label("sim"),
    )
    .select_from(
        LeftFun,
        RightFun,
    )
    .join(
        LeftBin,
        LeftFun.binary,
    )
    .join(
        RightBin,
        RightFun.binary,
    )
    # Different binaries that were built from the same package release.
    .where(
        LeftFun.binary_id != RightFun.binary_id,
        LeftBin.package_name == RightBin.package_name,
        LeftBin.package_version == RightBin.package_version,
    )
    # Same source-level function: identical name, file and line number.
    .where(
        LeftFun.name == RightFun.name,
        LeftFun.file == RightFun.file,
        LeftFun.lineno == RightFun.lineno,
    )
    # Only functions that have a vector on both sides. `is_not(None)` renders
    # `IS NOT NULL` explicitly instead of relying on an overloaded `!= None`.
    .where(
        LeftFun.vector.is_not(None),
        RightFun.vector.is_not(None),
    )
    .join(
        LeftBuildParams,
        LeftBin.build_parameters,
    )
    .join(
        RightBuildParams,
        RightBin.build_parameters,
    )
    # The optimisation level is the only build parameter allowed to differ
    # (left: O3, right: O0); every other build parameter compared here must match.
    # To compare equal optimisation levels instead, replace the two literal
    # filters with `LeftBuildParams.optimisation == RightBuildParams.optimisation`.
    .where(
        LeftBuildParams.optimisation == "O3",
        RightBuildParams.optimisation == "O0",
        LeftBuildParams.architecture == RightBuildParams.architecture,
        LeftBuildParams.compiler_backend == RightBuildParams.compiler_backend,
        LeftBuildParams.compiler_version == RightBuildParams.compiler_version,
        LeftBuildParams.bitness == RightBuildParams.bitness,
    )
)

In [None]:
# Execute the query through the project's engine and load the result set
# into a DataFrame; the only selected column is "sim".
df = pd.read_sql(stmt, con=m.engine)
# Bare expression so the notebook displays the frame.
df

In [None]:
from pathlib import Path

from matplotlib.ticker import PercentFormatter

fig, ax = plt.subplots()

# Distribution of the similarity scores in 0.1-wide bins over [0, 1].
# stat="probability" normalises the bar heights to fractions of all pairs.
sns.histplot(
    df["sim"],
    binwidth=0.1,
    binrange=(0, 1),
    stat="probability",
    color="blue",
    ax=ax,
)
ax.set_xticks(np.linspace(0, 1.0, 11))
ax.set_xlabel("BSim Similarity")
ax.set_ylabel("Percentage")
# Bar heights are fractions in [0, 1]; render the y ticks as percentages so
# they agree with the axis label and with the bar annotations below.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
ax.set_title("Histogram of self-similarity [O0 vs. O3]")

# Annotate every bar with its height as a whole-number percentage.
for container in ax.containers:
    ax.bar_label(container, fmt="{:.0%}")

# savefig does not create missing directories; make sure "out/" exists.
Path("out").mkdir(parents=True, exist_ok=True)
fig.savefig(
    "out/bsim:histogram.pdf",
    transparent=None,
    bbox_inches="tight",
)

In [None]:
from pathlib import Path

from matplotlib.ticker import PercentFormatter

fig, ax = plt.subplots()

# Cumulative distribution of the similarity scores in 0.1-wide bins over
# [0, 1]; with stat="probability" and cumulative=True each bar shows the
# fraction of pairs accumulated up to and including its bin.
sns.histplot(
    df["sim"],
    binwidth=0.1,
    binrange=(0, 1),
    stat="probability",
    cumulative=True,
    color="blue",
    ax=ax,
)
ax.set_xticks(np.linspace(0, 1.0, 11))
ax.set_xlabel("BSim Similarity")
ax.set_ylabel("Percentage")
# Bar heights are fractions in [0, 1]; render the y ticks as percentages so
# they agree with the axis label and with the bar annotations below.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
ax.set_title("Cumulative Histogram of self-similarity [O0 vs. O3]")

# Annotate every bar with its height as a whole-number percentage.
for container in ax.containers:
    ax.bar_label(container, fmt="{:.0%}")

# savefig does not create missing directories; make sure "out/" exists.
Path("out").mkdir(parents=True, exist_ok=True)
fig.savefig(
    "out/bsim:cumulative-histogram.pdf",
    transparent=None,
    bbox_inches="tight",
)