# BSim Evaluation

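In this notebook, we evaluate the performance of BSim by looking at the similarity scores it assigns to pairs of functions that share the same name, source file, and line number but come from different binaries of the same package version.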

In [None]:
import rich.pretty

# Install rich's pretty-printer into the interactive session so that values
# echoed at the end of a cell are rendered by rich.
rich.pretty.install()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as mplt
from matplotlib import pyplot as plt
import numpy as np

In [None]:
%matplotlib

In [None]:
import sqlalchemy as sa

In [None]:
from evaluatie import models as m

In [None]:
# Create the database session used below to execute the similarity query.
session = m.Session()

In [None]:
# Each mapped class is aliased twice so the tables can be joined against
# themselves: "left" and "right" are the two sides of a function pair.
_pair_side = (m.Function, m.Binary, m.BuildParameters)
LeftFun, LeftBin, LeftBuildParams = (sa.orm.aliased(cls) for cls in _pair_side)
RightFun, RightBin, RightBuildParams = (sa.orm.aliased(cls) for cls in _pair_side)

# Database-side comparison of the two functions' vectors; only the "sim"
# element of the function's result is selected.
compare_result = sa.func.lshvector_compare(LeftFun.vector, RightFun.vector)
sim = compare_result.scalar_table_valued("sim")

stmt = (
    sa.select(sim.label("sim"))
    .select_from(LeftFun, RightFun)
    .join(LeftBin, LeftFun.binary)
    .join(RightBin, RightFun.binary)
    .where(
        # Different binaries that belong to the same package release.
        LeftFun.binary_id != RightFun.binary_id,
        LeftBin.package_name == RightBin.package_name,
        LeftBin.package_version == RightBin.package_version,
        # Same function at source level: name, file and line all agree.
        LeftFun.name == RightFun.name,
        LeftFun.file == RightFun.file,
        LeftFun.lineno == RightFun.lineno,
        # Both sides need a vector to compare. `!= None` is deliberate here:
        # SQLAlchemy overloads the operator to render `IS NOT NULL`, whereas
        # Python's `is not None` would be evaluated locally.
        LeftFun.vector != None,  # noqa: E711
        RightFun.vector != None,  # noqa: E711
    )
)

# Execute through the ORM session and materialise every result row.
rows = list(session.execute(stmt))

In [None]:
# Run the statement through pandas to get the similarity values as a
# DataFrame (the query selects a single column labelled "sim"), then display it.
df = pd.read_sql(stmt, con=m.engine)
df

In [None]:
# Histogram of the similarity scores: fraction of function pairs per 0.1-wide
# bin.
#
# A new figure is created explicitly. `plt.axes()` adds the axes to the
# *current* figure, and with the interactive `%matplotlib` backend that figure
# stays open between cell executions, so the plot would be drawn into (or on
# top of) whatever is already showing.
fig, ax = plt.subplots()

sns.histplot(
    df["sim"],
    binwidth=0.1,
    stat="probability",
    label="Distribution",
    color="blue",
    ax=ax,
)
# One tick per 0.1 step across the [0, 1] range.
ax.set_xticks(np.linspace(0, 1.0, 11))

# Annotate every bar with its height formatted as a whole percentage.
for container in ax.containers:
    ax.bar_label(container, fmt="{:.0%}")

ax.legend()


In [None]:

# Cumulative histogram of the similarity scores: fraction of function pairs
# accumulated over successive 0.1-wide bins.
#
# A new figure is created explicitly. `plt.axes()` adds the axes to the
# *current* figure, and with the interactive `%matplotlib` backend that figure
# stays open between cell executions, so this plot would otherwise be drawn
# into (or on top of) a figure that is already showing.
fig, ax = plt.subplots()

sns.histplot(
    df["sim"],
    binwidth=0.1,
    stat="probability",
    cumulative=True,
    label="Distribution",
    color="blue",
    ax=ax,
)
# One tick per 0.1 step across the [0, 1] range.
ax.set_xticks(np.linspace(0, 1.0, 11))

# Annotate every bar with its height formatted as a whole percentage.
for container in ax.containers:
    ax.bar_label(container, fmt="{:.0%}")

ax.legend()
