In [None]:
import polars as pl
import matplotlib.pyplot as plt

# Render and save figures at 150 dpi.
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.dpi'] = 150

# Input files: the baseline timings and the timings compared against them.
base_path = "base_timings.csv"
path = "timings.csv"

# These are all the timings we want to see
paths = [base_path, path]

# Read the CSVs, keeping only the two columns the analysis uses.
dfs = [
    pl.scan_csv(csv_path).select(pl.col("package"), pl.col("duration")).collect()
    for csv_path in paths
]

# Report how many records each file contains. The loop variables are named
# differently from the `path` setting above so that it is not rebound here;
# later cells read `path` and `paths`.
for csv_path, timings in zip(paths, dfs):
    print(f"{csv_path}: {timings.height} records")

# Define the histogram bins. Edges get coarser as durations grow; the final
# [threshold, threshold + 1] bin collects every solve capped at `threshold`.
threshold = 50
bins = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, threshold, threshold + 1]

# Cap each duration at `threshold` so slow outliers land in the last bin.
# `clip` is the native, vectorised equivalent of calling min(x, threshold)
# per row; the cast keeps the resulting column Float64.
dfs_capped = [
    df.select(
        pl.col("duration").cast(pl.Float64).clip(upper_bound=threshold)
    )
    for df in dfs
]

# Create one histogram per timings file, stacked vertically on a shared x axis.
# `squeeze=False` always returns a 2-D array of Axes, so this also works when
# `paths` holds a single file.
fig, axs = plt.subplots(len(paths), sharex=True, squeeze=False)

# Loop variables use their own names so the `axs`, `bins` and `path` globals
# are not rebound by the loop.
for csv_path, capped, ax in zip(paths, dfs_capped, axs.ravel()):
    _, _, bars = ax.hist(capped["duration"], bins=bins, density=True)
    ax.set_title(csv_path)
    # With density=True a bar's height is a density (fraction per second), so
    # the share of solves in a bin is height * width. The bins are not all one
    # second wide, so the height alone would under-report the wider bins.
    share_labels = [f'{bar.get_height() * bar.get_width():.1%}' for bar in bars]
    ax.bar_label(bars, fontsize=8, color='black', labels=share_labels)
    # Hide the y ticks and tick labels; the bar labels carry the values.
    ax.tick_params(axis='y', which='both', left=False, top=False, labelleft=False)

# Add figure-level axis labels and a title shared by all subplots
fig.supxlabel("Solve duration in seconds")
fig.supylabel("Percentage of solves")
fig.suptitle("Histogram of solve durations")


plt.show()


In [None]:
# Load the timings lazily. The files are taken from `paths` (baseline first,
# new timings second) rather than from the bare `path` name, which loops in
# the previous cell rebind. Separate names are used so the eagerly loaded
# `dfs` from the previous cell is not overwritten with lazy frames.
base_timings, new_timings = [
    pl.scan_csv(csv_path).select(pl.col("package"), pl.col("duration"))
    for csv_path in paths[:2]
]

# Compute the solver diffs (new minus baseline) per package. Negative values
# mean the second timings are faster. The inner join keeps only packages that
# are present in both files.
# NOTE(review): assumes each package appears once per file; duplicates would
# multiply rows in the join - confirm against the data.
df_diff = (
    new_timings
    .join(base_timings, on="package", suffix="_base")
    .select(
        pl.col("package"),
        (pl.col("duration") - pl.col("duration_base")).alias("duration"),
    )
    .collect()
)

# Create the histogram
plt.hist(df_diff["duration"], bins=40, density=True)
plt.xlabel("Difference in solve duration in seconds")
# density=True normalises the bars to a probability density (area sums to 1),
# so the axis is labelled as a density rather than a probability.
plt.ylabel("Probability density")

plt.show()