In [None]:
import polars as pl
import matplotlib.pyplot as plt

# Render on-screen figures and saved images at 300 dpi.
plt.rcParams.update({"figure.dpi": 300, "savefig.dpi": 300})

# Timing files to compare; each one gets its own histogram panel further down.
paths = ["main_timings.csv", "timings.csv"]


def _load_timings(csv_path):
    """Read one timings CSV, keeping only the `package` and `duration` columns."""
    lazy_timings = pl.scan_csv(csv_path)
    return lazy_timings.select(pl.col("package"), pl.col("duration")).collect()


# Eagerly load every timing file, in the same order as `paths`.
dfs = list(map(_load_timings, paths))

# Define the histogram bins: unit-wide bins from 0 up to `threshold`, plus a
# final [threshold, threshold + 1] bin that collects every capped duration.
# The edges are derived from `threshold` so they stay consistent if it changes.
threshold = 10
bins = list(range(threshold + 2))

# Cap every duration at `threshold` so that slow outliers land in the last bin.
# A native `clip` expression replaces the per-row Python lambda; the cast keeps
# the resulting column Float64, as before.
dfs_capped = [
    df.select(
        pl.col("duration").cast(pl.Float64).clip(upper_bound=float(threshold))
    )
    for df in dfs
]

# Create one histogram panel per timing file, stacked and sharing the x axis.
# squeeze=False always yields a 2-D array of Axes, so a single file works too.
fig, axs = plt.subplots(len(paths), sharex=True, squeeze=False)

# Use a dedicated loop variable (`ax`) so the `axs` array is not overwritten
# by the last Axes and the cell can be re-run or extended safely.
for path, df_capped, ax in zip(paths, dfs_capped, axs.ravel()):
    # density=True normalises the total area to 1; because the bins are one
    # unit wide, each bar height is the fraction of solves in that bin.
    ax.hist(df_capped["duration"], bins=bins, density=True)
    ax.set_title(path)

# Label the shared axes and the figure as a whole.
# NOTE: the rightmost bar holds every duration at or above `threshold`.
fig.supxlabel("Duration in seconds")
fig.supylabel("Fraction of successful solves")
fig.suptitle("Histogram of solve durations")

plt.show()


In [None]:
paths = ["timings.csv", "main_timings.csv"]

# Lazily scan each timing file, keeping the package name and its duration.
dfs = []
for path in paths:
    lazy_timings = pl.scan_csv(path).select(pl.col("package"), pl.col("duration"))
    dfs.append(lazy_timings)

# Compute the per-package solver diffs: duration in the first file minus the
# duration in the second. After the join, the second frame's duration column
# is exposed as "duration_right"; the result keeps the name "duration".
lhs, rhs = dfs[0], dfs[1]
duration_delta = pl.col("duration") - pl.col("duration_right")
df_diff = (
    lhs.join(rhs, on="package")
    .select(pl.col("package"), duration_delta)
    .collect()
)

# Create the histogram of per-package duration differences on an explicit
# figure/axes pair rather than the implicit pyplot state.
fig_diff, ax_diff = plt.subplots()
ax_diff.hist(df_diff["duration"], bins=40, density=True)

# Label the figure so it stands alone. The sign convention follows the order
# of `paths`: first file minus second file.
ax_diff.set_xlabel(f"Duration difference in seconds ({paths[0]} - {paths[1]})")
ax_diff.set_ylabel("Density")
ax_diff.set_title("Histogram of solve duration differences")

plt.show()