In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the benchmark results CSV (relative path) into the DataFrame used by every cell below
df = pd.read_csv(filepath_or_buffer="benchmark_results.csv")

In [None]:
# Boolean mask of the rows whose reported result is 'verification-failed'.
failed_mask = df['Result'].eq('verification-failed')

# Sanity check: every such row must be flagged Satisfiable == False.
assert df.loc[failed_mask, 'Satisfiable'].eq(False).all(), \
    "There are rows with 'verification-failed' where Satisfiable is not False"

# Relabel those rows as 'satisfiable' answers (in place, on the shared `df`).
# NOTE(review): presumably the solver claimed satisfiability and the claim
# failed verification - confirm against the benchmark harness.
df.loc[failed_mask, 'Result'] = 'satisfiable'

In [None]:
# Percentage of correct results per solver.
# A row counts as correct when the reported 'Result' agrees with the
# 'Satisfiable' flag ('satisfiable' with True, 'unsatisfiable' with False).
# Rows still labelled 'verification-failed' are left out of the computation.
valid_results = df[df['Result'] != 'verification-failed']
is_correct = (
    ((valid_results['Result'] == 'satisfiable') & (valid_results['Satisfiable'] == True)) |
    ((valid_results['Result'] == 'unsatisfiable') & (valid_results['Satisfiable'] == False))
)

# Vectorised group mean instead of groupby(...).apply(lambda ...): it yields the
# same percentages without the pandas (>= 2.2) deprecation warning about apply
# operating on the grouping columns, and without a Python-level call per group.
percent_correct_per_solver = (
    is_correct
    .groupby(valid_results['Solver'])
    .mean()
    .mul(100)
    .reset_index(name='% Correct Results')
)

# Plotting
fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(
    percent_correct_per_solver['Solver'],
    percent_correct_per_solver['% Correct Results'],
    color='lightgreen',
)
# Bars on a categorical axis sit at x = 0, 1, 2, ..., so the enumeration index
# is the x position of each bar; the label goes one unit above the bar top.
for position, value in enumerate(percent_correct_per_solver['% Correct Results']):
    ax.text(position, value + 1, f"{value:.1f}%", ha='center', va='bottom', fontsize=10)
ax.set_xlabel('Solvers')
ax.set_ylabel('Percentage (%)')
ax.set_title('Percentage of Correct Results per Solver')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Percentage of correct results for each (Solver, Satisfiable) group.
# A row counts as correct when the reported 'Result' agrees with the
# 'Satisfiable' flag ('satisfiable' with True, 'unsatisfiable' with False).
# Rows still labelled 'verification-failed' are left out of the computation.
valid_rows = df[df['Result'] != 'verification-failed']
row_is_correct = (
    ((valid_rows['Result'] == 'satisfiable') & (valid_rows['Satisfiable'] == True)) |
    ((valid_rows['Result'] == 'unsatisfiable') & (valid_rows['Satisfiable'] == False))
)

# The correctness flag is computed on the full frame *before* grouping.
# The previous groupby(...).apply(lambda g: ... g['Satisfiable'] ...) read a
# grouping column inside the group frame; pandas deprecated that in 2.2 and
# newer versions drop grouping columns from the frame handed to apply.
percent_correct = (
    row_is_correct
    .groupby([valid_rows['Solver'], valid_rows['Satisfiable']])
    .mean()
    .mul(100)
    .reset_index(name='% Correct Results')
)

# Pivot for plotting: one row per solver, one column per Satisfiable value
pivot = percent_correct.pivot(index='Solver', columns='Satisfiable', values='% Correct Results')

# Derive colours and legend labels from the columns that are actually present,
# so a dataset containing only one Satisfiable value is not mislabelled.
bar_colors = ["#FF7575" if flag else "#6797FF" for flag in pivot.columns]
legend_labels = ['Satisfiable' if flag else 'Unsatisfiable' for flag in pivot.columns]

# Plot
ax = pivot.plot(kind='bar', figsize=(14, 5), color=bar_colors)
ax.set_xlabel('Solvers')  # the bars are grouped by solver, matching the other charts
ax.set_ylabel('Percentage (%)')
ax.set_title('Percentage of Correct Results per Solver (by Satisfiable)')
ax.legend(legend_labels)
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
# Mean of 'Duration(ms)' for each solver, as a two-column frame
# ('Solver', 'Duration(ms)').
avg_duration_per_solver = (
    df['Duration(ms)']
    .groupby(df['Solver'])
    .mean()
    .reset_index()
)

# Bar chart: one bar per solver
fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(
    avg_duration_per_solver['Solver'],
    avg_duration_per_solver['Duration(ms)'],
    color='skyblue',
)
ax.set_xlabel('Solvers')
ax.set_ylabel('Average Duration (milliseconds)')
ax.set_title('Average Duration per Solver')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Group by solver and calculate the average memory.
# Stored under its own name: this cell previously reused
# `avg_duration_per_solver`, which overwrote the duration averages computed in
# the preceding cell with memory data.
avg_memory_per_solver = df.groupby('Solver')['Memory(MB)'].mean().reset_index()

# Plotting: one bar per solver
fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(avg_memory_per_solver['Solver'], avg_memory_per_solver['Memory(MB)'], color='skyblue')
ax.set_xlabel('Solvers')
ax.set_ylabel('Memory (MB)')
ax.set_title('Average Memory used per Solver')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Mean of 'CPU(%)' for each solver, as a two-column frame ('Solver', 'CPU(%)').
avg_cpu_per_solver = (
    df['CPU(%)']
    .groupby(df['Solver'])
    .mean()
    .reset_index()
)

# Bar chart: one bar per solver
fig, ax = plt.subplots(figsize=(14, 5))
ax.bar(
    avg_cpu_per_solver['Solver'],
    avg_cpu_per_solver['CPU(%)'],
    color='skyblue',
)
ax.set_xlabel('Solvers')
ax.set_ylabel('CPU (%)')
ax.set_title('Average CPU(%) used per Solver')
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()

In [None]:
# Summary statistics of 'Duration(ms)' per solver.
# Each entry maps an output column name to a (source column, aggregation)
# pair, the form expected by pandas named aggregation.
duration_col = 'Duration(ms)'
duration_agg_dict = {
    label: (duration_col, func)
    for label, func in [
        ('mean', 'mean'),
        ('std', 'std'),
        ('min', 'min'),
        ('max', 'max'),
        ('25%', lambda x: x.quantile(0.25)),
        ('50%', lambda x: x.quantile(0.5)),
        ('75%', lambda x: x.quantile(0.75)),
    ]
}

duration_by_solver = df.groupby('Solver')
duration_stats = duration_by_solver.agg(**duration_agg_dict).reset_index()
duration_stats  # last expression of the cell: rendered as the cell output

# # Optional: export the table to LaTeX
# duration_latex_table = duration_stats.to_latex(index=False, float_format="%.2f")
# print(duration_latex_table)

In [None]:
# Summary statistics of 'Memory(MB)' per solver.
# Each entry maps an output column name to a (source column, aggregation)
# pair, the form expected by pandas named aggregation.
memory_col = 'Memory(MB)'
memory_agg_dict = {
    label: (memory_col, func)
    for label, func in [
        ('mean', 'mean'),
        ('std', 'std'),
        ('min', 'min'),
        ('max', 'max'),
        ('25%', lambda x: x.quantile(0.25)),
        ('50%', lambda x: x.quantile(0.5)),
        ('75%', lambda x: x.quantile(0.75)),
    ]
}

memory_by_solver = df.groupby('Solver')
memory_stats = memory_by_solver.agg(**memory_agg_dict).reset_index()
memory_stats  # last expression of the cell: rendered as the cell output
# # Optional: export the table to LaTeX
# memory_latex_table = memory_stats.to_latex(index=False, float_format="%.2f")
# print(memory_latex_table)

In [None]:
# Summary statistics of 'CPU(%)' per solver.
# Each entry maps an output column name to a (source column, aggregation)
# pair, the form expected by pandas named aggregation.
cpu_col = 'CPU(%)'
cpu_agg_dict = {
    label: (cpu_col, func)
    for label, func in [
        ('mean', 'mean'),
        ('std', 'std'),
        ('min', 'min'),
        ('max', 'max'),
        ('25%', lambda x: x.quantile(0.25)),
        ('50%', lambda x: x.quantile(0.5)),
        ('75%', lambda x: x.quantile(0.75)),
    ]
}

cpu_by_solver = df.groupby('Solver')
cpu_stats = cpu_by_solver.agg(**cpu_agg_dict).reset_index()
cpu_stats  # last expression of the cell: rendered as the cell output
# # Optional: export the table to LaTeX
# cpu_latex_table = cpu_stats.to_latex(index=False, float_format="%.2f")
# print(cpu_latex_table)