In [1]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt

In [None]:
# Load the checklist answers for every paper in one frame.
df = pd.read_csv("data/checklist_answers_50_papers/all_papers.csv")

# Split the rows by their `venue` value. Explicit copies are taken because a
# later cell assigns parsed columns back onto these frames in place; writing
# into a filtered selection of `df` triggers pandas' SettingWithCopyWarning
# and makes it unclear whether `df` itself is affected.
df_o3 = df[df["venue"] == "ICLR2024_o3"].copy()
df_o4 = df[df["venue"] == "ICLR2024_o4_mini"].copy()

In [None]:
def compute_average_variance(df_subset, score_cols):
    """Return the mean per-row score variance for each requested column.

    Each cell of a score column is expected to hold a sequence of numeric
    scores. The population variance (``np.var``, ddof=0) is computed for every
    row and then averaged over the rows of ``df_subset``.

    Parameters
    ----------
    df_subset : pandas.DataFrame
        Frame containing the columns named in ``score_cols``.
    score_cols : iterable of str
        Names of the columns to summarise.

    Returns
    -------
    dict
        Maps each column name to the mean of its per-row variances. Rows whose
        cell is not a non-empty list/tuple contribute NaN and are skipped by
        ``Series.mean``; a column with no usable rows maps to NaN.
    """
    def _row_variance(scores):
        # Only non-empty sequences have a defined variance. Checking the
        # length first avoids the RuntimeWarning np.var emits for an empty
        # input, and tuples are accepted because ast.literal_eval yields them
        # for cells written as "(1, 2, 3)".
        if isinstance(scores, (list, tuple)) and len(scores) > 0:
            return np.var(scores)
        return np.nan

    return {
        col: df_subset[col].apply(_row_variance).mean()
        for col in score_cols
    }

def convert_string_to_int(df, score_cols):
    """Parse string cells of the given columns into Python literals, in place.

    Every cell that is a ``str`` is passed through ``ast.literal_eval``; cells
    of any other type are left untouched, so calling this twice on the same
    frame is harmless. Despite the function's name, the parsed value is
    whatever literal the string encodes (for example a list of scores).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; the listed columns are overwritten on this object.
    score_cols : iterable of str
        Names of the columns to parse.

    Returns
    -------
    None

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a string is not a valid
        Python literal.
    """
    def _parse_cell(cell):
        # Non-string cells (already parsed values, NaN, ...) pass through.
        if not isinstance(cell, str):
            return cell
        return ast.literal_eval(cell)

    for name in score_cols:
        parsed = df[name].apply(_parse_cell)
        df[name] = parsed


In [None]:
# Every column whose name ends in '_score' is treated as a checklist score column.
score_cols = [col for col in df.columns if col.endswith('_score')]

# Parse the stringified score cells of both per-model frames (modifies them in place).
convert_string_to_int(df_o3, score_cols)
convert_string_to_int(df_o4, score_cols)

# Mean per-row variance of each score column, per model.
variance_o3 = compute_average_variance(df_o3, score_cols)
variance_o4 = compute_average_variance(df_o4, score_cols)

comparison_df = pd.DataFrame({
    'o3_variance': variance_o3,
    'o4_mini_variance': variance_o4
})

# Add row for combined (mean) variance across all score columns
combined = pd.Series({
    'o3_variance': np.mean(list(variance_o3.values())),
    'o4_mini_variance': np.mean(list(variance_o4.values())),
}, name='combined_average')

comparison_df = pd.concat([comparison_df, combined.to_frame().T])


def label_lower_variance(row):
    """Name the model with the strictly lower variance in `row`.

    Returns 'o3' or 'o4_mini' for a strict winner, 'tie' when both variances
    are equal, and 'n/a' when either value is NaN.
    """
    o3_var = row['o3_variance']
    o4_var = row['o4_mini_variance']
    if o3_var < o4_var:
        return 'o3'
    if o4_var < o3_var:
        return 'o4_mini'
    if o3_var == o4_var:
        return 'tie'
    # Every comparison involving NaN is False, so reaching this point means at
    # least one variance is missing and no winner can be declared.
    return 'n/a'


# Label all rows once, after the combined row exists, so it is covered too.
comparison_df['better_model'] = comparison_df.apply(label_lower_variance, axis=1)

print(comparison_df)



In [None]:
# Grouped bar chart: one pair of bars per row of comparison_df
# (each score column plus the combined average), one bar per model.
variance_columns = ['o3_variance', 'o4_mini_variance']
ax = comparison_df[variance_columns].plot(kind='bar', figsize=(10,5))
ax.set_title('Checklist Variance Comparison: o3 vs o4_mini')
ax.set_ylabel('Average Variance Across Papers')
# Tilt the x tick labels so they do not overlap.
ax.tick_params(axis='x', labelrotation=45)
ax.figure.tight_layout()
plt.show()