In [None]:
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path
from coderm.eval.metrics import get_pass_ks
from coderm.utils import gunzip_json_read

from math import comb
import os
import json

# --- Configuration: dataset identifier and location of the run's log files ---
DATASET_NAME = "codegenning/F_livecodebench_lite_v2"
LOG_DIRECTORY = os.path.join("../../logs", "simple_idea/codet0.8")
json_path = os.path.join(LOG_DIRECTORY, "results_per_code_group.json")

# --- Load the per-idea results; stop immediately if the log file is missing ---
assert Path(json_path).exists()
with open(json_path, "r") as f:
    data_per_idea = json.loads(f.read())

# --- Regroup by problem ---
# Idea group k is folded into problem bucket k % NUM_PROBS, so every bucket
# accumulates the results of all idea groups that map to the same problem.
# NOTE(review): presumably the log lists idea groups round-robin over the
# problems -- confirm against whatever writes results_per_code_group.json.
NUM_PROBS = 226
data_per_problem = [{"results": []} for _ in range(NUM_PROBS)]
for idea_index, idea_group in enumerate(data_per_idea):
    problem_bucket = data_per_problem[idea_index % NUM_PROBS]
    problem_bucket["results"] += idea_group["results"]

In [None]:
# Select the "mid-difficulty" problems: those whose overall pass fraction lies
# strictly between 1% and 99%. Problems that are (almost) always or (almost)
# never solved are dropped.
mid_problems = []
for i, data_for_problem in enumerate(data_per_problem):
    results = data_for_problem["results"]
    if not results:
        # No samples were recorded for this problem, so its pass fraction is
        # undefined; skip it instead of dividing by zero.
        continue
    fraction = sum(p["passing"] for p in results) / len(results)
    if 0.01 < fraction < 0.99:
        mid_problems.append(i)

# Idea group i belongs to problem i % NUM_PROBS. Membership is tested against
# a set so the scan over all ideas does not re-walk the problem list each time.
mid_problem_set = set(mid_problems)
good_ideas_idx = [
    i for i in range(len(data_per_idea)) if i % NUM_PROBS in mid_problem_set
]

# Filtered views used downstream: one entry per kept problem / kept idea group.
filtered_pp = [data_per_problem[i] for i in mid_problems]
filtered_pi = [data_per_idea[i] for i in good_ideas_idx]
print(len(mid_problems))

In [None]:
# Overlaid, normalized histograms of pass@1 per problem vs. per idea group,
# restricted to the filtered problems/ideas, saved as a PDF.

# Set up the plot style and an explicit figure/axes pair
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(10, 6), dpi=300)

# Calculate pass@1 scores
data_per_idea_pass_ks = get_pass_ks(filtered_pi, 1)
data_per_problem_pass_ks = get_pass_ks(filtered_pp, 1)

# Weight every sample by 1/N so the bar heights of each histogram sum to 1,
# making the two populations comparable despite their different sizes
weights_data_per_idea = np.ones_like(data_per_idea_pass_ks) / len(data_per_idea_pass_ks)
weights_data_per_problem = np.ones_like(data_per_problem_pass_ks) / len(data_per_problem_pass_ks)

# Define common bins for both histograms: 15 edges -> 14 equal-width bins on [0, 1]
bins = np.linspace(0, 1, 15)

# Plot histograms
ax.hist(data_per_problem_pass_ks, bins=bins, color='#FF9999', edgecolor='#CC0000',
        alpha=0.7, label='Per Problem', weights=weights_data_per_problem)
ax.hist(data_per_idea_pass_ks, bins=bins, color='#66B2FF', edgecolor='#004080',
        alpha=0.7, label='Per Idea', weights=weights_data_per_idea)

# Customize the plot
ax.set_title('Distribution of Solve Rates Conditioned on Idea', fontsize=18, fontweight='bold')
ax.set_xlabel('Solve Rate', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.legend(loc='upper right', fontsize=12)
ax.tick_params(axis='both', which='major', labelsize=12)
# Pad the x-axis slightly beyond [0, 1] so the outermost bar edges are not clipped
ax.set_xlim(-0.021, 1.021)
# Add a text box with the sample counts behind each histogram
stats_text = f"N(problems) = {len(data_per_problem_pass_ks)}\n"
stats_text += f"N(ideas) = {len(data_per_idea_pass_ks)}"
ax.text(0.05, 0.95, stats_text, transform=ax.transAxes,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Adjust layout and save the figure
fig.tight_layout()
output_path = 'plots/idea_solve_rates_distribution.pdf'
# savefig does not create missing directories; make sure the target exists so
# a fresh checkout does not fail with FileNotFoundError
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.savefig(output_path, format='pdf', bbox_inches='tight')
plt.show()