In [None]:
import json
import os
import warnings
from math import comb
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

from coderm.eval.metrics import get_pass_ks
from coderm.utils import gunzip_json_read

# --- Configuration -----------------------------------------------------------
DATASET_NAME = "codegenning/F_livecodebench_lite_v2"
LOG_DIRECTORY = os.path.join("../../logs", "simple_idea/codet0.8")

# Number of per-problem buckets used by the modulo fold below.
# NOTE(review): presumably the number of problems in DATASET_NAME -- confirm
# whenever the dataset or log directory changes.
NUM_PROBS = 226

# --- Load the result groups --------------------------------------------------
json_path = os.path.join(LOG_DIRECTORY, "results_per_code_group.json")
assert Path(json_path).exists(), f"results file not found: {json_path}"
# JSON is UTF-8 by specification; do not depend on the platform default codec.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# --- Fold result groups into one bucket per problem --------------------------
# Entry i of `data` is merged into bucket i % NUM_PROBS.
# NOTE(review): this assumes `data` cycles through the problems in a fixed
# order (problem 0..NUM_PROBS-1, then again, ...) -- verify against the writer
# of results_per_code_group.json.
if len(data) == 0 or len(data) % NUM_PROBS != 0:
    # Not fatal (a truncated final cycle still maps to the right buckets), but
    # buckets will be uneven or empty, which skews the per-problem statistics.
    warnings.warn(
        f"{len(data)} result groups do not divide evenly into "
        f"NUM_PROBS={NUM_PROBS} problems; per-problem buckets will be uneven."
    )

data_grouped = [{"results": []} for _ in range(NUM_PROBS)]
for i, idea_group in enumerate(data):
    data_grouped[i % NUM_PROBS]["results"].extend(idea_group["results"])

In [None]:
# Pass@1 computed at two granularities: one score per entry of `data` and one
# score per entry of `data_grouped`.
data_pass_ks = get_pass_ks(data, 1)
data_grouped_pass_ks = get_pass_ks(data_grouped, 1)

# Normalize the bar heights by the number of scores in each series so that each
# histogram sums to 1 and the two series can be compared despite differing sizes.
weights_data = np.ones_like(data_pass_ks) / len(data_pass_ks)
weights_data_grouped = np.ones_like(data_grouped_pass_ks) / len(data_grouped_pass_ks)

# Use ONE set of bin edges for both histograms. With a bare `bins=10`, each
# hist call derives its edges from its own data range, so overlaid bars would
# cover different intervals and could not be compared bar-for-bar.
shared_bins = np.histogram_bin_edges(
    np.concatenate([np.ravel(data_pass_ks), np.ravel(data_grouped_pass_ks)]),
    bins=10,
)

# Create the figure only after the metrics were computed successfully, so a
# failure above does not leave an empty figure behind.
fig, ax = plt.subplots(figsize=(10, 6))

ax.hist(
    data_pass_ks,
    bins=shared_bins,
    color='skyblue',
    edgecolor='black',
    alpha=0.5,
    label='P(Solve) per idea (over all problems)',
    weights=weights_data,
)
ax.hist(
    data_grouped_pass_ks,
    bins=shared_bins,
    color='salmon',
    edgecolor='black',
    alpha=0.5,
    label='P(Solve) per problem (over all problems)',
    weights=weights_data_grouped,
)

ax.set_title('Comparison of Pass@1 Scores Distribution', fontsize=16)
ax.set_xlabel('Pass@1 Score', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.legend(loc='upper right')
ax.grid(True, linestyle='--', alpha=0.7)
plt.show()