In [16]:
import json
from typing import List, Union
import itertools

import numpy as np
from collections import defaultdict
import os


In [17]:
def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.

    Args:
        num_samples: Number of samples drawn per problem. Either a single
            integer (a Python ``int`` or a NumPy integer scalar such as
            ``np.int64``) that applies to every problem, or a sequence with
            one entry per problem.
        num_correct: Number of correct samples per problem, one entry per
            problem.
        k: The ``k`` in pass@k.

    Returns:
        A 1-D ``np.ndarray`` holding one estimate per entry of
        ``num_correct``, in the same order.

    Raises:
        AssertionError: If ``num_samples`` is a sequence whose length differs
            from that of ``num_correct``.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).

        The ratio of binomials is evaluated as a running product over
        ``n - c + 1 .. n`` instead of computing either binomial directly.
        """
        # Fewer than k incorrect samples: every size-k subset contains a
        # correct one, so the estimate is exactly 1.
        if n - c < k:
            return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # NumPy integer scalars (e.g. np.int64) are not instances of `int`, so
    # they must be accepted explicitly; otherwise len() below raises TypeError.
    if isinstance(num_samples, (int, np.integer)):
        num_samples_it = itertools.repeat(int(num_samples), len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        num_samples_it = iter(num_samples)

    return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])


In [18]:
# Sub-folder of ../Generation/Output/ that this cell scans.
folder_name = 'gpt35'
# Every entry found in that folder, in whatever order the OS reports them.
files = os.listdir(f'../Generation/Output/{folder_name}/')
# Keep only the entries whose name ends in '128.json'.
json_files = list(filter(lambda fname: fname.endswith('128.json'), files))
print(json_files)

['GPT3.5_Output_refined_0.4_128.json', 'GPT3.5_Output_raw_0.4_128.json', 'GPT3.5_Output_raw_0.8_128.json', 'GPT3.5_Output_raw_1.0_128.json', 'GPT3.5_Output_refined_0.2_128.json', 'GPT3.5_Output_raw_0.2_128.json', 'GPT3.5_Output_refined_0.8_128.json', 'GPT3.5_Output_refined_1.0_128.json', 'GPT3.5_Output_raw_0.6_128.json', 'GPT3.5_Output_refined_0.6_128.json', 'GPT3.5_Output_raw_0.0_128.json', 'GPT3.5_Output_refined_0.0_128.json']


In [19]:

matrix = "Compiled"
for name in json_files:
    # Drop the extension; the bare stem is reused in both paths and in the printed rows.
    name = name.replace('.json', '')
    filename = f'../Generation/Output/{folder_name}/{name}.json'  # built but not read in this cell
    outfilename = f"./Output/{folder_name}/{name}_{matrix}_Result.json"

    # Load the list of per-sample records for this run.
    with open(outfilename, 'r') as f:
        records = json.load(f)

    # Bucket the records by their 'id' field.
    results = defaultdict(list)
    for rec in records:
        results[rec['id']].append(rec)

    # Calculate pass@k.
    # Per id: number of records, and the sum of their "passed" values
    # (presumably booleans, so the sum is the count of passing samples -- confirm).
    groups = list(results.values())
    total = np.array([len(group) for group in groups])
    correct = np.array([sum(rec["passed"] for rec in group) for group in groups])

    ks = [1,3,10]
    pass_at_k = {}
    for k in ks:
        # Skip any k for which at least one id has fewer than k records.
        if not (total >= k).all():
            continue
        # Mean of the per-id estimates, expressed as a percentage.
        pass_at_k[k] = (estimate_pass_at_k(total, correct, k).mean())*100

    # One CSV-style row per evaluated k: "<name>,<k>,<score>".
    for k in pass_at_k:
        print(f"{name},{k},{pass_at_k[k]}")


GPT3.5_Output_refined_0.4_128,1,35.86614173228347
GPT3.5_Output_refined_0.4_128,3,42.11942257217848
GPT3.5_Output_refined_0.4_128,10,47.24409448818898
GPT3.5_Output_raw_0.4_128,1,24.17322834645669
GPT3.5_Output_raw_0.4_128,3,30.574146981627297
GPT3.5_Output_raw_0.4_128,10,35.69553805774278
GPT3.5_Output_raw_0.8_128,1,33.56955380577428
GPT3.5_Output_raw_0.8_128,3,44.654418197725285
GPT3.5_Output_raw_0.8_128,10,52.6246719160105
GPT3.5_Output_raw_1.0_128,1,22.257217847769027
GPT3.5_Output_raw_1.0_128,3,33.47659667541558
GPT3.5_Output_raw_1.0_128,10,44.2257217847769
GPT3.5_Output_refined_0.2_128,1,35.94488188976378
GPT3.5_Output_refined_0.2_128,3,39.064960629921266
GPT3.5_Output_refined_0.2_128,10,41.46981627296588
GPT3.5_Output_raw_0.2_128,1,24.84251968503937
GPT3.5_Output_raw_0.2_128,3,28.638451443569558
GPT3.5_Output_raw_0.2_128,10,32.020997375328086
GPT3.5_Output_refined_0.8_128,1,33.727034120734906
GPT3.5_Output_refined_0.8_128,3,44.534120734908136
GPT3.5_Output_refined_0.8_128,10,53.