In [1]:
import json
from typing import List, Union
import itertools

import numpy as np
from collections import defaultdict


In [2]:
def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.

    Args:
        num_samples: Number of generated samples per problem. Either a single
            integer (Python ``int`` or NumPy integer scalar) that applies to
            every problem, or a sequence with one entry per problem.
        num_correct: Number of passing samples per problem.
        k: The ``k`` in pass@k.

    Returns:
        A 1-D array with one pass@k estimate (in ``[0, 1]``) per entry of
        ``num_correct``.

    Raises:
        AssertionError: If ``num_samples`` is a sequence whose length differs
            from ``len(num_correct)``.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        if c <= 0:
            # No passing sample means no k-subset can contain one. Handling
            # this first keeps the ``n - c < k`` shortcut below from reporting
            # a perfect score when fewer than k samples were drawn.
            return 0.0
        if n - c < k:
            # Fewer than k failing samples: every k-subset contains a pass.
            return 1.0
        # Product form of comb(n - c, k) / comb(n, k); avoids huge binomials.
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # NumPy integer scalars (e.g. np.int64 from len()/sum() on arrays) are not
    # instances of ``int``, so accept them explicitly as the "same n for every
    # problem" case instead of failing on len() below.
    if isinstance(num_samples, (int, np.integer)):
        num_samples_it = itertools.repeat(num_samples, len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        num_samples_it = iter(num_samples)

    return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])


In [3]:
# names = ["Text_DaVinci_Raw_Output","Text_DaVinci_Refined_Output","GPT3.5_Raw_Output","GPT3.5_Refined_Output", "T5_Raw_Output","T5_Refined_Output"]
names = ["Phi_Raw_Output","Phi_Refined_Output"]

matrix = "Compiled"
for name in names:
    print(name)
    # NOTE(review): `filename` is not read in this cell; kept in case another
    # cell relies on it.
    filename = f'../Generation/{name}.json'
    outfilename = f'./{name}_{matrix}_Result.json'

    # The result file is a JSON list of per-sample records; each record is
    # indexed below by its 'id' and 'passed' keys.
    with open(outfilename, 'r') as f:
        records = json.load(f)

    # Group the sample records by problem id.
    results = defaultdict(list)
    for r in records:
        results[r['id']].append(r)

    # Calculate pass@k.
    total, correct = [], []
    for samples in results.values():
        passed = [r["passed"] for r in samples]
        total.append(len(passed))
        correct.append(sum(passed))
    total = np.array(total)
    correct = np.array(correct)
    # print(total, correct)
    ks = [1,5,10]
    # Only report a k when every problem has at least k samples.
    # Bound to `pass_at_k_scores` rather than `pass_at_k` so that re-running
    # this cell cannot replace the `pass_at_k` function defined further down
    # in this notebook with a dict.
    pass_at_k_scores = {f"pass@{k}": (estimate_pass_at_k(total, correct, k).mean())*100
                        for k in ks if (total >= k).all()}
    print(pass_at_k_scores)


Phi_Raw_Output
{'pass@1': 2.270341207349081, 'pass@5': 7.450110402866309, 'pass@10': 10.892388451443571}
Phi_Refined_Output
{'pass@1': 1.6666666666666667, 'pass@5': 6.2341165687622375, 'pass@10': 9.711286089238845}


In [19]:
# Empirical pass@k: share of problems with at least one passing sample among the first k (printed as DFA-EQ@k).
def pass_at_k(results, ks):
    """
    Print the empirical DFA-EQ@k score for every k in ``ks``.

    A problem counts as solved at k when at least one of its first k sample
    records has a non-zero ``"passed"`` value. The score is the percentage of
    solved problems.

    Args:
        results: Mapping from problem id to the list of that problem's sample
            records; each record is indexed by ``"passed"``.
        ks: Iterable of cut-offs to report.

    Returns:
        None. One ``DFA-EQ@{k}: {percentage}`` line is printed per k.
    """
    total = len(results)
    for k in ks:
        # Count the problems whose first-k window holds any passing sample.
        correct = sum(
            1
            for samples in results.values()
            if sum(record["passed"] for record in samples[:k]) != 0
        )
        print(f"DFA-EQ@{k}: {correct/total*100}")

        

    

In [20]:
names = ["Text_DaVinci_Raw_Output","Text_DaVinci_Refined_Output","GPT3.5_Raw_Output","GPT3.5_Refined_Output", "T5_Raw_Output","T5_Refined_Output"]

matrix = "DFA"
for name in names:
    print(name)
    # NOTE(review): `filename` is not read in this cell; kept in case another
    # cell relies on it.
    filename = f'../Generation/{name}.json'
    outfilename = f'./{name}_{matrix}_Result.json'

    # The result file is a JSON list of per-sample records; each record is
    # indexed below by its 'id' and 'passed' keys.
    with open(outfilename, 'r') as f:
        records = json.load(f)

    # Group the sample records by problem id.
    results = defaultdict(list)
    for r in records:
        results[r['id']].append(r)

    # Empirical score over the first k samples of each problem (prints DFA-EQ@k).
    pass_at_k(results, [1,3,10])

    # Calculate pass@k.
    total, correct = [], []
    for samples in results.values():
        passed = [r["passed"] for r in samples]
        total.append(len(passed))
        correct.append(sum(passed))
    total = np.array(total)
    correct = np.array(correct)
    # print(total, correct)
    ks = [1,5,10]
    # Only report a k when every problem has at least k samples.
    # The dict must NOT be named `pass_at_k`: rebinding that name here would
    # replace the function called above, and the next loop iteration would
    # fail with "'dict' object is not callable".
    pass_at_k_scores = {f"pass@{k}": (estimate_pass_at_k(total, correct, k).mean())*100
                        for k in ks if (total >= k).all()}
    print(pass_at_k_scores)


Text_DaVinci_Raw_Output
DFA-EQ@1: 7.611548556430446
DFA-EQ@3: 11.286089238845145
DFA-EQ@10: 15.354330708661418
{'pass@1': 6.981627296587926, 'pass@5': 13.00618672665917, 'pass@10': 15.354330708661418}
Text_DaVinci_Refined_Output
DFA-EQ@1: 10.104986876640421
DFA-EQ@3: 14.304461942257218
DFA-EQ@10: 17.191601049868765
{'pass@1': 9.52755905511811, 'pass@5': 15.172582593842437, 'pass@10': 17.191601049868765}
GPT3.5_Raw_Output
DFA-EQ@1: 11.811023622047244
DFA-EQ@3: 15.485564304461944
DFA-EQ@10: 19.160104986876643
{'pass@1': 10.787401574803148, 'pass@5': 16.760925717618633, 'pass@10': 19.160104986876643}
GPT3.5_Refined_Output
DFA-EQ@1: 10.36745406824147
DFA-EQ@3: 14.960629921259844
DFA-EQ@10: 18.766404199475065
{'pass@1': 10.918635170603674, 'pass@5': 16.618755988834728, 'pass@10': 18.766404199475065}
T5_Raw_Output
DFA-EQ@1: 0.26246719160104987
DFA-EQ@3: 0.39370078740157477
DFA-EQ@10: 1.0498687664041995
{'pass@1': 0.2493438320209974, 'pass@5': 0.663458734324876, 'pass@10': 1.0498687664041995}