In [14]:

import os
import json

from typing import List, Union
import itertools

import numpy as np
from collections import defaultdict


In [15]:
# Select the model-specific output folder and keep only the generation files
# whose names end in '128.json'.
folder_name = 'gpt35'
files = os.listdir(f'../Generation/Output/{folder_name}/')
json_files = list(filter(lambda fname: fname.endswith('128.json'), files))
print(json_files)

['GPT3.5_Output_refined_0.4_128.json', 'GPT3.5_Output_raw_0.4_128.json', 'GPT3.5_Output_raw_0.8_128.json', 'GPT3.5_Output_raw_1.0_128.json', 'GPT3.5_Output_refined_0.2_128.json', 'GPT3.5_Output_raw_0.2_128.json', 'GPT3.5_Output_refined_0.8_128.json', 'GPT3.5_Output_refined_1.0_128.json', 'GPT3.5_Output_raw_0.6_128.json', 'GPT3.5_Output_refined_0.6_128.json', 'GPT3.5_Output_raw_0.0_128.json', 'GPT3.5_Output_refined_0.0_128.json']


In [16]:
def estimate_pass_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int
) -> np.ndarray:
    """
    Estimates pass@k of each problem and returns them in an array.

    Args:
        num_samples: Number of samples per problem. Either a single integer
            (Python ``int`` or NumPy integer scalar) shared by every problem,
            or a sequence with one entry per problem.
        num_correct: Number of correct samples for each problem.
        k: Size of the draw for which the pass probability is estimated.

    Returns:
        A 1-D array holding one estimate per problem, in the same order as
        ``num_correct``.

    Raises:
        AssertionError: If ``num_samples`` is a sequence whose length differs
            from ``num_correct``.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        if c <= 0:
            # No correct sample exists, so no draw can contain one. Without
            # this guard the shortcut below would report 1.0 whenever k > n.
            return 0.0
        if n - c < k:
            # Fewer than k incorrect samples: every draw of k hits a correct one.
            return 1.0
        # Product form of the binomial ratio; avoids huge intermediate factorials.
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # Accept NumPy integer scalars as well as Python ints for the shared count;
    # np.int64 is not a subclass of int and has no len().
    if isinstance(num_samples, (int, np.integer)):
        num_samples_it = itertools.repeat(int(num_samples), len(num_correct))
    else:
        assert len(num_samples) == len(num_correct)
        num_samples_it = iter(num_samples)

    return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])


In [17]:

matrix = "ReDoSHunter"
for name in json_files:
    # Drop the extension so the bare stem can be reused in both paths below
    # and as the label in the printed rows.
    name = name.replace('.json', '')
    filename = f'../Generation/Output/{folder_name}/{name}.json'
    outfilename = f"./Output/{folder_name}/{name}_{matrix}_Results.json"

    # Load the list of per-sample records from the results file.
    with open(outfilename, 'r') as f:
        result = json.load(f)

    # Bucket the records by their 'id' so each bucket holds every sample
    # recorded for one id.
    results = defaultdict(list)
    for r in result:
        results[r['id']].append(r)

    # Calculate pass@k: per id, count the samples and how many of them are
    # flagged "vulnerable".
    total, correct = [], []
    for samples in results.values():
        flags = [sample["vulnerable"] for sample in samples]
        total.append(len(flags))
        correct.append(sum(flags))
    total = np.array(total)
    correct = np.array(correct)

    # Only report a k when every id has at least k samples; values are
    # the mean estimate scaled to a percentage.
    ks = [1,3,10]
    pass_at_k = {}
    for k in ks:
        if not (total >= k).all():
            continue
        pass_at_k[k] = (estimate_pass_at_k(total, correct, k).mean())*100

    # One CSV-style row per reported k: <file stem>,<k>,<pass@k in percent>.
    for k in pass_at_k:
        print(f"{name},{k},{pass_at_k[k]}")


GPT3.5_Output_refined_0.4_128,1,3.503937007874016
GPT3.5_Output_refined_0.4_128,3,5.755686789151357
GPT3.5_Output_refined_0.4_128,10,8.267716535433072
GPT3.5_Output_raw_0.4_128,1,2.2834645669291342
GPT3.5_Output_raw_0.4_128,3,4.650043744531934
GPT3.5_Output_raw_0.4_128,10,8.005249343832022
GPT3.5_Output_raw_0.8_128,1,3.963254593175854
GPT3.5_Output_raw_0.8_128,3,8.109142607174103
GPT3.5_Output_raw_0.8_128,10,14.041994750656167
GPT3.5_Output_raw_1.0_128,1,3.7532808398950133
GPT3.5_Output_raw_1.0_128,3,8.78827646544182
GPT3.5_Output_raw_1.0_128,10,17.191601049868765
GPT3.5_Output_refined_0.2_128,1,3.083989501312336
GPT3.5_Output_refined_0.2_128,3,4.564741907261592
GPT3.5_Output_refined_0.2_128,10,6.299212598425196
GPT3.5_Output_raw_0.2_128,1,2.952755905511811
GPT3.5_Output_raw_0.2_128,3,4.576771653543307
GPT3.5_Output_raw_0.2_128,10,6.692913385826772
GPT3.5_Output_refined_0.8_128,1,3.490813648293963
GPT3.5_Output_refined_0.8_128,3,7.273622047244094
GPT3.5_Output_refined_0.8_128,10,13.648