In [1]:
import json
import json
from typing import List, Union
import itertools

import numpy as np
from collections import defaultdict


In [2]:
def estimate_vul_at_k(
    num_samples: Union[int, List[int], np.ndarray],
    num_correct: Union[List[int], np.ndarray],
    k: int
) -> np.ndarray:
    """
    Estimates vul@k of each problem and returns them in an array.

    For a problem with ``n`` samples of which ``c`` are counted as hits, the
    estimate is ``1 - comb(n - c, k) / comb(n, k)``: the probability that a
    draw of ``k`` of the ``n`` samples (without replacement) contains at
    least one hit.

    Args:
        num_samples: Number of samples per problem. Either one integer
            (a Python ``int`` or a NumPy integer scalar) shared by every
            problem, or a sequence with one entry per problem.
        num_correct: Number of hits per problem, one entry per problem.
        k: Size of the draw.

    Returns:
        A 1-D float array with one estimate per entry of ``num_correct``.

    Raises:
        AssertionError: If ``num_samples`` is a sequence whose length differs
            from ``num_correct``.

    Note:
        The estimate is only meaningful when ``n >= k``. For ``n < k`` the
        guard in ``estimator`` returns 1.0 even when ``c == 0``, so callers
        should filter on ``n >= k`` first.
    """

    def estimator(n: int, c: int, k: int) -> float:
        """
        Calculates 1 - comb(n - c, k) / comb(n, k).
        """
        # Fewer than k misses exist, so every size-k draw contains a hit.
        if n - c < k:
            return 1.0
        # comb(n - c, k) / comb(n, k) == prod_{i = n-c+1 .. n} (1 - k / i);
        # the product form avoids computing huge binomial coefficients.
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # NumPy integer scalars (e.g. np.int64) are not instances of ``int``;
    # treat them as a shared sample count too instead of failing on len().
    if isinstance(num_samples, (int, np.integer)):
        num_samples_it = itertools.repeat(int(num_samples), len(num_correct))
    else:
        assert len(num_samples) == len(num_correct), \
            "num_samples and num_correct must have the same length"
        num_samples_it = iter(num_samples)

    return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])


In [3]:
def parse_rescue(text, offset=0, samples_per_problem=10):
    """
    Parse a result dump into one record per section.

    ``text`` is split on a line of 25 dashes. Within each section (after
    dropping one leading empty line) the first three lines are read as:

    1. a line whose text after the last ``:`` is an integer sample number,
    2. the pattern,
    3. the status, which counts as ``True`` only if it is exactly
       ``RESULT-TRUE`` after stripping whitespace.

    Sections with fewer than three lines are skipped.

    Args:
        text: Full contents of a result file.
        offset: Value added to every sample number before grouping, so that
            files holding different chunks of one run map to distinct ids.
        samples_per_problem: Number of consecutive (1-based) sample numbers
            that share one problem id. Defaults to 10.

    Returns:
        A list of dicts with keys ``id`` (int problem id), ``pattern`` (str)
        and ``status`` (bool), in file order.

    Raises:
        ValueError: If ``samples_per_problem`` is not positive, or if a
            section's first line does not end in an integer.
    """
    if samples_per_problem <= 0:
        raise ValueError("samples_per_problem must be a positive integer")

    # Sections are separated by a dashed rule line.
    sections = text.split('-------------------------')

    parsed_data = []
    for section in sections:
        current_data = section.split('\n')
        # The newline right after the separator yields one empty leading line.
        if current_data[0] == '':
            current_data = current_data[1:]

        # Not enough lines for id / pattern / status (e.g. trailing remainder).
        if len(current_data) < 3:
            continue

        sample_number = int(current_data[0].split(":")[-1].strip())

        parsed_data.append({
            # Sample numbers are 1-based; consecutive runs of
            # ``samples_per_problem`` samples share one problem id.
            'id': (offset + sample_number - 1) // samples_per_problem,
            'pattern': current_data[1].strip(),
            'status': current_data[2].strip() == "RESULT-TRUE",
        })

    return parsed_data

In [4]:
# Result files to summarise, one per model/variant.
names = [
    "Phi_Raw_Output",
    "Phi_Refined_Output",
    "Text_DaVinci_Refined_Output",
    "GPT3.5_Raw_Output",
    "GPT3.5_Refined_Output",
    "T5_Raw_Output",
    "T5_Refined_Output",
]
ks = [1, 3, 10]

for name in names:
    print(name)
    outfilename = f'./ReDoSHunter_Results/{name}_full_result.txt'

    with open(outfilename, 'r') as f:
        redoshunter_data = parse_rescue(f.read())

    # Bucket the parsed records by problem id.
    results = defaultdict(list)
    for record in redoshunter_data:
        results[record['id']].append(record)

    # Per problem: how many samples there are and how many have status True.
    total = np.array([len(group) for group in results.values()])
    correct = np.array([
        sum(entry["status"] for entry in group)
        for group in results.values()
    ])

    # Report a metric only for k values that every problem has enough samples for.
    pass_at_k = {}
    for k in ks:
        if not (total >= k).all():
            continue
        pass_at_k[f"pass@{k}"] = (estimate_vul_at_k(total, correct, k).mean())*100
    print(pass_at_k)

Phi_Raw_Output
{'pass@1': 6.246719160104987, 'pass@3': 16.420603674540683, 'pass@10': 38.4514435695538}
Phi_Refined_Output
{'pass@1': 5.118110236220472, 'pass@3': 13.138670166229222, 'pass@10': 30.314960629921263}
Text_DaVinci_Refined_Output
{'pass@1': 3.556430446194226, 'pass@3': 5.882545931758529, 'pass@10': 9.580052493438322}
GPT3.5_Raw_Output
{'pass@1': 3.5039370078740153, 'pass@3': 6.207349081364829, 'pass@10': 9.84251968503937}
GPT3.5_Refined_Output
{'pass@1': 3.188976377952756, 'pass@3': 5.72944006999125, 'pass@10': 9.186351706036746}
T5_Raw_Output
{'pass@1': 44.02887139107611, 'pass@3': 67.51749781277341, 'pass@10': 84.7769028871391}
T5_Refined_Output
{'pass@1': 47.80839895013123, 'pass@3': 72.52515310586176, 'pass@10': 87.53280839895014}


In [5]:
# Find all text files under the raw Text-DaVinci results folder.
import glob

path = './ReDoSHunter_Results/Text_DaVinci_Raw/'
# glob gives no ordering guarantee (it depends on the file system); sort so
# that the files are processed and printed in the same order on every run.
files = sorted(glob.glob(path + "**/*.txt", recursive=True))
# Keep only paths that do not contain the substring "only".
filenames = [file for file in files if "only" not in file]
print(len(filenames))


8


In [6]:
import os
import re

# Parse every chunk file and merge the records into one list.
redoshunter_data = []
for file in filenames:
    print(file)
    # The offset is the number right after "Output_" in the file name.
    # Read it from the basename so that underscores in the directory part of
    # the path cannot shift which token is picked.
    offset_match = re.search(r"_Output_(\d+)_", os.path.basename(file))
    if offset_match is None:
        raise ValueError(f"No '_Output_<offset>_' token in file name: {file}")
    offset = int(offset_match.group(1))
    with open(file, 'r') as f:
        result = f.read()
    redoshunter_data.extend(parse_rescue(result, offset=offset))

print(len(redoshunter_data))

./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_4000_redos_s_java_11111_0_2023_09_10_20_40_36.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_1000_redos_s_java_11111_0_2023_09_10_19_26_05.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_2000_redos_s_java_11111_0_2023_09_10_19_31_52.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_0_redos_s_java_11111_0_2023_09_10_19_26_02.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_7000_redos_s_java_11111_0_2023_09_10_20_11_49.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_5000_redos_s_java_11111_0_2023_09_10_19_52_39.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_3000_redos_s_java_11111_0_2023_09_10_19_34_34.txt
./ReDoSHunter_Results/Text_DaVinci_Raw/Text_DaVinci_Raw_Output_6000_redos_s_java_11111_0_2023_09_10_20_25_14.txt
7620


In [7]:


# Bucket the merged records by problem id.
results = defaultdict(list)
for record in redoshunter_data:
    results[record['id']].append(record)

# Per problem: how many samples there are and how many have status True.
total = np.array([len(group) for group in results.values()])
correct = np.array([
    sum(entry["status"] for entry in group)
    for group in results.values()
])

# Report a metric only for k values that every problem has enough samples for.
ks = [1, 3, 10]
vul_at_k = {}
for k in ks:
    if not (total >= k).all():
        continue
    vul_at_k[f"pass@{k}"] = (estimate_vul_at_k(total, correct, k).mean())*100
print(vul_at_k)

{'pass@1': 3.63517060367454, 'pass@3': 6.897419072615923, 'pass@10': 11.67979002624672}
