In [1]:
import os
import multiprocess

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from utils import get_object_names_from_code

### Sample types:

1. (code, pytest code) pairs with 100% coverage.
2. (code, pytest code) pairs with coverage lower than 100% - derived from the 100% coverage pairs by removing test functions.
3. (code, incomplete pytest code, complete pytest code) triples for error correction - sampled from 100% coverage.

In [2]:
# Load every file in "generated/" into one frame. os.listdir returns names in
# arbitrary order, so the names are sorted to keep the row order (and therefore
# the positional index produced by reset_index) reproducible across runs.
df = pd.concat(
        [pd.read_csv(f"generated/{file}") for file in sorted(os.listdir("generated"))]
    ).reset_index(drop=True)

# Placeholder columns, filled in for the derived sample types.
df["sample_type"] = None
df["initial_test_case"] = None
df["initial_output"] = None

# Sample type 1: rows whose recorded coverage is above 99.
df.loc[df["coverage"] > 99, "sample_type"] = 1

df

Unnamed: 0,original_code,pytest_code,coverage,sample_type,initial_test_case,initial_output
0,"def quaternion_multiply(r, q):\r\n \r\n ...",# test_source.py\r\nimport pytest\r\nimport so...,100.0,1,,
1,"def sing_three(mu, c, i0=1.0):\r\n \r\n ...",import pytest\r\nfrom source import sing_three...,100.0,1,,
2,def get_rgb_from_int(rgb_int):\r\n \r\n ...,import pytest\r\nimport sys\r\nsys.path.insert...,100.0,1,,
3,"def inflate(tensor, times, dim):\r\n \r\n ...",# test_source.py\r\nimport pytest\r\nfrom sour...,100.0,1,,
4,def radii(mag):\r\n \r\n # ADM mask all ...,# test_source.py\r\nimport pytest\r\nimport sy...,100.0,1,,
...,...,...,...,...,...,...
76915,"def recursiveChoosing ( arr , start , M , dp )...",import pytest\n\ndef test_recursive_choosing()...,0.0,,,
76916,"def findSmallest ( arr , n ) :\n res = 1\n ...",from source import *\nimport pytest\n\ndef tes...,0.0,,,
76917,"\nmanufacturer = ""Dell""\nscreen_size = 15.6\np...",import pytest\nfrom source import manufacturer...,0.0,,,
76918,"\nimport random\ndata = [1, 2, 3, 4, 5]\n# ran...",import pytest\nfrom source import *\ndef test_...,0.0,,,


In [3]:
def get_all_combinations_wrapper(test_params):
    """Search for a reduced-coverage variant of a test file.

    Subsets of test functions are deleted from the pytest file and pytest is
    re-run on each variant. The first variant that has no failed assertions,
    an empty stderr and a coverage strictly between 0 and 100 is returned.

    Args:
        test_params: Sequence ``(code, pytest_code, coverage, test_functions)``.
            ``coverage`` is accepted so that whole rows can be passed
            positionally, but it is not used.

    Returns:
        tuple: ``(code, pytest_code, reduced_coverage, 2, reduced_pytest_code)``
        for the first qualifying variant (``2`` is the sample type). A tuple of
        five ``None`` values is returned when no variant qualifies or when the
        search raises (presumably including a timeout raised by the ``timeout``
        decorator -- confirm against ``utils.timeout``).
    """
    # NOTE(review): imports are kept function-local, presumably so the function
    # stays self-contained when it is shipped to pool workers -- confirm.
    from utils import timeout

    @timeout(30)
    def get_all_combinations(test_params):
        from itertools import combinations
        from utils import run_pytest, delete_object_from_code

        code, pytest_code, _unused_coverage, test_functions = test_params

        # Delete at most 5 test functions per variant, and never all of them.
        k = min(len(test_functions) - 1, 5)

        for combination in combinations(test_functions, k):
            try:
                cnt_test = pytest_code
                for obj in combination:
                    cnt_test = delete_object_from_code(obj, cnt_test)

                res = run_pytest(code, cnt_test)

                if (not res["failed_assertions"]) and\
                        (res["stderr"] == "") and\
                        (0 < res["coverage"] < 100):
                    return (code, pytest_code, res["coverage"], 2, cnt_test)

            except Exception:
                # Best effort: a variant that cannot be built or run is skipped.
                continue

        # No variant qualified.
        return None

    no_result = (None, None, None, None, None)

    try:
        result = get_all_combinations(test_params)
    except BaseException:
        # Deliberately as broad as the original bare ``except``: the exception
        # type raised by the timeout helper is not visible from here.
        return no_result

    # Always hand back a 5-tuple so every entry of the results has one shape.
    return no_result if result is None else result



### Make and append incomplete test files

In [4]:
def extract_test_functions(x):
    """Return the names of the ``test_``-prefixed objects found in ``x``.

    Args:
        x: Contents of a pytest file (an entry of the ``pytest_code`` column).

    Returns:
        list: The names reported by ``get_object_names_from_code`` that start
        with ``"test_"``, in the order reported. An empty list is returned when
        name extraction raises.
    """
    try:
        funcs = get_object_names_from_code(x)
    except Exception:
        # Best effort: code that cannot be analysed contributes no test names.
        # KeyboardInterrupt / SystemExit are intentionally not swallowed.
        return []
    return [name for name in funcs if name.startswith("test_")]


# Keep only the type-1 samples. Selecting rows and columns in a single .loc call
# and taking an explicit copy makes the column assignment below operate on an
# independent frame rather than on a chained-indexing result.
df_map = df.loc[df["sample_type"] == 1, ["original_code", "pytest_code", "coverage"]].copy()
df_map["test_funcs"] = df_map["pytest_code"].map(extract_test_functions)
# Keep only test files with more than one extracted test function.
df_map = df_map[df_map.test_funcs.map(len) > 1]
df_map

Unnamed: 0,original_code,pytest_code,coverage,test_funcs
4,def radii(mag):\r\n \r\n # ADM mask all ...,# test_source.py\r\nimport pytest\r\nimport sy...,100.0,"[test_radii_input, test_radii_input_equal_to_1..."
8,"def axisAligned(angle, tol=None, axis=None):\r...",import pytest\r\nfrom source import axisAligne...,100.0,"[test_axisAligned_tolerance, test_axisAligned_..."
11,def dms(degrees):\r\n \r\n\r\n degrees_i...,import source\r\n\r\ndef test_dms_positive_deg...,100.0,"[test_dms_fraction_degrees, test_dms_positive_..."
15,"def constrain(value, min_value, max_value):\r\...",# test_source.py\r\nimport pytest\r\nfrom sour...,100.0,"[test_constrain_min, test_constrain_normal, te..."
20,"def denormalize_bbox(bbox, rows, cols):\r\n ...",# test_source.py\r\n\r\nimport pytest\r\nfrom ...,100.0,"[test_denormalize_bbox_positive, test_denormal..."
...,...,...,...,...
66081,"\ndef calculate_power(base, exponent):\n """"...",from source import calculate_power\nimport pyt...,100.0,"[test_calculate_power_int, test_calculate_powe..."
66082,"\nimport math\n\ndef calculate_hypotenuse(a, b...",import pytest\nfrom source import calculate_hy...,100.0,"[test_calculate_hypotenuse_negative_numbers, t..."
66083,"import math\ndef getSum ( a , n ) :\n sum =...",import pytest\nfrom source import *\n\ndef tes...,100.0,"[test_getSum, test_getSum_zero]"
66084,"def digitWell ( n , m , k ) :\n cnt = 0\n ...","import pytest\nfrom source import digitWell, f...",100.0,"[test_digitWell, test_findInt]"


In [5]:
# Reorder the rows by the number of extracted test functions (ascending) and
# renumber the index from zero.
df_map = (
    df_map
    .loc[lambda frame: frame["test_funcs"].map(len).sort_values().index]
    .reset_index(drop=True)
)
df_map

Unnamed: 0,original_code,pytest_code,coverage,test_funcs
0,"\ndef find_missing_number(numbers):\n """"""\n...",import pytest\nfrom source import find_missing...,100.0,"[test_find_missing_number_second, test_find_mi..."
1,def convert_retention_to_seconds(desired_reten...,"import sys\nsys.path.append(""."") # this is to ...",100.0,"[test_convert_retention_to_seconds_hours, test..."
2,\ndef bubble_sort(arr):\n n = len(arr)\n ...,import pytest\nfrom source import bubble_sort\...,100.0,"[test_bubble_sort, test_bubble_sort_2]"
3,def brokenTen(value):\n \n if (value < 1...,#test_source.py\nimport pytest\nfrom source im...,100.0,"[test_brokenTen_less_than_ten, test_brokenTen_..."
4,\ndef is_prime(n):\n if n < 2:\n ret...,"import pytest\nfrom source import is_prime, su...",100.0,"[test_is_prime, test_sum_of_primes]"
...,...,...,...,...
14047,for i in range(10):\n for j in range(10):\n...,"import pytest\nfrom source import i, j\ndef te...",100.0,"[test_twenty_three, test_four, test_eighteen, ..."
14048,"def convertor(value, fromunits, tounits):\n ...",import pytest\nimport sys\nsys.path.append('.'...,100.0,"[test_kcal_mol_to_kJ_mol, test_hartree_to_wave..."
14049,"def count_set_bits(n, k):\n \n count = 0\n ...",import pytest\nfrom source import count_set_bi...,100.0,"[test_count_set_bits_11, test_count_set_bits_3..."
14050,def get_def_class(word):\r\n \r\n test_t...,import pytest\r\nimport source\r\n\r\ndef test...,100.0,"[test_get_def_class_mass, test_get_def_class_i..."


In [7]:
import pickle

step_size = 300

# Create the output directory up front: without it the first open() below
# fails only after a whole chunk has already been computed.
os.makedirs("test_completion_results", exist_ok=True)

# Process df_map in chunks of step_size rows and write one pickle per chunk.
for i in range(0, df_map.shape[0], step_size):
    chunk_end = min(i + step_size, df_map.shape[0])
    cnt_df_map = df_map.iloc[i:chunk_end]
    num_tasks = cnt_df_map.shape[0]

    # Each row is passed as a list [original_code, pytest_code, coverage, test_funcs].
    # A fresh pool is created for every chunk, as in the original cell.
    with multiprocess.Pool(24) as pool:
        results_cov = list(tqdm(pool.imap(get_all_combinations_wrapper, cnt_df_map.to_numpy().tolist()), total=num_tasks))

    with open(f"test_completion_results/{i}_{chunk_end}_res.pkl", "wb") as f:
        pickle.dump(results_cov, f)

  0%|          | 0/300 [00:00<?, ?it/s]