In [17]:
import os

import pandas as pd
from tqdm.notebook import tqdm
import multiprocess

from datasets import load_dataset

In [None]:
# Hugging Face dataset identifier for the source Python functions.
DATASET_ID = "vikp/python_functions_filtered"

dataset = load_dataset(DATASET_ID)

In [None]:
# Materialise the "train" split as a DataFrame, ordered by descending
# `quality_prob`, with a fresh 0..n-1 index so positional slicing later on
# follows the sorted order.
df = (
    pd.DataFrame(dataset["train"])
    .sort_values(by="quality_prob", ascending=False)
    .reset_index(drop=True)
)
df

In [None]:
# Remove every triple-quoted string (both ''' and """ delimiters, non-greedy,
# spanning newlines) from the `code` column.
# NOTE(review): this also removes triple-quoted strings that are not
# docstrings (e.g. multi-line literals assigned to a variable) — confirm that
# is acceptable for the dataset.
triple_quoted_pattern = r"(\'\'\'[\s\S]*?\'\'\'|\"\"\"[\s\S]*?\"\"\")"
df["code"] = df["code"].str.replace(triple_quoted_pattern, "", regex=True)
df

In [None]:
def create_tests(code):
    """Generate a Pytest file for ``code`` and return ``(code, pytest_code)``.

    Three candidate test files are requested from ``CodeAgent``. Selection
    happens in two stages:

    1. Each candidate is executed with ``run_pytest``. The first candidate
       that produces no stderr, has zero failed assertions and more than 90%
       coverage is returned immediately. Other candidates with empty stderr
       are kept for stage 2.
    2. The kept candidates are visited in order of decreasing coverage. Each
       is passed through ``modify_pytest_code`` and executed again. The first
       adjusted file with no stderr, zero failed assertions and more than 95%
       coverage is returned; otherwise the clean adjusted file with the
       highest coverage is returned.

    Args:
        code: Source code of the module under test, as a string.

    Returns:
        A ``(code, pytest_code)`` tuple, or ``None`` when no usable test file
        could be produced or an unexpected error occurred.
    """
    prompt = """You are a useful code assistant. A user will give you a code representing a python file. 
Your task is to take said code and generate a complete working testing file using Pytest. 
You may assume the original code can be found in a `source.py` file residing in the same directory as the test file, you must import it as such.
You *MUST* always provide the full test code with no other explanations.
You *MUST* use only one assertion per test. Always aim for full code coverage. 
"""
    # `except Exception` (not a bare `except`) keeps the best-effort contract
    # of returning None on failure while letting KeyboardInterrupt and
    # SystemExit propagate.
    try:
        # Imported inside the function, presumably so the names resolve in
        # pool worker processes — TODO confirm.
        from adjustment_utils import modify_pytest_code, run_pytest
        from agent import CodeAgent

        model = CodeAgent(prompt)
        candidates = model.generate_response(code, n=3)

        # Stage 1: run each raw candidate; accept the first good-enough one.
        runnable = []
        for candidate in candidates:
            try:
                res = run_pytest(code, candidate)
                if len(res["stderr"]):
                    continue
                if (res["failed_assertions"] == 0) and (res["coverage"] > 90):
                    return (code, candidate)
                res["pytest_code"] = candidate
                runnable.append(res)
            except Exception:
                continue

        # Stage 2: try to improve the remaining candidates, best coverage first.
        runnable.sort(key=lambda x: x["coverage"], reverse=True)

        adjusted_results = []
        for test_result in runnable:
            try:
                adjusted = modify_pytest_code(code, test_result["pytest_code"])

                res = run_pytest(code, adjusted)
                # Record the adjusted file that was actually executed, so the
                # fallback below returns the code its coverage belongs to.
                res["pytest_code"] = adjusted

                if (not len(res["stderr"])) and (res["failed_assertions"] == 0):
                    if res["coverage"] > 95:
                        return (code, adjusted)
                    adjusted_results.append(res)
            except Exception:
                continue

        if not adjusted_results:
            return None

        best = max(adjusted_results, key=lambda x: x["coverage"])
        return (code, best["pytest_code"])
    except Exception:
        return None

In [None]:
def get_cov(row):
    """Re-run a ``(code, pytest_code)`` pair and attach its coverage.

    Args:
        row: A ``(code, pytest_code)`` tuple, or ``None``.

    Returns:
        ``(code, pytest_code, coverage)``. ``(None, None, None)`` is returned
        when the row or either element is ``None``, or when ``run_pytest``
        raises, so one failing pair does not abort the whole batch; such rows
        are removed by the ``dropna()`` further down.
    """
    if row is None:
        return (None, None, None)

    code, pytest_code = row

    if (code is None) or (pytest_code is None):
        return (None, None, None)

    # Imported inside the function, presumably so the name resolves in pool
    # worker processes — TODO confirm.
    from adjustment_utils import run_pytest

    try:
        coverage = run_pytest(code, pytest_code, random_subdir=True)["coverage"]
    except Exception:
        return (None, None, None)
    return (code, pytest_code, coverage)


# Batch configuration: rows [batch_start, batch_stop) of `df` are processed in
# chunks of `step_size`, each chunk fanned out over `n_workers` processes.
step_size = 1000
batch_start, batch_stop = 50_000, 59_000
n_workers = 24

for j in range(batch_start, batch_stop, step_size):
    # `df` has a fresh 0..n-1 index, so positional slicing follows the
    # quality-sorted order.
    input_code = df["code"].iloc[j : j + step_size]
    num_tasks = len(input_code)

    with multiprocess.Pool(n_workers) as pool:
        results = list(tqdm(pool.imap(create_tests, input_code), total=num_tasks))

    results = [result for result in results if result is not None]

    # Passing `columns=` to the constructor keeps this valid when the batch
    # produced no results at all (assigning `.columns` on a zero-column frame
    # raises a length-mismatch ValueError).
    df_results = (
        pd.DataFrame(results, columns=["original_code", "pytest_code"])
        .dropna()
        .reset_index(drop=True)
    )

    num_tasks = len(results)

    with multiprocess.Pool(n_workers) as pool:
        results_cov = list(tqdm(pool.imap(get_cov, results), total=num_tasks))

    df_cov = (
        pd.DataFrame(
            results_cov, columns=["original_code", "pytest_code", "coverage"]
        )
        .dropna()
        .sort_values(by="coverage", ascending=False)
        .reset_index(drop=True)
    )

    out_path = f"{j}_{j + step_size}_dataset_results.csv"
    df_cov.to_csv(out_path, index=False)

    print(
        f"Saved {out_path} with {df_cov[df_cov.coverage > 99].shape[0]} 100% coverage tests"
    )