# Extracting prompts from SWE-bench

This notebook extracts prompts from the SWE-bench dataset to be used by humans and language models.

In [None]:
from datasets import load_dataset
import pandas as pd

# Load the test split of the "princeton-nlp/SWE-bench_oracle_llama" dataset
# and convert it to a pandas DataFrame so it can be filtered and sampled below.
dataset = load_dataset("princeton-nlp/SWE-bench_oracle_llama", split='test')
df = pd.DataFrame(dataset)

In [None]:
# Show available repos (the distinct values of the "repo" column)
df["repo"].unique()

In [None]:
import os

def get_rnd_issue(df: pd.DataFrame, repo: str=None, dir: str="./prompts_oracle/") -> pd.Series:
    """
    Pick a random issue from `df`, save its prompt to a file in `dir`, and return it.

    Args:
        df: DataFrame with the columns "text" and "instance_id" (and "repo"
            when `repo` is given).
        repo: If given, only rows whose "repo" column equals this value are
            considered.
        dir: Output directory; created if it does not exist. The name shadows
            the builtin but is kept so existing keyword callers keep working.

    Returns:
        The sampled row. Its "text" is written to `<dir>/<instance_id>.txt`.

    Raises:
        ValueError: If there is no issue to sample from (empty `df`, or no row
            matches `repo`).
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(dir, exist_ok=True)

    # If repo is specified, filter rows by the given repo name.
    if repo:
        df = df[df['repo'] == repo]

    # Fail with an explicit message instead of pandas' generic sampling error.
    if df.empty:
        if repo:
            raise ValueError(f"No issues found for repo {repo!r}")
        raise ValueError("Cannot sample an issue from an empty DataFrame")

    # Draw one random row from the (possibly filtered) DataFrame.
    issue = df.sample(n=1).iloc[0]

    text = issue['text']
    instance_id = issue['instance_id']

    # Build the file name from the "instance_id"; os.path.join avoids a
    # doubled separator when `dir` already ends with a slash.
    filename = os.path.join(dir, f"{instance_id}.txt")

    # Save the "text" to the file
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(text)

    print(f"Save the issue prompt to: {filename}")

    return issue

In [None]:
# Sample one random "django/django" issue and write its prompt to the default
# output directory ("./prompts_oracle/").
get_rnd_issue(df, repo="django/django")

# Evaluation of patch
 
Enter your patch and its `instance_id` (usually the file name of the prompt without the `.txt` extension) below, then run the cells:

In [None]:
# ID of the instance the patch belongs to; written to the predictions file below.
instance_id="pallets__flask-4642"
# Paste the patch between the triple quotes.
patch_text="""
"""

In [None]:
import json

# Bundle the manually written patch into a single prediction record.
filename = "tmp_predictions.json"
json_dict = dict(instance_id=instance_id, prediction=patch_text, model="human")

# The eval script expects a list of predictions, so the record is wrapped in one.
serialized = json.dumps([json_dict], indent=4)
with open(filename, 'w') as json_file:
    json_file.write(serialized)

In [None]:
import subprocess
import os



def run_evaluation(log_dir="evaluation_outputs", swe_bench_tasks="../swe-bench.json", testbed="eval-artifacts-deleteme", skip_existing=None, timeout=None, verbose=None, predictions_path="tmp_predictions.json"):
    """
    Run `../harness/run_evaluation.py` on a predictions file in a subprocess.

    Args:
        log_dir: Directory passed as `--log_dir`; created if missing.
        swe_bench_tasks: Path passed as `--swe_bench_tasks`.
        testbed: Directory passed as `--testbed`; created if missing.
        skip_existing: If not None, forwarded as `--skip_existing <value>`.
        timeout: If not None, forwarded as `--timeout <value>`.
        verbose: If not None, forwarded as `--verbose <value>`.
        predictions_path: Predictions file passed as `--predictions_path`.

    Returns:
        The `subprocess.CompletedProcess` of the harness run. Its captured
        stdout and stderr are also printed. A non-zero exit code does not
        raise; check `returncode` on the result.
    """
    # Local import: keeps the block self-contained without touching the
    # cell's import lines.
    import sys

    # Use the interpreter running this notebook so the harness sees the same
    # environment; a bare "python" may resolve elsewhere or not exist.
    cmd = [
        sys.executable, "../harness/run_evaluation.py",
        "--predictions_path", predictions_path,
        "--log_dir", log_dir,
        "--swe_bench_tasks", swe_bench_tasks,
        "--testbed", testbed
    ]

    # Make sure the output directories exist before the harness starts.
    for path in (log_dir, testbed):
        os.makedirs(path, exist_ok=True)

    # Add optional arguments to the command.
    # NOTE(review): each option is forwarded together with a stringified
    # value. If the harness declares --skip_existing / --verbose as value-less
    # flags, this form will be rejected -- confirm against the harness CLI.
    optional_args = (
        ("--skip_existing", skip_existing),
        ("--timeout", timeout),
        ("--verbose", verbose),
    )
    for flag, value in optional_args:
        if value is not None:
            cmd.extend([flag, str(value)])

    # Run the command
    result = subprocess.run(cmd, capture_output=True, text=True)

    # Print the output
    print(result.stdout)
    print(result.stderr)

    # Return the result
    return result

In [None]:
# Run the evaluation with the default paths; the subprocess output is printed.
run_evaluation()