In [1]:
import re
import json
import os
import argparse
from typing import List, Dict, Tuple, Any
from pathlib import Path

import numpy as np
import pandas as pd
import datasets
from datasets import Dataset, load_dataset
from tqdm import tqdm
from openai import OpenAI
from transformers import AutoTokenizer
from unidiff import PatchSet, UnidiffParseError

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the search/replace oracle test set from a local parquet file.
# The parquet builder exposes a bare `data_files` input under the "train"
# split, which is why split="train" is requested for a file named test.parquet.
dataset = load_dataset(
    "parquet",
    data_files="data/swe-oracle/search-replace/test.parquet",
    split="train",
)

Generating train split: 2248 examples [00:01, 1514.23 examples/s]


In [3]:
# Load the "test" split of SWE-bench Verified.
# NOTE(review): the path looks like a local copy under data/ -- confirm it
# exists before running on a fresh checkout.
verified_data = load_dataset(
    "data/princeton-nlp/SWE-bench_Verified",
    split="test",
)

In [4]:
# Report every SWE-bench Verified instance that is absent from the oracle
# search/replace dataset.
verified_instance_ids = verified_data["instance_id"]

# Materialize the oracle ids once as a set: indexing `dataset["instance_id"]`
# inside the loop re-reads the whole column and scans it linearly on every
# iteration.
dataset_instance_ids = set(dataset["instance_id"])

for instance_id in verified_instance_ids:
    if instance_id not in dataset_instance_ids:
        print(instance_id)

astropy__astropy-13398


In [5]:
# Keep only the oracle rows whose instance_id belongs to SWE-bench Verified
# and persist them as the evaluation parquet file.
# A set gives O(1) membership per row instead of scanning the id list each time.
verified_id_set = set(verified_instance_ids)

# NOTE: the name `verfied_data` (sic) is kept as-is so it does not clobber
# `verified_data` and so any later cell that refers to it keeps working.
verfied_data = dataset.filter(lambda x: x["instance_id"] in verified_id_set)
verfied_data.to_parquet("data/swe-verified-eval/search-replace.parquet")


Filter: 100%|██████████| 2248/2248 [00:00<00:00, 3962.76 examples/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00,  3.15ba/s]


90412956

In [7]:
# Load one model's saved predictions (JSON lines) for inspection.
outputs = load_dataset(
    "json",
    data_files="outputs/swe-verified-eval/search-replace/deepseek-chat-temp-0.0-tokens-4096-maxlen-131072_score_0.1138.jsonl",
    split="train",
)

Generating train split: 498 examples [00:00, 890.89 examples/s] 


In [8]:
from verl.utils.reward_score.swe_rl.original import extract_thought_solution, parse_search_replace, apply_code_change, get_normalized_patch

In [23]:
# Pick a single prediction record to inspect.
idx = 1

# Fetch the row once and read every field from it.
record = outputs[idx]
extra_info = record["extra_info"]

pred = record["prediction"]
file_names = extra_info["file_names"]
file_contents = extra_info["file_contents"]

# Split the raw model output into its reasoning and its proposed solution.
thought, solution = extract_thought_solution(pred)

In [24]:
# Map each file name to its original contents (pairs are matched positionally).
code_dict = dict(zip(file_names, file_contents))

# Parse the search/replace edits out of the solution text, then apply them to
# the original files to obtain the predicted file contents.
code_changes = parse_search_replace(solution)
pred_dict = apply_code_change(code_dict, code_changes)

In [25]:
import difflib

def generate_patch_from_dicts(original_dict, pred_dict):
    """Build a unified-diff patch between two ``{file_path: content}`` mappings.

    Only paths present in *both* mappings are diffed; files that appear in
    just one of them are ignored. Files are emitted in sorted path order so
    the resulting patch is deterministic across runs.

    Args:
        original_dict: Mapping of file path -> original file content (str).
        pred_dict: Mapping of file path -> modified file content (str).

    Returns:
        The concatenated unified diff as a single string, with ``a/`` and
        ``b/`` prefixes on the file headers. Returns ``""`` when no shared
        file differs.
    """
    patch_lines = []

    # Only generate diffs for files that exist in both. Sorting matters:
    # iterating the raw set intersection yields an order that can change
    # between interpreter runs (string hash randomization).
    for file_path in sorted(original_dict.keys() & pred_dict.keys()):
        old_content = original_dict[file_path].splitlines(keepends=True)
        new_content = pred_dict[file_path].splitlines(keepends=True)
        diff = difflib.unified_diff(
            old_content,
            new_content,
            fromfile=f"a/{file_path}",
            tofile=f"b/{file_path}",
        )
        for line in diff:
            # A diff line without any line terminator can only come from a
            # file whose last line lacks a trailing newline. Emitting it
            # verbatim would glue the next diff line onto it and corrupt the
            # patch, so terminate it and add the conventional marker.
            if line.splitlines() == [line]:
                line += "\n\\ No newline at end of file\n"
            patch_lines.append(line)

    return "".join(patch_lines)

In [26]:
# Render the predicted edits as a unified diff against the original files.
patch_text = generate_patch_from_dicts(code_dict, pred_dict)
print(patch_text)

--- a/astropy/timeseries/core.py
+++ b/astropy/timeseries/core.py
@@ -68,17 +68,22 @@
 
             plural = 's' if len(required_columns) > 1 else ''
 
-            if not self._required_columns_relax and len(self.colnames) == 0:
+            if not self._required_columns_relax:
+                if len(self.colnames) == 0:
+                    raise ValueError("{} object is invalid - expected columns {} "
+                                   "but time series has no columns"
+                                   .format(self.__class__.__name__, required_columns))
+                
+                missing_columns = set(required_columns) - set(self.colnames)
+                if missing_columns:
+                    plural = 's' if len(missing_columns) > 1 else ''
+                    raise ValueError("{} object is invalid - missing required column{}: {}"
+                                   .format(self.__class__.__name__, plural, sorted(missing_columns)))
 
-                raise ValueErro