Fix get_eval_refs for huggingface source

princeton-nlp · Apr 2, 2024 · 12a287a · 12a287a
1 parent 3fb2179
commit 12a287a
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 4 deletions.
diff --git a/swebench/harness/run_evaluation.py b/swebench/harness/run_evaluation.py
@@ -207,10 +207,7 @@ def main(
         # Clean up
         for temp_dir in temp_dirs:
             # Kill all processes that are using the temp directory
-            try:
-                subprocess.run(f"lsof +D {temp_dir} | awk 'NR>1 {{print $2}}' | xargs kill", shell=True)
-            except Exception as e:
-                logger.error(f"Error killing processes using temp directory: {e}")
+            subprocess.run(f"lsof +D {temp_dir} | awk 'NR>1 {{print $2}}' | xargs kill", shell=True, capture_output=True)
             # Remove temp directory
             shutil.rmtree(temp_dir, ignore_errors=True)
 

diff --git a/swebench/metrics/getters.py b/swebench/metrics/getters.py
@@ -127,18 +127,26 @@ def log_path_to_sms(log_fp: str, log_parser) -> Tuple[list, bool]:
 
 
 def get_eval_refs(data_path_or_name):
     """Load evaluation reference instances and index them by instance ID.
 
     Args:
         data_path_or_name: Path to a ``.json`` / ``.jsonl`` file, path to a
             directory loadable with ``load_from_disk``, or any other string,
             which is handed to ``load_dataset``.
 
     Returns:
         Dict mapping ``d[KEY_INSTANCE_ID]`` to the instance dict ``d``.
 
     Raises:
         ValueError: If the path is a file that is neither ``.json`` nor
             ``.jsonl``.
     """
     decode_keys = False
     if os.path.isfile(data_path_or_name):
         if data_path_or_name.endswith(".jsonl"):
             # Context manager closes the handle; blank lines are skipped so
             # a trailing newline does not break json.loads.
             with open(data_path_or_name) as f:
                 data = [json.loads(line) for line in f if line.strip()]
         elif data_path_or_name.endswith(".json"):
             with open(data_path_or_name, "r") as f:
                 data = json.load(f)
         else:
             # Previously fell through and failed later with an unbound `data`.
             raise ValueError(
                 f"Unsupported data file (expected .json or .jsonl): {data_path_or_name}"
             )
     elif os.path.isdir(data_path_or_name):
         data = load_from_disk(data_path_or_name)
         decode_keys = True
     else:
         data = load_dataset(data_path_or_name)
         decode_keys = True
     if isinstance(data, dict):
         # Dict of splits: flatten every split into a single list of rows.
         data = [row for split in data.values() for row in split]
     else:
         # Materialize the rows once. A dataset object may build a new dict
         # per row on every iteration (NOTE(review): confirm for the
         # `datasets` version in use), in which case edits made in the
         # decode loop below would not be seen by the final comprehension.
         data = list(data)
     if decode_keys:
         for datum in data:
             for key in ["PASS_TO_PASS", "FAIL_TO_PASS"]:
                 # These fields arrive as JSON-encoded strings from dataset
                 # sources; leave values that are already decoded untouched.
                 if isinstance(datum[key], str):
                     datum[key] = json.loads(datum[key])
     return {d[KEY_INSTANCE_ID]: d for d in data}
+