Commit

Add ruff-format to hooks
klieret committed May 25, 2024
1 parent 03f52ab commit b1ad80d
Showing 29 changed files with 849 additions and 768 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -28,7 +28,7 @@ repos:
       # Run the linter.
       - id: ruff
       # Run the formatter.
-      # - id: ruff-format
+      - id: ruff-format
 
   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at
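This is the commit's only hand-written change: the previously commented-out `ruff-format` hook is enabled, and every Python file below is mechanically reflowed by the formatter. As a minimal sketch (assuming `ruff` is installed locally; not part of this commit), the same check the hook enforces can be reproduced outside pre-commit:

```python
# Hedged sketch: reproduce what the ruff-format pre-commit hook enforces.
# Assumes `pip install ruff`; the target directory "." is illustrative.
import subprocess

# `ruff format --check` reports files that would be reformatted without
# modifying them, which is the failure condition the hook raises on commit.
result = subprocess.run(
    ["ruff", "format", "--check", "."],
    capture_output=True,
    text=True,
)
# A non-zero return code means at least one file would be reformatted.
print(result.returncode, result.stdout)
```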
2 changes: 2 additions & 0 deletions config/commands/_split_string.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import sys
 
+
 def print_flake8_output(input_string, show_line_numbers=False):
     for value in input_string.split("\n"):
         parts = value.split()
@@ -10,6 +11,7 @@ def print_flake8_output(input_string, show_line_numbers=False):
             line_nums = ":".join(parts[0].split(":")[1:])
             print(f"- {line_nums} {' '.join(parts[1:])}")
 
+
 if __name__ == "__main__":
     lint_output = sys.argv[1]
     print_flake8_output(lint_output)
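Both hunks here only add the second blank line that ruff-format requires around top-level definitions; behavior is unchanged. For reference, a standalone sketch of the parsing logic visible in these hunks — the script itself receives the lint text as `sys.argv[1]`, and the sample flake8 lines below are invented:

```python
# Hedged usage sketch for print_flake8_output; the flake8 output is made up.
sample = (
    "foo.py:10:1 F401 'os' imported but unused\n"
    "foo.py:12:5 E303 too many blank lines"
)

for value in sample.split("\n"):
    parts = value.split()
    # parts[0] is "path:line:col"; keep only "line:col"
    line_nums = ":".join(parts[0].split(":")[1:])
    print(f"- {line_nums} {' '.join(parts[1:])}")
# Output:
# - 10:1 F401 'os' imported but unused
# - 12:5 E303 too many blank lines
```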
51 changes: 29 additions & 22 deletions evaluation/aggregate_results.py
@@ -5,6 +5,7 @@
 import os
 import pandas as pd
 import warnings
+
 warnings.filterwarnings("ignore")
 
 from pathlib import Path
@@ -79,16 +80,16 @@ def convert_experiments_to_rows(folder_name, runs_max):
         if len(folder_data) != 8:
             # TODO: This might be too strict?
             continue
-        temperature = float(folder_data[3][len("t-"):].strip())
-        top_p = float(folder_data[4][len("p-"):].strip())
-        cost = float(folder_data[5][len("c-"):].strip())
+        temperature = float(folder_data[3][len("t-") :].strip())
+        top_p = float(folder_data[4][len("p-") :].strip())
+        cost = float(folder_data[5][len("c-") :].strip())
         install = "Y" if folder_data[6].strip() == "install-1" else "N"
 
         # Parse out run number
         run = folder_data[-1]
         if "run" not in run:
             continue
 
         try:
             if "run-" in run:
                 run = int(run.split("run-")[-1].split("-")[0].replace("_", "").strip())
@@ -97,7 +98,7 @@ def convert_experiments_to_rows(folder_name, runs_max):
         except Exception as e:
             print(run)
             raise e
 
         if runs_max is not None and run > runs_max:
             continue
@@ -112,7 +113,7 @@ def convert_experiments_to_rows(folder_name, runs_max):
             resolved_ids = results_data["resolved"]
         elif "counts" in results_data and isinstance(results_data["counts"]["resolved"], list):
             resolved_ids = results_data["counts"]["resolved"]
 
         # Extract instance costs from trajectories
         costs_overall = []
         costs_success = []
@@ -156,10 +157,7 @@ def convert_experiments_to_rows(folder_name, runs_max):
 
 def get_results_df(folder_name, runs_max):
     rows = convert_experiments_to_rows(folder_name, runs_max)
-    return (
-        pd.DataFrame(rows, columns=COLUMNS)
-        .sort_values(by=COLUMNS[:8])
-    )
+    return pd.DataFrame(rows, columns=COLUMNS).sort_values(by=COLUMNS[:8])
 
 
 def get_results_csv(folder_name):
@@ -169,11 +167,20 @@ def get_results_csv(folder_name):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Aggregate results from experiments")
-    parser.add_argument("--folder", type=str, help="Folder containing experiment results", default="../trajectories")
-    parser.add_argument("--model", nargs='+', type=str, help="Model(s) to filter results by.")
-    parser.add_argument("--dataset", nargs='+', type=str, help="Dataset to filter results by.")
-    parser.add_argument("--setup", nargs='+', type=str, help="Setup to filter results by.")
-    parser.add_argument("--runs_min", type=int, help="Minimum number of runs that experiment should have been run for.")
+    parser.add_argument(
+        "--folder",
+        type=str,
+        help="Folder containing experiment results",
+        default="../trajectories",
+    )
+    parser.add_argument("--model", nargs="+", type=str, help="Model(s) to filter results by.")
+    parser.add_argument("--dataset", nargs="+", type=str, help="Dataset to filter results by.")
+    parser.add_argument("--setup", nargs="+", type=str, help="Setup to filter results by.")
+    parser.add_argument(
+        "--runs_min",
+        type=int,
+        help="Minimum number of runs that experiment should have been run for.",
+    )
     parser.add_argument("--runs_max", type=int, help="Maximum number of runs taken into account")
     args = parser.parse_args()
 
@@ -201,19 +208,19 @@ def get_results_csv(folder_name):
 
     # Filtering
     if args.model:
-        grouped_data = grouped_data[grouped_data['Model'].isin(args.model)]
+        grouped_data = grouped_data[grouped_data["Model"].isin(args.model)]
     if args.dataset:
-        grouped_data = grouped_data[grouped_data['Dataset'].isin(args.dataset)]
+        grouped_data = grouped_data[grouped_data["Dataset"].isin(args.dataset)]
     if args.setup:
-        grouped_data = grouped_data[grouped_data['Setup'].isin(args.setup)]
+        grouped_data = grouped_data[grouped_data["Setup"].isin(args.setup)]
     if args.runs_min:
-        grouped_data = grouped_data[grouped_data['Run'] >= args.runs_min]
+        grouped_data = grouped_data[grouped_data["Run"] >= args.runs_min]
 
     print(f"Total experiments run: {grouped_data.shape[0]}")
-    grouped_data_sorted = grouped_data.sort_values(by=['Dataset', 'Resolved'], ascending=[True, False])
+    grouped_data_sorted = grouped_data.sort_values(by=["Dataset", "Resolved"], ascending=[True, False])
     pd.set_option("display.max_rows", None)
-    grouped = grouped_data_sorted.groupby('Dataset')
+    grouped = grouped_data_sorted.groupby("Dataset")
 
     for name, group in grouped:
-        print(f'\n-----------------\nDataset: {name}\n-----------------')
+        print(f"\n-----------------\nDataset: {name}\n-----------------")
         print(group.to_string(index=False))
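Every hunk in this file is the formatter applying a handful of Black-style rules: single quotes normalized to double quotes, a space added around the colon when a slice bound is an expression, and over-long `add_argument` calls split one argument per line. A standalone illustration of the first two rules (not repository code; the values are invented):

```python
# Hedged illustration of two ruff-format (Black-style) rules seen above.
segment = "t-0.7"

# When a slice bound is an expression, the formatter puts a space on both
# sides of the colon ...
temperature = float(segment[len("t-") :])
# ... but leaves simple bounds untouched.
prefix = segment[:2]

# Single-quoted strings are normalized to double quotes ('Model' -> "Model").
column = "Model"
print(temperature, prefix, column)  # 0.7 t- Model
```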
61 changes: 27 additions & 34 deletions evaluation/evaluation.py
@@ -22,7 +22,7 @@
 from unidiff import PatchSet
 
 
-def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, timeout, verbose, conda_link, log_suffix, num_processes):
+def main(
+    predictions_path,
+    log_dir,
+    swe_bench_tasks,
+    testbed,
+    skip_existing,
+    timeout,
+    verbose,
+    conda_link,
+    log_suffix,
+    num_processes,
+):
     # Check if paths exist
     if not os.path.exists(predictions_path):
         raise FileNotFoundError(f"Predictions path {predictions_path} does not exist")
@@ -65,7 +76,7 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
             verbose=verbose,
             conda_link=conda_link,
             log_suffix=log_suffix,
-            num_processes=num_processes
+            num_processes=num_processes,
         )
         print("✅ Finished evaluation")
     except Exception as e:
@@ -98,11 +109,7 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
             ]
         )
     )
-    scorecard["exit_status"] = (
-        traj_data["info"]["exit_status"]
-        if "exit_status" in traj_data["info"]
-        else "n/a"
-    )
+    scorecard["exit_status"] = traj_data["info"]["exit_status"] if "exit_status" in traj_data["info"] else "n/a"
 
     # Check that a prediction was generated
     if p[KEY_PREDICTION] is None or p[KEY_PREDICTION].strip() == "":
@@ -112,9 +119,7 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
         scorecard["statuses"].append("generated")
 
         # Get log file
-        log_path = os.path.join(
-            log_dir, f"{p[KEY_INSTANCE_ID]}.{directory_name}.eval.log"
-        )
+        log_path = os.path.join(log_dir, f"{p[KEY_INSTANCE_ID]}.{directory_name}.eval.log")
         if not os.path.exists(log_path):
             scorecard["statuses"].append("build_failure")
             scorecards.append(scorecard)
@@ -144,18 +149,15 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
                 "success": {
                     "FAIL_TO_PASS": report["FAIL_TO_PASS"]["success"],
                     "PASS_TO_PASS": report["PASS_TO_PASS"]["success"],
-                }
+                },
             }
             resolution_status = get_resolution_status(report)
             scorecard["statuses"].append(resolution_status)
 
         try:
             diff_obj = PatchSet(p[KEY_PREDICTION])
             scorecard["patch_files"] = [
-                x.path
-                for x in diff_obj.modified_files
-                + diff_obj.added_files
-                + diff_obj.removed_files
+                x.path for x in diff_obj.modified_files + diff_obj.added_files + diff_obj.removed_files
             ]
             scorecard["patch_lines_add"] = sum([f.added for f in diff_obj])
             scorecard["patch_lines_del"] = sum([f.removed for f in diff_obj])
@@ -193,38 +195,29 @@ def main(predictions_path, log_dir, swe_bench_tasks, testbed, skip_existing, tim
         help="Path to predictions file (.jsonl)",
         required=True,
     )
-    parser.add_argument(
-        "--log_dir", type=str, help="Path to log directory", required=True
-    )
+    parser.add_argument("--log_dir", type=str, help="Path to log directory", required=True)
     parser.add_argument(
         "--swe_bench_tasks",
         type=str,
         help="Path to SWE-bench task instances file",
         required=True,
     )
-    parser.add_argument(
-        "--testbed", type=str, help="Path to testbed directory", required=True
-    )
-    parser.add_argument(
-        "--skip_existing", action="store_true", help="(Optional) Skip existing logs"
-    )
+    parser.add_argument("--testbed", type=str, help="Path to testbed directory", required=True)
+    parser.add_argument("--skip_existing", action="store_true", help="(Optional) Skip existing logs")
     parser.add_argument(
         "--timeout",
         type=int,
        help="(Optional) Timeout in seconds (default: 900)",
         default=900,
     )
+    parser.add_argument("--verbose", action="store_true", help="(Optional) Verbose mode")
     parser.add_argument(
-        "--verbose", action="store_true", help="(Optional) Verbose mode"
-    )
-    parser.add_argument(
-        "--conda_link", default=None, type=str, help="(Optional) URL to conda installation to use"
-    )
-    parser.add_argument(
-        "--log_suffix", default=None, type=str, help="(Optional) Log suffix"
-    )
-    parser.add_argument(
-        "--num_processes", default=-1, type=int, help="Num processes"
+        "--conda_link",
+        default=None,
+        type=str,
+        help="(Optional) URL to conda installation to use",
     )
+    parser.add_argument("--log_suffix", default=None, type=str, help="(Optional) Log suffix")
+    parser.add_argument("--num_processes", default=-1, type=int, help="Num processes")
     args = parser.parse_args()
     main(**vars(args))
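The `PatchSet` lines reflowed above compute the per-instance patch statistics for each scorecard. A self-contained sketch of the same unidiff calls, run on a toy patch string (the patch content is invented):

```python
# Hedged sketch of the unidiff API used in evaluation.py, on a toy patch.
from unidiff import PatchSet

toy_patch = """\
--- a/foo.py
+++ b/foo.py
@@ -1,2 +1,2 @@
-x = 1
+x = 2
 y = 3
"""

diff_obj = PatchSet(toy_patch)
# Same file/line accounting as the scorecard code above.
all_files = diff_obj.modified_files + diff_obj.added_files + diff_obj.removed_files
patch_files = [x.path for x in all_files]
patch_lines_add = sum(f.added for f in diff_obj)
patch_lines_del = sum(f.removed for f in diff_obj)
print(patch_files, patch_lines_add, patch_lines_del)  # ['foo.py'] 1 1
```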