From 6710b42d94ebccddfe9d1e30a8e3e520a4206ada Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 26 Apr 2021 19:55:31 -0400 Subject: [PATCH 1/3] Add support to run PR testing in CI. --- .github/scripts/run-bisection.sh | 3 +-- bisection.py | 31 ++++++++++++++++++++++++++----- torchbenchmark/util/gitutils.py | 11 ++++++++++- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.github/scripts/run-bisection.sh b/.github/scripts/run-bisection.sh index 4df814cef8..f1505f8574 100644 --- a/.github/scripts/run-bisection.sh +++ b/.github/scripts/run-bisection.sh @@ -43,5 +43,4 @@ python bisection.py --work-dir ${BISECT_BASE}/gh${GITHUB_RUN_ID} \ --pytorch-src ${PYTORCH_SRC_DIR} \ --torchbench-src ${TORCHBENCH_SRC_DIR} \ --config ${BISECT_BASE}/config.yaml \ - --output ${BISECT_BASE}/gh${GITHUB_RUN_ID}/result.json \ - --debug + --output ${BISECT_BASE}/gh${GITHUB_RUN_ID}/result.json diff --git a/bisection.py b/bisection.py index 16b5a77f72..50f2cef8ab 100644 --- a/bisection.py +++ b/bisection.py @@ -41,14 +41,21 @@ def exist_dir_path(string): # For example, ["test_eval[yolov3-cpu-eager]", "test_train[yolov3-gpu-eager]"] # -> "((eval and yolov3 and cpu and eager) or (train and yolov3 and gpu and eager))" # If targets is None, run everything except slomo -def targets_to_bmfilter(targets: List[str]) -> str: +def targets_to_bmfilter(targets: List[str], models: List[str]) -> str: bmfilter_names = [] if targets == None or len(targets) == 0: return "(not slomo)" for test in targets: regex = re.compile("test_(train|eval)\[([a-zA-Z0-9_]+)-([a-z]+)-([a-z]+)\]") m = regex.match(test).groups() - partial_name = " and ".join(m) + if not m: + if m in models: + partial_name = m + else: + print(f"Cannot recognize the TorchBench filter: {test}. Exit.") + exit(1) + else: + partial_name = " and ".join(m) bmfilter_names.append(f"({partial_name})") return "(" + " or ".join(bmfilter_names) + ")" @@ -128,8 +135,17 @@ def prep(self) -> bool: repo_origin_url = gitutils.get_git_origin(self.srcpath) if not repo_origin_url == TORCH_GITREPO: print(f"WARNING: Unmatched repo origin url: {repo_origin_url} with standard {TORCH_GITREPO}") + self.update_repos() return True - + + # Update pytorch, torchtext, torchvision, and torchaudio repo + def update_repos(self): + repos = [self.srcpath] + repos.extend(TORCHBENCH_DEPS.values()) + for repo in repos: + gitutils.clean_git_repo(repo) + assert gitutils.update_git_repo(repo, "master"), f"Failed to update master branch of {repo}." + # Get all commits between start and end, save them in self.commits def init_commits(self, start: str, end: str) -> bool: commits = gitutils.get_git_commits(self.srcpath, start, end) @@ -220,6 +236,7 @@ class TorchBench: timelimit: int # timeout limit in minutes workdir: str devbig: str + models: List[str] torch_src: TorchSource def __init__(self, srcpath: str, @@ -227,19 +244,23 @@ def __init__(self, srcpath: str, timelimit: int, workdir: str, devbig: str, - branch: str = "0.1"): + branch: str = "master"): self.srcpath = srcpath self.torch_src = torch_src self.timelimit = timelimit self.workdir = workdir self.devbig = devbig self.branch = branch + self.models = list() def prep(self) -> bool: # Verify the code in srcpath is pytorch/benchmark repo_origin_url = gitutils.get_git_origin(self.srcpath) if not repo_origin_url == TORCHBENCH_GITREPO: print(f"WARNING: Unmatched repo origin url: {repo_origin_url} with standard {TORCHBENCH_GITREPO}") + # get list of models + self.models = [ model for model in os.listdir(os.path.join(self.srcpath, "torchbenchmark", "models")) + if os.path.isdir(os.path.join(self.srcpath, "torchbenchmark", "models", model)) ] return True def run_benchmark(self, commit: Commit, targets: List[str]) -> str: @@ -253,7 +274,7 @@ def run_benchmark(self, commit: Commit, targets: List[str]) -> str: os.remove(os.path.join(output_dir, f)) else: os.mkdir(output_dir) - bmfilter = targets_to_bmfilter(targets) + bmfilter = targets_to_bmfilter(targets, self.models) print(f"Running TorchBench for commit: {commit.sha}, filter {bmfilter} ...", end="", flush=True) if not self.devbig: command = f"""bash .github/scripts/run-bench.sh "{output_dir}" "{bmfilter}" &> {output_dir}/benchmark.log""" diff --git a/torchbenchmark/util/gitutils.py b/torchbenchmark/util/gitutils.py index 651e4f7802..6b1ac37d28 100644 --- a/torchbenchmark/util/gitutils.py +++ b/torchbenchmark/util/gitutils.py @@ -8,7 +8,16 @@ from datetime import datetime from typing import Optional, List -def update_git_repo(repo: str, branch: str) -> bool: +def clean_git_repo(repo: str) -> bool: + try: + command = f"git clean -xdf" + subprocess.check_call(command, cwd=repo, shell=True) + return True + except subprocess.CalledProcessError: + print(f"Failed to cleanup git repo {repo}") + return None + +def update_git_repo_branch(repo: str, branch: str) -> bool: try: command = f"git pull origin {branch}" out = subprocess.check_output(command, cwd=repo, shell=True).decode().strip() From 34499c298cad2b7971b0a37a3bd4846e90aeddbb Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 26 Apr 2021 20:14:10 -0400 Subject: [PATCH 2/3] Fix a small bug of git clean --- bisection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bisection.py b/bisection.py index 50f2cef8ab..42cf4ff8f4 100644 --- a/bisection.py +++ b/bisection.py @@ -144,7 +144,7 @@ def update_repos(self): repos.extend(TORCHBENCH_DEPS.values()) for repo in repos: gitutils.clean_git_repo(repo) - assert gitutils.update_git_repo(repo, "master"), f"Failed to update master branch of {repo}." + assert gitutils.update_git_repo(repo), f"Failed to update master branch of {repo}." # Get all commits between start and end, save them in self.commits def init_commits(self, start: str, end: str) -> bool: From 2e6e99c2b0a6325d3f2578740c292e5a9e8f0fcc Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 26 Apr 2021 20:23:00 -0400 Subject: [PATCH 3/3] Init commit list with abtest switch. --- bisection.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bisection.py b/bisection.py index 42cf4ff8f4..0bd1f872fe 100644 --- a/bisection.py +++ b/bisection.py @@ -147,8 +147,11 @@ def update_repos(self): assert gitutils.update_git_repo(repo), f"Failed to update master branch of {repo}." # Get all commits between start and end, save them in self.commits - def init_commits(self, start: str, end: str) -> bool: - commits = gitutils.get_git_commits(self.srcpath, start, end) + def init_commits(self, start: str, end: str, abtest: bool) -> bool: + if not abtest: + commits = gitutils.get_git_commits(self.srcpath, start, end) + else: + commits = [start, end] if not commits or len(commits) < 2: print(f"Failed to retrieve commits from {start} to {end} in {self.srcpath}.") return False @@ -416,7 +419,7 @@ def regression(self, left: Commit, right: Commit, targets: List[str]) -> List[st def prep(self) -> bool: if not self.torch_src.prep(): return False - if not self.torch_src.init_commits(self.start, self.end): + if not self.torch_src.init_commits(self.start, self.end, self.abtest): return False if not self.bench.prep(): return False