From 87c3eb318def7660a786826826919e6aeb02673b Mon Sep 17 00:00:00 2001
From: Jane Xu
Date: Tue, 27 Jun 2023 00:51:54 +0000
Subject: [PATCH 1/3] [optim][BE] remove wrong reference to __init__.py

---
 .github/workflows/userbenchmark-regression-detector.yml | 2 +-
 userbenchmark/optim/run_optim_benchmarks.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/userbenchmark-regression-detector.yml b/.github/workflows/userbenchmark-regression-detector.yml
index fe5146c4ff..3d7261e425 100644
--- a/.github/workflows/userbenchmark-regression-detector.yml
+++ b/.github/workflows/userbenchmark-regression-detector.yml
@@ -50,7 +50,7 @@ jobs:
           if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
           # TODO: scale this to run other benchmarks, but let's start with optim
-          python -m userbenchmark.optim.run_optim_benchmarks -c ${{ github.event.inputs.userbenchmark_options }}
+          python -m userbenchmark.optim.run -m BERT_pytorch -d cuda -o AdamW --df no_foreach -f pt2_
           cp -r ./.userbenchmark/optim ../benchmark-output
       - name: Detect potential regressions
         continue-on-error: true
diff --git a/userbenchmark/optim/run_optim_benchmarks.py b/userbenchmark/optim/run_optim_benchmarks.py
index afc5697def..a264ef2c26 100644
--- a/userbenchmark/optim/run_optim_benchmarks.py
+++ b/userbenchmark/optim/run_optim_benchmarks.py
@@ -2,7 +2,7 @@
 '''
 This script is intended for the CI context only! The whole purpose behind this script is to
 enable process/context/memory isolation across different models and devices. The OG script (which this
-script calls) is the userbenchmark/optim/__init__.py script, which is better documented and what is
+script calls) is the userbenchmark/optim/run.py script, which is better documented and what is
 intended to be used locally. The current script is simply a wrapper that dispatches serial
 subprocesses to run the OG script and handles the metrics.json merging afterwards.

From 663c9ced953675c1b2f0ffa3f83353455a088a2a Mon Sep 17 00:00:00 2001
From: Jane Xu
Date: Tue, 27 Jun 2023 15:46:07 +0000
Subject: [PATCH 2/3] Make command run in subprocess

---
 .github/workflows/userbenchmark-regression-detector.yml | 2 +-
 userbenchmark/optim/run_optim_benchmarks.py              | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/userbenchmark-regression-detector.yml b/.github/workflows/userbenchmark-regression-detector.yml
index 3d7261e425..fe5146c4ff 100644
--- a/.github/workflows/userbenchmark-regression-detector.yml
+++ b/.github/workflows/userbenchmark-regression-detector.yml
@@ -50,7 +50,7 @@ jobs:
           if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
           # TODO: scale this to run other benchmarks, but let's start with optim
-          python -m userbenchmark.optim.run -m BERT_pytorch -d cuda -o AdamW --df no_foreach -f pt2_
+          python -m userbenchmark.optim.run_optim_benchmarks -c ${{ github.event.inputs.userbenchmark_options }}
           cp -r ./.userbenchmark/optim ../benchmark-output
       - name: Detect potential regressions
         continue-on-error: true
diff --git a/userbenchmark/optim/run_optim_benchmarks.py b/userbenchmark/optim/run_optim_benchmarks.py
index a264ef2c26..097d7d30a6 100644
--- a/userbenchmark/optim/run_optim_benchmarks.py
+++ b/userbenchmark/optim/run_optim_benchmarks.py
@@ -53,6 +53,14 @@ def main() -> None:
     assert not OUTPUT_DIR.exists() or not any(OUTPUT_DIR.glob("*")), \
         f'{OUTPUT_DIR} must be empty or nonexistent. Its contents will be wiped by this script.'

+    command = [sys.executable, '-m', 'userbenchmark.optim.run', '-m', 'BERT_pytorch', '-d', 'cuda', '-o', 'Adam', '--df', 'no_foreach', '-f', 'pt2_']
+    completed_process = subprocess.run(command, check=True)
+    # While it is certainly unexpected for a subprocess to fail, we don't want to halt entirely
+    # as there can be valuable benchmarks to gather from the other subprocesses.
+    if completed_process.returncode != 0:
+        print(f'OH NO, the subprocess for model {m} and device {d} exited with {completed_process.returncode}!')
+
+    return
     # Run benchmarks in subprocesses to take isolate contexts and memory
     for m, d in itertools.product(args.models, args.devices):
         command = [sys.executable, '-m', 'userbenchmark.optim.run', '--continue-on-error',

From 649d21a7e195b3855d6d00a81eee258a8332df0f Mon Sep 17 00:00:00 2001
From: "Jane (Yuan) Xu" <31798555+janeyx99@users.noreply.github.com>
Date: Fri, 7 Jul 2023 17:40:18 -0400
Subject: [PATCH 3/3] Update userbenchmark/optim/run_optim_benchmarks.py

---
 userbenchmark/optim/run_optim_benchmarks.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/userbenchmark/optim/run_optim_benchmarks.py b/userbenchmark/optim/run_optim_benchmarks.py
index 097d7d30a6..a264ef2c26 100644
--- a/userbenchmark/optim/run_optim_benchmarks.py
+++ b/userbenchmark/optim/run_optim_benchmarks.py
@@ -53,14 +53,6 @@ def main() -> None:
     assert not OUTPUT_DIR.exists() or not any(OUTPUT_DIR.glob("*")), \
         f'{OUTPUT_DIR} must be empty or nonexistent. Its contents will be wiped by this script.'

-    command = [sys.executable, '-m', 'userbenchmark.optim.run', '-m', 'BERT_pytorch', '-d', 'cuda', '-o', 'Adam', '--df', 'no_foreach', '-f', 'pt2_']
-    completed_process = subprocess.run(command, check=True)
-    # While it is certainly unexpected for a subprocess to fail, we don't want to halt entirely
-    # as there can be valuable benchmarks to gather from the other subprocesses.
-    if completed_process.returncode != 0:
-        print(f'OH NO, the subprocess for model {m} and device {d} exited with {completed_process.returncode}!')
-
-    return
     # Run benchmarks in subprocesses to take isolate contexts and memory
     for m, d in itertools.product(args.models, args.devices):
         command = [sys.executable, '-m', 'userbenchmark.optim.run', '--continue-on-error',
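
Note on the pattern that patches 2 and 3 add and then revert: subprocess.run(command, check=True)
raises CalledProcessError on any nonzero exit, so the explicit returncode check in that hunk could
never fire, and the unconditional return would have skipped the real benchmark loop entirely
(where m and d are actually defined). The stated intent in its comment, keep gathering benchmarks
when one subprocess fails, calls for check=False instead, which is what the surviving
per-(model, device) loop relies on. Below is a minimal, self-contained sketch of that dispatch
pattern; the hardcoded MODELS and DEVICES lists and the dispatch_benchmarks name are illustrative
stand-ins for the script's parsed args.models and args.devices, not part of the actual script.

    import itertools
    import subprocess
    import sys

    # Hypothetical stand-ins for the wrapper's parsed args.models / args.devices.
    MODELS = ['BERT_pytorch']
    DEVICES = ['cuda']

    def dispatch_benchmarks() -> None:
        # One serial subprocess per (model, device) pair, so each benchmark run
        # gets an isolated process, context, and memory footprint.
        for m, d in itertools.product(MODELS, DEVICES):
            command = [sys.executable, '-m', 'userbenchmark.optim.run',
                       '--continue-on-error', '-m', m, '-d', d]
            # check=False (the default) surfaces a failing run via returncode
            # instead of raising, so the remaining pairs still execute.
            completed = subprocess.run(command, check=False)
            if completed.returncode != 0:
                print(f'Subprocess for model {m} and device {d} exited with '
                      f'{completed.returncode}.')

    if __name__ == '__main__':
        dispatch_benchmarks()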