Skip to content

Commit

Permalink
Run torchbench on CI (#373)
Browse files Browse the repository at this point in the history
* Run torchbench on CI

* Run torchbench on CI #2

* Run torchbench on CI #3

* Run torchbench on CI #4

* Run torchbench on CI #5

* Run torchbench on CI #6

* Run torchbench on CI #7

* Change #1

* Change #2

* Run torchbench on CI #8

* Run torchbench on CI #9

* Install dep

* Change #11

* Change #12

* Change #13

* Change #14

* Change #15

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* More fixes

* Update dependencies

* other fixes

* more fixes

* more fixes

* more fixes

* more fixes #2

* more fixes #2

* more fixes #2

* more fixes

* other fixes

* other fixes

* other fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* additional changes

* other fixes

* other fixes

* additional changes

* other fixes

* other fixes

* other fixes

* other fixes

* other fixes

* Address PR comments

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes
  • Loading branch information
DenisVieriu97 authored and skotapati committed Mar 29, 2023
1 parent 2c6d325 commit 956c9c5
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 23 deletions.
75 changes: 52 additions & 23 deletions .github/scripts/run_torchbench.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
"""
Generate a torchbench test report from a file containing the PR body.
Generate a torchbench test report for the benchmarks listed in GLOBAL_PR_LIST.
Currently, only supports running tests on specified model names
Testing environment:
- Intel Xeon 8259CL @ 2.50 GHz, 24 Cores with disabled Turbo and HT
- Nvidia Tesla T4
- Nvidia Driver 470.82.01
- Python 3.8
- CUDA 11.3
"""

# Known issues:
# 1. Does not reuse the build artifact in other CI workflows
# 2. CI jobs are serialized because there is only one worker
Expand All @@ -22,6 +18,46 @@

from typing import List, Tuple

# Model names whose MPS eager-mode training benchmark is listed below.
# Order is significant: it determines the order of GLOBAL_PR_LIST.
_MPS_EAGER_TRAIN_MODELS = (
    "alexnet",
    "dcgan",
    "hf_Bert",
    "mnasnet1_0",
    "mobilenet_v2",
    "pytorch_unet",
    "resnet18",
    "resnet50",
    "resnext50_32x4d",
    "shufflenet_v2_x1_0",
    "timm_efficientnet",
    "timm_nfnet",
    "timm_regnet",
    "timm_resnest",
    "timm_vision_transformer",
    "timm_vovnet",
    "soft_actor_critic",
    "hf_DistilBert",
    "hf_Bart",
    "hf_Albert",
    "hf_GPT2",
    "lennard_jones",
    "pytorch_stargan",
    "pytorch_struct",
    "timm_vision_transformer_large",
    "functorch_dp_cifar10",
    "squeezenet1_1",
    "hf_T5_base",
    "hf_T5_large",
    "densenet121",
    "phlippe_resnet",
    "phlippe_densenet",
    "tts_angular",
    "DALLE2_pytorch",
    "functorch_maml_omniglot",
    "demucs",
    "vgg16",
)

# Fixed list of benchmark test ids, one per model above, each of the form
# "test_train[<model>-mps-eager]".
GLOBAL_PR_LIST = [
    f"test_train[{model}-mps-eager]" for model in _MPS_EAGER_TRAIN_MODELS
]

# Default file name for the TorchBench config written by deploy_torchbench_config.
TORCHBENCH_CONFIG_NAME = "config.yaml"
# NOTE(review): presumably the config file name for userbenchmark runs; its use
# is not visible in this part of the file -- confirm.
TORCHBENCH_USERBENCHMARK_CONFIG_NAME = "ub-config.yaml"
# Prefix marking the PR-body line that lists the benchmarks to run.
# NOTE(review): confirm this is still consulted now that GLOBAL_PR_LIST exists.
MAGIC_PREFIX = "RUN_TORCHBENCH:"
Expand Down Expand Up @@ -78,7 +114,14 @@ def find_current_branch(repo_path: str) -> str:

def deploy_torchbench_config(output_dir: str, config: str, config_name: str = TORCHBENCH_CONFIG_NAME) -> None:
# Create test dir if needed
pathlib.Path(output_dir).mkdir(exist_ok=True)
# pathlib.Path(output_dir).mkdir(exist_ok=True)
try:
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=False)
except FileExistsError:
print("Folder is already there")
else:
print("Folder was created")

# TorchBench config file name
config_path = os.path.join(output_dir, config_name)
with open(config_path, "w") as fp:
Expand All @@ -100,23 +143,9 @@ def is_valid_ub_dir(ub_path: str) -> bool:
def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> Tuple[List[str], List[str]]:
model_list = []
userbenchmark_list = []
pr_list = []
with open(prbody_file, "r") as pf:
lines = map(lambda x: x.strip(), pf.read().splitlines())
magic_lines = list(filter(lambda x: x.startswith(MAGIC_PREFIX), lines))
if magic_lines:
# Only the first magic line will be recognized.
pr_list = list(map(lambda x: x.strip(), magic_lines[0][len(MAGIC_PREFIX):].split(",")))
valid_models = get_valid_models(torchbench_path)
valid_ubs = get_valid_userbenchmarks(torchbench_path)
pr_list = GLOBAL_PR_LIST
for pr_bm in pr_list:
if pr_bm in valid_models or pr_bm == "ALL":
model_list.append(pr_bm)
elif pr_bm in valid_ubs:
userbenchmark_list.append(pr_bm)
else:
print(f"The model or benchmark {pr_bm} you specified does not exist in TorchBench suite. Please double check.")
exit(-1)
model_list.append(pr_bm)
# Shortcut: if pr_list is ["ALL"], run all the model tests
if "ALL" in model_list:
model_list = ["ALL"]
Expand Down
110 changes: 110 additions & 0 deletions .github/workflows/run_torchbench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Runs the TorchBench benchmark script against a pull request on a self-hosted
# macOS runner, then uploads the result directory as a build artifact.
name: TorchBench CI
on:
  pull_request:
    types: [labeled, opened, synchronize, reopened]
  # NOTE(review): a manual dispatch carries no pull_request payload, so the
  # job-level `if` below evaluates to false and the job is skipped -- confirm
  # whether manual runs are actually intended.
  workflow_dispatch:

env:
  PYTHON_VERSION: "3.9"
  # must be consistent with https://github.com/pytorch/benchmark/blob/main/requirements.txt#L19
  # NOTE(review): it is unclear which setting the comment above refers to -- confirm.
  PR_NUM: ${{ github.event.number }}
  PR_BODY: ${{ github.event.pull_request.body }}
  PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
  PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}

jobs:
  run-torchbench:
    runs-on: [macos-torchbench]
    # Set to 6 hours
    timeout-minutes: 360
    # Only run for pull requests carrying the ciflow/torchbench label.
    if: contains(github.event.pull_request.labels.*.name, 'ciflow/torchbench')
    steps:

      - name: Clean up disk space before running MacOS workflow
        uses: pytorch/test-infra/.github/actions/check-disk-space@main

      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master

      - name: Setup miniconda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          # Quoted so YAML keeps the version as a string (an unquoted 3.10
          # would be read as the float 3.1).
          python-version: "3.9"
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}

      - name: Create conda environment and install deps
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: "3.9"
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -ex
          ${CONDA_RUN} pip install boto3
          ${CONDA_RUN} conda install -y pytest tabulate gitpython git-lfs tqdm psutil
          ${CONDA_RUN} conda install -yq -c conda-forge spacy sentencepiece transformers
          ${CONDA_RUN} pip3 install --pre torch torchtext torchvision torchaudio torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu

      - name: Setup TorchBench branch
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: "3.9"
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -ex
          # The PR body is passed to the script through a temporary file.
          PR_BODY_FILE=/tmp/pr-body.txt
          echo "$PR_BODY" > "${PR_BODY_FILE}"
          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py --pr-body "${PR_BODY_FILE}" set-torchbench-branch

      - name: Checkout TorchBench
        uses: malfet/checkout@silent-checkout
        with:
          repository: razarmehr/benchmark
          path: benchmark
          lfs: false
          ref: ${{ env.TORCHBENCH_BRANCH }}

      - name: Run TorchBench
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: "3.9"
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -x
          # pushd pytorch
          PR_MERGE_BASE=$(git rev-parse origin/master)
          # popd
          PR_BODY_FILE=/tmp/pr-body.txt
          echo "$PR_BODY" > "${PR_BODY_FILE}"
          # shellcheck source=/dev/null
          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py \
            --pr-body "$PR_BODY_FILE" \
            run \
            --pytorch-path "${PWD}" \
            --torchbench-path "${PWD}"/benchmark \
            --pr-num "$PR_NUM" \
            --pr-base-sha "$PR_MERGE_BASE" \
            --pr-head-sha "$PR_HEAD_SHA"

      - name: Remove conda environment and cleanup
        # Always remove the temporary PR-body file, even when an earlier step
        # failed, so it is not left behind on the self-hosted runner; -f keeps
        # this step from failing when the file was never created.
        if: always()
        run: |
          rm -f /tmp/pr-body.txt

      - name: Upload artifact
        uses: actions/upload-artifact@v3
        with:
          name: TorchBench result
          path: ~/.torchbench/bisection/pr${{ github.event.number }}

      - name: Clean up disk space
        if: always()
        continue-on-error: true
        uses: pytorch/test-infra/.github/actions/check-disk-space@main

# Cancel an in-progress run when a newer run starts in the same group
# (same workflow and PR number, or commit SHA when there is no PR).
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

0 comments on commit 956c9c5

Please sign in to comment.