Skip to content

Commit

Permalink
Run torchbench on CI (#373)
Browse files Browse the repository at this point in the history
* Run torchbench on CI

* Run torchbench on CI #2

* Run torchbench on CI #3

* Run torchbench on CI #4

* Run torchbench on CI #5

* Run torchbench on CI #6

* Run torchbench on CI #7

* Change #1

* Change #2

* Run torchbench on CI #8

* Run torchbench on CI #9

* Install dep

* Change #11

* Change #12

* Change #13

* Change #14

* Change #15

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* More fixes

* Update dependencies

* other fixes

* more fixes

* more fixes

* more fixes

* more fixes #2

* more fixes #2

* more fixes #2

* more fixes

* other fixes

* other fixes

* other fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* additional changes

* other fixes

* other fixes

* additional changes

* other fixes

* other fixes

* other fixes

* other fixes

* other fixes

* Address PR comments

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes

* more fixes
  • Loading branch information
DenisVieriu97 authored and skotapati committed Apr 7, 2023
1 parent b691c3b commit 2ada3b4
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 30 deletions.
80 changes: 50 additions & 30 deletions .github/scripts/run_torchbench.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
"""
Generate a torchbench test report from a file containing the PR body.
Generate a torchbench test report for the tests listed in GLOBAL_PR_LIST.
Currently, only supports running tests on specified model names.
Testing environment:
- Intel Xeon 8259CL @ 2.50 GHz, 24 Cores with disabled Turbo and HT
- Nvidia Tesla T4
- Nvidia Driver 470.82.01
- Python 3.8
- CUDA 11.3
"""
import argparse

# Known issues:
# 1. Does not reuse the build artifact in other CI workflows
Expand All @@ -21,8 +15,45 @@

from typing import List, Tuple

import boto3 # type: ignore[import]
import git # type: ignore[import]
# Test ids used as the PR benchmark list (see extract_models_from_pr).
# Every entry has the form "test_train[<model>-mps-eager]"; the order of the
# model names below is the order of the resulting list.
GLOBAL_PR_LIST = [
    f"test_train[{model_name}-mps-eager]"
    for model_name in (
        "alexnet",
        "dcgan",
        "hf_Bert",
        "mnasnet1_0",
        "mobilenet_v2",
        "pytorch_unet",
        "resnet18",
        "resnet50",
        "resnext50_32x4d",
        "shufflenet_v2_x1_0",
        "timm_efficientnet",
        "timm_nfnet",
        "timm_regnet",
        "timm_resnest",
        "timm_vision_transformer",
        "timm_vovnet",
        "soft_actor_critic",
        "hf_DistilBert",
        "hf_Bart",
        "hf_Albert",
        "hf_GPT2",
        "lennard_jones",
        "pytorch_stargan",
        "pytorch_struct",
        "timm_vision_transformer_large",
        "functorch_dp_cifar10",
        "squeezenet1_1",
        "hf_T5_base",
        "hf_T5_large",
        "densenet121",
        "phlippe_resnet",
        "phlippe_densenet",
        "tts_angular",
        "DALLE2_pytorch",
        "functorch_maml_omniglot",
        "demucs",
        "vgg16",
    )
]

TORCHBENCH_CONFIG_NAME = "config.yaml"
TORCHBENCH_USERBENCHMARK_CONFIG_NAME = "ub-config.yaml"
Expand Down Expand Up @@ -89,7 +120,14 @@ def deploy_torchbench_config(
output_dir: str, config: str, config_name: str = TORCHBENCH_CONFIG_NAME
) -> None:
# Create test dir if needed
pathlib.Path(output_dir).mkdir(exist_ok=True)
# pathlib.Path(output_dir).mkdir(exist_ok=True)
try:
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=False)
except FileExistsError:
print("Folder is already there")
else:
print("Folder was created")

# TorchBench config file name
config_path = os.path.join(output_dir, config_name)
with open(config_path, "w") as fp:
Expand Down Expand Up @@ -128,27 +166,9 @@ def extract_models_from_pr(
) -> Tuple[List[str], List[str]]:
model_list = []
userbenchmark_list = []
pr_list = []
with open(prbody_file, "r") as pf:
lines = (x.strip() for x in pf.read().splitlines())
magic_lines = list(filter(lambda x: x.startswith(MAGIC_PREFIX), lines))
if magic_lines:
# Only the first magic line will be recognized.
pr_list = [
x.strip() for x in magic_lines[0][len(MAGIC_PREFIX) :].split(",")
]
valid_models = get_valid_models(torchbench_path)
valid_ubs = get_valid_userbenchmarks(torchbench_path)
pr_list = GLOBAL_PR_LIST
for pr_bm in pr_list:
if pr_bm in valid_models or pr_bm == "ALL":
model_list.append(pr_bm)
elif pr_bm in valid_ubs:
userbenchmark_list.append(pr_bm)
else:
print(
f"The model or benchmark {pr_bm} you specified does not exist in TorchBench suite. Please double check."
)
exit(-1)
model_list.append(pr_bm)
# Shortcut: if pr_list is ["ALL"], run all the model tests
if "ALL" in model_list:
model_list = ["ALL"]
Expand Down
110 changes: 110 additions & 0 deletions .github/workflows/run_torchbench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Runs .github/scripts/run_torchbench.py on the self-hosted `macos-torchbench`
# runner for pull requests labeled `ciflow/torchbench`, then uploads the
# result directory as a build artifact.
name: TorchBench CI
on:
  pull_request:
    types: [labeled, opened, synchronize, reopened]
  workflow_dispatch:

env:
  PYTHON_VERSION: "3.9"
  # must be consistent with https://github.com/pytorch/benchmark/blob/main/requirements.txt#L19
  # NOTE(review): the comment above no longer sits next to a version pin that
  # matches a requirements.txt entry - confirm which variable it refers to.
  PR_NUM: ${{ github.event.number }}
  PR_BODY: ${{ github.event.pull_request.body }}
  PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
  PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}

jobs:
  run-torchbench:
    runs-on: [macos-torchbench]
    # Set to 6 hours
    timeout-minutes: 360
    # NOTE(review): this condition reads the pull_request payload, so runs
    # started through workflow_dispatch appear to be skipped - confirm intended.
    if: contains(github.event.pull_request.labels.*.name, 'ciflow/torchbench')
    steps:

      - name: Clean up disk space before running MacOS workflow
        uses: pytorch/test-infra/.github/actions/check-disk-space@main

      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master

      - name: Setup miniconda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: 3.9
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}

      - name: Create conda environment and install deps
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: 3.9
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -ex
          ${CONDA_RUN} pip install boto3
          ${CONDA_RUN} conda install -y pytest tabulate gitpython git-lfs tqdm psutil
          ${CONDA_RUN} conda install -yq -c conda-forge spacy sentencepiece transformers
          ${CONDA_RUN} pip3 install --pre torch torchtext torchvision torchaudio torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu

      - name: Setup TorchBench branch
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: 3.9
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -ex
          PR_BODY_FILE=/tmp/pr-body.txt
          # The PR body is passed through the environment and written to a
          # file rather than interpolated into the script text.
          echo "$PR_BODY" > "${PR_BODY_FILE}"
          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py --pr-body "${PR_BODY_FILE}" set-torchbench-branch

      - name: Checkout TorchBench
        uses: malfet/checkout@silent-checkout
        with:
          repository: razarmehr/benchmark
          path: benchmark
          lfs: false
          ref: ${{ env.TORCHBENCH_BRANCH }}

      - name: Run TorchBench
        env:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: 3.9
        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -x
          # pushd pytorch
          PR_MERGE_BASE=$(git rev-parse origin/master)
          # popd
          PR_BODY_FILE=/tmp/pr-body.txt
          echo "$PR_BODY" > "${PR_BODY_FILE}"
          # shellcheck source=/dev/null
          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py \
            --pr-body "$PR_BODY_FILE" \
            run \
            --pytorch-path "${PWD}" \
            --torchbench-path "${PWD}"/benchmark \
            --pr-num "$PR_NUM" \
            --pr-base-sha "$PR_MERGE_BASE" \
            --pr-head-sha "$PR_HEAD_SHA"

      - name: Remove conda environment and cleanup
        # Run even when an earlier step failed so the PR body file is not left
        # behind in /tmp; -f keeps the step green if the file was never written.
        if: always()
        run: |
          rm -f /tmp/pr-body.txt

      - name: Upload artifact
        uses: actions/upload-artifact@v3
        with:
          name: TorchBench result
          path: ~/.torchbench/bisection/pr${{ github.event.number }}

      - name: Clean up disk space
        if: always()
        continue-on-error: true
        uses: pytorch/test-infra/.github/actions/check-disk-space@main

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

0 comments on commit 2ada3b4

Please sign in to comment.