diff --git a/.github/scripts/run_torchbench.py b/.github/scripts/run_torchbench.py
index 352da69c8158e..e110cb148d730 100644
--- a/.github/scripts/run_torchbench.py
+++ b/.github/scripts/run_torchbench.py
@@ -1,14 +1,10 @@
 """
-Generate a torchbench test report from a file containing the PR body.
+Generate a torchbench test report for the tests listed in GLOBAL_PR_LIST.
 Currently, only supports running tests on specified model names

 Testing environment:
-- Intel Xeon 8259CL @ 2.50 GHz, 24 Cores with disabled Turbo and HT
-- Nvidia Tesla T4
-- Nvidia Driver 470.82.01
-- Python 3.8
-- CUDA 11.3
 """
+
 # Known issues:
 # 1. Does not reuse the build artifact in other CI workflows
 # 2. CI jobs are serialized because there is only one worker
@@ -22,6 +18,46 @@
 from typing import List, Tuple

+GLOBAL_PR_LIST = [
+    "test_train[alexnet-mps-eager]",
+    "test_train[dcgan-mps-eager]",
+    "test_train[hf_Bert-mps-eager]",
+    "test_train[mnasnet1_0-mps-eager]",
+    "test_train[mobilenet_v2-mps-eager]",
+    "test_train[pytorch_unet-mps-eager]",
+    "test_train[resnet18-mps-eager]",
+    "test_train[resnet50-mps-eager]",
+    "test_train[resnext50_32x4d-mps-eager]",
+    "test_train[shufflenet_v2_x1_0-mps-eager]",
+    "test_train[timm_efficientnet-mps-eager]",
+    "test_train[timm_nfnet-mps-eager]",
+    "test_train[timm_regnet-mps-eager]",
+    "test_train[timm_resnest-mps-eager]",
+    "test_train[timm_vision_transformer-mps-eager]",
+    "test_train[timm_vovnet-mps-eager]",
+    "test_train[soft_actor_critic-mps-eager]",
+    "test_train[hf_DistilBert-mps-eager]",
+    "test_train[hf_Bart-mps-eager]",
+    "test_train[hf_Albert-mps-eager]",
+    "test_train[hf_GPT2-mps-eager]",
+    "test_train[lennard_jones-mps-eager]",
+    "test_train[pytorch_stargan-mps-eager]",
+    "test_train[pytorch_struct-mps-eager]",
+    "test_train[timm_vision_transformer_large-mps-eager]",
+    "test_train[functorch_dp_cifar10-mps-eager]",
+    "test_train[squeezenet1_1-mps-eager]",
+    "test_train[hf_T5_base-mps-eager]",
+    "test_train[hf_T5_large-mps-eager]",
+    "test_train[densenet121-mps-eager]",
+    "test_train[phlippe_resnet-mps-eager]",
+    "test_train[phlippe_densenet-mps-eager]",
+    "test_train[tts_angular-mps-eager]",
+    "test_train[DALLE2_pytorch-mps-eager]",
+    "test_train[functorch_maml_omniglot-mps-eager]",
+    "test_train[demucs-mps-eager]",
+    "test_train[vgg16-mps-eager]"
+]
+
 TORCHBENCH_CONFIG_NAME = "config.yaml"
 TORCHBENCH_USERBENCHMARK_CONFIG_NAME = "ub-config.yaml"
 MAGIC_PREFIX = "RUN_TORCHBENCH:"
@@ -78,7 +114,14 @@ def find_current_branch(repo_path: str) -> str:
 def deploy_torchbench_config(output_dir: str, config: str, config_name: str = TORCHBENCH_CONFIG_NAME) -> None:
     # Create test dir if needed
-    pathlib.Path(output_dir).mkdir(exist_ok=True)
+    # pathlib.Path(output_dir).mkdir(exist_ok=True)
+    try:
+        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=False)
+    except FileExistsError:
+        print("Folder is already there")
+    else:
+        print("Folder was created")
+
     # TorchBench config file name
     config_path = os.path.join(output_dir, config_name)
     with open(config_path, "w") as fp:
         fp.write(config)
@@ -100,23 +143,9 @@ def is_valid_ub_dir(ub_path: str) -> bool:
 def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> Tuple[List[str], List[str]]:
     model_list = []
     userbenchmark_list = []
-    pr_list = []
-    with open(prbody_file, "r") as pf:
-        lines = map(lambda x: x.strip(), pf.read().splitlines())
-        magic_lines = list(filter(lambda x: x.startswith(MAGIC_PREFIX), lines))
-        if magic_lines:
-            # Only the first magic line will be recognized.
-            pr_list = list(map(lambda x: x.strip(), magic_lines[0][len(MAGIC_PREFIX):].split(",")))
-    valid_models = get_valid_models(torchbench_path)
-    valid_ubs = get_valid_userbenchmarks(torchbench_path)
+    pr_list = GLOBAL_PR_LIST
     for pr_bm in pr_list:
-        if pr_bm in valid_models or pr_bm == "ALL":
-            model_list.append(pr_bm)
-        elif pr_bm in valid_ubs:
-            userbenchmark_list.append(pr_bm)
-        else:
-            print(f"The model or benchmark {pr_bm} you specified does not exist in TorchBench suite. Please double check.")
-            exit(-1)
+        model_list.append(pr_bm)
     # Shortcut: if pr_list is ["ALL"], run all the model tests
     if "ALL" in model_list:
         model_list = ["ALL"]
diff --git a/.github/workflows/run_torchbench.yml b/.github/workflows/run_torchbench.yml
new file mode 100644
index 0000000000000..63b0f965e4fc5
--- /dev/null
+++ b/.github/workflows/run_torchbench.yml
@@ -0,0 +1,110 @@
+name: TorchBench CI
+on:
+  pull_request:
+    types: [labeled, opened, synchronize, reopened]
+  workflow_dispatch:
+
+env:
+  PYTHON_VERSION: "3.9"
+  # must be consistent with https://github.com/pytorch/benchmark/blob/main/requirements.txt#L19
+  PR_NUM: ${{ github.event.number }}
+  PR_BODY: ${{ github.event.pull_request.body }}
+  PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
+  PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}
+
+jobs:
+  run-torchbench:
+    runs-on: [macos-torchbench]
+    # Set to 6 hours
+    timeout-minutes: 360
+    if: contains(github.event.pull_request.labels.*.name, 'ciflow/torchbench')
+    steps:
+
+      - name: Clean up disk space before running MacOS workflow
+        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
+
+      - name: Setup miniconda
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        with:
+          python-version: 3.9
+          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
+
+      - name: Create conda environment and install deps
+        env:
+          ENV_NAME: conda-test-env-${{ github.run_id }}
+          PY_VERS: 3.9
+        shell: arch -arch arm64 bash {0}
+        run: |
+          # shellcheck disable=SC1090
+          set -ex
+          ${CONDA_RUN} pip install boto3
+          ${CONDA_RUN} conda install -y pytest tabulate gitpython git-lfs tqdm psutil
+          ${CONDA_RUN} conda install -yq -c conda-forge spacy sentencepiece transformers
+          ${CONDA_RUN} pip3 install --pre torch torchtext torchvision torchaudio torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+      - name: Setup TorchBench branch
+        env:
+          ENV_NAME: conda-test-env-${{ github.run_id }}
+          PY_VERS: 3.9
+        shell: arch -arch arm64 bash {0}
+        run: |
+          # shellcheck disable=SC1090
+          set -ex
+          PR_BODY_FILE=/tmp/pr-body.txt
+          echo "$PR_BODY" > ${PR_BODY_FILE}
+          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py --pr-body "${PR_BODY_FILE}" set-torchbench-branch
+
+      - name: Checkout TorchBench
+        uses: malfet/checkout@silent-checkout
+        with:
+          repository: razarmehr/benchmark
+          path: benchmark
+          lfs: false
+          ref: ${{ env.TORCHBENCH_BRANCH }}
+
+      - name: Run TorchBench
+        env:
+          ENV_NAME: conda-test-env-${{ github.run_id }}
+          PY_VERS: 3.9
+        shell: arch -arch arm64 bash {0}
+        run: |
+          # shellcheck disable=SC1090
+          set -x
+          # pushd pytorch
+          PR_MERGE_BASE=$(git rev-parse origin/master)
+          # popd
+          PR_BODY_FILE=/tmp/pr-body.txt
+          echo "$PR_BODY" > ${PR_BODY_FILE}
+          # shellcheck source=/dev/null
+          ${CONDA_RUN} python3 .github/scripts/run_torchbench.py \
+            --pr-body "$PR_BODY_FILE" \
+            run \
+            --pytorch-path "${PWD}" \
+            --torchbench-path "${PWD}"/benchmark \
+            --pr-num "$PR_NUM" \
+            --pr-base-sha "$PR_MERGE_BASE" \
+            --pr-head-sha "$PR_HEAD_SHA"
+
+      - name: Remove conda environment and cleanup
+        run: |
+          rm /tmp/pr-body.txt
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: TorchBench result
+          path: ~/.torchbench/bisection/pr${{ github.event.number }}
+
+      - name: Clean up disk space
+        if: always()
+        continue-on-error: true
+        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
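Note on the hardcoded list: the entries in `GLOBAL_PR_LIST` are pytest-style test IDs of the form `test_train[<model>-<device>-<mode>]`, while the code path they replace validated bare model names against TorchBench's `get_valid_models()`. The snippet below is an illustrative sketch only and is not part of this diff; it shows one way the bare model name could be recovered from such an ID, assuming every entry follows that naming pattern. The helper names (`model_name_from_test_id`, `model_names`) are hypothetical.

```python
# Illustrative sketch only -- not part of this PR.
# Assumes every entry follows the test_train[<model>-<device>-<mode>] pattern
# used in GLOBAL_PR_LIST; the helper names are hypothetical.
from typing import List


def model_name_from_test_id(test_id: str) -> str:
    """Return e.g. 'alexnet' for 'test_train[alexnet-mps-eager]'."""
    prefix = "test_train["
    if not (test_id.startswith(prefix) and test_id.endswith("]")):
        # Not in the expected form; keep the entry verbatim.
        return test_id
    inner = test_id[len(prefix):-1]   # 'alexnet-mps-eager'
    return inner.rsplit("-", 2)[0]    # drop '<device>' and '<mode>'


def model_names(pr_list: List[str]) -> List[str]:
    """Map a list of test IDs to bare model names."""
    return [model_name_from_test_id(t) for t in pr_list]


# Example: model_names(["test_train[hf_Bert-mps-eager]"]) == ["hf_Bert"]
```

Such a mapping would keep the hardcoded test IDs compatible with the existing model-name validation if that check is ever re-enabled.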