pytorch · xuzhao9 · Oct 25, 2022 · Oct 26, 2022 · Oct 26, 2022 · Oct 26, 2022
diff --git a/.github/workflows/benchmark-config.yml b/.github/workflows/benchmark-config.yml
@@ -67,7 +67,7 @@ jobs:
               exit 1
           fi
           # Install PyTorch nightly from pip
-          pip install --pre torch torchtext torchvision \
+          pip install --pre torch torchtext torchvision torchaudio \
             -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html
           # make sure pytorch+cuda works
           python -c "import torch; torch.cuda.init()"

diff --git a/.github/workflows/pr-gha-runner.yml b/.github/workflows/pr-gha-runner.yml
@@ -4,8 +4,9 @@ on:
   workflow_dispatch:
 
 env:
-  PYTHON_VERSION: "3.8"
+  PYTHON_VERSION: "3.10"
   CUDA_VERSION: "cu116"
+  CONDA_ENV: "pr-test"
   MAGMA_VERSION: "magma-cuda116"
   SETUP_INSTANCE_SCRIPT: "/workspace/setup_instance.sh"
 
@@ -22,20 +23,37 @@ jobs:
           sudo LD_LIBRARY_PATH=/usr/local/nvidia/lib64:$LD_LIBRARY_PATH nvidia-smi -pm 1
           sudo LD_LIBRARY_PATH=/usr/local/nvidia/lib64:$LD_LIBRARY_PATH nvidia-smi -ac 1215,1410
           nvidia-smi
+      - name: Setup Conda Env
+        run: |
+          . "${SETUP_INSTANCE_SCRIPT}"
+          conda create -n "${CONDA_ENV}" python="${PYTHON_VERSION}"
+          conda activate "${CONDA_ENV}"
+          conda install -y "${MAGMA_VERSION}" -c pytorch
+          conda install -y numpy requests ninja pyyaml setuptools gitpython beautifulsoup4 regex
+          conda install -y expecttest -c conda-forge
+          pip install unittest-xml-reporting
       - name: Install PyTorch nightly
         run: |
           . "${SETUP_INSTANCE_SCRIPT}"
-          bash ./scripts/install_nightlies.sh
+          conda activate "${CONDA_ENV}"
+          pip install --pre torch torchvision torchtext torchaudio -f https://download.pytorch.org/whl/nightly/cu116/torch_nightly.html
       - name: Install TorchBench
         run: |
           . "${SETUP_INSTANCE_SCRIPT}"
+          conda activate "${CONDA_ENV}"
           python install.py
       - name: Validate benchmark components (Worker)
         run: |
           . "${SETUP_INSTANCE_SCRIPT}"
+          conda activate "${CONDA_ENV}"
           python -m components.test.test_subprocess
           python -m components.test.test_worker
       - name: Validate benchmark components (Model)
         run: |
           . "${SETUP_INSTANCE_SCRIPT}"
+          conda activate "${CONDA_ENV}"
           python test.py
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
diff --git a/.github/workflows/pr-gpu-stability-ci.yml b/.github/workflows/pr-gpu-stability-ci.yml
@@ -33,7 +33,7 @@ jobs:
           conda install -y numpy requests=2.22 ninja pyyaml mkl mkl-include setuptools \
                            cmake cffi typing_extensions future six dataclasses tabulate gitpython git-lfs
           # Install pytorch nightly
-          pip install --pre torch torchtext torchvision \
+          pip install --pre torch torchtext torchvision torchaudio \
           -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html
           # Install torchbench dependencies
           python install.py

diff --git a/.github/workflows/userbenchmark-t4-metal.yml b/.github/workflows/userbenchmark-t4-metal.yml
@@ -47,7 +47,7 @@ jobs:
               exit 1
           fi
           # Install PyTorch and torchvision nightly from pip
-          pip install --pre torch torchvision torchtext \
+          pip install --pre torch torchvision torchtext torchaudio \
             -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html
           # make sure pytorch+cuda works
           python -c "import torch; torch.cuda.init()"

diff --git a/.github/workflows/v1-nightly.yml b/.github/workflows/v1-nightly.yml
@@ -46,7 +46,7 @@ jobs:
               exit 1
           fi
           # Install PyTorch nightly from pip
-          pip install --pre torch torchtext torchvision \
+          pip install --pre torch torchtext torchvision torchaudio \
           -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html
       - name: Install other TorchBench dependencies
         run: |

diff --git a/.github/workflows/v2-nightly.yml b/.github/workflows/v2-nightly.yml
@@ -48,7 +48,7 @@ jobs:
           # Install magma
           conda install -y -c pytorch "${MAGMA_VERSION}"
           # Install PyTorch nightly from pip
-          pip install --pre torch torchtext torchvision \
+          pip install --pre torch torchtext torchvision torchaudio \
           -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html
       - name: Install other TorchBench dependencies
         run: |

diff --git a/bisection.py b/bisection.py
@@ -1,7 +1,7 @@
 """bisection.py
 Runs bisection to determine PRs that cause performance change.
-It assumes that the pytorch, torchbench, torchtext and torchvision repositories provided are all clean with the latest code.
-By default, the torchvision and torchtext package version will be fixed to the latest commit on the pytorch commit date.
+It assumes that the pytorch, torchbench, torchtext, torchvision, and torchaudio repositories provided are all clean with the latest code.
+By default, the torchaudio, torchvision and torchtext packages will be pinned to their latest commit on the same date as the pytorch commit.
 
 Usage:
   python bisection.py --work-dir <WORK-DIR> \
@@ -29,6 +29,7 @@
 TORCHBENCH_DEPS = {
     "torchtext": (os.path.expandvars("${HOME}/text"), "main"),
     "torchvision": (os.path.expandvars("${HOME}/vision"), "main"),
+    "torchaudio": (os.path.expandvars("${HOME}/audio"), "main"),
 }
 
 def exist_dir_path(string):
@@ -151,7 +152,7 @@ def prep(self, build_env: os._Environ) -> bool:
         self.build_env = build_env
         return True
 
-    # Update pytorch, torchtext, and torchvision repo
+    # Update pytorch, torchtext, torchvision, and torchaudio repo
     def update_repos(self):
         repos = [(self.srcpath, "master")]
         repos.extend(TORCHBENCH_DEPS.values())
@@ -215,6 +216,10 @@ def build_install_deps(self, build_env):
         print(f"Building torchtext ...", end="", flush=True)
         command = "python setup.py clean install"
         subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchtext"][0], env=build_env, shell=True)
+        # Build torchaudio
+        print(f"Building torchaudio ...", end="", flush=True)
+        command = "python setup.py clean install"
+        subprocess.check_call(command, cwd=TORCHBENCH_DEPS["torchaudio"][0], env=build_env, shell=True)
         print("done")
 
     def _build_lazy_tensor(self, commit: Commit, build_env: Dict[str, str]):
@@ -261,7 +266,7 @@ def build(self, commit: Commit):
         self.build_install_deps(build_env)
 
     def cleanup(self):
-        packages = ["torch", "torchtext", "torchvision"]
+        packages = ["torch", "torchtext", "torchvision", "torchaudio"]
         CLEANUP_ROUND = 5
         # Clean up multiple times to make sure the packages are all uninstalled
         for _ in range(CLEANUP_ROUND):

diff --git a/components/_impl/workers/subprocess_rpc.py b/components/_impl/workers/subprocess_rpc.py
@@ -358,16 +358,28 @@ def from_exception(e: Exception, tb: types.TracebackType) -> "SerializedExceptio
         """
         try:
             print_file = io.StringIO()
-            traceback.print_exception(
-                etype=type(e),
-                value=e,
-                tb=tb,
-                file=print_file,
-            )
+            python_vinfo = sys.version_info
+            if python_vinfo.major == 3 and python_vinfo.minor < 10:
+                # Starting from Python 3.10, traceback renames the `etype` parameter to `exc`
+                # and makes it positional-only.
+                # doc: https://docs.python.org/3/library/traceback.html#traceback.print_exception
+                traceback.print_exception(
+                    etype=type(e),
+                    value=e,
+                    tb=tb,
+                    file=print_file,
+                )
+            else:
+                traceback.print_exception(
+                    type(e),
+                    value=e,
+                    tb=tb,
+                    file=print_file,
+                )
             print_file.seek(0)
             traceback_print: str = print_file.read()
 
-        except Exception:
+        except Exception as e:
             traceback_print = textwrap.dedent("""
                 Traceback
                     Failed to extract traceback from worker. This is not expected.

diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile
@@ -45,11 +45,11 @@ RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.s
     chmod +x Miniconda3-latest-Linux-x86_64.sh && \
     bash ./Miniconda3-latest-Linux-x86_64.sh -b -u
 
-# Use Python 3.8 as default
+# Use Python 3.10 as default
 RUN . ${HOME}/miniconda3/etc/profile.d/conda.sh && \
     conda activate base && \
     conda init && \
-    conda install -y python=3.8 && \
+    conda install -y python=3.10 && \
     pip install unittest-xml-reporting pyyaml
 
 RUN echo "\

diff --git a/scripts/install_nightlies.sh b/scripts/install_nightlies.sh
@@ -1,14 +1,20 @@
 #!/bin/bash
-set -e
+set -ex
 
 . ~/miniconda3/etc/profile.d/conda.sh
-conda activate base
+
+if [[ -z "${CONDA_ENV}" ]]; then
+    conda activate base
+else
+    conda activate "${CONDA_ENV}"
+fi
 
 conda install -y numpy requests ninja pyyaml setuptools gitpython beautifulsoup4 regex
 conda install -y -c pytorch magma-cuda116
 
 # install the most recent successfully built pytorch packages
-python torchbenchmark/util/torch_nightly.py --install-nightlies --packages torch torchvision torchtext
+# torchaudio is required by fairseq/fambench_xlmr
+pip install --pre torch torchvision torchtext torchaudio -f https://download.pytorch.org/whl/nightly/cu116/torch_nightly.html
 
 conda install -y expecttest -c conda-forge
 

diff --git a/torchbenchmark/models/attention_is_all_you_need_pytorch/install.py b/torchbenchmark/models/attention_is_all_you_need_pytorch/install.py
@@ -14,12 +14,12 @@ def preprocess():
     multi30k_data_dir = os.path.join(current_dir.parent.parent, "data", ".data", "multi30k")
     root = os.path.join(str(Path(__file__).parent), ".data")
     os.makedirs(root, exist_ok=True)
-    subprocess.check_call([sys.executable, 'preprocess.py', '-lang_src', 'de', '-lang_trg', 'en', '-share_vocab',
+    subprocess.check_call([sys.executable, 'preprocess.py', '-lang_src', 'de_core_news_sm', '-lang_trg', 'en_core_web_sm', '-share_vocab',
                            '-save_data', os.path.join(root, 'm30k_deen_shr.pkl'), '-data_path', multi30k_data_dir])
 
 if __name__ == '__main__':
     pip_install_requirements()
-    spacy_download('en')
-    spacy_download('de')
+    spacy_download('en_core_web_sm')
+    spacy_download('de_core_news_sm')
     # Preprocessed pkl is larger than 100MB so we cannot skip preprocess
     preprocess()
diff --git a/torchbenchmark/models/attention_is_all_you_need_pytorch/preprocess.py b/torchbenchmark/models/attention_is_all_you_need_pytorch/preprocess.py
@@ -266,7 +266,7 @@ def main_wo_bpe():
     Usage: python preprocess.py -lang_src de -lang_trg en -save_data multi30k_de_en.pkl -share_vocab
     '''
 
-    spacy_support_langs = ['de', 'el', 'en', 'es', 'fr', 'it', 'lt', 'nb', 'nl', 'pt']
+    spacy_support_langs = ['de_core_news_sm', 'el_core_news_sm', 'en_core_web_sm', 'es_core_news_sm', 'fr_core_news_sm', 'it_core_news_sm', 'lt_core_news_sm', 'nb_core_news_sm', 'nl_core_news_sm', 'pt_core_news_sm']
 
     parser = argparse.ArgumentParser()
     parser.add_argument('-lang_src', required=True, choices=spacy_support_langs)
@@ -309,16 +309,19 @@ def tokenize_trg(text):
     MIN_FREQ = opt.min_word_count
 
     if not all([opt.data_src, opt.data_trg]):
-        assert {opt.lang_src, opt.lang_trg} == {'de', 'en'}
+        assert {opt.lang_src, opt.lang_trg} == {'de_core_news_sm', 'en_core_web_sm'}
     else:
         # Pack custom txt file into example datasets
         raise NotImplementedError
 
     def filter_examples_with_length(x):
         return len(vars(x)['src']) <= MAX_LEN and len(vars(x)['trg']) <= MAX_LEN
 
+    def get_short_lang(full_lang):
+        return full_lang.split('_')[0]
+
     train, val, test = Multi30k.splits(
-            exts = ('.' + opt.lang_src, '.' + opt.lang_trg),
+            exts = ('.' + get_short_lang(opt.lang_src), '.' + get_short_lang(opt.lang_trg)),
             fields = (SRC, TRG),
             filter_pred=filter_examples_with_length,
             path=opt.data_path)

diff --git a/torchbenchmark/models/attention_is_all_you_need_pytorch/requirements.txt b/torchbenchmark/models/attention_is_all_you_need_pytorch/requirements.txt
@@ -1,5 +1,5 @@
-dill==0.3.4
+dill==0.3.5.1
 tqdm
 iopath
 numpy
-spacy==2.3.5
+spacy
diff --git a/torchbenchmark/models/fambench_xlmr/install.py b/torchbenchmark/models/fambench_xlmr/install.py
@@ -11,7 +11,15 @@ def update_fambench_submodule():
     subprocess.check_call(update_command, cwd=REPO_PATH)
 
 def pip_install_requirements():
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'])
+    try:
+        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'])
+        # pin fairseq version to 0.12.2
+        # --no-deps: skip fairseq's own declared dependencies to avoid version conflicts
+        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-deps', 'fairseq==0.12.2'])
+    except subprocess.CalledProcessError:
+        # We ignore the ResolutionImpossible error because fairseq requires omegaconf < 2.1
+        # but detectron2 requires omegaconf >= 2.1
+        pass
 
 if __name__ == "__main__":
     update_fambench_submodule()

diff --git a/torchbenchmark/models/fambench_xlmr/metadata.yaml b/torchbenchmark/models/fambench_xlmr/metadata.yaml
@@ -1,6 +1,8 @@
 devices:
   NVIDIA A100-SXM4-40GB:
     eval_batch_size: 64
+  cpu:
+    eval_batch_size: 4
 eval_benchmark: false
 eval_deterministic: false
 eval_nograd: true

diff --git a/torchbenchmark/models/fambench_xlmr/requirements.txt b/torchbenchmark/models/fambench_xlmr/requirements.txt
@@ -1,8 +1,6 @@
 sacrebleu
 bitarray
-# pin fairseq version
-fairseq==0.10.2
-omegaconf==2.1.1
-hydra-core==1.1.2
+cffi
+omegaconf
+hydra-core
 sentencepiece
-xformers
diff --git a/torchbenchmark/models/yolov3/requirements.txt b/torchbenchmark/models/yolov3/requirements.txt
@@ -1,12 +1,12 @@
 # pip install -U -r requirements.txt
 numpy
 # opencv-python 4.5 requires numpy 1.8
-opencv-python >= 4.1, < 4.5
+opencv-python
 matplotlib
 pycocotools
 tqdm
 pillow
-tensorboard >= 1.14
+tensorboard
 
 # Nvidia Apex (optional) for mixed precision training --------------------------
 # git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex

diff --git a/torchbenchmark/util/model.py b/torchbenchmark/util/model.py
@@ -169,9 +169,12 @@ def determine_batch_size(self, batch_size=None):
         if not batch_size:
             self.batch_size = self.DEFAULT_TRAIN_BSIZE if self.test == "train" else self.DEFAULT_EVAL_BSIZE
             # use the device suggestion on CUDA inference tests
-            if self.test == "eval" and self.device == "cuda":
-                current_device_name = torch.cuda.get_device_name()
-                assert current_device_name, f"torch.cuda.get_device_name() returns None when device is set to cuda, please double check."
+            if self.test == "eval":
+                if self.device == "cuda":
+                    current_device_name = torch.cuda.get_device_name()
+                    assert current_device_name, f"torch.cuda.get_device_name() returns None when device is set to cuda, please double check."
+                elif self.device == "cpu":
+                    current_device_name = "cpu"
                 if self.metadata and "devices" in self.metadata and current_device_name in self.metadata["devices"]:
                     self.batch_size = self.metadata["devices"][current_device_name]["eval_batch_size"]
             # If the model doesn't implement test or eval test

diff --git a/utils/__init__.py b/utils/__init__.py
@@ -2,7 +2,8 @@
 from urllib import request
 from typing import List, Dict
 
-TORCH_DEPS = ['torch', 'torchvision', 'torchtext']
+TORCH_DEPS = ['torch', 'torchvision', 'torchtext', 'torchaudio']
+
 proxy_suggestion = "Unable to verify https connectivity, " \
                    "required for setup.\n" \
                    "Do you need to use a proxy?"

diff --git a/utils/cuda_utils.py b/utils/cuda_utils.py
@@ -56,15 +56,15 @@ def prepare_cuda_env(cuda_version: str, dryrun=False):
     return env
 
 def install_pytorch_nightly(cuda_version: str, env, dryrun=False):
-    uninstall_torch_cmd = ["pip", "uninstall", "-y", "torch", "torchvision", "torchtext"]
+    uninstall_torch_cmd = ["pip", "uninstall", "-y", "torch", "torchvision", "torchtext", "torchaudio"]
     if dryrun:
         print(f"Uninstall pytorch: {uninstall_torch_cmd}")
     else:
         # uninstall multiple times to make sure the env is clean
         for _loop in range(3):
             subprocess.check_call(uninstall_torch_cmd)
     pytorch_nightly_url = f"https://download.pytorch.org/whl/nightly/{CUDA_VERSION_MAP[cuda_version]['pytorch_url']}/torch_nightly.html"
-    install_torch_cmd = ["pip", "install", "--pre", "torch", "torchvision", "torchtext", "-f",  pytorch_nightly_url]
+    install_torch_cmd = ["pip", "install", "--pre", "torch", "torchvision", "torchtext", "torchaudio", "-f",  pytorch_nightly_url]
     if dryrun:
         print(f"Install pytorch nightly: {install_torch_cmd}")
     else: