Remove "NotImplemented" flags if the test is limited by hardware capacity #781

Closed · wants to merge 4 commits
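This PR replaces the hard-coded EXCLUDELIST in test.py and the per-model NotImplementedError guards with `not_implemented` entries in each model's metadata.yaml, which the tests consult through `skip_by_metadata` from `torchbenchmark/util/metadata_utils`. That helper is not part of this diff; the sketch below is only an assumption of how such a matcher could behave, treating every key omitted from a `not_implemented` entry as a wildcard over the (test, device, jit, extra_args) combination.

```python
# Illustrative sketch only: the real skip_by_metadata lives in
# torchbenchmark/util/metadata_utils.py and is not shown in this diff.
def skip_by_metadata(test, device, jit, extra_args, metadata):
    """Return True if (test, device, jit, extra_args) matches any
    `not_implemented` entry in the model's metadata.yaml."""
    if not metadata or "not_implemented" not in metadata:
        return False
    for entry in metadata["not_implemented"]:
        # A key left out of an entry matches anything.
        if (entry.get("test", test) == test
                and entry.get("device", device) == device
                and entry.get("jit", jit) == jit
                and entry.get("extra_args", extra_args) == extra_args):
            return True
    return False
```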
14 changes: 5 additions & 9 deletions test.py
@@ -14,6 +14,7 @@

import torch
from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
from torchbenchmark.util.metadata_utils import skip_by_metadata


# Some of the models have very heavyweight setup, so we have to set a very
@@ -24,14 +25,6 @@
# unresponsive for 5 minutes the parent will presume it dead / incapacitated.)
TIMEOUT = 300 # Seconds

# Skip this list of unit tests. One reason may be that the original batch size
# used in the paper is too large to fit on the CI's GPU.
EXCLUDELIST = {("densenet121", "train", "cuda"), # GPU train runs out of memory on CI.
("densenet121", "train", "cpu"), # CPU train runs for too long on CI.
("densenet121", "example", "cuda"), # GPU train runs out of memory on CI.
("densenet121", "example", "cpu")} # CPU train runs for too long on CI.


class TestBenchmark(unittest.TestCase):

def setUp(self):
@@ -104,10 +97,13 @@ def check_device_fn(self):
self.skipTest(f'Method check_device on {device} is not implemented, skipping...')

name = os.path.basename(path)
metadata = get_metadata_from_yaml(path)
for fn, fn_name in zip([example_fn, train_fn, eval_fn, check_device_fn],
["example", "train", "eval", "check_device"]):
# set exclude list based on metadata
setattr(TestBenchmark, f'test_{name}_{fn_name}_{device}',
(unittest.skipIf((name, fn_name, device) in EXCLUDELIST, "This test is on the EXCLUDELIST")(fn)))
(unittest.skipIf(skip_by_metadata(test=fn_name, device=device,\
jit=False, extra_args=[], metadata=metadata), "This test is skipped by its metadata")(fn)))


def _load_tests():
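For illustration, one iteration of the loop above for densenet121 on CUDA reduces to roughly the following; `path`, `train_fn`, `TestBenchmark` and `unittest` come from the surrounding test.py, and the `not_implemented` entries are the ones added to densenet121's metadata.yaml later in this diff. This is a sketch, not the committed code.

```python
# Hypothetical expansion of a single loop iteration for densenet121 / train / cuda.
metadata = get_metadata_from_yaml(path)   # includes not_implemented: [{device: cuda}, {device: cpu}]
should_skip = skip_by_metadata(test="train", device="cuda", jit=False,
                               extra_args=[], metadata=metadata)   # -> True
setattr(TestBenchmark, "test_densenet121_train_cuda",
        unittest.skipIf(should_skip, "This test is skipped by its metadata")(train_fn))
```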
10 changes: 8 additions & 2 deletions test_bench.py
@@ -15,9 +15,9 @@
import pytest
import time
from components._impl.workers import subprocess_worker
from torchbenchmark import _list_model_paths, ModelTask
from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
from torchbenchmark.util.machine_config import get_machine_state

from torchbenchmark.util.metadata_utils import skip_by_metadata

def pytest_generate_tests(metafunc):
# This is where the list of models to test can be configured
@@ -48,6 +48,9 @@ class TestBenchNetwork:

def test_train(self, model_path, device, compiler, benchmark):
try:
if skip_by_metadata(test="train", device=device, jit=(compiler == 'jit'), \
extra_args=[], metadata=get_metadata_from_yaml(model_path)):
raise NotImplementedError("Test skipped by its metadata.")
task = ModelTask(model_path)
if not task.model_details.exists:
return # Model is not supported.
@@ -62,6 +65,9 @@ def test_train(self, model_path, device, compiler, benchmark):

def test_eval(self, model_path, device, compiler, benchmark, pytestconfig):
try:
if skip_by_metadata(test="eval", device=device, jit=(compiler == 'jit'), \
extra_args=[], metadata=get_metadata_from_yaml(model_path)):
raise NotImplementedError("Test skipped by its metadata.")
task = ModelTask(model_path)
if not task.model_details.exists:
return # Model is not supported.
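Unlike test.py, which wraps the generated tests in unittest.skipIf, test_bench.py raises NotImplementedError when the metadata says to skip and presumably relies on the NotImplementedError handling further down each test method (outside the visible hunk). A minimal, self-contained illustration of that control flow, with `run_benchmark` as a hypothetical stand-in for the benchmark body rather than a torchbenchmark API:

```python
# Minimal illustration of the raise-and-catch skip pattern; run_benchmark and
# the inline metadata dict are hypothetical stand-ins, not torchbenchmark code.
def run_benchmark(test, device, jit, metadata):
    try:
        if skip_by_metadata(test=test, device=device, jit=jit,
                            extra_args=[], metadata=metadata):
            raise NotImplementedError("Test skipped by its metadata.")
        print(f"running {test} on {device} (jit={jit})")
    except NotImplementedError:
        print(f"Test {test} on {device} is not implemented, skipping...")

run_benchmark("train", "cpu", False,
              {"not_implemented": [{"test": "train", "device": "cpu"}]})
```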
3 changes: 0 additions & 3 deletions torchbenchmark/models/Background_Matting/__init__.py
@@ -129,9 +129,6 @@ def _set_mode(self, train):
pass

def train(self, niter=1):
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

self.netG.train()
self.netD.train()
lG, lD, GenL, DisL_r, DisL_f, alL, fgL, compL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4 changes: 4 additions & 0 deletions torchbenchmark/models/Background_Matting/metadata.yaml
@@ -1,2 +1,6 @@
train_benchmark: true
train_deterministic: false
not_implemented:
# Disabled due to excessively slow runtime - see GH Issue #100
- test: train
device: cpu
4 changes: 0 additions & 4 deletions torchbenchmark/models/LearningToPaint/__init__.py
@@ -74,8 +74,6 @@ def set_module(self, new_model):
self.agent.actor = new_model

def train(self, niter=1):
if self.jit:
raise NotImplementedError()
episode = episode_steps = 0
for _ in range(niter):
episode_steps += 1
@@ -104,8 +102,6 @@ def train(self, niter=1):
self.step += 1

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
for _ in range(niter):
reward, dist = self.evaluate(self.env, self.agent.select_action)
return (torch.tensor(reward), torch.tensor(dist))
3 changes: 3 additions & 0 deletions torchbenchmark/models/LearningToPaint/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: true
train_deterministic: false
not_implemented:
# Model LearningToPaint doesn't support JIT
- jit: true
8 changes: 0 additions & 8 deletions torchbenchmark/models/Super_SloMo/__init__.py
@@ -34,8 +34,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 10

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit and test == "eval" and device == "cuda":
raise NotImplementedError("Disabled eval jit test due to insufficient GPU memory size")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.model = ModelWrapper(device)
@@ -70,17 +68,11 @@ def get_module(self):
return self.model, self.example_inputs

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

for _ in range(niter):
out = self.model(*self.example_inputs)
return out

def train(self, niter=1):
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

for _ in range(niter):
self.optimizer.zero_grad()

7 changes: 7 additions & 0 deletions torchbenchmark/models/Super_SloMo/metadata.yaml
@@ -4,3 +4,10 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# Disabled due to excessively slow runtime - see GH Issue #100
- device: cpu
# Disabled eval jit test due to insufficient GPU memory size on CI
- device: cuda
jit: true
test: eval
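Checked against the Super_SloMo entries above (using PyYAML and the hypothetical `skip_by_metadata` sketch from the top of this page), the first entry skips both tests on CPU and the second skips only the jit eval test on CUDA:

```python
import yaml  # PyYAML; used here only to mirror the metadata.yaml fragment above

metadata = yaml.safe_load("""
not_implemented:
  - device: cpu
  - device: cuda
    jit: true
    test: eval
""")

assert skip_by_metadata(test="train", device="cpu", jit=False, extra_args=[], metadata=metadata)
assert skip_by_metadata(test="eval", device="cuda", jit=True, extra_args=[], metadata=metadata)
assert not skip_by_metadata(test="eval", device="cuda", jit=False, extra_args=[], metadata=metadata)
```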
7 changes: 0 additions & 7 deletions torchbenchmark/models/demucs/__init__.py
@@ -67,9 +67,6 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]) -> N
if test == "train":
self.model.train()
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=args.lr)
# TODO: enable GPU training after it is supported by infra
# see GH issue https://github.com/pytorch/benchmark/issues/652
raise NotImplementedError("Disabled train test because of insuffcient GPU memory on T4.")
elif test == "eval":
self.model.eval()

@@ -86,10 +83,6 @@ def eval(self, niter=1) -> Tuple[torch.Tensor]:
return (sources, estimates)

def train(self, niter=1):
if self.device == "cpu":
raise NotImplementedError("Disable CPU training because it is too slow (> 1min)")
if self.device == "cuda":
raise NotImplementedError("Disable GPU training because it causes CUDA OOM on T4")
for _ in range(niter):
sources, estimates = self.model(*self.example_inputs)
sources = center_trim(sources, estimates)
7 changes: 7 additions & 0 deletions torchbenchmark/models/demucs/metadata.yaml
@@ -4,3 +4,10 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: true
train_deterministic: false
not_implemented:
# Disable CPU training because it is too slow (> 1min)
- test: train
device: cpu
# Disable GPU training because it causes CUDA OOM on T4
- test: train
device: cuda
6 changes: 0 additions & 6 deletions torchbenchmark/models/densenet121/__init__.py
@@ -9,11 +9,5 @@ class Model(TorchVisionModel):
DEFAULT_EVAL_BSIZE = 64

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
# Temporarily disable tests because it causes CUDA OOM on CI platform
# TODO: Re-enable these tests when better hardware is available
if device == 'cuda':
raise NotImplementedError('CUDA disabled due to CUDA out of memory on CI GPU')
if device == 'cpu':
raise NotImplementedError('CPU disabled due to out of memory on CI CPU')
super().__init__(model_name="densenet121", test=test, device=device, jit=jit,
batch_size=batch_size, extra_args=extra_args)
5 changes: 5 additions & 0 deletions torchbenchmark/models/densenet121/metadata.yaml
@@ -4,3 +4,8 @@ eval_nograd: true
optimized_for_inference: true
train_benchmark: false
train_deterministic: false
not_implemented:
# CUDA disabled due to CUDA out of memory on CI GPU
- device: cuda
# CPU disabled due to out of memory on CI CPU
- device: cpu
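These two entries subsume the four (densenet121, train/example, cuda/cpu) tuples that the removed EXCLUDELIST in test.py listed explicitly, and since they name no test they also cover eval and check_device. A quick check under the hypothetical `skip_by_metadata` sketch from the top of this page:

```python
# The metadata dict mirrors densenet121's metadata.yaml above.
metadata = {"not_implemented": [{"device": "cuda"}, {"device": "cpu"}]}

for test in ("example", "train", "eval", "check_device"):
    for device in ("cuda", "cpu"):
        assert skip_by_metadata(test=test, device=device, jit=False,
                                extra_args=[], metadata=metadata)
```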
10 changes: 0 additions & 10 deletions torchbenchmark/models/detectron2_maskrcnn/__init__.py
@@ -32,8 +32,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 2

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("Detection Maskrcnn does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

model_cfg = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model
@@ -60,10 +58,6 @@ def get_module(self):
return self.model, (data, )

def train(self, niter=1):
if not self.device == "cuda":
raise NotImplementedError("Only CUDA is supported by this model")
if self.jit:
raise NotImplementedError("JIT is not supported by this model")
self.model.train()
with EventStorage():
for idx, data in zip(range(niter), self.example_inputs):
@@ -74,10 +68,6 @@ def eval(self, niter=2) -> Tuple[torch.Tensor]:
self.optimizer.zero_grad()

def eval(self, niter=2) -> Tuple[torch.Tensor]:
if not self.device == "cuda":
raise NotImplementedError("Only CUDA is supported by this model")
if self.jit:
raise NotImplementedError("JIT is not supported by this model")
self.model.eval()
with torch.no_grad():
for idx, data in zip(range(niter), self.example_inputs):
5 changes: 5 additions & 0 deletions torchbenchmark/models/detectron2_maskrcnn/metadata.yaml
@@ -4,3 +4,8 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# disable CPU tests because it is too slow
- device: cpu
# detectron2_maskrcnn doesn't support torchscript (JIT)
- jit: true
7 changes: 0 additions & 7 deletions torchbenchmark/models/dlrm/__init__.py
@@ -42,8 +42,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 1000

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("DLRM model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

# Train architecture: use the configuration in the paper.
@@ -199,16 +197,11 @@ def get_module(self):
return self.model, self.example_inputs

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError("JIT not supported")
for _ in range(niter):
out = self.model(*self.example_inputs)
return (out, )

def train(self, niter=1):
if self.jit:
raise NotImplementedError("JIT not supported")

gen = self.model(*self.example_inputs)
for _ in range(niter):
self.optimizer.zero_grad()
3 changes: 3 additions & 0 deletions torchbenchmark/models/dlrm/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# dlrm model doesn't support jit
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/drq/__init__.py
@@ -90,8 +90,6 @@ class Model(BenchmarkModel):
ALLOW_CUSTOMIZE_BSIZE = False

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("DrQ model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.cfg = DRQConfig()
@@ -120,8 +118,6 @@ def set_module(self, new_model):
self.agent.actor = new_model

def train(self, niter=2):
if self.jit:
raise NotImplementedError()
episode, episode_reward, episode_step, done = 0, 0, 1, True
for step in range(niter):
obs = self.env.reset()
@@ -151,8 +147,6 @@ def train(self, niter=2):
self.step += 1

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
average_episode_reward = 0
for _episode in range(niter):
obs = self.env.reset()
3 changes: 3 additions & 0 deletions torchbenchmark/models/drq/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# DrQ model does not support JIT
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/fastNLP_Bert/__init__.py
@@ -43,8 +43,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 1

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("FastNLP-Bert model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.input_dir = CMRC2018_DIR
@@ -100,8 +98,6 @@ def get_module(self):

# Sliced version of fastNLP.Tester._test()
def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError("PyTorch JIT compiler is not able to compile this model.")
self._mode(self.model, is_test=True)
self._predict_func = self.model.forward
with torch.no_grad():
@@ -114,8 +110,6 @@ def eval(self, niter=1) -> Tuple[torch.Tensor]:

# Sliced version of fastNLP.Trainer._train()
def train(self, niter=1):
if self.jit:
raise NotImplementedError("PyTorch JIT compiler is not able to compile this model.")
self.step = 0
self.n_epochs = niter
self._mode(self.model, is_test=False)
3 changes: 3 additions & 0 deletions torchbenchmark/models/fastNLP_Bert/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# FastNLP-Bert model does not support JIT
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/hf_Albert/__init__.py
@@ -30,25 +30,19 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
self.model.eval()

def get_module(self):
if self.jit:
raise NotImplementedError()
return self.model, (self.example_inputs["input_ids"], )

def enable_fp16_half(self):
self.model = self.model.half()

def train(self, niter=3):
if self.jit:
raise NotImplementedError()
for _ in range(niter):
outputs = self.model(**self.example_inputs)
loss = outputs.loss
loss.backward()
self.optimizer.step()

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
with torch.no_grad():
for _ in range(niter):
out = self.model(**self.example_inputs)
3 changes: 3 additions & 0 deletions torchbenchmark/models/hf_Albert/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# hf_Albert model doesn't support JIT
- jit: true