Remove "NotImplemented" flags if the test is limited by hardware capacity #781

Closed · wants to merge 4 commits
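This PR replaces the hard-coded EXCLUDELIST in test.py and the per-model NotImplementedError guards with `not_implemented` entries in each model's metadata.yaml, which the tests consult through `skip_by_metadata` from `torchbenchmark/util/metadata_utils`. That helper is not part of this diff; the sketch below is only an assumption of how such a matcher could behave, treating every key omitted from a `not_implemented` entry as a wildcard over the (test, device, jit, extra_args) combination.

```python
# Illustrative sketch only: the real skip_by_metadata lives in
# torchbenchmark/util/metadata_utils.py and is not shown in this diff.
def skip_by_metadata(test, device, jit, extra_args, metadata):
    """Return True if (test, device, jit, extra_args) matches any
    `not_implemented` entry in the model's metadata.yaml."""
    if not metadata or "not_implemented" not in metadata:
        return False
    for entry in metadata["not_implemented"]:
        # A key left out of an entry matches anything.
        if (entry.get("test", test) == test
                and entry.get("device", device) == device
                and entry.get("jit", jit) == jit
                and entry.get("extra_args", extra_args) == extra_args):
            return True
    return False
```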
14 changes: 5 additions & 9 deletions test.py
@@ -14,6 +14,7 @@

import torch
from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
from torchbenchmark.util.metadata_utils import skip_by_metadata


# Some of the models have very heavyweight setup, so we have to set a very
@@ -24,14 +25,6 @@
# unresponsive for 5 minutes the parent will presume it dead / incapacitated.)
TIMEOUT = 300 # Seconds

# Skip this list of unit tests. One reason may be that the original batch size
# used in the paper is too large to fit on the CI's GPU.
EXCLUDELIST = {("densenet121", "train", "cuda"), # GPU train runs out of memory on CI.
("densenet121", "train", "cpu"), # CPU train runs for too long on CI.
("densenet121", "example", "cuda"), # GPU train runs out of memory on CI.
("densenet121", "example", "cpu")} # CPU train runs for too long on CI.


class TestBenchmark(unittest.TestCase):

def setUp(self):
@@ -104,10 +97,13 @@ def check_device_fn(self):
self.skipTest(f'Method check_device on {device} is not implemented, skipping...')

name = os.path.basename(path)
metadata = get_metadata_from_yaml(path)
for fn, fn_name in zip([example_fn, train_fn, eval_fn, check_device_fn],
["example", "train", "eval", "check_device"]):
# set exclude list based on metadata
setattr(TestBenchmark, f'test_{name}_{fn_name}_{device}',
(unittest.skipIf((name, fn_name, device) in EXCLUDELIST, "This test is on the EXCLUDELIST")(fn)))
(unittest.skipIf(skip_by_metadata(test=fn_name, device=device,\
jit=False, extra_args=[], metadata=metadata), "This test is skipped by its metadata")(fn)))


def _load_tests():
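For illustration, one iteration of the loop above for densenet121 on CUDA reduces to roughly the following; `path`, `train_fn`, `TestBenchmark` and `unittest` come from the surrounding test.py, and the `not_implemented` entries are the ones added to densenet121's metadata.yaml later in this diff. This is a sketch, not the committed code.

```python
# Hypothetical expansion of a single loop iteration for densenet121 / train / cuda.
metadata = get_metadata_from_yaml(path)   # includes not_implemented: [{device: cuda}, {device: cpu}]
should_skip = skip_by_metadata(test="train", device="cuda", jit=False,
                               extra_args=[], metadata=metadata)   # -> True
setattr(TestBenchmark, "test_densenet121_train_cuda",
        unittest.skipIf(should_skip, "This test is skipped by its metadata")(train_fn))
```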
10 changes: 8 additions & 2 deletions test_bench.py
@@ -15,9 +15,9 @@
import pytest
import time
from components._impl.workers import subprocess_worker
from torchbenchmark import _list_model_paths, ModelTask
from torchbenchmark import _list_model_paths, ModelTask, get_metadata_from_yaml
from torchbenchmark.util.machine_config import get_machine_state

from torchbenchmark.util.metadata_utils import skip_by_metadata

def pytest_generate_tests(metafunc):
# This is where the list of models to test can be configured
@@ -48,6 +48,9 @@ class TestBenchNetwork:

def test_train(self, model_path, device, compiler, benchmark):
try:
if skip_by_metadata(test="train", device=device, jit=(compiler == 'jit'), \
extra_args=[], metadata=get_metadata_from_yaml(model_path)):
raise NotImplementedError("Test skipped by its metadata.")
task = ModelTask(model_path)
if not task.model_details.exists:
return # Model is not supported.
@@ -62,6 +65,9 @@ def test_train(self, model_path, device, compiler, benchmark):

def test_eval(self, model_path, device, compiler, benchmark, pytestconfig):
try:
if skip_by_metadata(test="eval", device=device, jit=(compiler == 'jit'), \
extra_args=[], metadata=get_metadata_from_yaml(model_path)):
raise NotImplementedError("Test skipped by its metadata.")
task = ModelTask(model_path)
if not task.model_details.exists:
return # Model is not supported.
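Unlike test.py, which wraps the generated tests in unittest.skipIf, test_bench.py raises NotImplementedError when the metadata says to skip and presumably relies on the NotImplementedError handling further down each test method (outside the visible hunk). A minimal, self-contained illustration of that control flow, with `run_benchmark` as a hypothetical stand-in for the benchmark body rather than a torchbenchmark API:

```python
# Minimal illustration of the raise-and-catch skip pattern; run_benchmark and
# the inline metadata dict are hypothetical stand-ins, not torchbenchmark code.
def run_benchmark(test, device, jit, metadata):
    try:
        if skip_by_metadata(test=test, device=device, jit=jit,
                            extra_args=[], metadata=metadata):
            raise NotImplementedError("Test skipped by its metadata.")
        print(f"running {test} on {device} (jit={jit})")
    except NotImplementedError:
        print(f"Test {test} on {device} is not implemented, skipping...")

run_benchmark("train", "cpu", False,
              {"not_implemented": [{"test": "train", "device": "cpu"}]})
```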
3 changes: 0 additions & 3 deletions torchbenchmark/models/Background_Matting/__init__.py
@@ -129,9 +129,6 @@ def _set_mode(self, train):
pass

def train(self, niter=1):
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

self.netG.train()
self.netD.train()
lG, lD, GenL, DisL_r, DisL_f, alL, fgL, compL, elapse_run, elapse = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4 changes: 4 additions & 0 deletions torchbenchmark/models/Background_Matting/metadata.yaml
@@ -1,2 +1,6 @@
train_benchmark: true
train_deterministic: false
not_implemented:
# Disabled due to excessively slow runtime - see GH Issue #100
- test: train
device: cpu
4 changes: 0 additions & 4 deletions torchbenchmark/models/LearningToPaint/__init__.py
@@ -74,8 +74,6 @@ def set_module(self, new_model):
self.agent.actor = new_model

def train(self, niter=1):
if self.jit:
raise NotImplementedError()
episode = episode_steps = 0
for _ in range(niter):
episode_steps += 1
@@ -104,8 +102,6 @@ def train(self, niter=1):
self.step += 1

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
for _ in range(niter):
reward, dist = self.evaluate(self.env, self.agent.select_action)
return (torch.tensor(reward), torch.tensor(dist))
3 changes: 3 additions & 0 deletions torchbenchmark/models/LearningToPaint/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: true
train_deterministic: false
not_implemented:
# Model LearningToPaint doesn't support JIT
- jit: true
8 changes: 0 additions & 8 deletions torchbenchmark/models/Super_SloMo/__init__.py
@@ -34,8 +34,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 10

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit and test == "eval" and device == "cuda":
raise NotImplementedError("Disabled eval jit test due to insufficient GPU memory size")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.model = ModelWrapper(device)
@@ -70,17 +68,11 @@ def get_module(self):
return self.model, self.example_inputs

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

for _ in range(niter):
out = self.model(*self.example_inputs)
return out

def train(self, niter=1):
if self.device == 'cpu':
raise NotImplementedError("Disabled due to excessively slow runtime - see GH Issue #100")

for _ in range(niter):
self.optimizer.zero_grad()

7 changes: 7 additions & 0 deletions torchbenchmark/models/Super_SloMo/metadata.yaml
@@ -4,3 +4,10 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# Disabled due to excessively slow runtime - see GH Issue #100
- device: cpu
# Disabled eval jit test due to insufficient GPU memory size on CI
- device: cuda
jit: true
test: eval
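Checked against the Super_SloMo entries above (using PyYAML and the hypothetical `skip_by_metadata` sketch from the top of this page), the first entry skips both tests on CPU and the second skips only the jit eval test on CUDA:

```python
import yaml  # PyYAML; used here only to mirror the metadata.yaml fragment above

metadata = yaml.safe_load("""
not_implemented:
  - device: cpu
  - device: cuda
    jit: true
    test: eval
""")

assert skip_by_metadata(test="train", device="cpu", jit=False, extra_args=[], metadata=metadata)
assert skip_by_metadata(test="eval", device="cuda", jit=True, extra_args=[], metadata=metadata)
assert not skip_by_metadata(test="eval", device="cuda", jit=False, extra_args=[], metadata=metadata)
```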
7 changes: 0 additions & 7 deletions torchbenchmark/models/demucs/__init__.py
@@ -67,9 +67,6 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]) -> N
if test == "train":
self.model.train()
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=args.lr)
# TODO: enable GPU training after it is supported by infra
# see GH issue https://github.com/pytorch/benchmark/issues/652
raise NotImplementedError("Disabled train test because of insuffcient GPU memory on T4.")
elif test == "eval":
self.model.eval()

@@ -86,10 +83,6 @@ def eval(self, niter=1) -> Tuple[torch.Tensor]:
return (sources, estimates)

def train(self, niter=1):
if self.device == "cpu":
raise NotImplementedError("Disable CPU training because it is too slow (> 1min)")
if self.device == "cuda":
raise NotImplementedError("Disable GPU training because it causes CUDA OOM on T4")
for _ in range(niter):
sources, estimates = self.model(*self.example_inputs)
sources = center_trim(sources, estimates)
7 changes: 7 additions & 0 deletions torchbenchmark/models/demucs/metadata.yaml
@@ -4,3 +4,10 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: true
train_deterministic: false
not_implemented:
# Disable CPU training because it is too slow (> 1min)
- test: train
device: cpu
# Disable GPU training because it causes CUDA OOM on T4
- test: train
device: cuda
6 changes: 0 additions & 6 deletions torchbenchmark/models/densenet121/__init__.py
@@ -9,11 +9,5 @@ class Model(TorchVisionModel):
DEFAULT_EVAL_BSIZE = 64

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
# Temporarily disable tests because it causes CUDA OOM on CI platform
# TODO: Re-enable these tests when better hardware is available
if device == 'cuda':
raise NotImplementedError('CUDA disabled due to CUDA out of memory on CI GPU')
if device == 'cpu':
raise NotImplementedError('CPU disabled due to out of memory on CI CPU')
super().__init__(model_name="densenet121", test=test, device=device, jit=jit,
batch_size=batch_size, extra_args=extra_args)
5 changes: 5 additions & 0 deletions torchbenchmark/models/densenet121/metadata.yaml
@@ -4,3 +4,8 @@ eval_nograd: true
optimized_for_inference: true
train_benchmark: false
train_deterministic: false
not_implemented:
# CUDA disabled due to CUDA out of memory on CI GPU
- device: cuda
# CPU disabled due to out of memory on CI CPU
- device: cpu
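These two entries subsume the four (densenet121, train/example, cuda/cpu) tuples that the removed EXCLUDELIST in test.py listed explicitly, and since they name no test they also cover eval and check_device. A quick check under the hypothetical `skip_by_metadata` sketch from the top of this page:

```python
# The metadata dict mirrors densenet121's metadata.yaml above.
metadata = {"not_implemented": [{"device": "cuda"}, {"device": "cpu"}]}

for test in ("example", "train", "eval", "check_device"):
    for device in ("cuda", "cpu"):
        assert skip_by_metadata(test=test, device=device, jit=False,
                                extra_args=[], metadata=metadata)
```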
10 changes: 0 additions & 10 deletions torchbenchmark/models/detectron2_maskrcnn/__init__.py
@@ -32,8 +32,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 2

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("Detection Maskrcnn does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

model_cfg = model_zoo.get_config("common/models/mask_rcnn_fpn.py").model
@@ -60,10 +58,6 @@ def get_module(self):
return self.model, (data, )

def train(self, niter=1):
if not self.device == "cuda":
raise NotImplementedError("Only CUDA is supported by this model")
if self.jit:
raise NotImplementedError("JIT is not supported by this model")
self.model.train()
with EventStorage():
for idx, data in zip(range(niter), self.example_inputs):
@@ -74,10 +68,6 @@ def eval(self, niter=2) -> Tuple[torch.Tensor]:
self.optimizer.zero_grad()

def eval(self, niter=2) -> Tuple[torch.Tensor]:
if not self.device == "cuda":
raise NotImplementedError("Only CUDA is supported by this model")
if self.jit:
raise NotImplementedError("JIT is not supported by this model")
self.model.eval()
with torch.no_grad():
for idx, data in zip(range(niter), self.example_inputs):
5 changes: 5 additions & 0 deletions torchbenchmark/models/detectron2_maskrcnn/metadata.yaml
@@ -4,3 +4,8 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# disable CPU tests because it is too slow
- device: cpu
# detectron2_maskrcnn doesn't support torchscript (JIT)
- jit: true
7 changes: 0 additions & 7 deletions torchbenchmark/models/dlrm/__init__.py
@@ -42,8 +42,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 1000

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("DLRM model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

# Train architecture: use the configuration in the paper.
@@ -199,16 +197,11 @@ def get_module(self):
return self.model, self.example_inputs

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError("JIT not supported")
for _ in range(niter):
out = self.model(*self.example_inputs)
return (out, )

def train(self, niter=1):
if self.jit:
raise NotImplementedError("JIT not supported")

gen = self.model(*self.example_inputs)
for _ in range(niter):
self.optimizer.zero_grad()
3 changes: 3 additions & 0 deletions torchbenchmark/models/dlrm/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# dlrm model doesn't support jit
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/drq/__init__.py
@@ -90,8 +90,6 @@ class Model(BenchmarkModel):
ALLOW_CUSTOMIZE_BSIZE = False

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("DrQ model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.cfg = DRQConfig()
@@ -120,8 +118,6 @@ def set_module(self, new_model):
self.agent.actor = new_model

def train(self, niter=2):
if self.jit:
raise NotImplementedError()
episode, episode_reward, episode_step, done = 0, 0, 1, True
for step in range(niter):
obs = self.env.reset()
@@ -151,8 +147,6 @@ def train(self, niter=2):
self.step += 1

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
average_episode_reward = 0
for _episode in range(niter):
obs = self.env.reset()
3 changes: 3 additions & 0 deletions torchbenchmark/models/drq/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# DrQ model does not support JIT
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/fastNLP_Bert/__init__.py
@@ -43,8 +43,6 @@ class Model(BenchmarkModel):
DEFAULT_EVAL_BSIZE = 1

def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
if jit:
raise NotImplementedError("FastNLP-Bert model does not support JIT.")
super().__init__(test=test, device=device, jit=jit, batch_size=batch_size, extra_args=extra_args)

self.input_dir = CMRC2018_DIR
@@ -100,8 +98,6 @@ def get_module(self):

# Sliced version of fastNLP.Tester._test()
def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError("PyTorch JIT compiler is not able to compile this model.")
self._mode(self.model, is_test=True)
self._predict_func = self.model.forward
with torch.no_grad():
@@ -114,8 +110,6 @@ def eval(self, niter=1) -> Tuple[torch.Tensor]:

# Sliced version of fastNLP.Trainer._train()
def train(self, niter=1):
if self.jit:
raise NotImplementedError("PyTorch JIT compiler is not able to compile this model.")
self.step = 0
self.n_epochs = niter
self._mode(self.model, is_test=False)
3 changes: 3 additions & 0 deletions torchbenchmark/models/fastNLP_Bert/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: true
not_implemented:
# FastNLP-Bert model does not support JIT
- jit: true
6 changes: 0 additions & 6 deletions torchbenchmark/models/hf_Albert/__init__.py
@@ -30,25 +30,19 @@ def __init__(self, test, device, jit=False, batch_size=None, extra_args=[]):
self.model.eval()

def get_module(self):
if self.jit:
raise NotImplementedError()
return self.model, (self.example_inputs["input_ids"], )

def enable_fp16_half(self):
self.model = self.model.half()

def train(self, niter=3):
if self.jit:
raise NotImplementedError()
for _ in range(niter):
outputs = self.model(**self.example_inputs)
loss = outputs.loss
loss.backward()
self.optimizer.step()

def eval(self, niter=1) -> Tuple[torch.Tensor]:
if self.jit:
raise NotImplementedError()
with torch.no_grad():
for _ in range(niter):
out = self.model(**self.example_inputs)
3 changes: 3 additions & 0 deletions torchbenchmark/models/hf_Albert/metadata.yaml
@@ -4,3 +4,6 @@ eval_nograd: true
optimized_for_inference: false
train_benchmark: false
train_deterministic: false
not_implemented:
# hf_Albert model doesn't support JIT
- jit: true