diff --git a/test/common_utils.py b/test/common_utils.py
index a7534f7d3ed..06e0e16b1ef 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -259,58 +259,12 @@ def call_args_to_kwargs_only(call_args, *callable_or_arg_names):
 
 def cpu_and_gpu():
     import pytest  # noqa
-
-    # ignore CPU tests in RE as they're already covered by another contbuild
-    # also ignore CPU tests in CircleCI machines that have a GPU: these tests
-    # are run on CPU-only machines already.
-    if IN_RE_WORKER:
-        devices = []
-    else:
-        if IN_CIRCLE_CI and torch.cuda.is_available():
-            mark = pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG)
-        else:
-            mark = ()
-        devices = [pytest.param('cpu', marks=mark)]
-
-    if torch.cuda.is_available():
-        cuda_marks = ()
-    elif IN_FBCODE:
-        # Dont collect cuda tests on fbcode if the machine doesnt have a GPU
-        # This avoids skipping the tests. More robust would be to detect if
-        # we're in sancastle instead of fbcode?
-        cuda_marks = pytest.mark.dont_collect()
-    else:
-        cuda_marks = pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG)
-
-    devices.append(pytest.param('cuda', marks=cuda_marks))
-
-    return devices
+    return ('cpu', pytest.param('cuda', marks=pytest.mark.needs_cuda))
 
 
 def needs_cuda(test_func):
     import pytest  # noqa
-
-    if IN_FBCODE and not IN_RE_WORKER:
-        # We don't want to skip in fbcode, so we just don't collect
-        # TODO: slightly more robust way would be to detect if we're in a sandcastle instance
-        # so that the test will still be collected (and skipped) in the devvms.
-        return pytest.mark.dont_collect(test_func)
-    elif torch.cuda.is_available():
-        return test_func
-    else:
-        return pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG)(test_func)
-
-
-def cpu_only(test_func):
-    import pytest  # noqa
-
-    if IN_RE_WORKER:
-        # The assumption is that all RE workers have GPUs.
-        return pytest.mark.dont_collect(test_func)
-    elif IN_CIRCLE_CI and torch.cuda.is_available():
-        return pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG)(test_func)
-    else:
-        return test_func
+    return pytest.mark.needs_cuda(test_func)
 
 
 def _create_data(height=3, width=3, channels=3, device="cpu"):
diff --git a/test/conftest.py b/test/conftest.py
index 6e10e4ef071..ab60cb5fa41 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,14 +1,52 @@
+from common_utils import IN_CIRCLE_CI, CIRCLECI_GPU_NO_CUDA_MSG, IN_FBCODE, IN_RE_WORKER, CUDA_NOT_AVAILABLE_MSG
+import torch
+import pytest
+
+
 def pytest_configure(config):
     # register an additional marker (see pytest_collection_modifyitems)
     config.addinivalue_line(
-        "markers", "dont_collect: marks a test that should not be collected (avoids skipping it)"
+        "markers", "needs_cuda: mark for tests that rely on a CUDA device"
     )
 
 
 def pytest_collection_modifyitems(items):
     # This hook is called by pytest after it has collected the tests (google its name!)
-    # We can ignore some tests as we see fit here. In particular we ignore the tests that
-    # we have marked with the custom 'dont_collect' mark. This avoids skipping the tests,
-    # since the internal fb infra doesn't like skipping tests.
-    to_keep = [item for item in items if item.get_closest_marker('dont_collect') is None]
-    items[:] = to_keep
+    # We can ignore some tests as we see fit here, or add marks, such as a skip mark.
+
+    out_items = []
+    for item in items:
+        # The needs_cuda mark will exist if the test was explicitly decorated with
+        # the @needs_cuda decorator. It will also exist if it was parametrized with a
+        # parameter that has the mark: for example if a test is parametrized with
+        # @pytest.mark.parametrize('device', cpu_and_gpu())
+        # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark,
+        # and the ones with device == 'cpu' won't have the mark.
+        needs_cuda = item.get_closest_marker('needs_cuda') is not None
+
+        if needs_cuda and not torch.cuda.is_available():
+            # In general, we skip cuda tests on machines without a GPU.
+            # There are special cases though, see below.
+            item.add_marker(pytest.mark.skip(reason=CUDA_NOT_AVAILABLE_MSG))
+
+        if IN_FBCODE:
+            # fbcode doesn't like skipping tests, so instead we just don't collect the test
+            # so that they don't even "exist", hence the continue statements.
+            if not needs_cuda and IN_RE_WORKER:
+                # The RE workers are the machines with GPUs; we don't want them to run CPU-only tests.
+                continue
+            if needs_cuda and not torch.cuda.is_available():
+                # On the test machines without a GPU, we want to ignore the tests that need cuda.
+                # TODO: something more robust would be to do that only in a sandcastle instance,
+                # so that we can still see the test being skipped when testing locally from a devvm
+                continue
+        elif IN_CIRCLE_CI:
+            # Here we're not in fbcode, so we can safely collect and skip tests.
+            if not needs_cuda and torch.cuda.is_available():
+                # Similar to what happens in RE workers: we don't need the CircleCI GPU machines
+                # to run the CPU-only tests.
+                item.add_marker(pytest.mark.skip(reason=CIRCLECI_GPU_NO_CUDA_MSG))
+
+        out_items.append(item)
+
+    items[:] = out_items
diff --git a/test/test_image.py b/test/test_image.py
index e5aeef0dc37..d9162760263 100644
--- a/test/test_image.py
+++ b/test/test_image.py
@@ -9,7 +9,7 @@
 import torch
 from PIL import Image
 import torchvision.transforms.functional as F
-from common_utils import get_tmp_dir, needs_cuda, cpu_only
+from common_utils import get_tmp_dir, needs_cuda
 from _assert_utils import assert_equal
 
 from torchvision.io.image import (
@@ -335,7 +335,6 @@ def test_decode_jpeg_cuda_errors():
         torch.ops.image.decode_jpeg_cuda(data, ImageReadMode.UNCHANGED.value, 'cpu')
 
 
-@cpu_only
 def test_encode_jpeg_errors():
 
     with pytest.raises(RuntimeError, match="Input tensor dtype should be uint8"):
@@ -359,26 +358,13 @@ def test_encode_jpeg_errors():
         encode_jpeg(torch.empty((100, 100), dtype=torch.uint8))
 
 
-def _collect_if(cond):
-    # TODO: remove this once test_encode_jpeg_windows and test_write_jpeg_windows
-    # are removed
-    def _inner(test_func):
-        if cond:
-            return test_func
-        else:
-            return pytest.mark.dont_collect(test_func)
-    return _inner
-
-
-@cpu_only
-@_collect_if(cond=IS_WINDOWS)
 @pytest.mark.parametrize('img_path', [
     pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
     for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
 ])
-def test_encode_jpeg_windows(img_path):
+def test_encode_jpeg_reference(img_path):
     # This test is *wrong*.
-    # It compares a torchvision-encoded jpeg with a PIL-encoded jpeg, but it
+    # It compares a torchvision-encoded jpeg with a PIL-encoded jpeg (the reference), but it
     # starts encoding the torchvision version from an image that comes from
     # decode_jpeg, which can yield different results from pil.decode (see
     # test_decode... which uses a high tolerance).
@@ -403,14 +389,12 @@ def test_encode_jpeg_windows(img_path):
         assert_equal(jpeg_bytes, pil_bytes)
 
 
-@cpu_only
-@_collect_if(cond=IS_WINDOWS)
 @pytest.mark.parametrize('img_path', [
     pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
     for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
 ])
-def test_write_jpeg_windows(img_path):
-    # FIXME: Remove this eventually, see test_encode_jpeg_windows
+def test_write_jpeg_reference(img_path):
+    # FIXME: Remove this eventually, see test_encode_jpeg_reference
     with get_tmp_dir() as d:
         data = read_file(img_path)
         img = decode_jpeg(data)
@@ -433,8 +417,9 @@ def test_write_jpeg_windows(img_path):
         assert_equal(torch_bytes, pil_bytes)
 
 
-@cpu_only
-@_collect_if(cond=not IS_WINDOWS)
+@pytest.mark.skipif(IS_WINDOWS, reason=(
+    'this test fails on windows because PIL uses libjpeg-turbo on windows'
+))
 @pytest.mark.parametrize('img_path', [
     pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
     for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
@@ -455,8 +440,9 @@ def test_encode_jpeg(img_path):
         assert_equal(encoded_jpeg_torch, encoded_jpeg_pil)
 
 
-@cpu_only
-@_collect_if(cond=not IS_WINDOWS)
+@pytest.mark.skipif(IS_WINDOWS, reason=(
+    'this test fails on windows because PIL uses libjpeg-turbo on windows'
+))
 @pytest.mark.parametrize('img_path', [
     pytest.param(jpeg_path, id=_get_safe_image_name(jpeg_path))
     for jpeg_path in get_images(ENCODE_JPEG, ".jpg")
diff --git a/test/test_models.py b/test/test_models.py
index b52b7ecc690..a3c7ec40342 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -1,7 +1,7 @@
 import os
 import io
 import sys
-from common_utils import map_nested_tensor_object, freeze_rng_state, set_rng_seed, cpu_and_gpu, needs_cuda, cpu_only
+from common_utils import map_nested_tensor_object, freeze_rng_state, set_rng_seed, cpu_and_gpu, needs_cuda
 from _utils_internal import get_relative_path
 from collections import OrderedDict
 import functools
@@ -234,7 +234,6 @@ def _make_sliced_model(model, stop_layer):
     return new_model
 
 
-@cpu_only
 @pytest.mark.parametrize('model_name', ['densenet121', 'densenet169', 'densenet201', 'densenet161'])
 def test_memory_efficient_densenet(model_name):
     input_shape = (1, 3, 300, 300)
@@ -257,7 +256,6 @@ def test_memory_efficient_densenet(model_name):
     torch.testing.assert_close(out1, out2, rtol=0.0, atol=1e-5)
 
 
-@cpu_only
 @pytest.mark.parametrize('dilate_layer_2', (True, False))
 @pytest.mark.parametrize('dilate_layer_3', (True, False))
 @pytest.mark.parametrize('dilate_layer_4', (True, False))
@@ -272,7 +270,6 @@ def test_resnet_dilation(dilate_layer_2, dilate_layer_3, dilate_layer_4):
     assert out.shape == (1, 2048, 7 * f, 7 * f)
 
 
-@cpu_only
 def test_mobilenet_v2_residual_setting():
     model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]])
     model.eval()
@@ -281,7 +278,6 @@ def test_mobilenet_v2_residual_setting():
     assert out.shape[-1] == 1000
 
 
-@cpu_only
 @pytest.mark.parametrize('model_name', ["mobilenet_v2", "mobilenet_v3_large", "mobilenet_v3_small"])
 def test_mobilenet_norm_layer(model_name):
     model = models.__dict__[model_name]()
@@ -295,7 +291,6 @@ def get_gn(num_channels):
     assert any(isinstance(x, nn.GroupNorm) for x in model.modules())
 
 
-@cpu_only
 def test_inception_v3_eval():
     # replacement for models.inception_v3(pretrained=True) that does not download weights
     kwargs = {}
@@ -311,7 +306,6 @@ def test_inception_v3_eval():
     _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
 
 
-@cpu_only
 def test_fasterrcnn_double():
     model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False)
     model.double()
@@ -327,7 +321,6 @@ def test_fasterrcnn_double():
     assert "labels" in out[0]
 
 
-@cpu_only
 def test_googlenet_eval():
     # replacement for models.googlenet(pretrained=True) that does not download weights
     kwargs = {}
@@ -376,7 +369,6 @@ def checkOut(out):
     checkOut(out_cpu)
 
 
-@cpu_only
 def test_generalizedrcnn_transform_repr():
 
     min_size, max_size = 224, 299
@@ -573,7 +565,6 @@ def compute_mean_std(tensor):
         pytest.skip(msg)
 
 
-@cpu_only
 @pytest.mark.parametrize('model_name', get_available_detection_models())
 def test_detection_model_validation(model_name):
     set_rng_seed(0)
diff --git a/test/test_ops.py b/test/test_ops.py
index f08720206ad..21b0c2039c3 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -1,4 +1,4 @@
-from common_utils import needs_cuda, cpu_only, cpu_and_gpu
+from common_utils import needs_cuda, cpu_and_gpu
 from _assert_utils import assert_equal
 import math
 from abc import ABC, abstractmethod
@@ -133,7 +133,6 @@ def get_slice(k, block):
                 y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0]
         return y
 
-    @cpu_only
     def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_pool)
 
@@ -178,7 +177,6 @@ def get_slice(k, block):
                 y[roi_idx, c_out, i, j] = t / area
         return y
 
-    @cpu_only
     def test_boxes_shape(self):
         self._helper_boxes_shape(ops.ps_roi_pool)
 
@@ -262,7 +260,6 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_r
                     out_data[r, channel, i, j] = val
         return out_data
 
-    @cpu_only
     def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_align)
 
@@ -403,12 +400,10 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1,
                     out_data[r, c_out, i, j] = val
         return out_data
 
-    @cpu_only
     def test_boxes_shape(self):
         self._helper_boxes_shape(ops.ps_roi_align)
 
 
-@cpu_only
 class TestMultiScaleRoIAlign:
     def test_msroialign_repr(self):
         fmap_names = ['0']
@@ -464,7 +459,6 @@ def _create_tensors_with_iou(self, N, iou_thresh):
         scores = torch.rand(N)
         return boxes, scores
 
-    @cpu_only
     @pytest.mark.parametrize("iou", (.2, .5, .8))
     def test_nms_ref(self, iou):
         err_msg = 'NMS incompatible between CPU and reference implementation for IoU={}'
@@ -473,7 +467,6 @@ def test_nms_ref(self, iou):
             keep = ops.nms(boxes, scores, iou)
             assert torch.allclose(keep, keep_ref), err_msg.format(iou)
 
-    @cpu_only
     def test_nms_input_errors(self):
         with pytest.raises(RuntimeError):
             ops.nms(torch.rand(4), torch.rand(3), 0.5)
@@ -484,7 +477,6 @@ def test_nms_input_errors(self):
         with pytest.raises(RuntimeError):
             ops.nms(torch.rand(3, 4), torch.rand(4), 0.5)
 
-    @cpu_only
     @pytest.mark.parametrize("iou", (.2, .5, .8))
     @pytest.mark.parametrize("scale, zero_point", ((1, 0), (2, 50), (3, 10)))
     def test_qnms(self, iou, scale, zero_point):
@@ -542,7 +534,6 @@ def test_nms_cuda_float16(self):
         keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres)
         assert_equal(keep32, keep16)
 
-    @cpu_only
     def test_batched_nms_implementations(self):
         """Make sure that both implementations of batched_nms yield identical results"""
 
@@ -689,7 +680,6 @@ def test_forward(self, device, contiguous, batch_sz, dtype=None):
             res.to(expected), expected, rtol=tol, atol=tol, msg='\nres:\n{}\nexpected:\n{}'.format(res, expected)
         )
 
-    @cpu_only
     def test_wrong_sizes(self):
         in_channels = 6
         out_channels = 2
@@ -786,7 +776,6 @@ def test_autocast(self, batch_sz, dtype):
             self.test_forward(torch.device("cuda"), contiguous=False, batch_sz=batch_sz, dtype=dtype)
 
 
-@cpu_only
 class TestFrozenBNT:
     def test_frozenbatchnorm2d_repr(self):
         num_features = 32
@@ -828,7 +817,6 @@ def test_frozenbatchnorm2d_n_arg(self):
             ops.misc.FrozenBatchNorm2d(32, eps=1e-5, n=32)
 
 
-@cpu_only
 class TestBoxConversion:
     def _get_box_sequences():
         # Define here the argument type of `boxes` supported by region pooling operations
@@ -853,7 +841,6 @@ def test_convert_boxes_to_roi_format(self, box_sequence):
         assert_equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))
 
 
-@cpu_only
 class TestBox:
     def test_bbox_same(self):
         box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
@@ -940,7 +927,6 @@ def test_bbox_convert_jit(self):
         torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)
 
 
-@cpu_only
 class TestBoxArea:
     def test_box_area(self):
         def area_check(box, expected, tolerance=1e-4):
@@ -969,7 +955,6 @@ def area_check(box, expected, tolerance=1e-4):
         area_check(box_tensor, expected)
 
 
-@cpu_only
 class TestBoxIou:
     def test_iou(self):
         def iou_check(box, expected, tolerance=1e-4):
@@ -991,7 +976,6 @@ def iou_check(box, expected, tolerance=1e-4):
         iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4)
 
 
-@cpu_only
 class TestGenBoxIou:
     def test_gen_iou(self):
         def gen_iou_check(box, expected, tolerance=1e-4):
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 1a4ecfbf7c7..2051cc7466c 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -18,7 +18,6 @@
     _assert_equal_tensor_to_pil,
     _assert_approx_equal_tensor_to_pil,
     cpu_and_gpu,
-    cpu_only
 )
 from _assert_utils import assert_equal
 
@@ -309,7 +308,6 @@ def test_x_crop(fn, method, out_length, size, device):
         assert_equal(transformed_img, transformed_batch[i, ...])
 
 
-@cpu_only
 @pytest.mark.parametrize('method', ["FiveCrop", "TenCrop"])
 def test_x_crop_save(method):
     fn = getattr(T, method)(size=[5, ])
@@ -319,7 +317,6 @@ def test_x_crop_save(method):
 
 
 class TestResize:
-    @cpu_only
     @pytest.mark.parametrize('size', [32, 34, 35, 36, 38])
     def test_resize_int(self, size):
         # TODO: Minimal check for bug-fix, improve this later
@@ -352,7 +349,6 @@ def test_resize_scripted(self, dt, size, max_size, interpolation, device):
         _test_transform_vs_scripted(transform, s_transform, tensor)
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
-    @cpu_only
     def test_resize_save(self):
         transform = T.Resize(size=[32, ])
         s_transform = torch.jit.script(transform)
@@ -372,7 +368,6 @@ def test_resized_crop(self, scale, ratio, size, interpolation, device):
         _test_transform_vs_scripted(transform, s_transform, tensor)
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
-    @cpu_only
     def test_resized_crop_save(self):
         transform = T.RandomResizedCrop(size=[32, ])
         s_transform = torch.jit.script(transform)
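
Usage sketch (not part of the patch; test names below are hypothetical): after this change there are only two ways a test advertises that it needs CUDA, and conftest.py handles the skip/don't-collect decision in one place.

import pytest
import torch
from common_utils import cpu_and_gpu, needs_cuda


@pytest.mark.parametrize('device', cpu_and_gpu())
def test_op_on_device(device):
    # The device == 'cuda' instance of this test carries the needs_cuda mark
    # (via pytest.param), so conftest.py skips it (or doesn't collect it in
    # fbcode) on machines without a GPU.
    t = torch.ones(3, device=device)
    assert (t + t).sum().item() == 6


@needs_cuda
def test_cuda_only_op():
    # Explicit decoration: equivalent to applying pytest.mark.needs_cuda directly.
    assert torch.cuda.is_available()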