diff --git a/test/conftest.py b/test/conftest.py index 6ca08807a..f5db4b5d6 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,9 +1,52 @@ +import os import random import pytest import torch +def pytest_configure(config): + # register an additional marker (see pytest_collection_modifyitems) + config.addinivalue_line( + "markers", "needs_cuda: mark for tests that rely on a CUDA device" + ) + + +def pytest_collection_modifyitems(items): + # This hook is called by pytest after it has collected the tests (google its + # name to check out its doc!). We can ignore some tests as we see fit here, + # or add marks, such as a skip mark. + + out_items = [] + for item in items: + # The needs_cuda mark will exist if the test was explicitly decorated + # with the @needs_cuda decorator. It will also exist if it was + # parametrized with a parameter that has the mark: for example if a test + # is parametrized with + # @pytest.mark.parametrize('device', cpu_and_cuda()) + # the "instances" of the tests where device == 'cuda' will have the + # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the + # mark. + needs_cuda = item.get_closest_marker("needs_cuda") is not None + + if ( + needs_cuda + and not torch.cuda.is_available() + and os.environ.get("FAIL_WITHOUT_CUDA") is None + ): + # We skip CUDA tests on non-CUDA machines, but only if the + # FAIL_WITHOUT_CUDA env var wasn't set. If it's set, the test will + # typically fail with a "Unsupported device: cuda" error. This is + # normal and desirable: this env var is set on CI jobs that are + # supposed to run the CUDA tests, so if CUDA isn't available on + # those for whatever reason, we need to know. 
+ item.add_marker(pytest.mark.skip(reason="CUDA not available.")) + + out_items.append(item) + + items[:] = out_items + + @pytest.fixture(autouse=True) def prevent_leaking_rng(): # Prevent each test from leaking the rng to all other test when they call diff --git a/test/decoders/test_video_decoder_ops.py b/test/decoders/test_video_decoder_ops.py index 825405b74..ba021fe0a 100644 --- a/test/decoders/test_video_decoder_ops.py +++ b/test/decoders/test_video_decoder_ops.py @@ -39,6 +39,8 @@ from ..utils import ( assert_tensor_close_on_at_least, assert_tensor_equal, + cpu_and_cuda, + get_frame_compare_function, NASA_AUDIO, NASA_VIDEO, needs_cuda, @@ -50,9 +52,9 @@ class ReferenceDecoder: - def __init__(self): + def __init__(self, device="cpu"): self.decoder: torch.Tensor = create_from_file(str(NASA_VIDEO.path)) - add_video_stream(self.decoder) + add_video_stream(self.decoder, device=device) def get_next_frame(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: assert self.decoder is not None @@ -64,90 +66,85 @@ def seek(self, pts: float): class TestOps: - def test_seek_and_next(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_seek_and_next(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) frame0, _, _ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0) + frame_compare_function(frame0, reference_frame0.to(device)) reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1) frame1, _, _ = get_next_frame(decoder) - assert_tensor_equal(frame1, reference_frame1) + frame_compare_function(frame1, reference_frame1.to(device)) seek_to_pts(decoder, 6.0) frame_time6, _, _ = get_next_frame(decoder) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame_time6, 
reference_frame_time6) + frame_compare_function(frame_time6, reference_frame_time6.to(device)) - def test_get_frame_at_pts(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frame_at_pts(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) # This frame has pts=6.006 and duration=0.033367, so it should be visible # at timestamps in the range [6.006, 6.039367) (not including the last timestamp). frame6, _, _ = get_frame_at_pts(decoder, 6.006) reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6) + frame_compare_function(frame6, reference_frame6.to(device)) frame6, _, _ = get_frame_at_pts(decoder, 6.02) - assert_tensor_equal(frame6, reference_frame6) + frame_compare_function(frame6, reference_frame6.to(device)) frame6, _, _ = get_frame_at_pts(decoder, 6.039366) - assert_tensor_equal(frame6, reference_frame6) + frame_compare_function(frame6, reference_frame6.to(device)) # Note that this timestamp is exactly on a frame boundary, so it should # return the next frame since the right boundary of the interval is # open. 
next_frame, _, _ = get_frame_at_pts(decoder, 6.039367) with pytest.raises(AssertionError): - assert_tensor_equal(next_frame, reference_frame6) + frame_compare_function(next_frame, reference_frame6.to(device)) - def test_get_frame_at_index(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frame_at_index(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) frame0, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=0) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0) + frame_compare_function(frame0, reference_frame0.to(device)) # The frame that is played at 6 seconds is frame 180 from a 0-based index. frame6, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=180) reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6) + frame_compare_function(frame6, reference_frame6.to(device)) - def test_get_frame_with_info_at_index(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frame_with_info_at_index(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) frame6, pts, duration = get_frame_at_index( decoder, stream_index=3, frame_index=180 ) reference_frame6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame6, reference_frame6) + frame_compare_function(frame6, reference_frame6.to(device)) assert pts.item() == pytest.approx(6.006, rel=1e-3) assert duration.item() == pytest.approx(0.03337, rel=1e-3) - def test_get_frames_at_indices(self): - decoder = 
create_from_file(str(NASA_VIDEO.path)) - scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder) - frames0and180, *_ = get_frames_at_indices( - decoder, stream_index=3, frame_indices=[0, 180] - ) - reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - reference_frame180 = NASA_VIDEO.get_frame_data_by_index( - INDEX_OF_FRAME_AT_6_SECONDS - ) - assert_tensor_equal(frames0and180[0], reference_frame0) - assert_tensor_equal(frames0and180[1], reference_frame180) - - @needs_cuda - def test_get_frames_at_indices_with_cuda(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frames_at_indices(self, device): + frame_compare_function = get_frame_compare_function(device) decoder = create_from_file(str(NASA_VIDEO.path)) scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder, device="cuda") + add_video_stream(decoder, device=device) frames0and180, *_ = get_frames_at_indices( decoder, stream_index=3, frame_indices=[0, 180] ) @@ -155,15 +152,13 @@ def test_get_frames_at_indices_with_cuda(self): reference_frame180 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert frames0and180.device.type == "cuda" - assert_tensor_close_on_at_least(frames0and180[0].to("cpu"), reference_frame0) - assert_tensor_close_on_at_least( - frames0and180[1].to("cpu"), reference_frame180, 0.3, 30 - ) + frame_compare_function(frames0and180[0], reference_frame0.to(device)) + frame_compare_function(frames0and180[1], reference_frame180.to(device)) - def test_get_frames_at_indices_unsorted_indices(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frames_at_indices_unsorted_indices(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - _add_video_stream(decoder) + _add_video_stream(decoder, device=device) scan_all_streams_to_update_metadata(decoder) stream_index = 3 @@ -192,9 +187,10 @@ def test_get_frames_at_indices_unsorted_indices(self): with pytest.raises(AssertionError): 
assert_tensor_equal(frames[0], frames[-1]) - def test_get_frames_by_pts(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frames_by_pts(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - _add_video_stream(decoder) + _add_video_stream(decoder, device=device) scan_all_streams_to_update_metadata(decoder) stream_index = 3 @@ -222,48 +218,15 @@ def test_get_frames_by_pts(self): with pytest.raises(AssertionError): assert_tensor_equal(frames[0], frames[-1]) - # TODO: Figure out how to parameterize this test to run on both CPU and CUDA.abs - # The question is how to have the @needs_cuda decorator with the pytest.mark.parametrize - # decorator on the same test. - @needs_cuda - def test_get_frames_by_pts_with_cuda(self): - decoder = create_from_file(str(NASA_VIDEO.path)) - _add_video_stream(decoder, device="cuda") - scan_all_streams_to_update_metadata(decoder) - stream_index = 3 - - # Note: 13.01 should give the last video frame for the NASA video - timestamps = [2, 0, 1, 0 + 1e-3, 13.01, 2 + 1e-3] - - expected_frames = [ - get_frame_at_pts(decoder, seconds=pts)[0] for pts in timestamps - ] - - frames, *_ = get_frames_by_pts( - decoder, - stream_index=stream_index, - timestamps=timestamps, - ) - for frame, expected_frame in zip(frames, expected_frames): - assert_tensor_equal(frame, expected_frame) - - # first and last frame should be equal, at pts=2 [+ eps]. We then modify - # the first frame and assert that it's now different from the last - # frame. This ensures a copy was properly made during the de-duplication - # logic. 
- assert_tensor_equal(frames[0], frames[-1]) - frames[0] += 20 - with pytest.raises(AssertionError): - assert_tensor_equal(frames[0], frames[-1]) - - def test_pts_apis_against_index_ref(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_pts_apis_against_index_ref(self, device): # Non-regression test for https://github.com/pytorch/torchcodec/pull/287 # Get all frames in the video, then query all frames with all time-based # APIs exactly where those frames are supposed to start. We assert that # we get the expected frame. decoder = create_from_file(str(NASA_VIDEO.path)) scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder) + add_video_stream(decoder, device=device) metadata = get_json_metadata(decoder) metadata_dict = json.loads(metadata) @@ -316,81 +279,87 @@ def test_pts_apis_against_index_ref(self): ) assert_tensor_equal(pts_seconds, all_pts_seconds_ref) - def test_get_frames_in_range(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_get_frames_in_range(self, device): + frame_compare_function = get_frame_compare_function(device) decoder = create_from_file(str(NASA_VIDEO.path)) scan_all_streams_to_update_metadata(decoder) - add_video_stream(decoder) + add_video_stream(decoder, device=device) # ensure that the degenerate case of a range of size 1 works ref_frame0 = NASA_VIDEO.get_frame_data_by_range(0, 1) bulk_frame0, *_ = get_frames_in_range(decoder, stream_index=3, start=0, stop=1) - assert_tensor_equal(ref_frame0, bulk_frame0) + frame_compare_function(bulk_frame0, ref_frame0.to(device)) ref_frame1 = NASA_VIDEO.get_frame_data_by_range(1, 2) bulk_frame1, *_ = get_frames_in_range(decoder, stream_index=3, start=1, stop=2) - assert_tensor_equal(ref_frame1, bulk_frame1) + frame_compare_function(bulk_frame1, ref_frame1.to(device)) ref_frame389 = NASA_VIDEO.get_frame_data_by_range(389, 390) bulk_frame389, *_ = get_frames_in_range( decoder, stream_index=3, start=389, stop=390 ) - assert_tensor_equal(ref_frame389, 
bulk_frame389) + frame_compare_function(bulk_frame389, ref_frame389.to(device)) # contiguous ranges ref_frames0_9 = NASA_VIDEO.get_frame_data_by_range(0, 9) bulk_frames0_9, *_ = get_frames_in_range( decoder, stream_index=3, start=0, stop=9 ) - assert_tensor_equal(ref_frames0_9, bulk_frames0_9) + frame_compare_function(bulk_frames0_9, ref_frames0_9.to(device)) ref_frames4_8 = NASA_VIDEO.get_frame_data_by_range(4, 8) bulk_frames4_8, *_ = get_frames_in_range( decoder, stream_index=3, start=4, stop=8 ) - assert_tensor_equal(ref_frames4_8, bulk_frames4_8) + frame_compare_function(bulk_frames4_8, ref_frames4_8.to(device)) # ranges with a stride ref_frames15_35 = NASA_VIDEO.get_frame_data_by_range(15, 36, 5) bulk_frames15_35, *_ = get_frames_in_range( decoder, stream_index=3, start=15, stop=36, step=5 ) - assert_tensor_equal(ref_frames15_35, bulk_frames15_35) + frame_compare_function(bulk_frames15_35, ref_frames15_35.to(device)) ref_frames0_9_2 = NASA_VIDEO.get_frame_data_by_range(0, 9, 2) bulk_frames0_9_2, *_ = get_frames_in_range( decoder, stream_index=3, start=0, stop=9, step=2 ) - assert_tensor_equal(ref_frames0_9_2, bulk_frames0_9_2) + frame_compare_function(bulk_frames0_9_2, ref_frames0_9_2.to(device)) # an empty range is valid! 
empty_frame, *_ = get_frames_in_range(decoder, stream_index=3, start=5, stop=5) - assert_tensor_equal(empty_frame, NASA_VIDEO.empty_chw_tensor) + frame_compare_function(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device)) - def test_throws_exception_at_eof(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_throws_exception_at_eof(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) seek_to_pts(decoder, 12.979633) last_frame, _, _ = get_next_frame(decoder) reference_last_frame = NASA_VIDEO.get_frame_data_by_index(289) - assert_tensor_equal(last_frame, reference_last_frame) + frame_compare_function(last_frame, reference_last_frame.to(device)) with pytest.raises(IndexError, match="no more frames"): get_next_frame(decoder) - def test_throws_exception_if_seek_too_far(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_throws_exception_if_seek_too_far(self, device): decoder = create_from_file(str(NASA_VIDEO.path)) - add_video_stream(decoder) + add_video_stream(decoder, device=device) # pts=12.979633 is the last frame in the video. seek_to_pts(decoder, 12.979633 + 1.0e-4) with pytest.raises(IndexError, match="no more frames"): get_next_frame(decoder) - def test_compile_seek_and_next(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_compile_seek_and_next(self, device): # TODO_OPEN_ISSUE Scott (T180277797): Get this to work with the inductor stack. Right now # compilation fails because it can't handle tensors of size unknown at # compile-time. 
@torch.compile(fullgraph=True, backend="eager") def get_frame1_and_frame_time6(decoder): - add_video_stream(decoder) + add_video_stream(decoder, device=device) frame0, _, _ = get_next_frame(decoder) seek_to_pts(decoder, 6.0) frame_time6, _, _ = get_next_frame(decoder) @@ -399,15 +368,17 @@ def get_frame1_and_frame_time6(decoder): # NB: create needs to happen outside the torch.compile region, # for now. Otherwise torch.compile constant-props it. decoder = create_from_file(str(NASA_VIDEO.path)) + frame_compare_function = get_frame_compare_function(device) frame0, frame_time6 = get_frame1_and_frame_time6(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame0, reference_frame0) - assert_tensor_equal(frame_time6, reference_frame_time6) + frame_compare_function(frame0, reference_frame0.to(device)) + frame_compare_function(frame_time6, reference_frame_time6.to(device)) - def test_class_based_compile_seek_and_next(self): + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_class_based_compile_seek_and_next(self, device): # TODO_OPEN_ISSUE Scott (T180277797): Ditto as above. 
@torch.compile(fullgraph=True, backend="eager") def class_based_get_frame1_and_frame_time6( @@ -418,17 +389,19 @@ def class_based_get_frame1_and_frame_time6( frame_time6, _, _ = decoder.get_next_frame() return frame0, frame_time6 - decoder = ReferenceDecoder() + decoder = ReferenceDecoder(device=device) + frame_compare_function = get_frame_compare_function(device) frame0, frame_time6 = class_based_get_frame1_and_frame_time6(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - assert_tensor_equal(frame0, reference_frame0) - assert_tensor_equal(frame_time6, reference_frame_time6) + frame_compare_function(frame0, reference_frame0.to(device)) + frame_compare_function(frame_time6, reference_frame_time6.to(device)) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("create_from", ("file", "tensor", "bytes")) - def test_create_decoder(self, create_from): + def test_create_decoder(self, create_from, device): path = str(NASA_VIDEO.path) if create_from == "file": decoder = create_from_file(path) @@ -441,19 +414,20 @@ def test_create_decoder(self, create_from): video_bytes = f.read() decoder = create_from_bytes(video_bytes) - add_video_stream(decoder) + add_video_stream(decoder, device=device) + frame_compare_function = get_frame_compare_function(device) frame0, _, _ = get_next_frame(decoder) reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0) - assert_tensor_equal(frame0, reference_frame0) + frame_compare_function(frame0, reference_frame0.to(device)) reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1) frame1, _, _ = get_next_frame(decoder) - assert_tensor_equal(frame1, reference_frame1) + frame_compare_function(frame1, reference_frame1.to(device)) seek_to_pts(decoder, 6.0) frame_time6, _, _ = get_next_frame(decoder) reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index( INDEX_OF_FRAME_AT_6_SECONDS ) - 
assert_tensor_equal(frame_time6, reference_frame_time6) + frame_compare_function(frame_time6, reference_frame_time6.to(device)) # Keeping the metadata tests below for now, but we should remove them # once we remove get_json_metadata(). diff --git a/test/utils.py b/test/utils.py index e4b50260a..89957da7a 100644 --- a/test/utils.py +++ b/test/utils.py @@ -12,13 +12,22 @@ import torch -# Decorator for skipping CUDA tests when CUDA isn't available +# Decorator for skipping CUDA tests when CUDA isn't available. The tests are +# effectively marked to be skipped in pytest_collection_modifyitems() of +# conftest.py def needs_cuda(test_item): - if not torch.cuda.is_available(): - if os.environ.get("FAIL_WITHOUT_CUDA") == "1": - raise RuntimeError("CUDA is required for this test") - return pytest.mark.skip(reason="CUDA not available")(test_item) - return test_item + return pytest.mark.needs_cuda(test_item) + + +def cpu_and_cuda(): + return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda)) + + +def get_frame_compare_function(device): + if device == "cpu": + return assert_tensor_equal + else: + return assert_tensor_close_on_at_least # For use with decoded data frames. On Linux, we expect exact, bit-for-bit equality. On @@ -34,10 +43,20 @@ def assert_tensor_equal(*args, **kwargs): # Asserts that at least `percentage`% of the values are within the absolute tolerance. 
def assert_tensor_close_on_at_least(
    actual_tensor, ref_tensor, percentage=90, abs_tolerance=20
):
    """Assert that at least ``percentage``% of the elements are close.

    Both tensors are converted to float and compared element-wise; an element
    counts as a mismatch when its absolute difference is strictly greater than
    ``abs_tolerance``.

    Args:
        actual_tensor: Tensor produced by the code under test.
        ref_tensor: Reference tensor. Must live on the same device and have
            the same shape as ``actual_tensor``.
        percentage: Minimum percentage (0-100) of elements that must be within
            ``abs_tolerance`` of the reference.
        abs_tolerance: Largest absolute difference still considered close.

    Raises:
        AssertionError: If the devices or shapes differ, or if the share of
            mismatching elements exceeds ``100 - percentage`` percent.
    """
    assert (
        actual_tensor.device == ref_tensor.device
    ), f"Devices don't match: {actual_tensor.device} vs {ref_tensor.device}"
    # Without this check, tensors with different-but-broadcastable shapes
    # would be silently broadcast by the subtraction below and could pass.
    assert (
        actual_tensor.shape == ref_tensor.shape
    ), f"Shapes don't match: {actual_tensor.shape} vs {ref_tensor.shape}"

    diff = (ref_tensor.float() - actual_tensor.float()).abs()
    # Identical tensors trivially pass. This also covers empty tensors (their
    # sum is 0), for which mean() below would be NaN and fail the comparison.
    if diff.sum() == 0:
        return

    max_diff_percentage = 100.0 - percentage
    diff_percentage = (diff > abs_tolerance).float().mean() * 100.0
    assert (
        diff_percentage <= max_diff_percentage
    ), f"Diff too high: {diff_percentage} > {max_diff_percentage}"


# For use with floating point metadata, or in other instances where we are not confident