diff --git a/test/conftest.py b/test/conftest.py
index 6ca08807a..f5db4b5d6 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,9 +1,52 @@
+import os
 import random
 
 import pytest
 import torch
 
 
+def pytest_configure(config):
+    # register an additional marker (see pytest_collection_modifyitems)
+    config.addinivalue_line(
+        "markers", "needs_cuda: mark for tests that rely on a CUDA device"
+    )
+
+
+def pytest_collection_modifyitems(items):
+    # This hook is called by pytest after it has collected the tests (google its
+    # name to check out its doc!). We can ignore some tests as we see fit here,
+    # or add marks, such as a skip mark.
+
+    out_items = []
+    for item in items:
+        # The needs_cuda mark will exist if the test was explicitly decorated
+        # with the @needs_cuda decorator. It will also exist if it was
+        # parametrized with a parameter that has the mark: for example if a test
+        # is parametrized with
+        # @pytest.mark.parametrize('device', cpu_and_cuda())
+        # the "instances" of the tests where device == 'cuda' will have the
+        # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
+        # mark.
+        needs_cuda = item.get_closest_marker("needs_cuda") is not None
+
+        if (
+            needs_cuda
+            and not torch.cuda.is_available()
+            and os.environ.get("FAIL_WITHOUT_CUDA") is None
+        ):
+            # We skip CUDA tests on non-CUDA machines, but only if the
+            # FAIL_WITHOUT_CUDA env var wasn't set. If it's set, the test will
+            # typically fail with a "Unsupported device: cuda" error. This is
+            # normal and desirable: this env var is set on CI jobs that are
+            # supposed to run the CUDA tests, so if CUDA isn't available on
+            # those for whatever reason, we need to know.
+            item.add_marker(pytest.mark.skip(reason="CUDA not available."))
+
+        out_items.append(item)
+
+    items[:] = out_items
+
+
 @pytest.fixture(autouse=True)
 def prevent_leaking_rng():
     # Prevent each test from leaking the rng to all other test when they call
diff --git a/test/decoders/test_video_decoder_ops.py b/test/decoders/test_video_decoder_ops.py
index 825405b74..ba021fe0a 100644
--- a/test/decoders/test_video_decoder_ops.py
+++ b/test/decoders/test_video_decoder_ops.py
@@ -39,6 +39,8 @@
 from ..utils import (
     assert_tensor_close_on_at_least,
     assert_tensor_equal,
+    cpu_and_cuda,
+    get_frame_compare_function,
     NASA_AUDIO,
     NASA_VIDEO,
     needs_cuda,
@@ -50,9 +52,9 @@
 
 
 class ReferenceDecoder:
-    def __init__(self):
+    def __init__(self, device="cpu"):
         self.decoder: torch.Tensor = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(self.decoder)
+        add_video_stream(self.decoder, device=device)
 
     def get_next_frame(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         assert self.decoder is not None
@@ -64,90 +66,85 @@ def seek(self, pts: float):
 
 
 class TestOps:
-    def test_seek_and_next(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_seek_and_next(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         frame0, _, _ = get_next_frame(decoder)
         reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
-        assert_tensor_equal(frame0, reference_frame0)
+        frame_compare_function(frame0, reference_frame0.to(device))
         reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1)
         frame1, _, _ = get_next_frame(decoder)
-        assert_tensor_equal(frame1, reference_frame1)
+        frame_compare_function(frame1, reference_frame1.to(device))
         seek_to_pts(decoder, 6.0)
         frame_time6, _, _ = get_next_frame(decoder)
         reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame_time6, reference_frame_time6)
+        frame_compare_function(frame_time6, reference_frame_time6.to(device))
 
-    def test_get_frame_at_pts(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frame_at_pts(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         # This frame has pts=6.006 and duration=0.033367, so it should be visible
         # at timestamps in the range [6.006, 6.039367) (not including the last timestamp).
         frame6, _, _ = get_frame_at_pts(decoder, 6.006)
         reference_frame6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame6, reference_frame6)
+        frame_compare_function(frame6, reference_frame6.to(device))
         frame6, _, _ = get_frame_at_pts(decoder, 6.02)
-        assert_tensor_equal(frame6, reference_frame6)
+        frame_compare_function(frame6, reference_frame6.to(device))
         frame6, _, _ = get_frame_at_pts(decoder, 6.039366)
-        assert_tensor_equal(frame6, reference_frame6)
+        frame_compare_function(frame6, reference_frame6.to(device))
         # Note that this timestamp is exactly on a frame boundary, so it should
         # return the next frame since the right boundary of the interval is
         # open.
         next_frame, _, _ = get_frame_at_pts(decoder, 6.039367)
         with pytest.raises(AssertionError):
-            assert_tensor_equal(next_frame, reference_frame6)
+            frame_compare_function(next_frame, reference_frame6.to(device))
 
-    def test_get_frame_at_index(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frame_at_index(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
         scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         frame0, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=0)
         reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
-        assert_tensor_equal(frame0, reference_frame0)
+        frame_compare_function(frame0, reference_frame0.to(device))
         # The frame that is played at 6 seconds is frame 180 from a 0-based index.
         frame6, _, _ = get_frame_at_index(decoder, stream_index=3, frame_index=180)
         reference_frame6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame6, reference_frame6)
+        frame_compare_function(frame6, reference_frame6.to(device))
 
-    def test_get_frame_with_info_at_index(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frame_with_info_at_index(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
         scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         frame6, pts, duration = get_frame_at_index(
             decoder, stream_index=3, frame_index=180
         )
         reference_frame6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame6, reference_frame6)
+        frame_compare_function(frame6, reference_frame6.to(device))
         assert pts.item() == pytest.approx(6.006, rel=1e-3)
         assert duration.item() == pytest.approx(0.03337, rel=1e-3)
 
-    def test_get_frames_at_indices(self):
-        decoder = create_from_file(str(NASA_VIDEO.path))
-        scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder)
-        frames0and180, *_ = get_frames_at_indices(
-            decoder, stream_index=3, frame_indices=[0, 180]
-        )
-        reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
-        reference_frame180 = NASA_VIDEO.get_frame_data_by_index(
-            INDEX_OF_FRAME_AT_6_SECONDS
-        )
-        assert_tensor_equal(frames0and180[0], reference_frame0)
-        assert_tensor_equal(frames0and180[1], reference_frame180)
-
-    @needs_cuda
-    def test_get_frames_at_indices_with_cuda(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frames_at_indices(self, device):
+        frame_compare_function = get_frame_compare_function(device)
         decoder = create_from_file(str(NASA_VIDEO.path))
         scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder, device="cuda")
+        add_video_stream(decoder, device=device)
         frames0and180, *_ = get_frames_at_indices(
             decoder, stream_index=3, frame_indices=[0, 180]
         )
@@ -155,15 +152,13 @@ def test_get_frames_at_indices_with_cuda(self):
         reference_frame180 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert frames0and180.device.type == "cuda"
-        assert_tensor_close_on_at_least(frames0and180[0].to("cpu"), reference_frame0)
-        assert_tensor_close_on_at_least(
-            frames0and180[1].to("cpu"), reference_frame180, 0.3, 30
-        )
+        frame_compare_function(frames0and180[0], reference_frame0.to(device))
+        frame_compare_function(frames0and180[1], reference_frame180.to(device))
 
-    def test_get_frames_at_indices_unsorted_indices(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frames_at_indices_unsorted_indices(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        _add_video_stream(decoder)
+        _add_video_stream(decoder, device=device)
         scan_all_streams_to_update_metadata(decoder)
         stream_index = 3
 
@@ -192,9 +187,10 @@ def test_get_frames_at_indices_unsorted_indices(self):
         with pytest.raises(AssertionError):
             assert_tensor_equal(frames[0], frames[-1])
 
-    def test_get_frames_by_pts(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frames_by_pts(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        _add_video_stream(decoder)
+        _add_video_stream(decoder, device=device)
         scan_all_streams_to_update_metadata(decoder)
         stream_index = 3
 
@@ -222,48 +218,15 @@ def test_get_frames_by_pts(self):
         with pytest.raises(AssertionError):
             assert_tensor_equal(frames[0], frames[-1])
 
-    # TODO: Figure out how to parameterize this test to run on both CPU and CUDA.abs
-    # The question is how to have the @needs_cuda decorator with the pytest.mark.parametrize
-    # decorator on the same test.
-    @needs_cuda
-    def test_get_frames_by_pts_with_cuda(self):
-        decoder = create_from_file(str(NASA_VIDEO.path))
-        _add_video_stream(decoder, device="cuda")
-        scan_all_streams_to_update_metadata(decoder)
-        stream_index = 3
-
-        # Note: 13.01 should give the last video frame for the NASA video
-        timestamps = [2, 0, 1, 0 + 1e-3, 13.01, 2 + 1e-3]
-
-        expected_frames = [
-            get_frame_at_pts(decoder, seconds=pts)[0] for pts in timestamps
-        ]
-
-        frames, *_ = get_frames_by_pts(
-            decoder,
-            stream_index=stream_index,
-            timestamps=timestamps,
-        )
-        for frame, expected_frame in zip(frames, expected_frames):
-            assert_tensor_equal(frame, expected_frame)
-
-        # first and last frame should be equal, at pts=2 [+ eps]. We then modify
-        # the first frame and assert that it's now different from the last
-        # frame. This ensures a copy was properly made during the de-duplication
-        # logic.
-        assert_tensor_equal(frames[0], frames[-1])
-        frames[0] += 20
-        with pytest.raises(AssertionError):
-            assert_tensor_equal(frames[0], frames[-1])
-
-    def test_pts_apis_against_index_ref(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_pts_apis_against_index_ref(self, device):
         # Non-regression test for https://github.com/pytorch/torchcodec/pull/287
         # Get all frames in the video, then query all frames with all time-based
         # APIs exactly where those frames are supposed to start. We assert that
         # we get the expected frame.
         decoder = create_from_file(str(NASA_VIDEO.path))
         scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
 
         metadata = get_json_metadata(decoder)
         metadata_dict = json.loads(metadata)
@@ -316,81 +279,87 @@ def test_pts_apis_against_index_ref(self):
         )
         assert_tensor_equal(pts_seconds, all_pts_seconds_ref)
 
-    def test_get_frames_in_range(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_get_frames_in_range(self, device):
+        frame_compare_function = get_frame_compare_function(device)
         decoder = create_from_file(str(NASA_VIDEO.path))
         scan_all_streams_to_update_metadata(decoder)
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
 
         # ensure that the degenerate case of a range of size 1 works
         ref_frame0 = NASA_VIDEO.get_frame_data_by_range(0, 1)
         bulk_frame0, *_ = get_frames_in_range(decoder, stream_index=3, start=0, stop=1)
-        assert_tensor_equal(ref_frame0, bulk_frame0)
+        frame_compare_function(bulk_frame0, ref_frame0.to(device))
 
         ref_frame1 = NASA_VIDEO.get_frame_data_by_range(1, 2)
         bulk_frame1, *_ = get_frames_in_range(decoder, stream_index=3, start=1, stop=2)
-        assert_tensor_equal(ref_frame1, bulk_frame1)
+        frame_compare_function(bulk_frame1, ref_frame1.to(device))
 
         ref_frame389 = NASA_VIDEO.get_frame_data_by_range(389, 390)
         bulk_frame389, *_ = get_frames_in_range(
             decoder, stream_index=3, start=389, stop=390
         )
-        assert_tensor_equal(ref_frame389, bulk_frame389)
+        frame_compare_function(bulk_frame389, ref_frame389.to(device))
 
         # contiguous ranges
         ref_frames0_9 = NASA_VIDEO.get_frame_data_by_range(0, 9)
         bulk_frames0_9, *_ = get_frames_in_range(
             decoder, stream_index=3, start=0, stop=9
         )
-        assert_tensor_equal(ref_frames0_9, bulk_frames0_9)
+        frame_compare_function(bulk_frames0_9, ref_frames0_9.to(device))
 
         ref_frames4_8 = NASA_VIDEO.get_frame_data_by_range(4, 8)
         bulk_frames4_8, *_ = get_frames_in_range(
             decoder, stream_index=3, start=4, stop=8
         )
-        assert_tensor_equal(ref_frames4_8, bulk_frames4_8)
+        frame_compare_function(bulk_frames4_8, ref_frames4_8.to(device))
 
         # ranges with a stride
         ref_frames15_35 = NASA_VIDEO.get_frame_data_by_range(15, 36, 5)
         bulk_frames15_35, *_ = get_frames_in_range(
             decoder, stream_index=3, start=15, stop=36, step=5
         )
-        assert_tensor_equal(ref_frames15_35, bulk_frames15_35)
+        frame_compare_function(bulk_frames15_35, ref_frames15_35.to(device))
 
         ref_frames0_9_2 = NASA_VIDEO.get_frame_data_by_range(0, 9, 2)
         bulk_frames0_9_2, *_ = get_frames_in_range(
             decoder, stream_index=3, start=0, stop=9, step=2
         )
-        assert_tensor_equal(ref_frames0_9_2, bulk_frames0_9_2)
+        frame_compare_function(bulk_frames0_9_2, ref_frames0_9_2.to(device))
 
         # an empty range is valid!
         empty_frame, *_ = get_frames_in_range(decoder, stream_index=3, start=5, stop=5)
-        assert_tensor_equal(empty_frame, NASA_VIDEO.empty_chw_tensor)
+        frame_compare_function(empty_frame, NASA_VIDEO.empty_chw_tensor.to(device))
 
-    def test_throws_exception_at_eof(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_throws_exception_at_eof(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         seek_to_pts(decoder, 12.979633)
         last_frame, _, _ = get_next_frame(decoder)
         reference_last_frame = NASA_VIDEO.get_frame_data_by_index(289)
-        assert_tensor_equal(last_frame, reference_last_frame)
+        frame_compare_function(last_frame, reference_last_frame.to(device))
         with pytest.raises(IndexError, match="no more frames"):
             get_next_frame(decoder)
 
-    def test_throws_exception_if_seek_too_far(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_throws_exception_if_seek_too_far(self, device):
         decoder = create_from_file(str(NASA_VIDEO.path))
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
         # pts=12.979633 is the last frame in the video.
         seek_to_pts(decoder, 12.979633 + 1.0e-4)
         with pytest.raises(IndexError, match="no more frames"):
             get_next_frame(decoder)
 
-    def test_compile_seek_and_next(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_compile_seek_and_next(self, device):
         # TODO_OPEN_ISSUE Scott (T180277797): Get this to work with the inductor stack. Right now
         # compilation fails because it can't handle tensors of size unknown at
         # compile-time.
         @torch.compile(fullgraph=True, backend="eager")
         def get_frame1_and_frame_time6(decoder):
-            add_video_stream(decoder)
+            add_video_stream(decoder, device=device)
             frame0, _, _ = get_next_frame(decoder)
             seek_to_pts(decoder, 6.0)
             frame_time6, _, _ = get_next_frame(decoder)
@@ -399,15 +368,17 @@ def get_frame1_and_frame_time6(decoder):
         # NB: create needs to happen outside the torch.compile region,
         # for now. Otherwise torch.compile constant-props it.
         decoder = create_from_file(str(NASA_VIDEO.path))
+        frame_compare_function = get_frame_compare_function(device)
         frame0, frame_time6 = get_frame1_and_frame_time6(decoder)
         reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
         reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame0, reference_frame0)
-        assert_tensor_equal(frame_time6, reference_frame_time6)
+        frame_compare_function(frame0, reference_frame0.to(device))
+        frame_compare_function(frame_time6, reference_frame_time6.to(device))
 
-    def test_class_based_compile_seek_and_next(self):
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_class_based_compile_seek_and_next(self, device):
         # TODO_OPEN_ISSUE Scott (T180277797): Ditto as above.
         @torch.compile(fullgraph=True, backend="eager")
         def class_based_get_frame1_and_frame_time6(
@@ -418,17 +389,19 @@ def class_based_get_frame1_and_frame_time6(
             frame_time6, _, _ = decoder.get_next_frame()
             return frame0, frame_time6
 
-        decoder = ReferenceDecoder()
+        decoder = ReferenceDecoder(device=device)
+        frame_compare_function = get_frame_compare_function(device)
         frame0, frame_time6 = class_based_get_frame1_and_frame_time6(decoder)
         reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
         reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame0, reference_frame0)
-        assert_tensor_equal(frame_time6, reference_frame_time6)
+        frame_compare_function(frame0, reference_frame0.to(device))
+        frame_compare_function(frame_time6, reference_frame_time6.to(device))
 
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("create_from", ("file", "tensor", "bytes"))
-    def test_create_decoder(self, create_from):
+    def test_create_decoder(self, create_from, device):
         path = str(NASA_VIDEO.path)
         if create_from == "file":
             decoder = create_from_file(path)
@@ -441,19 +414,20 @@ def test_create_decoder(self, create_from):
                 video_bytes = f.read()
             decoder = create_from_bytes(video_bytes)
 
-        add_video_stream(decoder)
+        add_video_stream(decoder, device=device)
+        frame_compare_function = get_frame_compare_function(device)
         frame0, _, _ = get_next_frame(decoder)
         reference_frame0 = NASA_VIDEO.get_frame_data_by_index(0)
-        assert_tensor_equal(frame0, reference_frame0)
+        frame_compare_function(frame0, reference_frame0.to(device))
         reference_frame1 = NASA_VIDEO.get_frame_data_by_index(1)
         frame1, _, _ = get_next_frame(decoder)
-        assert_tensor_equal(frame1, reference_frame1)
+        frame_compare_function(frame1, reference_frame1.to(device))
         seek_to_pts(decoder, 6.0)
         frame_time6, _, _ = get_next_frame(decoder)
         reference_frame_time6 = NASA_VIDEO.get_frame_data_by_index(
             INDEX_OF_FRAME_AT_6_SECONDS
         )
-        assert_tensor_equal(frame_time6, reference_frame_time6)
+        frame_compare_function(frame_time6, reference_frame_time6.to(device))
 
     # Keeping the metadata tests below for now, but we should remove them
     # once we remove get_json_metadata().
diff --git a/test/utils.py b/test/utils.py
index e4b50260a..89957da7a 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -12,13 +12,22 @@
 import torch
 
 
-# Decorator for skipping CUDA tests when CUDA isn't available
+# Decorator for skipping CUDA tests when CUDA isn't available. The tests are
+# effectively marked to be skipped in pytest_collection_modifyitems() of
+# conftest.py
 def needs_cuda(test_item):
-    if not torch.cuda.is_available():
-        if os.environ.get("FAIL_WITHOUT_CUDA") == "1":
-            raise RuntimeError("CUDA is required for this test")
-        return pytest.mark.skip(reason="CUDA not available")(test_item)
-    return test_item
+    return pytest.mark.needs_cuda(test_item)
+
+
+def cpu_and_cuda():
+    return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda))
+
+
+def get_frame_compare_function(device):
+    if device == "cpu":
+        return assert_tensor_equal
+    else:
+        return assert_tensor_close_on_at_least
 
 
 # For use with decoded data frames. On Linux, we expect exact, bit-for-bit equality. On
@@ -34,10 +43,20 @@ def assert_tensor_equal(*args, **kwargs):
 
 
 # Asserts that at least `percentage`% of the values are within the absolute tolerance.
-def assert_tensor_close_on_at_least(frame1, frame2, percentage=99.7, abs_tolerance=20):
-    diff = (frame2.float() - frame1.float()).abs()
-    diff_percentage = 100.0 - percentage
-    assert (diff > abs_tolerance).float().mean() <= diff_percentage / 100.0
+def assert_tensor_close_on_at_least(
+    actual_tensor, ref_tensor, percentage=90, abs_tolerance=20
+):
+    assert (
+        actual_tensor.device == ref_tensor.device
+    ), f"Devices don't match: {actual_tensor.device} vs {ref_tensor.device}"
+    diff = (ref_tensor.float() - actual_tensor.float()).abs()
+    max_diff_percentage = 100.0 - percentage
+    if diff.sum() == 0:
+        return
+    diff_percentage = (diff > abs_tolerance).float().mean() * 100.0
+    assert (
+        diff_percentage <= max_diff_percentage
+    ), f"Diff too high: {diff_percentage} > {max_diff_percentage}"
 
 
 # For use with floating point metadata, or in other instances where we are not confident