From e04394f833d3382ee03c13e1c94af49f1d11ce7f Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Mon, 28 Feb 2022 20:24:25 +0000
Subject: [PATCH 1/2] Improve test_video_reader

---
 test/test_video_reader.py | 1302 ++++++++++++++++++-------------------
 1 file changed, 649 insertions(+), 653 deletions(-)

diff --git a/test/test_video_reader.py b/test/test_video_reader.py
index 73c4d8a1b85..6d556b9802e 100644
--- a/test/test_video_reader.py
+++ b/test/test_video_reader.py
@@ -1,5 +1,4 @@
 import collections
-import itertools
 import math
 import os
 from fractions import Fraction
@@ -112,7 +111,7 @@
 
 # av_seek_frame is imprecise so seek to a timestamp earlier by a margin
 # The unit of margin is second
-seek_frame_margin = 0.25
+SEEK_FRAME_MARGIN = 0.25
 
 
 def _read_from_stream(container, start_pts, end_pts, stream, stream_name, buffer_size=4):
@@ -369,7 +368,8 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config
 
             assert_equal(atimebase, ref_result.atimebase)
 
-    def test_stress_test_read_video_from_file(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_stress_test_read_video_from_file(self, test_video):
         pytest.skip(
             "This stress test will iteratively decode the same set of videos."
             "It helps to detect memory leak but it takes lots of time to run."
@@ -386,52 +386,12 @@ def test_stress_test_read_video_from_file(self):
         audio_timebase_num, audio_timebase_den = 0, 1
 
         for _i in range(num_iter):
-            for test_video, _config in test_videos.items():
-                full_path = os.path.join(VIDEO_DIR, test_video)
-
-                # pass 1: decode all frames using new decoder
-                torch.ops.video_reader.read_video_from_file(
-                    full_path,
-                    seek_frame_margin,
-                    0,  # getPtsOnly
-                    1,  # readVideoStream
-                    width,
-                    height,
-                    min_dimension,
-                    max_dimension,
-                    video_start_pts,
-                    video_end_pts,
-                    video_timebase_num,
-                    video_timebase_den,
-                    1,  # readAudioStream
-                    samples,
-                    channels,
-                    audio_start_pts,
-                    audio_end_pts,
-                    audio_timebase_num,
-                    audio_timebase_den,
-                )
-
-    def test_read_video_from_file(self):
-        """
-        Test the case when decoder starts with a video file to decode frames.
-        """
-        # video related
-        width, height, min_dimension, max_dimension = 0, 0, 0, 0
-        video_start_pts, video_end_pts = 0, -1
-        video_timebase_num, video_timebase_den = 0, 1
-        # audio related
-        samples, channels = 0, 0
-        audio_start_pts, audio_end_pts = 0, -1
-        audio_timebase_num, audio_timebase_den = 0, 1
-
-        for test_video, config in test_videos.items():
             full_path = os.path.join(VIDEO_DIR, test_video)
 
             # pass 1: decode all frames using new decoder
-            tv_result = torch.ops.video_reader.read_video_from_file(
+            torch.ops.video_reader.read_video_from_file(
                 full_path,
-                seek_frame_margin,
+                SEEK_FRAME_MARGIN,
                 0,  # getPtsOnly
                 1,  # readVideoStream
                 width,
@@ -450,14 +410,55 @@ def test_read_video_from_file(self):
                 audio_timebase_num,
                 audio_timebase_den,
             )
-            # pass 2: decode all frames using av
-            pyav_result = _decode_frames_by_av_module(full_path)
-            # check results from TorchVision decoder
-            self.check_separate_decoding_result(tv_result, config)
-            # compare decoding results
-            self.compare_decoding_result(tv_result, pyav_result, config)
 
-    def test_read_video_from_file_read_single_stream_only(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_file(self, test_video, config):
+        """
+        Test the case when decoder starts with a video file to decode frames.
+        """
+        # video related
+        width, height, min_dimension, max_dimension = 0, 0, 0, 0
+        video_start_pts, video_end_pts = 0, -1
+        video_timebase_num, video_timebase_den = 0, 1
+        # audio related
+        samples, channels = 0, 0
+        audio_start_pts, audio_end_pts = 0, -1
+        audio_timebase_num, audio_timebase_den = 0, 1
+
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        # pass 1: decode all frames using new decoder
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        # pass 2: decode all frames using av
+        pyav_result = _decode_frames_by_av_module(full_path)
+        # check results from TorchVision decoder
+        self.check_separate_decoding_result(tv_result, config)
+        # compare decoding results
+        self.compare_decoding_result(tv_result, pyav_result, config)
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    @pytest.mark.parametrize("read_video_stream,read_audio_stream", [(1, 0), (0, 1)])
+    def test_read_video_from_file_read_single_stream_only(self, test_video, config, read_video_stream, read_audio_stream):
         """
         Test the case when decoder starts with a video file to decode frames, and
         only reads video stream and ignores audio stream
@@ -471,57 +472,56 @@ def test_read_video_from_file_read_single_stream_only(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-            for readVideoStream, readAudioStream in [(1, 0), (0, 1)]:
-                # decode all frames using new decoder
-                tv_result = torch.ops.video_reader.read_video_from_file(
-                    full_path,
-                    seek_frame_margin,
-                    0,  # getPtsOnly
-                    readVideoStream,
-                    width,
-                    height,
-                    min_dimension,
-                    max_dimension,
-                    video_start_pts,
-                    video_end_pts,
-                    video_timebase_num,
-                    video_timebase_den,
-                    readAudioStream,
-                    samples,
-                    channels,
-                    audio_start_pts,
-                    audio_end_pts,
-                    audio_timebase_num,
-                    audio_timebase_den,
-                )
-
-                (
-                    vframes,
-                    vframe_pts,
-                    vtimebase,
-                    vfps,
-                    vduration,
-                    aframes,
-                    aframe_pts,
-                    atimebase,
-                    asample_rate,
-                    aduration,
-                ) = tv_result
-
-                assert (vframes.numel() > 0) is bool(readVideoStream)
-                assert (vframe_pts.numel() > 0) is bool(readVideoStream)
-                assert (vtimebase.numel() > 0) is bool(readVideoStream)
-                assert (vfps.numel() > 0) is bool(readVideoStream)
-
-                expect_audio_data = readAudioStream == 1 and config.audio_sample_rate is not None
-                assert (aframes.numel() > 0) is bool(expect_audio_data)
-                assert (aframe_pts.numel() > 0) is bool(expect_audio_data)
-                assert (atimebase.numel() > 0) is bool(expect_audio_data)
-                assert (asample_rate.numel() > 0) is bool(expect_audio_data)
-
-    def test_read_video_from_file_rescale_min_dimension(self):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        # decode all frames using new decoder
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            read_video_stream,
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            read_audio_stream,
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+
+        (
+            vframes,
+            vframe_pts,
+            vtimebase,
+            vfps,
+            vduration,
+            aframes,
+            aframe_pts,
+            atimebase,
+            asample_rate,
+            aduration,
+        ) = tv_result
+
+        assert (vframes.numel() > 0) is bool(read_video_stream)
+        assert (vframe_pts.numel() > 0) is bool(read_video_stream)
+        assert (vtimebase.numel() > 0) is bool(read_video_stream)
+        assert (vfps.numel() > 0) is bool(read_video_stream)
+
+        expect_audio_data = read_audio_stream == 1 and config.audio_sample_rate is not None
+        assert (aframes.numel() > 0) is bool(expect_audio_data)
+        assert (aframe_pts.numel() > 0) is bool(expect_audio_data)
+        assert (atimebase.numel() > 0) is bool(expect_audio_data)
+        assert (asample_rate.numel() > 0) is bool(expect_audio_data)
+
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_min_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
@@ -535,33 +535,33 @@ def test_read_video_from_file_rescale_min_dimension(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))
 
-    def test_read_video_from_file_rescale_max_dimension(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_max_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
@@ -575,33 +575,33 @@ def test_read_video_from_file_rescale_max_dimension(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))
 
-    def test_read_video_from_file_rescale_both_min_max_dimension(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_both_min_max_dimension(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video min dimension between height and width is set.
@@ -615,34 +615,34 @@ def test_read_video_from_file_rescale_both_min_max_dimension(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))
-            assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert min_dimension == min(tv_result[0].size(1), tv_result[0].size(2))
+        assert max_dimension == max(tv_result[0].size(1), tv_result[0].size(2))
 
-    def test_read_video_from_file_rescale_width(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_width(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video width is set.
@@ -656,33 +656,33 @@ def test_read_video_from_file_rescale_width(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert tv_result[0].size(2) == width
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert tv_result[0].size(2) == width
 
-    def test_read_video_from_file_rescale_height(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_height(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         video height is set.
@@ -696,33 +696,33 @@ def test_read_video_from_file_rescale_height(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert tv_result[0].size(1) == height
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert tv_result[0].size(1) == height
 
-    def test_read_video_from_file_rescale_width_and_height(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_file_rescale_width_and_height(self, test_video):
         """
         Test the case when decoder starts with a video file to decode frames, and
         both video height and width are set.
@@ -736,93 +736,92 @@ def test_read_video_from_file_rescale_width_and_height(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, _config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            tv_result = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert tv_result[0].size(1) == height
-            assert tv_result[0].size(2) == width
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert tv_result[0].size(1) == height
+        assert tv_result[0].size(2) == width
 
-    def test_read_video_from_file_audio_resampling(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("samples", [9600, 96000])
+    def test_read_video_from_file_audio_resampling(self, test_video, samples):
         """
         Test the case when decoder starts with a video file to decode frames, and
         audio waveform are resampled
         """
+        # video related
+        width, height, min_dimension, max_dimension = 0, 0, 0, 0
+        video_start_pts, video_end_pts = 0, -1
+        video_timebase_num, video_timebase_den = 0, 1
+        # audio related
+        channels = 0
+        audio_start_pts, audio_end_pts = 0, -1
+        audio_timebase_num, audio_timebase_den = 0, 1
 
-        for samples in [9600, 96000]:  # downsampling  # upsampling
-            # video related
-            width, height, min_dimension, max_dimension = 0, 0, 0, 0
-            video_start_pts, video_end_pts = 0, -1
-            video_timebase_num, video_timebase_den = 0, 1
-            # audio related
-            channels = 0
-            audio_start_pts, audio_end_pts = 0, -1
-            audio_timebase_num, audio_timebase_den = 0, 1
-
-            for test_video, _config in test_videos.items():
-                full_path = os.path.join(VIDEO_DIR, test_video)
-
-                tv_result = torch.ops.video_reader.read_video_from_file(
-                    full_path,
-                    seek_frame_margin,
-                    0,  # getPtsOnly
-                    1,  # readVideoStream
-                    width,
-                    height,
-                    min_dimension,
-                    max_dimension,
-                    video_start_pts,
-                    video_end_pts,
-                    video_timebase_num,
-                    video_timebase_den,
-                    1,  # readAudioStream
-                    samples,
-                    channels,
-                    audio_start_pts,
-                    audio_end_pts,
-                    audio_timebase_num,
-                    audio_timebase_den,
-                )
-                (
-                    vframes,
-                    vframe_pts,
-                    vtimebase,
-                    vfps,
-                    vduration,
-                    aframes,
-                    aframe_pts,
-                    atimebase,
-                    asample_rate,
-                    aduration,
-                ) = tv_result
-                if aframes.numel() > 0:
-                    assert samples == asample_rate.item()
-                    assert 1 == aframes.size(1)
-                    # when audio stream is found
-                    duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1])
-                    assert aframes.size(0) == approx(int(duration * asample_rate.item()), abs=0.1 * asample_rate.item())
-
-    def test_compare_read_video_from_memory_and_file(self):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+
+        tv_result = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        (
+            vframes,
+            vframe_pts,
+            vtimebase,
+            vfps,
+            vduration,
+            aframes,
+            aframe_pts,
+            atimebase,
+            asample_rate,
+            aduration,
+        ) = tv_result
+        if aframes.numel() > 0:
+            assert samples == asample_rate.item()
+            assert 1 == aframes.size(1)
+            # when audio stream is found
+            duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1])
+            assert aframes.size(0) == approx(int(duration * asample_rate.item()), abs=0.1 * asample_rate.item())
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_compare_read_video_from_memory_and_file(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -835,60 +834,60 @@ def test_compare_read_video_from_memory_and_file(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-
-            # pass 1: decode all frames using cpp decoder
-            tv_result_memory = torch.ops.video_reader.read_video_from_memory(
-                video_tensor,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            self.check_separate_decoding_result(tv_result_memory, config)
-            # pass 2: decode all frames from file
-            tv_result_file = torch.ops.video_reader.read_video_from_file(
-                full_path,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
+        full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+
+        # pass 1: decode all frames using cpp decoder
+        tv_result_memory = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        self.check_separate_decoding_result(tv_result_memory, config)
+        # pass 2: decode all frames from file
+        tv_result_file = torch.ops.video_reader.read_video_from_file(
+            full_path,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
 
-            self.check_separate_decoding_result(tv_result_file, config)
-            # finally, compare results decoded from memory and file
-            self.compare_decoding_result(tv_result_memory, tv_result_file)
+        self.check_separate_decoding_result(tv_result_file, config)
+        # finally, compare results decoded from memory and file
+        self.compare_decoding_result(tv_result_memory, tv_result_file)
 
-    def test_read_video_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_memory(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -901,38 +900,38 @@ def test_read_video_from_memory(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-
-            # pass 1: decode all frames using cpp decoder
-            tv_result = torch.ops.video_reader.read_video_from_memory(
-                video_tensor,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            # pass 2: decode all frames using av
-            pyav_result = _decode_frames_by_av_module(full_path)
+        full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+
+        # pass 1: decode all frames using cpp decoder
+        tv_result = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        # pass 2: decode all frames using av
+        pyav_result = _decode_frames_by_av_module(full_path)
 
-            self.check_separate_decoding_result(tv_result, config)
-            self.compare_decoding_result(tv_result, pyav_result, config)
+        self.check_separate_decoding_result(tv_result, config)
+        self.compare_decoding_result(tv_result, pyav_result, config)
 
-    def test_read_video_from_memory_get_pts_only(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_read_video_from_memory_get_pts_only(self, test_video, config):
         """
         Test the case when video is already in memory, and decoder reads data in memory.
         Compare frame pts between decoding for pts only and full decoding
@@ -947,234 +946,234 @@ def test_read_video_from_memory_get_pts_only(self):
         audio_start_pts, audio_end_pts = 0, -1
         audio_timebase_num, audio_timebase_den = 0, 1
 
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-
-            # pass 1: decode all frames using cpp decoder
-            tv_result = torch.ops.video_reader.read_video_from_memory(
-                video_tensor,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            assert abs(config.video_fps - tv_result[3].item()) < 0.01
-
-            # pass 2: decode all frames to get PTS only using cpp decoder
-            tv_result_pts_only = torch.ops.video_reader.read_video_from_memory(
-                video_tensor,
-                seek_frame_margin,
-                1,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
-            )
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+
+        # pass 1: decode all frames using cpp decoder
+        tv_result = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        assert abs(config.video_fps - tv_result[3].item()) < 0.01
+
+        # pass 2: decode all frames to get PTS only using cpp decoder
+        tv_result_pts_only = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            1,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
 
-            assert not tv_result_pts_only[0].numel()
-            assert not tv_result_pts_only[5].numel()
-            self.compare_decoding_result(tv_result, tv_result_pts_only)
+        assert not tv_result_pts_only[0].numel()
+        assert not tv_result_pts_only[5].numel()
+        self.compare_decoding_result(tv_result, tv_result_pts_only)
 
-    def test_read_video_in_range_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    @pytest.mark.parametrize("num_frames", [4, 8, 16, 32, 64, 128])
+    def test_read_video_in_range_from_memory(self, test_video, config, num_frames):
         """
         Test the case when video is already in memory, and decoder reads data in memory.
         In addition, decoder takes meaningful start- and end PTS as input, and decode
         frames within that interval
         """
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-            # video related
-            width, height, min_dimension, max_dimension = 0, 0, 0, 0
-            video_start_pts, video_end_pts = 0, -1
-            video_timebase_num, video_timebase_den = 0, 1
-            # audio related
-            samples, channels = 0, 0
-            audio_start_pts, audio_end_pts = 0, -1
-            audio_timebase_num, audio_timebase_den = 0, 1
-            # pass 1: decode all frames using new decoder
-            tv_result = torch.ops.video_reader.read_video_from_memory(
-                video_tensor,
-                seek_frame_margin,
-                0,  # getPtsOnly
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                video_start_pts,
-                video_end_pts,
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                audio_start_pts,
-                audio_end_pts,
-                audio_timebase_num,
-                audio_timebase_den,
+        full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        # video related
+        width, height, min_dimension, max_dimension = 0, 0, 0, 0
+        video_start_pts, video_end_pts = 0, -1
+        video_timebase_num, video_timebase_den = 0, 1
+        # audio related
+        samples, channels = 0, 0
+        audio_start_pts, audio_end_pts = 0, -1
+        audio_timebase_num, audio_timebase_den = 0, 1
+        # pass 1: decode all frames using new decoder
+        tv_result = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        (
+            vframes,
+            vframe_pts,
+            vtimebase,
+            vfps,
+            vduration,
+            aframes,
+            aframe_pts,
+            atimebase,
+            asample_rate,
+            aduration,
+        ) = tv_result
+        assert abs(config.video_fps - vfps.item()) < 0.01
+
+        start_pts_ind_max = vframe_pts.size(0) - num_frames
+        if start_pts_ind_max <= 0:
+            return
+        # randomly pick start pts
+        start_pts_ind = randint(0, start_pts_ind_max)
+        end_pts_ind = start_pts_ind + num_frames - 1
+        video_start_pts = vframe_pts[start_pts_ind]
+        video_end_pts = vframe_pts[end_pts_ind]
+
+        video_timebase_num, video_timebase_den = vtimebase[0], vtimebase[1]
+        if len(atimebase) > 0:
+            # when audio stream is available
+            audio_timebase_num, audio_timebase_den = atimebase[0], atimebase[1]
+            audio_start_pts = _pts_convert(
+                video_start_pts.item(),
+                Fraction(video_timebase_num.item(), video_timebase_den.item()),
+                Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
+                math.floor,
+            )
+            audio_end_pts = _pts_convert(
+                video_end_pts.item(),
+                Fraction(video_timebase_num.item(), video_timebase_den.item()),
+                Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
+                math.ceil,
+            )
+
+        # pass 2: decode frames in the randomly generated range
+        tv_result = torch.ops.video_reader.read_video_from_memory(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            0,  # getPtsOnly
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            video_start_pts,
+            video_end_pts,
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            audio_start_pts,
+            audio_end_pts,
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+
+        # pass 3: decode frames in range using PyAv
+        video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path)
+
+        video_start_pts_av = _pts_convert(
+            video_start_pts.item(),
+            Fraction(video_timebase_num.item(), video_timebase_den.item()),
+            Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
+            math.floor,
+        )
+        video_end_pts_av = _pts_convert(
+            video_end_pts.item(),
+            Fraction(video_timebase_num.item(), video_timebase_den.item()),
+            Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
+            math.ceil,
+        )
+        if audio_timebase_av:
+            audio_start_pts = _pts_convert(
+                video_start_pts.item(),
+                Fraction(video_timebase_num.item(), video_timebase_den.item()),
+                Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
+                math.floor,
+            )
+            audio_end_pts = _pts_convert(
+                video_end_pts.item(),
+                Fraction(video_timebase_num.item(), video_timebase_den.item()),
+                Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
+                math.ceil,
             )
-            (
-                vframes,
-                vframe_pts,
-                vtimebase,
-                vfps,
-                vduration,
-                aframes,
-                aframe_pts,
-                atimebase,
-                asample_rate,
-                aduration,
-            ) = tv_result
-            assert abs(config.video_fps - vfps.item()) < 0.01
-
-            for num_frames in [4, 8, 16, 32, 64, 128]:
-                start_pts_ind_max = vframe_pts.size(0) - num_frames
-                if start_pts_ind_max <= 0:
-                    continue
-                # randomly pick start pts
-                start_pts_ind = randint(0, start_pts_ind_max)
-                end_pts_ind = start_pts_ind + num_frames - 1
-                video_start_pts = vframe_pts[start_pts_ind]
-                video_end_pts = vframe_pts[end_pts_ind]
-
-                video_timebase_num, video_timebase_den = vtimebase[0], vtimebase[1]
-                if len(atimebase) > 0:
-                    # when audio stream is available
-                    audio_timebase_num, audio_timebase_den = atimebase[0], atimebase[1]
-                    audio_start_pts = _pts_convert(
-                        video_start_pts.item(),
-                        Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                        Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
-                        math.floor,
-                    )
-                    audio_end_pts = _pts_convert(
-                        video_end_pts.item(),
-                        Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                        Fraction(audio_timebase_num.item(), audio_timebase_den.item()),
-                        math.ceil,
-                    )
-
-                # pass 2: decode frames in the randomly generated range
-                tv_result = torch.ops.video_reader.read_video_from_memory(
-                    video_tensor,
-                    seek_frame_margin,
-                    0,  # getPtsOnly
-                    1,  # readVideoStream
-                    width,
-                    height,
-                    min_dimension,
-                    max_dimension,
-                    video_start_pts,
-                    video_end_pts,
-                    video_timebase_num,
-                    video_timebase_den,
-                    1,  # readAudioStream
-                    samples,
-                    channels,
-                    audio_start_pts,
-                    audio_end_pts,
-                    audio_timebase_num,
-                    audio_timebase_den,
-                )
-
-                # pass 3: decode frames in range using PyAv
-                video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path)
-
-                video_start_pts_av = _pts_convert(
-                    video_start_pts.item(),
-                    Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                    Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
-                    math.floor,
-                )
-                video_end_pts_av = _pts_convert(
-                    video_end_pts.item(),
-                    Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                    Fraction(video_timebase_av.numerator, video_timebase_av.denominator),
-                    math.ceil,
-                )
-                if audio_timebase_av:
-                    audio_start_pts = _pts_convert(
-                        video_start_pts.item(),
-                        Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                        Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
-                        math.floor,
-                    )
-                    audio_end_pts = _pts_convert(
-                        video_end_pts.item(),
-                        Fraction(video_timebase_num.item(), video_timebase_den.item()),
-                        Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator),
-                        math.ceil,
-                    )
-
-                pyav_result = _decode_frames_by_av_module(
-                    full_path,
-                    video_start_pts_av,
-                    video_end_pts_av,
-                    audio_start_pts,
-                    audio_end_pts,
-                )
-
-                assert tv_result[0].size(0) == num_frames
-                if pyav_result.vframes.size(0) == num_frames:
-                    # if PyAv decodes a different number of video frames, skip
-                    # comparing the decoding results between Torchvision video reader
-                    # and PyAv
-                    self.compare_decoding_result(tv_result, pyav_result, config)
-
-    def test_probe_video_from_file(self):
+
+        pyav_result = _decode_frames_by_av_module(
+            full_path,
+            video_start_pts_av,
+            video_end_pts_av,
+            audio_start_pts,
+            audio_end_pts,
+        )
+
+        assert tv_result[0].size(0) == num_frames
+        if pyav_result.vframes.size(0) == num_frames:
+            # if PyAv decodes a different number of video frames, skip
+            # comparing the decoding results between Torchvision video reader
+            # and PyAv
+            self.compare_decoding_result(tv_result, pyav_result, config)
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_file(self, test_video, config):
         """
         Test the case when decoder probes a video file
         """
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-            probe_result = torch.ops.video_reader.probe_video_from_file(full_path)
-            self.check_probe_result(probe_result, config)
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        probe_result = torch.ops.video_reader.probe_video_from_file(full_path)
+        self.check_probe_result(probe_result, config)
 
-    def test_probe_video_from_memory(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_memory(self, test_video, config):
         """
         Test the case when decoder probes a video in memory
         """
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-            probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor)
-            self.check_probe_result(probe_result, config)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor)
+        self.check_probe_result(probe_result, config)
 
-    def test_probe_video_from_memory_script(self):
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_probe_video_from_memory_script(self, test_video, config):
         scripted_fun = torch.jit.script(io._probe_video_from_memory)
         assert scripted_fun is not None
 
-        for test_video, config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-            probe_result = scripted_fun(video_tensor)
-            self.check_meta_result(probe_result, config)
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+        probe_result = scripted_fun(video_tensor)
+        self.check_meta_result(probe_result, config)
 
-    def test_read_video_from_memory_scripted(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_read_video_from_memory_scripted(self, test_video):
         """
         Test the case when video is already in memory, and decoder reads data in memory
         """
@@ -1190,29 +1189,28 @@ def test_read_video_from_memory_scripted(self):
         scripted_fun = torch.jit.script(io._read_video_from_memory)
         assert scripted_fun is not None
 
-        for test_video, _config in test_videos.items():
-            full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
-
-            # decode all frames using cpp decoder
-            scripted_fun(
-                video_tensor,
-                seek_frame_margin,
-                1,  # readVideoStream
-                width,
-                height,
-                min_dimension,
-                max_dimension,
-                [video_start_pts, video_end_pts],
-                video_timebase_num,
-                video_timebase_den,
-                1,  # readAudioStream
-                samples,
-                channels,
-                [audio_start_pts, audio_end_pts],
-                audio_timebase_num,
-                audio_timebase_den,
-            )
-            # FUTURE: check value of video / audio frames
+        _, video_tensor = _get_video_tensor(VIDEO_DIR, test_video)
+
+        # decode all frames using cpp decoder
+        scripted_fun(
+            video_tensor,
+            SEEK_FRAME_MARGIN,
+            1,  # readVideoStream
+            width,
+            height,
+            min_dimension,
+            max_dimension,
+            [video_start_pts, video_end_pts],
+            video_timebase_num,
+            video_timebase_den,
+            1,  # readAudioStream
+            samples,
+            channels,
+            [audio_start_pts, audio_end_pts],
+            audio_timebase_num,
+            audio_timebase_den,
+        )
+        # FUTURE: check value of video / audio frames
 
     def test_invalid_file(self):
         set_video_backend("video_reader")
@@ -1223,33 +1221,31 @@ def test_invalid_file(self):
         with pytest.raises(RuntimeError):
             io.read_video("foo.mp4")
 
-    def test_audio_present_pts(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("backend", ["video_reader", "pyav"])
+    @pytest.mark.parametrize("start_offset", [0, 1000])
+    @pytest.mark.parametrize("end_offset", [3000, None])
+    def test_audio_present_pts(self, test_video, backend, start_offset, end_offset):
         """Test if audio frames are returned with pts unit."""
-        backends = ["video_reader", "pyav"]
-        start_offsets = [0, 1000]
-        end_offsets = [3000, None]
-        for test_video, _ in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-            container = av.open(full_path)
-            if container.streams.audio:
-                for backend, start_offset, end_offset in itertools.product(backends, start_offsets, end_offsets):
-                    set_video_backend(backend)
-                    _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="pts")
-                    assert all([dimension > 0 for dimension in audio.shape[:2]])
-
-    def test_audio_present_sec(self):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        container = av.open(full_path)
+        if container.streams.audio:
+            set_video_backend(backend)
+            _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="pts")
+            assert all([dimension > 0 for dimension in audio.shape[:2]])
+
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    @pytest.mark.parametrize("backend", ["video_reader", "pyav"])
+    @pytest.mark.parametrize("start_offset", [0, 0.1])
+    @pytest.mark.parametrize("end_offset", [0.3, None])
+    def test_audio_present_sec(self, test_video, backend, start_offset, end_offset):
         """Test if audio frames are returned with sec unit."""
-        backends = ["video_reader", "pyav"]
-        start_offsets = [0, 0.1]
-        end_offsets = [0.3, None]
-        for test_video, _ in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-            container = av.open(full_path)
-            if container.streams.audio:
-                for backend, start_offset, end_offset in itertools.product(backends, start_offsets, end_offsets):
-                    set_video_backend(backend)
-                    _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="sec")
-                    assert all([dimension > 0 for dimension in audio.shape[:2]])
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        container = av.open(full_path)
+        if container.streams.audio:
+            set_video_backend(backend)
+            _, audio, _ = io.read_video(full_path, start_offset, end_offset, pts_unit="sec")
+            assert all([dimension > 0 for dimension in audio.shape[:2]])
 
 
 if __name__ == "__main__":

From 04bee90d7a5547e1fb60c35b99abe4dd3d015bf2 Mon Sep 17 00:00:00 2001
From: Prabhat Roy <prabhatroy@fb.com>
Date: Mon, 28 Feb 2022 22:12:46 +0000
Subject: [PATCH 2/2] Fix linter error

---
 test/test_video_reader.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/test_video_reader.py b/test/test_video_reader.py
index 6d556b9802e..c3b0487f153 100644
--- a/test/test_video_reader.py
+++ b/test/test_video_reader.py
@@ -458,7 +458,9 @@ def test_read_video_from_file(self, test_video, config):
 
     @pytest.mark.parametrize("test_video,config", test_videos.items())
     @pytest.mark.parametrize("read_video_stream,read_audio_stream", [(1, 0), (0, 1)])
-    def test_read_video_from_file_read_single_stream_only(self, test_video, config, read_video_stream, read_audio_stream):
+    def test_read_video_from_file_read_single_stream_only(
+        self, test_video, config, read_video_stream, read_audio_stream
+    ):
         """
         Test the case when decoder starts with a video file to decode frames, and
         only reads video stream and ignores audio stream