From 3910fcea676decb5483394923b5b3e383577eb58 Mon Sep 17 00:00:00 2001
From: Prabhat Roy
Date: Mon, 28 Feb 2022 15:56:01 +0000
Subject: [PATCH] Improve test_videoapi

---
 test/test_videoapi.py | 296 ++++++++++++++++++++----------------------
 1 file changed, 144 insertions(+), 152 deletions(-)

diff --git a/test/test_videoapi.py b/test/test_videoapi.py
index c384ade2878..895b9b83555 100644
--- a/test/test_videoapi.py
+++ b/test/test_videoapi.py
@@ -52,139 +52,131 @@ def fate(name, path="."):
 @pytest.mark.skipif(_HAS_VIDEO_OPT is False, reason="Didn't compile with ffmpeg")
 class TestVideoApi:
     @pytest.mark.skipif(av is None, reason="PyAV unavailable")
-    def test_frame_reading(self):
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            with av.open(full_path) as av_reader:
-                is_video = True if av_reader.streams.video else False
-
-                if is_video:
-                    av_frames, vr_frames = [], []
-                    av_pts, vr_pts = [], []
-                    # get av frames
-                    for av_frame in av_reader.decode(av_reader.streams.video[0]):
-                        av_frames.append(torch.tensor(av_frame.to_rgb().to_ndarray()).permute(2, 0, 1))
-                        av_pts.append(av_frame.pts * av_frame.time_base)
-
-                    # get vr frames
-                    video_reader = VideoReader(full_path, "video")
-                    for vr_frame in video_reader:
-                        vr_frames.append(vr_frame["data"])
-                        vr_pts.append(vr_frame["pts"])
-
-                    # same number of frames
-                    assert len(vr_frames) == len(av_frames)
-                    assert len(vr_pts) == len(av_pts)
-
-                    # compare the frames and ptss
-                    for i in range(len(vr_frames)):
-                        assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
-                        mean_delta = torch.mean(torch.abs(av_frames[i].float() - vr_frames[i].float()))
-                        # on average the difference is very small and caused
-                        # by decoding (around 1%)
-                        # TODO: asses empirically how to set this? atm it's 1%
-                        # averaged over all frames
-                        assert mean_delta.item() < 2.55
-
-                    del vr_frames, av_frames, vr_pts, av_pts
-
-            # test audio reading compared to PYAV
-            with av.open(full_path) as av_reader:
-                is_audio = True if av_reader.streams.audio else False
-
-                if is_audio:
-                    av_frames, vr_frames = [], []
-                    av_pts, vr_pts = [], []
-                    # get av frames
-                    for av_frame in av_reader.decode(av_reader.streams.audio[0]):
-                        av_frames.append(torch.tensor(av_frame.to_ndarray()).permute(1, 0))
-                        av_pts.append(av_frame.pts * av_frame.time_base)
-                    av_reader.close()
-
-                    # get vr frames
-                    video_reader = VideoReader(full_path, "audio")
-                    for vr_frame in video_reader:
-                        vr_frames.append(vr_frame["data"])
-                        vr_pts.append(vr_frame["pts"])
-
-                    # same number of frames
-                    assert len(vr_frames) == len(av_frames)
-                    assert len(vr_pts) == len(av_pts)
-
-                    # compare the frames and ptss
-                    for i in range(len(vr_frames)):
-                        assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
-                        max_delta = torch.max(torch.abs(av_frames[i].float() - vr_frames[i].float()))
-                        # we assure that there is never more than 1% difference in signal
-                        assert max_delta.item() < 0.001
-
-    def test_metadata(self):
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_frame_reading(self, test_video):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        with av.open(full_path) as av_reader:
+            if av_reader.streams.video:
+                av_frames, vr_frames = [], []
+                av_pts, vr_pts = [], []
+                # get av frames
+                for av_frame in av_reader.decode(av_reader.streams.video[0]):
+                    av_frames.append(torch.tensor(av_frame.to_rgb().to_ndarray()).permute(2, 0, 1))
+                    av_pts.append(av_frame.pts * av_frame.time_base)
+
+                # get vr frames
+                video_reader = VideoReader(full_path, "video")
+                for vr_frame in video_reader:
+                    vr_frames.append(vr_frame["data"])
+                    vr_pts.append(vr_frame["pts"])
+
+                # same number of frames
+                assert len(vr_frames) == len(av_frames)
+                assert len(vr_pts) == len(av_pts)
+
+                # compare the frames and pts values
+                for i in range(len(vr_frames)):
+                    assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
+                    mean_delta = torch.mean(torch.abs(av_frames[i].float() - vr_frames[i].float()))
+                    # on average the difference is very small and caused
+                    # by decoding (around 1%)
+                    # TODO: assess empirically how to set this? atm it's 1%
+                    # averaged over all frames
+                    assert mean_delta.item() < 2.55
+
+                del vr_frames, av_frames, vr_pts, av_pts
+
+        # test audio reading compared to PYAV
+        with av.open(full_path) as av_reader:
+            if av_reader.streams.audio:
+                av_frames, vr_frames = [], []
+                av_pts, vr_pts = [], []
+                # get av frames
+                for av_frame in av_reader.decode(av_reader.streams.audio[0]):
+                    av_frames.append(torch.tensor(av_frame.to_ndarray()).permute(1, 0))
+                    av_pts.append(av_frame.pts * av_frame.time_base)
+                av_reader.close()
+
+                # get vr frames
+                video_reader = VideoReader(full_path, "audio")
+                for vr_frame in video_reader:
+                    vr_frames.append(vr_frame["data"])
+                    vr_pts.append(vr_frame["pts"])
+
+                # same number of frames
+                assert len(vr_frames) == len(av_frames)
+                assert len(vr_pts) == len(av_pts)
+
+                # compare the frames and pts values
+                for i in range(len(vr_frames)):
+                    assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
+                    max_delta = torch.max(torch.abs(av_frames[i].float() - vr_frames[i].float()))
+                    # we ensure that there is never more than 1% difference in signal
+                    assert max_delta.item() < 0.001
+
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_metadata(self, test_video, config):
         """
         Test that the metadata returned via pyav
         corresponds to the one returned by the new video decoder API
         """
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-            reader = VideoReader(full_path, "video")
-            reader_md = reader.get_metadata()
-            assert config.video_fps == approx(reader_md["video"]["fps"][0], abs=0.0001)
-            assert config.duration == approx(reader_md["video"]["duration"][0], abs=0.5)
-
-    def test_seek_start(self):
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            video_reader = VideoReader(full_path, "video")
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        reader = VideoReader(full_path, "video")
+        reader_md = reader.get_metadata()
+        assert config.video_fps == approx(reader_md["video"]["fps"][0], abs=0.0001)
+        assert config.duration == approx(reader_md["video"]["duration"][0], abs=0.5)
+
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_seek_start(self, test_video):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        video_reader = VideoReader(full_path, "video")
+        num_frames = 0
+        for _ in video_reader:
+            num_frames += 1
+
+        # now seek the container to 0 and do it again
+        # It's often the case that seeking to the start is imprecise
+        # and decoding doesn't begin exactly at 0
+        video_reader.seek(0)
+        start_num_frames = 0
+        for _ in video_reader:
+            start_num_frames += 1
+
+        assert start_num_frames == num_frames
+
+        # now seek the container to < 0 to check for unexpected behaviour
+        video_reader.seek(-1)
+        start_num_frames = 0
+        for _ in video_reader:
+            start_num_frames += 1
+
+        assert start_num_frames == num_frames
+
+    @pytest.mark.parametrize("test_video", test_videos.keys())
+    def test_accurateseek_middle(self, test_video):
+        full_path = os.path.join(VIDEO_DIR, test_video)
+        stream = "video"
+        video_reader = VideoReader(full_path, stream)
+        md = video_reader.get_metadata()
+        duration = md[stream]["duration"][0]
+        if duration is not None:
             num_frames = 0
-            for frame in video_reader:
+            for _ in video_reader:
                 num_frames += 1
 
-            # now seek the container to 0 and do it again
-            # It's often that starting seek can be inprecise
-            # this way and it doesn't start at 0
-            video_reader.seek(0)
-            start_num_frames = 0
-            for frame in video_reader:
-                start_num_frames += 1
-
-            assert start_num_frames == num_frames
-
-            # now seek the container to < 0 to check for unexpected behaviour
-            video_reader.seek(-1)
-            start_num_frames = 0
-            for frame in video_reader:
-                start_num_frames += 1
-
-            assert start_num_frames == num_frames
-
-    def test_accurateseek_middle(self):
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
+            video_reader.seek(duration / 2)
+            middle_num_frames = 0
+            for _ in video_reader:
+                middle_num_frames += 1
 
-            stream = "video"
-            video_reader = VideoReader(full_path, stream)
-            md = video_reader.get_metadata()
-            duration = md[stream]["duration"][0]
-            if duration is not None:
+            assert middle_num_frames < num_frames
+            assert middle_num_frames == approx(num_frames // 2, abs=1)
 
-                num_frames = 0
-                for frame in video_reader:
-                    num_frames += 1
-
-                video_reader.seek(duration / 2)
-                middle_num_frames = 0
-                for frame in video_reader:
-                    middle_num_frames += 1
-
-                assert middle_num_frames < num_frames
-                assert middle_num_frames == approx(num_frames // 2, abs=1)
-
-                video_reader.seek(duration / 2)
-                frame = next(video_reader)
-                lb = duration / 2 - 1 / md[stream]["fps"][0]
-                ub = duration / 2 + 1 / md[stream]["fps"][0]
-                assert (lb <= frame["pts"]) and (ub >= frame["pts"])
+            video_reader.seek(duration / 2)
+            frame = next(video_reader)
+            lb = duration / 2 - 1 / md[stream]["fps"][0]
+            ub = duration / 2 + 1 / md[stream]["fps"][0]
+            assert (lb <= frame["pts"]) and (ub >= frame["pts"])
 
     def test_fate_suite(self):
         # TODO: remove the try-except statement once the connectivity issues are resolved
@@ -199,41 +191,41 @@ def test_fate_suite(self):
         os.remove(video_path)
 
     @pytest.mark.skipif(av is None, reason="PyAV unavailable")
-    def test_keyframe_reading(self):
-        for test_video, config in test_videos.items():
-            full_path = os.path.join(VIDEO_DIR, test_video)
-
-            av_reader = av.open(full_path)
-            # reduce streams to only keyframes
-            av_stream = av_reader.streams.video[0]
-            av_stream.codec_context.skip_frame = "NONKEY"
+    @pytest.mark.parametrize("test_video,config", test_videos.items())
+    def test_keyframe_reading(self, test_video, config):
+        full_path = os.path.join(VIDEO_DIR, test_video)
 
-            av_keyframes = []
-            vr_keyframes = []
-            if av_reader.streams.video:
+        av_reader = av.open(full_path)
+        # reduce streams to only keyframes
+        av_stream = av_reader.streams.video[0]
+        av_stream.codec_context.skip_frame = "NONKEY"
 
-                # get all keyframes using pyav. Then, seek randomly into video reader
-                # and assert that all the returned values are in AV_KEYFRAMES
+        av_keyframes = []
+        vr_keyframes = []
+        if av_reader.streams.video:
 
-                for av_frame in av_reader.decode(av_stream):
-                    av_keyframes.append(float(av_frame.pts * av_frame.time_base))
+            # get all keyframes using pyav. Then, seek randomly into video reader
+            # and assert that all the returned values are in AV_KEYFRAMES
 
-            if len(av_keyframes) > 1:
-                video_reader = VideoReader(full_path, "video")
-                for i in range(1, len(av_keyframes)):
-                    seek_val = (av_keyframes[i] + av_keyframes[i - 1]) / 2
-                    data = next(video_reader.seek(seek_val, True))
-                    vr_keyframes.append(data["pts"])
+            for av_frame in av_reader.decode(av_stream):
+                av_keyframes.append(float(av_frame.pts * av_frame.time_base))
 
-                data = next(video_reader.seek(config.duration, True))
+        if len(av_keyframes) > 1:
+            video_reader = VideoReader(full_path, "video")
+            for i in range(1, len(av_keyframes)):
+                seek_val = (av_keyframes[i] + av_keyframes[i - 1]) / 2
+                data = next(video_reader.seek(seek_val, True))
                 vr_keyframes.append(data["pts"])
 
-            assert len(av_keyframes) == len(vr_keyframes)
-            # NOTE: this video gets different keyframe with different
-            # loaders (0.333 pyav, 0.666 for us)
-            if test_video != "TrumanShow_wave_f_nm_np1_fr_med_26.avi":
-                for i in range(len(av_keyframes)):
-                    assert av_keyframes[i] == approx(vr_keyframes[i], rel=0.001)
+            data = next(video_reader.seek(config.duration, True))
+            vr_keyframes.append(data["pts"])
+
+        assert len(av_keyframes) == len(vr_keyframes)
+        # NOTE: this video gets a different keyframe with different
+        # loaders (0.333 pyav, 0.666 for us)
+        if test_video != "TrumanShow_wave_f_nm_np1_fr_med_26.avi":
+            for i in range(len(av_keyframes)):
+                assert av_keyframes[i] == approx(vr_keyframes[i], rel=0.001)
 
 
 if __name__ == "__main__":
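Every test touched by the patch follows the same pattern: the per-test loop "for test_video, config in test_videos.items():" is replaced by a pytest.mark.parametrize decorator, so each video file is collected as its own test case and a failure on one file no longer hides the results for the remaining files. Below is a minimal, self-contained sketch of that pattern; fake_videos and check_video are hypothetical stand-ins used only for illustration and are not part of the torchvision test suite.

import pytest

# hypothetical stand-in for the test_videos dict used in test_videoapi.py
fake_videos = {
    "a.mp4": {"fps": 30.0},
    "b.avi": {"fps": 25.0},
}


def check_video(name, config):
    # stand-in for the real VideoReader / PyAV assertions
    assert config["fps"] > 0


# loop style: one test covers every video, so the first failing file
# stops the loop and the remaining files are never checked
def test_all_videos_loop():
    for name, config in fake_videos.items():
        check_video(name, config)


# parametrized style, as adopted in the patch: one generated test per video
@pytest.mark.parametrize("name,config", fake_videos.items())
def test_each_video(name, config):
    check_video(name, config)

Run under pytest, the parametrized version reports a separate pass or fail for each video and lets a single file be selected with the -k option.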