diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 4c9d00122..4e7ae4406 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -940,6 +940,16 @@ VideoDecoder::DecodedOutput VideoDecoder::getFrameDisplayedAtTimestampNoDemux( double frameStartTime = ptsToSeconds(frame->pts, stream.timeBase); double frameEndTime = ptsToSeconds(frame->pts + getDuration(frame), stream.timeBase); + if (frameStartTime > seconds) { + // FFMPEG seeked past the frame we are looking for even though we + // set max_ts to be our needed timestamp in avformat_seek_file() + // in maybeSeekToBeforeDesiredPts(). + // This could be a bug in FFMPEG: https://trac.ffmpeg.org/ticket/11137 + // In this case we return the very next frame instead of throwing an + // exception. + // TODO: Maybe log to stderr for Debug builds? + return true; + } return seconds >= frameStartTime && seconds < frameEndTime; }); } diff --git a/test/decoders/test_simple_video_decoder.py b/test/decoders/test_simple_video_decoder.py index e7d4d8915..ff87a63d7 100644 --- a/test/decoders/test_simple_video_decoder.py +++ b/test/decoders/test_simple_video_decoder.py @@ -9,7 +9,7 @@ from torchcodec.decoders import _core, SimpleVideoDecoder -from ..utils import assert_tensor_close, assert_tensor_equal, NASA_VIDEO +from ..utils import assert_tensor_close, assert_tensor_equal, H265_VIDEO, NASA_VIDEO class TestSimpleDecoder: @@ -320,6 +320,15 @@ def test_get_frame_displayed_at(self): assert isinstance(decoder.get_frame_displayed_at(6.02).pts_seconds, float) assert isinstance(decoder.get_frame_displayed_at(6.02).duration_seconds, float) + def test_get_frame_displayed_at_h265(self): + decoder = SimpleVideoDecoder(H265_VIDEO.path) + # Note that for H265, FFMPEG's seeking is not precise. Even though we ask to + # seek with a max_ts=0.5, FFMPEG will seek beyond that point. 
+ # TODO: Revert to using frame5 in the test below once it's fixed upstream: + # https://trac.ffmpeg.org/ticket/11137 + ref_frame6 = H265_VIDEO.get_frame_by_name("frame000006") + assert_tensor_equal(ref_frame6, decoder.get_frame_displayed_at(0.5).data) + def test_get_frame_displayed_at_fails(self): decoder = SimpleVideoDecoder(NASA_VIDEO.path) diff --git a/test/generate_reference_resources.sh b/test/generate_reference_resources.sh index 7f9985092..1fdb84deb 100755 --- a/test/generate_reference_resources.sh +++ b/test/generate_reference_resources.sh @@ -42,8 +42,19 @@ ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3" # TODO: Add frames decoded by Nvidia's NVDEC. +# This video was generated by running the following: +# conda install -c conda-forge x265 +# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz +# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y +VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4 +FRAMES=(6) +for frame in "${FRAMES[@]}"; do + frame_name=$(printf "%06d" "$frame") + ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.frame$frame_name.bmp" +done + for bmp in "$RESOURCES_DIR"/*.bmp do - python3 convert_image_to_tensor.py "$bmp" + python3 "$TORCHCODEC_PATH/test/convert_image_to_tensor.py" "$bmp" rm -f "$bmp" done diff --git a/test/resources/h265_video.mp4 b/test/resources/h265_video.mp4 new file mode 100644 index 000000000..33b8457f3 Binary files /dev/null and b/test/resources/h265_video.mp4 differ diff --git a/test/resources/h265_video.mp4.frame000006.pt b/test/resources/h265_video.mp4.frame000006.pt new file mode 100644 index 000000000..1fda757ca 
Binary files /dev/null and b/test/resources/h265_video.mp4.frame000006.pt differ diff --git a/test/utils.py b/test/utils.py index 3e04c7658..16b1dfeff 100644 --- a/test/utils.py +++ b/test/utils.py @@ -152,3 +152,17 @@ def empty_chw_tensor(self) -> torch.Tensor: # When we start actually decoding audio-only files, we'll probably need to define # a TestAudio class with audio specific values. Until then, we only need a filename. NASA_AUDIO = TestContainerFile(filename="nasa_13013.mp4.audio.mp3", frames={}) + +H265_VIDEO = TestVideo( + filename="h265_video.mp4", + height=128, + width=128, + num_color_channels=3, + # TODO_OPEN_ISSUE Scott: improve the testing framework so that these values are loaded from a JSON + # file and not hardcoded. These values were copied over by hand from the JSON + # output from the following command: + # $ ffprobe -v error -hide_banner -select_streams v:0 -show_frames -of json test/resources/h265_video.mp4 > out.json + frames={ + 6: TestFrameInfo(pts_seconds=0.6, duration_seconds=0.1), + }, +)