From 32cd4b4477be0978956ba62bed52c6be43446856 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 25 Oct 2024 13:03:23 +0100 Subject: [PATCH 1/6] Honor preAllocatedOutputTensor with filtergraph --- .../decoders/_core/VideoDecoder.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 8c9f43635..47915a80b 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -890,6 +890,14 @@ VideoDecoder::DecodedOutput VideoDecoder::convertAVFrameToDecodedOutput( return output; } +// Note [preAllocatedOutputTensor with swscale and filtergraph]: +// Callers may pass a pre-allocated tensor, where the output frame tensor will +// be stored. This parameter is honored in any case, but it only leads to a +// speed-up when swscale is used. With swscale, we can tell ffmpeg to place the +// decoded frame directly into `preAllocatedtensor.data_ptr()`. That's not +// possible with filtegraph, leading to an extra copy. +// Dimension order of the preAllocatedOutputTensor must be HWC, regardless of +// `dimension_order` parameter. It's up to callers to re-shape it if needed. void VideoDecoder::convertAVFrameToDecodedOutputOnCPU( VideoDecoder::RawDecodedOutput& rawOutput, DecodedOutput& output, @@ -926,6 +934,9 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU( streamInfo.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) { output.frame = convertFrameToTensorUsingFilterGraph(streamIndex, frame); + if (preAllocatedOutputTensor.has_value()) { + preAllocatedOutputTensor.value().copy_(output.frame); + } } else { throw std::runtime_error( "Invalid color conversion library: " + @@ -1077,10 +1088,6 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices( } else { DecodedOutput singleOut = getFrameAtIndex( streamIndex, indexInVideo, output.frames[indexInOutput]); - if (options.colorConversionLibrary == - ColorConversionLibrary::FILTERGRAPH) { - output.frames[indexInOutput] = singleOut.frame; - } output.ptsSeconds[indexInOutput] = singleOut.ptsSeconds; output.durationSeconds[indexInOutput] = singleOut.durationSeconds; } @@ -1157,9 +1164,6 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange( for (int64_t i = start, f = 0; i < stop; i += step, ++f) { DecodedOutput singleOut = getFrameAtIndex(streamIndex, i, output.frames[f]); - if (options.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) { - output.frames[f] = singleOut.frame; - } output.ptsSeconds[f] = singleOut.ptsSeconds; output.durationSeconds[f] = singleOut.durationSeconds; } @@ -1253,9 +1257,6 @@ VideoDecoder::getFramesDisplayedByTimestampInRange( BatchDecodedOutput output(numFrames, options, streamMetadata); for (int64_t i = startFrameIndex, f = 0; i < stopFrameIndex; ++i, ++f) { DecodedOutput singleOut = getFrameAtIndex(streamIndex, i, output.frames[f]); - if (options.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) { - output.frames[f] = singleOut.frame; - } output.ptsSeconds[f] = singleOut.ptsSeconds; output.durationSeconds[f] = singleOut.durationSeconds; } From afc2fcd6fa53d823f7406ec68fe5719d945d9929 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 25 Oct 2024 14:33:36 +0100 Subject: [PATCH 2/6] make sure ptrs are the same --- src/torchcodec/decoders/_core/VideoDecoder.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 47915a80b..68ab6e673 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -905,9 +905,9 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU( int streamIndex = rawOutput.streamIndex; AVFrame* frame = rawOutput.frame.get(); auto& streamInfo = streams_[streamIndex]; + torch::Tensor tensor; if (output.streamType == AVMEDIA_TYPE_VIDEO) { if (streamInfo.colorConversionLibrary == ColorConversionLibrary::SWSCALE) { - torch::Tensor tensor; int width = streamInfo.options.width.value_or(frame->width); int height = streamInfo.options.height.value_or(frame->height); if (preAllocatedOutputTensor.has_value()) { @@ -933,9 +933,12 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU( } else if ( streamInfo.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) { - output.frame = convertFrameToTensorUsingFilterGraph(streamIndex, frame); + tensor = convertFrameToTensorUsingFilterGraph(streamIndex, frame); if (preAllocatedOutputTensor.has_value()) { - preAllocatedOutputTensor.value().copy_(output.frame); + preAllocatedOutputTensor.value().copy_(tensor); + output.frame = preAllocatedOutputTensor.value(); + } else { + output.frame = tensor; } } else { throw std::runtime_error( From 6d83114b1a97a69401687d8326423874303b4400 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 25 Oct 2024 15:11:18 +0100 Subject: [PATCH 3/6] Add C++ test for ptr equality --- test/decoders/VideoDecoderTest.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp index e8d863410..d5f01b343 100644 --- a/test/decoders/VideoDecoderTest.cpp +++ b/test/decoders/VideoDecoderTest.cpp @@ -223,6 +223,28 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNCHW) { EXPECT_TRUE(torch::equal(tensor[1], tensorTime6FromFFMPEG)); } +void testDecoderWithColorConversion(const std::string& colorConversionLibrary) { + std::string path = getResourcePath("nasa_13013.mp4"); + auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8}); + + std::unique_ptr ourDecoder = + createDecoderFromPath(path, GetParam()); + ourDecoder->scanFileAndUpdateMetadataAndIndex(); + int bestVideoStreamIndex = + *ourDecoder->getContainerMetadata().bestVideoStreamIndex; + ourDecoder->addVideoStreamDecoder( + bestVideoStreamIndex, + VideoDecoder::VideoStreamDecoderOptions( + "color_conversion_library=" + colorConversionLibrary)); + auto output = ourDecoder->getFrameAtIndex( + bestVideoStreamIndex, 0, preAllocatedOutputTensor); + EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr()); +} +TEST_P(VideoDecoderTest, PreAllocatedTensor) { + testDecoderWithColorConversion("swscale"); + testDecoderWithColorConversion("filtergraph"); +} + TEST_P(VideoDecoderTest, DecodesFramesInABatchInNHWC) { std::string path = getResourcePath("nasa_13013.mp4"); std::unique_ptr ourDecoder = From 91df09a4f820a7267e592a22b6a98ecbdfa57d4c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 25 Oct 2024 15:12:14 +0100 Subject: [PATCH 4/6] Update comment --- src/torchcodec/decoders/_core/VideoDecoder.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 68ab6e673..b1c4e6ca4 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -894,8 +894,9 @@ VideoDecoder::DecodedOutput VideoDecoder::convertAVFrameToDecodedOutput( // Callers may pass a pre-allocated tensor, where the output frame tensor will // be stored. This parameter is honored in any case, but it only leads to a // speed-up when swscale is used. With swscale, we can tell ffmpeg to place the -// decoded frame directly into `preAllocatedtensor.data_ptr()`. That's not -// possible with filtegraph, leading to an extra copy. +// decoded frame directly into `preAllocatedtensor.data_ptr()`. We haven't yet +// found a way to do that with filtegraph. +// TODO: Figure out whether that's possilbe! // Dimension order of the preAllocatedOutputTensor must be HWC, regardless of // `dimension_order` parameter. It's up to callers to re-shape it if needed. void VideoDecoder::convertAVFrameToDecodedOutputOnCPU( From bc1acd3c5e931d10d8b2f87af0e0a4e2098ad95b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 28 Oct 2024 16:21:00 +0000 Subject: [PATCH 5/6] Just duplicate the thing --- packaging/check_glibcxx.py | 4 ++- test/decoders/VideoDecoderTest.cpp | 58 ++++++++++++++++++------------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/packaging/check_glibcxx.py b/packaging/check_glibcxx.py index 37ff654c7..b7efd9813 100644 --- a/packaging/check_glibcxx.py +++ b/packaging/check_glibcxx.py @@ -46,7 +46,9 @@ all_symbols.add(match.group(0)) if not all_symbols: - raise ValueError(f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong.") + raise ValueError( + f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong." + ) all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols) all_versions = (tuple(int(v) for v in version) for version in all_versions) diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp index 3827eee48..389d47f4f 100644 --- a/test/decoders/VideoDecoderTest.cpp +++ b/test/decoders/VideoDecoderTest.cpp @@ -223,28 +223,6 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNCHW) { EXPECT_TRUE(torch::equal(tensor[1], tensorTime6FromFFMPEG)); } -void testDecoderWithColorConversion(const std::string& colorConversionLibrary) { - std::string path = getResourcePath("nasa_13013.mp4"); - auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8}); - - std::unique_ptr ourDecoder = - createDecoderFromPath(path, GetParam()); - ourDecoder->scanFileAndUpdateMetadataAndIndex(); - int bestVideoStreamIndex = - *ourDecoder->getContainerMetadata().bestVideoStreamIndex; - ourDecoder->addVideoStreamDecoder( - bestVideoStreamIndex, - VideoDecoder::VideoStreamDecoderOptions( - "color_conversion_library=" + colorConversionLibrary)); - auto output = ourDecoder->getFrameAtIndex( - bestVideoStreamIndex, 0, preAllocatedOutputTensor); - EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr()); -} -TEST_P(VideoDecoderTest, PreAllocatedTensor) { - testDecoderWithColorConversion("swscale"); - testDecoderWithColorConversion("filtergraph"); -} - TEST_P(VideoDecoderTest, DecodesFramesInABatchInNHWC) { std::string path = getResourcePath("nasa_13013.mp4"); std::unique_ptr ourDecoder = @@ -409,6 +387,42 @@ TEST_P(VideoDecoderTest, SeeksToFrameWithSpecificPts) { } } +TEST_P(VideoDecoderTest, PreAllocatedTensorFilterGraph) { + std::string path = getResourcePath("nasa_13013.mp4"); + auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8}); + + std::unique_ptr ourDecoder = + VideoDecoderTest::createDecoderFromPath(path, GetParam()); + ourDecoder->scanFileAndUpdateMetadataAndIndex(); + int bestVideoStreamIndex = + *ourDecoder->getContainerMetadata().bestVideoStreamIndex; + ourDecoder->addVideoStreamDecoder( + bestVideoStreamIndex, + VideoDecoder::VideoStreamDecoderOptions( + "color_conversion_library=filtergraph")); + auto output = ourDecoder->getFrameAtIndex( + bestVideoStreamIndex, 0, preAllocatedOutputTensor); + EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr()); +} + +TEST_P(VideoDecoderTest, PreAllocatedTensorSwscale) { + std::string path = getResourcePath("nasa_13013.mp4"); + auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8}); + + std::unique_ptr ourDecoder = + VideoDecoderTest::createDecoderFromPath(path, GetParam()); + ourDecoder->scanFileAndUpdateMetadataAndIndex(); + int bestVideoStreamIndex = + *ourDecoder->getContainerMetadata().bestVideoStreamIndex; + ourDecoder->addVideoStreamDecoder( + bestVideoStreamIndex, + VideoDecoder::VideoStreamDecoderOptions( + "color_conversion_library=swscale")); + auto output = ourDecoder->getFrameAtIndex( + bestVideoStreamIndex, 0, preAllocatedOutputTensor); + EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr()); +} + TEST_P(VideoDecoderTest, GetAudioMetadata) { std::string path = getResourcePath("nasa_13013.mp4.audio.mp3"); std::unique_ptr decoder = From 9b8b8abe096a4b29c54f3bd31e32365199dfa886 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 28 Oct 2024 16:21:28 +0000 Subject: [PATCH 6/6] revert --- packaging/check_glibcxx.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packaging/check_glibcxx.py b/packaging/check_glibcxx.py index b7efd9813..37ff654c7 100644 --- a/packaging/check_glibcxx.py +++ b/packaging/check_glibcxx.py @@ -46,9 +46,7 @@ all_symbols.add(match.group(0)) if not all_symbols: - raise ValueError( - f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong." - ) + raise ValueError(f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong.") all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols) all_versions = (tuple(int(v) for v in version) for version in all_versions)