From 32cd4b4477be0978956ba62bed52c6be43446856 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 25 Oct 2024 13:03:23 +0100
Subject: [PATCH 1/6] Honor preAllocatedOutputTensor with filtergraph

---
 .../decoders/_core/VideoDecoder.cpp           | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
index 8c9f43635..47915a80b 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -890,6 +890,14 @@ VideoDecoder::DecodedOutput VideoDecoder::convertAVFrameToDecodedOutput(
   return output;
 }
 
+// Note [preAllocatedOutputTensor with swscale and filtergraph]:
+// Callers may pass a pre-allocated tensor, where the output frame tensor will
+// be stored. This parameter is honored in any case, but it only leads to a
+// speed-up when swscale is used. With swscale, we can tell ffmpeg to place the
+// decoded frame directly into `preAllocatedtensor.data_ptr()`. That's not
+// possible with filtegraph, leading to an extra copy.
+// Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
+// `dimension_order` parameter. It's up to callers to re-shape it if needed.
 void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
     VideoDecoder::RawDecodedOutput& rawOutput,
     DecodedOutput& output,
@@ -926,6 +934,9 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
         streamInfo.colorConversionLibrary ==
         ColorConversionLibrary::FILTERGRAPH) {
       output.frame = convertFrameToTensorUsingFilterGraph(streamIndex, frame);
+      if (preAllocatedOutputTensor.has_value()) {
+        preAllocatedOutputTensor.value().copy_(output.frame);
+      }
     } else {
       throw std::runtime_error(
           "Invalid color conversion library: " +
@@ -1077,10 +1088,6 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
     } else {
       DecodedOutput singleOut = getFrameAtIndex(
           streamIndex, indexInVideo, output.frames[indexInOutput]);
-      if (options.colorConversionLibrary ==
-          ColorConversionLibrary::FILTERGRAPH) {
-        output.frames[indexInOutput] = singleOut.frame;
-      }
       output.ptsSeconds[indexInOutput] = singleOut.ptsSeconds;
       output.durationSeconds[indexInOutput] = singleOut.durationSeconds;
     }
@@ -1157,9 +1164,6 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange(
 
   for (int64_t i = start, f = 0; i < stop; i += step, ++f) {
     DecodedOutput singleOut = getFrameAtIndex(streamIndex, i, output.frames[f]);
-    if (options.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
-      output.frames[f] = singleOut.frame;
-    }
     output.ptsSeconds[f] = singleOut.ptsSeconds;
     output.durationSeconds[f] = singleOut.durationSeconds;
   }
@@ -1253,9 +1257,6 @@ VideoDecoder::getFramesDisplayedByTimestampInRange(
   BatchDecodedOutput output(numFrames, options, streamMetadata);
   for (int64_t i = startFrameIndex, f = 0; i < stopFrameIndex; ++i, ++f) {
     DecodedOutput singleOut = getFrameAtIndex(streamIndex, i, output.frames[f]);
-    if (options.colorConversionLibrary == ColorConversionLibrary::FILTERGRAPH) {
-      output.frames[f] = singleOut.frame;
-    }
     output.ptsSeconds[f] = singleOut.ptsSeconds;
     output.durationSeconds[f] = singleOut.durationSeconds;
   }

From afc2fcd6fa53d823f7406ec68fe5719d945d9929 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 25 Oct 2024 14:33:36 +0100
Subject: [PATCH 2/6] make sure ptrs are the same

---
 src/torchcodec/decoders/_core/VideoDecoder.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
index 47915a80b..68ab6e673 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -905,9 +905,9 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
   int streamIndex = rawOutput.streamIndex;
   AVFrame* frame = rawOutput.frame.get();
   auto& streamInfo = streams_[streamIndex];
+  torch::Tensor tensor;
   if (output.streamType == AVMEDIA_TYPE_VIDEO) {
     if (streamInfo.colorConversionLibrary == ColorConversionLibrary::SWSCALE) {
-      torch::Tensor tensor;
       int width = streamInfo.options.width.value_or(frame->width);
       int height = streamInfo.options.height.value_or(frame->height);
       if (preAllocatedOutputTensor.has_value()) {
@@ -933,9 +933,12 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
     } else if (
         streamInfo.colorConversionLibrary ==
         ColorConversionLibrary::FILTERGRAPH) {
-      output.frame = convertFrameToTensorUsingFilterGraph(streamIndex, frame);
+      tensor = convertFrameToTensorUsingFilterGraph(streamIndex, frame);
       if (preAllocatedOutputTensor.has_value()) {
-        preAllocatedOutputTensor.value().copy_(output.frame);
+        preAllocatedOutputTensor.value().copy_(tensor);
+        output.frame = preAllocatedOutputTensor.value();
+      } else {
+        output.frame = tensor;
       }
     } else {
       throw std::runtime_error(

From 6d83114b1a97a69401687d8326423874303b4400 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 25 Oct 2024 15:11:18 +0100
Subject: [PATCH 3/6] Add C++ test for ptr equality

---
 test/decoders/VideoDecoderTest.cpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp
index e8d863410..d5f01b343 100644
--- a/test/decoders/VideoDecoderTest.cpp
+++ b/test/decoders/VideoDecoderTest.cpp
@@ -223,6 +223,28 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNCHW) {
   EXPECT_TRUE(torch::equal(tensor[1], tensorTime6FromFFMPEG));
 }
 
+void testDecoderWithColorConversion(const std::string& colorConversionLibrary) {
+  std::string path = getResourcePath("nasa_13013.mp4");
+  auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8});
+
+  std::unique_ptr<VideoDecoder> ourDecoder =
+      createDecoderFromPath(path, GetParam());
+  ourDecoder->scanFileAndUpdateMetadataAndIndex();
+  int bestVideoStreamIndex =
+      *ourDecoder->getContainerMetadata().bestVideoStreamIndex;
+  ourDecoder->addVideoStreamDecoder(
+      bestVideoStreamIndex,
+      VideoDecoder::VideoStreamDecoderOptions(
+          "color_conversion_library=" + colorConversionLibrary));
+  auto output = ourDecoder->getFrameAtIndex(
+      bestVideoStreamIndex, 0, preAllocatedOutputTensor);
+  EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr());
+}
+TEST_P(VideoDecoderTest, PreAllocatedTensor) {
+  testDecoderWithColorConversion("swscale");
+  testDecoderWithColorConversion("filtergraph");
+}
+
 TEST_P(VideoDecoderTest, DecodesFramesInABatchInNHWC) {
   std::string path = getResourcePath("nasa_13013.mp4");
   std::unique_ptr<VideoDecoder> ourDecoder =

From 91df09a4f820a7267e592a22b6a98ecbdfa57d4c Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 25 Oct 2024 15:12:14 +0100
Subject: [PATCH 4/6] Update comment

---
 src/torchcodec/decoders/_core/VideoDecoder.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
index 68ab6e673..b1c4e6ca4 100644
--- a/src/torchcodec/decoders/_core/VideoDecoder.cpp
+++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -894,8 +894,9 @@ VideoDecoder::DecodedOutput VideoDecoder::convertAVFrameToDecodedOutput(
 // Callers may pass a pre-allocated tensor, where the output frame tensor will
 // be stored. This parameter is honored in any case, but it only leads to a
 // speed-up when swscale is used. With swscale, we can tell ffmpeg to place the
-// decoded frame directly into `preAllocatedtensor.data_ptr()`. That's not
-// possible with filtegraph, leading to an extra copy.
+// decoded frame directly into `preAllocatedtensor.data_ptr()`. We haven't yet
+// found a way to do that with filtegraph.
+// TODO: Figure out whether that's possilbe!
 // Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
 // `dimension_order` parameter. It's up to callers to re-shape it if needed.
 void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(

From bc1acd3c5e931d10d8b2f87af0e0a4e2098ad95b Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 28 Oct 2024 16:21:00 +0000
Subject: [PATCH 5/6] Just duplicate the thing

---
 packaging/check_glibcxx.py         |  4 ++-
 test/decoders/VideoDecoderTest.cpp | 58 ++++++++++++++++++------------
 2 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/packaging/check_glibcxx.py b/packaging/check_glibcxx.py
index 37ff654c7..b7efd9813 100644
--- a/packaging/check_glibcxx.py
+++ b/packaging/check_glibcxx.py
@@ -46,7 +46,9 @@
         all_symbols.add(match.group(0))
 
 if not all_symbols:
-    raise ValueError(f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong.")
+    raise ValueError(
+        f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong."
+    )
 
 all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols)
 all_versions = (tuple(int(v) for v in version) for version in all_versions)
diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp
index 3827eee48..389d47f4f 100644
--- a/test/decoders/VideoDecoderTest.cpp
+++ b/test/decoders/VideoDecoderTest.cpp
@@ -223,28 +223,6 @@ TEST_P(VideoDecoderTest, DecodesFramesInABatchInNCHW) {
   EXPECT_TRUE(torch::equal(tensor[1], tensorTime6FromFFMPEG));
 }
 
-void testDecoderWithColorConversion(const std::string& colorConversionLibrary) {
-  std::string path = getResourcePath("nasa_13013.mp4");
-  auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8});
-
-  std::unique_ptr<VideoDecoder> ourDecoder =
-      createDecoderFromPath(path, GetParam());
-  ourDecoder->scanFileAndUpdateMetadataAndIndex();
-  int bestVideoStreamIndex =
-      *ourDecoder->getContainerMetadata().bestVideoStreamIndex;
-  ourDecoder->addVideoStreamDecoder(
-      bestVideoStreamIndex,
-      VideoDecoder::VideoStreamDecoderOptions(
-          "color_conversion_library=" + colorConversionLibrary));
-  auto output = ourDecoder->getFrameAtIndex(
-      bestVideoStreamIndex, 0, preAllocatedOutputTensor);
-  EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr());
-}
-TEST_P(VideoDecoderTest, PreAllocatedTensor) {
-  testDecoderWithColorConversion("swscale");
-  testDecoderWithColorConversion("filtergraph");
-}
-
 TEST_P(VideoDecoderTest, DecodesFramesInABatchInNHWC) {
   std::string path = getResourcePath("nasa_13013.mp4");
   std::unique_ptr<VideoDecoder> ourDecoder =
@@ -409,6 +387,42 @@ TEST_P(VideoDecoderTest, SeeksToFrameWithSpecificPts) {
   }
 }
 
+TEST_P(VideoDecoderTest, PreAllocatedTensorFilterGraph) {
+  std::string path = getResourcePath("nasa_13013.mp4");
+  auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8});
+
+  std::unique_ptr<VideoDecoder> ourDecoder =
+      VideoDecoderTest::createDecoderFromPath(path, GetParam());
+  ourDecoder->scanFileAndUpdateMetadataAndIndex();
+  int bestVideoStreamIndex =
+      *ourDecoder->getContainerMetadata().bestVideoStreamIndex;
+  ourDecoder->addVideoStreamDecoder(
+      bestVideoStreamIndex,
+      VideoDecoder::VideoStreamDecoderOptions(
+          "color_conversion_library=filtergraph"));
+  auto output = ourDecoder->getFrameAtIndex(
+      bestVideoStreamIndex, 0, preAllocatedOutputTensor);
+  EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr());
+}
+
+TEST_P(VideoDecoderTest, PreAllocatedTensorSwscale) {
+  std::string path = getResourcePath("nasa_13013.mp4");
+  auto preAllocatedOutputTensor = torch::empty({270, 480, 3}, {torch::kUInt8});
+
+  std::unique_ptr<VideoDecoder> ourDecoder =
+      VideoDecoderTest::createDecoderFromPath(path, GetParam());
+  ourDecoder->scanFileAndUpdateMetadataAndIndex();
+  int bestVideoStreamIndex =
+      *ourDecoder->getContainerMetadata().bestVideoStreamIndex;
+  ourDecoder->addVideoStreamDecoder(
+      bestVideoStreamIndex,
+      VideoDecoder::VideoStreamDecoderOptions(
+          "color_conversion_library=swscale"));
+  auto output = ourDecoder->getFrameAtIndex(
+      bestVideoStreamIndex, 0, preAllocatedOutputTensor);
+  EXPECT_EQ(output.frame.data_ptr(), preAllocatedOutputTensor.data_ptr());
+}
+
 TEST_P(VideoDecoderTest, GetAudioMetadata) {
   std::string path = getResourcePath("nasa_13013.mp4.audio.mp3");
   std::unique_ptr<VideoDecoder> decoder =

From 9b8b8abe096a4b29c54f3bd31e32365199dfa886 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Mon, 28 Oct 2024 16:21:28 +0000
Subject: [PATCH 6/6] revert

---
 packaging/check_glibcxx.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/packaging/check_glibcxx.py b/packaging/check_glibcxx.py
index b7efd9813..37ff654c7 100644
--- a/packaging/check_glibcxx.py
+++ b/packaging/check_glibcxx.py
@@ -46,9 +46,7 @@
         all_symbols.add(match.group(0))
 
 if not all_symbols:
-    raise ValueError(
-        f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong."
-    )
+    raise ValueError(f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong.")
 
 all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols)
 all_versions = (tuple(int(v) for v in version) for version in all_versions)