From 0c7cafb986ca4afd12569986e296d7ce5b1b3dcc Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Mon, 20 Oct 2025 18:53:36 +0100
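Subject: [PATCH 1/3] Add _core._get_backend_details() utility

Add a virtual DeviceInterface::getDetails() that returns a short
human-readable description of the device interface, and override it in
the CPU, FFmpeg CUDA and Beta CUDA interfaces. The CUDA interfaces also
report whether they are using NVDEC or the CPU fallback; for the FFmpeg
CUDA interface this is only known after a frame has been decoded.

The string is exposed through SingleStreamDecoder::getInterfaceDetails()
and the private torchcodec_ns::_get_backend_details op, which
test_set_cuda_backend now uses to assert which backend a decoder uses.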
---
 .../_core/BetaCudaDeviceInterface.cpp         |  5 +++++
 .../_core/BetaCudaDeviceInterface.h           |  2 ++
 src/torchcodec/_core/CpuDeviceInterface.cpp   |  4 ++++
 src/torchcodec/_core/CpuDeviceInterface.h     |  2 ++
 src/torchcodec/_core/CudaDeviceInterface.cpp  | 11 ++++++++++
 src/torchcodec/_core/CudaDeviceInterface.h    |  4 ++++
 src/torchcodec/_core/DeviceInterface.h        |  4 ++++
 src/torchcodec/_core/SingleStreamDecoder.cpp  |  5 +++++
 src/torchcodec/_core/SingleStreamDecoder.h    |  2 ++
 src/torchcodec/_core/__init__.py              |  1 +
 src/torchcodec/_core/custom_ops.cpp           |  8 +++++++
 src/torchcodec/_core/ops.py                   |  6 +++++
 test/test_decoders.py                         | 22 +++----------------
 13 files changed, 57 insertions(+), 19 deletions(-)
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
index 7124e4309..07ed92126 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -699,4 +699,9 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
       avFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor);
 }
 
+std::string BetaCudaDeviceInterface::getDetails() {
+  return std::string("Beta CUDA Device Interface. Using ") +
+      (cpuFallback_ ? "CPU fallback." : "NVDEC.");
+}
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.h b/src/torchcodec/_core/BetaCudaDeviceInterface.h
index 7424a877d..3a9520867 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.h
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.h
@@ -59,6 +59,8 @@ class BetaCudaDeviceInterface : public DeviceInterface {
   int frameReadyForDecoding(CUVIDPICPARAMS* picParams);
   int frameReadyInDisplayOrder(CUVIDPARSERDISPINFO* dispInfo);
 
+  std::string getDetails() override;
+
  private:
   int sendCuvidPacket(CUVIDSOURCEDATAPACKET& cuvidPacket);
 
diff --git a/src/torchcodec/_core/CpuDeviceInterface.cpp b/src/torchcodec/_core/CpuDeviceInterface.cpp
index 0e9b46434..5aa20b09e 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.cpp
+++ b/src/torchcodec/_core/CpuDeviceInterface.cpp
@@ -346,4 +346,8 @@ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph(
   return rgbAVFrameToTensor(filterGraph_->convert(avFrame));
 }
 
+std::string CpuDeviceInterface::getDetails() {
+  return std::string("CPU Device Interface.");
+}
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h
index 9f44c4e8c..3f6f7c962 100644
--- a/src/torchcodec/_core/CpuDeviceInterface.h
+++ b/src/torchcodec/_core/CpuDeviceInterface.h
@@ -39,6 +39,8 @@ class CpuDeviceInterface : public DeviceInterface {
       std::optional<torch::Tensor> preAllocatedOutputTensor =
           std::nullopt) override;
 
+  std::string getDetails() override;
+
  private:
   int convertAVFrameToTensorUsingSwScale(
       const UniqueAVFrame& avFrame,
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index 01fdac827..7963a0ad5 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -284,9 +284,12 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
       frameOutput.data = cpuFrameOutput.data.to(device_);
     }
 
+    usingCPUFallback_ = true;
     return;
   }
 
+  usingCPUFallback_ = false;
+
   // Above we checked that the AVFrame was on GPU, but that's not enough, we
   // also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits),
   // because this is what the NPP color conversion routines expect. This SHOULD
@@ -351,4 +354,12 @@ std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
   return std::nullopt;
 }
 
+std::string CudaDeviceInterface::getDetails() {
+  // Note: for this interface specificaly the fallback is only known after a
+  // frame has been decoded, not before: that's when FFmpeg decides to fallback,
+  // so we can't know earlier.
+  return std::string("FFmpeg CUDA Device Interface. Using ") +
+      (usingCPUFallback_ ? "CPU fallback." : "NVDEC.");
+}
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h
index d240066f4..9f171ee3c 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.h
+++ b/src/torchcodec/_core/CudaDeviceInterface.h
@@ -40,6 +40,8 @@ class CudaDeviceInterface : public DeviceInterface {
       std::optional<torch::Tensor> preAllocatedOutputTensor =
           std::nullopt) override;
 
+  std::string getDetails() override;
+
  private:
   // Our CUDA decoding code assumes NV12 format. In order to handle other
   // kinds of input, we need to convert them to NV12. Our current implementation
@@ -60,6 +62,8 @@ class CudaDeviceInterface : public DeviceInterface {
   // maybeConvertAVFrameToNV12().
   std::unique_ptr<FiltersContext> nv12ConversionContext_;
   std::unique_ptr<FilterGraph> nv12Conversion_;
+
+  bool usingCPUFallback_ = false;
 };
 
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
index 8aad60f24..773317e83 100644
--- a/src/torchcodec/_core/DeviceInterface.h
+++ b/src/torchcodec/_core/DeviceInterface.h
@@ -119,6 +119,10 @@ class DeviceInterface {
     avcodec_flush_buffers(codecContext_.get());
   }
 
+  virtual std::string getDetails() {
+    return "";
+  }
+
  protected:
   torch::Device device_;
   SharedAVCodecContext codecContext_;
diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 2fbc111c1..7a448402f 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1702,4 +1702,9 @@ double SingleStreamDecoder::getPtsSecondsForFrame(int64_t frameIndex) {
       streamInfo.allFrames[frameIndex].pts, streamInfo.timeBase);
 }
 
+std::string SingleStreamDecoder::getInterfaceDetails() const {
+  TORCH_CHECK(deviceInterface_ != nullptr, "Device interface doesn't exist.");
+  return deviceInterface_->getDetails();
+}
+
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
index 06ea0cd04..cd86de96a 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.h
+++ b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -186,6 +186,8 @@ class SingleStreamDecoder {
   DecodeStats getDecodeStats() const;
   void resetDecodeStats();
 
+  std::string getInterfaceDetails() const;
+
  private:
   // --------------------------------------------------------------------------
   // STREAMINFO AND ASSOCIATED STRUCTS
diff --git a/src/torchcodec/_core/__init__.py b/src/torchcodec/_core/__init__.py
index eb8dd9697..b3be25d34 100644
--- a/src/torchcodec/_core/__init__.py
+++ b/src/torchcodec/_core/__init__.py
@@ -14,6 +14,7 @@
 )
 from .ops import (
     _add_video_stream,
+    _get_backend_details,
     _get_key_frame_indices,
     _test_frame_pts_equality,
     add_audio_stream,
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index 466ebe50d..3f71388a1 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -72,6 +72,7 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "get_stream_json_metadata(Tensor(a!) decoder, int stream_index) -> str");
   m.def("_get_json_ffmpeg_library_versions() -> str");
+  m.def("_get_backend_details(Tensor(a!) decoder) -> str");
   m.def(
       "_test_frame_pts_equality(Tensor(a!) decoder, *, int frame_index, float pts_seconds_to_test) -> bool");
   m.def("scan_all_streams_to_update_metadata(Tensor(a!) decoder) -> ()");
@@ -869,6 +870,11 @@ std::string _get_json_ffmpeg_library_versions() {
   return ss.str();
 }
 
+std::string get_backend_details(at::Tensor& decoder) {
+  auto videoDecoder = unwrapTensorToGetDecoder(decoder);
+  return videoDecoder->getInterfaceDetails();
+}
+
 // Scans video packets to get more accurate metadata like frame count, exact
 // keyframe positions, etc. Exact keyframe positions are useful for efficient
 // accurate seeking. Note that this function reads the entire video but it does
@@ -912,6 +918,8 @@ TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) {
   m.impl(
       "scan_all_streams_to_update_metadata",
       &scan_all_streams_to_update_metadata);
+
+  m.impl("_get_backend_details", &get_backend_details);
 }
 
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
index 03cf8cf6d..6e2703908 100644
--- a/src/torchcodec/_core/ops.py
+++ b/src/torchcodec/_core/ops.py
@@ -139,6 +139,7 @@ def load_torchcodec_shared_libraries():
 _get_json_ffmpeg_library_versions = (
     torch.ops.torchcodec_ns._get_json_ffmpeg_library_versions.default
 )
+_get_backend_details = torch.ops.torchcodec_ns._get_backend_details.default
 
 
 # =============================
@@ -509,3 +510,8 @@ def scan_all_streams_to_update_metadata_abstract(decoder: torch.Tensor) -> None:
 def get_ffmpeg_library_versions():
     versions_json = _get_json_ffmpeg_library_versions()
     return json.loads(versions_json)
+
+
+@register_fake("torchcodec_ns::_get_backend_details")
+def _get_backend_details_abstract(decoder: torch.Tensor) -> str:
+    return ""
diff --git a/test/test_decoders.py b/test/test_decoders.py
index 098e4e969..6e08e05a4 100644
--- a/test/test_decoders.py
+++ b/test/test_decoders.py
@@ -1738,26 +1738,10 @@ def test_set_cuda_backend(self):
         with set_cuda_backend("BETA"):
             assert _get_cuda_backend() == "beta"
 
-        def assert_decoder_uses(decoder, *, expected_backend):
-            # TODO: This doesn't work anymore after
-            # https://github.com/meta-pytorch/torchcodec/pull/977
-            # We need to define a better way to assert which backend a decoder
-            # is using.
-            return
-            # Assert that a decoder instance is using a given backend.
-            #
-            # We know H265_VIDEO fails on the BETA backend while it works on the
-            # ffmpeg one.
-            # if expected_backend == "ffmpeg":
-            #     decoder.get_frame_at(0)  # this would fail if this was BETA
-            # else:
-            #     with pytest.raises(RuntimeError, match="Video is too small"):
-            #         decoder.get_frame_at(0)
-
         # Check that the default is the ffmpeg backend
         assert _get_cuda_backend() == "ffmpeg"
         dec = VideoDecoder(H265_VIDEO.path, device="cuda")
-        assert_decoder_uses(dec, expected_backend="ffmpeg")
+        assert _core._get_backend_details(dec._decoder).startswith("FFmpeg CUDA")
 
         # Check the setting "beta" effectively uses the BETA backend.
         # We also show that the affects decoder creation only. When the decoder
@@ -1766,9 +1750,9 @@ def assert_decoder_uses(decoder, *, expected_backend):
         with set_cuda_backend("beta"):
             dec = VideoDecoder(H265_VIDEO.path, device="cuda")
         assert _get_cuda_backend() == "ffmpeg"
-        assert_decoder_uses(dec, expected_backend="beta")
+        assert _core._get_backend_details(dec._decoder).startswith("Beta CUDA")
         with set_cuda_backend("ffmpeg"):
-            assert_decoder_uses(dec, expected_backend="beta")
+            assert _core._get_backend_details(dec._decoder).startswith("Beta CUDA")
 
         # Hacky way to ensure passing "cuda:1" is supported by both backends. We
         # just check that there's an error when passing cuda:N where N is too

From 1ee851b10ef7b5185d31c0cfd6314a17c73bd0e8 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Mon, 20 Oct 2025 23:08:06 +0100
Subject: [PATCH 2/3] Rename getInterfaceDetails to getDeviceInterfaceDetails

---
 src/torchcodec/_core/SingleStreamDecoder.cpp | 2 +-
 src/torchcodec/_core/SingleStreamDecoder.h   | 2 +-
 src/torchcodec/_core/custom_ops.cpp          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 7a448402f..8d9e9f651 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1702,7 +1702,7 @@ double SingleStreamDecoder::getPtsSecondsForFrame(int64_t frameIndex) {
       streamInfo.allFrames[frameIndex].pts, streamInfo.timeBase);
 }
 
-std::string SingleStreamDecoder::getInterfaceDetails() const {
+std::string SingleStreamDecoder::getDeviceInterfaceDetails() const {
   TORCH_CHECK(deviceInterface_ != nullptr, "Device interface doesn't exist.");
   return deviceInterface_->getDetails();
 }
diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
index cd86de96a..4d4c11aa2 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.h
+++ b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -186,7 +186,7 @@ class SingleStreamDecoder {
   DecodeStats getDecodeStats() const;
   void resetDecodeStats();
 
-  std::string getInterfaceDetails() const;
+  std::string getDeviceInterfaceDetails() const;
 
  private:
   // --------------------------------------------------------------------------
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index 3f71388a1..a5d538482 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -872,7 +872,7 @@ std::string _get_json_ffmpeg_library_versions() {
 
 std::string get_backend_details(at::Tensor& decoder) {
   auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-  return videoDecoder->getInterfaceDetails();
+  return videoDecoder->getDeviceInterfaceDetails();
 }
 
 // Scans video packets to get more accurate metadata like frame count, exact

From 0cc024bd1d81b9c5a48a923e6efe9095b1e7bd01 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Mon, 20 Oct 2025 23:13:24 +0100
Subject: [PATCH 3/3] Fix typo in CudaDeviceInterface::getDetails() comment

---
 src/torchcodec/_core/CudaDeviceInterface.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index 7963a0ad5..be45050e6 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -355,7 +355,7 @@ std::optional<const AVCodec*> CudaDeviceInterface::findCodec(
 }
 
 std::string CudaDeviceInterface::getDetails() {
-  // Note: for this interface specificaly the fallback is only known after a
+  // Note: for this interface specifically the fallback is only known after a
   // frame has been decoded, not before: that's when FFmpeg decides to fallback,
   // so we can't know earlier.
   return std::string("FFmpeg CUDA Device Interface. Using ") +