From 0c7cafb986ca4afd12569986e296d7ce5b1b3dcc Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 20 Oct 2025 18:53:36 +0100 Subject: [PATCH 1/3] Add _core._get_backend_details() utility --- .../_core/BetaCudaDeviceInterface.cpp | 5 +++++ .../_core/BetaCudaDeviceInterface.h | 2 ++ src/torchcodec/_core/CpuDeviceInterface.cpp | 4 ++++ src/torchcodec/_core/CpuDeviceInterface.h | 2 ++ src/torchcodec/_core/CudaDeviceInterface.cpp | 11 ++++++++++ src/torchcodec/_core/CudaDeviceInterface.h | 4 ++++ src/torchcodec/_core/DeviceInterface.h | 4 ++++ src/torchcodec/_core/SingleStreamDecoder.cpp | 5 +++++ src/torchcodec/_core/SingleStreamDecoder.h | 2 ++ src/torchcodec/_core/__init__.py | 1 + src/torchcodec/_core/custom_ops.cpp | 8 +++++++ src/torchcodec/_core/ops.py | 6 +++++ test/test_decoders.py | 22 +++---------------- 13 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp index 7124e4309..07ed92126 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp @@ -699,4 +699,9 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput( avFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor); } +std::string BetaCudaDeviceInterface::getDetails() { + return std::string("Beta CUDA Device Interface. Using ") + + (cpuFallback_ ? "CPU fallback." 
: "NVDEC."); +} + } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.h b/src/torchcodec/_core/BetaCudaDeviceInterface.h index 7424a877d..3a9520867 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.h +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.h @@ -59,6 +59,8 @@ class BetaCudaDeviceInterface : public DeviceInterface { int frameReadyForDecoding(CUVIDPICPARAMS* picParams); int frameReadyInDisplayOrder(CUVIDPARSERDISPINFO* dispInfo); + std::string getDetails() override; + private: int sendCuvidPacket(CUVIDSOURCEDATAPACKET& cuvidPacket); diff --git a/src/torchcodec/_core/CpuDeviceInterface.cpp b/src/torchcodec/_core/CpuDeviceInterface.cpp index 0e9b46434..5aa20b09e 100644 --- a/src/torchcodec/_core/CpuDeviceInterface.cpp +++ b/src/torchcodec/_core/CpuDeviceInterface.cpp @@ -346,4 +346,8 @@ torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph( return rgbAVFrameToTensor(filterGraph_->convert(avFrame)); } +std::string CpuDeviceInterface::getDetails() { + return std::string("CPU Device Interface."); +} + } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h index 9f44c4e8c..3f6f7c962 100644 --- a/src/torchcodec/_core/CpuDeviceInterface.h +++ b/src/torchcodec/_core/CpuDeviceInterface.h @@ -39,6 +39,8 @@ class CpuDeviceInterface : public DeviceInterface { std::optional preAllocatedOutputTensor = std::nullopt) override; + std::string getDetails() override; + private: int convertAVFrameToTensorUsingSwScale( const UniqueAVFrame& avFrame, diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp index 01fdac827..7963a0ad5 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.cpp +++ b/src/torchcodec/_core/CudaDeviceInterface.cpp @@ -284,9 +284,12 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput( frameOutput.data = cpuFrameOutput.data.to(device_); } + usingCPUFallback_ 
= true; return; } + usingCPUFallback_ = false; + // Above we checked that the AVFrame was on GPU, but that's not enough, we // also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits), // because this is what the NPP color conversion routines expect. This SHOULD @@ -351,4 +354,12 @@ std::optional CudaDeviceInterface::findCodec( return std::nullopt; } +std::string CudaDeviceInterface::getDetails() { + // Note: for this interface specificaly the fallback is only known after a + // frame has been decoded, not before: that's when FFmpeg decides to fallback, + // so we can't know earlier. + return std::string("FFmpeg CUDA Device Interface. Using ") + + (usingCPUFallback_ ? "CPU fallback." : "NVDEC."); +} + } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h index d240066f4..9f171ee3c 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.h +++ b/src/torchcodec/_core/CudaDeviceInterface.h @@ -40,6 +40,8 @@ class CudaDeviceInterface : public DeviceInterface { std::optional preAllocatedOutputTensor = std::nullopt) override; + std::string getDetails() override; + private: // Our CUDA decoding code assumes NV12 format. In order to handle other // kinds of input, we need to convert them to NV12. Our current implementation @@ -60,6 +62,8 @@ class CudaDeviceInterface : public DeviceInterface { // maybeConvertAVFrameToNV12(). 
std::unique_ptr nv12ConversionContext_; std::unique_ptr nv12Conversion_; + + bool usingCPUFallback_ = false; }; } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h index 8aad60f24..773317e83 100644 --- a/src/torchcodec/_core/DeviceInterface.h +++ b/src/torchcodec/_core/DeviceInterface.h @@ -119,6 +119,10 @@ class DeviceInterface { avcodec_flush_buffers(codecContext_.get()); } + virtual std::string getDetails() { + return ""; + } + protected: torch::Device device_; SharedAVCodecContext codecContext_; diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 2fbc111c1..7a448402f 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1702,4 +1702,9 @@ double SingleStreamDecoder::getPtsSecondsForFrame(int64_t frameIndex) { streamInfo.allFrames[frameIndex].pts, streamInfo.timeBase); } +std::string SingleStreamDecoder::getInterfaceDetails() const { + TORCH_CHECK(deviceInterface_ != nullptr, "Device interface doesn't exist."); + return deviceInterface_->getDetails(); +} + } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h index 06ea0cd04..cd86de96a 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.h +++ b/src/torchcodec/_core/SingleStreamDecoder.h @@ -186,6 +186,8 @@ class SingleStreamDecoder { DecodeStats getDecodeStats() const; void resetDecodeStats(); + std::string getInterfaceDetails() const; + private: // -------------------------------------------------------------------------- // STREAMINFO AND ASSOCIATED STRUCTS diff --git a/src/torchcodec/_core/__init__.py b/src/torchcodec/_core/__init__.py index eb8dd9697..b3be25d34 100644 --- a/src/torchcodec/_core/__init__.py +++ b/src/torchcodec/_core/__init__.py @@ -14,6 +14,7 @@ ) from .ops import ( _add_video_stream, + _get_backend_details, 
_get_key_frame_indices, _test_frame_pts_equality, add_audio_stream, diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 466ebe50d..3f71388a1 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -72,6 +72,7 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "get_stream_json_metadata(Tensor(a!) decoder, int stream_index) -> str"); m.def("_get_json_ffmpeg_library_versions() -> str"); + m.def("_get_backend_details(Tensor(a!) decoder) -> str"); m.def( "_test_frame_pts_equality(Tensor(a!) decoder, *, int frame_index, float pts_seconds_to_test) -> bool"); m.def("scan_all_streams_to_update_metadata(Tensor(a!) decoder) -> ()"); @@ -869,6 +870,11 @@ std::string _get_json_ffmpeg_library_versions() { return ss.str(); } +std::string get_backend_details(at::Tensor& decoder) { + auto videoDecoder = unwrapTensorToGetDecoder(decoder); + return videoDecoder->getInterfaceDetails(); +} + // Scans video packets to get more accurate metadata like frame count, exact // keyframe positions, etc. Exact keyframe positions are useful for efficient // accurate seeking. 
Note that this function reads the entire video but it does @@ -912,6 +918,8 @@ TORCH_LIBRARY_IMPL(torchcodec_ns, CPU, m) { m.impl( "scan_all_streams_to_update_metadata", &scan_all_streams_to_update_metadata); + + m.impl("_get_backend_details", &get_backend_details); } } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 03cf8cf6d..6e2703908 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -139,6 +139,7 @@ def load_torchcodec_shared_libraries(): _get_json_ffmpeg_library_versions = ( torch.ops.torchcodec_ns._get_json_ffmpeg_library_versions.default ) +_get_backend_details = torch.ops.torchcodec_ns._get_backend_details.default # ============================= @@ -509,3 +510,8 @@ def scan_all_streams_to_update_metadata_abstract(decoder: torch.Tensor) -> None: def get_ffmpeg_library_versions(): versions_json = _get_json_ffmpeg_library_versions() return json.loads(versions_json) + + +@register_fake("torchcodec_ns::_get_backend_details") +def _get_backend_details_abstract(decoder: torch.Tensor) -> str: + return "" diff --git a/test/test_decoders.py b/test/test_decoders.py index 098e4e969..6e08e05a4 100644 --- a/test/test_decoders.py +++ b/test/test_decoders.py @@ -1738,26 +1738,10 @@ def test_set_cuda_backend(self): with set_cuda_backend("BETA"): assert _get_cuda_backend() == "beta" - def assert_decoder_uses(decoder, *, expected_backend): - # TODO: This doesn't work anymore after - # https://github.com/meta-pytorch/torchcodec/pull/977 - # We need to define a better way to assert which backend a decoder - # is using. - return - # Assert that a decoder instance is using a given backend. - # - # We know H265_VIDEO fails on the BETA backend while it works on the - # ffmpeg one. 
- # if expected_backend == "ffmpeg": - # decoder.get_frame_at(0) # this would fail if this was BETA - # else: - # with pytest.raises(RuntimeError, match="Video is too small"): - # decoder.get_frame_at(0) - # Check that the default is the ffmpeg backend assert _get_cuda_backend() == "ffmpeg" dec = VideoDecoder(H265_VIDEO.path, device="cuda") - assert_decoder_uses(dec, expected_backend="ffmpeg") + assert _core._get_backend_details(dec._decoder).startswith("FFmpeg CUDA") # Check the setting "beta" effectively uses the BETA backend. # We also show that the affects decoder creation only. When the decoder @@ -1766,9 +1750,9 @@ def assert_decoder_uses(decoder, *, expected_backend): with set_cuda_backend("beta"): dec = VideoDecoder(H265_VIDEO.path, device="cuda") assert _get_cuda_backend() == "ffmpeg" - assert_decoder_uses(dec, expected_backend="beta") + assert _core._get_backend_details(dec._decoder).startswith("Beta CUDA") with set_cuda_backend("ffmpeg"): - assert_decoder_uses(dec, expected_backend="beta") + assert _core._get_backend_details(dec._decoder).startswith("Beta CUDA") # Hacky way to ensure passing "cuda:1" is supported by both backends. 
We # just check that there's an error when passing cuda:N where N is too From 1ee851b10ef7b5185d31c0cfd6314a17c73bd0e8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 20 Oct 2025 23:08:06 +0100 Subject: [PATCH 2/3] getInterfaceDetails -> getDeviceInterfaceDetails --- src/torchcodec/_core/SingleStreamDecoder.cpp | 2 +- src/torchcodec/_core/SingleStreamDecoder.h | 2 +- src/torchcodec/_core/custom_ops.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 7a448402f..8d9e9f651 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1702,7 +1702,7 @@ double SingleStreamDecoder::getPtsSecondsForFrame(int64_t frameIndex) { streamInfo.allFrames[frameIndex].pts, streamInfo.timeBase); } -std::string SingleStreamDecoder::getInterfaceDetails() const { +std::string SingleStreamDecoder::getDeviceInterfaceDetails() const { TORCH_CHECK(deviceInterface_ != nullptr, "Device interface doesn't exist."); return deviceInterface_->getDetails(); } diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h index cd86de96a..4d4c11aa2 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.h +++ b/src/torchcodec/_core/SingleStreamDecoder.h @@ -186,7 +186,7 @@ class SingleStreamDecoder { DecodeStats getDecodeStats() const; void resetDecodeStats(); - std::string getInterfaceDetails() const; + std::string getDeviceInterfaceDetails() const; private: // -------------------------------------------------------------------------- diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 3f71388a1..a5d538482 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -872,7 +872,7 @@ std::string _get_json_ffmpeg_library_versions() { std::string get_backend_details(at::Tensor& decoder) { auto videoDecoder = 
unwrapTensorToGetDecoder(decoder); - return videoDecoder->getInterfaceDetails(); + return videoDecoder->getDeviceInterfaceDetails(); } // Scans video packets to get more accurate metadata like frame count, exact From 0cc024bd1d81b9c5a48a923e6efe9095b1e7bd01 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 20 Oct 2025 23:13:24 +0100 Subject: [PATCH 3/3] Fix typo in comment --- src/torchcodec/_core/CudaDeviceInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp index 7963a0ad5..be45050e6 100644 --- a/src/torchcodec/_core/CudaDeviceInterface.cpp +++ b/src/torchcodec/_core/CudaDeviceInterface.cpp @@ -355,7 +355,7 @@ std::optional CudaDeviceInterface::findCodec( } std::string CudaDeviceInterface::getDetails() { - // Note: for this interface specificaly the fallback is only known after a + // Note: for this interface specifically the fallback is only known after a // frame has been decoded, not before: that's when FFmpeg decides to fallback, // so we can't know earlier. return std::string("FFmpeg CUDA Device Interface. Using ") +