diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
index d55bb1137..420bf6c2c 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -14,6 +14,7 @@
 #include "src/torchcodec/_core/DeviceInterface.h"
 #include "src/torchcodec/_core/FFMPEGCommon.h"
 #include "src/torchcodec/_core/NVDECCache.h"
+#include "src/torchcodec/_core/NVCUVIDLoader.h"
 
 // #include <cuda_runtime.h> // For cudaStreamSynchronize
 #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
@@ -53,12 +54,13 @@ pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
 }
 
 static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
+  const auto& nvcuvid = NVCUVIDLoader::instance().api();
   // Check decoder capabilities - same checks as DALI
   auto caps = CUVIDDECODECAPS{};
   caps.eCodecType = videoFormat->codec;
   caps.eChromaFormat = videoFormat->chroma_format;
   caps.nBitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
-  CUresult result = cuvidGetDecoderCaps(&caps);
+  CUresult result = nvcuvid.cuvidGetDecoderCaps(&caps);
   TORCH_CHECK(result == CUDA_SUCCESS, "Failed to get decoder caps: ", result);
 
   TORCH_CHECK(
@@ -157,7 +159,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
   decoderParams.display_area.bottom = videoFormat->display_area.bottom;
 
   CUvideodecoder* decoder = new CUvideodecoder();
-  result = cuvidCreateDecoder(decoder, &decoderParams);
+  result = nvcuvid.cuvidCreateDecoder(decoder, &decoderParams);
   TORCH_CHECK(
       result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
   return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
@@ -221,7 +223,8 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
   }
 
   if (videoParser_) {
-    cuvidDestroyVideoParser(videoParser_);
+    const auto& nvcuvid = NVCUVIDLoader::instance().api();
+    nvcuvid.cuvidDestroyVideoParser(videoParser_);
     videoParser_ = nullptr;
   }
 
@@ -253,7 +256,11 @@ void BetaCudaDeviceInterface::initialize(
   parserParams.pfnDecodePicture = pfnDecodePictureCallback;
   parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
 
-  CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
+  TORCH_CHECK(
+      NVCUVIDLoader::instance().ensureLoaded(),
+      "NVDEC runtime library (libnvcuvid) could not be loaded. Make sure the NVIDIA Video Codec SDK runtime is installed and libnvcuvid.so is present on your system.");
+  const auto& nvcuvid = NVCUVIDLoader::instance().api();
+  CUresult result = nvcuvid.cuvidCreateVideoParser(&videoParser_, &parserParams);
   TORCH_CHECK(
       result == CUDA_SUCCESS, "Failed to create video parser: ", result);
 }
@@ -415,7 +422,8 @@ int BetaCudaDeviceInterface::sendEOFPacket() {
 
 int BetaCudaDeviceInterface::sendCuvidPacket(
     CUVIDSOURCEDATAPACKET& cuvidPacket) {
-  CUresult result = cuvidParseVideoData(videoParser_, &cuvidPacket);
+  const auto& nvcuvid = NVCUVIDLoader::instance().api();
+  CUresult result = nvcuvid.cuvidParseVideoData(videoParser_, &cuvidPacket);
   return result == CUDA_SUCCESS ? AVSUCCESS : AVERROR_EXTERNAL;
 }
 
@@ -453,7 +461,8 @@ int BetaCudaDeviceInterface::frameReadyForDecoding(CUVIDPICPARAMS* picParams) {
   TORCH_CHECK(picParams != nullptr, "Invalid picture parameters");
   TORCH_CHECK(decoder_, "Decoder not initialized before picture decode");
   // Send frame to be decoded by NVDEC - non-blocking call.
-  CUresult result = cuvidDecodePicture(*decoder_.get(), picParams);
+  const auto& nvcuvid = NVCUVIDLoader::instance().api();
+  CUresult result = nvcuvid.cuvidDecodePicture(*decoder_.get(), picParams);
 
   // Yes, you're reading that right, 0 means error, 1 means success
   return (result == CUDA_SUCCESS);
@@ -506,7 +515,8 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
   // SingleStreamDecoder. Either way, the underlying output surface can be
   // safely re-used.
   unmapPreviousFrame();
-  CUresult result = cuvidMapVideoFrame(
+  const auto& nvcuvid2 = NVCUVIDLoader::instance().api();
+  CUresult result = nvcuvid2.cuvidMapVideoFrame(
       *decoder_.get(), dispInfo.picture_index, &framePtr, &pitch, &procParams);
   if (result != CUDA_SUCCESS) {
     return AVERROR_EXTERNAL;
@@ -523,7 +533,8 @@ void BetaCudaDeviceInterface::unmapPreviousFrame() {
     return;
   }
   CUresult result =
-      cuvidUnmapVideoFrame(*decoder_.get(), previouslyMappedFrame_);
+      NVCUVIDLoader::instance().api().cuvidUnmapVideoFrame(
+          *decoder_.get(), previouslyMappedFrame_);
   TORCH_CHECK(
       result == CUDA_SUCCESS, "Failed to unmap previous frame: ", result);
   previouslyMappedFrame_ = 0;
diff --git a/src/torchcodec/_core/CMakeLists.txt b/src/torchcodec/_core/CMakeLists.txt
index 75d1b036c..661449e1a 100644
--- a/src/torchcodec/_core/CMakeLists.txt
+++ b/src/torchcodec/_core/CMakeLists.txt
@@ -99,7 +99,7 @@ function(make_torchcodec_libraries
     )
 
     if(ENABLE_CUDA)
-        list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp)
+        list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp NVCUVIDLoader.cpp)
     endif()
 
     set(core_library_dependencies
@@ -108,28 +108,14 @@ function(make_torchcodec_libraries
     )
 
     if(ENABLE_CUDA)
-        # Try to find NVCUVID. Try the normal way first. This should work locally.
-        find_library(NVCUVID_LIBRARY NAMES nvcuvid)
-        # If not found, try with version suffix, or hardcoded path. Appears
-        # to be necessary on the CI.
-        if(NOT NVCUVID_LIBRARY)
-            find_library(NVCUVID_LIBRARY NAMES nvcuvid.1 PATHS /usr/lib64 /usr/lib)
-        endif()
-        if(NOT NVCUVID_LIBRARY)
-            set(NVCUVID_LIBRARY "/usr/lib64/libnvcuvid.so.1")
-        endif()
-
-        if(NVCUVID_LIBRARY)
-            message(STATUS "Found NVCUVID: ${NVCUVID_LIBRARY}")
-        else()
-            message(FATAL_ERROR "Could not find NVCUVID library")
-        endif()
-
         list(APPEND core_library_dependencies
             ${CUDA_nppi_LIBRARY}
             ${CUDA_nppicc_LIBRARY}
-            ${NVCUVID_LIBRARY}
         )
+        # We link dl to load dynamically nvcuvid
+        if(UNIX AND NOT APPLE)
+            list(APPEND core_library_dependencies ${CMAKE_DL_LIBS})
+        endif()
     endif()
 
     make_torchcodec_sublibrary(
diff --git a/src/torchcodec/_core/NVCUVIDLoader.cpp b/src/torchcodec/_core/NVCUVIDLoader.cpp
new file mode 100644
index 000000000..b4ce66592
--- /dev/null
+++ b/src/torchcodec/_core/NVCUVIDLoader.cpp
@@ -0,0 +1,124 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include "src/torchcodec/_core/NVCUVIDLoader.h"
+
+#include <stdexcept>
+
+namespace facebook::torchcodec {
+
+namespace {
+
+#if defined(_WIN32)
+constexpr const wchar_t* kLibName = L"nvcuvid.dll";
+#else
+constexpr const char* kLibName = "libnvcuvid.so";
+#endif
+
+// Looks up `name` in an already-opened library and stores its address in
+// `out`. Returns false, leaving `out` null, when the symbol is missing. The
+// handle type is deduced so that this free function never has to name
+// NVCUVIDLoader::LibHandle, which is private to the class.
+template <typename Handle, typename Fn>
+inline bool ResolveSymbol(Handle handle, const char* name, Fn*& out) {
+#if defined(_WIN32)
+  FARPROC p = GetProcAddress(handle, name);
+  out = reinterpret_cast<Fn*>(p);
+#else
+  void* p = dlsym(handle, name);
+  out = reinterpret_cast<Fn*>(p);
+#endif
+  return out != nullptr;
+}
+
+} // namespace
+
+NVCUVIDLoader& NVCUVIDLoader::instance() {
+  static NVCUVIDLoader loader;
+  return loader;
+}
+
+NVCUVIDLoader::~NVCUVIDLoader() {
+#if defined(_WIN32)
+  if (handle_) {
+    FreeLibrary(handle_);
+  }
+#else
+  if (handle_) {
+    dlclose(handle_);
+  }
+#endif
+}
+
+bool NVCUVIDLoader::ensureLoaded() {
+  if (loaded_) {
+    return true;
+  }
+  // Reuse a handle left over from an earlier attempt whose symbol resolution
+  // failed; opening the library again would only leak another reference.
+  if (!handle_ && !loadLibrary()) {
+    return false;
+  }
+  loaded_ = resolveSymbols();
+  return loaded_;
+}
+
+const NVCUVIDLoader::API& NVCUVIDLoader::api() {
+  if (!ensureLoaded()) {
+    // Handing back a table of null entry points would turn the first NVDEC
+    // call into a crash with no diagnostic, so fail loudly here instead.
+    throw std::runtime_error(
+        "Failed to load libnvcuvid and resolve required symbols");
+  }
+  return api_;
+}
+
+bool NVCUVIDLoader::loadLibrary() {
+#if defined(_WIN32)
+  handle_ = LoadLibraryW(kLibName);
+#else
+  handle_ = dlopen(kLibName, RTLD_NOW);
+  if (!handle_) {
+    // Fallback to the versioned soname used on some systems, as done by DALI:
+    // https://github.com/NVIDIA/DALI/blob/a10cef187c0a5f27b6415df5d023c8057b9b43e2/dali/operators/video/dynlink_nvcuvid/dynlink_nvcuvid.cc#L35C18-L35C34
+    handle_ = dlopen("libnvcuvid.so.1", RTLD_NOW);
+  }
+#endif
+  return handle_ != nullptr;
+}
+
+// Stringify after macro expansion: cuviddec.h remaps cuvidMapVideoFrame and
+// cuvidUnmapVideoFrame to their *64 variants on 64-bit builds, so the symbol
+// looked up must carry the expanded name. The unexpanded spelling would bind
+// the legacy entry points that only handle 32-bit device pointers.
+#define TC_NVCUVID_STRINGIFY_IMPL(sym) #sym
+#define TC_NVCUVID_STRINGIFY(sym) TC_NVCUVID_STRINGIFY_IMPL(sym)
+#define TC_NVCUVID_RESOLVE(sym) \
+  ResolveSymbol(handle_, TC_NVCUVID_STRINGIFY(sym), api_.sym)
+
+bool NVCUVIDLoader::resolveSymbols() {
+  bool ok = true;
+
+  ok &= TC_NVCUVID_RESOLVE(cuvidCreateVideoParser);
+  ok &= TC_NVCUVID_RESOLVE(cuvidParseVideoData);
+  ok &= TC_NVCUVID_RESOLVE(cuvidDestroyVideoParser);
+
+  ok &= TC_NVCUVID_RESOLVE(cuvidGetDecoderCaps);
+  ok &= TC_NVCUVID_RESOLVE(cuvidCreateDecoder);
+  ok &= TC_NVCUVID_RESOLVE(cuvidDestroyDecoder);
+  ok &= TC_NVCUVID_RESOLVE(cuvidDecodePicture);
+
+  ok &= TC_NVCUVID_RESOLVE(cuvidMapVideoFrame);
+  ok &= TC_NVCUVID_RESOLVE(cuvidUnmapVideoFrame);
+
+  return ok;
+}
+
+#undef TC_NVCUVID_RESOLVE
+#undef TC_NVCUVID_STRINGIFY
+#undef TC_NVCUVID_STRINGIFY_IMPL
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/NVCUVIDLoader.h b/src/torchcodec/_core/NVCUVIDLoader.h
new file mode 100644
index 000000000..c06f6e3af
--- /dev/null
+++ b/src/torchcodec/_core/NVCUVIDLoader.h
@@ -0,0 +1,89 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// All rights reserved.
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <cstdint>
+
+#if defined(_WIN32)
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
+#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
+
+namespace facebook::torchcodec {
+
+// Thin runtime loader for NVCUVID (NVDEC) symbols so we don't need to
+// hard-link against libnvcuvid. This follows NVIDIA's guidance for dynamic
+// loading.
+class NVCUVIDLoader {
+ public:
+  // Entry points resolved from the library. Every pointer is null until
+  // ensureLoaded() has returned true.
+  struct API {
+    // Parser
+    CUresult(CUDAAPI* cuvidCreateVideoParser)(
+        CUvideoparser*, CUVIDPARSERPARAMS*);
+    CUresult(CUDAAPI* cuvidParseVideoData)(
+        CUvideoparser, CUVIDSOURCEDATAPACKET*);
+    CUresult(CUDAAPI* cuvidDestroyVideoParser)(CUvideoparser);
+
+    // Decoder
+    CUresult(CUDAAPI* cuvidGetDecoderCaps)(CUVIDDECODECAPS*);
+    CUresult(CUDAAPI* cuvidCreateDecoder)(
+        CUvideodecoder*, CUVIDDECODECREATEINFO*);
+    CUresult(CUDAAPI* cuvidDestroyDecoder)(CUvideodecoder);
+    CUresult(CUDAAPI* cuvidDecodePicture)(
+        CUvideodecoder, CUVIDPICPARAMS*);
+
+    // Frame mapping. The device pointer is passed at full CUdeviceptr width
+    // in both directions; an `unsigned int` here would truncate 64-bit
+    // addresses when unmapping.
+    CUresult(CUDAAPI* cuvidMapVideoFrame)(
+        CUvideodecoder,
+        int,
+        CUdeviceptr*,
+        unsigned int*,
+        CUVIDPROCPARAMS*);
+    CUresult(CUDAAPI* cuvidUnmapVideoFrame)(
+        CUvideodecoder, CUdeviceptr /* DevPtr */);
+  };
+
+  // Singleton
+  static NVCUVIDLoader& instance();
+
+  // Returns true if the library is loaded and required symbols resolved.
+  bool ensureLoaded();
+
+  // Access resolved API. ensureLoaded() will be called implicitly; returns a
+  // reference to a fully populated API or throws std::runtime_error if the
+  // library or any required symbol is unavailable.
+  const API& api();
+
+ private:
+  NVCUVIDLoader() = default;
+  ~NVCUVIDLoader();
+  NVCUVIDLoader(const NVCUVIDLoader&) = delete;
+  NVCUVIDLoader& operator=(const NVCUVIDLoader&) = delete;
+
+#if defined(_WIN32)
+  using LibHandle = HMODULE;
+#else
+  using LibHandle = void*;
+#endif
+
+  LibHandle handle_ = nullptr;
+  bool loaded_ = false;
+  API api_{};
+
+  bool loadLibrary();
+  bool resolveSymbols();
+};
+
+} // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/NVDECCache.h b/src/torchcodec/_core/NVDECCache.h
index b248ebc68..be787be9c 100644
--- a/src/torchcodec/_core/NVDECCache.h
+++ b/src/torchcodec/_core/NVDECCache.h
@@ -15,5 +15,6 @@
 #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
 #include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
+#include "src/torchcodec/_core/NVCUVIDLoader.h"
 
 namespace facebook::torchcodec {
 
@@ -24,7 +25,8 @@ namespace facebook::torchcodec {
 struct CUvideoDecoderDeleter {
   void operator()(CUvideodecoder* decoderPtr) const {
     if (decoderPtr && *decoderPtr) {
-      cuvidDestroyDecoder(*decoderPtr);
+      // Destroy via dynamic loader to avoid hard dependency on libnvcuvid.
+      NVCUVIDLoader::instance().api().cuvidDestroyDecoder(*decoderPtr);
       delete decoderPtr;
     }
   }