Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions src/torchcodec/_core/BetaCudaDeviceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "src/torchcodec/_core/DeviceInterface.h"
#include "src/torchcodec/_core/FFMPEGCommon.h"
#include "src/torchcodec/_core/NVDECCache.h"
#include "src/torchcodec/_core/NVCUVIDLoader.h"

// #include <cuda_runtime.h> // For cudaStreamSynchronize
#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
Expand Down Expand Up @@ -53,12 +54,13 @@ pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
}

static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
const auto& nvcuvid = NVCUVIDLoader::instance().api();
// Check decoder capabilities - same checks as DALI
auto caps = CUVIDDECODECAPS{};
caps.eCodecType = videoFormat->codec;
caps.eChromaFormat = videoFormat->chroma_format;
caps.nBitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
CUresult result = cuvidGetDecoderCaps(&caps);
CUresult result = nvcuvid.cuvidGetDecoderCaps(&caps);
TORCH_CHECK(result == CUDA_SUCCESS, "Failed to get decoder caps: ", result);

TORCH_CHECK(
Expand Down Expand Up @@ -157,7 +159,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
decoderParams.display_area.bottom = videoFormat->display_area.bottom;

CUvideodecoder* decoder = new CUvideodecoder();
result = cuvidCreateDecoder(decoder, &decoderParams);
result = nvcuvid.cuvidCreateDecoder(decoder, &decoderParams);
TORCH_CHECK(
result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
Expand Down Expand Up @@ -221,7 +223,8 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
}

if (videoParser_) {
cuvidDestroyVideoParser(videoParser_);
const auto& nvcuvid = NVCUVIDLoader::instance().api();
nvcuvid.cuvidDestroyVideoParser(videoParser_);
videoParser_ = nullptr;
}

Expand Down Expand Up @@ -253,7 +256,11 @@ void BetaCudaDeviceInterface::initialize(
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;

CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
TORCH_CHECK(
NVCUVIDLoader::instance().ensureLoaded(),
"NVDEC runtime library (libnvcuvid) could not be loaded. Make sure the NVIDIA Video Codec SDK runtime is installed and libnvcuvid.so is present on your system.");
const auto& nvcuvid = NVCUVIDLoader::instance().api();
CUresult result = nvcuvid.cuvidCreateVideoParser(&videoParser_, &parserParams);
TORCH_CHECK(
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
}
Expand Down Expand Up @@ -415,7 +422,8 @@ int BetaCudaDeviceInterface::sendEOFPacket() {

int BetaCudaDeviceInterface::sendCuvidPacket(
CUVIDSOURCEDATAPACKET& cuvidPacket) {
CUresult result = cuvidParseVideoData(videoParser_, &cuvidPacket);
const auto& nvcuvid = NVCUVIDLoader::instance().api();
CUresult result = nvcuvid.cuvidParseVideoData(videoParser_, &cuvidPacket);
return result == CUDA_SUCCESS ? AVSUCCESS : AVERROR_EXTERNAL;
}

Expand Down Expand Up @@ -453,7 +461,8 @@ int BetaCudaDeviceInterface::frameReadyForDecoding(CUVIDPICPARAMS* picParams) {
TORCH_CHECK(picParams != nullptr, "Invalid picture parameters");
TORCH_CHECK(decoder_, "Decoder not initialized before picture decode");
// Send frame to be decoded by NVDEC - non-blocking call.
CUresult result = cuvidDecodePicture(*decoder_.get(), picParams);
const auto& nvcuvid = NVCUVIDLoader::instance().api();
CUresult result = nvcuvid.cuvidDecodePicture(*decoder_.get(), picParams);

// Yes, you're reading that right, 0 means error, 1 means success
return (result == CUDA_SUCCESS);
Expand Down Expand Up @@ -506,7 +515,8 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
// SingleStreamDecoder. Either way, the underlying output surface can be
// safely re-used.
unmapPreviousFrame();
CUresult result = cuvidMapVideoFrame(
const auto& nvcuvid2 = NVCUVIDLoader::instance().api();
CUresult result = nvcuvid2.cuvidMapVideoFrame(
*decoder_.get(), dispInfo.picture_index, &framePtr, &pitch, &procParams);
if (result != CUDA_SUCCESS) {
return AVERROR_EXTERNAL;
Expand All @@ -523,7 +533,8 @@ void BetaCudaDeviceInterface::unmapPreviousFrame() {
return;
}
CUresult result =
cuvidUnmapVideoFrame(*decoder_.get(), previouslyMappedFrame_);
NVCUVIDLoader::instance().api().cuvidUnmapVideoFrame(
*decoder_.get(), previouslyMappedFrame_);
TORCH_CHECK(
result == CUDA_SUCCESS, "Failed to unmap previous frame: ", result);
previouslyMappedFrame_ = 0;
Expand Down
24 changes: 5 additions & 19 deletions src/torchcodec/_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ function(make_torchcodec_libraries
)

if(ENABLE_CUDA)
list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp)
list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp NVCUVIDLoader.cpp)
endif()

set(core_library_dependencies
Expand All @@ -108,28 +108,14 @@ function(make_torchcodec_libraries
)

if(ENABLE_CUDA)
# Try to find NVCUVID. Try the normal way first. This should work locally.
find_library(NVCUVID_LIBRARY NAMES nvcuvid)
# If not found, try with version suffix, or hardcoded path. Appears
# to be necessary on the CI.
if(NOT NVCUVID_LIBRARY)
find_library(NVCUVID_LIBRARY NAMES nvcuvid.1 PATHS /usr/lib64 /usr/lib)
endif()
if(NOT NVCUVID_LIBRARY)
set(NVCUVID_LIBRARY "/usr/lib64/libnvcuvid.so.1")
endif()

if(NVCUVID_LIBRARY)
message(STATUS "Found NVCUVID: ${NVCUVID_LIBRARY}")
else()
message(FATAL_ERROR "Could not find NVCUVID library")
endif()

list(APPEND core_library_dependencies
${CUDA_nppi_LIBRARY}
${CUDA_nppicc_LIBRARY}
${NVCUVID_LIBRARY}
)
# We link dl to load dynamically nvcuvid
if(UNIX AND NOT APPLE)
list(APPEND core_library_dependencies ${CMAKE_DL_LIBS})
endif()
endif()

make_torchcodec_sublibrary(
Expand Down
104 changes: 104 additions & 0 deletions src/torchcodec/_core/NVCUVIDLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include "src/torchcodec/_core/NVCUVIDLoader.h"

#include <cstdio>
#include <stdexcept>

namespace facebook::torchcodec {

namespace {

// File name handed to the platform's dynamic loader when opening the NVCUVID
// runtime. Windows takes a wide string because the library is opened with
// LoadLibraryW; every other platform uses a narrow string for dlopen.
#if !defined(_WIN32)
constexpr const char* kLibName{"libnvcuvid.so"};
#else
constexpr const wchar_t* kLibName{L"nvcuvid.dll"};
#endif

// Looks up the exported symbol `name` in the already-opened library `handle`
// and stores its address in `out`, typed as a pointer to the function type T.
//
// The handle type is deduced as a template parameter instead of being spelled
// NVCUVIDLoader::LibHandle: that alias is a private member of the loader, so
// a free function is not allowed to name it. Deduction works for both the
// Windows (HMODULE) and POSIX (void*) handle types.
//
// Returns true when the symbol was found. On failure `out` is set to nullptr
// and false is returned.
template <typename Handle, typename T>
inline bool ResolveSymbol(Handle handle, const char* name, T*& out) {
#if defined(_WIN32)
  out = reinterpret_cast<T*>(GetProcAddress(handle, name));
#else
  out = reinterpret_cast<T*>(dlsym(handle, name));
#endif
  return out != nullptr;
}

} // namespace

// Returns the single process-wide loader. The object is a function-local
// static, so it is constructed the first time this function is called.
NVCUVIDLoader& NVCUVIDLoader::instance() {
  static NVCUVIDLoader singleton;
  return singleton;
}

// Releases the library handle, if one was ever obtained.
// NOTE(review): the loader is a function-local static, so this runs during
// static destruction; confirm no other static object still calls into
// NVCUVID after this point.
NVCUVIDLoader::~NVCUVIDLoader() {
  if (!handle_) {
    return;
  }
#if defined(_WIN32)
  FreeLibrary(handle_);
#else
  dlclose(handle_);
#endif
}

// Opens the NVCUVID library and resolves the function table on first use.
// Returns true once both steps have succeeded; a successful result is cached
// in loaded_, while a failed attempt is retried on the next call.
bool NVCUVIDLoader::ensureLoaded() {
  if (!loaded_) {
    // Symbols are only resolved when the library itself could be opened.
    loaded_ = loadLibrary() && resolveSymbols();
  }
  return loaded_;
}

// Returns the resolved NVCUVID function table, loading the library on demand.
//
// Throws std::runtime_error when the library cannot be opened or a required
// symbol is missing. Previously this only printed to stderr and handed back a
// table of null function pointers, which every caller then invoked. Callers
// that want a friendlier message should check ensureLoaded() first and report
// the failure themselves.
const NVCUVIDLoader::API& NVCUVIDLoader::api() {
  if (!ensureLoaded()) {
    throw std::runtime_error(
        "Failed to load libnvcuvid and resolve required symbols");
  }
  return api_;
}

// Opens the NVCUVID shared library and stores the handle in handle_.
// Returns true when a handle is held after the call.
bool NVCUVIDLoader::loadLibrary() {
  // ensureLoaded() calls this again if symbol resolution failed earlier.
  // Re-opening would bump the OS reference count on the library each time,
  // while the destructor releases it only once, so reuse the existing handle.
  if (handle_ != nullptr) {
    return true;
  }

#if defined(_WIN32)
  handle_ = LoadLibraryW(kLibName);
#else
  handle_ = dlopen(kLibName, RTLD_NOW);
  if (handle_ == nullptr) {
    // Fall back to the versioned soname used on some systems, as done by DALI:
    // https://github.com/NVIDIA/DALI/blob/a10cef187c0a5f27b6415df5d023c8057b9b43e2/dali/operators/video/dynlink_nvcuvid/dynlink_nvcuvid.cc#L35C18-L35C34
    handle_ = dlopen("libnvcuvid.so.1", RTLD_NOW);
  }
#endif
  return handle_ != nullptr;
}

// Resolves every NVCUVID entry point used by the decoder into api_.
// Returns true only if all of them were found. Every symbol is attempted even
// after a failure, so each pointer ends up either valid or null.
bool NVCUVIDLoader::resolveSymbols() {
  // NVIDIA's cuviddec.h remaps cuvidMapVideoFrame / cuvidUnmapVideoFrame to
  // their *64 variants with macros when device pointers are 64-bit. Macros do
  // not touch string literals, so the export names have to be chosen
  // explicitly here; otherwise the legacy 32-bit-device-pointer entry points
  // would be bound to pointers declared with CUdeviceptr arguments.
#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
  constexpr const char* kMapVideoFrameSymbol = "cuvidMapVideoFrame64";
  constexpr const char* kUnmapVideoFrameSymbol = "cuvidUnmapVideoFrame64";
#else
  constexpr const char* kMapVideoFrameSymbol = "cuvidMapVideoFrame";
  constexpr const char* kUnmapVideoFrameSymbol = "cuvidUnmapVideoFrame";
#endif

  bool ok = true;

  // Parser
  ok &= ResolveSymbol(
      handle_, "cuvidCreateVideoParser", api_.cuvidCreateVideoParser);
  ok &= ResolveSymbol(handle_, "cuvidParseVideoData", api_.cuvidParseVideoData);
  ok &= ResolveSymbol(
      handle_, "cuvidDestroyVideoParser", api_.cuvidDestroyVideoParser);

  // Decoder
  ok &= ResolveSymbol(handle_, "cuvidGetDecoderCaps", api_.cuvidGetDecoderCaps);
  ok &= ResolveSymbol(handle_, "cuvidCreateDecoder", api_.cuvidCreateDecoder);
  ok &= ResolveSymbol(handle_, "cuvidDestroyDecoder", api_.cuvidDestroyDecoder);
  ok &= ResolveSymbol(handle_, "cuvidDecodePicture", api_.cuvidDecodePicture);

  // Frame mapping
  ok &= ResolveSymbol(handle_, kMapVideoFrameSymbol, api_.cuvidMapVideoFrame);
  ok &=
      ResolveSymbol(handle_, kUnmapVideoFrameSymbol, api_.cuvidUnmapVideoFrame);

  return ok;
}

} // namespace facebook::torchcodec
84 changes: 84 additions & 0 deletions src/torchcodec/_core/NVCUVIDLoader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <cstddef>

#if defined(_WIN32)
#include <Windows.h>
#else
#include <dlfcn.h>
#endif

#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"

namespace facebook::torchcodec {

// Thin runtime loader for NVCUVID (NVDEC) symbols so we don't need to
// hard-link against libnvcuvid. This follows NVIDIA's guidance for dynamic
// loading.
// Singleton that opens the NVCUVID runtime library on demand and exposes the
// entry points the decoder needs as a table of function pointers.
// NOTE(review): handle_ and loaded_ are not synchronized; confirm the first
// ensureLoaded()/api() call cannot race across threads.
class NVCUVIDLoader {
 public:
  // Function-pointer table; each member mirrors the NVCUVID function of the
  // same name.
  struct API {
    // Parser
    CUresult(CUDAAPI* cuvidCreateVideoParser)(
        CUvideoparser*, CUVIDPARSERPARAMS*);
    CUresult(CUDAAPI* cuvidParseVideoData)(
        CUvideoparser, CUVIDSOURCEDATAPACKET*);
    CUresult(CUDAAPI* cuvidDestroyVideoParser)(CUvideoparser);

    // Decoder
    CUresult(CUDAAPI* cuvidGetDecoderCaps)(CUVIDDECODECAPS*);
    CUresult(CUDAAPI* cuvidCreateDecoder)(
        CUvideodecoder*, CUVIDDECODECREATEINFO*);
    CUresult(CUDAAPI* cuvidDestroyDecoder)(CUvideodecoder);
    CUresult(CUDAAPI* cuvidDecodePicture)(CUvideodecoder, CUVIDPICPARAMS*);

    // Frame mapping
    CUresult(CUDAAPI* cuvidMapVideoFrame)(
        CUvideodecoder,
        int,
        CUdeviceptr*,
        unsigned int*,
        CUVIDPROCPARAMS*);
    // The device pointer is taken as CUdeviceptr, the same type
    // cuvidMapVideoFrame writes through its CUdeviceptr* argument. Declaring
    // it as `unsigned int` would truncate 64-bit device addresses.
    CUresult(CUDAAPI* cuvidUnmapVideoFrame)(
        CUvideodecoder, CUdeviceptr /* DevPtr */);
  };

  // Singleton
  static NVCUVIDLoader& instance();

  // Returns true if the library is loaded and required symbols resolved.
  bool ensureLoaded();

  // Access the function table. ensureLoaded() is invoked implicitly. The
  // pointers in the table are only usable once loading has succeeded, so
  // callers that can report a failure should check ensureLoaded() first.
  const API& api();

 private:
  NVCUVIDLoader() = default;
  ~NVCUVIDLoader();
  NVCUVIDLoader(const NVCUVIDLoader&) = delete;
  NVCUVIDLoader& operator=(const NVCUVIDLoader&) = delete;

  // Native handle type returned by the platform's dynamic loader.
#if defined(_WIN32)
  using LibHandle = HMODULE;
#else
  using LibHandle = void*;
#endif

  LibHandle handle_ = nullptr; // null until the library has been opened
  bool loaded_ = false; // true once the library is open and symbols resolved
  API api_{}; // value-initialized, so every pointer starts out null

  // Opens the shared library; returns true when a handle was obtained.
  bool loadLibrary();
  // Fills api_ from handle_; returns true when every symbol was found.
  bool resolveSymbols();
};

} // namespace facebook::torchcodec
4 changes: 3 additions & 1 deletion src/torchcodec/_core/NVDECCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <torch/types.h>
#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
#include "src/torchcodec/_core/NVCUVIDLoader.h"

namespace facebook::torchcodec {

Expand All @@ -24,7 +25,8 @@ namespace facebook::torchcodec {
struct CUvideoDecoderDeleter {
void operator()(CUvideodecoder* decoderPtr) const {
if (decoderPtr && *decoderPtr) {
cuvidDestroyDecoder(*decoderPtr);
// Destroy via dynamic loader to avoid hard dependency on libnvcuvid.
NVCUVIDLoader::instance().api().cuvidDestroyDecoder(*decoderPtr);
delete decoderPtr;
}
}
Expand Down
Loading