diff --git a/.github/workflows/cpp_tests.yaml b/.github/workflows/cpp_tests.yaml index 6ae56c70f..5e31babc7 100644 --- a/.github/workflows/cpp_tests.yaml +++ b/.github/workflows/cpp_tests.yaml @@ -34,12 +34,12 @@ jobs: python-version: '3.12' - name: Update pip run: python -m pip install --upgrade pip - - name: Install dependencies + - name: Install torch dependencies run: | python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu - - name: Install ffmpeg and pkg-config + - name: Install ffmpeg, pkg-config and pybind11 run: | - conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config -c conda-forge + conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config pybind11 -c conda-forge ffmpeg -version - name: Build and run C++ tests run: | diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 89e8401de..60bfbfa2e 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -38,6 +38,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: main build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/pre_build_script.sh post-script: packaging/post_build_script.sh smoke-test-script: packaging/fake_smoke_test.py package-name: torchcodec diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 71cc071c8..c156a833c 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -63,7 +63,7 @@ jobs: - name: Install dependencies and FFmpeg run: | python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu - conda install "ffmpeg=7.0.1" pkg-config -c conda-forge + conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge ffmpeg -version - name: Build and install torchcodec run: | diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml index 65b06e933..53b5bfc20 100644 --- a/.github/workflows/linux_cuda_wheel.yaml +++ 
b/.github/workflows/linux_cuda_wheel.yaml @@ -48,6 +48,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: main build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/pre_build_script.sh post-script: packaging/post_build_script.sh smoke-test-script: packaging/fake_smoke_test.py package-name: torchcodec diff --git a/.github/workflows/linux_wheel.yaml b/.github/workflows/linux_wheel.yaml index f5e665f55..cd53219f7 100644 --- a/.github/workflows/linux_wheel.yaml +++ b/.github/workflows/linux_wheel.yaml @@ -49,6 +49,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: main build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/pre_build_script.sh post-script: packaging/post_build_script.sh smoke-test-script: packaging/fake_smoke_test.py package-name: torchcodec diff --git a/.github/workflows/macos_wheel.yaml b/.github/workflows/macos_wheel.yaml index b7cc965eb..d9472765c 100644 --- a/.github/workflows/macos_wheel.yaml +++ b/.github/workflows/macos_wheel.yaml @@ -49,6 +49,7 @@ jobs: test-infra-repository: pytorch/test-infra test-infra-ref: main build-matrix: ${{ needs.generate-matrix.outputs.matrix }} + pre-script: packaging/pre_build_script.sh post-script: packaging/post_build_script.sh smoke-test-script: packaging/fake_smoke_test.py runner-type: macos-m1-stable diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bc3ec3bb0..d516bc272 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,7 @@ test locally you will need the following dependencies: installation already. 
- cmake - pkg-config +- pybind11 - FFmpeg - PyTorch nightly @@ -29,7 +30,7 @@ Start by installing the **nightly** build of PyTorch following the Then, the easiest way to install the rest of the dependencies is to run: ```bash -conda install cmake pkg-config ffmpeg -c conda-forge +conda install cmake pkg-config pybind11 ffmpeg -c conda-forge ``` ### Clone and build diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh new file mode 100644 index 000000000..f22244e9c --- /dev/null +++ b/packaging/pre_build_script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -ex + +# We need to install pybind11 because we need its CMake helpers in order to +# compile correctly on Mac. Pybind11 is actually a C++ header-only library, +# and PyTorch actually has it included. PyTorch, however, does not have the +# CMake helpers. +conda install -y pybind11 -c conda-forge diff --git a/setup.py b/setup.py index 9120c7fe0..f16521764 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ def run(self): super().run() def build_extension(self, ext): - """Call our CMake build system to build libtorchcodec?.so""" + """Call our CMake build system to build libtorchcodec*.so""" # Setuptools was designed to build one extension (.so file) at a time, # calling this method for each Extension object. We're using a # CMake-based build where all our extensions are built together at once. @@ -136,21 +136,27 @@ def copy_extensions_to_source(self): This is called by setuptools at the end of .run() during editable installs. """ self.get_finalized_command("build_py") - extension = "" + extensions = [] if sys.platform == "linux": - extension = "so" + extensions = ["so"] elif sys.platform == "darwin": - extension = "dylib" + # Mac has BOTH .dylib and .so as library extensions. Short version + # is that a .dylib is a shared library that can be both dynamically + # loaded and depended on by other libraries; a .so can only be a + # dynamically loaded module.
For more, see: + # https://stackoverflow.com/a/2339910 + extensions = ["dylib", "so"] else: raise NotImplementedError( "Platforms other than linux/darwin are not supported yet" ) - for so_file in self._install_prefix.glob(f"*.{extension}"): - assert "libtorchcodec" in so_file.name - destination = Path("src/torchcodec/") / so_file.name - print(f"Copying {so_file} to {destination}") - self.copy_file(so_file, destination, level=self.verbose) + for ext in extensions: + for lib_file in self._install_prefix.glob(f"*.{ext}"): + assert "libtorchcodec" in lib_file.name + destination = Path("src/torchcodec/") / lib_file.name + print(f"Copying {lib_file} to {destination}") + self.copy_file(lib_file, destination, level=self.verbose) NOT_A_LICENSE_VIOLATION_VAR = "I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION" diff --git a/src/torchcodec/_internally_replaced_utils.py b/src/torchcodec/_internally_replaced_utils.py index 0833eb82f..a5a3ffa39 100644 --- a/src/torchcodec/_internally_replaced_utils.py +++ b/src/torchcodec/_internally_replaced_utils.py @@ -7,11 +7,12 @@ import importlib import sys from pathlib import Path +from types import ModuleType # Copy pasted from torchvision # https://github.com/pytorch/vision/blob/947ae1dc71867f28021d5bc0ff3a19c249236e2a/torchvision/_internally_replaced_utils.py#L25 -def _get_extension_path(lib_name): +def _get_extension_path(lib_name: str) -> str: extension_suffixes = [] if sys.platform == "linux": extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES @@ -31,6 +32,22 @@ def _get_extension_path(lib_name): ) ext_specs = extfinder.find_spec(lib_name) if ext_specs is None: - raise ImportError + raise ImportError(f"No spec found for {lib_name}") + + if ext_specs.origin is None: + raise ImportError(f"Existing spec found for {lib_name} does not have an origin") return ext_specs.origin + + +def _load_pybind11_module(module_name: str, library_path: str) -> ModuleType: + spec = importlib.util.spec_from_file_location( + module_name, + library_path, + 
) + if spec is None: + raise ImportError( + f"Unable to load spec for module {module_name} from path {library_path}" + ) + + return importlib.util.module_from_spec(spec) diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.cpp b/src/torchcodec/decoders/_core/AVIOBytesContext.cpp new file mode 100644 index 000000000..0d1e9d413 --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOBytesContext.cpp @@ -0,0 +1,70 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include "src/torchcodec/decoders/_core/AVIOBytesContext.h" +#include + +namespace facebook::torchcodec { + +AVIOBytesContext::AVIOBytesContext(const void* data, int64_t dataSize) + : dataContext_{static_cast(data), dataSize, 0} { + TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!"); + TORCH_CHECK(dataSize > 0, "Video data size must be positive"); + createAVIOContext(&read, &seek, &dataContext_); +} + +// The signature of this function is defined by FFMPEG. +int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { + auto dataContext = static_cast(opaque); + TORCH_CHECK( + dataContext->current <= dataContext->size, + "Tried to read outside of the buffer: current=", + dataContext->current, + ", size=", + dataContext->size); + + int64_t numBytesRead = std::min( + static_cast(buf_size), dataContext->size - dataContext->current); + + TORCH_CHECK( + numBytesRead >= 0, + "Tried to read negative bytes: numBytesRead=", + numBytesRead, + ", size=", + dataContext->size, + ", current=", + dataContext->current); + + if (numBytesRead == 0) { + return AVERROR_EOF; + } + + std::memcpy(buf, dataContext->data + dataContext->current, numBytesRead); + dataContext->current += numBytesRead; + return numBytesRead; +} + +// The signature of this function is defined by FFMPEG. 
+int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { + auto dataContext = static_cast(opaque); + int64_t ret = -1; + + switch (whence) { + case AVSEEK_SIZE: + ret = dataContext->size; + break; + case SEEK_SET: + dataContext->current = offset; + ret = offset; + break; + default: + break; + } + + return ret; +} + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/AVIOBytesContext.h b/src/torchcodec/decoders/_core/AVIOBytesContext.h new file mode 100644 index 000000000..411866dc0 --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOBytesContext.h @@ -0,0 +1,32 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include "src/torchcodec/decoders/_core/AVIOContextHolder.h" + +namespace facebook::torchcodec { + +// Enables users to pass in the entire video as bytes. Our read and seek +// functions then traverse the bytes in memory. +class AVIOBytesContext : public AVIOContextHolder { + public: + explicit AVIOBytesContext(const void* data, int64_t dataSize); + + private: + struct DataContext { + const uint8_t* data; + int64_t size; + int64_t current; + }; + + static int read(void* opaque, uint8_t* buf, int buf_size); + static int64_t seek(void* opaque, int64_t offset, int whence); + + DataContext dataContext_; +}; + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.cpp b/src/torchcodec/decoders/_core/AVIOContextHolder.cpp new file mode 100644 index 000000000..1fc4f5ecf --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOContextHolder.cpp @@ -0,0 +1,50 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#include "src/torchcodec/decoders/_core/AVIOContextHolder.h" +#include + +namespace facebook::torchcodec { + +void AVIOContextHolder::createAVIOContext( + AVIOReadFunction read, + AVIOSeekFunction seek, + void* heldData, + int bufferSize) { + TORCH_CHECK( + bufferSize > 0, + "Buffer size must be greater than 0; is " + std::to_string(bufferSize)); + auto buffer = static_cast(av_malloc(bufferSize)); + TORCH_CHECK( + buffer != nullptr, + "Failed to allocate buffer of size " + std::to_string(bufferSize)); + + avioContext_.reset(avio_alloc_context( + buffer, + bufferSize, + 0, + heldData, + read, + nullptr, // write function; not supported yet + seek)); + + if (!avioContext_) { + av_freep(&buffer); + TORCH_CHECK(false, "Failed to allocate AVIOContext"); + } +} + +AVIOContextHolder::~AVIOContextHolder() { + if (avioContext_) { + av_freep(&avioContext_->buffer); + } +} + +AVIOContext* AVIOContextHolder::getAVIOContext() { + return avioContext_.get(); +} + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/AVIOContextHolder.h b/src/torchcodec/decoders/_core/AVIOContextHolder.h new file mode 100644 index 000000000..26bb06f08 --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOContextHolder.h @@ -0,0 +1,65 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include "src/torchcodec/decoders/_core/FFMPEGCommon.h" + +namespace facebook::torchcodec { + +// The AVIOContextHolder serves several purposes: +// +// 1. It is a smart pointer for the AVIOContext. It has the logic to create +// a new AVIOContext and will appropriately free the AVIOContext when it +// goes out of scope. Note that this requires more than just having a +// UniqueAVIOContext, as the AVIOContext points to a buffer which must be +// freed. +// 2. 
It is a base class for AVIOContext specializations. When specializing an +// AVIOContext, we need to provide four things: +// 1. A read callback function. +// 2. A seek callback function. +// 3. A write callback function. (Not supported yet; it's for encoding.) +// 4. A pointer to some context object that has the same lifetime as the +// AVIOContext itself. This context object holds the custom state that +// tracks the custom behavior of reading, seeking and writing. It is +// provided upon AVIOContext creation and to the read, seek and +// write callback functions. +// While it's not required, it is natural for the derived classes to make +// all of the above members. Derived classes need to call +// createAVIOContext(), ideally in their constructor. +// 3. A generic handle for those that just need to manage having access to an +// AVIOContext, but aren't necessarily concerned with how it was customized: +// typically, the VideoDecoder. +class AVIOContextHolder { + public: + virtual ~AVIOContextHolder(); + AVIOContext* getAVIOContext(); + + protected: + // Make constructor protected to prevent anyone from constructing + // an AVIOContextHolder without deriving it. (Ordinarily this would be + // enforced by having a pure virtual method, but we don't have any.) + AVIOContextHolder() = default; + + // These signatures are defined by FFmpeg. + using AVIOReadFunction = int (*)(void*, uint8_t*, int); + using AVIOSeekFunction = int64_t (*)(void*, int64_t, int); + + // Deriving classes should call this function in their constructor.
+ void createAVIOContext( + AVIOReadFunction read, + AVIOSeekFunction seek, + void* heldData, + int bufferSize = defaultBufferSize); + + private: + UniqueAVIOContext avioContext_; + + // Defaults to 64 KB + static const int defaultBufferSize = 64 * 1024; +}; + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp b/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp new file mode 100644 index 000000000..60d1503ae --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOFileLikeContext.cpp @@ -0,0 +1,80 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include "src/torchcodec/decoders/_core/AVIOFileLikeContext.h" +#include + +namespace facebook::torchcodec { + +AVIOFileLikeContext::AVIOFileLikeContext(py::object fileLike) + : fileLike_{UniquePyObject(new py::object(fileLike))} { + { + // TODO: Is it necessary to acquire the GIL here? Is it maybe even + // harmful? At the moment, this is only called from within a pybind + // function, and pybind guarantees we have the GIL. + py::gil_scoped_acquire gil; + TORCH_CHECK( + py::hasattr(fileLike, "read"), + "File like object must implement a read method."); + TORCH_CHECK( + py::hasattr(fileLike, "seek"), + "File like object must implement a seek method."); + } + createAVIOContext(&read, &seek, &fileLike_); +} + +int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) { + auto fileLike = static_cast(opaque); + + // Note that we acquire the GIL outside of the loop. This is likely more + // efficient than releasing and acquiring it each loop iteration. + py::gil_scoped_acquire gil; + + int totalNumRead = 0; + while (totalNumRead < buf_size) { + int request = buf_size - totalNumRead; + + // The Python method returns the actual bytes, which we access through the + // py::bytes wrapper. 
That wrapper, however, does not provide us access to + // the underlying data pointer, which we need for the memcpy below. So we + // convert the bytes to a string_view to get access to the data pointer. + // Because it's a view and not a copy, it should be cheap. + auto bytesRead = static_cast((*fileLike)->attr("read")(request)); + auto bytesView = static_cast(bytesRead); + + int numBytesRead = static_cast(bytesView.size()); + if (numBytesRead == 0) { + break; + } + + TORCH_CHECK( + numBytesRead <= request, + "Requested up to ", + request, + " bytes but, received ", + numBytesRead, + " bytes. The given object does not conform to read protocol of file object."); + + std::memcpy(buf, bytesView.data(), numBytesRead); + buf += numBytesRead; + totalNumRead += numBytesRead; + } + + return totalNumRead == 0 ? AVERROR_EOF : totalNumRead; +} + +int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) { + // We do not know the file size. + if (whence == AVSEEK_SIZE) { + return AVERROR(EIO); + } + + auto fileLike = static_cast(opaque); + py::gil_scoped_acquire gil; + return py::cast((*fileLike)->attr("seek")(offset, whence)); +} + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/AVIOFileLikeContext.h b/src/torchcodec/decoders/_core/AVIOFileLikeContext.h new file mode 100644 index 000000000..7be07f2b6 --- /dev/null +++ b/src/torchcodec/decoders/_core/AVIOFileLikeContext.h @@ -0,0 +1,54 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include +#include + +#include "src/torchcodec/decoders/_core/AVIOContextHolder.h" + +namespace py = pybind11; + +namespace facebook::torchcodec { + +// Enables users to pass in a Python file-like object. We then forward all read +// and seek calls back up to the methods on the Python object.
+class AVIOFileLikeContext : public AVIOContextHolder { + public: + explicit AVIOFileLikeContext(py::object fileLike); + + private: + static int read(void* opaque, uint8_t* buf, int buf_size); + static int64_t seek(void* opaque, int64_t offset, int whence); + + // Note that we dynamically allocate the Python object because we need to + // strictly control when its destructor is called. We must hold the GIL + // when its destructor gets called, as it needs to update the reference + // count. It's easiest to control that when it's dynamic memory. Otherwise, + // we'd have to ensure whatever enclosing scope holds the object has the GIL, + // and that's, at least, hard. For all of the common pitfalls, see: + // + // https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors + // + // We maintain a reference to the file-like object because the file-like + // object that was created on the Python side must live as long as our + // potential use. That is, even if there are no more references to the object + // on the Python side, we require that the object is still live. 
+ struct PyObjectDeleter { + inline void operator()(py::object* obj) const { + if (obj) { + py::gil_scoped_acquire gil; + delete obj; + } + } + }; + + using UniquePyObject = std::unique_ptr; + UniquePyObject fileLike_; +}; + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/CMakeLists.txt b/src/torchcodec/decoders/_core/CMakeLists.txt index 8e4e6c5df..f0a8568fe 100644 --- a/src/torchcodec/decoders/_core/CMakeLists.txt +++ b/src/torchcodec/decoders/_core/CMakeLists.txt @@ -3,62 +3,167 @@ project(TorchCodec) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(PYBIND11_FINDPYTHON ON) +find_package(pybind11 REQUIRED) find_package(Torch REQUIRED) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}") find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development) -function(make_torchcodec_library library_name ffmpeg_target) - set( - sources - FFMPEGCommon.h +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -Werror ${TORCH_CXX_FLAGS}") + +function(make_torchcodec_sublibrary + library_name + type + sources + library_dependencies) + + add_library(${library_name} ${type} ${sources}) + set_target_properties(${library_name} PROPERTIES CXX_STANDARD 17) + target_include_directories(${library_name} + PRIVATE + ./../../../../ + "${TORCH_INSTALL_PREFIX}/include" + ${Python3_INCLUDE_DIRS} + ) + + # Avoid adding the "lib" prefix which we already add explicitly. + set_target_properties(${library_name} PROPERTIES PREFIX "") + + target_link_libraries( + ${library_name} + PUBLIC + ${library_dependencies} + ) +endfunction() + +function(make_torchcodec_libraries + ffmpeg_major_version + ffmpeg_target) + + # We create three shared libraries per version of FFmpeg, where the version + # is denoted by N: + # + # 1. libtorchcodec_decoderN.{ext}: Base library which contains the + # implementation of VideoDecoder and everything VideoDecoder needs. On + # Linux, {ext} is so. 
On Mac, it is dylib. + # + # 2. libtorchcodec_custom_opsN.{ext}: Implementation of the PyTorch custom + # ops. Depends on libtorchcodec_decoderN.{ext}. On Linux, {ext} is so. + # On Mac, it is dylib. + # + # 3. libtorchcodec_pybind_opsN.{ext}: Implementation of the pybind11 ops. We + # keep these separate from the PyTorch custom ops because we have to + # load these libraries separately on the Python side. Depends on + # libtorchcodec_decoderN.{ext}. On BOTH Linux and Mac {ext} is so. + + # 1. Create libtorchcodec_decoderN.{ext}. + set(decoder_library_name "libtorchcodec_decoder${ffmpeg_major_version}") + set(decoder_sources + AVIOContextHolder.cpp FFMPEGCommon.cpp - VideoDecoder.h VideoDecoder.cpp - VideoDecoderOps.h - VideoDecoderOps.cpp - DeviceInterface.h ) + if(ENABLE_CUDA) - list(APPEND sources CudaDevice.cpp) + list(APPEND decoder_sources CudaDevice.cpp) else() - list(APPEND sources CPUOnlyDevice.cpp) + list(APPEND decoder_sources CPUOnlyDevice.cpp) endif() - add_library(${library_name} SHARED ${sources}) - set_property(TARGET ${library_name} PROPERTY CXX_STANDARD 17) - target_include_directories( - ${library_name} - PRIVATE - ./../../../../ - "${TORCH_INSTALL_PREFIX}/include" - ${Python3_INCLUDE_DIRS} + set(decoder_library_dependencies + ${ffmpeg_target} + ${TORCH_LIBRARIES} ) - set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES} - ${Python3_LIBRARIES}) if(ENABLE_CUDA) - list(APPEND NEEDED_LIBRARIES - ${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} ) + list(APPEND decoder_library_dependencies + ${CUDA_nppi_LIBRARY} + ${CUDA_nppicc_LIBRARY} + ) endif() - target_link_libraries( - ${library_name} + + make_torchcodec_sublibrary( + "${decoder_library_name}" + SHARED + "${decoder_sources}" + "${decoder_library_dependencies}" + ) + + # 2. Create libtorchcodec_custom_opsN.{ext}. 
+ set(custom_ops_library_name "libtorchcodec_custom_ops${ffmpeg_major_version}") + set(custom_ops_sources + AVIOBytesContext.cpp + VideoDecoderOps.cpp + ) + set(custom_ops_dependencies + ${decoder_library_name} + ${Python3_LIBRARIES} + ) + make_torchcodec_sublibrary( + "${custom_ops_library_name}" + SHARED + "${custom_ops_sources}" + "${custom_ops_dependencies}" + ) + + # 3. Create libtorchcodec_pybind_opsN.so. + set(pybind_ops_library_name "libtorchcodec_pybind_ops${ffmpeg_major_version}") + set(pybind_ops_sources + AVIOFileLikeContext.cpp + PyBindOps.cpp + ) + set(pybind_ops_dependencies + ${decoder_library_name} + pybind11::module # This library dependency makes sure we have the right + # Python libraries included as well as all of the right + # settings so that we can successfully load the shared + # library as a Python module on Mac. If we instead use + # ${Python3_LIBRARIES}, it works on Linux but not on + # Mac. + ) + make_torchcodec_sublibrary( + "${pybind_ops_library_name}" + MODULE # Note that this is not SHARED; otherwise we build the wrong kind + # of library on Mac. On Mac, SHARED becomes .dylib and MODULE becomes + # a .so. We want pybind11 libraries to become .so. If this is + # changed to SHARED, we will be able to successfully compile a + # .dylib, but we will not be able to successfully import that as + # a Python module on Mac. + "${pybind_ops_sources}" + "${pybind_ops_dependencies}" + ) + # pybind11 limits the visibility of symbols in the shared library to prevent + # stray initialization of py::objects. The rest of the object code must + # match. See: + # https://pybind11.readthedocs.io/en/stable/faq.html#someclass-declared-with-greater-visibility-than-the-type-of-its-field-someclass-member-wattributes + target_compile_options( + ${pybind_ops_library_name} + PUBLIC + "-fvisibility=hidden" + ) + # If we don't make sure this flag is set, we run into segfaults at import + # time on Mac.
See: + # https://github.com/pybind/pybind11/issues/3907#issuecomment-1170412764 + target_link_options( + ${pybind_ops_library_name} PUBLIC - ${NEEDED_LIBRARIES} + "-undefined dynamic_lookup" ) - # We already set the library_name to be libtorchcodecN, so we don't want - # cmake to add another "lib" prefix. We do it this way because it makes it - # easier to find references to libtorchcodec in the code (e.g. via `git - # grep`) - set_target_properties(${library_name} PROPERTIES PREFIX "") + # Install all libraries. + set( + all_libraries + ${decoder_library_name} + ${custom_ops_library_name} + ${pybind_ops_library_name} + ) # The install step is invoked within CMakeBuild.build_library() in - # setup.py and just copies the built .so files from the temp + # setup.py and just copies the built files from the temp # cmake/setuptools build folder into the CMAKE_INSTALL_PREFIX folder. We # still need to manually pass "DESTINATION ..." for cmake to copy those # files in CMAKE_INSTALL_PREFIX instead of CMAKE_INSTALL_PREFIX/lib. 
install( - TARGETS ${library_name} + TARGETS ${all_libraries} LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX} ) endfunction() @@ -76,12 +181,10 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3}) ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake ) - - make_torchcodec_library(libtorchcodec7 ffmpeg7) - make_torchcodec_library(libtorchcodec6 ffmpeg6) - make_torchcodec_library(libtorchcodec5 ffmpeg5) - make_torchcodec_library(libtorchcodec4 ffmpeg4) - + make_torchcodec_libraries(7 ffmpeg7) + make_torchcodec_libraries(6 ffmpeg6) + make_torchcodec_libraries(4 ffmpeg4) + make_torchcodec_libraries(5 ffmpeg5) else() message( STATUS @@ -121,10 +224,12 @@ else() ) endif() - set(libtorchcodec_target_name libtorchcodec${ffmpeg_major_version}) - # Make libtorchcodec_target_name available in the parent's scope, for the - # test's CMakeLists.txt - set(libtorchcodec_target_name ${libtorchcodec_target_name} PARENT_SCOPE) + make_torchcodec_libraries(${ffmpeg_major_version} PkgConfig::LIBAV) - make_torchcodec_library(${libtorchcodec_target_name} PkgConfig::LIBAV) + # Expose these values upwards so that the test compilation does not need + # to re-figure it out. FIXME: it's not great that we just copy-paste the + # library names.
+ set(libtorchcodec_library_name "libtorchcodec_decoder${ffmpeg_major_version}" PARENT_SCOPE) + set(libtorchcodec_custom_ops_name "libtorchcodec_custom_ops${ffmpeg_major_version}" PARENT_SCOPE) + set(libav_include_dirs ${LIBAV_INCLUDE_DIRS} PARENT_SCOPE) endif() diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp index 1e3a1421f..8bb41a85d 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp @@ -126,88 +126,4 @@ SwrContext* allocateSwrContext( return swrContext; } -AVIOBytesContext::AVIOBytesContext( - const void* data, - size_t dataSize, - size_t bufferSize) - : bufferData_{static_cast(data), dataSize, 0} { - auto buffer = static_cast(av_malloc(bufferSize)); - TORCH_CHECK( - buffer != nullptr, - "Failed to allocate buffer of size " + std::to_string(bufferSize)); - - avioContext_.reset(avio_alloc_context( - buffer, - bufferSize, - 0, - &bufferData_, - &AVIOBytesContext::read, - nullptr, - &AVIOBytesContext::seek)); - - if (!avioContext_) { - av_freep(&buffer); - TORCH_CHECK(false, "Failed to allocate AVIOContext"); - } -} - -AVIOBytesContext::~AVIOBytesContext() { - if (avioContext_) { - av_freep(&avioContext_->buffer); - } -} - -AVIOContext* AVIOBytesContext::getAVIO() { - return avioContext_.get(); -} - -// The signature of this function is defined by FFMPEG. 
-int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { - auto bufferData = static_cast(opaque); - TORCH_CHECK( - bufferData->current <= bufferData->size, - "Tried to read outside of the buffer: current=", - bufferData->current, - ", size=", - bufferData->size); - - buf_size = - FFMIN(buf_size, static_cast(bufferData->size - bufferData->current)); - TORCH_CHECK( - buf_size >= 0, - "Tried to read negative bytes: buf_size=", - buf_size, - ", size=", - bufferData->size, - ", current=", - bufferData->current); - - if (!buf_size) { - return AVERROR_EOF; - } - memcpy(buf, bufferData->data + bufferData->current, buf_size); - bufferData->current += buf_size; - return buf_size; -} - -// The signature of this function is defined by FFMPEG. -int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { - auto bufferData = static_cast(opaque); - int64_t ret = -1; - - switch (whence) { - case AVSEEK_SIZE: - ret = bufferData->size; - break; - case SEEK_SET: - bufferData->current = offset; - ret = offset; - break; - default: - break; - } - - return ret; -} - } // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h index c61ae2876..0309bf933 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.h +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.h @@ -157,34 +157,4 @@ SwrContext* allocateSwrContext( // Returns true if sws_scale can handle unaligned data. bool canSwsScaleHandleUnalignedData(); -// A struct that holds state for reading bytes from an IO context. -// We give this to FFMPEG and it will pass it back to us when it needs to read -// or seek in the memory buffer. -struct AVIOBufferData { - const uint8_t* data; - size_t size; - size_t current; -}; - -// A class that can be used as AVFormatContext's IO context. It reads from a -// memory buffer that is passed in. 
-class AVIOBytesContext { - public: - AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize); - ~AVIOBytesContext(); - - // Returns the AVIOContext that can be passed to FFMPEG. - AVIOContext* getAVIO(); - - // The signature of this function is defined by FFMPEG. - static int read(void* opaque, uint8_t* buf, int buf_size); - - // The signature of this function is defined by FFMPEG. - static int64_t seek(void* opaque, int64_t offset, int whence); - - private: - UniqueAVIOContext avioContext_; - struct AVIOBufferData bufferData_; -}; - } // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/PyBindOps.cpp b/src/torchcodec/decoders/_core/PyBindOps.cpp new file mode 100644 index 000000000..0b0f6f177 --- /dev/null +++ b/src/torchcodec/decoders/_core/PyBindOps.cpp @@ -0,0 +1,45 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include +#include +#include +#include + +#include "src/torchcodec/decoders/_core/AVIOFileLikeContext.h" +#include "src/torchcodec/decoders/_core/VideoDecoder.h" + +namespace py = pybind11; + +namespace facebook::torchcodec { + +// In principle, this should be able to return a tensor. But when we try that, +// we run into the bug reported here: +// +// https://github.com/pytorch/pytorch/issues/136664 +// +// So we instead launder the pointer through an int, and then use a conversion +// function on the custom ops side to launder that int into a tensor. 
+int64_t create_from_file_like( + py::object file_like, + std::optional seek_mode) { + VideoDecoder::SeekMode realSeek = VideoDecoder::SeekMode::exact; + if (seek_mode.has_value()) { + realSeek = seekModeFromString(seek_mode.value()); + } + + auto avioContextHolder = std::make_unique(file_like); + + VideoDecoder* decoder = + new VideoDecoder(std::move(avioContextHolder), realSeek); + return reinterpret_cast(decoder); +} + +PYBIND11_MODULE(decoder_core_pybind_ops, m) { + m.def("create_from_file_like", &create_from_file_like); +} + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 3d51b8a35..a379cc0b0 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -66,15 +66,13 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode) initializeDecoder(); } -VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) - : seekMode_(seekMode) { - TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!"); - +VideoDecoder::VideoDecoder( + std::unique_ptr context, + SeekMode seekMode) + : seekMode_(seekMode), avioContextHolder_(std::move(context)) { setFFmpegLogLevel(); - constexpr int bufferSize = 64 * 1024; - ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize)); - TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext"); + TORCH_CHECK(avioContextHolder_, "Context holder cannot be null"); // Because FFmpeg requires a reference to a pointer in the call to open, we // can't use a unique pointer here. 
Note that means we must call free if open @@ -82,7 +80,7 @@ VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) AVFormatContext* rawContext = avformat_alloc_context(); TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context"); - rawContext->pb = ioBytesContext_->getAVIO(); + rawContext->pb = avioContextHolder_->getAVIOContext(); int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr); if (status != 0) { avformat_free_context(rawContext); @@ -2067,4 +2065,14 @@ FrameDims getHeightAndWidthFromOptionsOrAVFrame( videoStreamOptions.width.value_or(avFrame->width)); } +VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) { + if (seekMode == "exact") { + return VideoDecoder::SeekMode::exact; + } else if (seekMode == "approximate") { + return VideoDecoder::SeekMode::approximate; + } else { + TORCH_CHECK(false, "Invalid seek mode: " + std::string(seekMode)); + } +} + } // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h index 4d3e2f2ce..1f8e423be 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.h +++ b/src/torchcodec/decoders/_core/VideoDecoder.h @@ -12,6 +12,7 @@ #include #include +#include "src/torchcodec/decoders/_core/AVIOContextHolder.h" #include "src/torchcodec/decoders/_core/FFMPEGCommon.h" namespace facebook::torchcodec { @@ -34,11 +35,12 @@ class VideoDecoder { const std::string& videoFilePath, SeekMode seekMode = SeekMode::exact); - // Creates a VideoDecoder from a given buffer of data. Note that the data is - // not owned by the VideoDecoder. + // Creates a VideoDecoder using the provided AVIOContext inside the + // AVIOContextHolder. The AVIOContextHolder is the base class, and the + // derived class will have specialized how the custom read, seek and writes + // work. 
explicit VideoDecoder( - const void* data, - size_t length, + std::unique_ptr context, SeekMode seekMode = SeekMode::exact); // -------------------------------------------------------------------------- @@ -501,7 +503,7 @@ class VideoDecoder { // Stores various internal decoding stats. DecodeStats decodeStats_; // Stores the AVIOContext for the input buffer. - std::unique_ptr ioBytesContext_; + std::unique_ptr avioContextHolder_; // Whether or not we have already scanned all streams to update the metadata. bool scannedAllStreams_ = false; // Tracks that we've already been initialized. @@ -583,4 +585,6 @@ std::ostream& operator<<( std::ostream& os, const VideoDecoder::DecodeStats& stats); +VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode); + } // namespace facebook::torchcodec diff --git a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp index 786d3f327..bd142d70e 100644 --- a/src/torchcodec/decoders/_core/VideoDecoderOps.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoderOps.cpp @@ -11,6 +11,7 @@ #include #include "c10/core/SymIntArrayRef.h" #include "c10/util/Exception.h" +#include "src/torchcodec/decoders/_core/AVIOBytesContext.h" #include "src/torchcodec/decoders/_core/VideoDecoder.h" namespace facebook::torchcodec { @@ -29,6 +30,7 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def("create_from_file(str filename, str? seek_mode=None) -> Tensor"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); + m.def("_convert_to_tensor(int decoder_ptr) -> Tensor"); m.def( "_add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str? device=None, str? 
color_conversion_library=None) -> ()"); m.def( @@ -63,13 +65,14 @@ TORCH_LIBRARY(torchcodec_ns, m) { } namespace { + at::Tensor wrapDecoderPointerToTensor( std::unique_ptr uniqueDecoder) { VideoDecoder* decoder = uniqueDecoder.release(); auto deleter = [decoder](void*) { delete decoder; }; at::Tensor tensor = - at::from_blob(decoder, {sizeof(VideoDecoder)}, deleter, {at::kLong}); + at::from_blob(decoder, {sizeof(VideoDecoder*)}, deleter, {at::kLong}); auto videoDecoder = static_cast(tensor.mutable_data_ptr()); TORCH_CHECK_EQ(videoDecoder, decoder) << "videoDecoder=" << videoDecoder; return tensor; @@ -100,17 +103,6 @@ OpsAudioFramesOutput makeOpsAudioFramesOutput( audioFrames.data, torch::tensor(audioFrames.ptsSeconds, torch::dtype(torch::kFloat64))); } - -VideoDecoder::SeekMode seekModeFromString(std::string_view seekMode) { - if (seekMode == "exact") { - return VideoDecoder::SeekMode::exact; - } else if (seekMode == "approximate") { - return VideoDecoder::SeekMode::approximate; - } else { - throw std::runtime_error("Invalid seek mode: " + std::string(seekMode)); - } -} - } // namespace // ============================== @@ -137,7 +129,10 @@ at::Tensor create_from_tensor( at::Tensor video_tensor, std::optional seek_mode) { TORCH_CHECK(video_tensor.is_contiguous(), "video_tensor must be contiguous"); - void* buffer = video_tensor.mutable_data_ptr(); + TORCH_CHECK( + video_tensor.scalar_type() == torch::kUInt8, + "video_tensor must be kUInt8"); + void* data = video_tensor.mutable_data_ptr(); size_t length = video_tensor.numel(); VideoDecoder::SeekMode realSeek = VideoDecoder::SeekMode::exact; @@ -145,8 +140,16 @@ at::Tensor create_from_tensor( realSeek = seekModeFromString(seek_mode.value()); } + auto contextHolder = std::make_unique(data, length); + std::unique_ptr uniqueDecoder = - std::make_unique(buffer, length, realSeek); + std::make_unique(std::move(contextHolder), realSeek); + return wrapDecoderPointerToTensor(std::move(uniqueDecoder)); +} + +at::Tensor 
_convert_to_tensor(int64_t decoder_ptr) { + auto decoder = reinterpret_cast(decoder_ptr); + std::unique_ptr uniqueDecoder(decoder); return wrapDecoderPointerToTensor(std::move(uniqueDecoder)); } @@ -550,6 +553,7 @@ void scan_all_streams_to_update_metadata(at::Tensor& decoder) { TORCH_LIBRARY_IMPL(torchcodec_ns, BackendSelect, m) { m.impl("create_from_file", &create_from_file); m.impl("create_from_tensor", &create_from_tensor); + m.impl("_convert_to_tensor", &_convert_to_tensor); m.impl( "_get_json_ffmpeg_library_versions", &_get_json_ffmpeg_library_versions); } diff --git a/src/torchcodec/decoders/_core/__init__.py b/src/torchcodec/decoders/_core/__init__.py index 490e3d834..9de779f60 100644 --- a/src/torchcodec/decoders/_core/__init__.py +++ b/src/torchcodec/decoders/_core/__init__.py @@ -20,6 +20,7 @@ add_video_stream, create_from_bytes, create_from_file, + create_from_file_like, create_from_tensor, get_ffmpeg_library_versions, get_frame_at_index, diff --git a/src/torchcodec/decoders/_core/ops.py b/src/torchcodec/decoders/_core/ops.py index e8efa45f2..0f0bdfe25 100644 --- a/src/torchcodec/decoders/_core/ops.py +++ b/src/torchcodec/decoders/_core/ops.py @@ -4,41 +4,68 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import io import json import warnings -from typing import List, Optional, Tuple +from types import ModuleType +from typing import List, Optional, Tuple, Union import torch from torch.library import get_ctx, register_fake from torchcodec._internally_replaced_utils import ( # @manual=//pytorch/torchcodec/src:internally_replaced_utils _get_extension_path, + _load_pybind11_module, ) +_pybind_ops: Optional[ModuleType] = None -def load_torchcodec_extension(): - # Successively try to load libtorchcodec7.so, libtorchcodec6.so, - # libtorchcodec5.so, and libtorchcodec4.so. 
Each of these correspond to an + +def load_torchcodec_shared_libraries(): + # Successively try to load libtorchcodec_*7.so, libtorchcodec_*6.so, + # libtorchcodec_*5.so, and libtorchcodec_*4.so. Each of these correspond to an # ffmpeg major version. This should cover all potential ffmpeg versions # installed on the user's machine. # # On fbcode, _get_extension_path() is overridden and directly points to the # correct .so file, so this for-loop succeeds on the first iteration. + # + # Note that we use two different methods for loading shared libraries: + # + # 1. torch.ops.load_library(): For PyTorch custom ops and the C++ only + # libraries the custom ops depend on. Loading libraries through PyTorch + # registers the custom ops with PyTorch's runtime and the ops can be + # accessed through torch.ops after loading. + # + # 2. importlib: For pybind11 modules. We load them dynamically, rather + # than using a plain import statement. A plain import statement only + # works when the module name and file name match exactly. Our shared + # libraries do not meet those conditions. 
exceptions = [] + pybind_ops_module_name = "decoder_core_pybind_ops" for ffmpeg_major_version in (7, 6, 5, 4): - library_name = f"libtorchcodec{ffmpeg_major_version}" + decoder_library_name = f"libtorchcodec_decoder{ffmpeg_major_version}" + custom_ops_library_name = f"libtorchcodec_custom_ops{ffmpeg_major_version}" + pybind_ops_library_name = f"libtorchcodec_pybind_ops{ffmpeg_major_version}" try: - torch.ops.load_library(_get_extension_path(library_name)) + torch.ops.load_library(_get_extension_path(decoder_library_name)) + torch.ops.load_library(_get_extension_path(custom_ops_library_name)) + + pybind_ops_library_path = _get_extension_path(pybind_ops_library_name) + global _pybind_ops + _pybind_ops = _load_pybind11_module( + pybind_ops_module_name, pybind_ops_library_path + ) return except Exception as e: # TODO: recording and reporting exceptions this way is OK for now as it's just for debugging, # but we should probably handle that via a proper logging mechanism. - exceptions.append(e) + exceptions.append((ffmpeg_major_version, e)) traceback = ( "\n[start of libtorchcodec loading traceback]\n" - + "\n".join(str(e) for e in exceptions) + + "\n".join(f"FFmpeg version {v}: {str(e)}" for v, e in exceptions) + "\n[end of libtorchcodec loading traceback]." 
) raise RuntimeError( @@ -56,7 +83,7 @@ def load_torchcodec_extension(): ) -load_torchcodec_extension() +load_torchcodec_shared_libraries() # Note: We use disallow_in_graph because PyTorch does constant propagation of @@ -67,6 +94,9 @@ def load_torchcodec_extension(): create_from_tensor = torch._dynamo.disallow_in_graph( torch.ops.torchcodec_ns.create_from_tensor.default ) +_convert_to_tensor = torch._dynamo.disallow_in_graph( + torch.ops.torchcodec_ns._convert_to_tensor.default +) add_video_stream = torch.ops.torchcodec_ns.add_video_stream.default _add_video_stream = torch.ops.torchcodec_ns._add_video_stream.default add_audio_stream = torch.ops.torchcodec_ns.add_audio_stream.default @@ -110,6 +140,13 @@ def create_from_bytes( return create_from_tensor(buffer, seek_mode) +def create_from_file_like( + file_like: Union[io.RawIOBase, io.BytesIO], seek_mode: Optional[str] = None +) -> torch.Tensor: + assert _pybind_ops is not None + return _convert_to_tensor(_pybind_ops.create_from_file_like(file_like, seek_mode)) + + # ============================== # Abstract impl for the operators. Needed by torch.compile. 
# ============================== @@ -125,6 +162,11 @@ def create_from_tensor_abstract( return torch.empty([], dtype=torch.long) +@register_fake("torchcodec_ns::_convert_to_tensor") +def _convert_to_tensor_abstract(decoder_ptr: int) -> torch.Tensor: + return torch.empty([], dtype=torch.long) + + @register_fake("torchcodec_ns::_add_video_stream") def _add_video_stream_abstract( decoder: torch.Tensor, diff --git a/test/decoders/CMakeLists.txt b/test/decoders/CMakeLists.txt index 3350c92c5..126dd2794 100644 --- a/test/decoders/CMakeLists.txt +++ b/test/decoders/CMakeLists.txt @@ -22,11 +22,13 @@ add_executable( ) target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS}) +target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${libav_include_dirs}) target_include_directories(VideoDecoderTest PRIVATE ../../) target_link_libraries( VideoDecoderTest - ${libtorchcodec_target_name} + ${libtorchcodec_library_name} + ${libtorchcodec_custom_ops_name} GTest::gtest_main ) diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp index f7747ef6b..dabe70cd0 100644 --- a/test/decoders/VideoDecoderTest.cpp +++ b/test/decoders/VideoDecoderTest.cpp @@ -5,6 +5,7 @@ // LICENSE file in the root directory of this source tree. 
#include "src/torchcodec/decoders/_core/VideoDecoder.h" +#include "src/torchcodec/decoders/_core/AVIOBytesContext.h" #include #include @@ -48,10 +49,12 @@ class VideoDecoderTest : public testing::TestWithParam { std::ifstream input(filepath, std::ios::binary); outputStringStream << input.rdbuf(); content_ = outputStringStream.str(); + void* buffer = content_.data(); - size_t length = outputStringStream.str().length(); + size_t length = content_.length(); + auto contextHolder = std::make_unique(buffer, length); return std::make_unique( - buffer, length, VideoDecoder::SeekMode::approximate); + std::move(contextHolder), VideoDecoder::SeekMode::approximate); } else { return std::make_unique( filepath, VideoDecoder::SeekMode::approximate); diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py index 264417c3f..9efb33f35 100644 --- a/test/decoders/test_ops.py +++ b/test/decoders/test_ops.py @@ -23,6 +23,7 @@ add_video_stream, create_from_bytes, create_from_file, + create_from_file_like, create_from_tensor, get_ffmpeg_library_versions, get_frame_at_index, @@ -345,7 +346,10 @@ def get_frame1_and_frame_time6(decoder): assert_frames_equal(frame_time6, reference_frame_time6.to(device)) @pytest.mark.parametrize("device", cpu_and_cuda()) - @pytest.mark.parametrize("create_from", ("file", "tensor", "bytes")) + @pytest.mark.parametrize( + "create_from", + ("file", "tensor", "bytes", "file_like_rawio", "file_like_bufferedio"), + ) def test_create_decoder(self, create_from, device): path = str(NASA_VIDEO.path) if create_from == "file": @@ -354,10 +358,18 @@ def test_create_decoder(self, create_from, device): arr = np.fromfile(path, dtype=np.uint8) video_tensor = torch.from_numpy(arr) decoder = create_from_tensor(video_tensor) - else: # bytes + elif create_from == "bytes": with open(path, "rb") as f: video_bytes = f.read() decoder = create_from_bytes(video_bytes) + elif create_from == "file_like_rawio": + decoder = create_from_file_like(open(path, mode="rb", 
buffering=0), "exact") + elif create_from == "file_like_bufferedio": + decoder = create_from_file_like( + open(path, mode="rb", buffering=4096), "exact" + ) + else: + raise ValueError("Oops, double check the parametrization of this test!") add_video_stream(decoder, device=device) frame0, _, _ = get_next_frame(decoder)