diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp index 5de4a4624..cb0035c91 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp @@ -62,28 +62,26 @@ int64_t getDuration(const AVFrame* frame) { AVIOBytesContext::AVIOBytesContext( const void* data, - size_t data_size, - size_t tempBufferSize) { - auto buffer = static_cast(av_malloc(tempBufferSize)); - if (!buffer) { - throw std::runtime_error( - "Failed to allocate buffer of size " + std::to_string(tempBufferSize)); - } - bufferData_.data = static_cast(data); - bufferData_.size = data_size; - bufferData_.current = 0; + size_t dataSize, + size_t bufferSize) + : bufferData_{static_cast(data), dataSize, 0} { + auto buffer = static_cast(av_malloc(bufferSize)); + TORCH_CHECK( + buffer != nullptr, + "Failed to allocate buffer of size " + std::to_string(bufferSize)); avioContext_.reset(avio_alloc_context( buffer, - tempBufferSize, + bufferSize, 0, &bufferData_, &AVIOBytesContext::read, nullptr, &AVIOBytesContext::seek)); + if (!avioContext_) { av_freep(&buffer); - throw std::runtime_error("Failed to allocate AVIOContext"); + TORCH_CHECK(false, "Failed to allocate AVIOContext"); } } @@ -99,14 +97,14 @@ AVIOContext* AVIOBytesContext::getAVIO() { // The signature of this function is defined by FFMPEG. int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { - struct AVIOBufferData* bufferData = - static_cast(opaque); + auto bufferData = static_cast(opaque); TORCH_CHECK( bufferData->current <= bufferData->size, "Tried to read outside of the buffer: current=", bufferData->current, ", size=", bufferData->size); + buf_size = FFMIN(buf_size, static_cast(bufferData->size - bufferData->current)); TORCH_CHECK( @@ -117,6 +115,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { bufferData->size, ", current=", bufferData->current); + if (!buf_size) { return AVERROR_EOF; } @@ -127,7 +126,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { // The signature of this function is defined by FFMPEG. int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { - AVIOBufferData* bufferData = (AVIOBufferData*)opaque; + auto bufferData = static_cast(opaque); int64_t ret = -1; switch (whence) { @@ -141,6 +140,7 @@ int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { default: break; } + return ret; } diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h index deabae52d..4245d3c00 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.h +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.h @@ -155,7 +155,7 @@ struct AVIOBufferData { // memory buffer that is passed in. class AVIOBytesContext { public: - AVIOBytesContext(const void* data, size_t data_size, size_t tempBufferSize); + AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize); ~AVIOBytesContext(); // Returns the AVIOContext that can be passed to FFMPEG. diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 116379eb6..2f66105d3 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -40,11 +40,6 @@ int64_t secondsToClosestPts(double seconds, const AVRational& timeBase) { return static_cast(std::round(seconds * timeBase.den)); } -struct AVInput { - UniqueAVFormatContext formatContext; - std::unique_ptr ioBytesContext; -}; - std::vector splitStringWithDelimiters( const std::string& str, const std::string& delims) { @@ -72,50 +67,47 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode) : seekMode_(seekMode) { av_log_set_level(AV_LOG_QUIET); - AVFormatContext* formatContext = nullptr; - int open_ret = avformat_open_input( - &formatContext, videoFilePath.c_str(), nullptr, nullptr); - if (open_ret != 0) { - throw std::invalid_argument( - "Could not open input file: " + videoFilePath + " " + - getFFMPEGErrorStringFromErrorCode(open_ret)); - } - TORCH_CHECK(formatContext != nullptr); - AVInput input; - input.formatContext.reset(formatContext); - formatContext_ = std::move(input.formatContext); + AVFormatContext* rawContext = nullptr; + int ffmpegStatus = + avformat_open_input(&rawContext, videoFilePath.c_str(), nullptr, nullptr); + TORCH_CHECK( + ffmpegStatus == 0, + "Could not open input file: " + videoFilePath + " " + + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + TORCH_CHECK(rawContext != nullptr); + formatContext_.reset(rawContext); initializeDecoder(); } -VideoDecoder::VideoDecoder(const void* buffer, size_t length, SeekMode seekMode) +VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) : seekMode_(seekMode) { - TORCH_CHECK(buffer != nullptr, "Video buffer cannot be nullptr!"); + TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!"); av_log_set_level(AV_LOG_QUIET); - AVInput input; - input.formatContext.reset(avformat_alloc_context()); - TORCH_CHECK( - input.formatContext.get() != nullptr, "Unable to alloc avformat context"); - constexpr int kAVIOInternalTemporaryBufferSize = 64 * 1024; - input.ioBytesContext.reset( - new AVIOBytesContext(buffer, length, kAVIOInternalTemporaryBufferSize)); - if (!input.ioBytesContext) { - throw std::runtime_error("Failed to create AVIOBytesContext"); - } - input.formatContext->pb = input.ioBytesContext->getAVIO(); - AVFormatContext* tempFormatContext = input.formatContext.release(); - int open_ret = - avformat_open_input(&tempFormatContext, nullptr, nullptr, nullptr); - input.formatContext.reset(tempFormatContext); - if (open_ret != 0) { - throw std::runtime_error( - std::string("Failed to open input buffer: ") + - getFFMPEGErrorStringFromErrorCode(open_ret)); + constexpr int bufferSize = 64 * 1024; + ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize)); + TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext"); + + // Because FFmpeg requires a reference to a pointer in the call to open, we + // can't use a unique pointer here. Note that means we must call free if open + // fails. + AVFormatContext* rawContext = avformat_alloc_context(); + TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context"); + + rawContext->pb = ioBytesContext_->getAVIO(); + int ffmpegStatus = + avformat_open_input(&rawContext, nullptr, nullptr, nullptr); + if (ffmpegStatus != 0) { + avformat_free_context(rawContext); + TORCH_CHECK( + false, + "Failed to open input buffer: " + + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } - formatContext_ = std::move(input.formatContext); - ioBytesContext_ = std::move(input.ioBytesContext); + + formatContext_.reset(rawContext); initializeDecoder(); } diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h index e71973851..412cbf2ed 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.h +++ b/src/torchcodec/decoders/_core/VideoDecoder.h @@ -34,10 +34,10 @@ class VideoDecoder { const std::string& videoFilePath, SeekMode seekMode = SeekMode::exact); - // Creates a VideoDecoder from a given buffer. Note that the buffer is not - // owned by the VideoDecoder. + // Creates a VideoDecoder from a given buffer of data. Note that the data is + // not owned by the VideoDecoder. explicit VideoDecoder( - const void* buffer, + const void* data, size_t length, SeekMode seekMode = SeekMode::exact); diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp index 4e6710cb7..145663227 100644 --- a/test/decoders/VideoDecoderTest.cpp +++ b/test/decoders/VideoDecoderTest.cpp @@ -93,8 +93,7 @@ TEST_P(VideoDecoderTest, ReturnsFpsAndDurationForVideoInMetadata) { } TEST(VideoDecoderTest, MissingVideoFileThrowsException) { - EXPECT_THROW( - VideoDecoder("/this/file/does/not/exist"), std::invalid_argument); + EXPECT_THROW(VideoDecoder("/this/file/does/not/exist"), c10::Error); } void dumpTensorToDisk(