From bf0b3aa2bb3dfe365aea2ef03d95b36fbfc198fa Mon Sep 17 00:00:00 2001 From: Scott Schneider Date: Tue, 4 Mar 2025 06:41:39 -0800 Subject: [PATCH 1/4] Refactor C++ decoder initialization --- .../decoders/_core/FFMPEGCommon.cpp | 30 ++++---- src/torchcodec/decoders/_core/FFMPEGCommon.h | 2 +- .../decoders/_core/VideoDecoder.cpp | 72 +++++++++---------- src/torchcodec/decoders/_core/VideoDecoder.h | 6 +- 4 files changed, 51 insertions(+), 59 deletions(-) diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp index 5de4a4624..cb0035c91 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp @@ -62,28 +62,26 @@ int64_t getDuration(const AVFrame* frame) { AVIOBytesContext::AVIOBytesContext( const void* data, - size_t data_size, - size_t tempBufferSize) { - auto buffer = static_cast(av_malloc(tempBufferSize)); - if (!buffer) { - throw std::runtime_error( - "Failed to allocate buffer of size " + std::to_string(tempBufferSize)); - } - bufferData_.data = static_cast(data); - bufferData_.size = data_size; - bufferData_.current = 0; + size_t dataSize, + size_t bufferSize) + : bufferData_{static_cast(data), dataSize, 0} { + auto buffer = static_cast(av_malloc(bufferSize)); + TORCH_CHECK( + buffer != nullptr, + "Failed to allocate buffer of size " + std::to_string(bufferSize)); avioContext_.reset(avio_alloc_context( buffer, - tempBufferSize, + bufferSize, 0, &bufferData_, &AVIOBytesContext::read, nullptr, &AVIOBytesContext::seek)); + if (!avioContext_) { av_freep(&buffer); - throw std::runtime_error("Failed to allocate AVIOContext"); + TORCH_CHECK(false, "Failed to allocate AVIOContext"); } } @@ -99,14 +97,14 @@ AVIOContext* AVIOBytesContext::getAVIO() { // The signature of this function is defined by FFMPEG. int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { - struct AVIOBufferData* bufferData = - static_cast(opaque); + auto bufferData = static_cast(opaque); TORCH_CHECK( bufferData->current <= bufferData->size, "Tried to read outside of the buffer: current=", bufferData->current, ", size=", bufferData->size); + buf_size = FFMIN(buf_size, static_cast(bufferData->size - bufferData->current)); TORCH_CHECK( @@ -117,6 +115,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { bufferData->size, ", current=", bufferData->current); + if (!buf_size) { return AVERROR_EOF; } @@ -127,7 +126,7 @@ int AVIOBytesContext::read(void* opaque, uint8_t* buf, int buf_size) { // The signature of this function is defined by FFMPEG. int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { - AVIOBufferData* bufferData = (AVIOBufferData*)opaque; + auto bufferData = static_cast(opaque); int64_t ret = -1; switch (whence) { @@ -141,6 +140,7 @@ int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) { default: break; } + return ret; } diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h index deabae52d..4245d3c00 100644 --- a/src/torchcodec/decoders/_core/FFMPEGCommon.h +++ b/src/torchcodec/decoders/_core/FFMPEGCommon.h @@ -155,7 +155,7 @@ struct AVIOBufferData { // memory buffer that is passed in. class AVIOBytesContext { public: - AVIOBytesContext(const void* data, size_t data_size, size_t tempBufferSize); + AVIOBytesContext(const void* data, size_t dataSize, size_t bufferSize); ~AVIOBytesContext(); // Returns the AVIOContext that can be passed to FFMPEG. diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 116379eb6..2f66105d3 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -40,11 +40,6 @@ int64_t secondsToClosestPts(double seconds, const AVRational& timeBase) { return static_cast(std::round(seconds * timeBase.den)); } -struct AVInput { - UniqueAVFormatContext formatContext; - std::unique_ptr ioBytesContext; -}; - std::vector splitStringWithDelimiters( const std::string& str, const std::string& delims) { @@ -72,50 +67,47 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode) : seekMode_(seekMode) { av_log_set_level(AV_LOG_QUIET); - AVFormatContext* formatContext = nullptr; - int open_ret = avformat_open_input( - &formatContext, videoFilePath.c_str(), nullptr, nullptr); - if (open_ret != 0) { - throw std::invalid_argument( - "Could not open input file: " + videoFilePath + " " + - getFFMPEGErrorStringFromErrorCode(open_ret)); - } - TORCH_CHECK(formatContext != nullptr); - AVInput input; - input.formatContext.reset(formatContext); - formatContext_ = std::move(input.formatContext); + AVFormatContext* rawContext = nullptr; + int ffmpegStatus = + avformat_open_input(&rawContext, videoFilePath.c_str(), nullptr, nullptr); + TORCH_CHECK( + ffmpegStatus == 0, + "Could not open input file: " + videoFilePath + " " + + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + TORCH_CHECK(rawContext != nullptr); + formatContext_.reset(rawContext); initializeDecoder(); } -VideoDecoder::VideoDecoder(const void* buffer, size_t length, SeekMode seekMode) +VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) : seekMode_(seekMode) { - TORCH_CHECK(buffer != nullptr, "Video buffer cannot be nullptr!"); + TORCH_CHECK(data != nullptr, "Video data buffer cannot be nullptr!"); av_log_set_level(AV_LOG_QUIET); - AVInput input; - input.formatContext.reset(avformat_alloc_context()); - TORCH_CHECK( - input.formatContext.get() != nullptr, "Unable to alloc avformat context"); - constexpr int kAVIOInternalTemporaryBufferSize = 64 * 1024; - input.ioBytesContext.reset( - new AVIOBytesContext(buffer, length, kAVIOInternalTemporaryBufferSize)); - if (!input.ioBytesContext) { - throw std::runtime_error("Failed to create AVIOBytesContext"); - } - input.formatContext->pb = input.ioBytesContext->getAVIO(); - AVFormatContext* tempFormatContext = input.formatContext.release(); - int open_ret = - avformat_open_input(&tempFormatContext, nullptr, nullptr, nullptr); - input.formatContext.reset(tempFormatContext); - if (open_ret != 0) { - throw std::runtime_error( - std::string("Failed to open input buffer: ") + - getFFMPEGErrorStringFromErrorCode(open_ret)); + constexpr int bufferSize = 64 * 1024; + ioBytesContext_.reset(new AVIOBytesContext(data, length, bufferSize)); + TORCH_CHECK(ioBytesContext_, "Failed to create AVIOBytesContext"); + + // Because FFmpeg requires a reference to a pointer in the call to open, we + // can't use a unique pointer here. Note that means we must call free if open + // fails. + AVFormatContext* rawContext = avformat_alloc_context(); + TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context"); + + rawContext->pb = ioBytesContext_->getAVIO(); + int ffmpegStatus = + avformat_open_input(&rawContext, nullptr, nullptr, nullptr); + if (ffmpegStatus != 0) { + avformat_free_context(rawContext); + TORCH_CHECK( + false, + "Failed to open input buffer: " + + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } - formatContext_ = std::move(input.formatContext); - ioBytesContext_ = std::move(input.ioBytesContext); + + formatContext_.reset(rawContext); initializeDecoder(); } diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h index e71973851..412cbf2ed 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.h +++ b/src/torchcodec/decoders/_core/VideoDecoder.h @@ -34,10 +34,10 @@ class VideoDecoder { const std::string& videoFilePath, SeekMode seekMode = SeekMode::exact); - // Creates a VideoDecoder from a given buffer. Note that the buffer is not - // owned by the VideoDecoder. + // Creates a VideoDecoder from a given buffer of data. Note that the data is + // not owned by the VideoDecoder. explicit VideoDecoder( - const void* buffer, + const void* data, size_t length, SeekMode seekMode = SeekMode::exact); From bd44ce3cab610a44bb3a450363d1ce61f114fcaf Mon Sep 17 00:00:00 2001 From: Scott Schneider Date: Tue, 4 Mar 2025 06:57:47 -0800 Subject: [PATCH 2/4] Refactor ffmpegStatus to status --- .../decoders/_core/VideoDecoder.cpp | 101 +++++++++--------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 2f66105d3..30d99f74b 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -68,12 +68,12 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode) av_log_set_level(AV_LOG_QUIET); AVFormatContext* rawContext = nullptr; - int ffmpegStatus = + int status = avformat_open_input(&rawContext, videoFilePath.c_str(), nullptr, nullptr); TORCH_CHECK( - ffmpegStatus == 0, + status == 0, "Could not open input file: " + videoFilePath + " " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); TORCH_CHECK(rawContext != nullptr); formatContext_.reset(rawContext); @@ -97,14 +97,13 @@ VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context"); rawContext->pb = ioBytesContext_->getAVIO(); - int ffmpegStatus = - avformat_open_input(&rawContext, nullptr, nullptr, nullptr); - if (ffmpegStatus != 0) { + int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr); + if (status != 0) { avformat_free_context(rawContext); TORCH_CHECK( false, "Failed to open input buffer: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } formatContext_.reset(rawContext); @@ -132,11 +131,11 @@ void VideoDecoder::initializeDecoder() { // store enough info in the header, so we call avformat_find_stream_info() // which decodes a few frames to get missing info. For more, see: // https://ffmpeg.org/doxygen/7.0/group__lavf__decoding.html - int ffmpegStatus = avformat_find_stream_info(formatContext_.get(), nullptr); - if (ffmpegStatus < 0) { + int status = avformat_find_stream_info(formatContext_.get(), nullptr); + if (status < 0) { throw std::runtime_error( "Failed to find stream info: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } for (unsigned int i = 0; i < formatContext_->nb_streams; i++) { @@ -231,16 +230,16 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() { ReferenceAVPacket packet(autoAVPacket); // av_read_frame is a misleading name: it gets the next **packet**. - int ffmpegStatus = av_read_frame(formatContext_.get(), packet.get()); + int status = av_read_frame(formatContext_.get(), packet.get()); - if (ffmpegStatus == AVERROR_EOF) { + if (status == AVERROR_EOF) { break; } - if (ffmpegStatus != AVSUCCESS) { + if (status != AVSUCCESS) { throw std::runtime_error( "Failed to read frame from input file: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } if (packet->flags & AV_PKT_FLAG_DISCARD) { @@ -923,23 +922,23 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // Need to get the next frame or error from PopFrame. UniqueAVFrame avFrame(av_frame_alloc()); AutoAVPacket autoAVPacket; - int ffmpegStatus = AVSUCCESS; + int status = AVSUCCESS; bool reachedEOF = false; while (true) { - ffmpegStatus = + status = avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get()); - if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN)) { + if (status != AVSUCCESS && status != AVERROR(EAGAIN)) { // Non-retriable error break; } decodeStats_.numFramesReceivedByDecoder++; // Is this the kind of frame we're looking for? - if (ffmpegStatus == AVSUCCESS && filterFunction(avFrame.get())) { + if (status == AVSUCCESS && filterFunction(avFrame.get())) { // Yes, this is the frame we'll return; break out of the decoding loop. break; - } else if (ffmpegStatus == AVSUCCESS) { + } else if (status == AVSUCCESS) { // No, but we received a valid frame - just not the kind we're looking // for. The logic below will read packets and send them to the decoder. // But since we did just receive a frame, we should skip reading more @@ -958,29 +957,29 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // packets and send them to the decoder. ReferenceAVPacket packet(autoAVPacket); do { - ffmpegStatus = av_read_frame(formatContext_.get(), packet.get()); + status = av_read_frame(formatContext_.get(), packet.get()); decodeStats_.numPacketsRead++; - if (ffmpegStatus == AVERROR_EOF) { + if (status == AVERROR_EOF) { // End of file reached. We must drain the codec by sending a nullptr // packet. - ffmpegStatus = avcodec_send_packet( + status = avcodec_send_packet( streamInfo.codecContext.get(), /*avpkt=*/nullptr); - if (ffmpegStatus < AVSUCCESS) { + if (status < AVSUCCESS) { throw std::runtime_error( "Could not flush decoder: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } reachedEOF = true; break; } - if (ffmpegStatus < AVSUCCESS) { + if (status < AVSUCCESS) { throw std::runtime_error( "Could not read frame from input file: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } } while (packet->stream_index != activeStreamIndex_); @@ -992,26 +991,25 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // We got a valid packet. Send it to the decoder, and we'll receive it in // the next iteration. - ffmpegStatus = - avcodec_send_packet(streamInfo.codecContext.get(), packet.get()); - if (ffmpegStatus < AVSUCCESS) { + status = avcodec_send_packet(streamInfo.codecContext.get(), packet.get()); + if (status < AVSUCCESS) { throw std::runtime_error( "Could not push packet to decoder: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } decodeStats_.numPacketsSentToDecoder++; } - if (ffmpegStatus < AVSUCCESS) { - if (reachedEOF || ffmpegStatus == AVERROR_EOF) { + if (status < AVSUCCESS) { + if (reachedEOF || status == AVERROR_EOF) { throw VideoDecoder::EndOfFileException( "Requested next frame while there are no more frames left to " "decode."); } throw std::runtime_error( "Could not receive frame from decoder: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } // Note that we don't flush the decoder when we reach EOF (even though that's @@ -1197,14 +1195,14 @@ torch::Tensor VideoDecoder::convertAVFrameToTensorUsingFilterGraph( const AVFrame* avFrame) { FilterGraphContext& filterGraphContext = streamInfos_[activeStreamIndex_].filterGraphContext; - int ffmpegStatus = + int status = av_buffersrc_write_frame(filterGraphContext.sourceContext, avFrame); - if (ffmpegStatus < AVSUCCESS) { + if (status < AVSUCCESS) { throw std::runtime_error("Failed to add frame to buffer source context"); } UniqueAVFrame filteredAVFrame(av_frame_alloc()); - ffmpegStatus = av_buffersink_get_frame( + status = av_buffersink_get_frame( filterGraphContext.sinkContext, filteredAVFrame.get()); TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24); @@ -1328,44 +1326,44 @@ void VideoDecoder::createFilterGraph( filterArgs << ":pixel_aspect=" << codecContext->sample_aspect_ratio.num << "/" << codecContext->sample_aspect_ratio.den; - int ffmpegStatus = avfilter_graph_create_filter( + int status = avfilter_graph_create_filter( &filterGraphContext.sourceContext, buffersrc, "in", filterArgs.str().c_str(), nullptr, filterGraphContext.filterGraph.get()); - if (ffmpegStatus < 0) { + if (status < 0) { throw std::runtime_error( std::string("Failed to create filter graph: ") + filterArgs.str() + - ": " + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + ": " + getFFMPEGErrorStringFromErrorCode(status)); } - ffmpegStatus = avfilter_graph_create_filter( + status = avfilter_graph_create_filter( &filterGraphContext.sinkContext, buffersink, "out", nullptr, nullptr, filterGraphContext.filterGraph.get()); - if (ffmpegStatus < 0) { + if (status < 0) { throw std::runtime_error( "Failed to create filter graph: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE}; - ffmpegStatus = av_opt_set_int_list( + status = av_opt_set_int_list( filterGraphContext.sinkContext, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN); - if (ffmpegStatus < 0) { + if (status < 0) { throw std::runtime_error( "Failed to set output pixel formats: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } UniqueAVFilterInOut outputs(avfilter_inout_alloc()); @@ -1386,7 +1384,7 @@ void VideoDecoder::createFilterGraph( AVFilterInOut* outputsTmp = outputs.release(); AVFilterInOut* inputsTmp = inputs.release(); - ffmpegStatus = avfilter_graph_parse_ptr( + status = avfilter_graph_parse_ptr( filterGraphContext.filterGraph.get(), description.str().c_str(), &inputsTmp, @@ -1394,18 +1392,17 @@ void VideoDecoder::createFilterGraph( nullptr); outputs.reset(outputsTmp); inputs.reset(inputsTmp); - if (ffmpegStatus < 0) { + if (status < 0) { throw std::runtime_error( "Failed to parse filter description: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } - ffmpegStatus = - avfilter_graph_config(filterGraphContext.filterGraph.get(), nullptr); - if (ffmpegStatus < 0) { + status = avfilter_graph_config(filterGraphContext.filterGraph.get(), nullptr); + if (status < 0) { throw std::runtime_error( "Failed to configure filter graph: " + - getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); + getFFMPEGErrorStringFromErrorCode(status)); } } From 39509426e891fec9e950f70d79a5fffd983af1e1 Mon Sep 17 00:00:00 2001 From: Scott Schneider Date: Tue, 4 Mar 2025 07:06:43 -0800 Subject: [PATCH 3/4] Revert status name change --- .../decoders/_core/VideoDecoder.cpp | 101 +++++++++--------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 30d99f74b..2f66105d3 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -68,12 +68,12 @@ VideoDecoder::VideoDecoder(const std::string& videoFilePath, SeekMode seekMode) av_log_set_level(AV_LOG_QUIET); AVFormatContext* rawContext = nullptr; - int status = + int ffmpegStatus = avformat_open_input(&rawContext, videoFilePath.c_str(), nullptr, nullptr); TORCH_CHECK( - status == 0, + ffmpegStatus == 0, "Could not open input file: " + videoFilePath + " " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); TORCH_CHECK(rawContext != nullptr); formatContext_.reset(rawContext); @@ -97,13 +97,14 @@ VideoDecoder::VideoDecoder(const void* data, size_t length, SeekMode seekMode) TORCH_CHECK(rawContext != nullptr, "Unable to alloc avformat context"); rawContext->pb = ioBytesContext_->getAVIO(); - int status = avformat_open_input(&rawContext, nullptr, nullptr, nullptr); - if (status != 0) { + int ffmpegStatus = + avformat_open_input(&rawContext, nullptr, nullptr, nullptr); + if (ffmpegStatus != 0) { avformat_free_context(rawContext); TORCH_CHECK( false, "Failed to open input buffer: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } formatContext_.reset(rawContext); @@ -131,11 +132,11 @@ void VideoDecoder::initializeDecoder() { // store enough info in the header, so we call avformat_find_stream_info() // which decodes a few frames to get missing info. For more, see: // https://ffmpeg.org/doxygen/7.0/group__lavf__decoding.html - int status = avformat_find_stream_info(formatContext_.get(), nullptr); - if (status < 0) { + int ffmpegStatus = avformat_find_stream_info(formatContext_.get(), nullptr); + if (ffmpegStatus < 0) { throw std::runtime_error( "Failed to find stream info: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } for (unsigned int i = 0; i < formatContext_->nb_streams; i++) { @@ -230,16 +231,16 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() { ReferenceAVPacket packet(autoAVPacket); // av_read_frame is a misleading name: it gets the next **packet**. - int status = av_read_frame(formatContext_.get(), packet.get()); + int ffmpegStatus = av_read_frame(formatContext_.get(), packet.get()); - if (status == AVERROR_EOF) { + if (ffmpegStatus == AVERROR_EOF) { break; } - if (status != AVSUCCESS) { + if (ffmpegStatus != AVSUCCESS) { throw std::runtime_error( "Failed to read frame from input file: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } if (packet->flags & AV_PKT_FLAG_DISCARD) { @@ -922,23 +923,23 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // Need to get the next frame or error from PopFrame. UniqueAVFrame avFrame(av_frame_alloc()); AutoAVPacket autoAVPacket; - int status = AVSUCCESS; + int ffmpegStatus = AVSUCCESS; bool reachedEOF = false; while (true) { - status = + ffmpegStatus = avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get()); - if (status != AVSUCCESS && status != AVERROR(EAGAIN)) { + if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN)) { // Non-retriable error break; } decodeStats_.numFramesReceivedByDecoder++; // Is this the kind of frame we're looking for? - if (status == AVSUCCESS && filterFunction(avFrame.get())) { + if (ffmpegStatus == AVSUCCESS && filterFunction(avFrame.get())) { // Yes, this is the frame we'll return; break out of the decoding loop. break; - } else if (status == AVSUCCESS) { + } else if (ffmpegStatus == AVSUCCESS) { // No, but we received a valid frame - just not the kind we're looking // for. The logic below will read packets and send them to the decoder. // But since we did just receive a frame, we should skip reading more @@ -957,29 +958,29 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // packets and send them to the decoder. ReferenceAVPacket packet(autoAVPacket); do { - status = av_read_frame(formatContext_.get(), packet.get()); + ffmpegStatus = av_read_frame(formatContext_.get(), packet.get()); decodeStats_.numPacketsRead++; - if (status == AVERROR_EOF) { + if (ffmpegStatus == AVERROR_EOF) { // End of file reached. We must drain the codec by sending a nullptr // packet. - status = avcodec_send_packet( + ffmpegStatus = avcodec_send_packet( streamInfo.codecContext.get(), /*avpkt=*/nullptr); - if (status < AVSUCCESS) { + if (ffmpegStatus < AVSUCCESS) { throw std::runtime_error( "Could not flush decoder: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } reachedEOF = true; break; } - if (status < AVSUCCESS) { + if (ffmpegStatus < AVSUCCESS) { throw std::runtime_error( "Could not read frame from input file: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } } while (packet->stream_index != activeStreamIndex_); @@ -991,25 +992,26 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame( // We got a valid packet. Send it to the decoder, and we'll receive it in // the next iteration. - status = avcodec_send_packet(streamInfo.codecContext.get(), packet.get()); - if (status < AVSUCCESS) { + ffmpegStatus = + avcodec_send_packet(streamInfo.codecContext.get(), packet.get()); + if (ffmpegStatus < AVSUCCESS) { throw std::runtime_error( "Could not push packet to decoder: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } decodeStats_.numPacketsSentToDecoder++; } - if (status < AVSUCCESS) { - if (reachedEOF || status == AVERROR_EOF) { + if (ffmpegStatus < AVSUCCESS) { + if (reachedEOF || ffmpegStatus == AVERROR_EOF) { throw VideoDecoder::EndOfFileException( "Requested next frame while there are no more frames left to " "decode."); } throw std::runtime_error( "Could not receive frame from decoder: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } // Note that we don't flush the decoder when we reach EOF (even though that's @@ -1195,14 +1197,14 @@ torch::Tensor VideoDecoder::convertAVFrameToTensorUsingFilterGraph( const AVFrame* avFrame) { FilterGraphContext& filterGraphContext = streamInfos_[activeStreamIndex_].filterGraphContext; - int status = + int ffmpegStatus = av_buffersrc_write_frame(filterGraphContext.sourceContext, avFrame); - if (status < AVSUCCESS) { + if (ffmpegStatus < AVSUCCESS) { throw std::runtime_error("Failed to add frame to buffer source context"); } UniqueAVFrame filteredAVFrame(av_frame_alloc()); - status = av_buffersink_get_frame( + ffmpegStatus = av_buffersink_get_frame( filterGraphContext.sinkContext, filteredAVFrame.get()); TORCH_CHECK_EQ(filteredAVFrame->format, AV_PIX_FMT_RGB24); @@ -1326,44 +1328,44 @@ void VideoDecoder::createFilterGraph( filterArgs << ":pixel_aspect=" << codecContext->sample_aspect_ratio.num << "/" << codecContext->sample_aspect_ratio.den; - int status = avfilter_graph_create_filter( + int ffmpegStatus = avfilter_graph_create_filter( &filterGraphContext.sourceContext, buffersrc, "in", filterArgs.str().c_str(), nullptr, filterGraphContext.filterGraph.get()); - if (status < 0) { + if (ffmpegStatus < 0) { throw std::runtime_error( std::string("Failed to create filter graph: ") + filterArgs.str() + - ": " + getFFMPEGErrorStringFromErrorCode(status)); + ": " + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } - status = avfilter_graph_create_filter( + ffmpegStatus = avfilter_graph_create_filter( &filterGraphContext.sinkContext, buffersink, "out", nullptr, nullptr, filterGraphContext.filterGraph.get()); - if (status < 0) { + if (ffmpegStatus < 0) { throw std::runtime_error( "Failed to create filter graph: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE}; - status = av_opt_set_int_list( + ffmpegStatus = av_opt_set_int_list( filterGraphContext.sinkContext, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN); - if (status < 0) { + if (ffmpegStatus < 0) { throw std::runtime_error( "Failed to set output pixel formats: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } UniqueAVFilterInOut outputs(avfilter_inout_alloc()); @@ -1384,7 +1386,7 @@ void VideoDecoder::createFilterGraph( AVFilterInOut* outputsTmp = outputs.release(); AVFilterInOut* inputsTmp = inputs.release(); - status = avfilter_graph_parse_ptr( + ffmpegStatus = avfilter_graph_parse_ptr( filterGraphContext.filterGraph.get(), description.str().c_str(), &inputsTmp, @@ -1392,17 +1394,18 @@ void VideoDecoder::createFilterGraph( nullptr); outputs.reset(outputsTmp); inputs.reset(inputsTmp); - if (status < 0) { + if (ffmpegStatus < 0) { throw std::runtime_error( "Failed to parse filter description: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } - status = avfilter_graph_config(filterGraphContext.filterGraph.get(), nullptr); - if (status < 0) { + ffmpegStatus = + avfilter_graph_config(filterGraphContext.filterGraph.get(), nullptr); + if (ffmpegStatus < 0) { throw std::runtime_error( "Failed to configure filter graph: " + - getFFMPEGErrorStringFromErrorCode(status)); + getFFMPEGErrorStringFromErrorCode(ffmpegStatus)); } } From 203258e3faa59c442fe3b7eb1a26ffecaedb082c Mon Sep 17 00:00:00 2001 From: Scott Schneider Date: Tue, 4 Mar 2025 07:16:44 -0800 Subject: [PATCH 4/4] Fix C++ test --- test/decoders/VideoDecoderTest.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/decoders/VideoDecoderTest.cpp b/test/decoders/VideoDecoderTest.cpp index 4e6710cb7..145663227 100644 --- a/test/decoders/VideoDecoderTest.cpp +++ b/test/decoders/VideoDecoderTest.cpp @@ -93,8 +93,7 @@ TEST_P(VideoDecoderTest, ReturnsFpsAndDurationForVideoInMetadata) { } TEST(VideoDecoderTest, MissingVideoFileThrowsException) { - EXPECT_THROW( - VideoDecoder("/this/file/does/not/exist"), std::invalid_argument); + EXPECT_THROW(VideoDecoder("/this/file/does/not/exist"), c10::Error); } void dumpTensorToDisk(