From f0ff1272f1ee54056b2e317b6584579701c7905d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 7 Feb 2025 17:21:11 +0000 Subject: [PATCH] Avoid receiving undesired packets --- src/torchcodec/decoders/_core/VideoDecoder.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp index 5a575d248..f0379c6ae 100644 --- a/src/torchcodec/decoders/_core/VideoDecoder.cpp +++ b/src/torchcodec/decoders/_core/VideoDecoder.cpp @@ -222,6 +222,13 @@ void VideoDecoder::scanFileAndUpdateMetadataAndIndex() { return; } + for (unsigned int i = 0; i < formatContext_->nb_streams; ++i) { + // We want to scan and update the metadata of all streams. + TORCH_CHECK( + formatContext_->streams[i]->discard != AVDISCARD_ALL, + "Did you add a stream before you called for a scan?"); + } + AutoAVPacket autoAVPacket; while (true) { ReferenceAVPacket packet(autoAVPacket); @@ -481,6 +488,16 @@ void VideoDecoder::addVideoStreamDecoder( updateMetadataWithCodecContext(streamInfo.streamIndex, codecContext); streamInfo.videoStreamOptions = videoStreamOptions; + // We will only need packets from the active stream, so we tell FFmpeg to + // discard packets from the other streams. Note that av_read_frame() may still + // return some of those undesired packets under some conditions, so it's still + // important to discard/demux correctly in the inner decoding loop. + for (unsigned int i = 0; i < formatContext_->nb_streams; ++i) { + if (i != static_cast<unsigned int>(activeStreamIndex_)) { + formatContext_->streams[i]->discard = AVDISCARD_ALL; + } + } + // By default, we want to use swscale for color conversion because it is // faster. However, it has width requirements, so we may need to fall back // to filtergraph. We also need to respect what was requested from the