From 28bc995b833cee4defe33740b63fa75cae081ffc Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Nov 2025 00:00:25 +0000 Subject: [PATCH 1/5] Rename getKeyFrameIndexForPts into getKeyFrameIdentifier --- src/torchcodec/_core/SingleStreamDecoder.cpp | 18 +++++++++++++++--- src/torchcodec/_core/SingleStreamDecoder.h | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 6968a4b3f..d13910a72 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1139,8 +1139,8 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const { // We are seeking forwards. // We can only skip a seek if both lastDecodedAvFramePts and // cursor_ share the same keyframe. - int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_); - int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_); + int lastDecodedAvFrameIndex = getKeyFrameIdentifier(lastDecodedAvFramePts_); + int targetKeyFrameIndex = getKeyFrameIdentifier(cursor_); return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 && lastDecodedAvFrameIndex == targetKeyFrameIndex; } @@ -1359,7 +1359,19 @@ torch::Tensor SingleStreamDecoder::maybePermuteHWC2CHW( // PTS <-> INDEX CONVERSIONS // -------------------------------------------------------------------------- -int SingleStreamDecoder::getKeyFrameIndexForPts(int64_t pts) const { +int SingleStreamDecoder::getKeyFrameIdentifier(int64_t pts) const { + // This function "identifies" a key frame for a given pts value. + // We use the term "identifier" rather than "index" because the nature of the + // index that is returned depends on various factors: + // - If seek_mode is exact, we return the index of the key frame in the + // scanned key-frame vector (streamInfo.keyFrames). So the returned value is + // in [0, num_key_frames). + // - If seek_mode is approximate, we use av_index_search_timestamp() which + // may return a value in [0, num_frames) like for mkv, but also a value in + // [0, num_frames) like for mp4. It really depends on the container. + // + // The range of the "identifier" doesn't matter that much, for now we only + // use it to uniquely identify a key frame in canWeAvoidSeeking(). const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_); if (streamInfo.keyFrames.empty()) { return av_index_search_timestamp( diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h index 305a9c2a6..cbb9cbc2f 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.h +++ b/src/torchcodec/_core/SingleStreamDecoder.h @@ -282,7 +282,7 @@ class SingleStreamDecoder { // PTS <-> INDEX CONVERSIONS // -------------------------------------------------------------------------- - int getKeyFrameIndexForPts(int64_t pts) const; + int getKeyFrameIdentifier(int64_t pts) const; // Returns the key frame index of the presentation timestamp using our index. // We build this index by scanning the file in From 7cca5e41e0006fd5d932d1c58b1f5c143f39a306 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Nov 2025 00:05:58 +0000 Subject: [PATCH 2/5] more stuff --- src/torchcodec/_core/SingleStreamDecoder.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 2dfdb4d70..652e03379 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1092,13 +1092,6 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const { // Returns true if we can avoid seeking in the AVFormatContext based on // heuristics that rely on the target cursor_ and the last decoded frame. // Seeking is expensive, so we try to avoid it when possible. - // Note that this function itself isn't always that cheap to call: in - // particular the calls to getKeyFrameIndexForPts below in approximate mode - // are sometimes slow. - // TODO we should understand why (is it because it reads the file?) and - // potentially optimize it. E.g. we may not want to ever seek, or even *check* - // if we need to seek in some cases, like if we're going to decode 80% of the - // frames anyway. const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_); if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) { // For audio, we only need to seek if a backwards seek was requested @@ -1145,10 +1138,10 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const { // I P P P I P P P I P P I P // x j y // (2) is only more efficient than (1) if there is an I frame between x and y. - int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_); - int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_); - return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 && - lastKeyFrameIndex == targetKeyFrameIndex; + int lastKeyFrame = getKeyFrameIdentifier(lastDecodedAvFramePts_); + int targetKeyFrame = getKeyFrameIdentifier(cursor_); + return lastKeyFrame >= 0 && targetKeyFrame >= 0 && + lastKeyFrame == targetKeyFrame; } // This method looks at currentPts and desiredPts and seeks in the From fc2142ddbf6f27d3c7f9380995578482d084c4b2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Nov 2025 00:07:36 +0000 Subject: [PATCH 3/5] more stuff --- src/torchcodec/_core/SingleStreamDecoder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index 652e03379..c8870dfb1 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -1366,8 +1366,8 @@ int SingleStreamDecoder::getKeyFrameIdentifier(int64_t pts) const { // scanned key-frame vector (streamInfo.keyFrames). So the returned value is // in [0, num_key_frames). // - If seek_mode is approximate, we use av_index_search_timestamp() which - // may return a value in [0, num_frames) like for mkv, but also a value in - // [0, num_frames) like for mp4. It really depends on the container. + // may return a value in [0, num_key_frames) like for mkv, but also a value + // in [0, num_frames) like for mp4. It really depends on the container. // // The range of the "identifier" doesn't matter that much, for now we only // use it to uniquely identify a key frame in canWeAvoidSeeking(). From 4485fbbb7c3b568a8faa755ee495d9b8f4149a09 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 1 Dec 2025 14:48:59 +0000 Subject: [PATCH 4/5] empty From f88a7ba919a78e0c54c34843667af63c2536a0a1 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 1 Dec 2025 18:21:11 +0000 Subject: [PATCH 5/5] empty