From 28bc995b833cee4defe33740b63fa75cae081ffc Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Fri, 28 Nov 2025 00:00:25 +0000
Subject: [PATCH 1/5] Rename getKeyFrameIndexForPts into getKeyFrameIdentifier

---
 src/torchcodec/_core/SingleStreamDecoder.cpp | 18 +++++++++++++++---
 src/torchcodec/_core/SingleStreamDecoder.h   |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 6968a4b3f..d13910a72 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1139,8 +1139,8 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
   // We are seeking forwards.
   // We can only skip a seek if both lastDecodedAvFramePts and
   // cursor_ share the same keyframe.
-  int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
-  int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
+  int lastDecodedAvFrameIndex = getKeyFrameIdentifier(lastDecodedAvFramePts_);
+  int targetKeyFrameIndex = getKeyFrameIdentifier(cursor_);
   return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
       lastDecodedAvFrameIndex == targetKeyFrameIndex;
 }
@@ -1359,7 +1359,19 @@ torch::Tensor SingleStreamDecoder::maybePermuteHWC2CHW(
 // PTS <-> INDEX CONVERSIONS
 // --------------------------------------------------------------------------
 
-int SingleStreamDecoder::getKeyFrameIndexForPts(int64_t pts) const {
+int SingleStreamDecoder::getKeyFrameIdentifier(int64_t pts) const {
+  // This function "identifies" a key frame for a given pts value.
+  // We use the term "identifier" rather than "index" because the nature of the
+  // index that is returned depends on various factors:
+  // - If seek_mode is exact, we return the index of the key frame in the
+  //   scanned key-frame vector (streamInfo.keyFrames). So the returned value is
+  //   in [0, num_key_frames).
+  // - If seek_mode is approximate, we use av_index_search_timestamp() which
+  //   may return a value in [0, num_frames) like for mkv, but also a value in
+  //   [0, num_frames) like for mp4. It really depends on the container.
+  //
+  //  The range of the "identifier" doesn't matter that much, for now we only
+  //  use it to uniquely identify a key frame in canWeAvoidSeeking().
   const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
   if (streamInfo.keyFrames.empty()) {
     return av_index_search_timestamp(
diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h
index 305a9c2a6..cbb9cbc2f 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.h
+++ b/src/torchcodec/_core/SingleStreamDecoder.h
@@ -282,7 +282,7 @@ class SingleStreamDecoder {
   // PTS <-> INDEX CONVERSIONS
   // --------------------------------------------------------------------------
 
-  int getKeyFrameIndexForPts(int64_t pts) const;
+  int getKeyFrameIdentifier(int64_t pts) const;
 
   // Returns the key frame index of the presentation timestamp using our index.
   // We build this index by scanning the file in

From 7cca5e41e0006fd5d932d1c58b1f5c143f39a306 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Fri, 28 Nov 2025 00:05:58 +0000
Subject: [PATCH 2/5] more stuff

---
 src/torchcodec/_core/SingleStreamDecoder.cpp | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 2dfdb4d70..652e03379 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1092,13 +1092,6 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
   // Returns true if we can avoid seeking in the AVFormatContext based on
   // heuristics that rely on the target cursor_ and the last decoded frame.
   // Seeking is expensive, so we try to avoid it when possible.
-  // Note that this function itself isn't always that cheap to call: in
-  // particular the calls to getKeyFrameIndexForPts below in approximate mode
-  // are sometimes slow.
-  // TODO we should understand why (is it because it reads the file?) and
-  // potentially optimize it. E.g. we may not want to ever seek, or even *check*
-  // if we need to seek in some cases, like if we're going to decode 80% of the
-  // frames anyway.
   const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
   if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
     // For audio, we only need to seek if a backwards seek was requested
@@ -1145,10 +1138,10 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
   // I    P     P    P    I    P    P    P    I    P    P    I    P
   //                           x              j         y
   // (2) is only more efficient than (1) if there is an I frame between x and y.
-  int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
-  int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
-  return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
-      lastKeyFrameIndex == targetKeyFrameIndex;
+  int lastKeyFrame = getKeyFrameIdentifier(lastDecodedAvFramePts_);
+  int targetKeyFrame = getKeyFrameIdentifier(cursor_);
+  return lastKeyFrame >= 0 && targetKeyFrame >= 0 &&
+      lastKeyFrame == targetKeyFrame;
 }
 
 // This method looks at currentPts and desiredPts and seeks in the

From fc2142ddbf6f27d3c7f9380995578482d084c4b2 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Fri, 28 Nov 2025 00:07:36 +0000
Subject: [PATCH 3/5] more stuff

---
 src/torchcodec/_core/SingleStreamDecoder.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp
index 652e03379..c8870dfb1 100644
--- a/src/torchcodec/_core/SingleStreamDecoder.cpp
+++ b/src/torchcodec/_core/SingleStreamDecoder.cpp
@@ -1366,8 +1366,8 @@ int SingleStreamDecoder::getKeyFrameIdentifier(int64_t pts) const {
   //   scanned key-frame vector (streamInfo.keyFrames). So the returned value is
   //   in [0, num_key_frames).
   // - If seek_mode is approximate, we use av_index_search_timestamp() which
-  //   may return a value in [0, num_frames) like for mkv, but also a value in
-  //   [0, num_frames) like for mp4. It really depends on the container.
+  //   may return a value in [0, num_key_frames) like for mkv, but also a value
+  //   in [0, num_frames) like for mp4. It really depends on the container.
   //
   //  The range of the "identifier" doesn't matter that much, for now we only
   //  use it to uniquely identify a key frame in canWeAvoidSeeking().

From 4485fbbb7c3b568a8faa755ee495d9b8f4149a09 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Mon, 1 Dec 2025 14:48:59 +0000
Subject: [PATCH 4/5] empty


From f88a7ba919a78e0c54c34843667af63c2536a0a1 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <nicolashug@meta.com>
Date: Mon, 1 Dec 2025 18:21:11 +0000
Subject: [PATCH 5/5] empty