2 changes: 1 addition & 1 deletion README.md
@@ -72,7 +72,7 @@ decoder.get_frames_in_range(start=10, stop=30, step=5)
# duration_seconds: tensor([0.0400, 0.0400, 0.0400, 0.0400])

# Time-based indexing with PTS and duration info
-decoder.get_frame_displayed_at(pts_seconds=2)
+decoder.get_frame_played_at(pts_seconds=2)
# Frame:
# data (shape): torch.Size([3, 400, 640])
# pts_seconds: 2.0
2 changes: 1 addition & 1 deletion docs/source/glossary.rst
@@ -4,7 +4,7 @@ Glossary
.. glossary::

pts
-Presentation Time Stamp. The time at which a frame should be displayed.
+Presentation Time Stamp. The time at which a frame should be played.
In TorchCodec, pts are expressed in seconds.

best stream
14 changes: 7 additions & 7 deletions examples/basic_example.py
@@ -150,24 +150,24 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
# -------------------------
#
# So far, we have retrieved frames based on their index. We can also retrieve
-# frames based on *when* they are displayed with
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_displayed_in_range`, which
+# frames based on *when* they are played with
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` and
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`, which
# also returns :class:`~torchcodec.Frame` and :class:`~torchcodec.FrameBatch`
# respectively.

-frame_at_2_seconds = decoder.get_frame_displayed_at(seconds=2)
+frame_at_2_seconds = decoder.get_frame_played_at(seconds=2)
print(f"{type(frame_at_2_seconds) = }")
print(frame_at_2_seconds)

# %%
-first_two_seconds = decoder.get_frames_displayed_in_range(
+first_two_seconds = decoder.get_frames_played_in_range(
start_seconds=0,
stop_seconds=2,
)
print(f"{type(first_two_seconds) = }")
print(first_two_seconds)

# %%
-plot(frame_at_2_seconds.data, "Frame displayed at 2 seconds")
-plot(first_two_seconds.data, "Frames displayed during [0, 2) seconds")
+plot(frame_at_2_seconds.data, "Frame played at 2 seconds")
+plot(first_two_seconds.data, "Frames played during [0, 2) seconds")
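
For reference, a minimal sketch of the renamed time-based API in use, matching the example above (the video path is a placeholder):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path

    # Single frame: a Frame with .data, .pts_seconds and .duration_seconds.
    frame_at_2_seconds = decoder.get_frame_played_at(seconds=2)

    # Range query: a FrameBatch of all frames played during [0, 2) seconds.
    first_two_seconds = decoder.get_frames_played_in_range(
        start_seconds=0,
        stop_seconds=2,
    )
    print(frame_at_2_seconds.pts_seconds, first_two_seconds.data.shape)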
14 changes: 7 additions & 7 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -947,7 +947,7 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
}
}

-VideoDecoder::DecodedOutput VideoDecoder::getFrameDisplayedAtTimestampNoDemux(
+VideoDecoder::DecodedOutput VideoDecoder::getFramePlayedAtTimestampNoDemux(
double seconds) {
for (auto& [streamIndex, stream] : streams_) {
double frameStartTime = ptsToSeconds(stream.currentPts, stream.timeBase);
@@ -1090,13 +1090,13 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
return output;
}

-VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesDisplayedByTimestamps(
+VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesPlayedByTimestamps(
int streamIndex,
const std::vector<double>& timestamps) {
validateUserProvidedStreamIndex(streamIndex);
validateScannedAllStreams("getFramesDisplayedByTimestamps");
validateScannedAllStreams("getFramesPlayedByTimestamps");

-// The frame displayed at timestamp t and the one displayed at timestamp `t +
+// The frame played at timestamp t and the one played at timestamp `t +
// eps` are probably the same frame, with the same index. The easiest way to
// avoid decoding that unique frame twice is to convert the input timestamps
// to indices, and leverage the de-duplication logic of getFramesAtIndices.
@@ -1168,12 +1168,12 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange(
}

VideoDecoder::BatchDecodedOutput
-VideoDecoder::getFramesDisplayedByTimestampInRange(
+VideoDecoder::getFramesPlayedByTimestampInRange(
int streamIndex,
double startSeconds,
double stopSeconds) {
validateUserProvidedStreamIndex(streamIndex);
validateScannedAllStreams("getFramesDisplayedByTimestampInRange");
validateScannedAllStreams("getFramesPlayedByTimestampInRange");

const auto& streamMetadata = containerMetadata_.streams[streamIndex];
double minSeconds = streamMetadata.minPtsSecondsFromScan.value();
@@ -1224,7 +1224,7 @@ VideoDecoder::getFramesDisplayedByTimestampInRange(
// abstract player displays frames starting at the pts for that frame until
// the pts for the next frame. There are two consequences:
//
-// 1. We ignore the duration for a frame. A frame is displayed until the
+// 1. We ignore the duration for a frame. A frame is played until the
// next frame replaces it. This model is robust to durations being 0 or
// incorrect; our source of truth is the pts for frames. If duration is
// accurate, the nextPts for a frame would be equivalent to pts + duration.
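
The comments above describe the model these functions share: a frame is played from its own pts until the next frame's pts (durations are ignored), and batch timestamp queries are converted to indices first so getFramesAtIndices can de-duplicate repeated frames. A rough Python sketch of that lookup, assuming a sorted list of per-frame pts from the scan (illustrative only, not the actual implementation):

    import bisect

    def frame_index_played_at(pts_list, seconds):
        # Frame i is played during [pts_list[i], pts_list[i + 1]),
        # regardless of the duration each frame reports.
        return bisect.bisect_right(pts_list, seconds) - 1

    def frame_indices_played_at(pts_list, timestamps):
        # Timestamps landing on the same frame map to the same index,
        # so each unique frame only needs to be decoded once.
        return [frame_index_played_at(pts_list, t) for t in timestamps]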
8 changes: 4 additions & 4 deletions src/torchcodec/decoders/_core/VideoDecoder.h
@@ -222,7 +222,7 @@ class VideoDecoder {
// duration of 1.0s, it will be visible in the timestamp range [5.0, 6.0).
// i.e. it will be returned when this function is called with seconds=5.0 or
// seconds=5.999, etc.
-DecodedOutput getFrameDisplayedAtTimestampNoDemux(double seconds);
+DecodedOutput getFramePlayedAtTimestampNoDemux(double seconds);

DecodedOutput getFrameAtIndex(
int streamIndex,
@@ -244,7 +244,7 @@
int streamIndex,
const std::vector<int64_t>& frameIndices);

-BatchDecodedOutput getFramesDisplayedByTimestamps(
+BatchDecodedOutput getFramesPlayedByTimestamps(
int streamIndex,
const std::vector<double>& timestamps);

@@ -265,15 +265,15 @@
// frame. Otherwise, the moment in time immediately before stopSeconds is in
// the range, and that time maps to the same frame as stopSeconds.
//
-// The frames returned are the frames that would be displayed by our abstract
+// The frames returned are the frames that would be played by our abstract
// player. Our abstract player displays frames based on pts only. It displays
// frame i starting at the pts for frame i, and stops at the pts for frame
// i+1. This model ignores a frame's reported duration.
//
// Valid values for startSeconds and stopSeconds are:
//
// [minPtsSecondsFromScan, maxPtsSecondsFromScan)
-BatchDecodedOutput getFramesDisplayedByTimestampInRange(
+BatchDecodedOutput getFramesPlayedByTimestampInRange(
int streamIndex,
double startSeconds,
double stopSeconds);
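
The header comments pin down the half-open semantics: startSeconds is inclusive, stopSeconds exclusive, and valid values lie in [minPtsSecondsFromScan, maxPtsSecondsFromScan). Under the same pts-only model, a range query could map to frame indices like this (a sketch with assumed helper names, not the real API):

    import bisect

    def frame_range_played_in(pts_list, start_seconds, stop_seconds):
        # The first frame is the one being played at start_seconds.
        first = bisect.bisect_right(pts_list, start_seconds) - 1
        # stop_seconds is exclusive: a frame whose pts equals stop_seconds
        # is not returned, but the frame playing just before it is.
        last = bisect.bisect_left(pts_list, stop_seconds) - 1
        return range(first, last + 1)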
6 changes: 3 additions & 3 deletions src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -207,7 +207,7 @@ OpsDecodedOutput get_next_frame(at::Tensor& decoder) {

OpsDecodedOutput get_frame_at_pts(at::Tensor& decoder, double seconds) {
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-auto result = videoDecoder->getFrameDisplayedAtTimestampNoDemux(seconds);
+auto result = videoDecoder->getFramePlayedAtTimestampNoDemux(seconds);
return makeOpsDecodedOutput(result);
}

@@ -249,7 +249,7 @@ OpsBatchDecodedOutput get_frames_by_pts(
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
std::vector<double> timestampsVec(timestamps.begin(), timestamps.end());
auto result =
-videoDecoder->getFramesDisplayedByTimestamps(stream_index, timestampsVec);
+videoDecoder->getFramesPlayedByTimestamps(stream_index, timestampsVec);
return makeOpsBatchDecodedOutput(result);
}

@@ -259,7 +259,7 @@ OpsBatchDecodedOutput get_frames_by_pts_in_range(
double start_seconds,
double stop_seconds) {
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-auto result = videoDecoder->getFramesDisplayedByTimestampInRange(
+auto result = videoDecoder->getFramesPlayedByTimestampInRange(
stream_index, start_seconds, stop_seconds);
return makeOpsBatchDecodedOutput(result);
}
4 changes: 2 additions & 2 deletions src/torchcodec/decoders/_core/_metadata.py
@@ -46,8 +46,8 @@ class VideoStreamMetadata:
"""End of the stream, in seconds (float or None).
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
is computed as max(frame.pts + frame.duration) across all frames in the
-stream. Note that no frame is displayed at this time value, so calling
-:meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at` with
+stream. Note that no frame is played at this time value, so calling
+:meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with
this value would result in an error. Retrieving the last frame is best done
by simply indexing the :class:`~torchcodec.decoders.VideoDecoder`
object with ``[-1]``.
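
To make the boundary concrete, a hedged sketch of what this docstring implies (path and attribute access are assumed from the surrounding docs):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path
    end = decoder.metadata.end_stream_seconds

    # end is max(frame.pts + frame.duration); no frame is played *at* that
    # time, so this would raise an IndexError:
    #   decoder.get_frame_played_at(seconds=end)

    # The documented way to get the last frame is plain indexing:
    last_frame = decoder[-1]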
20 changes: 10 additions & 10 deletions src/torchcodec/decoders/_video_decoder.py
@@ -240,14 +240,14 @@ def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatc
)
return FrameBatch(*frames)

-def get_frame_displayed_at(self, seconds: float) -> Frame:
-"""Return a single frame displayed at the given timestamp in seconds.
+def get_frame_played_at(self, seconds: float) -> Frame:
+"""Return a single frame played at the given timestamp in seconds.

Args:
-seconds (float): The time stamp in seconds when the frame is displayed.
+seconds (float): The time stamp in seconds when the frame is played.

Returns:
-Frame: The frame that is displayed at ``seconds``.
+Frame: The frame that is played at ``seconds``.
"""
if not self._begin_stream_seconds <= seconds < self._end_stream_seconds:
raise IndexError(
@@ -264,21 +264,21 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
duration_seconds=duration_seconds.item(),
)

-def get_frames_displayed_at(self, seconds: list[float]) -> FrameBatch:
-"""Return frames displayed at the given timestamps in seconds.
+def get_frames_played_at(self, seconds: list[float]) -> FrameBatch:
+"""Return frames played at the given timestamps in seconds.

.. note::

Calling this method is more efficient than repeated individual calls
-to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at`.
+to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at`.
This method makes sure not to decode the same frame twice, and also
avoids "backwards seek" operations, which are slow.

Args:
-seconds (list of float): The timestamps in seconds when the frames are displayed.
+seconds (list of float): The timestamps in seconds when the frames are played.

Returns:
-FrameBatch: The frames that are displayed at ``seconds``.
+FrameBatch: The frames that are played at ``seconds``.
"""
data, pts_seconds, duration_seconds = core.get_frames_by_pts(
self._decoder, timestamps=seconds, stream_index=self.stream_index
@@ -289,7 +289,7 @@ def get_frames_displayed_at(self, seconds: list[float]) -> FrameBatch:
duration_seconds=duration_seconds,
)

-def get_frames_displayed_in_range(
+def get_frames_played_in_range(
self, start_seconds: float, stop_seconds: float
) -> FrameBatch:
"""Returns multiple frames in the given range.
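
The docstring note above is the motivation for the plural method: one batched call decodes each unique frame once and keeps seeks forward-only. A small sketch contrasting the two access patterns (path and timestamps are illustrative):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path
    timestamps = [0.50, 0.51, 0.52, 2.0]  # first three likely hit one frame

    # Batched: duplicates are decoded once and seeks stay forward-only.
    batch = decoder.get_frames_played_at(seconds=timestamps)

    # Same results, but may decode the same frame several times:
    frames = [decoder.get_frame_played_at(seconds=t) for t in timestamps]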
2 changes: 1 addition & 1 deletion src/torchcodec/samplers/_time_based.py
@@ -209,7 +209,7 @@ def _generic_time_based_sampler(
policy_fun=_POLICY_FUNCTIONS[policy],
)

-frames = decoder.get_frames_displayed_at(seconds=all_clips_timestamps)
+frames = decoder.get_frames_played_at(seconds=all_clips_timestamps)
return _reshape_4d_framebatch_into_5d(
frames=frames,
num_clips=num_clips,
17 changes: 8 additions & 9 deletions test/decoders/VideoDecoderTest.cpp
@@ -261,24 +261,23 @@ TEST_P(VideoDecoderTest, SeeksCloseToEof) {
EXPECT_THROW(ourDecoder->getNextDecodedOutputNoDemux(), std::exception);
}

-TEST_P(VideoDecoderTest, GetsFrameDisplayedAtTimestamp) {
+TEST_P(VideoDecoderTest, GetsFramePlayedAtTimestamp) {
std::string path = getResourcePath("nasa_13013.mp4");
std::unique_ptr<VideoDecoder> ourDecoder =
createDecoderFromPath(path, GetParam());
ourDecoder->addVideoStreamDecoder(-1);
-auto output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(6.006);
+auto output = ourDecoder->getFramePlayedAtTimestampNoDemux(6.006);
EXPECT_EQ(output.ptsSeconds, 6.006);
// The frame's duration is 0.033367 according to ffprobe,
-// so the next frame is displayed at timestamp=6.039367.
+// so the next frame is played at timestamp=6.039367.
const double kNextFramePts = 6.039366666666667;
-// The frame that is displayed a microsecond before the next frame is still
+// The frame that is played a microsecond before the next frame is still
// the previous frame.
-output =
-ourDecoder->getFrameDisplayedAtTimestampNoDemux(kNextFramePts - 1e-6);
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(kNextFramePts - 1e-6);
EXPECT_EQ(output.ptsSeconds, 6.006);
-// The frame that is displayed at the exact pts of the frame is the next
+// The frame that is played at the exact pts of the frame is the next
// frame.
-output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(kNextFramePts);
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(kNextFramePts);
EXPECT_EQ(output.ptsSeconds, kNextFramePts);

// This is the timestamp of the last frame in this video.
@@ -288,7 +287,7 @@ TEST_P(VideoDecoderTest, GetsFrameDisplayedAtTimestamp) {
kPtsOfLastFrameInVideoStream + kDurationOfLastFrameInVideoStream;
// Sanity check: make sure duration is strictly positive.
EXPECT_GT(kPtsPlusDurationOfLastFrame, kPtsOfLastFrameInVideoStream);
-output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(
kPtsPlusDurationOfLastFrame - 1e-6);
EXPECT_EQ(output.ptsSeconds, kPtsOfLastFrameInVideoStream);
}