2 changes: 1 addition & 1 deletion README.md
@@ -72,7 +72,7 @@ decoder.get_frames_in_range(start=10, stop=30, step=5)
# duration_seconds: tensor([0.0400, 0.0400, 0.0400, 0.0400])

# Time-based indexing with PTS and duration info
-decoder.get_frame_displayed_at(pts_seconds=2)
+decoder.get_frame_played_at(pts_seconds=2)
# Frame:
# data (shape): torch.Size([3, 400, 640])
# pts_seconds: 2.0
2 changes: 1 addition & 1 deletion docs/source/glossary.rst
@@ -4,7 +4,7 @@ Glossary
.. glossary::

pts
-Presentation Time Stamp. The time at which a frame should be displayed.
+Presentation Time Stamp. The time at which a frame should be played.
In TorchCodec, pts are expressed in seconds.

best stream
14 changes: 7 additions & 7 deletions examples/basic_example.py
@@ -150,24 +150,24 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
# -------------------------
#
# So far, we have retrieved frames based on their index. We can also retrieve
-# frames based on *when* they are displayed with
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at` and
-# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_displayed_in_range`, which
+# frames based on *when* they are played with
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` and
+# :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range`, which
# also returns :class:`~torchcodec.Frame` and :class:`~torchcodec.FrameBatch`
# respectively.

-frame_at_2_seconds = decoder.get_frame_displayed_at(seconds=2)
+frame_at_2_seconds = decoder.get_frame_played_at(seconds=2)
print(f"{type(frame_at_2_seconds) = }")
print(frame_at_2_seconds)

# %%
-first_two_seconds = decoder.get_frames_displayed_in_range(
+first_two_seconds = decoder.get_frames_played_in_range(
start_seconds=0,
stop_seconds=2,
)
print(f"{type(first_two_seconds) = }")
print(first_two_seconds)

# %%
-plot(frame_at_2_seconds.data, "Frame displayed at 2 seconds")
-plot(first_two_seconds.data, "Frames displayed during [0, 2) seconds")
+plot(frame_at_2_seconds.data, "Frame played at 2 seconds")
+plot(first_two_seconds.data, "Frames played during [0, 2) seconds")
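
For reference, a minimal sketch of the renamed time-based API in use, matching the example above (the video path is a placeholder):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path

    # Single frame: a Frame with .data, .pts_seconds and .duration_seconds.
    frame_at_2_seconds = decoder.get_frame_played_at(seconds=2)

    # Range query: a FrameBatch of all frames played during [0, 2) seconds.
    first_two_seconds = decoder.get_frames_played_in_range(
        start_seconds=0,
        stop_seconds=2,
    )
    print(frame_at_2_seconds.pts_seconds, first_two_seconds.data.shape)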
14 changes: 7 additions & 7 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -947,7 +947,7 @@ void VideoDecoder::convertAVFrameToDecodedOutputOnCPU(
}
}

-VideoDecoder::DecodedOutput VideoDecoder::getFrameDisplayedAtTimestampNoDemux(
+VideoDecoder::DecodedOutput VideoDecoder::getFramePlayedAtTimestampNoDemux(
double seconds) {
for (auto& [streamIndex, stream] : streams_) {
double frameStartTime = ptsToSeconds(stream.currentPts, stream.timeBase);
@@ -1090,13 +1090,13 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices(
return output;
}

-VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesDisplayedByTimestamps(
+VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesPlayedByTimestamps(
int streamIndex,
const std::vector<double>& timestamps) {
validateUserProvidedStreamIndex(streamIndex);
validateScannedAllStreams("getFramesDisplayedByTimestamps");
validateScannedAllStreams("getFramesPlayedByTimestamps");

-// The frame displayed at timestamp t and the one displayed at timestamp `t +
+// The frame played at timestamp t and the one played at timestamp `t +
// eps` are probably the same frame, with the same index. The easiest way to
// avoid decoding that unique frame twice is to convert the input timestamps
// to indices, and leverage the de-duplication logic of getFramesAtIndices.
@@ -1168,12 +1168,12 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesInRange(
}

VideoDecoder::BatchDecodedOutput
-VideoDecoder::getFramesDisplayedByTimestampInRange(
+VideoDecoder::getFramesPlayedByTimestampInRange(
int streamIndex,
double startSeconds,
double stopSeconds) {
validateUserProvidedStreamIndex(streamIndex);
validateScannedAllStreams("getFramesDisplayedByTimestampInRange");
validateScannedAllStreams("getFramesPlayedByTimestampInRange");

const auto& streamMetadata = containerMetadata_.streams[streamIndex];
double minSeconds = streamMetadata.minPtsSecondsFromScan.value();
@@ -1224,7 +1224,7 @@ VideoDecoder::getFramesDisplayedByTimestampInRange(
// abstract player displays frames starting at the pts for that frame until
// the pts for the next frame. There are two consequences:
//
-// 1. We ignore the duration for a frame. A frame is displayed until the
+// 1. We ignore the duration for a frame. A frame is played until the
// next frame replaces it. This model is robust to durations being 0 or
// incorrect; our source of truth is the pts for frames. If duration is
// accurate, the nextPts for a frame would be equivalent to pts + duration.
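
The comments above describe the model these functions share: a frame is played from its own pts until the next frame's pts (durations are ignored), and batch timestamp queries are converted to indices first so getFramesAtIndices can de-duplicate repeated frames. A rough Python sketch of that lookup, assuming a sorted list of per-frame pts from the scan (illustrative only, not the actual implementation):

    import bisect

    def frame_index_played_at(pts_list, seconds):
        # Frame i is played during [pts_list[i], pts_list[i + 1]),
        # regardless of the duration each frame reports.
        return bisect.bisect_right(pts_list, seconds) - 1

    def frame_indices_played_at(pts_list, timestamps):
        # Timestamps landing on the same frame map to the same index,
        # so each unique frame only needs to be decoded once.
        return [frame_index_played_at(pts_list, t) for t in timestamps]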
8 changes: 4 additions & 4 deletions src/torchcodec/decoders/_core/VideoDecoder.h
@@ -222,7 +222,7 @@ class VideoDecoder {
// duration of 1.0s, it will be visible in the timestamp range [5.0, 6.0).
// i.e. it will be returned when this function is called with seconds=5.0 or
// seconds=5.999, etc.
-DecodedOutput getFrameDisplayedAtTimestampNoDemux(double seconds);
+DecodedOutput getFramePlayedAtTimestampNoDemux(double seconds);

DecodedOutput getFrameAtIndex(
int streamIndex,
@@ -244,7 +244,7 @@
int streamIndex,
const std::vector<int64_t>& frameIndices);

-BatchDecodedOutput getFramesDisplayedByTimestamps(
+BatchDecodedOutput getFramesPlayedByTimestamps(
int streamIndex,
const std::vector<double>& timestamps);

@@ -265,15 +265,15 @@
// frame. Otherwise, the moment in time immediately before stopSeconds is in
// the range, and that time maps to the same frame as stopSeconds.
//
-// The frames returned are the frames that would be displayed by our abstract
+// The frames returned are the frames that would be played by our abstract
// player. Our abstract player displays frames based on pts only. It displays
// frame i starting at the pts for frame i, and stops at the pts for frame
// i+1. This model ignores a frame's reported duration.
//
// Valid values for startSeconds and stopSeconds are:
//
// [minPtsSecondsFromScan, maxPtsSecondsFromScan)
-BatchDecodedOutput getFramesDisplayedByTimestampInRange(
+BatchDecodedOutput getFramesPlayedByTimestampInRange(
int streamIndex,
double startSeconds,
double stopSeconds);
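
The header comments pin down the half-open semantics: startSeconds is inclusive, stopSeconds exclusive, and valid values lie in [minPtsSecondsFromScan, maxPtsSecondsFromScan). Under the same pts-only model, a range query could map to frame indices like this (a sketch with assumed helper names, not the real API):

    import bisect

    def frame_range_played_in(pts_list, start_seconds, stop_seconds):
        # The first frame is the one being played at start_seconds.
        first = bisect.bisect_right(pts_list, start_seconds) - 1
        # stop_seconds is exclusive: a frame whose pts equals stop_seconds
        # is not returned, but the frame playing just before it is.
        last = bisect.bisect_left(pts_list, stop_seconds) - 1
        return range(first, last + 1)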
6 changes: 3 additions & 3 deletions src/torchcodec/decoders/_core/VideoDecoderOps.cpp
@@ -207,7 +207,7 @@ OpsDecodedOutput get_next_frame(at::Tensor& decoder) {

OpsDecodedOutput get_frame_at_pts(at::Tensor& decoder, double seconds) {
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-auto result = videoDecoder->getFrameDisplayedAtTimestampNoDemux(seconds);
+auto result = videoDecoder->getFramePlayedAtTimestampNoDemux(seconds);
return makeOpsDecodedOutput(result);
}

@@ -249,7 +249,7 @@ OpsBatchDecodedOutput get_frames_by_pts(
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
std::vector<double> timestampsVec(timestamps.begin(), timestamps.end());
auto result =
-videoDecoder->getFramesDisplayedByTimestamps(stream_index, timestampsVec);
+videoDecoder->getFramesPlayedByTimestamps(stream_index, timestampsVec);
return makeOpsBatchDecodedOutput(result);
}

@@ -259,7 +259,7 @@ OpsBatchDecodedOutput get_frames_by_pts_in_range(
double start_seconds,
double stop_seconds) {
auto videoDecoder = unwrapTensorToGetDecoder(decoder);
-auto result = videoDecoder->getFramesDisplayedByTimestampInRange(
+auto result = videoDecoder->getFramesPlayedByTimestampInRange(
stream_index, start_seconds, stop_seconds);
return makeOpsBatchDecodedOutput(result);
}
4 changes: 2 additions & 2 deletions src/torchcodec/decoders/_core/_metadata.py
@@ -46,8 +46,8 @@ class VideoStreamMetadata:
"""End of the stream, in seconds (float or None).
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
is computed as max(frame.pts + frame.duration) across all frames in the
-stream. Note that no frame is displayed at this time value, so calling
-:meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at` with
+stream. Note that no frame is played at this time value, so calling
+:meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with
this value would result in an error. Retrieving the last frame is best done
by simply indexing the :class:`~torchcodec.decoders.VideoDecoder`
object with ``[-1]``.
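
To make the boundary concrete, a hedged sketch of what this docstring implies (path and attribute access are assumed from the surrounding docs):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path
    end = decoder.metadata.end_stream_seconds

    # end is max(frame.pts + frame.duration); no frame is played *at* that
    # time, so this would raise an IndexError:
    #   decoder.get_frame_played_at(seconds=end)

    # The documented way to get the last frame is plain indexing:
    last_frame = decoder[-1]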
20 changes: 10 additions & 10 deletions src/torchcodec/decoders/_video_decoder.py
@@ -240,14 +240,14 @@ def get_frames_in_range(self, start: int, stop: int, step: int = 1) -> FrameBatc
)
return FrameBatch(*frames)

-def get_frame_displayed_at(self, seconds: float) -> Frame:
-"""Return a single frame displayed at the given timestamp in seconds.
+def get_frame_played_at(self, seconds: float) -> Frame:
+"""Return a single frame played at the given timestamp in seconds.

Args:
-seconds (float): The time stamp in seconds when the frame is displayed.
+seconds (float): The time stamp in seconds when the frame is played.

Returns:
-Frame: The frame that is displayed at ``seconds``.
+Frame: The frame that is played at ``seconds``.
"""
if not self._begin_stream_seconds <= seconds < self._end_stream_seconds:
raise IndexError(
@@ -264,21 +264,21 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
duration_seconds=duration_seconds.item(),
)

-def get_frames_displayed_at(self, seconds: list[float]) -> FrameBatch:
-"""Return frames displayed at the given timestamps in seconds.
+def get_frames_played_at(self, seconds: list[float]) -> FrameBatch:
+"""Return frames played at the given timestamps in seconds.

.. note::

Calling this method is more efficient than repeated individual calls
-to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_displayed_at`.
+to :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at`.
This method makes sure not to decode the same frame twice, and also
avoids "backwards seek" operations, which are slow.

Args:
-seconds (list of float): The timestamps in seconds when the frames are displayed.
+seconds (list of float): The timestamps in seconds when the frames are played.

Returns:
-FrameBatch: The frames that are displayed at ``seconds``.
+FrameBatch: The frames that are played at ``seconds``.
"""
data, pts_seconds, duration_seconds = core.get_frames_by_pts(
self._decoder, timestamps=seconds, stream_index=self.stream_index
@@ -289,7 +289,7 @@ def get_frames_displayed_at(self, seconds: list[float]) -> FrameBatch:
duration_seconds=duration_seconds,
)

-def get_frames_displayed_in_range(
+def get_frames_played_in_range(
self, start_seconds: float, stop_seconds: float
) -> FrameBatch:
"""Returns multiple frames in the given range.
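
The docstring note above is the motivation for the plural method: one batched call decodes each unique frame once and keeps seeks forward-only. A small sketch contrasting the two access patterns (path and timestamps are illustrative):

    from torchcodec.decoders import VideoDecoder

    decoder = VideoDecoder("my_video.mp4")  # hypothetical path
    timestamps = [0.50, 0.51, 0.52, 2.0]  # first three likely hit one frame

    # Batched: duplicates are decoded once and seeks stay forward-only.
    batch = decoder.get_frames_played_at(seconds=timestamps)

    # Same results, but may decode the same frame several times:
    frames = [decoder.get_frame_played_at(seconds=t) for t in timestamps]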
2 changes: 1 addition & 1 deletion src/torchcodec/samplers/_time_based.py
@@ -209,7 +209,7 @@ def _generic_time_based_sampler(
policy_fun=_POLICY_FUNCTIONS[policy],
)

-frames = decoder.get_frames_displayed_at(seconds=all_clips_timestamps)
+frames = decoder.get_frames_played_at(seconds=all_clips_timestamps)
return _reshape_4d_framebatch_into_5d(
frames=frames,
num_clips=num_clips,
17 changes: 8 additions & 9 deletions test/decoders/VideoDecoderTest.cpp
@@ -261,24 +261,23 @@ TEST_P(VideoDecoderTest, SeeksCloseToEof) {
EXPECT_THROW(ourDecoder->getNextDecodedOutputNoDemux(), std::exception);
}

-TEST_P(VideoDecoderTest, GetsFrameDisplayedAtTimestamp) {
+TEST_P(VideoDecoderTest, GetsFramePlayedAtTimestamp) {
std::string path = getResourcePath("nasa_13013.mp4");
std::unique_ptr<VideoDecoder> ourDecoder =
createDecoderFromPath(path, GetParam());
ourDecoder->addVideoStreamDecoder(-1);
-auto output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(6.006);
+auto output = ourDecoder->getFramePlayedAtTimestampNoDemux(6.006);
EXPECT_EQ(output.ptsSeconds, 6.006);
// The frame's duration is 0.033367 according to ffprobe,
-// so the next frame is displayed at timestamp=6.039367.
+// so the next frame is played at timestamp=6.039367.
const double kNextFramePts = 6.039366666666667;
-// The frame that is displayed a microsecond before the next frame is still
+// The frame that is played a microsecond before the next frame is still
// the previous frame.
-output =
-ourDecoder->getFrameDisplayedAtTimestampNoDemux(kNextFramePts - 1e-6);
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(kNextFramePts - 1e-6);
EXPECT_EQ(output.ptsSeconds, 6.006);
-// The frame that is displayed at the exact pts of the frame is the next
+// The frame that is played at the exact pts of the frame is the next
// frame.
-output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(kNextFramePts);
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(kNextFramePts);
EXPECT_EQ(output.ptsSeconds, kNextFramePts);

// This is the timestamp of the last frame in this video.
@@ -288,7 +287,7 @@ TEST_P(VideoDecoderTest, GetsFrameDisplayedAtTimestamp) {
kPtsOfLastFrameInVideoStream + kDurationOfLastFrameInVideoStream;
// Sanity check: make sure duration is strictly positive.
EXPECT_GT(kPtsPlusDurationOfLastFrame, kPtsOfLastFrameInVideoStream);
-output = ourDecoder->getFrameDisplayedAtTimestampNoDemux(
+output = ourDecoder->getFramePlayedAtTimestampNoDemux(
kPtsPlusDurationOfLastFrame - 1e-6);
EXPECT_EQ(output.ptsSeconds, kPtsOfLastFrameInVideoStream);
}