Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ from torchcodec.decoders import SimpleVideoDecoder
decoder = SimpleVideoDecoder("path/to/video.mp4")

decoder.metadata
# VideoStreamMetadata: (Truncated output)
# VideoStreamMetadata:
# num_frames: 250
# duration_seconds: 10.0
# bit_rate: 31315.0
# codec: h264
# average_fps: 25.0
# ... (truncated output)

len(decoder) # == decoder.metadata.num_frames!
# 250
Expand Down
5 changes: 5 additions & 0 deletions docs/source/glossary.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,8 @@ Glossary
<https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga757780d38f482deb4d809c6c521fbcc2>`_:

*The best stream is determined according to various heuristics as the most likely to be what the user expects.*

scan
A scan corresponds to an entire pass over a video file, with the purpose
of retrieving metadata about the different streams and frames. **It does
not involve decoding**, so it is a lot cheaper than decoding the file.
9 changes: 3 additions & 6 deletions examples/basic_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,11 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
# -------------------------
#
# So far, we have retrieved frames based on their index. We can also retrieve
# frames based on *when* they are displayed. The available methods are
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at` and
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frames_displayed_at`, which
# also return :class:`~torchcodec.decoders.Frame` and
# :class:`~torchcodec.decoders.FrameBatch` objects respectively.
# frames based on *when* they are displayed with
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at`, which
# also returns :class:`~torchcodec.decoders.Frame`.

frame_at_2_seconds = decoder.get_frame_displayed_at(seconds=2)
print(f"{type(frame_at_2_seconds) = }")
print(frame_at_2_seconds)
plot(frame_at_2_seconds.data, "Frame displayed at 2 seconds")
# TODO_BEFORE_RELEASE: illustrate get_frames_displayed_at
75 changes: 32 additions & 43 deletions src/torchcodec/decoders/_core/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class VideoStreamMetadata:
"""Metadata of a single video stream."""

duration_seconds_from_header: Optional[float]
"""Duration of the stream, in seconds obtained from the header (float or
"""Duration of the stream, in seconds, obtained from the header (float or
None). This could be inaccurate."""
bit_rate: Optional[float]
"""Bit rate of the stream, in bits per second (float or None)."""
Expand All @@ -37,36 +37,38 @@ class VideoStreamMetadata:
content (the scan doesn't involve decoding). This is more accurate
than ``num_frames_from_header``. We recommend using the
``num_frames`` attribute instead. (int or None)."""
begin_stream_from_content_seconds: Optional[float]
"""Beginning of the stream in seconds (float or None).
This is min(frame.pts) for all frames in this stream."""
end_stream_from_content_seconds: Optional[float]
"""End of the stream in seconds (float or None).
This is max(frame.pts + frame.duration) for all frames in this stream.
Note that frames have a pts and duration and the interval defined by
[pts, pts + duration) is a half-open interval (the right boundary is open).
Therefore no frame is displayed at this time value.
Calling
SimpleVideoDecoder.get_frame_displayed_at(end_stream_from_content_seconds)
will raise a StopIteration exception.
If you want to get the last frame you can use [-1] on a SimpleVideoDecoder
object."""
begin_stream_seconds: Optional[float]
"""Beginning of the stream, in seconds (float or None).
Conceptually, this corresponds to the first frame's :term:`pts`. It is
computed as min(frame.pts) across all frames in the stream. Usually, this is
equal to 0."""
end_stream_seconds: Optional[float]
"""End of the stream, in seconds (float or None).
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
is computed as max(frame.pts + frame.duration) across all frames in the
stream. Note that no frame is displayed at this time value, so calling
:meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at` with
this value would result in an error. Retrieving the last frame is best done
by simply indexing the :class:`~torchcodec.decoders.SimpleVideoDecoder`
object with ``[-1]``.
"""
codec: Optional[str]
"""Codec (str or None)."""
width: Optional[int]
"""Width of the frames (int or None)."""
height: Optional[int]
"""Height of the frames (int or None)."""
average_fps_from_header: Optional[float]
"""Average fps of the stream (float or None)."""
"""Average fps of the stream, obtained from the header (float or None).
We recommend using the ``average_fps`` attribute instead."""
stream_index: int
"""Index of the stream within the video (int)."""

@property
def num_frames(self) -> Optional[int]:
"""Number of frames in the stream. This corresponds to
``num_frames_from_content`` if it's not None, otherwise it corresponds
to ``num_frames_from_header``.
``num_frames_from_content`` if a :term:`scan` was made, otherwise it
corresponds to ``num_frames_from_header``.
"""
if self.num_frames_from_content is not None:
return self.num_frames_from_content
Expand All @@ -76,35 +78,26 @@ def num_frames(self) -> Optional[int]:
@property
def duration_seconds(self) -> Optional[float]:
"""Duration of the stream in seconds. We try to calculate the duration
from the actual frames if we scanned the frames. Otherwise we fall back
to the duration obtained from the header.
from the actual frames if a :term:`scan` was performed. Otherwise we
fall back to ``duration_seconds_from_header``.
"""
if (
self.end_stream_from_content_seconds is None
or self.begin_stream_from_content_seconds is None
):
if self.end_stream_seconds is None or self.begin_stream_seconds is None:
return self.duration_seconds_from_header
return (
self.end_stream_from_content_seconds
- self.begin_stream_from_content_seconds
)
return self.end_stream_seconds - self.begin_stream_seconds

@property
def average_fps(self) -> Optional[float]:
"""Average fps of the stream. We try to get the average fps from the
actual frames if we scanned the frames. Otherwise we fall back to the
fps obtained from the header.
"""Average fps of the stream. If a :term:`scan` was performed, this is
computed from the number of frames and the duration of the stream.
Otherwise we fall back to ``average_fps_from_header``.
"""
if (
self.end_stream_from_content_seconds is None
or self.begin_stream_from_content_seconds is None
self.end_stream_seconds is None
or self.begin_stream_seconds is None
or self.num_frames is None
):
return self.average_fps_from_header
return self.num_frames / (
self.end_stream_from_content_seconds
- self.begin_stream_from_content_seconds
)
return self.num_frames / (self.end_stream_seconds - self.begin_stream_seconds)

def __repr__(self):
# Overridden because properties are not printed by default.
Expand Down Expand Up @@ -161,12 +154,8 @@ def get_video_metadata(decoder: torch.Tensor) -> VideoMetadata:
# keys with the Python names
num_frames_from_header=stream_dict.get("numFrames"),
num_frames_from_content=stream_dict.get("numFramesFromScan"),
begin_stream_from_content_seconds=stream_dict.get(
"minPtsSecondsFromScan"
),
end_stream_from_content_seconds=stream_dict.get(
"maxPtsSecondsFromScan"
),
begin_stream_seconds=stream_dict.get("minPtsSecondsFromScan"),
end_stream_seconds=stream_dict.get("maxPtsSecondsFromScan"),
codec=stream_dict.get("codec"),
width=stream_dict.get("width"),
height=stream_dict.get("height"),
Expand Down
29 changes: 10 additions & 19 deletions src/torchcodec/decoders/_simple_video_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def __repr__(self):
class SimpleVideoDecoder:
"""A single-stream video decoder.

If the video contains multiple video streams, the :term:`best stream` is used.
If the video contains multiple video streams, the :term:`best stream` is
used. This decoder always performs a :term:`scan` of the video.

Args:
source (str, ``Pathlib.path``, ``torch.Tensor``, or bytes): The source of the video.
Expand Down Expand Up @@ -140,23 +141,19 @@ def __init__(
)
self._num_frames = self.metadata.num_frames_from_content

if self.metadata.begin_stream_from_content_seconds is None:
if self.metadata.begin_stream_seconds is None:
raise ValueError(
"The minimum pts value in seconds is unknown. "
+ _ERROR_REPORTING_INSTRUCTIONS
)
self._begin_stream_from_content_seconds = (
self.metadata.begin_stream_from_content_seconds
)
self._begin_stream_seconds = self.metadata.begin_stream_seconds

if self.metadata.end_stream_from_content_seconds is None:
if self.metadata.end_stream_seconds is None:
raise ValueError(
"The maximum pts value in seconds is unknown. "
+ _ERROR_REPORTING_INSTRUCTIONS
)
self._end_stream_from_content_seconds = (
self.metadata.end_stream_from_content_seconds
)
self._end_stream_seconds = self.metadata.end_stream_seconds

def __len__(self) -> int:
return self._num_frames
Expand Down Expand Up @@ -267,22 +264,16 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
"""Return a single frame displayed at the given timestamp in seconds.

Args:
seconds (float): The time stamp in seconds when the frame is
displayed, i.e. seconds is in
[:term:`pts`, :term:`pts` + duration).
seconds (float): The time stamp in seconds when the frame is displayed.

Returns:
Frame: The frame that is displayed at ``seconds``.
"""
if (
not self._begin_stream_from_content_seconds
<= seconds
< self._end_stream_from_content_seconds
):
if not self._begin_stream_seconds <= seconds < self._end_stream_seconds:
raise IndexError(
f"Invalid pts in seconds: {seconds}. "
f"It must be greater than or equal to {self._begin_stream_from_content_seconds} "
f"and less than {self._end_stream_from_content_seconds}."
f"It must be greater than or equal to {self._begin_stream_seconds} "
f"and less than {self._end_stream_seconds}."
)
data, pts_seconds, duration_seconds = core.get_frame_at_pts(
self._decoder, seconds
Expand Down
4 changes: 2 additions & 2 deletions test/decoders/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def test_num_frames_fallback(
bit_rate=123,
num_frames_from_header=num_frames_from_header,
num_frames_from_content=num_frames_from_content,
begin_stream_from_content_seconds=0,
end_stream_from_content_seconds=4,
begin_stream_seconds=0,
end_stream_seconds=4,
codec="whatever",
width=123,
height=321,
Expand Down