From ba3cbbfd5e0f494e7aaf1c2f6a4eb2405a00ba06 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 19 Nov 2025 01:02:27 -0500 Subject: [PATCH 1/6] add tutorial w videos --- docs/source/conf.py | 1 + examples/encoding/video_encoding.py | 262 ++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+) create mode 100644 examples/encoding/video_encoding.py diff --git a/docs/source/conf.py b/docs/source/conf.py index 133bccf2e..c36217833 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -87,6 +87,7 @@ def __call__(self, filename): assert "examples/encoding" in self.src_dir order = [ "audio_encoding.py", + "video_encoding.py", ] try: diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py new file mode 100644 index 000000000..6193f4508 --- /dev/null +++ b/examples/encoding/video_encoding.py @@ -0,0 +1,262 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +======================================= +Encoding video frames with VideoEncoder +======================================= + +In this example, we'll learn how to encode video frames to a file or to raw +bytes using the :class:`~torchcodec.encoders.VideoEncoder` class. +""" + +# %% +# First, we'll download a video and decode some frames to tensors. +# These will be the input to the VideoEncoder. For more details on decoding, +# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`. +# Otherwise, skip ahead to :ref:`creating_encoder`. + +import requests +from torchcodec.decoders import VideoDecoder +from IPython.display import Video + + +def play_video(encoded_bytes): + return Video( + data=encoded_bytes.numpy().tobytes(), + embed=True, + width=640, + height=360, + mimetype="video/mp4", + ) + + +# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/ +# License: CC0. 
Author: Altaf Shah. +url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4" + +response = requests.get(url, headers={"User-Agent": ""}) +if response.status_code != 200: + raise RuntimeError(f"Failed to download video. {response.status_code = }.") + +raw_video_bytes = response.content + +decoder = VideoDecoder(raw_video_bytes) +frames = decoder[:60] # Get first 60 frames +# TODO: use float once other PR lands +frame_rate = int(decoder.metadata.average_fps) + +# %% +# .. _creating_encoder: +# +# Creating an encoder +# ------------------- +# +# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide +# the frames to be encoded as a 4D tensor of shape +# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]`` +# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input +# video. +# +# .. note:: +# +# The ``frame_rate`` parameter corresponds to the frame rate of the +# *input* video. It will also be used for the frame rate of the *output* encoded video. +from torchcodec.encoders import VideoEncoder + +print(f"{frames.shape = }, {frames.dtype = }") +print(f"{frame_rate = } fps") + +encoder = VideoEncoder(frames=frames, frame_rate=frame_rate) + +# %% +# Encoding to file, bytes, or file-like +# ------------------------------------- +# +# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a +# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to +# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_filelike` +# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`. +# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we +# can easily inspect and display the encoded video. 
+ +encoded_frames = encoder.to_tensor(format="mp4") +play_video(encoded_frames) + +# %% +# +# Now that we have encoded data, we can decode it back to verify the +# round-trip encode/decode process works as expected: + +decoder_verify = VideoDecoder(encoded_frames) +decoded_frames = decoder_verify[:] + +print(f"Re-decoded video: {decoded_frames.shape = }") +print(f"Original frames: {frames.shape = }") + +# %% +# Codec Selection +# --------------- +# +# The ``codec`` parameter specifies which video codec to use for encoding. +# You can specify either a specific codec implementation (e.g., ``"libx264"``) +# or a codec specification (e.g., ``"h264"``). Different codecs offer +# different tradeoffs between quality, file size, and encoding speed. +# +# .. note:: +# +# To see available encoders on your system, run ``ffmpeg -encoders``. +# +# Let's encode the same frames using different codecs: + +# H.264 encoding +h264_output = "libx264_encoded.mp4" +encoder.to_file(h264_output, codec="libx264") + +# H.265 encoding +hevc_output = "hevc_encoded.mp4" +encoder.to_file(hevc_output, codec="hevc") + +# Now let's use ffprobe to verify the codec used in the output files +import subprocess + +for output in [h264_output, hevc_output]: + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name", + "-of", + "default=noprint_wrappers=1:nokey=1", + output, + ], + capture_output=True, + text=True, + ) + print(f"Codec used in {output}: {result.stdout.strip()}") + +# %% +# Pixel Format +# ------------ +# +# The ``pixel_format`` parameter controls the color sampling (chroma subsampling) +# of the output video. This affects both quality and file size. 
+# +# Common pixel formats: +# +# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible) +# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size) +# +# Most playback devices and platforms support ``yuv420p``, making it the most +# common choice for video encoding. +# +# .. note:: +# +# Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>`` +# to check available options for your selected codec. + +# Standard pixel format +yuv420_encoded_frames = encoder.to_tensor( + format="mp4", codec="libx264", pixel_format="yuv420p" +) +play_video(yuv420_encoded_frames) + +# %% +# CRF (Constant Rate Factor) +# -------------------------- +# +# The ``crf`` parameter controls video quality, where lower values produce higher quality output. +# +# For example, with the commonly used H.264 codec, ``libx264``: +# +# - Values range from 0 (lossless) to 51 (worst quality) +# - Values 17 or 18 are conisdered visually lossless, and the default is 23. +# +# .. note:: +# +# The range and interpretation of CRF values depend on the codec used, and +# not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to +# check available options for your selected codec. +# + +# High quality (low CRF) +high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0) +play_video(high_quality_output) + +# %% +# Low quality (high CRF) +low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50) +play_video(low_quality_output) + + +# %% +# Preset +# ------ +# +# The ``preset`` parameter controls the tradeoff between encoding speed and file compression. +# Faster presets encode faster but produce larger files, while slower +# presets take more time to encode but result in better compression. 
+# +# For example, with the commonly used H.264 codec, ``libx264`` presets include: +# +# - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression). +# +# .. note:: +# +# Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>`` +# to check available options for your selected codec. +# + +import os +# Fast encoding with a larger file size +fast_output = "fast_encoded.mp4" +encoder.to_file(fast_output, codec="libx264", preset="ultrafast") +print(f"Size of fast encoded file: {os.path.getsize(fast_output)} bytes") + +# Slow encoding for a smaller file size +slow_output = "slow_encoded.mp4" +encoder.to_file(slow_output, codec="libx264", preset="veryslow") +print(f"Size of slow encoded file: {os.path.getsize(slow_output)} bytes") + +# %% +# Extra Options +# ------------- +# +# The ``extra_options`` parameter accepts a dictionary of codec-specific options +# that would normally be set via FFmpeg command-line arguments. This enables +# control of encoding settings beyond the common parameters. +# +# For example, some potential extra options for the commonly used H.264 codec, ``libx264`` include: +# For example, with , ``libx264``: +# +# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval +# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames +# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``) +# +# .. note:: +# +# Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for +# a specific codec. 
+# + + +# Custom GOP size and tuning +custom_output = "custom_encoded.mp4" +encoder.to_file( + custom_output, + codec="libx264", + extra_options={ + "g": 50, # Keyframe every 50 frames + "max_b_frames": 0, # Disable B-frames for faster decoding + "tune": "fastdecode", # Optimize for fast decoding + } +) + +# %% From d5be152388784b4d2a61c5ef58a078e1b0de9079 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 19 Nov 2025 13:56:47 -0500 Subject: [PATCH 2/6] add suggestions, link in docstrings --- docs/source/api_ref_encoders.rst | 1 + docs/source/index.rst | 8 ++++ examples/encoding/video_encoding.py | 57 ++++++++++++++++------- src/torchcodec/encoders/_video_encoder.py | 15 ++++++ 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/docs/source/api_ref_encoders.rst b/docs/source/api_ref_encoders.rst index 52c7295bc..6c7fc825d 100644 --- a/docs/source/api_ref_encoders.rst +++ b/docs/source/api_ref_encoders.rst @@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a :template: class.rst AudioEncoder + VideoEncoder diff --git a/docs/source/index.rst b/docs/source/index.rst index 3e312ccd3..74e8d1298 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -98,6 +98,14 @@ Encoding How encode audio samples + .. grid-item-card:: :octicon:`file-code;1em` + Video Encoding + :img-top: _static/img/card-background.svg + :link: generated_examples/encoding/video_encoding.html + :link-type: url + + How to encode video frames + .. toctree:: :maxdepth: 1 :caption: TorchCodec documentation diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py index 6193f4508..afa58aecf 100644 --- a/examples/encoding/video_encoding.py +++ b/examples/encoding/video_encoding.py @@ -15,7 +15,7 @@ # %% # First, we'll download a video and decode some frames to tensors. -# These will be the input to the VideoEncoder. 
For more details on decoding, +# These will be the input to the :class:`~torchcodec.encoders.VideoEncoder`. For more details on decoding, # see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`. # Otherwise, skip ahead to :ref:`creating_encoder`. @@ -35,7 +35,7 @@ def play_video(encoded_bytes): # Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/ -# License: CC0. Author: Altaf Shah. +# Author: Altaf Shah. url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4" response = requests.get(url, headers={"User-Agent": ""}) @@ -45,7 +45,7 @@ def play_video(encoded_bytes): raw_video_bytes = response.content decoder = VideoDecoder(raw_video_bytes) -frames = decoder[:60] # Get first 60 frames +frames = decoder.get_frames_in_range(0, 60).data # Get first 60 frames # TODO: use float once other PR lands frame_rate = int(decoder.metadata.average_fps) @@ -78,7 +78,7 @@ def play_video(encoded_bytes): # # :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a # file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to -# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_filelike` +# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` # method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`. # For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we # can easily inspect and display the encoded video. @@ -92,15 +92,25 @@ def play_video(encoded_bytes): # round-trip encode/decode process works as expected: decoder_verify = VideoDecoder(encoded_frames) -decoded_frames = decoder_verify[:] +decoded_frames = decoder_verify.get_frames_in_range(0, 60).data print(f"Re-decoded video: {decoded_frames.shape = }") print(f"Original frames: {frames.shape = }") # %% +# .. 
_codec_selection: +# # Codec Selection # --------------- # +# By default, the codec used is selected automatically using the file extension provided +# in the ``dest`` parameter for the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, +# or using the ``format`` parameter for the +# :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and +# :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods. +# +# - For example, when encoding to MP4 format, the default codec used is ``H.264``. +# # The ``codec`` parameter specifies which video codec to use for encoding. # You can specify either a specific codec implementation (e.g., ``"libx264"``) # or a codec specification (e.g., ``"h264"``). Different codecs offer @@ -112,18 +122,21 @@ def play_video(encoded_bytes): # # Let's encode the same frames using different codecs: +import tempfile +from pathlib import Path + # H.264 encoding -h264_output = "libx264_encoded.mp4" +h264_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file(h264_output, codec="libx264") # H.265 encoding -hevc_output = "hevc_encoded.mp4" +hevc_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file(hevc_output, codec="hevc") # Now let's use ffprobe to verify the codec used in the output files import subprocess -for output in [h264_output, hevc_output]: +for output, name in [(h264_output, "h264_output"), (hevc_output, "hevc_output")]: result = subprocess.run( [ "ffprobe", @@ -140,9 +153,16 @@ def play_video(encoded_bytes): capture_output=True, text=True, ) - print(f"Codec used in {output}: {result.stdout.strip()}") + print(f"Codec used in {name}: {result.stdout.strip()}") + +# %% +# For most cases, you can simply specify the format parameter and let the FFmpeg select the default codec. +# However, specifying the codec parameter is useful to select a particular codec implementation +# (``libx264`` vs ``libx265``) or to have more control over the encoding behavior. # %% +# .. 
_pixel_format: +# # Pixel Format # ------------ # @@ -169,6 +189,8 @@ def play_video(encoded_bytes): play_video(yuv420_encoded_frames) # %% +# .. _crf: +# # CRF (Constant Rate Factor) # -------------------------- # @@ -197,6 +219,8 @@ def play_video(encoded_bytes): # %% +# .. _preset: +# # Preset # ------ # @@ -207,6 +231,7 @@ def play_video(encoded_bytes): # For example, with the commonly used H.264 codec, ``libx264`` presets include: # # - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression). +# - See additional details in the `H.264 Video Encoding Guide `_. # # .. note:: # @@ -214,18 +239,19 @@ def play_video(encoded_bytes): # to check available options for your selected codec. # -import os # Fast encoding with a larger file size -fast_output = "fast_encoded.mp4" +fast_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file(fast_output, codec="libx264", preset="ultrafast") -print(f"Size of fast encoded file: {os.path.getsize(fast_output)} bytes") +print(f"Size of fast encoded file: {Path(fast_output).stat().st_size} bytes") # Slow encoding for a smaller file size -slow_output = "slow_encoded.mp4" +slow_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file(slow_output, codec="libx264", preset="veryslow") -print(f"Size of slow encoded file: {os.path.getsize(slow_output)} bytes") +print(f"Size of slow encoded file: {Path(slow_output).stat().st_size} bytes") # %% +# .. _extra_options: +# # Extra Options # ------------- # @@ -234,7 +260,6 @@ def play_video(encoded_bytes): # control of encoding settings beyond the common parameters. 
# # For example, some potential extra options for the commonly used H.264 codec, ``libx264`` include: -# For example, with , ``libx264``: # # - ``"g"`` - GOP (Group of Pictures) size / keyframe interval # - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames @@ -248,7 +273,7 @@ def play_video(encoded_bytes): # Custom GOP size and tuning -custom_output = "custom_encoded.mp4" +custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file( custom_output, codec="libx264", diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 909cf73a9..3c8b36fd1 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -51,18 +51,23 @@ def to_file( codec (str, optional): The codec to use for encoding (e.g., "libx264", "h264"). If not specified, the default codec for the container format will be used. + See :ref:`codec_selection` for details. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + See :ref:`pixel_format` for details. crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values mean better quality. Valid range depends on the encoder (commonly 0-51). Defaults to None (which will use encoder's default). + See :ref:`crf` for details. preset (str or int, optional): Encoder option that controls the tradeoff between encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + See :ref:`extra_options` for details. 
""" preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file( @@ -96,18 +101,23 @@ def to_tensor( codec (str, optional): The codec to use for encoding (e.g., "libx264", "h264"). If not specified, the default codec for the container format will be used. + See :ref:`codec_selection` for details. pixel_format (str, optional): The pixel format to encode frames into (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + See :ref:`pixel_format` for details. crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values mean better quality. Valid range depends on the encoder (commonly 0-51). Defaults to None (which will use encoder's default). + See :ref:`crf` for details. preset (str or int, optional): Encoder option that controls the tradeoff between encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + See :ref:`extra_options` for details. Returns: Tensor: The raw encoded bytes as 1D uint8 Tensor. @@ -150,18 +160,23 @@ def to_file_like( codec (str, optional): The codec to use for encoding (e.g., "libx264", "h264"). If not specified, the default codec for the container format will be used. + See :ref:`codec_selection` for details. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + See :ref:`pixel_format` for details. crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values mean better quality. Valid range depends on the encoder (commonly 0-51). Defaults to None (which will use encoder's default). + See :ref:`crf` for details. 
preset (str or int, optional): Encoder option that controls the tradeoff between encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + See :ref:`extra_options` for details. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file_like( From fd59e4c3158b710c2b2d6c550ab609a88f53a923 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 19 Nov 2025 14:03:17 -0500 Subject: [PATCH 3/6] add word commonly --- examples/encoding/video_encoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py index afa58aecf..516394143 100644 --- a/examples/encoding/video_encoding.py +++ b/examples/encoding/video_encoding.py @@ -109,7 +109,7 @@ def play_video(encoded_bytes): # :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and # :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods. # -# - For example, when encoding to MP4 format, the default codec used is ``H.264``. +# For example, when encoding to MP4 format, the default codec is typically ``H.264``. # # The ``codec`` parameter specifies which video codec to use for encoding. 
# You can specify either a specific codec implementation (e.g., ``"libx264"``) From 3eaee28906a553de483b48ffe8b1ce63615a6d83 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 19 Nov 2025 23:59:55 -0500 Subject: [PATCH 4/6] transition sentence between codec default and selection --- examples/encoding/video_encoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py index 516394143..ee7ff7b78 100644 --- a/examples/encoding/video_encoding.py +++ b/examples/encoding/video_encoding.py @@ -111,7 +111,7 @@ def play_video(encoded_bytes): # # For example, when encoding to MP4 format, the default codec is typically ``H.264``. # -# The ``codec`` parameter specifies which video codec to use for encoding. +# To use a codec other than the default, use the ``codec`` parameter. # You can specify either a specific codec implementation (e.g., ``"libx264"``) # or a codec specification (e.g., ``"h264"``). Different codecs offer # different tradeoffs between quality, file size, and encoding speed. From 9bbeb1ffff245ca5fb0334a93f5a9bc578af2235 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 20 Nov 2025 08:41:58 -0500 Subject: [PATCH 5/6] adjust docstirngs, apply nits --- examples/encoding/video_encoding.py | 18 +++++++----------- src/torchcodec/encoders/_video_encoder.py | 15 ++++++--------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py index ee7ff7b78..90f3b8e26 100644 --- a/examples/encoding/video_encoding.py +++ b/examples/encoding/video_encoding.py @@ -155,10 +155,6 @@ def play_video(encoded_bytes): ) print(f"Codec used in {name}: {result.stdout.strip()}") -# %% -# For most cases, you can simply specify the format parameter and let the FFmpeg select the default codec. 
-# However, specifying the codec parameter is useful to select a particular codec implementation -# (``libx264`` vs ``libx265``) or to have more control over the encoding behavior. # %% # .. _pixel_format: @@ -199,7 +195,7 @@ def play_video(encoded_bytes): # For example, with the commonly used H.264 codec, ``libx264``: # # - Values range from 0 (lossless) to 51 (worst quality) -# - Values 17 or 18 are conisdered visually lossless, and the default is 23. +# - Values 17 or 18 are considered visually lossless, and the default is 23. # # .. note:: # @@ -228,10 +224,11 @@ def play_video(encoded_bytes): # Faster presets encode faster but produce larger files, while slower # presets take more time to encode but result in better compression. # -# For example, with the commonly used H.264 codec, ``libx264`` presets include: -# -# - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression). -# - See additional details in the `H.264 Video Encoding Guide `_. +# For example, with the commonly used H.264 codec, ``libx264`` presets include +# ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, and +# ``"veryslow"`` (slowest, best compression). See the +# `H.264 Video Encoding Guide <https://trac.ffmpeg.org/wiki/Encode/H.264>`_ +# for additional details. # # .. 
note:: # @@ -272,13 +269,12 @@ def play_video(encoded_bytes): # -# Custom GOP size and tuning custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name encoder.to_file( custom_output, codec="libx264", extra_options={ - "g": 50, # Keyframe every 50 frames + "g": 50, # Keyframe every 50 frames "max_b_frames": 0, # Disable B-frames for faster decoding "tune": "fastdecode", # Optimize for fast decoding } diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 3c8b36fd1..3267de5e2 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -56,17 +56,16 @@ def to_file( "yuv420p", "yuv444p"). If not specified, uses codec's default format. See :ref:`pixel_format` for details. crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values - mean better quality. Valid range depends on the encoder (commonly 0-51). + mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264). Defaults to None (which will use encoder's default). See :ref:`crf` for details. preset (str or int, optional): Encoder option that controls the tradeoff between - encoding speed and compression. Valid values depend on the encoder (commonly + encoding speed and compression (output size). Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. - Values will be converted to strings before passing to the encoder. See :ref:`extra_options` for details. """ preset = str(preset) if isinstance(preset, int) else preset @@ -106,17 +105,16 @@ def to_tensor( "yuv420p", "yuv444p"). If not specified, uses codec's default format. See :ref:`pixel_format` for details. 
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values - mean better quality. Valid range depends on the encoder (commonly 0-51). + mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264). Defaults to None (which will use encoder's default). See :ref:`crf` for details. preset (str or int, optional): Encoder option that controls the tradeoff between - encoding speed and compression. Valid values depend on the encoder (commonly + encoding speed and compression (output size). Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. - Values will be converted to strings before passing to the encoder. See :ref:`extra_options` for details. Returns: @@ -165,17 +163,16 @@ def to_file_like( "yuv420p", "yuv444p"). If not specified, uses codec's default format. See :ref:`pixel_format` for details. crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values - mean better quality. Valid range depends on the encoder (commonly 0-51). + mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264). Defaults to None (which will use encoder's default). See :ref:`crf` for details. preset (str or int, optional): Encoder option that controls the tradeoff between - encoding speed and compression. Valid values depend on the encoder (commonly + encoding speed and compression (output size). Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). See :ref:`preset` for details. extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. - Values will be converted to strings before passing to the encoder. 
See :ref:`extra_options` for details. """ preset = str(preset) if isinstance(preset, int) else preset From 1bcb9ce2896d1d202e800d95979f69f146a7de26 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 20 Nov 2025 11:15:55 -0500 Subject: [PATCH 6/6] remove todo to use float frame rate --- examples/encoding/video_encoding.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py index 90f3b8e26..255fbffdf 100644 --- a/examples/encoding/video_encoding.py +++ b/examples/encoding/video_encoding.py @@ -46,8 +46,7 @@ def play_video(encoded_bytes): decoder = VideoDecoder(raw_video_bytes) frames = decoder.get_frames_in_range(0, 60).data # Get first 60 frames -# TODO: use float once other PR lands -frame_rate = int(decoder.metadata.average_fps) +frame_rate = decoder.metadata.average_fps # %% # .. _creating_encoder: