From 1d79594996d9672ad81c96702e268575fcefccbe Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 13 Nov 2025 17:07:58 -0500 Subject: [PATCH 1/7] add codec options, apply numeric error handling --- src/torchcodec/_core/Encoder.cpp | 87 +++++++++++++++++------ src/torchcodec/_core/Encoder.h | 11 +++ src/torchcodec/_core/StreamOptions.h | 2 + src/torchcodec/_core/custom_ops.cpp | 42 +++++++++-- src/torchcodec/_core/ops.py | 12 +++- src/torchcodec/encoders/_video_encoder.py | 33 ++++++++- test/test_encoders.py | 39 ++++++++++ 7 files changed, 194 insertions(+), 32 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 90e326087..8ac13cdd6 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -570,10 +570,10 @@ AVPixelFormat validatePixelFormat( TORCH_CHECK(false, errorMsg.str()); } -void validateDoubleOption( +void tryToValidateCodecOption( const AVCodec& avCodec, const char* optionName, - double value) { + const std::string& value) { if (!avCodec.priv_class) { return; } @@ -586,24 +586,36 @@ void validateDoubleOption( 0, AV_OPT_SEARCH_FAKE_OBJ, nullptr); - // If the option was not found, let FFmpeg handle it later + // If option is not found we cannot validate it, let FFmpeg handle it if (!option) { return; } + // Validate options defined as a numeric type if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 || option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) { - TORCH_CHECK( - value >= option->min && value <= option->max, - optionName, - "=", - value, - " is out of valid range [", - option->min, - ", ", - option->max, - "] for this codec. For more details, run 'ffmpeg -h encoder=", - avCodec.name, - "'"); + try { + double numericValue = std::stod(value); + TORCH_CHECK( + numericValue >= option->min && numericValue <= option->max, + optionName, + "=", + numericValue, + " is out of valid range [", + option->min, + ", ", + option->max, + "] for this codec. For more details, run 'ffmpeg -h encoder=", + avCodec.name, + "'"); + } catch (const std::invalid_argument& e) { + TORCH_CHECK( + false, + "Option ", + optionName, + " expects a numeric value but got '", + value, + "'"); + } } } } // namespace @@ -685,6 +697,30 @@ VideoEncoder::VideoEncoder( initializeEncoder(videoStreamOptions); } +void VideoEncoder::sortCodecOptions( + const std::map& codecOptions, + AVDictionary** codecDict, + AVDictionary** formatDict) { + // Search AVFormatContext's AVClass for options + const AVClass* formatClass = avformat_get_class(); + for (const auto& [key, value] : codecOptions) { + const AVOption* fmtOpt = av_opt_find2( + &formatClass, + key.c_str(), + nullptr, + 0, + AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ, + nullptr); + if (fmtOpt) { + av_dict_set(formatDict, key.c_str(), value.c_str(), 0); + } else { + // Default to codec option (includes AVCodecContext + encoder-private) + // validateCodecOption(*avCodecContext_->codec, key.c_str(), value); + av_dict_set(codecDict, key.c_str(), value.c_str(), 0); + } + } +} + void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { const AVCodec* avCodec = @@ -737,13 +773,19 @@ void VideoEncoder::initializeEncoder( // Apply videoStreamOptions AVDictionary* options = nullptr; + if (videoStreamOptions.codecOptions.has_value()) { + // Validate all codec options before setting them + for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) { + tryToValidateCodecOption(*avCodec, key.c_str(), value); + } + sortCodecOptions( + videoStreamOptions.codecOptions.value(), &options, &formatOptions_); + } + if (videoStreamOptions.crf.has_value()) { - validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value()); - av_dict_set( - &options, - "crf", - std::to_string(videoStreamOptions.crf.value()).c_str(), - 0); + std::string crfValue = std::to_string(videoStreamOptions.crf.value()); + tryToValidateCodecOption(*avCodec, "crf", crfValue); + av_dict_set(&options, "crf", crfValue.c_str(), 0); } if (videoStreamOptions.preset.has_value()) { av_dict_set( @@ -775,7 +817,8 @@ void VideoEncoder::encode() { TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice."); encodeWasCalled_ = true; - int status = avformat_write_header(avFormatContext_.get(), nullptr); + int status = avformat_write_header(avFormatContext_.get(), &formatOptions_); + av_dict_free(&formatOptions_); TORCH_CHECK( status == AVSUCCESS, "Error in avformat_write_header: ", diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index c1055281a..24ce19011 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -1,9 +1,15 @@ #pragma once #include +#include +#include #include "AVIOContextHolder.h" #include "FFMPEGCommon.h" #include "StreamOptions.h" +extern "C" { +#include +} + namespace facebook::torchcodec { class AudioEncoder { public: @@ -154,6 +160,10 @@ class VideoEncoder { private: void initializeEncoder(const VideoStreamOptions& videoStreamOptions); + void sortCodecOptions( + const std::map& codecOptions, + AVDictionary** codecDict, + AVDictionary** formatDict); UniqueAVFrame convertTensorToAVFrame( const torch::Tensor& frame, int frameIndex); @@ -179,6 +189,7 @@ class VideoEncoder { std::unique_ptr avioContextHolder_; bool encodeWasCalled_ = false; + AVDictionary* formatOptions_ = nullptr; }; } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index 01af6846c..56df44754 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include #include @@ -50,6 +51,7 @@ struct VideoStreamOptions { std::optional pixelFormat; std::optional crf; std::optional preset; + std::optional> codecOptions; }; struct AudioStreamOptions { diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 2ef5d0f49..f6d14a9d0 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()"); m.def( - "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor"); + "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor"); m.def( - "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()"); + "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); m.def( @@ -158,6 +158,16 @@ std::string quoteValue(const std::string& value) { return "\"" + value + "\""; } +// Helper function to unflatten codec_options, alternating keys and values +std::map unflattenCodecOptions( + const std::vector& opts) { + std::map optionsMap; + for (size_t i = 0; i < opts.size(); i += 2) { + optionsMap[opts[i]] = opts[i + 1]; + } + return optionsMap; +} + std::string mapToJson(const std::map& metadataMap) { std::stringstream ss; ss << "{\n"; @@ -605,11 +615,18 @@ void encode_video_to_file( std::string_view file_name, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, - std::optional preset = std::nullopt) { + std::optional preset = std::nullopt, + std::optional> codec_options = std::nullopt) { VideoStreamOptions videoStreamOptions; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; + + if (codec_options.has_value()) { + videoStreamOptions.codecOptions = + unflattenCodecOptions(codec_options.value()); + } + VideoEncoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), @@ -624,12 +641,19 @@ at::Tensor encode_video_to_tensor( std::string_view format, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, - std::optional preset = std::nullopt) { + std::optional preset = std::nullopt, + std::optional> codec_options = std::nullopt) { auto avioContextHolder = std::make_unique(); VideoStreamOptions videoStreamOptions; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; + + if (codec_options.has_value()) { + videoStreamOptions.codecOptions = + unflattenCodecOptions(codec_options.value()); + } + return VideoEncoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), @@ -646,7 +670,8 @@ void _encode_video_to_file_like( int64_t file_like_context, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, - std::optional preset = std::nullopt) { + std::optional preset = std::nullopt, + std::optional> codec_options = std::nullopt) { auto fileLikeContext = reinterpret_cast(file_like_context); TORCH_CHECK( @@ -658,6 +683,11 @@ void _encode_video_to_file_like( videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; + if (codec_options.has_value()) { + videoStreamOptions.codecOptions = + unflattenCodecOptions(codec_options.value()); + } + VideoEncoder encoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index a2f1fa0a3..409b06f13 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -216,6 +216,7 @@ def encode_video_to_file_like( crf: Optional[Union[int, float]] = None, pixel_format: Optional[str] = None, preset: Optional[str] = None, + codec_options: Optional[list[str]] = None, ) -> None: """Encode video frames to a file-like object. @@ -227,6 +228,7 @@ def encode_video_to_file_like( crf: Optional constant rate factor for encoding quality pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") preset: Optional encoder preset as string (e.g., "ultrafast", "medium") + codec_options: Optional list of codec options as flattened key-value pairs """ assert _pybind_ops is not None @@ -238,6 +240,7 @@ def encode_video_to_file_like( pixel_format, crf, preset, + codec_options, ) @@ -326,8 +329,9 @@ def encode_video_to_file_abstract( frame_rate: int, filename: str, pixel_format: Optional[str] = None, - crf: Optional[Union[int, float]] = None, preset: Optional[str] = None, + crf: Optional[Union[int, float]] = None, + codec_options: Optional[list[str]] = None, ) -> None: return @@ -338,8 +342,9 @@ def encode_video_to_tensor_abstract( frame_rate: int, format: str, pixel_format: Optional[str] = None, - crf: Optional[Union[int, float]] = None, preset: Optional[str] = None, + crf: Optional[Union[int, float]] = None, + codec_options: Optional[list[str]] = None, ) -> torch.Tensor: return torch.empty([], dtype=torch.long) @@ -351,8 +356,9 @@ def _encode_video_to_file_like_abstract( format: str, file_like_context: int, pixel_format: Optional[str] = None, - crf: Optional[Union[int, float]] = None, preset: Optional[str] = None, + crf: Optional[Union[int, float]] = None, + codec_options: Optional[list[str]] = None, ) -> None: return diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index d812d4a11..29af0661c 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Optional, Union +from typing import Dict, Optional, Union import torch from torch import Tensor @@ -7,6 +7,25 @@ from torchcodec import _core +def _flatten_codec_options(codec_options: Optional[Dict[str, str]]) -> Optional[list]: + """Convert codec_options dict to flattened list for torch op. + + Args: + codec_options: A dictionary of codec-specific options, e.g. + {"preset": "slow", "tune": "film"} + + Returns: + A flattened list of alternating keys and values, e.g. + ["preset", "slow", "tune", "film"], or None if input is None. + """ + if codec_options is None: + return None + result = [] + for key, value in codec_options.items(): + result.extend([key, value]) + return result + + class VideoEncoder: """A video encoder. @@ -35,6 +54,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int): def to_file( self, dest: Union[str, Path], + codec_options: Optional[Dict[str, str]] = None, *, pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, @@ -55,6 +75,8 @@ def to_file( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + codec_options (dict[str, str], optional): A dictionary of codec-specific + options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file( @@ -64,6 +86,7 @@ def to_file( pixel_format=pixel_format, crf=crf, preset=preset, + codec_options=_flatten_codec_options(codec_options), ) def to_tensor( @@ -73,6 +96,7 @@ def to_tensor( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, + codec_options: Optional[Dict[str, str]] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. @@ -88,6 +112,8 @@ def to_tensor( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + codec_options (dict[str, str], optional): A dictionary of codec-specific + options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. Returns: Tensor: The raw encoded bytes as 4D uint8 Tensor. @@ -100,6 +126,7 @@ def to_tensor( pixel_format=pixel_format, crf=crf, preset=preset_value, + codec_options=_flatten_codec_options(codec_options), ) def to_file_like( @@ -110,6 +137,7 @@ def to_file_like( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, + codec_options: Optional[Dict[str, str]] = None, ) -> None: """Encode frames into a file-like object. @@ -130,6 +158,8 @@ def to_file_like( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). + codec_options (dict[str, str], optional): A dictionary of codec-specific + options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file_like( @@ -140,4 +170,5 @@ def to_file_like( pixel_format=pixel_format, crf=crf, preset=preset, + codec_options=_flatten_codec_options(codec_options), ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 303f3307c..87a889741 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -689,6 +689,45 @@ def test_pixel_format_errors(self, method, tmp_path): ): getattr(encoder, method)(**valid_params, pixel_format="rgb24") + @pytest.mark.parametrize( + "codec_options,error", + [ + ({"qp": "-10"}, "qp=-10 is out of valid range"), + ( + {"qp": ""}, + "Option qp expects a numeric value but got", + ), + ( + {"direct-pred": "a"}, + "Option direct-pred expects a numeric value but got 'a'", + ), + ({"tune": "not_a_real_tune"}, "avcodec_open2 failed: Invalid argument"), + ( + {"tune": "10"}, + "avcodec_open2 failed: Invalid argument", + ), + ], + ) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) + def test_codec_options_errors(self, method, tmp_path, codec_options, error): + frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) + encoder = VideoEncoder(frames, frame_rate=30) + + if method == "to_file": + valid_params = dict(dest=str(tmp_path / "output.mp4")) + elif method == "to_tensor": + valid_params = dict(format="mp4") + elif method == "to_file_like": + valid_params = dict(file_like=io.BytesIO(), format="mp4") + else: + raise ValueError(f"Unknown method: {method}") + + with pytest.raises( + RuntimeError, + match=error, + ): + getattr(encoder, method)(**valid_params, codec_options=codec_options) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_contiguity(self, method, tmp_path): # Ensure that 2 sets of video frames with the same pixel values are encoded From e4d3edeefb85b5c11f486b5fa79386f939766d7b Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 09:05:27 -0500 Subject: [PATCH 2/7] inline dict flattening, accept str, Any --- src/torchcodec/encoders/_video_encoder.py | 48 +++++++++-------------- test/test_encoders.py | 4 +- 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 29af0661c..edada5694 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union import torch from torch import Tensor @@ -7,25 +7,6 @@ from torchcodec import _core -def _flatten_codec_options(codec_options: Optional[Dict[str, str]]) -> Optional[list]: - """Convert codec_options dict to flattened list for torch op. - - Args: - codec_options: A dictionary of codec-specific options, e.g. - {"preset": "slow", "tune": "film"} - - Returns: - A flattened list of alternating keys and values, e.g. - ["preset", "slow", "tune", "film"], or None if input is None. - """ - if codec_options is None: - return None - result = [] - for key, value in codec_options.items(): - result.extend([key, value]) - return result - - class VideoEncoder: """A video encoder. @@ -54,7 +35,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int): def to_file( self, dest: Union[str, Path], - codec_options: Optional[Dict[str, str]] = None, + codec_options: Optional[Dict[str, Any]] = None, *, pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, @@ -75,8 +56,9 @@ def to_file( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, str], optional): A dictionary of codec-specific + codec_options (dict[str, Any], optional): A dictionary of codec-specific options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file( @@ -86,7 +68,9 @@ def to_file( pixel_format=pixel_format, crf=crf, preset=preset, - codec_options=_flatten_codec_options(codec_options), + codec_options=[ + x for k, v in (codec_options or {}).items() for x in (k, str(v)) + ], ) def to_tensor( @@ -96,7 +80,7 @@ def to_tensor( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, - codec_options: Optional[Dict[str, str]] = None, + codec_options: Optional[Dict[str, Any]] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. @@ -112,8 +96,9 @@ def to_tensor( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, str], optional): A dictionary of codec-specific + codec_options (dict[str, Any], optional): A dictionary of codec-specific options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + Values will be converted to strings before passing to the encoder. Returns: Tensor: The raw encoded bytes as 4D uint8 Tensor. @@ -126,7 +111,9 @@ def to_tensor( pixel_format=pixel_format, crf=crf, preset=preset_value, - codec_options=_flatten_codec_options(codec_options), + codec_options=[ + x for k, v in (codec_options or {}).items() for x in (k, str(v)) + ], ) def to_file_like( @@ -137,7 +124,7 @@ def to_file_like( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, - codec_options: Optional[Dict[str, str]] = None, + codec_options: Optional[Dict[str, Any]] = None, ) -> None: """Encode frames into a file-like object. @@ -158,8 +145,9 @@ def to_file_like( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, str], optional): A dictionary of codec-specific + codec_options (dict[str, Any], optional): A dictionary of codec-specific options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file_like( @@ -170,5 +158,7 @@ def to_file_like( pixel_format=pixel_format, crf=crf, preset=preset, - codec_options=_flatten_codec_options(codec_options), + codec_options=[ + x for k, v in (codec_options or {}).items() for x in (k, str(v)) + ], ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 87a889741..b8483ad30 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -692,7 +692,7 @@ def test_pixel_format_errors(self, method, tmp_path): @pytest.mark.parametrize( "codec_options,error", [ - ({"qp": "-10"}, "qp=-10 is out of valid range"), + ({"qp": -10}, "qp=-10 is out of valid range"), ( {"qp": ""}, "Option qp expects a numeric value but got", @@ -703,7 +703,7 @@ def test_pixel_format_errors(self, method, tmp_path): ), ({"tune": "not_a_real_tune"}, "avcodec_open2 failed: Invalid argument"), ( - {"tune": "10"}, + {"tune": 10}, "avcodec_open2 failed: Invalid argument", ), ], From e6fd72b5307997452c07e86adbf7a33ba2fb7eb1 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 09:42:19 -0500 Subject: [PATCH 3/7] namespace, var names, other suggestions --- src/torchcodec/_core/Encoder.cpp | 72 ++++++++++++----------- src/torchcodec/_core/Encoder.h | 6 +- src/torchcodec/encoders/_video_encoder.py | 2 +- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 8ac13cdd6..583266a65 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -590,7 +590,7 @@ void tryToValidateCodecOption( if (!option) { return; } - // Validate options defined as a numeric type + // Validate if option is defined as a numeric type if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 || option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) { try { @@ -618,6 +618,30 @@ void tryToValidateCodecOption( } } } + +void sortCodecOptions( + const std::map& codecOptions, + AVDictionary** codecDict, + AVDictionary** formatDict) { + // Accepts a map of options as input, then sorts them into codec options and + // format options. The sorted options are returned into two separate dicts. + const AVClass* formatClass = avformat_get_class(); + for (const auto& [key, value] : codecOptions) { + const AVOption* fmtOpt = av_opt_find2( + &formatClass, + key.c_str(), + nullptr, + 0, + AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ, + nullptr); + if (fmtOpt) { + av_dict_set(formatDict, key.c_str(), value.c_str(), 0); + } else { + // Default to codec option (includes AVCodecContext + encoder-private) + av_dict_set(codecDict, key.c_str(), value.c_str(), 0); + } + } +} } // namespace VideoEncoder::~VideoEncoder() { @@ -633,6 +657,7 @@ VideoEncoder::~VideoEncoder() { avFormatContext_->pb = nullptr; } } + av_dict_free(&avFormatOptions_); } VideoEncoder::VideoEncoder( @@ -697,30 +722,6 @@ VideoEncoder::VideoEncoder( initializeEncoder(videoStreamOptions); } -void VideoEncoder::sortCodecOptions( - const std::map& codecOptions, - AVDictionary** codecDict, - AVDictionary** formatDict) { - // Search AVFormatContext's AVClass for options - const AVClass* formatClass = avformat_get_class(); - for (const auto& [key, value] : codecOptions) { - const AVOption* fmtOpt = av_opt_find2( - &formatClass, - key.c_str(), - nullptr, - 0, - AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ, - nullptr); - if (fmtOpt) { - av_dict_set(formatDict, key.c_str(), value.c_str(), 0); - } else { - // Default to codec option (includes AVCodecContext + encoder-private) - // validateCodecOption(*avCodecContext_->codec, key.c_str(), value); - av_dict_set(codecDict, key.c_str(), value.c_str(), 0); - } - } -} - void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { const AVCodec* avCodec = @@ -772,27 +773,31 @@ void VideoEncoder::initializeEncoder( } // Apply videoStreamOptions - AVDictionary* options = nullptr; + AVDictionary* avCodecOptions = nullptr; if (videoStreamOptions.codecOptions.has_value()) { - // Validate all codec options before setting them for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) { tryToValidateCodecOption(*avCodec, key.c_str(), value); } sortCodecOptions( - videoStreamOptions.codecOptions.value(), &options, &formatOptions_); + videoStreamOptions.codecOptions.value(), + &avCodecOptions, + &avFormatOptions_); } if (videoStreamOptions.crf.has_value()) { std::string crfValue = std::to_string(videoStreamOptions.crf.value()); tryToValidateCodecOption(*avCodec, "crf", crfValue); - av_dict_set(&options, "crf", crfValue.c_str(), 0); + av_dict_set(&avCodecOptions, "crf", crfValue.c_str(), 0); } if (videoStreamOptions.preset.has_value()) { av_dict_set( - &options, "preset", videoStreamOptions.preset.value().c_str(), 0); + &avCodecOptions, + "preset", + videoStreamOptions.preset.value().c_str(), + 0); } - int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); - av_dict_free(&options); + int status = avcodec_open2(avCodecContext_.get(), avCodec, &avCodecOptions); + av_dict_free(&avCodecOptions); TORCH_CHECK( status == AVSUCCESS, @@ -817,8 +822,7 @@ void VideoEncoder::encode() { TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice."); encodeWasCalled_ = true; - int status = avformat_write_header(avFormatContext_.get(), &formatOptions_); - av_dict_free(&formatOptions_); + int status = avformat_write_header(avFormatContext_.get(), &avFormatOptions_); TORCH_CHECK( status == AVSUCCESS, "Error in avformat_write_header: ", diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index 24ce19011..3d59eb6f6 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -160,10 +160,6 @@ class VideoEncoder { private: void initializeEncoder(const VideoStreamOptions& videoStreamOptions); - void sortCodecOptions( - const std::map& codecOptions, - AVDictionary** codecDict, - AVDictionary** formatDict); UniqueAVFrame convertTensorToAVFrame( const torch::Tensor& frame, int frameIndex); @@ -189,7 +185,7 @@ class VideoEncoder { std::unique_ptr avioContextHolder_; bool encodeWasCalled_ = false; - AVDictionary* formatOptions_ = nullptr; + AVDictionary* avFormatOptions_ = nullptr; }; } // namespace facebook::torchcodec diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index edada5694..b0d5c5b2b 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -57,7 +57,7 @@ def to_file( a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). codec_options (dict[str, Any], optional): A dictionary of codec-specific - options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + options to pass to the encoder, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset From 888c8d405bcf5a1bda4ad8c994424e1d05eb45d0 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 10:40:41 -0500 Subject: [PATCH 4/7] wip test --- test/test_encoders.py | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/test_encoders.py b/test/test_encoders.py index b8483ad30..e5ff968df 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -1029,3 +1029,55 @@ def write(self, data): RuntimeError, match="File like object must implement a seek method" ): encoder.to_file_like(NoSeekMethod(), format="mp4") + + def get_video_metadata_from_file(self, file_path): + """Helper function to get video metadata from a file using ffprobe. + + Returns a dict with codec_name, profile, pix_fmt, etc. + """ + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name,profile,pix_fmt", + "-of", + "default=noprint_wrappers=1", + str(file_path), + ], + check=True, + capture_output=True, + text=True, + ) + # Parse output like "codec_name=h264\nprofile=Baseline\npix_fmt=yuv420p" + metadata = {} + for line in result.stdout.strip().split("\n"): + if "=" in line: + key, value = line.split("=", 1) + metadata[key] = value + return metadata + + @pytest.mark.skipif(in_fbcode(), reason="ffprobe not available") + def test_codec_options_profile(self, tmp_path): + # Test that we can set the H.264 profile via codec_options + # and validate it was used by checking with ffprobe. + source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + # Encode with baseline profile (explicitly specified via codec_options) + output_path = tmp_path / "output_baseline.mp4" + encoder.to_file( + dest=str(output_path), + codec_options={"profile": "baseline"}, + ) + + # Validate the profile was used + metadata = self.get_video_metadata_from_file(output_path) + assert metadata.get("codec_name") == "h264" + assert metadata.get("profile") in ( + "Baseline", + "Constrained Baseline", + ), f"Expected Baseline profile, got {metadata.get('profile')}" From 36a2c4181bd9e3892df4566490a15af1b11bfdbc Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 11:45:40 -0500 Subject: [PATCH 5/7] add codec_options test, generalize ffprobe function to reuse there --- test/test_encoders.py | 103 ++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/test/test_encoders.py b/test/test_encoders.py index d5d102f27..6a8210ec6 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -572,8 +572,8 @@ class TestVideoEncoder: def decode(self, source=None) -> torch.Tensor: return VideoDecoder(source).get_frames_in_range(start=0, stop=60) - def _get_codec_spec(self, file_path): - """Helper function to get codec name from a video file using ffprobe.""" + def _get_video_metadata(self, file_path, fields): + """Helper function to get video metadata from a file using ffprobe.""" result = subprocess.run( [ "ffprobe", @@ -582,16 +582,21 @@ def _get_codec_spec(self, file_path): "-select_streams", "v:0", "-show_entries", - "stream=codec_name", + f"stream={','.join(fields)}", "-of", - "default=noprint_wrappers=1:nokey=1", + "default=noprint_wrappers=1", str(file_path), ], capture_output=True, check=True, text=True, ) - return result.stdout.strip() + metadata = {} + for line in result.stdout.strip().split("\n"): + if "=" in line: + key, value = line.split("=", 1) + metadata[key] = value + return metadata @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_bad_input_parameterized(self, tmp_path, method): @@ -1087,7 +1092,9 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec): dest = str(tmp_path / f"output.{format}") VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec_spec) - actual_codec_spec = self._get_codec_spec(dest) + actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"]).get( + "codec_name" + ) assert actual_codec_spec == codec_spec @pytest.mark.skipif( @@ -1123,61 +1130,51 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl): dest=impl_output, codec=codec_impl ) - assert self._get_codec_spec(spec_output) == codec_spec - assert self._get_codec_spec(impl_output) == codec_spec + assert ( + self._get_video_metadata(spec_output, fields=["codec_name"]).get( + "codec_name" + ) + == codec_spec + ) + assert ( + self._get_video_metadata(impl_output, fields=["codec_name"]).get( + "codec_name" + ) + == codec_spec + ) frames_spec = self.decode(spec_output).data frames_impl = self.decode(impl_output).data torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0) - def get_video_metadata_from_file(self, file_path): - """Helper function to get video metadata from a file using ffprobe. - - Returns a dict with codec_name, profile, pix_fmt, etc. - """ - result = subprocess.run( - [ - "ffprobe", - "-v", - "error", - "-select_streams", - "v:0", - "-show_entries", - "stream=codec_name,profile,pix_fmt", - "-of", - "default=noprint_wrappers=1", - str(file_path), - ], - check=True, - capture_output=True, - text=True, - ) - # Parse output like "codec_name=h264\nprofile=Baseline\npix_fmt=yuv420p" - metadata = {} - for line in result.stdout.strip().split("\n"): - if "=" in line: - key, value = line.split("=", 1) - metadata[key] = value - return metadata - @pytest.mark.skipif(in_fbcode(), reason="ffprobe not available") - def test_codec_options_profile(self, tmp_path): - # Test that we can set the H.264 profile via codec_options - # and validate it was used by checking with ffprobe. + @pytest.mark.parametrize( + "profile,colorspace,color_range", + [ + ("baseline", "bt709", "tv"), + ("main", "bt470bg", "pc"), + ("high", "fcc", "pc"), + ], + ) + def test_codec_options_utilized(self, tmp_path, profile, colorspace, color_range): + # Test setting profile, colorspace, and color_range via codec_options is utilized source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) encoder = VideoEncoder(frames=source_frames, frame_rate=30) - # Encode with baseline profile (explicitly specified via codec_options) - output_path = tmp_path / "output_baseline.mp4" + output_path = str(tmp_path / "output.mp4") encoder.to_file( - dest=str(output_path), - codec_options={"profile": "baseline"}, + dest=output_path, + codec_options={ + "profile": profile, + "colorspace": colorspace, + "color_range": color_range, + }, ) - - # Validate the profile was used - metadata = self.get_video_metadata_from_file(output_path) - assert metadata.get("codec_name") == "h264" - assert metadata.get("profile") in ( - "Baseline", - "Constrained Baseline", - ), f"Expected Baseline profile, got {metadata.get('profile')}" + metadata = self._get_video_metadata( + output_path, + fields=["profile", "color_space", "color_range"], + ) + # Validate profile (case-insensitive, baseline is reported as "Constrained Baseline") + assert profile in metadata.get("profile", "").lower() + assert metadata.get("color_space") == colorspace + assert metadata.get("color_range") == color_range From 75486878080876d7f2a36cae3f6e548df41c0b69 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 12:07:44 -0500 Subject: [PATCH 6/7] rename codec_options to extra_options --- src/torchcodec/_core/Encoder.cpp | 10 +++---- src/torchcodec/_core/StreamOptions.h | 2 +- src/torchcodec/_core/custom_ops.cpp | 34 +++++++++++------------ src/torchcodec/_core/ops.py | 12 ++++---- src/torchcodec/encoders/_video_encoder.py | 30 ++++++++++---------- test/test_encoders.py | 12 ++++---- 6 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 98688046e..3d052ab50 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -620,13 +620,13 @@ void tryToValidateCodecOption( } void sortCodecOptions( - const std::map& codecOptions, + const std::map& extraOptions, AVDictionary** codecDict, AVDictionary** formatDict) { // Accepts a map of options as input, then sorts them into codec options and // format options. The sorted options are returned into two separate dicts. const AVClass* formatClass = avformat_get_class(); - for (const auto& [key, value] : codecOptions) { + for (const auto& [key, value] : extraOptions) { const AVOption* fmtOpt = av_opt_find2( &formatClass, key.c_str(), @@ -798,12 +798,12 @@ void VideoEncoder::initializeEncoder( // Apply videoStreamOptions AVDictionary* avCodecOptions = nullptr; - if (videoStreamOptions.codecOptions.has_value()) { - for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) { + if (videoStreamOptions.extraOptions.has_value()) { + for (const auto& [key, value] : videoStreamOptions.extraOptions.value()) { tryToValidateCodecOption(*avCodec, key.c_str(), value); } sortCodecOptions( - videoStreamOptions.codecOptions.value(), + videoStreamOptions.extraOptions.value(), &avCodecOptions, &avFormatOptions_); } diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index 96d2fb9c5..ce0f27d3b 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -52,7 +52,7 @@ struct VideoStreamOptions { std::optional pixelFormat; std::optional crf; std::optional preset; - std::optional> codecOptions; + std::optional> extraOptions; }; struct AudioStreamOptions { diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index ce3cd6aa3..3836e52da 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()"); m.def( - "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor"); + "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor"); m.def( - "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()"); + "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); m.def( @@ -158,8 +158,8 @@ std::string quoteValue(const std::string& value) { return "\"" + value + "\""; } -// Helper function to unflatten codec_options, alternating keys and values -std::map unflattenCodecOptions( +// Helper function to unflatten extra_options, alternating keys and values +std::map unflattenExtraOptions( const std::vector& opts) { std::map optionsMap; for (size_t i = 0; i < opts.size(); i += 2) { @@ -617,16 +617,16 @@ void encode_video_to_file( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> codec_options = std::nullopt) { + std::optional> extra_options = std::nullopt) { VideoStreamOptions videoStreamOptions; videoStreamOptions.codec = codec; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; - if (codec_options.has_value()) { - videoStreamOptions.codecOptions = - unflattenCodecOptions(codec_options.value()); + if (extra_options.has_value()) { + videoStreamOptions.extraOptions = + unflattenExtraOptions(extra_options.value()); } VideoEncoder( @@ -645,7 +645,7 @@ at::Tensor encode_video_to_tensor( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> codec_options = std::nullopt) { + std::optional> extra_options = std::nullopt) { auto avioContextHolder = std::make_unique(); VideoStreamOptions videoStreamOptions; videoStreamOptions.codec = codec; @@ -653,9 +653,9 @@ at::Tensor encode_video_to_tensor( videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; - if (codec_options.has_value()) { - videoStreamOptions.codecOptions = - unflattenCodecOptions(codec_options.value()); + if (extra_options.has_value()) { + videoStreamOptions.extraOptions = + unflattenExtraOptions(extra_options.value()); } return VideoEncoder( @@ -676,7 +676,7 @@ void _encode_video_to_file_like( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> codec_options = std::nullopt) { + std::optional> extra_options = std::nullopt) { auto fileLikeContext = reinterpret_cast(file_like_context); TORCH_CHECK( @@ -689,9 +689,9 @@ void _encode_video_to_file_like( videoStreamOptions.crf = crf; videoStreamOptions.preset = preset; - if (codec_options.has_value()) { - videoStreamOptions.codecOptions = - unflattenCodecOptions(codec_options.value()); + if (extra_options.has_value()) { + videoStreamOptions.extraOptions = + unflattenExtraOptions(extra_options.value()); } VideoEncoder encoder( diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 23b71130a..6823f4037 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -217,7 +217,7 @@ def encode_video_to_file_like( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[str] = None, - codec_options: Optional[list[str]] = None, + extra_options: Optional[list[str]] = None, ) -> None: """Encode video frames to a file-like object. @@ -230,7 +230,7 @@ def encode_video_to_file_like( pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") crf: Optional constant rate factor for encoding quality preset: Optional encoder preset as string (e.g., "ultrafast", "medium") - codec_options: Optional list of codec options as flattened key-value pairs + extra_options: Optional list of extra options as flattened key-value pairs """ assert _pybind_ops is not None @@ -243,7 +243,7 @@ def encode_video_to_file_like( pixel_format, crf, preset, - codec_options, + extra_options, ) @@ -335,7 +335,7 @@ def encode_video_to_file_abstract( pixel_format: Optional[str] = None, preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, - codec_options: Optional[list[str]] = None, + extra_options: Optional[list[str]] = None, ) -> None: return @@ -349,7 +349,7 @@ def encode_video_to_tensor_abstract( pixel_format: Optional[str] = None, preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, - codec_options: Optional[list[str]] = None, + extra_options: Optional[list[str]] = None, ) -> torch.Tensor: return torch.empty([], dtype=torch.long) @@ -364,7 +364,7 @@ def _encode_video_to_file_like_abstract( pixel_format: Optional[str] = None, preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, - codec_options: Optional[list[str]] = None, + extra_options: Optional[list[str]] = None, ) -> None: return diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 3626fc285..f35329bb2 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -35,7 +35,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int): def to_file( self, dest: Union[str, Path], - codec_options: Optional[Dict[str, Any]] = None, + extra_options: Optional[Dict[str, Any]] = None, *, codec: Optional[str] = None, pixel_format: Optional[str] = None, @@ -60,8 +60,8 @@ def to_file( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, Any], optional): A dictionary of codec-specific - options to pass to the encoder, e.g. ``{"qp": 5, "tune": "film"}``. + extra_options (dict[str, Any], optional): A dictionary of additional + encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset @@ -73,8 +73,8 @@ def to_file( pixel_format=pixel_format, crf=crf, preset=preset, - codec_options=[ - x for k, v in (codec_options or {}).items() for x in (k, str(v)) + extra_options=[ + x for k, v in (extra_options or {}).items() for x in (k, str(v)) ], ) @@ -86,7 +86,7 @@ def to_tensor( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, - codec_options: Optional[Dict[str, Any]] = None, + extra_options: Optional[Dict[str, Any]] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. @@ -105,8 +105,8 @@ def to_tensor( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, Any], optional): A dictionary of codec-specific - options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + extra_options (dict[str, Any], optional): A dictionary of additional + encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``. Values will be converted to strings before passing to the encoder. Returns: @@ -121,8 +121,8 @@ def to_tensor( pixel_format=pixel_format, crf=crf, preset=preset_value, - codec_options=[ - x for k, v in (codec_options or {}).items() for x in (k, str(v)) + extra_options=[ + x for k, v in (extra_options or {}).items() for x in (k, str(v)) ], ) @@ -135,7 +135,7 @@ def to_file_like( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, - codec_options: Optional[Dict[str, Any]] = None, + extra_options: Optional[Dict[str, Any]] = None, ) -> None: """Encode frames into a file-like object. @@ -159,8 +159,8 @@ def to_file_like( encoding speed and compression. Valid values depend on the encoder (commonly a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). - codec_options (dict[str, Any], optional): A dictionary of codec-specific - options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``. + extra_options (dict[str, Any], optional): A dictionary of additional + encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``. Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset @@ -173,7 +173,7 @@ def to_file_like( pixel_format=pixel_format, crf=crf, preset=preset, - codec_options=[ - x for k, v in (codec_options or {}).items() for x in (k, str(v)) + extra_options=[ + x for k, v in (extra_options or {}).items() for x in (k, str(v)) ], ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 6a8210ec6..a7089db57 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -726,7 +726,7 @@ def test_pixel_format_errors(self, method, tmp_path): getattr(encoder, method)(**valid_params, pixel_format="rgb24") @pytest.mark.parametrize( - "codec_options,error", + "extra_options,error", [ ({"qp": -10}, "qp=-10 is out of valid range"), ( @@ -745,7 +745,7 @@ def test_pixel_format_errors(self, method, tmp_path): ], ) @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) - def test_codec_options_errors(self, method, tmp_path, codec_options, error): + def test_extra_options_errors(self, method, tmp_path, extra_options, error): frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) encoder = VideoEncoder(frames, frame_rate=30) @@ -762,7 +762,7 @@ def test_codec_options_errors(self, method, tmp_path, codec_options, error): RuntimeError, match=error, ): - getattr(encoder, method)(**valid_params, codec_options=codec_options) + getattr(encoder, method)(**valid_params, extra_options=extra_options) @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_contiguity(self, method, tmp_path): @@ -1156,15 +1156,15 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl): ("high", "fcc", "pc"), ], ) - def test_codec_options_utilized(self, tmp_path, profile, colorspace, color_range): - # Test setting profile, colorspace, and color_range via codec_options is utilized + def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range): + # Test setting profile, colorspace, and color_range via extra_options is utilized source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) encoder = VideoEncoder(frames=source_frames, frame_rate=30) output_path = str(tmp_path / "output.mp4") encoder.to_file( dest=output_path, - codec_options={ + extra_options={ "profile": profile, "colorspace": colorspace, "color_range": color_range, From f933cef775269e4bd84d963b1c4769a14486d553 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 15:54:13 -0500 Subject: [PATCH 7/7] str cast, docs update, [], remove 'in' from test --- src/torchcodec/encoders/_video_encoder.py | 10 +++++----- test/test_encoders.py | 19 ++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index f35329bb2..909cf73a9 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -74,7 +74,7 @@ def to_file( crf=crf, preset=preset, extra_options=[ - x for k, v in (extra_options or {}).items() for x in (k, str(v)) + str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], ) @@ -106,7 +106,7 @@ def to_tensor( a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). extra_options (dict[str, Any], optional): A dictionary of additional - encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``. + encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. Returns: @@ -122,7 +122,7 @@ def to_tensor( crf=crf, preset=preset_value, extra_options=[ - x for k, v in (extra_options or {}).items() for x in (k, str(v)) + str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], ) @@ -160,7 +160,7 @@ def to_file_like( a string: "fast", "medium", "slow"). Defaults to None (which will use encoder's default). extra_options (dict[str, Any], optional): A dictionary of additional - encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``. + encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. """ preset = str(preset) if isinstance(preset, int) else preset @@ -174,6 +174,6 @@ def to_file_like( crf=crf, preset=preset, extra_options=[ - x for k, v in (extra_options or {}).items() for x in (k, str(v)) + str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], ) diff --git a/test/test_encoders.py b/test/test_encoders.py index a7089db57..714a857b5 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -1092,9 +1092,9 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec): dest = str(tmp_path / f"output.{format}") VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec_spec) - actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"]).get( + actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"])[ "codec_name" - ) + ] assert actual_codec_spec == codec_spec @pytest.mark.skipif( @@ -1131,15 +1131,11 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl): ) assert ( - self._get_video_metadata(spec_output, fields=["codec_name"]).get( - "codec_name" - ) + self._get_video_metadata(spec_output, fields=["codec_name"])["codec_name"] == codec_spec ) assert ( - self._get_video_metadata(impl_output, fields=["codec_name"]).get( - "codec_name" - ) + self._get_video_metadata(impl_output, fields=["codec_name"])["codec_name"] == codec_spec ) @@ -1175,6 +1171,7 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range fields=["profile", "color_space", "color_range"], ) # Validate profile (case-insensitive, baseline is reported as "Constrained Baseline") - assert profile in metadata.get("profile", "").lower() - assert metadata.get("color_space") == colorspace - assert metadata.get("color_range") == color_range + expected_profile = "constrained baseline" if profile == "baseline" else profile + assert metadata["profile"].lower() == expected_profile + assert metadata["color_space"] == colorspace + assert metadata["color_range"] == color_range