From bcc89490feb573a7a64d5970d8db07220ea6f604 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 16:15:39 -0500 Subject: [PATCH 1/2] add frame rate, tests --- src/torchcodec/_core/Encoder.cpp | 13 +++- src/torchcodec/_core/Encoder.h | 1 + src/torchcodec/_core/StreamOptions.h | 1 + src/torchcodec/_core/custom_ops.cpp | 23 ++++--- src/torchcodec/_core/ops.py | 9 ++- src/torchcodec/encoders/_video_encoder.py | 16 ++++- test/test_encoders.py | 75 +++++++++++++++++++++++ 7 files changed, 126 insertions(+), 12 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 3d052ab50..6f47854a6 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -642,6 +642,10 @@ void sortCodecOptions( } } } + +void validateFrameRate(int frameRate) { + TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0."); +} } // namespace VideoEncoder::~VideoEncoder() { @@ -667,6 +671,7 @@ VideoEncoder::VideoEncoder( const VideoStreamOptions& videoStreamOptions) : frames_(validateFrames(frames)), inFrameRate_(frameRate) { setFFmpegLogLevel(); + validateFrameRate(frameRate); // Allocate output format context AVFormatContext* avFormatContext = nullptr; @@ -724,6 +729,10 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { + // Set output frame rate and validate + outFrameRate_ = videoStreamOptions.frameRate.value_or(inFrameRate_); + validateFrameRate(outFrameRate_); + const AVCodec* avCodec = nullptr; // If codec arg is provided, find codec using logic similar to FFmpeg: // https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835 @@ -788,8 +797,8 @@ void VideoEncoder::initializeEncoder( avCodecContext_->height = outHeight_; avCodecContext_->pix_fmt = outPixelFormat_; // TODO-VideoEncoder: Verify that frame_rate and time_base are correct - avCodecContext_->time_base = {1, inFrameRate_}; - avCodecContext_->framerate = {inFrameRate_, 1}; + avCodecContext_->time_base = {1, outFrameRate_}; + avCodecContext_->framerate = {outFrameRate_, 1}; // Set flag for containers that require extradata to be in the codec context if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) { diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h index 3d59eb6f6..84949323b 100644 --- a/src/torchcodec/_core/Encoder.h +++ b/src/torchcodec/_core/Encoder.h @@ -173,6 +173,7 @@ class VideoEncoder { const torch::Tensor frames_; int inFrameRate_; + int outFrameRate_ = -1; int inWidth_ = -1; int inHeight_ = -1; diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index ce0f27d3b..864122b47 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -53,6 +53,7 @@ struct VideoStreamOptions { std::optional crf; std::optional preset; std::optional> extraOptions; + std::optional frameRate; }; struct AudioStreamOptions { diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 3836e52da..525721a0f 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()"); m.def( - "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor"); + "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> Tensor"); m.def( - "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()"); + "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); m.def( @@ -617,7 +617,8 @@ void encode_video_to_file( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> extra_options = std::nullopt) { + std::optional> extra_options = std::nullopt, + std::optional desired_frame_rate = std::nullopt) { VideoStreamOptions videoStreamOptions; videoStreamOptions.codec = codec; videoStreamOptions.pixelFormat = pixel_format; @@ -628,7 +629,8 @@ void encode_video_to_file( videoStreamOptions.extraOptions = unflattenExtraOptions(extra_options.value()); } - + videoStreamOptions.frameRate = + validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate"); VideoEncoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), @@ -645,7 +647,8 @@ at::Tensor encode_video_to_tensor( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> extra_options = std::nullopt) { + std::optional> extra_options = std::nullopt, + std::optional desired_frame_rate = std::nullopt) { auto avioContextHolder = std::make_unique(); VideoStreamOptions videoStreamOptions; videoStreamOptions.codec = codec; @@ -657,7 +660,8 @@ at::Tensor encode_video_to_tensor( videoStreamOptions.extraOptions = unflattenExtraOptions(extra_options.value()); } - + videoStreamOptions.frameRate = + validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate"); return VideoEncoder( frames, validateInt64ToInt(frame_rate, "frame_rate"), @@ -676,7 +680,8 @@ void _encode_video_to_file_like( std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt, std::optional preset = std::nullopt, - std::optional> extra_options = std::nullopt) { + std::optional> extra_options = std::nullopt, + std::optional desired_frame_rate = std::nullopt) { auto fileLikeContext = reinterpret_cast(file_like_context); TORCH_CHECK( @@ -693,6 +698,8 @@ void _encode_video_to_file_like( videoStreamOptions.extraOptions = unflattenExtraOptions(extra_options.value()); } + videoStreamOptions.frameRate = + validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate"); VideoEncoder encoder( frames, diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 6823f4037..836c22c16 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -218,12 +218,13 @@ def encode_video_to_file_like( crf: Optional[Union[int, float]] = None, preset: Optional[str] = None, extra_options: Optional[list[str]] = None, + desired_frame_rate: Optional[int] = None, ) -> None: """Encode video frames to a file-like object. Args: frames: Video frames tensor - frame_rate: Frame rate in frames per second + frame_rate: Frame rate in frames per second (input frame rate) format: Video format (e.g., "mp4", "mov", "mkv") file_like: File-like object that supports write() and seek() methods codec: Optional codec name (e.g., "libx264", "h264") @@ -231,6 +232,8 @@ def encode_video_to_file_like( crf: Optional constant rate factor for encoding quality preset: Optional encoder preset as string (e.g., "ultrafast", "medium") extra_options: Optional list of extra options as flattened key-value pairs + desired_frame_rate: Optional desired output frame rate. If not specified, + uses the input frame_rate. """ assert _pybind_ops is not None @@ -244,6 +247,7 @@ def encode_video_to_file_like( crf, preset, extra_options, + desired_frame_rate, ) @@ -336,6 +340,7 @@ def encode_video_to_file_abstract( preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, extra_options: Optional[list[str]] = None, + desired_frame_rate: Optional[int] = None, ) -> None: return @@ -350,6 +355,7 @@ def encode_video_to_tensor_abstract( preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, extra_options: Optional[list[str]] = None, + desired_frame_rate: Optional[int] = None, ) -> torch.Tensor: return torch.empty([], dtype=torch.long) @@ -365,6 +371,7 @@ def _encode_video_to_file_like_abstract( preset: Optional[str] = None, crf: Optional[Union[int, float]] = None, extra_options: Optional[list[str]] = None, + desired_frame_rate: Optional[int] = None, ) -> None: return diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 909cf73a9..191e0de03 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -15,7 +15,9 @@ class VideoEncoder: tensor of shape ``(N, C, H, W)`` where N is the number of frames, C is 3 channels (RGB), H is height, and W is width. Values must be uint8 in the range ``[0, 255]``. - frame_rate (int): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate. + frame_rate (int): The frame rate of the **input** ``frames``. The + frame rate of the encoded output can be specified using the + encoding methods (``to_file``, etc.). """ def __init__(self, frames: Tensor, *, frame_rate: int): @@ -41,6 +43,7 @@ def to_file( pixel_format: Optional[str] = None, crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, + frame_rate: Optional[int] = None, ) -> None: """Encode frames into a file. @@ -63,6 +66,8 @@ def to_file( extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + frame_rate (int, optional): The frame rate of the output video. If not specified, + uses the frame rate provided to the VideoEncoder constructor. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file( @@ -76,6 +81,7 @@ def to_file( extra_options=[ str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], + desired_frame_rate=frame_rate, ) def to_tensor( @@ -87,6 +93,7 @@ def to_tensor( crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, extra_options: Optional[Dict[str, Any]] = None, + frame_rate: Optional[int] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. @@ -108,6 +115,8 @@ def to_tensor( extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + frame_rate (int, optional): The frame rate of the output video. If not specified, + uses the frame rate provided to the VideoEncoder constructor. Returns: Tensor: The raw encoded bytes as 1D uint8 Tensor. @@ -124,6 +133,7 @@ def to_tensor( extra_options=[ str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], + desired_frame_rate=frame_rate, ) def to_file_like( @@ -136,6 +146,7 @@ def to_file_like( crf: Optional[Union[int, float]] = None, preset: Optional[Union[str, int]] = None, extra_options: Optional[Dict[str, Any]] = None, + frame_rate: Optional[int] = None, ) -> None: """Encode frames into a file-like object. @@ -162,6 +173,8 @@ def to_file_like( extra_options (dict[str, Any], optional): A dictionary of additional encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. + frame_rate (int, optional): The frame rate of the output video. If not specified, + uses the frame rate provided to the VideoEncoder constructor. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file_like( @@ -176,4 +189,5 @@ def to_file_like( extra_options=[ str(x) for k, v in (extra_options or {}).items() for x in (k, v) ], + desired_frame_rate=frame_rate, ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 714a857b5..31fba5148 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -659,6 +659,13 @@ def test_bad_input_parameterized(self, tmp_path, method): ): encoder.to_tensor(format="mp4", preset="fake_preset") + with pytest.raises(RuntimeError, match=r"frame_rate"): + encoder = VideoEncoder( + frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), + frame_rate=30, + ) + getattr(encoder, method)(**valid_params, frame_rate=0) + @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) @pytest.mark.parametrize("crf", [23, 23.5, -0.9]) def test_crf_valid_values(self, method, crf, tmp_path): @@ -1175,3 +1182,71 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range assert metadata["profile"].lower() == expected_profile assert metadata["color_space"] == colorspace assert metadata["color_range"] == color_range + + @pytest.mark.skipif( + in_fbcode(), + reason="ffprobe not available internally", + ) + @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) + @pytest.mark.parametrize("output_frame_rate", [10, 60, None]) + def test_frame_rate_parameter(self, tmp_path, method, output_frame_rate): + + # Use ffprobe to get frame rate + def get_frame_rate(file_path): + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=r_frame_rate", + "-of", + "default=noprint_wrappers=1:nokey=1", + str(file_path), + ], + capture_output=True, + check=True, + text=True, + ) + # Frame rate is returned as a fraction like "60/1" + numerator, denominator = result.stdout.strip().split("/") + return int(numerator) / int(denominator) + + frames = ( + VideoDecoder(TEST_SRC_2_720P.path) + .get_frames_in_range(start=0, stop=60) + .data + ) + input_frame_rate = 30 + encoder = VideoEncoder(frames=frames, frame_rate=input_frame_rate) + + if method == "to_file": + output_path = str(tmp_path / "output.mp4") + encoder.to_file(dest=output_path, frame_rate=output_frame_rate) + elif method == "to_tensor": + encoded_tensor = encoder.to_tensor( + format="mp4", frame_rate=output_frame_rate + ) + # Write tensor to file to check with ffprobe + output_path = str(tmp_path / "output_from_tensor.mp4") + with open(output_path, "wb") as f: + f.write(encoded_tensor.numpy().tobytes()) + elif method == "to_file_like": + file_like = io.BytesIO() + encoder.to_file_like( + file_like=file_like, format="mp4", frame_rate=output_frame_rate + ) + # Write file_like to file to check with ffprobe + output_path = str(tmp_path / "output_from_file_like.mp4") + with open(output_path, "wb") as f: + f.write(file_like.getvalue()) + else: + raise ValueError(f"Unknown method: {method}") + + actual_frame_rate = get_frame_rate(output_path) + # Ensure frame_rate=None uses the input_frame_rate + if output_frame_rate is None: + output_frame_rate = input_frame_rate + assert actual_frame_rate == output_frame_rate From cb7ff172b0d8ca3e1872c6b81f33c41df8324774 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Fri, 14 Nov 2025 17:28:44 -0500 Subject: [PATCH 2/2] small fixes, reuse _get_video_metadata --- src/torchcodec/_core/Encoder.cpp | 7 ++++-- src/torchcodec/encoders/_video_encoder.py | 6 ++--- test/test_encoders.py | 30 ++++------------------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 6f47854a6..a0c13dde8 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -644,7 +644,11 @@ void sortCodecOptions( } void validateFrameRate(int frameRate) { - TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0."); + TORCH_CHECK( + frameRate > 0, + "Invalid frame_rate: ", + frameRate, + ". Frame rate must be a positive integer."); } } // namespace @@ -796,7 +800,6 @@ void VideoEncoder::initializeEncoder( avCodecContext_->width = outWidth_; avCodecContext_->height = outHeight_; avCodecContext_->pix_fmt = outPixelFormat_; - // TODO-VideoEncoder: Verify that frame_rate and time_base are correct avCodecContext_->time_base = {1, outFrameRate_}; avCodecContext_->framerate = {outFrameRate_, 1}; diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index 191e0de03..9c0dde519 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -67,7 +67,7 @@ def to_file( encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. frame_rate (int, optional): The frame rate of the output video. If not specified, - uses the frame rate provided to the VideoEncoder constructor. + uses the frame rate of the input ``frames``. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file( @@ -116,7 +116,7 @@ def to_tensor( encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. frame_rate (int, optional): The frame rate of the output video. If not specified, - uses the frame rate provided to the VideoEncoder constructor. + uses the frame rate of the input ``frames``. Returns: Tensor: The raw encoded bytes as 1D uint8 Tensor. @@ -174,7 +174,7 @@ def to_file_like( encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``. Values will be converted to strings before passing to the encoder. frame_rate (int, optional): The frame rate of the output video. If not specified, - uses the frame rate provided to the VideoEncoder constructor. + uses the frame rate of the input ``frames``. """ preset = str(preset) if isinstance(preset, int) else preset _core.encode_video_to_file_like( diff --git a/test/test_encoders.py b/test/test_encoders.py index 31fba5148..5b6fe1546 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -659,7 +659,7 @@ def test_bad_input_parameterized(self, tmp_path, method): ): encoder.to_tensor(format="mp4", preset="fake_preset") - with pytest.raises(RuntimeError, match=r"frame_rate"): + with pytest.raises(RuntimeError, match="Invalid frame_rate: "): encoder = VideoEncoder( frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), frame_rate=30, @@ -1191,29 +1191,6 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range @pytest.mark.parametrize("output_frame_rate", [10, 60, None]) def test_frame_rate_parameter(self, tmp_path, method, output_frame_rate): - # Use ffprobe to get frame rate - def get_frame_rate(file_path): - result = subprocess.run( - [ - "ffprobe", - "-v", - "error", - "-select_streams", - "v:0", - "-show_entries", - "stream=r_frame_rate", - "-of", - "default=noprint_wrappers=1:nokey=1", - str(file_path), - ], - capture_output=True, - check=True, - text=True, - ) - # Frame rate is returned as a fraction like "60/1" - numerator, denominator = result.stdout.strip().split("/") - return int(numerator) / int(denominator) - frames = ( VideoDecoder(TEST_SRC_2_720P.path) .get_frames_in_range(start=0, stop=60) @@ -1245,7 +1222,10 @@ def get_frame_rate(file_path): else: raise ValueError(f"Unknown method: {method}") - actual_frame_rate = get_frame_rate(output_path) + metadata = self._get_video_metadata(output_path, ["r_frame_rate"]) + # Frame rate is returned as a fraction like "60/1" + numerator, denominator = metadata["r_frame_rate"].split("/") + actual_frame_rate = int(numerator) / int(denominator) # Ensure frame_rate=None uses the input_frame_rate if output_frame_rate is None: output_frame_rate = input_frame_rate