From bcc89490feb573a7a64d5970d8db07220ea6f604 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 16:15:39 -0500
Subject: [PATCH 1/2] add frame rate, tests

---
 src/torchcodec/_core/Encoder.cpp          | 13 +++-
 src/torchcodec/_core/Encoder.h            |  1 +
 src/torchcodec/_core/StreamOptions.h      |  1 +
 src/torchcodec/_core/custom_ops.cpp       | 23 ++++---
 src/torchcodec/_core/ops.py               |  9 ++-
 src/torchcodec/encoders/_video_encoder.py | 16 ++++-
 test/test_encoders.py                     | 75 +++++++++++++++++++++++
 7 files changed, 126 insertions(+), 12 deletions(-)
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 3d052ab50..6f47854a6 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -642,6 +642,10 @@ void sortCodecOptions(
     }
   }
 }
+
+void validateFrameRate(int frameRate) {
+  TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0.");
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -667,6 +671,7 @@ VideoEncoder::VideoEncoder(
     const VideoStreamOptions& videoStreamOptions)
     : frames_(validateFrames(frames)), inFrameRate_(frameRate) {
   setFFmpegLogLevel();
+  validateFrameRate(frameRate);
 
   // Allocate output format context
   AVFormatContext* avFormatContext = nullptr;
@@ -724,6 +729,10 @@ VideoEncoder::VideoEncoder(
 
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
+  // Set output frame rate and validate
+  outFrameRate_ = videoStreamOptions.frameRate.value_or(inFrameRate_);
+  validateFrameRate(outFrameRate_);
+
   const AVCodec* avCodec = nullptr;
   // If codec arg is provided, find codec using logic similar to FFmpeg:
   // https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835
@@ -788,8 +797,8 @@ void VideoEncoder::initializeEncoder(
   avCodecContext_->height = outHeight_;
   avCodecContext_->pix_fmt = outPixelFormat_;
   // TODO-VideoEncoder: Verify that frame_rate and time_base are correct
-  avCodecContext_->time_base = {1, inFrameRate_};
-  avCodecContext_->framerate = {inFrameRate_, 1};
+  avCodecContext_->time_base = {1, outFrameRate_};
+  avCodecContext_->framerate = {outFrameRate_, 1};
 
   // Set flag for containers that require extradata to be in the codec context
   if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) {
diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
index 3d59eb6f6..84949323b 100644
--- a/src/torchcodec/_core/Encoder.h
+++ b/src/torchcodec/_core/Encoder.h
@@ -173,6 +173,7 @@ class VideoEncoder {
 
   const torch::Tensor frames_;
   int inFrameRate_;
+  int outFrameRate_ = -1;
 
   int inWidth_ = -1;
   int inHeight_ = -1;
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
index ce0f27d3b..864122b47 100644
--- a/src/torchcodec/_core/StreamOptions.h
+++ b/src/torchcodec/_core/StreamOptions.h
@@ -53,6 +53,7 @@ struct VideoStreamOptions {
   std::optional<double> crf;
   std::optional<std::string> preset;
   std::optional<std::map<std::string, std::string>> extraOptions;
+  std::optional<int> frameRate;
 };
 
 struct AudioStreamOptions {
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index 3836e52da..525721a0f 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -617,7 +617,8 @@ void encode_video_to_file(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
   videoStreamOptions.pixelFormat = pixel_format;
@@ -628,7 +629,8 @@ void encode_video_to_file(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
-
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -645,7 +647,8 @@ at::Tensor encode_video_to_tensor(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
@@ -657,7 +660,8 @@ at::Tensor encode_video_to_tensor(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
-
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -676,7 +680,8 @@ void _encode_video_to_file_like(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -693,6 +698,8 @@ void _encode_video_to_file_like(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
 
   VideoEncoder encoder(
       frames,
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
index 6823f4037..836c22c16 100644
--- a/src/torchcodec/_core/ops.py
+++ b/src/torchcodec/_core/ops.py
@@ -218,12 +218,13 @@ def encode_video_to_file_like(
     crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
     Args:
         frames: Video frames tensor
-        frame_rate: Frame rate in frames per second
+        frame_rate: Frame rate in frames per second (input frame rate)
         format: Video format (e.g., "mp4", "mov", "mkv")
         file_like: File-like object that supports write() and seek() methods
         codec: Optional codec name (e.g., "libx264", "h264")
@@ -231,6 +232,8 @@ def encode_video_to_file_like(
         crf: Optional constant rate factor for encoding quality
         preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
         extra_options: Optional list of extra options as flattened key-value pairs
+        desired_frame_rate: Optional desired output frame rate. If not specified,
+            uses the input frame_rate.
     """
     assert _pybind_ops is not None
 
@@ -244,6 +247,7 @@ def encode_video_to_file_like(
         crf,
         preset,
         extra_options,
+        desired_frame_rate,
     )
 
 
@@ -336,6 +340,7 @@ def encode_video_to_file_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     return
 
@@ -350,6 +355,7 @@ def encode_video_to_tensor_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -365,6 +371,7 @@ def _encode_video_to_file_like_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     return
 
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index 909cf73a9..191e0de03 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -15,7 +15,9 @@ class VideoEncoder:
             tensor of shape ``(N, C, H, W)`` where N is the number of frames,
             C is 3 channels (RGB), H is height, and W is width.
             Values must be uint8 in the range ``[0, 255]``.
-        frame_rate (int): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
+        frame_rate (int): The frame rate of the **input** ``frames``. The
+            frame rate of the encoded output can be specified using the
+            encoding methods (``to_file``, etc.).
     """
 
     def __init__(self, frames: Tensor, *, frame_rate: int):
@@ -41,6 +43,7 @@ def to_file(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
+        frame_rate: Optional[int] = None,
     ) -> None:
         """Encode frames into a file.
 
@@ -63,6 +66,8 @@ def to_file(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate provided to the VideoEncoder constructor.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
@@ -76,6 +81,7 @@ def to_file(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
 
     def to_tensor(
@@ -87,6 +93,7 @@ def to_tensor(
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
         extra_options: Optional[Dict[str, Any]] = None,
+        frame_rate: Optional[int] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -108,6 +115,8 @@ def to_tensor(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate provided to the VideoEncoder constructor.
 
         Returns:
             Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -124,6 +133,7 @@ def to_tensor(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
 
     def to_file_like(
@@ -136,6 +146,7 @@ def to_file_like(
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
         extra_options: Optional[Dict[str, Any]] = None,
+        frame_rate: Optional[int] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -162,6 +173,8 @@ def to_file_like(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate provided to the VideoEncoder constructor.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
@@ -176,4 +189,5 @@ def to_file_like(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
index 714a857b5..31fba5148 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -659,6 +659,13 @@ def test_bad_input_parameterized(self, tmp_path, method):
         ):
             encoder.to_tensor(format="mp4", preset="fake_preset")
 
+        with pytest.raises(RuntimeError, match=r"frame_rate"):
+            encoder = VideoEncoder(
+                frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params, frame_rate=0)
+
     @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
     @pytest.mark.parametrize("crf", [23, 23.5, -0.9])
     def test_crf_valid_values(self, method, crf, tmp_path):
@@ -1175,3 +1182,71 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
         assert metadata["profile"].lower() == expected_profile
         assert metadata["color_space"] == colorspace
         assert metadata["color_range"] == color_range
+
+    @pytest.mark.skipif(
+        in_fbcode(),
+        reason="ffprobe not available internally",
+    )
+    @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
+    @pytest.mark.parametrize("output_frame_rate", [10, 60, None])
+    def test_frame_rate_parameter(self, tmp_path, method, output_frame_rate):
+
+        # Use ffprobe to get frame rate
+        def get_frame_rate(file_path):
+            result = subprocess.run(
+                [
+                    "ffprobe",
+                    "-v",
+                    "error",
+                    "-select_streams",
+                    "v:0",
+                    "-show_entries",
+                    "stream=r_frame_rate",
+                    "-of",
+                    "default=noprint_wrappers=1:nokey=1",
+                    str(file_path),
+                ],
+                capture_output=True,
+                check=True,
+                text=True,
+            )
+            # Frame rate is returned as a fraction like "60/1"
+            numerator, denominator = result.stdout.strip().split("/")
+            return int(numerator) / int(denominator)
+
+        frames = (
+            VideoDecoder(TEST_SRC_2_720P.path)
+            .get_frames_in_range(start=0, stop=60)
+            .data
+        )
+        input_frame_rate = 30
+        encoder = VideoEncoder(frames=frames, frame_rate=input_frame_rate)
+
+        if method == "to_file":
+            output_path = str(tmp_path / "output.mp4")
+            encoder.to_file(dest=output_path, frame_rate=output_frame_rate)
+        elif method == "to_tensor":
+            encoded_tensor = encoder.to_tensor(
+                format="mp4", frame_rate=output_frame_rate
+            )
+            # Write tensor to file to check with ffprobe
+            output_path = str(tmp_path / "output_from_tensor.mp4")
+            with open(output_path, "wb") as f:
+                f.write(encoded_tensor.numpy().tobytes())
+        elif method == "to_file_like":
+            file_like = io.BytesIO()
+            encoder.to_file_like(
+                file_like=file_like, format="mp4", frame_rate=output_frame_rate
+            )
+            # Write file_like to file to check with ffprobe
+            output_path = str(tmp_path / "output_from_file_like.mp4")
+            with open(output_path, "wb") as f:
+                f.write(file_like.getvalue())
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        actual_frame_rate = get_frame_rate(output_path)
+        # Ensure frame_rate=None uses the input_frame_rate
+        if output_frame_rate is None:
+            output_frame_rate = input_frame_rate
+        assert actual_frame_rate == output_frame_rate

From cb7ff172b0d8ca3e1872c6b81f33c41df8324774 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 17:28:44 -0500
Subject: [PATCH 2/2] small fixes, reuse _get_video_metadata

---
 src/torchcodec/_core/Encoder.cpp          |  7 ++++--
 src/torchcodec/encoders/_video_encoder.py |  6 ++---
 test/test_encoders.py                     | 30 ++++-------------------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 6f47854a6..a0c13dde8 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -644,7 +644,11 @@ void sortCodecOptions(
 }
 
 void validateFrameRate(int frameRate) {
-  TORCH_CHECK(frameRate > 0, "frame_rate=", frameRate, " must be > 0.");
+  TORCH_CHECK(
+      frameRate > 0,
+      "Invalid frame_rate: ",
+      frameRate,
+      ". Frame rate must be a positive integer.");
 }
 } // namespace
 
@@ -796,7 +800,6 @@ void VideoEncoder::initializeEncoder(
   avCodecContext_->width = outWidth_;
   avCodecContext_->height = outHeight_;
   avCodecContext_->pix_fmt = outPixelFormat_;
-  // TODO-VideoEncoder: Verify that frame_rate and time_base are correct
   avCodecContext_->time_base = {1, outFrameRate_};
   avCodecContext_->framerate = {outFrameRate_, 1};
 
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index 191e0de03..9c0dde519 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -67,7 +67,7 @@ def to_file(
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
             frame_rate (int, optional): The frame rate of the output video. If not specified,
-                uses the frame rate provided to the VideoEncoder constructor.
+                uses the frame rate of the input ``frames``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
@@ -116,7 +116,7 @@ def to_tensor(
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
             frame_rate (int, optional): The frame rate of the output video. If not specified,
-                uses the frame rate provided to the VideoEncoder constructor.
+                uses the frame rate of the input ``frames``.
 
         Returns:
             Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -174,7 +174,7 @@ def to_file_like(
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
             frame_rate (int, optional): The frame rate of the output video. If not specified,
-                uses the frame rate provided to the VideoEncoder constructor.
+                uses the frame rate of the input ``frames``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
diff --git a/test/test_encoders.py b/test/test_encoders.py
index 31fba5148..5b6fe1546 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -659,7 +659,7 @@ def test_bad_input_parameterized(self, tmp_path, method):
         ):
             encoder.to_tensor(format="mp4", preset="fake_preset")
 
-        with pytest.raises(RuntimeError, match=r"frame_rate"):
+        with pytest.raises(RuntimeError, match="Invalid frame_rate: "):
             encoder = VideoEncoder(
                 frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
                 frame_rate=30,
@@ -1191,29 +1191,6 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
     @pytest.mark.parametrize("output_frame_rate", [10, 60, None])
     def test_frame_rate_parameter(self, tmp_path, method, output_frame_rate):
 
-        # Use ffprobe to get frame rate
-        def get_frame_rate(file_path):
-            result = subprocess.run(
-                [
-                    "ffprobe",
-                    "-v",
-                    "error",
-                    "-select_streams",
-                    "v:0",
-                    "-show_entries",
-                    "stream=r_frame_rate",
-                    "-of",
-                    "default=noprint_wrappers=1:nokey=1",
-                    str(file_path),
-                ],
-                capture_output=True,
-                check=True,
-                text=True,
-            )
-            # Frame rate is returned as a fraction like "60/1"
-            numerator, denominator = result.stdout.strip().split("/")
-            return int(numerator) / int(denominator)
-
         frames = (
             VideoDecoder(TEST_SRC_2_720P.path)
             .get_frames_in_range(start=0, stop=60)
@@ -1245,7 +1222,10 @@ def get_frame_rate(file_path):
         else:
             raise ValueError(f"Unknown method: {method}")
 
-        actual_frame_rate = get_frame_rate(output_path)
+        metadata = self._get_video_metadata(output_path, ["r_frame_rate"])
+        # Frame rate is returned as a fraction like "60/1"
+        numerator, denominator = metadata["r_frame_rate"].split("/")
+        actual_frame_rate = int(numerator) / int(denominator)
         # Ensure frame_rate=None uses the input_frame_rate
         if output_frame_rate is None:
             output_frame_rate = input_frame_rate