meta-pytorch · Dan-Flores · Nov 14, 2025 · Nov 14, 2025 · Dan-Flores · Nov 14, 2025
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -642,6 +642,14 @@ void sortCodecOptions(
     }
   }
 }
+
+void validateFrameRate(int frameRate) {
+  TORCH_CHECK(
+      frameRate > 0,
+      "Invalid frame_rate: ",
+      frameRate,
+      ". Frame rate must be a positive integer.");
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -667,6 +675,7 @@ VideoEncoder::VideoEncoder(
     const VideoStreamOptions& videoStreamOptions)
     : frames_(validateFrames(frames)), inFrameRate_(frameRate) {
   setFFmpegLogLevel();
+  validateFrameRate(frameRate);
 
   // Allocate output format context
   AVFormatContext* avFormatContext = nullptr;
@@ -724,6 +733,10 @@ VideoEncoder::VideoEncoder(
 
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
+  // Set output frame rate and validate
+  outFrameRate_ = videoStreamOptions.frameRate.value_or(inFrameRate_);
+  validateFrameRate(outFrameRate_);
+
   const AVCodec* avCodec = nullptr;
   // If codec arg is provided, find codec using logic similar to FFmpeg:
   // https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835
@@ -787,9 +800,8 @@ void VideoEncoder::initializeEncoder(
   avCodecContext_->width = outWidth_;
   avCodecContext_->height = outHeight_;
   avCodecContext_->pix_fmt = outPixelFormat_;
-  // TODO-VideoEncoder: Verify that frame_rate and time_base are correct
-  avCodecContext_->time_base = {1, inFrameRate_};
-  avCodecContext_->framerate = {inFrameRate_, 1};
+  avCodecContext_->time_base = {1, outFrameRate_};
+  avCodecContext_->framerate = {outFrameRate_, 1};
 
   // Set flag for containers that require extradata to be in the codec context
   if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) {

diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
@@ -173,6 +173,7 @@ class VideoEncoder {
 
   const torch::Tensor frames_;
   int inFrameRate_;
+  int outFrameRate_ = -1;
 
   int inWidth_ = -1;
   int inHeight_ = -1;

diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
@@ -53,6 +53,7 @@ struct VideoStreamOptions {
   std::optional<double> crf;
   std::optional<std::string> preset;
   std::optional<std::map<std::string, std::string>> extraOptions;
+  std::optional<int> frameRate;
 };
 
 struct AudioStreamOptions {

diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None, int? desired_frame_rate=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -617,7 +617,8 @@ void encode_video_to_file(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
   videoStreamOptions.pixelFormat = pixel_format;
@@ -628,7 +629,8 @@ void encode_video_to_file(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
-
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -645,7 +647,8 @@ at::Tensor encode_video_to_tensor(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
@@ -657,7 +660,8 @@ at::Tensor encode_video_to_tensor(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
-
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -676,7 +680,8 @@ void _encode_video_to_file_like(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt,
+    std::optional<int64_t> desired_frame_rate = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -693,6 +698,8 @@ void _encode_video_to_file_like(
     videoStreamOptions.extraOptions =
         unflattenExtraOptions(extra_options.value());
   }
+  videoStreamOptions.frameRate =
+      validateOptionalInt64ToInt(desired_frame_rate, "desired_frame_rate");
 
   VideoEncoder encoder(
       frames,

diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -218,19 +218,22 @@ def encode_video_to_file_like(
     crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
     Args:
         frames: Video frames tensor
-        frame_rate: Frame rate in frames per second
+        frame_rate: Frame rate in frames per second (input frame rate)
         format: Video format (e.g., "mp4", "mov", "mkv")
         file_like: File-like object that supports write() and seek() methods
         codec: Optional codec name (e.g., "libx264", "h264")
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
         crf: Optional constant rate factor for encoding quality
         preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
         extra_options: Optional list of extra options as flattened key-value pairs
+        desired_frame_rate: Optional desired output frame rate. If not specified,
+            uses the input frame_rate.
     """
     assert _pybind_ops is not None
 
@@ -244,6 +247,7 @@ def encode_video_to_file_like(
         crf,
         preset,
         extra_options,
+        desired_frame_rate,
     )
 
 
@@ -336,6 +340,7 @@ def encode_video_to_file_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     return
 
@@ -350,6 +355,7 @@ def encode_video_to_tensor_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -365,6 +371,7 @@ def _encode_video_to_file_like_abstract(
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     extra_options: Optional[list[str]] = None,
+    desired_frame_rate: Optional[int] = None,
 ) -> None:
     return
 

diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -15,7 +15,9 @@ class VideoEncoder:
             tensor of shape ``(N, C, H, W)`` where N is the number of frames,
             C is 3 channels (RGB), H is height, and W is width.
             Values must be uint8 in the range ``[0, 255]``.
-        frame_rate (int): The frame rate of the **input** ``frames``. Also defines the encoded **output** frame rate.
+        frame_rate (int): The frame rate of the **input** ``frames``. The
+            frame rate of the encoded output can be specified using the
+            encoding methods (``to_file``, etc.).
     """
 
     def __init__(self, frames: Tensor, *, frame_rate: int):
@@ -41,6 +43,7 @@ def to_file(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
+        frame_rate: Optional[int] = None,
     ) -> None:
         """Encode frames into a file.
 
@@ -63,6 +66,8 @@ def to_file(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate of the input ``frames``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
@@ -76,6 +81,7 @@ def to_file(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
 
     def to_tensor(
@@ -87,6 +93,7 @@ def to_tensor(
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
         extra_options: Optional[Dict[str, Any]] = None,
+        frame_rate: Optional[int] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -108,6 +115,8 @@ def to_tensor(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate of the input ``frames``.
 
         Returns:
             Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -124,6 +133,7 @@ def to_tensor(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
 
     def to_file_like(
@@ -136,6 +146,7 @@ def to_file_like(
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
         extra_options: Optional[Dict[str, Any]] = None,
+        frame_rate: Optional[int] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -162,6 +173,8 @@ def to_file_like(
             extra_options (dict[str, Any], optional): A dictionary of additional
                 encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
+            frame_rate (int, optional): The frame rate of the output video. If not specified,
+                uses the frame rate of the input ``frames``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
@@ -176,4 +189,5 @@ def to_file_like(
             extra_options=[
                 str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
+            desired_frame_rate=frame_rate,
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -659,6 +659,13 @@ def test_bad_input_parameterized(self, tmp_path, method):
         ):
             encoder.to_tensor(format="mp4", preset="fake_preset")
 
+        with pytest.raises(RuntimeError, match="Invalid frame_rate: "):
+            encoder = VideoEncoder(
+                frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params, frame_rate=0)
+
     @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
     @pytest.mark.parametrize("crf", [23, 23.5, -0.9])
     def test_crf_valid_values(self, method, crf, tmp_path):
@@ -1175,3 +1182,51 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
         assert metadata["profile"].lower() == expected_profile
         assert metadata["color_space"] == colorspace
         assert metadata["color_range"] == color_range
+
+    @pytest.mark.skipif(
+        in_fbcode(),
+        reason="ffprobe not available internally",
+    )
+    @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
+    @pytest.mark.parametrize("output_frame_rate", [10, 60, None])
+    def test_frame_rate_parameter(self, tmp_path, method, output_frame_rate):
+
+        frames = (
+            VideoDecoder(TEST_SRC_2_720P.path)
+            .get_frames_in_range(start=0, stop=60)
+            .data
+        )
+        input_frame_rate = 30
+        encoder = VideoEncoder(frames=frames, frame_rate=input_frame_rate)
+
+        if method == "to_file":
+            output_path = str(tmp_path / "output.mp4")
+            encoder.to_file(dest=output_path, frame_rate=output_frame_rate)
+        elif method == "to_tensor":
+            encoded_tensor = encoder.to_tensor(
+                format="mp4", frame_rate=output_frame_rate
+            )
+            # Write tensor to file to check with ffprobe
+            output_path = str(tmp_path / "output_from_tensor.mp4")
+            with open(output_path, "wb") as f:
+                f.write(encoded_tensor.numpy().tobytes())
+        elif method == "to_file_like":
+            file_like = io.BytesIO()
+            encoder.to_file_like(
+                file_like=file_like, format="mp4", frame_rate=output_frame_rate
+            )
+            # Write file_like to file to check with ffprobe
+            output_path = str(tmp_path / "output_from_file_like.mp4")
+            with open(output_path, "wb") as f:
+                f.write(file_like.getvalue())
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        metadata = self._get_video_metadata(output_path, ["r_frame_rate"])
+        # Frame rate is returned as a fraction like "60/1"
+        numerator, denominator = metadata["r_frame_rate"].split("/")
+        actual_frame_rate = int(numerator) / int(denominator)
+        # Ensure frame_rate=None uses the input_frame_rate
+        if output_frame_rate is None:
+            output_frame_rate = input_frame_rate
+        assert actual_frame_rate == output_frame_rate