From 1d79594996d9672ad81c96702e268575fcefccbe Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Thu, 13 Nov 2025 17:07:58 -0500
Subject: [PATCH 1/7] add codec options, apply numeric error handling

---
 src/torchcodec/_core/Encoder.cpp          | 87 +++++++++++++++++------
 src/torchcodec/_core/Encoder.h            | 11 +++
 src/torchcodec/_core/StreamOptions.h      |  2 +
 src/torchcodec/_core/custom_ops.cpp       | 42 +++++++++--
 src/torchcodec/_core/ops.py               | 12 +++-
 src/torchcodec/encoders/_video_encoder.py | 33 ++++++++-
 test/test_encoders.py                     | 39 ++++++++++
 7 files changed, 194 insertions(+), 32 deletions(-)

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 90e326087..8ac13cdd6 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -570,10 +570,10 @@ AVPixelFormat validatePixelFormat(
   TORCH_CHECK(false, errorMsg.str());
 }
 
-void validateDoubleOption(
+void tryToValidateCodecOption(
     const AVCodec& avCodec,
     const char* optionName,
-    double value) {
+    const std::string& value) {
   if (!avCodec.priv_class) {
     return;
   }
@@ -586,24 +586,36 @@ void validateDoubleOption(
       0,
       AV_OPT_SEARCH_FAKE_OBJ,
       nullptr);
-  // If the option was not found, let FFmpeg handle it later
+  // If option is not found we cannot validate it, let FFmpeg handle it
   if (!option) {
     return;
   }
+  // Validate options defined as a numeric type
   if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
       option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
-    TORCH_CHECK(
-        value >= option->min && value <= option->max,
-        optionName,
-        "=",
-        value,
-        " is out of valid range [",
-        option->min,
-        ", ",
-        option->max,
-        "] for this codec. For more details, run 'ffmpeg -h encoder=",
-        avCodec.name,
-        "'");
+    try {
+      double numericValue = std::stod(value);
+      TORCH_CHECK(
+          numericValue >= option->min && numericValue <= option->max,
+          optionName,
+          "=",
+          numericValue,
+          " is out of valid range [",
+          option->min,
+          ", ",
+          option->max,
+          "] for this codec. For more details, run 'ffmpeg -h encoder=",
+          avCodec.name,
+          "'");
+    } catch (const std::invalid_argument& e) {
+      TORCH_CHECK(
+          false,
+          "Option ",
+          optionName,
+          " expects a numeric value but got '",
+          value,
+          "'");
+    }
   }
 }
 } // namespace
@@ -685,6 +697,30 @@ VideoEncoder::VideoEncoder(
   initializeEncoder(videoStreamOptions);
 }
 
+void VideoEncoder::sortCodecOptions(
+    const std::map<std::string, std::string>& codecOptions,
+    AVDictionary** codecDict,
+    AVDictionary** formatDict) {
+  // Search AVFormatContext's AVClass for options
+  const AVClass* formatClass = avformat_get_class();
+  for (const auto& [key, value] : codecOptions) {
+    const AVOption* fmtOpt = av_opt_find2(
+        &formatClass,
+        key.c_str(),
+        nullptr,
+        0,
+        AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ,
+        nullptr);
+    if (fmtOpt) {
+      av_dict_set(formatDict, key.c_str(), value.c_str(), 0);
+    } else {
+      // Default to codec option (includes AVCodecContext + encoder-private)
+      // validateCodecOption(*avCodecContext_->codec, key.c_str(), value);
+      av_dict_set(codecDict, key.c_str(), value.c_str(), 0);
+    }
+  }
+}
+
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
   const AVCodec* avCodec =
@@ -737,13 +773,19 @@ void VideoEncoder::initializeEncoder(
 
   // Apply videoStreamOptions
   AVDictionary* options = nullptr;
+  if (videoStreamOptions.codecOptions.has_value()) {
+    // Validate all codec options before setting them
+    for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) {
+      tryToValidateCodecOption(*avCodec, key.c_str(), value);
+    }
+    sortCodecOptions(
+        videoStreamOptions.codecOptions.value(), &options, &formatOptions_);
+  }
+
   if (videoStreamOptions.crf.has_value()) {
-    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
-    av_dict_set(
-        &options,
-        "crf",
-        std::to_string(videoStreamOptions.crf.value()).c_str(),
-        0);
+    std::string crfValue = std::to_string(videoStreamOptions.crf.value());
+    tryToValidateCodecOption(*avCodec, "crf", crfValue);
+    av_dict_set(&options, "crf", crfValue.c_str(), 0);
   }
   if (videoStreamOptions.preset.has_value()) {
     av_dict_set(
@@ -775,7 +817,8 @@ void VideoEncoder::encode() {
   TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice.");
   encodeWasCalled_ = true;
 
-  int status = avformat_write_header(avFormatContext_.get(), nullptr);
+  int status = avformat_write_header(avFormatContext_.get(), &formatOptions_);
+  av_dict_free(&formatOptions_);
   TORCH_CHECK(
       status == AVSUCCESS,
       "Error in avformat_write_header: ",
diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
index c1055281a..24ce19011 100644
--- a/src/torchcodec/_core/Encoder.h
+++ b/src/torchcodec/_core/Encoder.h
@@ -1,9 +1,15 @@
 #pragma once
 #include <torch/types.h>
+#include <map>
+#include <string>
 #include "AVIOContextHolder.h"
 #include "FFMPEGCommon.h"
 #include "StreamOptions.h"
 
+extern "C" {
+#include <libavutil/dict.h>
+}
+
 namespace facebook::torchcodec {
 class AudioEncoder {
  public:
@@ -154,6 +160,10 @@ class VideoEncoder {
 
  private:
   void initializeEncoder(const VideoStreamOptions& videoStreamOptions);
+  void sortCodecOptions(
+      const std::map<std::string, std::string>& codecOptions,
+      AVDictionary** codecDict,
+      AVDictionary** formatDict);
   UniqueAVFrame convertTensorToAVFrame(
       const torch::Tensor& frame,
       int frameIndex);
@@ -179,6 +189,7 @@ class VideoEncoder {
   std::unique_ptr<AVIOContextHolder> avioContextHolder_;
 
   bool encodeWasCalled_ = false;
+  AVDictionary* formatOptions_ = nullptr;
 };
 
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
index 01af6846c..56df44754 100644
--- a/src/torchcodec/_core/StreamOptions.h
+++ b/src/torchcodec/_core/StreamOptions.h
@@ -7,6 +7,7 @@
 #pragma once
 
 #include <torch/types.h>
+#include <map>
 #include <optional>
 #include <string>
 #include <string_view>
@@ -50,6 +51,7 @@ struct VideoStreamOptions {
   std::optional<std::string> pixelFormat;
   std::optional<double> crf;
   std::optional<std::string> preset;
+  std::optional<std::map<std::string, std::string>> codecOptions;
 };
 
 struct AudioStreamOptions {
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index 2ef5d0f49..f6d14a9d0 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -158,6 +158,16 @@ std::string quoteValue(const std::string& value) {
   return "\"" + value + "\"";
 }
 
+// Helper function to unflatten codec_options, alternating keys and values
+std::map<std::string, std::string> unflattenCodecOptions(
+    const std::vector<std::string>& opts) {
+  std::map<std::string, std::string> optionsMap;
+  for (size_t i = 0; i < opts.size(); i += 2) {
+    optionsMap[opts[i]] = opts[i + 1];
+  }
+  return optionsMap;
+}
+
 std::string mapToJson(const std::map<std::string, std::string>& metadataMap) {
   std::stringstream ss;
   ss << "{\n";
@@ -605,11 +615,18 @@ void encode_video_to_file(
     std::string_view file_name,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
-    std::optional<std::string_view> preset = std::nullopt) {
+    std::optional<std::string_view> preset = std::nullopt,
+    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
+
+  if (codec_options.has_value()) {
+    videoStreamOptions.codecOptions =
+        unflattenCodecOptions(codec_options.value());
+  }
+
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -624,12 +641,19 @@ at::Tensor encode_video_to_tensor(
     std::string_view format,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
-    std::optional<std::string_view> preset = std::nullopt) {
+    std::optional<std::string_view> preset = std::nullopt,
+    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
+
+  if (codec_options.has_value()) {
+    videoStreamOptions.codecOptions =
+        unflattenCodecOptions(codec_options.value());
+  }
+
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -646,7 +670,8 @@ void _encode_video_to_file_like(
     int64_t file_like_context,
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
-    std::optional<std::string_view> preset = std::nullopt) {
+    std::optional<std::string_view> preset = std::nullopt,
+    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -658,6 +683,11 @@ void _encode_video_to_file_like(
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
 
+  if (codec_options.has_value()) {
+    videoStreamOptions.codecOptions =
+        unflattenCodecOptions(codec_options.value());
+  }
+
   VideoEncoder encoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
index a2f1fa0a3..409b06f13 100644
--- a/src/torchcodec/_core/ops.py
+++ b/src/torchcodec/_core/ops.py
@@ -216,6 +216,7 @@ def encode_video_to_file_like(
     crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
+    codec_options: Optional[list[str]] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
@@ -227,6 +228,7 @@ def encode_video_to_file_like(
         crf: Optional constant rate factor for encoding quality
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
         preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
+        codec_options: Optional list of codec options as flattened key-value pairs
     """
     assert _pybind_ops is not None
 
@@ -238,6 +240,7 @@ def encode_video_to_file_like(
         pixel_format,
         crf,
         preset,
+        codec_options,
     )
 
 
@@ -326,8 +329,9 @@ def encode_video_to_file_abstract(
     frame_rate: int,
     filename: str,
     pixel_format: Optional[str] = None,
-    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    codec_options: Optional[list[str]] = None,
 ) -> None:
     return
 
@@ -338,8 +342,9 @@ def encode_video_to_tensor_abstract(
     frame_rate: int,
     format: str,
     pixel_format: Optional[str] = None,
-    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    codec_options: Optional[list[str]] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -351,8 +356,9 @@ def _encode_video_to_file_like_abstract(
     format: str,
     file_like_context: int,
     pixel_format: Optional[str] = None,
-    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    codec_options: Optional[list[str]] = None,
 ) -> None:
     return
 
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index d812d4a11..29af0661c 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 import torch
 from torch import Tensor
@@ -7,6 +7,25 @@
 from torchcodec import _core
 
 
+def _flatten_codec_options(codec_options: Optional[Dict[str, str]]) -> Optional[list]:
+    """Convert codec_options dict to flattened list for torch op.
+
+    Args:
+        codec_options: A dictionary of codec-specific options, e.g.
+            {"preset": "slow", "tune": "film"}
+
+    Returns:
+        A flattened list of alternating keys and values, e.g.
+            ["preset", "slow", "tune", "film"], or None if input is None.
+    """
+    if codec_options is None:
+        return None
+    result = []
+    for key, value in codec_options.items():
+        result.extend([key, value])
+    return result
+
+
 class VideoEncoder:
     """A video encoder.
 
@@ -35,6 +54,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int):
     def to_file(
         self,
         dest: Union[str, Path],
+        codec_options: Optional[Dict[str, str]] = None,
         *,
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
@@ -55,6 +75,8 @@ def to_file(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
+            codec_options (dict[str, str], optional): A dictionary of codec-specific
+                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
@@ -64,6 +86,7 @@ def to_file(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
+            codec_options=_flatten_codec_options(codec_options),
         )
 
     def to_tensor(
@@ -73,6 +96,7 @@ def to_tensor(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
+        codec_options: Optional[Dict[str, str]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -88,6 +112,8 @@ def to_tensor(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
+            codec_options (dict[str, str], optional): A dictionary of codec-specific
+                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
 
         Returns:
             Tensor: The raw encoded bytes as 4D uint8 Tensor.
@@ -100,6 +126,7 @@ def to_tensor(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset_value,
+            codec_options=_flatten_codec_options(codec_options),
         )
 
     def to_file_like(
@@ -110,6 +137,7 @@ def to_file_like(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
+        codec_options: Optional[Dict[str, str]] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -130,6 +158,8 @@ def to_file_like(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
+            codec_options (dict[str, str], optional): A dictionary of codec-specific
+                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
@@ -140,4 +170,5 @@ def to_file_like(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
+            codec_options=_flatten_codec_options(codec_options),
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
index 303f3307c..87a889741 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -689,6 +689,45 @@ def test_pixel_format_errors(self, method, tmp_path):
         ):
             getattr(encoder, method)(**valid_params, pixel_format="rgb24")
 
+    @pytest.mark.parametrize(
+        "codec_options,error",
+        [
+            ({"qp": "-10"}, "qp=-10 is out of valid range"),
+            (
+                {"qp": ""},
+                "Option qp expects a numeric value but got",
+            ),
+            (
+                {"direct-pred": "a"},
+                "Option direct-pred expects a numeric value but got 'a'",
+            ),
+            ({"tune": "not_a_real_tune"}, "avcodec_open2 failed: Invalid argument"),
+            (
+                {"tune": "10"},
+                "avcodec_open2 failed: Invalid argument",
+            ),
+        ],
+    )
+    @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
+    def test_codec_options_errors(self, method, tmp_path, codec_options, error):
+        frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)
+        encoder = VideoEncoder(frames, frame_rate=30)
+
+        if method == "to_file":
+            valid_params = dict(dest=str(tmp_path / "output.mp4"))
+        elif method == "to_tensor":
+            valid_params = dict(format="mp4")
+        elif method == "to_file_like":
+            valid_params = dict(file_like=io.BytesIO(), format="mp4")
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        with pytest.raises(
+            RuntimeError,
+            match=error,
+        ):
+            getattr(encoder, method)(**valid_params, codec_options=codec_options)
+
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_contiguity(self, method, tmp_path):
         # Ensure that 2 sets of video frames with the same pixel values are encoded

From e4d3edeefb85b5c11f486b5fa79386f939766d7b Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 09:05:27 -0500
Subject: [PATCH 2/7] inline dict flattening, accept str, Any

---
 src/torchcodec/encoders/_video_encoder.py | 48 +++++++++--------------
 test/test_encoders.py                     |  4 +-
 2 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index 29af0661c..edada5694 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
 
 import torch
 from torch import Tensor
@@ -7,25 +7,6 @@
 from torchcodec import _core
 
 
-def _flatten_codec_options(codec_options: Optional[Dict[str, str]]) -> Optional[list]:
-    """Convert codec_options dict to flattened list for torch op.
-
-    Args:
-        codec_options: A dictionary of codec-specific options, e.g.
-            {"preset": "slow", "tune": "film"}
-
-    Returns:
-        A flattened list of alternating keys and values, e.g.
-            ["preset", "slow", "tune", "film"], or None if input is None.
-    """
-    if codec_options is None:
-        return None
-    result = []
-    for key, value in codec_options.items():
-        result.extend([key, value])
-    return result
-
-
 class VideoEncoder:
     """A video encoder.
 
@@ -54,7 +35,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int):
     def to_file(
         self,
         dest: Union[str, Path],
-        codec_options: Optional[Dict[str, str]] = None,
+        codec_options: Optional[Dict[str, Any]] = None,
         *,
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
@@ -75,8 +56,9 @@ def to_file(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, str], optional): A dictionary of codec-specific
+            codec_options (dict[str, Any], optional): A dictionary of codec-specific
                 options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+                Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
@@ -86,7 +68,9 @@ def to_file(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
-            codec_options=_flatten_codec_options(codec_options),
+            codec_options=[
+                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            ],
         )
 
     def to_tensor(
@@ -96,7 +80,7 @@ def to_tensor(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
-        codec_options: Optional[Dict[str, str]] = None,
+        codec_options: Optional[Dict[str, Any]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -112,8 +96,9 @@ def to_tensor(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, str], optional): A dictionary of codec-specific
+            codec_options (dict[str, Any], optional): A dictionary of codec-specific
                 options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+                Values will be converted to strings before passing to the encoder.
 
         Returns:
             Tensor: The raw encoded bytes as 4D uint8 Tensor.
@@ -126,7 +111,9 @@ def to_tensor(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset_value,
-            codec_options=_flatten_codec_options(codec_options),
+            codec_options=[
+                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            ],
         )
 
     def to_file_like(
@@ -137,7 +124,7 @@ def to_file_like(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
-        codec_options: Optional[Dict[str, str]] = None,
+        codec_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -158,8 +145,9 @@ def to_file_like(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, str], optional): A dictionary of codec-specific
+            codec_options (dict[str, Any], optional): A dictionary of codec-specific
                 options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+                Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
@@ -170,5 +158,7 @@ def to_file_like(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
-            codec_options=_flatten_codec_options(codec_options),
+            codec_options=[
+                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            ],
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
index 87a889741..b8483ad30 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -692,7 +692,7 @@ def test_pixel_format_errors(self, method, tmp_path):
     @pytest.mark.parametrize(
         "codec_options,error",
         [
-            ({"qp": "-10"}, "qp=-10 is out of valid range"),
+            ({"qp": -10}, "qp=-10 is out of valid range"),
             (
                 {"qp": ""},
                 "Option qp expects a numeric value but got",
@@ -703,7 +703,7 @@ def test_pixel_format_errors(self, method, tmp_path):
             ),
             ({"tune": "not_a_real_tune"}, "avcodec_open2 failed: Invalid argument"),
             (
-                {"tune": "10"},
+                {"tune": 10},
                 "avcodec_open2 failed: Invalid argument",
             ),
         ],

From e6fd72b5307997452c07e86adbf7a33ba2fb7eb1 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 09:42:19 -0500
Subject: [PATCH 3/7] namespace, var names, other suggestions

---
 src/torchcodec/_core/Encoder.cpp          | 72 ++++++++++++-----------
 src/torchcodec/_core/Encoder.h            |  6 +-
 src/torchcodec/encoders/_video_encoder.py |  2 +-
 3 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 8ac13cdd6..583266a65 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -590,7 +590,7 @@ void tryToValidateCodecOption(
   if (!option) {
     return;
   }
-  // Validate options defined as a numeric type
+  // Validate if option is defined as a numeric type
   if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
       option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
     try {
@@ -618,6 +618,30 @@ void tryToValidateCodecOption(
     }
   }
 }
+
+void sortCodecOptions(
+    const std::map<std::string, std::string>& codecOptions,
+    AVDictionary** codecDict,
+    AVDictionary** formatDict) {
+  // Accepts a map of options as input, then sorts them into codec options and
+  // format options. The sorted options are returned into two separate dicts.
+  const AVClass* formatClass = avformat_get_class();
+  for (const auto& [key, value] : codecOptions) {
+    const AVOption* fmtOpt = av_opt_find2(
+        &formatClass,
+        key.c_str(),
+        nullptr,
+        0,
+        AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ,
+        nullptr);
+    if (fmtOpt) {
+      av_dict_set(formatDict, key.c_str(), value.c_str(), 0);
+    } else {
+      // Default to codec option (includes AVCodecContext + encoder-private)
+      av_dict_set(codecDict, key.c_str(), value.c_str(), 0);
+    }
+  }
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -633,6 +657,7 @@ VideoEncoder::~VideoEncoder() {
       avFormatContext_->pb = nullptr;
     }
   }
+  av_dict_free(&avFormatOptions_);
 }
 
 VideoEncoder::VideoEncoder(
@@ -697,30 +722,6 @@ VideoEncoder::VideoEncoder(
   initializeEncoder(videoStreamOptions);
 }
 
-void VideoEncoder::sortCodecOptions(
-    const std::map<std::string, std::string>& codecOptions,
-    AVDictionary** codecDict,
-    AVDictionary** formatDict) {
-  // Search AVFormatContext's AVClass for options
-  const AVClass* formatClass = avformat_get_class();
-  for (const auto& [key, value] : codecOptions) {
-    const AVOption* fmtOpt = av_opt_find2(
-        &formatClass,
-        key.c_str(),
-        nullptr,
-        0,
-        AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ,
-        nullptr);
-    if (fmtOpt) {
-      av_dict_set(formatDict, key.c_str(), value.c_str(), 0);
-    } else {
-      // Default to codec option (includes AVCodecContext + encoder-private)
-      // validateCodecOption(*avCodecContext_->codec, key.c_str(), value);
-      av_dict_set(codecDict, key.c_str(), value.c_str(), 0);
-    }
-  }
-}
-
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
   const AVCodec* avCodec =
@@ -772,27 +773,31 @@ void VideoEncoder::initializeEncoder(
   }
 
   // Apply videoStreamOptions
-  AVDictionary* options = nullptr;
+  AVDictionary* avCodecOptions = nullptr;
   if (videoStreamOptions.codecOptions.has_value()) {
-    // Validate all codec options before setting them
     for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) {
       tryToValidateCodecOption(*avCodec, key.c_str(), value);
     }
     sortCodecOptions(
-        videoStreamOptions.codecOptions.value(), &options, &formatOptions_);
+        videoStreamOptions.codecOptions.value(),
+        &avCodecOptions,
+        &avFormatOptions_);
   }
 
   if (videoStreamOptions.crf.has_value()) {
     std::string crfValue = std::to_string(videoStreamOptions.crf.value());
     tryToValidateCodecOption(*avCodec, "crf", crfValue);
-    av_dict_set(&options, "crf", crfValue.c_str(), 0);
+    av_dict_set(&avCodecOptions, "crf", crfValue.c_str(), 0);
   }
   if (videoStreamOptions.preset.has_value()) {
     av_dict_set(
-        &options, "preset", videoStreamOptions.preset.value().c_str(), 0);
+        &avCodecOptions,
+        "preset",
+        videoStreamOptions.preset.value().c_str(),
+        0);
   }
-  int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
-  av_dict_free(&options);
+  int status = avcodec_open2(avCodecContext_.get(), avCodec, &avCodecOptions);
+  av_dict_free(&avCodecOptions);
 
   TORCH_CHECK(
       status == AVSUCCESS,
@@ -817,8 +822,7 @@ void VideoEncoder::encode() {
   TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice.");
   encodeWasCalled_ = true;
 
-  int status = avformat_write_header(avFormatContext_.get(), &formatOptions_);
-  av_dict_free(&formatOptions_);
+  int status = avformat_write_header(avFormatContext_.get(), &avFormatOptions_);
   TORCH_CHECK(
       status == AVSUCCESS,
       "Error in avformat_write_header: ",
diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
index 24ce19011..3d59eb6f6 100644
--- a/src/torchcodec/_core/Encoder.h
+++ b/src/torchcodec/_core/Encoder.h
@@ -160,10 +160,6 @@ class VideoEncoder {
 
  private:
   void initializeEncoder(const VideoStreamOptions& videoStreamOptions);
-  void sortCodecOptions(
-      const std::map<std::string, std::string>& codecOptions,
-      AVDictionary** codecDict,
-      AVDictionary** formatDict);
   UniqueAVFrame convertTensorToAVFrame(
       const torch::Tensor& frame,
       int frameIndex);
@@ -189,7 +185,7 @@ class VideoEncoder {
   std::unique_ptr<AVIOContextHolder> avioContextHolder_;
 
   bool encodeWasCalled_ = false;
-  AVDictionary* formatOptions_ = nullptr;
+  AVDictionary* avFormatOptions_ = nullptr;
 };
 
 } // namespace facebook::torchcodec
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index edada5694..b0d5c5b2b 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -57,7 +57,7 @@ def to_file(
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
             codec_options (dict[str, Any], optional): A dictionary of codec-specific
-                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+                options to pass to the encoder, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset

From 888c8d405bcf5a1bda4ad8c994424e1d05eb45d0 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 10:40:41 -0500
Subject: [PATCH 4/7] wip test

---
 test/test_encoders.py | 52 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index b8483ad30..e5ff968df 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -1029,3 +1029,55 @@ def write(self, data):
             RuntimeError, match="File like object must implement a seek method"
         ):
             encoder.to_file_like(NoSeekMethod(), format="mp4")
+
+    def get_video_metadata_from_file(self, file_path):
+        """Helper function to get video metadata from a file using ffprobe.
+
+        Returns a dict with codec_name, profile, pix_fmt, etc.
+        """
+        result = subprocess.run(
+            [
+                "ffprobe",
+                "-v",
+                "error",
+                "-select_streams",
+                "v:0",
+                "-show_entries",
+                "stream=codec_name,profile,pix_fmt",
+                "-of",
+                "default=noprint_wrappers=1",
+                str(file_path),
+            ],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+        # Parse output like "codec_name=h264\nprofile=Baseline\npix_fmt=yuv420p"
+        metadata = {}
+        for line in result.stdout.strip().split("\n"):
+            if "=" in line:
+                key, value = line.split("=", 1)
+                metadata[key] = value
+        return metadata
+
+    @pytest.mark.skipif(in_fbcode(), reason="ffprobe not available")
+    def test_codec_options_profile(self, tmp_path):
+        # Test that we can set the H.264 profile via codec_options
+        # and validate it was used by checking with ffprobe.
+        source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)
+        encoder = VideoEncoder(frames=source_frames, frame_rate=30)
+
+        # Encode with baseline profile (explicitly specified via codec_options)
+        output_path = tmp_path / "output_baseline.mp4"
+        encoder.to_file(
+            dest=str(output_path),
+            codec_options={"profile": "baseline"},
+        )
+
+        # Validate the profile was used
+        metadata = self.get_video_metadata_from_file(output_path)
+        assert metadata.get("codec_name") == "h264"
+        assert metadata.get("profile") in (
+            "Baseline",
+            "Constrained Baseline",
+        ), f"Expected Baseline profile, got {metadata.get('profile')}"

From 36a2c4181bd9e3892df4566490a15af1b11bfdbc Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 11:45:40 -0500
Subject: [PATCH 5/7] add codec_options test, generalize ffprobe function to
 reuse there

---
 test/test_encoders.py | 103 ++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 53 deletions(-)

diff --git a/test/test_encoders.py b/test/test_encoders.py
index d5d102f27..6a8210ec6 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -572,8 +572,8 @@ class TestVideoEncoder:
     def decode(self, source=None) -> torch.Tensor:
         return VideoDecoder(source).get_frames_in_range(start=0, stop=60)
 
-    def _get_codec_spec(self, file_path):
-        """Helper function to get codec name from a video file using ffprobe."""
+    def _get_video_metadata(self, file_path, fields):
+        """Helper function to get video metadata from a file using ffprobe."""
         result = subprocess.run(
             [
                 "ffprobe",
@@ -582,16 +582,21 @@ def _get_codec_spec(self, file_path):
                 "-select_streams",
                 "v:0",
                 "-show_entries",
-                "stream=codec_name",
+                f"stream={','.join(fields)}",
                 "-of",
-                "default=noprint_wrappers=1:nokey=1",
+                "default=noprint_wrappers=1",
                 str(file_path),
             ],
             capture_output=True,
             check=True,
             text=True,
         )
-        return result.stdout.strip()
+        metadata = {}
+        for line in result.stdout.strip().split("\n"):
+            if "=" in line:
+                key, value = line.split("=", 1)
+                metadata[key] = value
+        return metadata
 
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_bad_input_parameterized(self, tmp_path, method):
@@ -1087,7 +1092,9 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec):
         dest = str(tmp_path / f"output.{format}")
 
         VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec_spec)
-        actual_codec_spec = self._get_codec_spec(dest)
+        actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"]).get(
+            "codec_name"
+        )
         assert actual_codec_spec == codec_spec
 
     @pytest.mark.skipif(
@@ -1123,61 +1130,51 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl):
             dest=impl_output, codec=codec_impl
         )
 
-        assert self._get_codec_spec(spec_output) == codec_spec
-        assert self._get_codec_spec(impl_output) == codec_spec
+        assert (
+            self._get_video_metadata(spec_output, fields=["codec_name"]).get(
+                "codec_name"
+            )
+            == codec_spec
+        )
+        assert (
+            self._get_video_metadata(impl_output, fields=["codec_name"]).get(
+                "codec_name"
+            )
+            == codec_spec
+        )
 
         frames_spec = self.decode(spec_output).data
         frames_impl = self.decode(impl_output).data
         torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0)
 
-    def get_video_metadata_from_file(self, file_path):
-        """Helper function to get video metadata from a file using ffprobe.
-
-        Returns a dict with codec_name, profile, pix_fmt, etc.
-        """
-        result = subprocess.run(
-            [
-                "ffprobe",
-                "-v",
-                "error",
-                "-select_streams",
-                "v:0",
-                "-show_entries",
-                "stream=codec_name,profile,pix_fmt",
-                "-of",
-                "default=noprint_wrappers=1",
-                str(file_path),
-            ],
-            check=True,
-            capture_output=True,
-            text=True,
-        )
-        # Parse output like "codec_name=h264\nprofile=Baseline\npix_fmt=yuv420p"
-        metadata = {}
-        for line in result.stdout.strip().split("\n"):
-            if "=" in line:
-                key, value = line.split("=", 1)
-                metadata[key] = value
-        return metadata
-
     @pytest.mark.skipif(in_fbcode(), reason="ffprobe not available")
-    def test_codec_options_profile(self, tmp_path):
-        # Test that we can set the H.264 profile via codec_options
-        # and validate it was used by checking with ffprobe.
+    @pytest.mark.parametrize(
+        "profile,colorspace,color_range",
+        [
+            ("baseline", "bt709", "tv"),
+            ("main", "bt470bg", "pc"),
+            ("high", "fcc", "pc"),
+        ],
+    )
+    def test_codec_options_utilized(self, tmp_path, profile, colorspace, color_range):
+        # Test setting profile, colorspace, and color_range via codec_options is utilized
         source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)
         encoder = VideoEncoder(frames=source_frames, frame_rate=30)
 
-        # Encode with baseline profile (explicitly specified via codec_options)
-        output_path = tmp_path / "output_baseline.mp4"
+        output_path = str(tmp_path / "output.mp4")
         encoder.to_file(
-            dest=str(output_path),
-            codec_options={"profile": "baseline"},
+            dest=output_path,
+            codec_options={
+                "profile": profile,
+                "colorspace": colorspace,
+                "color_range": color_range,
+            },
         )
-
-        # Validate the profile was used
-        metadata = self.get_video_metadata_from_file(output_path)
-        assert metadata.get("codec_name") == "h264"
-        assert metadata.get("profile") in (
-            "Baseline",
-            "Constrained Baseline",
-        ), f"Expected Baseline profile, got {metadata.get('profile')}"
+        metadata = self._get_video_metadata(
+            output_path,
+            fields=["profile", "color_space", "color_range"],
+        )
+        # Validate profile (case-insensitive, baseline is reported as "Constrained Baseline")
+        assert profile in metadata.get("profile", "").lower()
+        assert metadata.get("color_space") == colorspace
+        assert metadata.get("color_range") == color_range

From 75486878080876d7f2a36cae3f6e548df41c0b69 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 12:07:44 -0500
Subject: [PATCH 6/7] rename codec_options to extra_options

---
 src/torchcodec/_core/Encoder.cpp          | 10 +++----
 src/torchcodec/_core/StreamOptions.h      |  2 +-
 src/torchcodec/_core/custom_ops.cpp       | 34 +++++++++++------------
 src/torchcodec/_core/ops.py               | 12 ++++----
 src/torchcodec/encoders/_video_encoder.py | 30 ++++++++++----------
 test/test_encoders.py                     | 12 ++++----
 6 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
index 98688046e..3d052ab50 100644
--- a/src/torchcodec/_core/Encoder.cpp
+++ b/src/torchcodec/_core/Encoder.cpp
@@ -620,13 +620,13 @@ void tryToValidateCodecOption(
 }
 
 void sortCodecOptions(
-    const std::map<std::string, std::string>& codecOptions,
+    const std::map<std::string, std::string>& extraOptions,
     AVDictionary** codecDict,
     AVDictionary** formatDict) {
   // Accepts a map of options as input, then sorts them into codec options and
   // format options. The sorted options are returned into two separate dicts.
   const AVClass* formatClass = avformat_get_class();
-  for (const auto& [key, value] : codecOptions) {
+  for (const auto& [key, value] : extraOptions) {
     const AVOption* fmtOpt = av_opt_find2(
         &formatClass,
         key.c_str(),
@@ -798,12 +798,12 @@ void VideoEncoder::initializeEncoder(
 
   // Apply videoStreamOptions
   AVDictionary* avCodecOptions = nullptr;
-  if (videoStreamOptions.codecOptions.has_value()) {
-    for (const auto& [key, value] : videoStreamOptions.codecOptions.value()) {
+  if (videoStreamOptions.extraOptions.has_value()) {
+    for (const auto& [key, value] : videoStreamOptions.extraOptions.value()) {
       tryToValidateCodecOption(*avCodec, key.c_str(), value);
     }
     sortCodecOptions(
-        videoStreamOptions.codecOptions.value(),
+        videoStreamOptions.extraOptions.value(),
         &avCodecOptions,
         &avFormatOptions_);
   }
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
index 96d2fb9c5..ce0f27d3b 100644
--- a/src/torchcodec/_core/StreamOptions.h
+++ b/src/torchcodec/_core/StreamOptions.h
@@ -52,7 +52,7 @@ struct VideoStreamOptions {
   std::optional<std::string> pixelFormat;
   std::optional<double> crf;
   std::optional<std::string> preset;
-  std::optional<std::map<std::string, std::string>> codecOptions;
+  std::optional<std::map<std::string, std::string>> extraOptions;
 };
 
 struct AudioStreamOptions {
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
index ce3cd6aa3..3836e52da 100644
--- a/src/torchcodec/_core/custom_ops.cpp
+++ b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? codec_options=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, float? crf=None, str? preset=None, str[]? extra_options=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -158,8 +158,8 @@ std::string quoteValue(const std::string& value) {
   return "\"" + value + "\"";
 }
 
-// Helper function to unflatten codec_options, alternating keys and values
-std::map<std::string, std::string> unflattenCodecOptions(
+// Helper function to unflatten extra_options, alternating keys and values
+std::map<std::string, std::string> unflattenExtraOptions(
     const std::vector<std::string>& opts) {
   std::map<std::string, std::string> optionsMap;
   for (size_t i = 0; i < opts.size(); i += 2) {
@@ -617,16 +617,16 @@ void encode_video_to_file(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
 
-  if (codec_options.has_value()) {
-    videoStreamOptions.codecOptions =
-        unflattenCodecOptions(codec_options.value());
+  if (extra_options.has_value()) {
+    videoStreamOptions.extraOptions =
+        unflattenExtraOptions(extra_options.value());
   }
 
   VideoEncoder(
@@ -645,7 +645,7 @@ at::Tensor encode_video_to_tensor(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.codec = codec;
@@ -653,9 +653,9 @@ at::Tensor encode_video_to_tensor(
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
 
-  if (codec_options.has_value()) {
-    videoStreamOptions.codecOptions =
-        unflattenCodecOptions(codec_options.value());
+  if (extra_options.has_value()) {
+    videoStreamOptions.extraOptions =
+        unflattenExtraOptions(extra_options.value());
   }
 
   return VideoEncoder(
@@ -676,7 +676,7 @@ void _encode_video_to_file_like(
     std::optional<std::string_view> pixel_format = std::nullopt,
     std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt,
-    std::optional<std::vector<std::string>> codec_options = std::nullopt) {
+    std::optional<std::vector<std::string>> extra_options = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -689,9 +689,9 @@ void _encode_video_to_file_like(
   videoStreamOptions.crf = crf;
   videoStreamOptions.preset = preset;
 
-  if (codec_options.has_value()) {
-    videoStreamOptions.codecOptions =
-        unflattenCodecOptions(codec_options.value());
+  if (extra_options.has_value()) {
+    videoStreamOptions.extraOptions =
+        unflattenExtraOptions(extra_options.value());
   }
 
   VideoEncoder encoder(
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
index 23b71130a..6823f4037 100644
--- a/src/torchcodec/_core/ops.py
+++ b/src/torchcodec/_core/ops.py
@@ -217,7 +217,7 @@ def encode_video_to_file_like(
     pixel_format: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
-    codec_options: Optional[list[str]] = None,
+    extra_options: Optional[list[str]] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
@@ -230,7 +230,7 @@ def encode_video_to_file_like(
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
         crf: Optional constant rate factor for encoding quality
         preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
-        codec_options: Optional list of codec options as flattened key-value pairs
+        extra_options: Optional list of extra options as flattened key-value pairs
     """
     assert _pybind_ops is not None
 
@@ -243,7 +243,7 @@ def encode_video_to_file_like(
         pixel_format,
         crf,
         preset,
-        codec_options,
+        extra_options,
     )
 
 
@@ -335,7 +335,7 @@ def encode_video_to_file_abstract(
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
-    codec_options: Optional[list[str]] = None,
+    extra_options: Optional[list[str]] = None,
 ) -> None:
     return
 
@@ -349,7 +349,7 @@ def encode_video_to_tensor_abstract(
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
-    codec_options: Optional[list[str]] = None,
+    extra_options: Optional[list[str]] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -364,7 +364,7 @@ def _encode_video_to_file_like_abstract(
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
     crf: Optional[Union[int, float]] = None,
-    codec_options: Optional[list[str]] = None,
+    extra_options: Optional[list[str]] = None,
 ) -> None:
     return
 
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index 3626fc285..f35329bb2 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -35,7 +35,7 @@ def __init__(self, frames: Tensor, *, frame_rate: int):
     def to_file(
         self,
         dest: Union[str, Path],
-        codec_options: Optional[Dict[str, Any]] = None,
+        extra_options: Optional[Dict[str, Any]] = None,
         *,
         codec: Optional[str] = None,
         pixel_format: Optional[str] = None,
@@ -60,8 +60,8 @@ def to_file(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, Any], optional): A dictionary of codec-specific
-                options to pass to the encoder, e.g. ``{"qp": 5, "tune": "film"}``.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset
@@ -73,8 +73,8 @@ def to_file(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
-            codec_options=[
-                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            extra_options=[
+                x for k, v in (extra_options or {}).items() for x in (k, str(v))
             ],
         )
 
@@ -86,7 +86,7 @@ def to_tensor(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
-        codec_options: Optional[Dict[str, Any]] = None,
+        extra_options: Optional[Dict[str, Any]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
@@ -105,8 +105,8 @@ def to_tensor(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, Any], optional): A dictionary of codec-specific
-                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
 
         Returns:
@@ -121,8 +121,8 @@ def to_tensor(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset_value,
-            codec_options=[
-                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            extra_options=[
+                x for k, v in (extra_options or {}).items() for x in (k, str(v))
             ],
         )
 
@@ -135,7 +135,7 @@ def to_file_like(
         pixel_format: Optional[str] = None,
         crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
-        codec_options: Optional[Dict[str, Any]] = None,
+        extra_options: Optional[Dict[str, Any]] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -159,8 +159,8 @@ def to_file_like(
                 encoding speed and compression. Valid values depend on the encoder (commonly
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
-            codec_options (dict[str, Any], optional): A dictionary of codec-specific
-                options to pass to the encoder, e.g. ``{"preset": "slow", "tune": "film"}``.
+            extra_options (dict[str, Any], optional): A dictionary of additional
+                encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset
@@ -173,7 +173,7 @@ def to_file_like(
             pixel_format=pixel_format,
             crf=crf,
             preset=preset,
-            codec_options=[
-                x for k, v in (codec_options or {}).items() for x in (k, str(v))
+            extra_options=[
+                x for k, v in (extra_options or {}).items() for x in (k, str(v))
             ],
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
index 6a8210ec6..a7089db57 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -726,7 +726,7 @@ def test_pixel_format_errors(self, method, tmp_path):
             getattr(encoder, method)(**valid_params, pixel_format="rgb24")
 
     @pytest.mark.parametrize(
-        "codec_options,error",
+        "extra_options,error",
         [
             ({"qp": -10}, "qp=-10 is out of valid range"),
             (
@@ -745,7 +745,7 @@ def test_pixel_format_errors(self, method, tmp_path):
         ],
     )
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
-    def test_codec_options_errors(self, method, tmp_path, codec_options, error):
+    def test_extra_options_errors(self, method, tmp_path, extra_options, error):
         frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)
         encoder = VideoEncoder(frames, frame_rate=30)
 
@@ -762,7 +762,7 @@ def test_codec_options_errors(self, method, tmp_path, codec_options, error):
             RuntimeError,
             match=error,
         ):
-            getattr(encoder, method)(**valid_params, codec_options=codec_options)
+            getattr(encoder, method)(**valid_params, extra_options=extra_options)
 
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_contiguity(self, method, tmp_path):
@@ -1156,15 +1156,15 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl):
             ("high", "fcc", "pc"),
         ],
     )
-    def test_codec_options_utilized(self, tmp_path, profile, colorspace, color_range):
-        # Test setting profile, colorspace, and color_range via codec_options is utilized
+    def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range):
+        # Test setting profile, colorspace, and color_range via extra_options is utilized
         source_frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)
         encoder = VideoEncoder(frames=source_frames, frame_rate=30)
 
         output_path = str(tmp_path / "output.mp4")
         encoder.to_file(
             dest=output_path,
-            codec_options={
+            extra_options={
                 "profile": profile,
                 "colorspace": colorspace,
                 "color_range": color_range,

From f933cef775269e4bd84d963b1c4769a14486d553 Mon Sep 17 00:00:00 2001
From: Daniel Flores <danielflores3@meta.com>
Date: Fri, 14 Nov 2025 15:54:13 -0500
Subject: [PATCH 7/7] str cast, docs update, [], remove 'in' from test

---
 src/torchcodec/encoders/_video_encoder.py | 10 +++++-----
 test/test_encoders.py                     | 19 ++++++++-----------
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
index f35329bb2..909cf73a9 100644
--- a/src/torchcodec/encoders/_video_encoder.py
+++ b/src/torchcodec/encoders/_video_encoder.py
@@ -74,7 +74,7 @@ def to_file(
             crf=crf,
             preset=preset,
             extra_options=[
-                x for k, v in (extra_options or {}).items() for x in (k, str(v))
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
         )
 
@@ -106,7 +106,7 @@ def to_tensor(
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
             extra_options (dict[str, Any], optional): A dictionary of additional
-                encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``.
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
 
         Returns:
@@ -122,7 +122,7 @@ def to_tensor(
             crf=crf,
             preset=preset_value,
             extra_options=[
-                x for k, v in (extra_options or {}).items() for x in (k, str(v))
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
         )
 
@@ -160,7 +160,7 @@ def to_file_like(
                 a string: "fast", "medium", "slow"). Defaults to None
                 (which will use encoder's default).
             extra_options (dict[str, Any], optional): A dictionary of additional
-                encoder options to pass, e.g. ``{"preset": "slow", "tune": "film"}``.
+                encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
                 Values will be converted to strings before passing to the encoder.
         """
         preset = str(preset) if isinstance(preset, int) else preset
@@ -174,6 +174,6 @@ def to_file_like(
             crf=crf,
             preset=preset,
             extra_options=[
-                x for k, v in (extra_options or {}).items() for x in (k, str(v))
+                str(x) for k, v in (extra_options or {}).items() for x in (k, v)
             ],
         )
diff --git a/test/test_encoders.py b/test/test_encoders.py
index a7089db57..714a857b5 100644
--- a/test/test_encoders.py
+++ b/test/test_encoders.py
@@ -1092,9 +1092,9 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec):
         dest = str(tmp_path / f"output.{format}")
 
         VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec_spec)
-        actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"]).get(
+        actual_codec_spec = self._get_video_metadata(dest, fields=["codec_name"])[
             "codec_name"
-        )
+        ]
         assert actual_codec_spec == codec_spec
 
     @pytest.mark.skipif(
@@ -1131,15 +1131,11 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl):
         )
 
         assert (
-            self._get_video_metadata(spec_output, fields=["codec_name"]).get(
-                "codec_name"
-            )
+            self._get_video_metadata(spec_output, fields=["codec_name"])["codec_name"]
             == codec_spec
         )
         assert (
-            self._get_video_metadata(impl_output, fields=["codec_name"]).get(
-                "codec_name"
-            )
+            self._get_video_metadata(impl_output, fields=["codec_name"])["codec_name"]
             == codec_spec
         )
 
@@ -1175,6 +1171,7 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
             fields=["profile", "color_space", "color_range"],
         )
         # Validate profile (case-insensitive, baseline is reported as "Constrained Baseline")
-        assert profile in metadata.get("profile", "").lower()
-        assert metadata.get("color_space") == colorspace
-        assert metadata.get("color_range") == color_range
+        expected_profile = "constrained baseline" if profile == "baseline" else profile
+        assert metadata["profile"].lower() == expected_profile
+        assert metadata["color_space"] == colorspace
+        assert metadata["color_range"] == color_range