From 976bd2c1342c20661a38401ddc95a63aca847571 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 12 Nov 2025 14:17:50 -0500 Subject: [PATCH 1/7] add codec selection + logic, add valid codecs test --- src/torchcodec/_core/Encoder.cpp | 27 ++++++++++++++++++++--- src/torchcodec/_core/StreamOptions.h | 1 + src/torchcodec/_core/custom_ops.cpp | 12 +++++++--- src/torchcodec/_core/ops.py | 10 +++++++-- src/torchcodec/encoders/_video_encoder.py | 21 +++++++++++++++--- test/test_encoders.py | 24 ++++++++++++++++++++ 6 files changed, 84 insertions(+), 11 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 239b4c828..3d60fe337 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -649,9 +649,30 @@ VideoEncoder::VideoEncoder( void VideoEncoder::initializeEncoder( const VideoStreamOptions& videoStreamOptions) { - const AVCodec* avCodec = - avcodec_find_encoder(avFormatContext_->oformat->video_codec); - TORCH_CHECK(avCodec != nullptr, "Video codec not found"); + const AVCodec* avCodec = nullptr; + // If codec arg is provided, find codec using logic similar to FFmpeg: + // https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835 + if (videoStreamOptions.codec.has_value()) { + const std::string& codec = videoStreamOptions.codec.value(); + // Try to find codec by name ("libx264", "libsvtav1") + avCodec = avcodec_find_encoder_by_name(codec.c_str()); + // Try to find by codec descriptor ("h264", "av1") + if (!avCodec) { + const AVCodecDescriptor* desc = + avcodec_descriptor_get_by_name(codec.c_str()); + if (desc) { + avCodec = avcodec_find_encoder(desc->id); + } + } + TORCH_CHECK( + avCodec != nullptr, + "Video codec ", + codec, + " not found. Provide a codec name ('libx264', 'libx265') or a codec descriptor ('h264', 'hevc'), or do not specify a codec to use the default codec."); + } else { + avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); + TORCH_CHECK(avCodec != nullptr, "Video codec not found"); + } AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec); TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index b7647176c..41858be2b 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -45,6 +45,7 @@ struct VideoStreamOptions { std::string_view deviceVariant = "ffmpeg"; // Encoding options + std::optional codec; // TODO-VideoEncoder: Consider adding other optional fields here // (bit rate, gop size, max b frames, preset) std::optional crf; diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index a9c9c9b78..d25eff72c 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? codec=None, str? pixel_format=None, int? crf=None) -> ()"); m.def( - "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor"); + "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? codec=None, str? pixel_format=None, int? crf=None) -> Tensor"); m.def( - "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()"); + "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? codec=None, str? pixel_format=None, int? crf=None) -> ()"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); m.def( @@ -603,9 +603,11 @@ void encode_video_to_file( const at::Tensor& frames, int64_t frame_rate, std::string_view file_name, + std::optional codec = std::nullopt, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt) { VideoStreamOptions videoStreamOptions; + videoStreamOptions.codec = codec; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; VideoEncoder( @@ -620,10 +622,12 @@ at::Tensor encode_video_to_tensor( const at::Tensor& frames, int64_t frame_rate, std::string_view format, + std::optional codec = std::nullopt, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt) { auto avioContextHolder = std::make_unique(); VideoStreamOptions videoStreamOptions; + videoStreamOptions.codec = codec; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; return VideoEncoder( @@ -640,6 +644,7 @@ void _encode_video_to_file_like( int64_t frame_rate, std::string_view format, int64_t file_like_context, + std::optional codec = std::nullopt, std::optional pixel_format = std::nullopt, std::optional crf = std::nullopt) { auto fileLikeContext = @@ -649,6 +654,7 @@ void _encode_video_to_file_like( std::unique_ptr avioContextHolder(fileLikeContext); VideoStreamOptions videoStreamOptions; + videoStreamOptions.codec = codec; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index cd6174245..671d5a945 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -213,6 +213,7 @@ def encode_video_to_file_like( frame_rate: int, format: str, file_like: Union[io.RawIOBase, io.BufferedIOBase], + codec: Optional[str] = None, crf: Optional[int] = None, pixel_format: Optional[str] = None, ) -> None: @@ -223,6 +224,7 @@ def encode_video_to_file_like( frame_rate: Frame rate in frames per second format: Video format (e.g., "mp4", "mov", "mkv") file_like: File-like object that supports write() and seek() methods + codec: Optional codec name (e.g., "libx264", "h264") crf: Optional constant rate factor for encoding quality pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") """ @@ -233,6 +235,7 @@ def encode_video_to_file_like( frame_rate, format, _pybind_ops.create_file_like_context(file_like, True), # True means for writing + codec, pixel_format, crf, ) @@ -322,8 +325,9 @@ def encode_video_to_file_abstract( frames: torch.Tensor, frame_rate: int, filename: str, - crf: Optional[int] = None, + codec: Optional[str], pixel_format: Optional[str] = None, + crf: Optional[int] = None, ) -> None: return @@ -333,6 +337,7 @@ def encode_video_to_tensor_abstract( frames: torch.Tensor, frame_rate: int, format: str, + codec: Optional[str], crf: Optional[int] = None, pixel_format: Optional[str] = None, ) -> torch.Tensor: @@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract( frame_rate: int, format: str, file_like_context: int, - crf: Optional[int] = None, + codec: Optional[str] = None, pixel_format: Optional[str] = None, + crf: Optional[int] = None, ) -> None: return diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index e0630d012..af28b8865 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -36,6 +36,7 @@ def to_file( self, dest: Union[str, Path], *, + codec: Optional[str] = None, pixel_format: Optional[str] = None, ) -> None: """Encode frames into a file. @@ -44,6 +45,9 @@ def to_file( dest (str or ``pathlib.Path``): The path to the output file, e.g. ``video.mp4``. The extension of the file determines the video container format. + codec (str, optional): The codec to use for encoding (e.g., "libx264", + "h264"). If not specified, the default codec + for the container format will be used. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. """ @@ -51,6 +55,7 @@ def to_file( frames=self._frames, frame_rate=self._frame_rate, filename=str(dest), + codec=codec, pixel_format=pixel_format, ) @@ -58,23 +63,28 @@ def to_tensor( self, format: str, *, + codec: Optional[str] = None, pixel_format: Optional[str] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. Args: format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif" + "mkv", "avi", "webm", "flv", etc. + codec (str, optional): The codec to use for encoding (e.g., "libx264", + "h264"). If not specified, the default codec + for the container format will be used. pixel_format (str, optional): The pixel format to encode frames into (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. Returns: - Tensor: The raw encoded bytes as 4D uint8 Tensor. + Tensor: The raw encoded bytes as 1D uint8 Tensor. """ return _core.encode_video_to_tensor( frames=self._frames, frame_rate=self._frame_rate, format=format, + codec=codec, pixel_format=pixel_format, ) @@ -83,6 +93,7 @@ def to_file_like( file_like, format: str, *, + codec: Optional[str] = None, pixel_format: Optional[str] = None, ) -> None: """Encode frames into a file-like object. @@ -94,7 +105,10 @@ def to_file_like( ``write(data: bytes) -> int`` and ``seek(offset: int, whence: int = 0) -> int``. format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif". + "mkv", "avi", "webm", "flv", etc. + codec (str, optional): The codec to use for encoding (e.g., "libx264", + "h264"). If not specified, the default codec + for the container format will be used. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. """ @@ -103,5 +117,6 @@ def to_file_like( frame_rate=self._frame_rate, format=format, file_like=file_like, + codec=codec, pixel_format=pixel_format, ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 922b67bbb..e1bed1a21 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -605,6 +605,12 @@ def test_bad_input_parameterized(self, tmp_path, method): ) getattr(encoder, method)(**valid_params) + with pytest.raises( + RuntimeError, + match=r"Video codec invalid_codec_name not found.", + ): + encoder.to_file(str(tmp_path / "output.mp4"), codec="invalid_codec_name") + def test_bad_input(self, tmp_path): encoder = VideoEncoder( frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), @@ -629,6 +635,24 @@ def test_bad_input(self, tmp_path): ): encoder.to_tensor(format="bad_format") + @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) + @pytest.mark.parametrize("codec", ["h264", "hevc", "av1", "libx264", None]) + def test_codec_valid_values(self, method, codec, tmp_path): + if method == "to_file": + valid_params = {"dest": str(tmp_path / "test.mp4")} + elif method == "to_tensor": + valid_params = {"format": "mp4"} + elif method == "to_file_like": + valid_params = dict(file_like=io.BytesIO(), format="mp4") + else: + raise ValueError(f"Unknown method: {method}") + + encoder = VideoEncoder( + frames=torch.zeros((5, 3, 128, 128), dtype=torch.uint8), + frame_rate=30, + ) + getattr(encoder, method)(**valid_params, codec=codec) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_pixel_format_errors(self, method, tmp_path): frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) From d60764eb38b8904e34e2df0f43db642abd99ab04 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 12 Nov 2025 14:38:58 -0500 Subject: [PATCH 2/7] consistent arg order in ops.py --- src/torchcodec/_core/ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 671d5a945..db5468762 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -214,8 +214,8 @@ def encode_video_to_file_like( format: str, file_like: Union[io.RawIOBase, io.BufferedIOBase], codec: Optional[str] = None, - crf: Optional[int] = None, pixel_format: Optional[str] = None, + crf: Optional[int] = None, ) -> None: """Encode video frames to a file-like object. @@ -225,8 +225,8 @@ def encode_video_to_file_like( format: Video format (e.g., "mp4", "mov", "mkv") file_like: File-like object that supports write() and seek() methods codec: Optional codec name (e.g., "libx264", "h264") - crf: Optional constant rate factor for encoding quality pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") + crf: Optional constant rate factor for encoding quality """ assert _pybind_ops is not None @@ -338,8 +338,8 @@ def encode_video_to_tensor_abstract( frame_rate: int, format: str, codec: Optional[str], - crf: Optional[int] = None, pixel_format: Optional[str] = None, + crf: Optional[int] = None, ) -> torch.Tensor: return torch.empty([], dtype=torch.long) From adb469d5db2c5bc864af78b06ade9a2b947e99ce Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Wed, 12 Nov 2025 14:44:45 -0500 Subject: [PATCH 3/7] fix test w correct frame dims --- test/test_encoders.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_encoders.py b/test/test_encoders.py index e1bed1a21..b4e16499a 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -609,6 +609,10 @@ def test_bad_input_parameterized(self, tmp_path, method): RuntimeError, match=r"Video codec invalid_codec_name not found.", ): + encoder = VideoEncoder( + frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), + frame_rate=30, + ) encoder.to_file(str(tmp_path / "output.mp4"), codec="invalid_codec_name") def test_bad_input(self, tmp_path): From cd7f8f170189e0e24e57482b2ff3f16f434905cf Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 13 Nov 2025 13:56:29 -0500 Subject: [PATCH 4/7] wip new tests2 --- test/test_encoders.py | 117 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 17 deletions(-) diff --git a/test/test_encoders.py b/test/test_encoders.py index b4e16499a..b30a90b3b 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -567,6 +567,27 @@ def write(self, data): class TestVideoEncoder: + def _get_codec_name(self, file_path): + """Helper function to get codec name from a video file using ffprobe.""" + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name", + "-of", + "default=noprint_wrappers=1:nokey=1", + str(file_path), + ], + capture_output=True, + check=True, + text=True, + ) + return result.stdout.strip() + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_bad_input_parameterized(self, tmp_path, method): if method == "to_file": @@ -639,23 +660,23 @@ def test_bad_input(self, tmp_path): ): encoder.to_tensor(format="bad_format") - @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) - @pytest.mark.parametrize("codec", ["h264", "hevc", "av1", "libx264", None]) - def test_codec_valid_values(self, method, codec, tmp_path): - if method == "to_file": - valid_params = {"dest": str(tmp_path / "test.mp4")} - elif method == "to_tensor": - valid_params = {"format": "mp4"} - elif method == "to_file_like": - valid_params = dict(file_like=io.BytesIO(), format="mp4") - else: - raise ValueError(f"Unknown method: {method}") - - encoder = VideoEncoder( - frames=torch.zeros((5, 3, 128, 128), dtype=torch.uint8), - frame_rate=30, - ) - getattr(encoder, method)(**valid_params, codec=codec) + # @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) + # @pytest.mark.parametrize("codec", ["h264", "hevc", "av1", "libx264", None]) + # def test_codec_valid_values(self, method, codec, tmp_path): + # if method == "to_file": + # valid_params = {"dest": str(tmp_path / "test.mp4")} + # elif method == "to_tensor": + # valid_params = {"format": "mp4"} + # elif method == "to_file_like": + # valid_params = dict(file_like=io.BytesIO(), format="mp4") + # else: + # raise ValueError(f"Unknown method: {method}") + + # encoder = VideoEncoder( + # frames=torch.zeros((5, 3, 128, 128), dtype=torch.uint8), + # frame_rate=30, + # ) + # getattr(encoder, method)(**valid_params, codec=codec) @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_pixel_format_errors(self, method, tmp_path): @@ -728,3 +749,65 @@ def encode_to_tensor(frames): torch.testing.assert_close( encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0 ) + + @pytest.mark.parametrize( + "format,codec", + [ + ("mp4", "h264"), + ("mp4", "hevc"), + ("mkv", "av1"), + ("avi", "mpeg4"), + ("webm", "vp9"), + ], + ) + def test_codec_parameter_utilized(self, tmp_path, format, codec): + # Test the codec parameter is utilized by using ffprobe to check the encoded file's codec spec + frames = torch.randint(0, 256, (10, 3, 128, 128), dtype=torch.uint8) + dest = str(tmp_path / f"output.{format}") + VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec) + + actual_codec = self._get_codec_name(dest) + print(f"Expected codec: {codec}, Actual codec: {actual_codec}") + assert actual_codec == codec + + @pytest.mark.parametrize( + "codec_spec,codec_impl", + [ + ("h264", "libx264"), + ("hevc", "libx265"), + ("av1", "libaom-av1"), + ("vp9", "libvpx-vp9"), + ], + ) + def test_codec_spec_vs_implementation_equivalence( + self, tmp_path, codec_spec, codec_impl + ): + # Test that using codec spec gives the same result as using default codec implementation + frames = torch.randint(0, 256, (10, 3, 64, 64), dtype=torch.uint8) + + spec_output = tmp_path / "spec_output.mp4" + encoder_spec = VideoEncoder(frames=frames, frame_rate=30) + encoder_spec.to_file(dest=str(spec_output), codec=codec_spec, crf=0) + + impl_output = tmp_path / "impl_output.mp4" + encoder_impl = VideoEncoder(frames=frames, frame_rate=30) + encoder_impl.to_file(dest=str(impl_output), codec=codec_impl, crf=0) + + # Verify both files use the same codec spec + spec_codec_name = self._get_codec_name(spec_output) + impl_codec_name = self._get_codec_name(impl_output) + + assert spec_codec_name == impl_codec_name + assert spec_codec_name == codec_spec + + # Decode both and verify frames are identical + from torchcodec.decoders import VideoDecoder + + decoder_spec = VideoDecoder(str(spec_output)) + decoder_impl = VideoDecoder(str(impl_output)) + + frames_spec = decoder_spec.get_frames_in_range(0, 10).data + frames_impl = decoder_impl.get_frames_in_range(0, 10).data + + # The decoded frames should be exactly the same + torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0) From 6c15e76abe80d85ab7003728210f6c2b5adc4c59 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 13 Nov 2025 14:40:00 -0500 Subject: [PATCH 5/7] cleaned tests, add fbcode skip --- test/test_encoders.py | 63 ++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/test/test_encoders.py b/test/test_encoders.py index 60009532a..c975b2093 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -572,7 +572,7 @@ class TestVideoEncoder: def decode(self, source=None) -> torch.Tensor: return VideoDecoder(source).get_frames_in_range(start=0, stop=60) - def _get_codec_name(self, file_path): + def _get_codec_spec(self, file_path): """Helper function to get codec name from a video file using ffprobe.""" result = subprocess.run( [ @@ -1011,8 +1011,12 @@ def write(self, data): ): encoder.to_file_like(NoSeekMethod(), format="mp4") + @pytest.mark.skipif( + in_fbcode(), + reason="ffprobe not available internally", + ) @pytest.mark.parametrize( - "format,codec", + "format,codec_spec", [ ("mp4", "h264"), ("mp4", "hevc"), @@ -1021,54 +1025,45 @@ def write(self, data): ("webm", "vp9"), ], ) - def test_codec_parameter_utilized(self, tmp_path, format, codec): + def test_codec_parameter_utilized(self, tmp_path, format, codec_spec): # Test the codec parameter is utilized by using ffprobe to check the encoded file's codec spec - frames = torch.randint(0, 256, (10, 3, 128, 128), dtype=torch.uint8) + frames = torch.zeros((10, 3, 64, 64), dtype=torch.uint8) dest = str(tmp_path / f"output.{format}") - VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec) - actual_codec = self._get_codec_name(dest) - print(f"Expected codec: {codec}, Actual codec: {actual_codec}") - assert actual_codec == codec + VideoEncoder(frames=frames, frame_rate=30).to_file(dest=dest, codec=codec_spec) + actual_codec_spec = self._get_codec_spec(dest) + assert actual_codec_spec == codec_spec + @pytest.mark.skipif( + in_fbcode(), + reason="ffprobe not available internally", + ) @pytest.mark.parametrize( "codec_spec,codec_impl", [ ("h264", "libx264"), - ("hevc", "libx265"), ("av1", "libaom-av1"), ("vp9", "libvpx-vp9"), ], ) - def test_codec_spec_vs_implementation_equivalence( - self, tmp_path, codec_spec, codec_impl - ): + def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl): # Test that using codec spec gives the same result as using default codec implementation + # We cannot directly check codec impl used, so we assert frame equality frames = torch.randint(0, 256, (10, 3, 64, 64), dtype=torch.uint8) - spec_output = tmp_path / "spec_output.mp4" - encoder_spec = VideoEncoder(frames=frames, frame_rate=30) - encoder_spec.to_file(dest=str(spec_output), codec=codec_spec, crf=0) - - impl_output = tmp_path / "impl_output.mp4" - encoder_impl = VideoEncoder(frames=frames, frame_rate=30) - encoder_impl.to_file(dest=str(impl_output), codec=codec_impl, crf=0) - - # Verify both files use the same codec spec - spec_codec_name = self._get_codec_name(spec_output) - impl_codec_name = self._get_codec_name(impl_output) - - assert spec_codec_name == impl_codec_name - assert spec_codec_name == codec_spec - - # Decode both and verify frames are identical - from torchcodec.decoders import VideoDecoder + spec_output = str(tmp_path / "spec_output.mp4") + VideoEncoder(frames=frames, frame_rate=30).to_file( + dest=spec_output, codec=codec_spec + ) - decoder_spec = VideoDecoder(str(spec_output)) - decoder_impl = VideoDecoder(str(impl_output)) + impl_output = str(tmp_path / "impl_output.mp4") + VideoEncoder(frames=frames, frame_rate=30).to_file( + dest=impl_output, codec=codec_impl + ) - frames_spec = decoder_spec.get_frames_in_range(0, 10).data - frames_impl = decoder_impl.get_frames_in_range(0, 10).data + assert self._get_codec_spec(spec_output) == codec_spec + assert self._get_codec_spec(impl_output) == codec_spec - # The decoded frames should be exactly the same + frames_spec = self.decode(spec_output).data + frames_impl = self.decode(impl_output).data torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0) From 8b48e185dcb809b1766aeeffdf2447d6237d5563 Mon Sep 17 00:00:00 2001 From: Daniel Flores Date: Thu, 13 Nov 2025 17:15:50 -0500 Subject: [PATCH 6/7] skip vp9 on windows, update error message to suggest calling ffmpeg cli --- src/torchcodec/_core/Encoder.cpp | 2 +- test/test_encoders.py | 34 +++++++++++++------------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 4291c3cab..df7e47e08 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -706,7 +706,7 @@ void VideoEncoder::initializeEncoder( avCodec != nullptr, "Video codec ", codec, - " not found. Provide a codec name ('libx264', 'libx265') or a codec descriptor ('h264', 'hevc'), or do not specify a codec to use the default codec."); + " not found. To see available codecs, run: ffmpeg -encoders"); } else { avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); TORCH_CHECK(avCodec != nullptr, "Video codec not found"); diff --git a/test/test_encoders.py b/test/test_encoders.py index c975b2093..eb80a2286 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -690,24 +690,6 @@ def test_bad_input(self, tmp_path): ): encoder.to_tensor(format="bad_format") - # @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) - # @pytest.mark.parametrize("codec", ["h264", "hevc", "av1", "libx264", None]) - # def test_codec_valid_values(self, method, codec, tmp_path): - # if method == "to_file": - # valid_params = {"dest": str(tmp_path / "test.mp4")} - # elif method == "to_tensor": - # valid_params = {"format": "mp4"} - # elif method == "to_file_like": - # valid_params = dict(file_like=io.BytesIO(), format="mp4") - # else: - # raise ValueError(f"Unknown method: {method}") - - # encoder = VideoEncoder( - # frames=torch.zeros((5, 3, 128, 128), dtype=torch.uint8), - # frame_rate=30, - # ) - # getattr(encoder, method)(**valid_params, codec=codec) - @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_pixel_format_errors(self, method, tmp_path): frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8) @@ -1022,7 +1004,13 @@ def write(self, data): ("mp4", "hevc"), ("mkv", "av1"), ("avi", "mpeg4"), - ("webm", "vp9"), + pytest.param( + "webm", + "vp9", + marks=pytest.mark.skipif( + IS_WINDOWS, reason="vp9 codec not available on Windows" + ), + ), ], ) def test_codec_parameter_utilized(self, tmp_path, format, codec_spec): @@ -1043,7 +1031,13 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec): [ ("h264", "libx264"), ("av1", "libaom-av1"), - ("vp9", "libvpx-vp9"), + pytest.param( + "vp9", + "libvpx-vp9", + marks=pytest.mark.skipif( + IS_WINDOWS, reason="vp9 codec not available on Windows" + ), + ), ], ) def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl): From b045b6c0a0837fb9dfd8bda00f0cc9a5de733ace Mon Sep 17 00:00:00 2001 From: Dan-Flores Date: Fri, 14 Nov 2025 09:13:21 -0500 Subject: [PATCH 7/7] add defensive check for avFormatContext_->oformat --- src/torchcodec/_core/Encoder.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index a09b2f549..89ad380d8 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -708,6 +708,9 @@ void VideoEncoder::initializeEncoder( codec, " not found. To see available codecs, run: ffmpeg -encoders"); } else { + TORCH_CHECK( + avFormatContext_->oformat != nullptr, + "Output format is null, unable to find default codec."); avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec); TORCH_CHECK(avCodec != nullptr, "Video codec not found"); }