diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp index 239b4c828..ad44e28b7 100644 --- a/src/torchcodec/_core/Encoder.cpp +++ b/src/torchcodec/_core/Encoder.cpp @@ -5,6 +5,7 @@ #include "torch/types.h" extern "C" { +#include #include } @@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat( } TORCH_CHECK(false, errorMsg.str()); } + +void validateDoubleOption( + const AVCodec& avCodec, + const char* optionName, + double value) { + if (!avCodec.priv_class) { + return; + } + const AVOption* option = av_opt_find2( + // Convert obj arg from const AVClass* const* to non-const void* + // First cast to remove const, then cast to void* + const_cast(static_cast(&avCodec.priv_class)), + optionName, + nullptr, + 0, + AV_OPT_SEARCH_FAKE_OBJ, + nullptr); + // If the option was not found, let FFmpeg handle it later + if (!option) { + return; + } + if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 || + option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) { + TORCH_CHECK( + value >= option->min && value <= option->max, + optionName, + "=", + value, + " is out of valid range [", + option->min, + ", ", + option->max, + "] for this codec. For more details, run 'ffmpeg -h encoder=", + avCodec.name, + "'"); + } +} } // namespace VideoEncoder::~VideoEncoder() { @@ -700,6 +738,7 @@ void VideoEncoder::initializeEncoder( // Apply videoStreamOptions AVDictionary* options = nullptr; if (videoStreamOptions.crf.has_value()) { + validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value()); av_dict_set( &options, "crf", diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index b7647176c..c0de64f2e 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -47,7 +47,7 @@ struct VideoStreamOptions { // Encoding options // TODO-VideoEncoder: Consider adding other optional fields here // (bit rate, gop size, max b frames, preset) - std::optional crf; + std::optional crf; // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p") // If not specified, uses codec's default format. diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index b4320a24d..e771a83ed 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()"); m.def( - "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()"); + "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None) -> ()"); m.def( - "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor"); + "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None) -> Tensor"); m.def( - "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()"); + "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None) -> ()"); m.def( "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor"); m.def( @@ -604,7 +604,7 @@ void encode_video_to_file( int64_t frame_rate, std::string_view file_name, std::optional pixel_format = std::nullopt, - std::optional crf = std::nullopt) { + std::optional crf = std::nullopt) { VideoStreamOptions videoStreamOptions; videoStreamOptions.pixelFormat = pixel_format; videoStreamOptions.crf = crf; @@ -621,7 +621,7 @@ at::Tensor encode_video_to_tensor( int64_t frame_rate, std::string_view format, std::optional pixel_format = std::nullopt, - std::optional crf = std::nullopt) { + std::optional crf = std::nullopt) { auto avioContextHolder = std::make_unique(); VideoStreamOptions videoStreamOptions; videoStreamOptions.pixelFormat = pixel_format; @@ -641,7 +641,7 @@ void _encode_video_to_file_like( std::string_view format, int64_t file_like_context, std::optional pixel_format = std::nullopt, - std::optional crf = std::nullopt) { + std::optional crf = std::nullopt) { auto fileLikeContext = reinterpret_cast(file_like_context); TORCH_CHECK( diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index cd6174245..ebad670d1 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -213,7 +213,7 @@ def encode_video_to_file_like( frame_rate: int, format: str, file_like: Union[io.RawIOBase, io.BufferedIOBase], - crf: Optional[int] = None, + crf: Optional[Union[int, float]] = None, pixel_format: Optional[str] = None, ) -> None: """Encode video frames to a file-like object. @@ -322,7 +322,7 @@ def encode_video_to_file_abstract( frames: torch.Tensor, frame_rate: int, filename: str, - crf: Optional[int] = None, + crf: Optional[Union[int, float]] = None, pixel_format: Optional[str] = None, ) -> None: return @@ -333,7 +333,7 @@ def encode_video_to_tensor_abstract( frames: torch.Tensor, frame_rate: int, format: str, - crf: Optional[int] = None, + crf: Optional[Union[int, float]] = None, pixel_format: Optional[str] = None, ) -> torch.Tensor: return torch.empty([], dtype=torch.long) @@ -345,7 +345,7 @@ def _encode_video_to_file_like_abstract( frame_rate: int, format: str, file_like_context: int, - crf: Optional[int] = None, + crf: Optional[Union[int, float]] = None, pixel_format: Optional[str] = None, ) -> None: return diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py index e0630d012..318aac76e 100644 --- a/src/torchcodec/encoders/_video_encoder.py +++ b/src/torchcodec/encoders/_video_encoder.py @@ -37,6 +37,7 @@ def to_file( dest: Union[str, Path], *, pixel_format: Optional[str] = None, + crf: Optional[Union[int, float]] = None, ) -> None: """Encode frames into a file. @@ -46,12 +47,16 @@ def to_file( container format. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). """ _core.encode_video_to_file( frames=self._frames, frame_rate=self._frame_rate, filename=str(dest), pixel_format=pixel_format, + crf=crf, ) def to_tensor( @@ -59,14 +64,18 @@ def to_tensor( format: str, *, pixel_format: Optional[str] = None, + crf: Optional[Union[int, float]] = None, ) -> Tensor: """Encode frames into raw bytes, as a 1D uint8 Tensor. Args: format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif" + "mkv", "avi", "webm", "flv", etc. pixel_format (str, optional): The pixel format to encode frames into (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). Returns: Tensor: The raw encoded bytes as 4D uint8 Tensor. @@ -76,6 +85,7 @@ def to_tensor( frame_rate=self._frame_rate, format=format, pixel_format=pixel_format, + crf=crf, ) def to_file_like( @@ -84,6 +94,7 @@ def to_file_like( format: str, *, pixel_format: Optional[str] = None, + crf: Optional[Union[int, float]] = None, ) -> None: """Encode frames into a file-like object. @@ -94,9 +105,12 @@ def to_file_like( ``write(data: bytes) -> int`` and ``seek(offset: int, whence: int = 0) -> int``. format (str): The container format of the encoded frames, e.g. "mp4", "mov", - "mkv", "avi", "webm", "flv", or "gif". + "mkv", "avi", "webm", "flv", etc. pixel_format (str, optional): The pixel format for encoding (e.g., "yuv420p", "yuv444p"). If not specified, uses codec's default format. + crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values + mean better quality. Valid range depends on the encoder (commonly 0-51). + Defaults to None (which will use encoder's default). """ _core.encode_video_to_file_like( frames=self._frames, @@ -104,4 +118,5 @@ def to_file_like( format=format, file_like=file_like, pixel_format=pixel_format, + crf=crf, ) diff --git a/test/test_encoders.py b/test/test_encoders.py index 922b67bbb..0a360ccf9 100644 --- a/test/test_encoders.py +++ b/test/test_encoders.py @@ -9,7 +9,7 @@ import pytest import torch -from torchcodec.decoders import AudioDecoder +from torchcodec.decoders import AudioDecoder, VideoDecoder from torchcodec.encoders import AudioEncoder, VideoEncoder @@ -20,7 +20,9 @@ in_fbcode, IS_WINDOWS, NASA_AUDIO_MP3, + psnr, SINE_MONO_S32, + TEST_SRC_2_720P, TestContainerFile, ) @@ -567,6 +569,9 @@ def write(self, data): class TestVideoEncoder: + def decode(self, source=None) -> torch.Tensor: + return VideoDecoder(source).get_frames_in_range(start=0, stop=60) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) def test_bad_input_parameterized(self, tmp_path, method): if method == "to_file": @@ -605,6 +610,31 @@ def test_bad_input_parameterized(self, tmp_path, method): ) getattr(encoder, method)(**valid_params) + with pytest.raises(RuntimeError, match=r"crf=-10 is out of valid range"): + encoder = VideoEncoder( + frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), + frame_rate=30, + ) + getattr(encoder, method)(**valid_params, crf=-10) + + @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"]) + @pytest.mark.parametrize("crf", [23, 23.5, -0.9]) + def test_crf_valid_values(self, method, crf, tmp_path): + if method == "to_file": + valid_params = {"dest": str(tmp_path / "test.mp4")} + elif method == "to_tensor": + valid_params = {"format": "mp4"} + elif method == "to_file_like": + valid_params = dict(file_like=io.BytesIO(), format="mp4") + else: + raise ValueError(f"Unknown method: {method}") + + encoder = VideoEncoder( + frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), + frame_rate=30, + ) + getattr(encoder, method)(**valid_params, crf=crf) + def test_bad_input(self, tmp_path): encoder = VideoEncoder( frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8), @@ -700,3 +730,234 @@ def encode_to_tensor(frames): torch.testing.assert_close( encoded_from_contiguous, encoded_from_non_contiguous, rtol=0, atol=0 ) + + @pytest.mark.parametrize( + "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow)) + ) + @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) + def test_round_trip(self, tmp_path, format, method): + # Test that decode(encode(decode(frames))) == decode(frames) + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + source_frames = self.decode(TEST_SRC_2_720P.path).data + + # Frame rate is fixed with num frames decoded + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + if method == "to_file": + encoded_path = str(tmp_path / f"encoder_output.{format}") + encoder.to_file(dest=encoded_path, pixel_format="yuv444p", crf=0) + round_trip_frames = self.decode(encoded_path).data + elif method == "to_tensor": + encoded_tensor = encoder.to_tensor( + format=format, pixel_format="yuv444p", crf=0 + ) + round_trip_frames = self.decode(encoded_tensor).data + elif method == "to_file_like": + file_like = io.BytesIO() + encoder.to_file_like( + file_like=file_like, format=format, pixel_format="yuv444p", crf=0 + ) + round_trip_frames = self.decode(file_like.getvalue()).data + else: + raise ValueError(f"Unknown method: {method}") + + assert source_frames.shape == round_trip_frames.shape + assert source_frames.dtype == round_trip_frames.dtype + + atol = 3 if format == "webm" else 2 + for s_frame, rt_frame in zip(source_frames, round_trip_frames): + assert psnr(s_frame, rt_frame) > 30 + torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0) + + @pytest.mark.parametrize( + "format", + ( + "mov", + "mp4", + "avi", + "mkv", + "flv", + "gif", + pytest.param("webm", marks=pytest.mark.slow), + ), + ) + @pytest.mark.parametrize("method", ("to_tensor", "to_file_like")) + def test_against_to_file(self, tmp_path, format, method): + # Test that to_file, to_tensor, and to_file_like produce the same results + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + encoded_file = tmp_path / f"output.{format}" + encoder.to_file(dest=encoded_file, crf=0) + + if method == "to_tensor": + encoded_output = encoder.to_tensor(format=format, crf=0) + else: # to_file_like + file_like = io.BytesIO() + encoder.to_file_like(file_like=file_like, format=format, crf=0) + encoded_output = file_like.getvalue() + + torch.testing.assert_close( + self.decode(encoded_file).data, + self.decode(encoded_output).data, + atol=0, + rtol=0, + ) + + @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") + @pytest.mark.parametrize( + "format", + ( + "mov", + "mp4", + "avi", + "mkv", + "flv", + pytest.param("webm", marks=pytest.mark.slow), + ), + ) + @pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p")) + def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): + ffmpeg_version = get_ffmpeg_major_version() + if format == "webm" and ( + ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) + ): + pytest.skip("Codec for webm is not available in this FFmpeg installation.") + if format in ("avi", "flv") and pixel_format == "yuv444p": + pytest.skip(f"Default codec for {format} does not support {pixel_format}") + + source_frames = self.decode(TEST_SRC_2_720P.path).data + + # Encode with FFmpeg CLI + temp_raw_path = str(tmp_path / "temp_input.raw") + with open(temp_raw_path, "wb") as f: + f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) + + ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") + frame_rate = 30 + crf = 0 + # Some codecs (ex. MPEG4) do not support CRF. + # Flags not supported by the selected codec will be ignored. + ffmpeg_cmd = [ + "ffmpeg", + "-y", + "-f", + "rawvideo", + "-pix_fmt", + "rgb24", # Input format + "-s", + f"{source_frames.shape[3]}x{source_frames.shape[2]}", + "-r", + str(frame_rate), + "-i", + temp_raw_path, + "-pix_fmt", + pixel_format, # Output format + "-crf", + str(crf), + ffmpeg_encoded_path, + ] + subprocess.run(ffmpeg_cmd, check=True) + + # Encode with our video encoder + encoder_output_path = str(tmp_path / f"encoder_output.{format}") + encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate) + encoder.to_file(dest=encoder_output_path, pixel_format=pixel_format, crf=crf) + + ffmpeg_frames = self.decode(ffmpeg_encoded_path).data + encoder_frames = self.decode(encoder_output_path).data + + assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] + + # If FFmpeg selects a codec or pixel format that uses qscale (not crf), + # the VideoEncoder outputs *slightly* different frames. + # There may be additional subtle differences in the encoder. + percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 + + # Check that PSNR between both encoded versions is high + for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): + res = psnr(ff_frame, enc_frame) + assert res > 30 + assert_tensor_close_on_at_least( + ff_frame, enc_frame, percentage=percentage, atol=2 + ) + + def test_to_file_like_custom_file_object(self): + """Test to_file_like with a custom file-like object that implements write and seek.""" + + class CustomFileObject: + def __init__(self): + self._file = io.BytesIO() + + def write(self, data): + return self._file.write(data) + + def seek(self, offset, whence=0): + return self._file.seek(offset, whence) + + def get_encoded_data(self): + return self._file.getvalue() + + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + file_like = CustomFileObject() + encoder.to_file_like(file_like, format="mp4", pixel_format="yuv444p", crf=0) + decoded_frames = self.decode(file_like.get_encoded_data()) + + torch.testing.assert_close( + decoded_frames.data, + source_frames, + atol=2, + rtol=0, + ) + + def test_to_file_like_real_file(self, tmp_path): + """Test to_file_like with a real file opened in binary write mode.""" + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + file_path = tmp_path / "test_file_like.mp4" + + with open(file_path, "wb") as file_like: + encoder.to_file_like(file_like, format="mp4", pixel_format="yuv444p", crf=0) + decoded_frames = self.decode(str(file_path)) + + torch.testing.assert_close( + decoded_frames.data, + source_frames, + atol=2, + rtol=0, + ) + + def test_to_file_like_bad_methods(self): + source_frames = self.decode(TEST_SRC_2_720P.path).data + encoder = VideoEncoder(frames=source_frames, frame_rate=30) + + class NoWriteMethod: + def seek(self, offset, whence=0): + return 0 + + with pytest.raises( + RuntimeError, match="File like object must implement a write method" + ): + encoder.to_file_like(NoWriteMethod(), format="mp4") + + class NoSeekMethod: + def write(self, data): + return len(data) + + with pytest.raises( + RuntimeError, match="File like object must implement a seek method" + ): + encoder.to_file_like(NoSeekMethod(), format="mp4") diff --git a/test/test_ops.py b/test/test_ops.py index bb6ce601b..3bd46c4e3 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -10,7 +10,6 @@ os.environ["TORCH_LOGS"] = "output_code" import json -import subprocess import numpy as np import pytest @@ -27,9 +26,6 @@ create_from_file_like, create_from_tensor, encode_audio_to_file, - encode_video_to_file, - encode_video_to_file_like, - encode_video_to_tensor, get_ffmpeg_library_versions, get_frame_at_index, get_frame_at_pts, @@ -42,24 +38,18 @@ get_next_frame, seek_to_pts, ) -from torchcodec.decoders import VideoDecoder from .utils import ( all_supported_devices, assert_frames_equal, - assert_tensor_close_on_at_least, - get_ffmpeg_major_version, in_fbcode, - IS_WINDOWS, NASA_AUDIO, NASA_AUDIO_MP3, NASA_VIDEO, needs_cuda, - psnr, SINE_MONO_S32, SINE_MONO_S32_44100, SINE_MONO_S32_8000, - TEST_SRC_2_720P, unsplit_device_str, ) @@ -1151,282 +1141,5 @@ def test_bad_input(self, tmp_path): ) -class TestVideoEncoderOps: - def decode(self, source=None) -> torch.Tensor: - return VideoDecoder(source).get_frames_in_range(start=0, stop=60) - - @pytest.mark.parametrize( - "format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow)) - ) - @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like")) - def test_video_encoder_round_trip(self, tmp_path, format, method): - # Test that decode(encode(decode(frames))) == decode(frames) - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - source_frames = self.decode(TEST_SRC_2_720P.path).data - - # Frame rate is fixed with num frames decoded - params = dict(frame_rate=30, pixel_format="yuv444p", crf=0) - if method == "to_file": - encoded_path = str(tmp_path / f"encoder_output.{format}") - encode_video_to_file( - frames=source_frames, - filename=encoded_path, - **params, - ) - round_trip_frames = self.decode(encoded_path).data - elif method == "to_tensor": - encoded_tensor = encode_video_to_tensor( - source_frames, format=format, **params - ) - round_trip_frames = self.decode(encoded_tensor).data - elif method == "to_file_like": - file_like = io.BytesIO() - encode_video_to_file_like( - frames=source_frames, - format=format, - file_like=file_like, - **params, - ) - round_trip_frames = self.decode(file_like.getvalue()).data - else: - raise ValueError(f"Unknown method: {method}") - - assert source_frames.shape == round_trip_frames.shape - assert source_frames.dtype == round_trip_frames.dtype - - # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels - # are within a higher tolerance. - if ffmpeg_version == 6: - assert_close = partial(assert_tensor_close_on_at_least, percentage=99) - atol = 15 - else: - assert_close = torch.testing.assert_close - atol = 3 if format == "webm" else 2 - for s_frame, rt_frame in zip(source_frames, round_trip_frames): - assert psnr(s_frame, rt_frame) > 30 - assert_close(s_frame, rt_frame, atol=atol, rtol=0) - - @pytest.mark.parametrize( - "format", - ( - "mov", - "mp4", - "avi", - "mkv", - "flv", - "gif", - pytest.param("webm", marks=pytest.mark.slow), - ), - ) - @pytest.mark.parametrize("method", ("to_tensor", "to_file_like")) - def test_against_to_file(self, tmp_path, format, method): - # Test that to_file, to_tensor, and to_file_like produce the same results - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - - source_frames = self.decode(TEST_SRC_2_720P.path).data - params = dict(frame_rate=30, crf=0) - - encoded_file = tmp_path / f"output.{format}" - encode_video_to_file(frames=source_frames, filename=str(encoded_file), **params) - - if method == "to_tensor": - encoded_output = encode_video_to_tensor( - source_frames, format=format, **params - ) - else: # to_file_like - file_like = io.BytesIO() - encode_video_to_file_like( - frames=source_frames, - file_like=file_like, - format=format, - **params, - ) - encoded_output = file_like.getvalue() - - torch.testing.assert_close( - self.decode(encoded_file).data, - self.decode(encoded_output).data, - atol=0, - rtol=0, - ) - - @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") - @pytest.mark.parametrize( - "format", - ( - "mov", - "mp4", - "avi", - "mkv", - "flv", - pytest.param("webm", marks=pytest.mark.slow), - ), - ) - @pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p")) - def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format): - ffmpeg_version = get_ffmpeg_major_version() - if format == "webm" and ( - ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) - ): - pytest.skip("Codec for webm is not available in this FFmpeg installation.") - if format in ("avi", "flv") and pixel_format == "yuv444p": - pytest.skip(f"Default codec for {format} does not support {pixel_format}") - - source_frames = self.decode(TEST_SRC_2_720P.path).data - - # Encode with FFmpeg CLI - temp_raw_path = str(tmp_path / "temp_input.raw") - with open(temp_raw_path, "wb") as f: - f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) - - ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") - frame_rate = 30 - crf = 0 - # Some codecs (ex. MPEG4) do not support CRF. - # Flags not supported by the selected codec will be ignored. - ffmpeg_cmd = [ - "ffmpeg", - "-y", - "-f", - "rawvideo", - "-pix_fmt", - "rgb24", # Input format - "-s", - f"{source_frames.shape[3]}x{source_frames.shape[2]}", - "-r", - str(frame_rate), - "-i", - temp_raw_path, - "-pix_fmt", - pixel_format, # Output format - "-crf", - str(crf), - ffmpeg_encoded_path, - ] - subprocess.run(ffmpeg_cmd, check=True) - - # Encode with our video encoder - encoder_output_path = str(tmp_path / f"encoder_output.{format}") - encode_video_to_file( - frames=source_frames, - frame_rate=frame_rate, - filename=encoder_output_path, - pixel_format=pixel_format, - crf=crf, - ) - - ffmpeg_frames = self.decode(ffmpeg_encoded_path).data - encoder_frames = self.decode(encoder_output_path).data - - assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] - - # If FFmpeg selects a codec or pixel format that uses qscale (not crf), - # the VideoEncoder outputs *slightly* different frames. - # There may be additional subtle differences in the encoder. - percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 - - # Check that PSNR between both encoded versions is high - for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): - res = psnr(ff_frame, enc_frame) - assert res > 30 - assert_tensor_close_on_at_least( - ff_frame, enc_frame, percentage=percentage, atol=2 - ) - - def test_to_file_like_custom_file_object(self): - """Test to_file_like with a custom file-like object that implements write and seek.""" - - class CustomFileObject: - def __init__(self): - self._file = io.BytesIO() - - def write(self, data): - return self._file.write(data) - - def seek(self, offset, whence=0): - return self._file.seek(offset, whence) - - def get_encoded_data(self): - return self._file.getvalue() - - source_frames = self.decode(TEST_SRC_2_720P.path).data - file_like = CustomFileObject() - encode_video_to_file_like( - source_frames, - frame_rate=30, - pixel_format="yuv444p", - crf=0, - format="mp4", - file_like=file_like, - ) - decoded_samples = self.decode(file_like.get_encoded_data()) - - torch.testing.assert_close( - decoded_samples.data, - source_frames, - atol=2, - rtol=0, - ) - - def test_to_file_like_real_file(self, tmp_path): - """Test to_file_like with a real file opened in binary write mode.""" - source_frames = self.decode(TEST_SRC_2_720P.path).data - file_path = tmp_path / "test_file_like.mp4" - - with open(file_path, "wb") as file_like: - encode_video_to_file_like( - source_frames, - frame_rate=30, - pixel_format="yuv444p", - crf=0, - format="mp4", - file_like=file_like, - ) - decoded_samples = self.decode(str(file_path)) - - torch.testing.assert_close( - decoded_samples.data, - source_frames, - atol=2, - rtol=0, - ) - - def test_to_file_like_bad_methods(self): - source_frames = self.decode(TEST_SRC_2_720P.path).data - - class NoWriteMethod: - def seek(self, offset, whence=0): - return 0 - - with pytest.raises( - RuntimeError, match="File like object must implement a write method" - ): - encode_video_to_file_like( - source_frames, - frame_rate=30, - format="mp4", - file_like=NoWriteMethod(), - ) - - class NoSeekMethod: - def write(self, data): - return len(data) - - with pytest.raises( - RuntimeError, match="File like object must implement a seek method" - ): - encode_video_to_file_like( - source_frames, frame_rate=30, format="mp4", file_like=NoSeekMethod() - ) - - if __name__ == "__main__": pytest.main()