diff --git a/benchmarks/decoders/benchmark_decoders.py b/benchmarks/decoders/benchmark_decoders.py index 23f45dab8..26331806b 100644 --- a/benchmarks/decoders/benchmark_decoders.py +++ b/benchmarks/decoders/benchmark_decoders.py @@ -7,10 +7,13 @@ import argparse import importlib.resources import os +import platform import typing from dataclasses import dataclass, field from pathlib import Path +import torch + from benchmark_decoders_library import ( AbstractDecoder, BatchParameters, @@ -167,7 +170,7 @@ def main() -> None: if entry.is_file() and entry.name.endswith(".mp4"): video_paths.append(entry.path) - df_data = run_benchmarks( + results = run_benchmarks( decoders_to_run, video_paths, num_uniform_samples, @@ -176,7 +179,21 @@ def main() -> None: benchmark_video_creation=args.bm_video_creation, batch_parameters=BatchParameters(num_threads=8, batch_size=40), ) - plot_data(df_data, args.plot_path) + data = { + "experiments": results, + "system_metadata": { + "cpu_count": os.cpu_count(), + "system": platform.system(), + "machine": platform.machine(), + "python_version": str(platform.python_version()), + "cuda": ( + torch.cuda.get_device_properties(0).name + if torch.cuda.is_available() + else "not available" + ), + }, + } + plot_data(data, args.plot_path) if __name__ == "__main__": diff --git a/benchmarks/decoders/benchmark_decoders_library.py b/benchmarks/decoders/benchmark_decoders_library.py index 2eb9f8931..8876a09dd 100644 --- a/benchmarks/decoders/benchmark_decoders_library.py +++ b/benchmarks/decoders/benchmark_decoders_library.py @@ -35,17 +35,28 @@ def __init__(self): pass @abc.abstractmethod - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): pass + def decode_frames_description(self, num_frames: int, kind: str) -> str: + return f"decode {num_frames} {kind} frames" + @abc.abstractmethod - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, 
video_file, n): pass + def decode_first_n_frames_description(self, n) -> str: + return f"first {n} frames" + @abc.abstractmethod - def decode_and_transform(self, video_file, pts_list, height, width, device): + def decode_and_resize(self, video_file, pts_list, height, width, device): pass + def decode_and_resize_description( + self, num_frames: int, height: int, width: int + ) -> str: + return f"decode {num_frames} -> {height}x{width}" + class DecordAccurate(AbstractDecoder): def __init__(self): @@ -54,7 +65,7 @@ def __init__(self): self.decord = decord self.decord.bridge.set_bridge("torch") - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu()) frames = [] fps = decord_vr.get_avg_fps() @@ -64,10 +75,10 @@ def get_frames_from_video(self, video_file, pts_list): frames.append(frame) return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu()) frames = [] - for _ in range(numFramesToDecode): + for _ in range(n): frame = decord_vr.next() frames.append(frame) return frames @@ -80,15 +91,15 @@ def __init__(self): self.decord = decord self.decord.bridge.set_bridge("torch") - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu()) average_fps = decord_vr.get_avg_fps() indices_list = [int(pts * average_fps) for pts in pts_list] return decord_vr.get_batch(indices_list) - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): decord_vr = self.decord.VideoReader(video_file, ctx=self.decord.cpu()) - indices_list = list(range(numFramesToDecode)) + indices_list = list(range(n)) return decord_vr.get_batch(indices_list) @@ 
-102,7 +113,7 @@ def __init__(self, backend): self.torchvision = torchvision self.transforms_v2 = transforms_v2 - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): self.torchvision.set_video_backend(self._backend) reader = self.torchvision.io.VideoReader(video_file, "video", num_threads=0) frames = [] @@ -112,16 +123,16 @@ def get_frames_from_video(self, video_file, pts_list): frames.append(frame["data"].permute(1, 2, 0)) return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): self.torchvision.set_video_backend(self._backend) reader = self.torchvision.io.VideoReader(video_file, "video", num_threads=0) frames = [] - for _ in range(numFramesToDecode): + for _ in range(n): frame = next(reader) frames.append(frame["data"].permute(1, 2, 0)) return frames - def decode_and_transform(self, video_file, pts_list, height, width, device): + def decode_and_resize(self, video_file, pts_list, height, width, device): self.torchvision.set_video_backend(self._backend) reader = self.torchvision.io.VideoReader(video_file, "video", num_threads=1) frames = [] @@ -142,7 +153,7 @@ def __init__(self, num_threads=None, color_conversion_library=None, device="cpu" self._color_conversion_library = color_conversion_library self._device = device - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decoder = create_from_file(video_file) scan_all_streams_to_update_metadata(decoder) _add_video_stream( @@ -158,7 +169,7 @@ def get_frames_from_video(self, video_file, pts_list): ) return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): decoder = create_from_file(video_file) _add_video_stream( decoder, @@ -168,7 +179,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): ) frames = [] - for _ in 
range(numFramesToDecode): + for _ in range(n): frame = get_next_frame(decoder) frames.append(frame) @@ -185,7 +196,7 @@ def __init__(self, num_threads=None, color_conversion_library=None, device="cpu" self.transforms_v2 = transforms_v2 - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decoder = create_from_file(video_file) num_threads = int(self._num_threads) if self._num_threads else 0 _add_video_stream( @@ -203,7 +214,7 @@ def get_frames_from_video(self, video_file, pts_list): return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): num_threads = int(self._num_threads) if self._num_threads else 0 decoder = create_from_file(video_file) _add_video_stream( @@ -214,13 +225,13 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): ) frames = [] - for _ in range(numFramesToDecode): + for _ in range(n): frame = get_next_frame(decoder) frames.append(frame) return frames - def decode_and_transform(self, video_file, pts_list, height, width, device): + def decode_and_resize(self, video_file, pts_list, height, width, device): num_threads = int(self._num_threads) if self._num_threads else 1 decoder = create_from_file(video_file) _add_video_stream( @@ -251,7 +262,7 @@ def __init__(self, num_threads=None, color_conversion_library=None, device="cpu" self._color_conversion_library = color_conversion_library self._device = device - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decoder = create_from_file(video_file) scan_all_streams_to_update_metadata(decoder) _add_video_stream( @@ -267,7 +278,7 @@ def get_frames_from_video(self, video_file, pts_list): ) return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): decoder = create_from_file(video_file) 
scan_all_streams_to_update_metadata(decoder) _add_video_stream( @@ -278,7 +289,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): ) metadata = json.loads(get_json_metadata(decoder)) best_video_stream = metadata["bestVideoStreamIndex"] - indices_list = list(range(numFramesToDecode)) + indices_list = list(range(n)) frames, *_ = get_frames_at_indices( decoder, stream_index=best_video_stream, frame_indices=indices_list ) @@ -294,7 +305,7 @@ def __init__(self, num_ffmpeg_threads=None, device="cpu"): self.transforms_v2 = transforms_v2 - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): num_ffmpeg_threads = ( int(self._num_ffmpeg_threads) if self._num_ffmpeg_threads else 0 ) @@ -303,7 +314,7 @@ def get_frames_from_video(self, video_file, pts_list): ) return decoder.get_frames_played_at(pts_list) - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): num_ffmpeg_threads = ( int(self._num_ffmpeg_threads) if self._num_ffmpeg_threads else 0 ) @@ -315,11 +326,11 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): for frame in decoder: frames.append(frame) count += 1 - if count == numFramesToDecode: + if count == n: break return frames - def decode_and_transform(self, video_file, pts_list, height, width, device): + def decode_and_resize(self, video_file, pts_list, height, width, device): num_ffmpeg_threads = ( int(self._num_ffmpeg_threads) if self._num_ffmpeg_threads else 1 ) @@ -346,7 +357,7 @@ class TorchCodecCoreCompiled(AbstractDecoder): def __init__(self): pass - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): decoder = create_from_file(video_file) _add_video_stream(decoder) frames = [] @@ -355,11 +366,11 @@ def get_frames_from_video(self, video_file, pts_list): frames.append(frame) return frames - def 
get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): decoder = create_from_file(video_file) _add_video_stream(decoder) frames = [] - for _ in range(numFramesToDecode): + for _ in range(n): frame = compiled_next(decoder) frames.append(frame) return frames @@ -375,7 +386,7 @@ def __init__(self): self.transforms_v2 = transforms_v2 - def get_frames_from_video(self, video_file, pts_list): + def decode_frames(self, video_file, pts_list): stream_reader = self.torchaudio.io.StreamReader(src=video_file) stream_reader.add_basic_video_stream( frames_per_chunk=1, decoder_option={"threads": "0"} @@ -388,7 +399,7 @@ def get_frames_from_video(self, video_file, pts_list): frames.append(clip[0][0]) return frames - def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): + def decode_first_n_frames(self, video_file, n): stream_reader = self.torchaudio.io.StreamReader(src=video_file) stream_reader.add_basic_video_stream( frames_per_chunk=1, decoder_option={"threads": "0"} @@ -396,14 +407,14 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode): frames = [] frame_cnt = 0 for vframe in stream_reader.stream(): - if frame_cnt >= numFramesToDecode: + if frame_cnt >= n: break frames.append(vframe[0][0]) frame_cnt += 1 return frames - def decode_and_transform(self, video_file, pts_list, height, width, device): + def decode_and_resize(self, video_file, pts_list, height, width, device): stream_reader = self.torchaudio.io.StreamReader(src=video_file) stream_reader.add_basic_video_stream( frames_per_chunk=1, decoder_option={"threads": "1"} @@ -421,7 +432,7 @@ def decode_and_transform(self, video_file, pts_list, height, width, device): return frames -def create_torchcodec_decoder_from_file(video_file): +def create_torchcodec_core_decode_first_frame(video_file): video_decoder = create_from_file(video_file) _add_video_stream(video_decoder) get_next_frame(video_decoder) @@ -562,9 +573,10 
@@ def plot_data(json_data, plot_path): for col in range(video_type_combinations[unique_videos[row]], max_combinations): fig.delaxes(axes[row, col]) + # Stamp the metadata for the experimental system on the chart. plt.gcf().text( 0.005, - 0.87, + 0.013, "\n".join([f"{k}: {v}" for k, v in json_data["system_metadata"].items()]), fontsize=11, bbox=dict(facecolor="white"), @@ -664,8 +676,17 @@ def run_benchmarks( if dataloader_parameters: bp = dataloader_parameters.batch_parameters + description = ( + f"concurrency {bp.num_threads} " + f"batch {bp.batch_size} " + + decoder.decode_and_resize_description( + num_samples, + dataloader_parameters.resize_height, + dataloader_parameters.resize_width, + ) + ) dataloader_result = benchmark.Timer( - stmt="run_batch_using_threads(decoder.decode_and_transform, video_file, pts_list, height, width, device, batch_parameters=batch_parameters)", + stmt="run_batch_using_threads(decoder.decode_and_resize, video_file, pts_list, height, width, device, batch_parameters=batch_parameters)", globals={ "video_file": str(video_file_path), "pts_list": uniform_pts_list, @@ -678,11 +699,9 @@ def run_benchmarks( }, label=f"video={video_file_path} {metadata_label}", sub_label=decoder_name, - description=f"concurrent[threads={bp.num_threads},batch_size={bp.batch_size}] {num_samples} decode_and_transform()", - ) - print( - f"{decoder_name} concurrent[threads={bp.num_threads} batch_size={bp.batch_size}]" + description=description, ) + print(description) results.append( dataloader_result.blocked_autorange( min_run_time=min_runtime_seconds @@ -694,7 +713,7 @@ def run_benchmarks( decoder_name, video_file_path, num_samples * dataloader_parameters.batch_parameters.batch_size, - f"concurrent[threads={bp.num_threads} batch_size={bp.batch_size}] {num_samples} x decode_and_transform()", + description, ) ) @@ -707,7 +726,7 @@ def run_benchmarks( f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}" ) seeked_result = benchmark.Timer( - 
stmt="decoder.get_frames_from_video(video_file, pts_list)", + stmt="decoder.decode_frames(video_file, pts_list)", globals={ "video_file": str(video_file_path), "pts_list": pts_list, @@ -715,9 +734,11 @@ def run_benchmarks( }, label=f"video={video_file_path} {metadata_label}", sub_label=decoder_name, - description=f"{kind} {num_samples} seek()+next()", + description=decoder.decode_frames_description(num_samples, kind), + ) + print( + f"{decoder_name} {decoder.decode_frames_description(num_samples, kind)}" ) - print(f"{decoder_name} {kind} {num_samples} seek()+next()") results.append( seeked_result.blocked_autorange(min_run_time=min_runtime_seconds) ) @@ -727,13 +748,13 @@ def run_benchmarks( decoder_name, video_file_path, num_samples, - f"{num_samples} x {kind} seek()+next()", + decoder.decode_frames_description(num_samples, kind), ) ) if batch_parameters: seeked_result = benchmark.Timer( - stmt="run_batch_using_threads(decoder.get_frames_from_video, video_file, pts_list, batch_parameters=batch_parameters)", + stmt="run_batch_using_threads(decoder.decode_frames, video_file, pts_list, batch_parameters=batch_parameters)", globals={ "video_file": str(video_file_path), "pts_list": pts_list, @@ -743,9 +764,13 @@ def run_benchmarks( }, label=f"video={video_file_path} {metadata_label}", sub_label=decoder_name, - description=f"batch {kind} {num_samples} seek()+next()", + description=decoder.decode_frames_description( + num_samples, kind + ), + ) + print( + f"{decoder_name} batch {decoder.decode_frames_description(num_samples, kind)}" ) - print(f"{decoder_name} batch {kind} {num_samples} seek()+next()") results.append( seeked_result.blocked_autorange( min_run_time=min_runtime_seconds @@ -757,23 +782,25 @@ def run_benchmarks( decoder_name, video_file_path, num_samples * batch_parameters.batch_size, - f"batch {kind} seek()+next()", + decoder.decode_frames_description(num_samples, kind), ) ) - for num_consecutive_nexts in num_sequential_frames_from_start: + for num_frames in 
num_sequential_frames_from_start: consecutive_frames_result = benchmark.Timer( - stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)", + stmt="decoder.decode_first_n_frames(video_file, n)", globals={ "video_file": str(video_file_path), - "consecutive_frames_to_extract": num_consecutive_nexts, + "n": num_frames, "decoder": decoder, }, label=f"video={video_file_path} {metadata_label}", sub_label=decoder_name, - description=f"{num_consecutive_nexts} next()", + description=decoder.decode_first_n_frames_description(num_frames), + ) + print( + f"{decoder_name} {decoder.decode_first_n_frames_description(num_frames)}" ) - print(f"{decoder_name} {num_consecutive_nexts} next()") results.append( consecutive_frames_result.blocked_autorange( min_run_time=min_runtime_seconds @@ -784,26 +811,30 @@ def run_benchmarks( results[-1], decoder_name, video_file_path, - num_consecutive_nexts, - f"{num_consecutive_nexts} next()", + num_frames, + decoder.decode_first_n_frames_description(num_frames), ) ) if batch_parameters: consecutive_frames_result = benchmark.Timer( - stmt="run_batch_using_threads(decoder.get_consecutive_frames_from_video, video_file, consecutive_frames_to_extract, batch_parameters=batch_parameters)", + stmt="run_batch_using_threads(decoder.decode_first_n_frames, video_file, n, batch_parameters=batch_parameters)", globals={ "video_file": str(video_file_path), - "consecutive_frames_to_extract": num_consecutive_nexts, + "n": num_frames, "decoder": decoder, "run_batch_using_threads": run_batch_using_threads, "batch_parameters": batch_parameters, }, label=f"video={video_file_path} {metadata_label}", sub_label=decoder_name, - description=f"batch {num_consecutive_nexts} next()", + description=decoder.decode_first_n_frames_description( + num_frames + ), + ) + print( + f"{decoder_name} batch {decoder.decode_first_n_frames_description(num_frames)}" ) - print(f"{decoder_name} batch {num_consecutive_nexts} next()") results.append( 
consecutive_frames_result.blocked_autorange( min_run_time=min_runtime_seconds @@ -814,8 +845,8 @@ def run_benchmarks( results[-1], decoder_name, video_file_path, - num_consecutive_nexts * batch_parameters.batch_size, - f"batch {num_consecutive_nexts} next()", + num_frames * batch_parameters.batch_size, + decoder.decode_first_n_frames_description(num_frames), ) ) @@ -824,14 +855,14 @@ def run_benchmarks( metadata = get_metadata(video_file_path) metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps" creation_result = benchmark.Timer( - stmt="create_torchcodec_decoder_from_file(video_file)", + stmt="create_torchcodec_core_decode_first_frame(video_file)", globals={ "video_file": str(first_video_file_path), - "create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file, + "create_torchcodec_core_decode_first_frame": create_torchcodec_core_decode_first_frame, }, label=f"video={first_video_file_path} {metadata_label}", sub_label="TorchCodecCore", - description="create()+next()", + description="create decode first", ) results.append( creation_result.blocked_autorange( diff --git a/benchmarks/decoders/benchmark_readme_chart.png b/benchmarks/decoders/benchmark_readme_chart.png index 37eb6cdf3..84002bd56 100644 Binary files a/benchmarks/decoders/benchmark_readme_chart.png and b/benchmarks/decoders/benchmark_readme_chart.png differ diff --git a/benchmarks/decoders/benchmark_readme_data.json b/benchmarks/decoders/benchmark_readme_data.json index 26b187d10..34ccbe61f 100644 --- a/benchmarks/decoders/benchmark_readme_data.json +++ b/benchmarks/decoders/benchmark_readme_data.json @@ -2,578 +2,578 @@ "experiments": [ { "decoder": "torchcodec", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 2.185287811535107, "fps_p25": 2.185287811535107, "fps_p75": 2.185287811535107, "frame_count": 
500, "iqr": 0.0, "median": 228.80281368922442, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 1.487621824405483, "fps_p25": 1.4946740687348836, "fps_p75": 1.4771120221232172, "frame_count": 10, "iqr": 0.07954542245715857, "median": 6.722138540819287, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 1.7566893241016517, "fps_p25": 1.7720402326671933, "fps_p75": 1.7384815770237596, "frame_count": 10, "iqr": 0.10893335612490773, "median": 5.6925261984579265, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 124.61926655596507, "fps_p25": 129.67421062643098, "fps_p75": 119.14695479258806, "frame_count": 100, "iqr": 0.06813631113618612, "median": 0.8024441385641694, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 7.318914173236622, "fps_p25": 7.318914173236622, "fps_p75": 7.318914173236622, "frame_count": 500, "iqr": 0.0, "median": 
68.31614474020898, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 1.2141330103482226, "fps_p25": 1.2325662930218284, "fps_p75": 1.1974735557608616, "frame_count": 10, "iqr": 0.23776122322306037, "median": 8.236329887062311, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 1.5456837552485623, "fps_p25": 1.5466850326811599, "fps_p75": 1.5443254981338352, "frame_count": 10, "iqr": 0.009878377430140972, "median": 6.46962871029973, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 124.89489937129994, "fps_p25": 132.91418755681087, "fps_p75": 123.67834232204613, "frame_count": 100, "iqr": 0.05618386995047331, "median": 0.8006732100620866, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 1.9562443538056336, "fps_p25": 1.9562443538056336, "fps_p75": 1.9562443538056336, "frame_count": 500, "iqr": 0.0, "median": 
255.5917920107022, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 0.19707861605359275, "fps_p25": 0.19707861605359275, "fps_p75": 0.19707861605359275, "frame_count": 10, "iqr": 0.0, "median": 50.741172229871154, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 0.1485655937554368, "fps_p25": 0.1485655937554368, "fps_p75": 0.1485655937554368, "frame_count": 10, "iqr": 0.0, "median": 67.31033577304333, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 67.57006767783298, "fps_p25": 67.79578885620272, "fps_p75": 66.37509364538417, "frame_count": 100, "iqr": 0.03157134260982275, "median": 1.47994523961097, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 0.2338984310881488, "fps_p25": 0.2338984310881488, "fps_p75": 0.2338984310881488, "frame_count": 500, "iqr": 0.0, "median": 2137.680007830262, - "type": 
"concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 1.4894370909070271, "fps_p25": 1.4979668517624964, "fps_p75": 1.4485405333999959, "frame_count": 10, "iqr": 0.22778514958918095, "median": 6.713945866562426, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 1.1414957444735077, "fps_p25": 1.149750586746934, "fps_p75": 1.1294024365512476, "frame_count": 10, "iqr": 0.15670129097998142, "median": 8.760435637552291, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 245.4526089592487, "fps_p25": 250.60379774748577, "fps_p75": 238.8595978488035, "frame_count": 100, "iqr": 0.019619732862338424, "median": 0.4074106216430664, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_120s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 16.148019060678696, "fps_p25": 16.148019060678696, "fps_p75": 16.148019060678696, "frame_count": 500, "iqr": 0.0, "median": 30.96355027332902, - "type": "concurrent[threads=10 batch_size=50] 10 x 
decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 14.673409614471637, "fps_p25": 15.041438382121239, "fps_p75": 14.377383339744545, "frame_count": 10, "iqr": 0.030706821009516716, "median": 0.681504862383008, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 16.16320155619752, "fps_p25": 16.648043466566957, "fps_p75": 15.76324096816119, "frame_count": 10, "iqr": 0.03371612261980772, "median": 0.6186893088743091, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 389.19346204012305, "fps_p25": 395.79120044408205, "fps_p75": 377.4039559370892, "frame_count": 100, "iqr": 0.012309603625908494, "median": 0.2569416235201061, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 49.67220827704095, "fps_p25": 49.67716314210458, "fps_p75": 48.471847983087486, "frame_count": 500, "iqr": 0.25027891201898456, "median": 10.065990970470011, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", 
+ "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 9.441689758760884, "fps_p25": 9.45141849411332, "fps_p75": 9.367661415497814, "frame_count": 10, "iqr": 0.009460048051550984, "median": 1.0591324493288994, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 9.39106161945713, "fps_p25": 9.399252169582105, "fps_p75": 9.377012983289656, "frame_count": 10, "iqr": 0.0025232546031475067, "median": 1.0648423368111253, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 177.4469022603485, "fps_p25": 316.77718670482557, "fps_p75": 176.54354870856375, "frame_count": 100, "iqr": 0.2507531810551882, "median": 0.5635488629341125, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 2.9420191806222524, "fps_p25": 2.9420191806222524, "fps_p75": 2.9420191806222524, "frame_count": 500, "iqr": 0.0, "median": 169.95130531210452, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": 
"concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 0.3212337504315466, "fps_p25": 0.3212337504315466, "fps_p75": 0.3212337504315466, "frame_count": 10, "iqr": 0.0, "median": 31.129979295656085, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 0.38320560646528706, "fps_p25": 0.3842953212160512, "fps_p75": 0.38212205426951185, "frame_count": 10, "iqr": 0.14799460163339972, "median": 26.09565160656348, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 67.50330886922704, "fps_p25": 68.89084855269626, "fps_p75": 66.45123463104065, "frame_count": 100, "iqr": 0.05329132452607155, "median": 1.4814088623970747, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 0.3990116243502749, "fps_p25": 0.3990116243502749, "fps_p75": 0.3990116243502749, "frame_count": 500, "iqr": 0.0, "median": 1253.096324735321, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 
batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 3.030252715089431, "fps_p25": 3.0526322316668693, "fps_p75": 2.953346399216992, "frame_count": 10, "iqr": 0.11012816615402699, "median": 3.300054794177413, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 3.6392153967153975, "fps_p25": 3.6854168186487217, "fps_p75": 3.599719275410032, "frame_count": 10, "iqr": 0.06459711538627744, "median": 2.74784504622221, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 252.94770217259642, "fps_p25": 259.378165637991, "fps_p75": 245.72053777505408, "frame_count": 100, "iqr": 0.021428925916552544, "median": 0.3953386377543211, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1920x1080_10s_60fps_600gop_libx264_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 76.91304610165066, "fps_p25": 77.85871253761688, "fps_p75": 75.70820390469302, "frame_count": 500, "iqr": 0.18241520412266254, "median": 6.5008477149531245, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", 
"video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 29.726490506580905, "fps_p25": 30.606242550636814, "fps_p75": 28.990909515543702, "frame_count": 10, "iqr": 0.01820498052984476, "median": 0.3364002890884876, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 23.597059888338084, "fps_p25": 24.064691272988345, "fps_p75": 22.761615473310332, "frame_count": 10, "iqr": 0.023789554135873914, "median": 0.42378160869702697, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 573.5874430258399, "fps_p25": 583.9631969586693, "fps_p75": 562.2277430367112, "frame_count": 100, "iqr": 0.006620197789743543, "median": 0.17434133403003216, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 14.88544406237577, "fps_p25": 14.88544406237577, "fps_p75": 14.88544406237577, "frame_count": 500, "iqr": 0.0, "median": 33.58986120298505, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - 
"description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 3.3859102553356832, "fps_p25": 3.392143926670231, "fps_p75": 3.3828535510681297, "frame_count": 10, "iqr": 0.008096098899841309, "median": 2.9534155502915382, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 3.7600089196810127, "fps_p25": 3.7659781513743416, "fps_p75": 3.74713269916233, "frame_count": 10, "iqr": 0.01335456338711083, "median": 2.6595681589096785, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchcodec[cuda]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 109.04967149763928, "fps_p25": 109.57199422467362, "fps_p75": 107.818128612012, "frame_count": 100, "iqr": 0.014845846220850945, "median": 0.9170133080333471, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 43.23868165376152, "fps_p25": 43.38950140330655, "fps_p75": 42.58949487104254, "frame_count": 500, "iqr": 0.2164593692868948, "median": 11.563719819299877, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform 
frames", "fps_median": 4.960028025039232, "fps_p25": 5.093364251365753, "fps_p75": 4.8491085082202074, "frame_count": 10, "iqr": 0.09889586782082915, "median": 2.0161176407709718, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 2.612608064723561, "fps_p25": 2.64420358814648, "fps_p75": 2.5411955478808883, "frame_count": 10, "iqr": 0.15329858590848744, "median": 3.8275928697548807, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchvision[video_reader]", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 175.7821361078175, "fps_p25": 179.39110401504206, "fps_p75": 170.3600162221097, "frame_count": 100, "iqr": 0.029550952836871147, "median": 0.5688860211521387, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "concurrent[threads=10,batch_size=50] 10 decode_and_transform()", + "description": "concurrency 10 batch 50 decode 10 -> 256x256", "fps_median": 9.577945247223786, "fps_p25": 9.577945247223786, "fps_p75": 9.577945247223786, "frame_count": 500, "iqr": 0.0, "median": 52.203263549134135, - "type": "concurrent[threads=10 batch_size=50] 10 x decode_and_transform()", + "type": "concurrency 10 batch 50 decode 10 -> 256x256", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "uniform 10 seek()+next()", + "description": "decode 10 uniform frames", "fps_median": 33.80941691704928, "fps_p25": 35.12371582832645, "fps_p75": 32.88568736547048, 
"frame_count": 10, "iqr": 0.019375736825168133, "median": 0.2957755830138922, - "type": "10 x uniform seek()+next()", + "type": "decode 10 uniform frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "random 10 seek()+next()", + "description": "decode 10 random frames", "fps_median": 23.720035259186883, "fps_p25": 24.603749055828953, "fps_p75": 22.6657410234739, "frame_count": 10, "iqr": 0.03475236473605037, "median": 0.4215845335274935, - "type": "10 x random seek()+next()", + "type": "decode 10 random frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" }, { "decoder": "torchaudio", - "description": "100 next()", + "description": "first 100 frames", "fps_median": 772.4086105249893, "fps_p25": 786.9980237348831, "fps_p75": 753.8275240724674, "frame_count": 100, "iqr": 0.005591217428445816, "median": 0.12946515437215567, - "type": "100 next()", + "type": "first 100 frames", "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4" } ],