diff --git a/README.md b/README.md
index 79aaf031b..ca0fa4f80 100644
--- a/README.md
+++ b/README.md
@@ -124,6 +124,12 @@ The following was generated by running [our benchmark script](./benchmarks/decod
 ![benchmark_results](./benchmarks/decoders/benchmark_readme_chart.png)
 
+The top row is a [Mandelbrot](https://ffmpeg.org/ffmpeg-filters.html#mandelbrot) video
+generated with FFmpeg that has a resolution of 1280x720 at 60 fps and is 120 seconds long.
+The bottom row is a [promotional video from NASA](https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4)
+that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
+encoded with libx264 and the yuv420p pixel format.
+
 ## Planned future work
 
 We are actively working on the following features:
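Note on the README text above: the Mandelbrot clip is produced by the `generate_videos` helper changed in `benchmarks/decoders/benchmark_decoders_library.py` below. A rough standalone sketch of the equivalent FFmpeg invocation (illustrative only, not part of the patch; it assumes an `ffmpeg` binary is on the `PATH` and writes to the current directory):

```python
# Illustrative sketch: roughly the command that generate_videos() assembles,
# hard-coded for the Mandelbrot settings quoted in the README text above.
import subprocess

subprocess.run(
    [
        "ffmpeg", "-y",
        "-f", "lavfi",                  # read from a libavfilter source instead of an input file
        "-i", "mandelbrot=s=1280x720",  # synthetic Mandelbrot pattern at 1280x720
        "-t", "120",                    # 120 seconds long
        "-c:v", "libx264",              # encode with libx264
        "-r", "60",                     # 60 fps
        "-g", "600",                    # keyframe (GOP) interval of 600 frames
        "-pix_fmt", "yuv420p",          # yuv420p pixel format
        "mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4",
    ],
    check=True,
)
```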
diff --git a/benchmarks/decoders/benchmark_decoders_library.py b/benchmarks/decoders/benchmark_decoders_library.py
index f4be7d8e6..4ba79b507 100644
--- a/benchmarks/decoders/benchmark_decoders_library.py
+++ b/benchmarks/decoders/benchmark_decoders_library.py
@@ -1,9 +1,10 @@
 import abc
 import json
-import os
 import subprocess
+import urllib.request
 from concurrent.futures import ThreadPoolExecutor, wait
 from itertools import product
+from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -330,6 +331,7 @@ def generate_video(command):
 def generate_videos(
     resolutions,
     encodings,
+    patterns,
     fpses,
     gop_sizes,
     durations,
@@ -341,23 +343,25 @@
     video_count = 0
     futures = []
 
-    for resolution, duration, fps, gop_size, encoding, pix_fmt in product(
-        resolutions, durations, fpses, gop_sizes, encodings, pix_fmts
+    for resolution, duration, fps, gop_size, encoding, pattern, pix_fmt in product(
+        resolutions, durations, fpses, gop_sizes, encodings, patterns, pix_fmts
     ):
-        outfile = f"{output_dir}/{resolution}_{duration}s_{fps}fps_{gop_size}gop_{encoding}_{pix_fmt}.mp4"
+        outfile = f"{output_dir}/{pattern}_{resolution}_{duration}s_{fps}fps_{gop_size}gop_{encoding}_{pix_fmt}.mp4"
         command = [
             ffmpeg_cli,
             "-y",
             "-f",
             "lavfi",
             "-i",
-            f"color=c=blue:s={resolution}:d={duration}",
+            f"{pattern}=s={resolution}",
+            "-t",
+            str(duration),
             "-c:v",
             encoding,
             "-r",
-            f"{fps}",
+            str(fps),
             "-g",
-            f"{gop_size}",
+            str(gop_size),
             "-pix_fmt",
             pix_fmt,
             outfile,
@@ -372,6 +376,11 @@
     print(f"Generated {video_count} videos")
 
 
+def retrieve_videos(urls_and_dest_paths):
+    for url, path in urls_and_dest_paths:
+        urllib.request.urlretrieve(url, path)
+
+
 def plot_data(df_data, plot_path):
     # Creating the DataFrame
     df = pd.DataFrame(df_data)
@@ -400,7 +409,7 @@ def plot_data(df_data, plot_path):
         nrows=len(unique_videos),
         ncols=max_combinations,
         figsize=(max_combinations * 6, len(unique_videos) * 4),
-        sharex=True,
+        sharex=False,
         sharey=True,
     )
@@ -419,16 +428,19 @@
             ax = axes[row, col]  # Select the appropriate axis
 
             # Set the title for the subplot
-            base_video = os.path.basename(video)
+            base_video = Path(video).name.removesuffix(".mp4")
             ax.set_title(
-                f"video={base_video}\ndecode_pattern={vcount} x {vtype}", fontsize=12
+                f"video={base_video}\ndecode_pattern={vcount} x {vtype}", fontsize=10
             )
 
             # Plot bars with error bars
             ax.barh(
                 group["decoder"],
-                group["fps"],
-                xerr=[group["fps"] - group["fps_p75"], group["fps_p25"] - group["fps"]],
+                group["fps_median"],
+                xerr=[
+                    group["fps_median"] - group["fps_p75"],
+                    group["fps_p25"] - group["fps_median"],
+                ],
                 color=[colors(i) for i in range(len(group))],
                 align="center",
                 capsize=5,
@@ -438,28 +450,11 @@
 
             # Set the labels
             ax.set_xlabel("FPS")
 
-            # No need for y-axis label past the plot on the far left
-            if col == 0:
-                ax.set_ylabel("Decoder")
-
     # Remove any empty subplots for videos with fewer combinations
     for row in range(len(unique_videos)):
         for col in range(video_type_combinations[unique_videos[row]], max_combinations):
             fig.delaxes(axes[row, col])
 
-    # If we just call fig.legend, we'll get duplicate labels, as each label appears on
-    # each subplot. We take advantage of dicts having unique keys to de-dupe.
-    handles, labels = plt.gca().get_legend_handles_labels()
-    unique_labels = dict(zip(labels, handles))
-
-    # Reverse the order of the handles and labels to match the order of the bars
-    fig.legend(
-        handles=reversed(unique_labels.values()),
-        labels=reversed(unique_labels.keys()),
-        frameon=True,
-        loc="right",
-    )
-
     # Adjust layout to avoid overlap
     plt.tight_layout()
@@ -475,7 +470,7 @@ def get_metadata(video_file_path: str) -> VideoStreamMetadata:
 def run_benchmarks(
     decoder_dict: dict[str, AbstractDecoder],
-    video_files_paths: list[str],
+    video_files_paths: list[Path],
     num_samples: int,
     num_sequential_frames_from_start: list[int],
     min_runtime_seconds: float,
@@ -515,7 +510,7 @@
             seeked_result = benchmark.Timer(
                 stmt="decoder.get_frames_from_video(video_file, pts_list)",
                 globals={
-                    "video_file": video_file_path,
+                    "video_file": str(video_file_path),
                     "pts_list": pts_list,
                     "decoder": decoder,
                 },
@@ -528,22 +523,22 @@
             )
             df_item = {}
             df_item["decoder"] = decoder_name
-            df_item["video"] = video_file_path
+            df_item["video"] = str(video_file_path)
             df_item["description"] = results[-1].description
             df_item["frame_count"] = num_samples
             df_item["median"] = results[-1].median
             df_item["iqr"] = results[-1].iqr
             df_item["type"] = f"{kind}:seek()+next()"
-            df_item["fps"] = 1.0 * num_samples / results[-1].median
-            df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
-            df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
+            df_item["fps_median"] = num_samples / results[-1].median
+            df_item["fps_p75"] = num_samples / results[-1]._p75
+            df_item["fps_p25"] = num_samples / results[-1]._p25
             df_data.append(df_item)
 
         for num_consecutive_nexts in num_sequential_frames_from_start:
             consecutive_frames_result = benchmark.Timer(
                 stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
                 globals={
-                    "video_file": video_file_path,
+                    "video_file": str(video_file_path),
                     "consecutive_frames_to_extract": num_consecutive_nexts,
                     "decoder": decoder,
                 },
@@ -558,15 +553,15 @@
             )
             df_item = {}
             df_item["decoder"] = decoder_name
-            df_item["video"] = video_file_path
+            df_item["video"] = str(video_file_path)
             df_item["description"] = results[-1].description
             df_item["frame_count"] = num_consecutive_nexts
             df_item["median"] = results[-1].median
             df_item["iqr"] = results[-1].iqr
             df_item["type"] = "next()"
-            df_item["fps"] = 1.0 * num_consecutive_nexts / results[-1].median
-            df_item["fps_p75"] = 1.0 * num_consecutive_nexts / results[-1]._p75
-            df_item["fps_p25"] = 1.0 * num_consecutive_nexts / results[-1]._p25
+            df_item["fps_median"] = num_consecutive_nexts / results[-1].median
+            df_item["fps_p75"] = num_consecutive_nexts / results[-1]._p75
+            df_item["fps_p25"] = num_consecutive_nexts / results[-1]._p25
             df_data.append(df_item)
 
     first_video_file_path = video_files_paths[0]
@@ -576,7 +571,7 @@
     creation_result = benchmark.Timer(
         stmt="create_torchcodec_decoder_from_file(video_file)",
         globals={
-            "video_file": first_video_file_path,
+            "video_file": str(first_video_file_path),
            "create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
         },
         label=f"video={first_video_file_path} {metadata_label}",
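Note on the `fps_median`/`fps_p25`/`fps_p75` columns introduced above: a larger runtime percentile means lower throughput, so `fps_p75 <= fps_median <= fps_p25`, which is why `plot_data` passes `xerr=[fps_median - fps_p75, fps_p25 - fps_median]` as the lower/upper error pair. A small self-contained sketch with made-up timings (not real benchmark numbers):

```python
# Illustrative numbers only -- not taken from the benchmark data.
frame_count = 100
runtime_p25, runtime_median, runtime_p75 = 0.40, 0.42, 0.45  # seconds; p25 <= median <= p75

fps_median = frame_count / runtime_median  # ~238 fps
fps_p75 = frame_count / runtime_p75        # ~222 fps: slower runs give lower throughput
fps_p25 = frame_count / runtime_p25        # 250 fps: faster runs give higher throughput

# barh() takes asymmetric error bars as [errors_below, errors_above] relative to the bar value,
# so the "p75" throughput supplies the lower error and the "p25" throughput the upper one.
xerr = [fps_median - fps_p75, fps_p25 - fps_median]
assert xerr[0] > 0 and xerr[1] > 0
```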
diff --git a/benchmarks/decoders/benchmark_readme_chart.png b/benchmarks/decoders/benchmark_readme_chart.png
index 8c5466cc2..e3c09a46a 100644
Binary files a/benchmarks/decoders/benchmark_readme_chart.png and b/benchmarks/decoders/benchmark_readme_chart.png differ
diff --git a/benchmarks/decoders/benchmark_readme_data.json b/benchmarks/decoders/benchmark_readme_data.json
index 38a32c6c5..28c79303d 100644
--- a/benchmarks/decoders/benchmark_readme_data.json
+++ b/benchmarks/decoders/benchmark_readme_data.json
@@ -2,182 +2,290 @@
     {
         "decoder": "TorchCodec",
         "description": "uniform 10 seek()+next()",
-        "fps": 312.84353293082387,
-        "fps_p25": 322.7871320121642,
-        "fps_p75": 302.8227679557008,
+        "fps_median": 2.874245162330032,
+        "fps_p25": 2.890669218048133,
+        "fps_p75": 2.862767020928218,
         "frame_count": 10,
-        "iqr": 0.0020424467511475086,
-        "median": 0.03196486085653305,
+        "iqr": 0.03371739387512207,
+        "median": 3.4791743345558643,
         "type": "uniform:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
         "decoder": "TorchCodec",
         "description": "random 10 seek()+next()",
-        "fps": 313.33763162396417,
-        "fps_p25": 321.75500163530694,
-        "fps_p75": 303.2479405107031,
+        "fps_median": 3.658458368739899,
+        "fps_p25": 3.679569429876782,
+        "fps_p75": 3.606357184854245,
         "frame_count": 10,
-        "iqr": 0.0018967683427035809,
-        "median": 0.03191445581614971,
+        "iqr": 0.05517190555110574,
+        "median": 2.733391771093011,
         "type": "random:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
         "decoder": "TorchCodec",
         "description": "100 next()",
-        "fps": 1194.7216700127474,
-        "fps_p25": 1256.8548260461632,
-        "fps_p75": 900.2557223623917,
+        "fps_median": 248.32328677696285,
+        "fps_p25": 250.6501813668334,
+        "fps_p75": 244.31468271091225,
         "frame_count": 100,
-        "iqr": 0.031515865586698055,
-        "median": 0.08370150346308947,
+        "iqr": 0.010345779359340668,
+        "median": 0.402700855396688,
         "type": "next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
-        "decoder": "TorchCodec[num_threads=1]",
+        "decoder": "TorchVision[video_reader]",
         "description": "uniform 10 seek()+next()",
-        "fps": 130.63321225706912,
-        "fps_p25": 131.92649622587825,
-        "fps_p75": 129.0592076739796,
+        "fps_median": 0.4195638935454161,
+        "fps_p25": 0.42014650010734295,
+        "fps_p75": 0.4189829005177118,
         "frame_count": 10,
-        "iqr": 0.0016840321477502584,
-        "median": 0.0765502112917602,
+        "iqr": 0.06610076874494553,
+        "median": 23.834272095002234,
         "type": "uniform:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
-        "decoder": "TorchCodec[num_threads=1]",
+        "decoder": "TorchVision[video_reader]",
         "description": "random 10 seek()+next()",
-        "fps": 131.88189359890464,
-        "fps_p25": 132.88939755851635,
-        "fps_p75": 130.466572672506,
+        "fps_median": 0.32245125406966435,
+        "fps_p25": 0.32245125406966435,
+        "fps_p75": 0.32245125406966435,
         "frame_count": 10,
-        "iqr": 0.0013974376488476992,
-        "median": 0.07582542020827532,
+        "iqr": 0.0,
+        "median": 31.01243947353214,
         "type": "random:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
-        "decoder": "TorchCodec[num_threads=1]",
+        "decoder": "TorchVision[video_reader]",
        "description": "100 next()",
-        "fps": 976.1142182056929,
-        "fps_p25": 1016.0800435864988,
-        "fps_p75": 795.6587254838081,
+        "fps_median": 176.82997455955987,
+        "fps_p25": 177.54379221046926,
+        "fps_p75": 175.14625035317215,
         "frame_count": 100,
-        "iqr": 0.027264581993222237,
-        "median": 0.10244702734053135,
+        "iqr": 0.00771009735763073,
+        "median": 0.5655149826779962,
         "type": "next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
     },
     {
-        "decoder": "TorchVision[backend=video_reader]",
+        "decoder": "TorchAudio",
+        "description": "uniform 10 seek()+next()",
+        "fps_median": 0.5316661236830815,
+        "fps_p25": 0.5318804166321828,
+        "fps_p75": 0.5314520033403498,
+        "frame_count": 10,
+        "iqr": 0.015156010165810585,
+        "median": 18.80879663862288,
+        "type": "uniform:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchAudio",
+        "description": "random 10 seek()+next()",
+        "fps_median": 0.417209378798961,
+        "fps_p25": 0.41758998612516984,
+        "fps_p75": 0.4168294646408316,
+        "frame_count": 10,
+        "iqr": 0.04369210824370384,
+        "median": 23.968780444934964,
+        "type": "random:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchAudio",
+        "description": "100 next()",
+        "fps_median": 179.4697720392447,
+        "fps_p25": 181.05508626841646,
+        "fps_p75": 173.49148405860208,
+        "frame_count": 100,
+        "iqr": 0.024079074384644628,
+        "median": 0.5571968965232372,
+        "type": "next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "Decord",
+        "description": "uniform 10 seek()+next()",
+        "fps_median": 2.9254250604823127,
+        "fps_p25": 2.928776037979067,
+        "fps_p75": 2.9179279307467434,
+        "frame_count": 10,
+        "iqr": 0.012693846598267555,
+        "median": 3.4183066710829735,
+        "type": "uniform:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "Decord",
+        "description": "random 10 seek()+next()",
+        "fps_median": 2.3913952159683447,
+        "fps_p25": 2.409423905905687,
+        "fps_p75": 2.379609551240287,
+        "frame_count": 10,
+        "iqr": 0.05200037732720375,
+        "median": 4.181659281253815,
+        "type": "random:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "Decord",
+        "description": "100 next()",
+        "fps_median": 301.64058598625485,
+        "fps_p25": 304.4257754819803,
+        "fps_p75": 297.8336145342091,
+        "frame_count": 100,
+        "iqr": 0.0072706404607743025,
+        "median": 0.33152037439867854,
+        "type": "next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/mandelbrot_1280x720_120s_60fps_600gop_libx264_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchCodec",
+        "description": "uniform 10 seek()+next()",
+        "fps_median": 32.797487611763884,
+        "fps_p25": 33.16049598127707,
+        "fps_p75": 32.34604228526151,
+        "frame_count": 10,
+        "iqr": 0.007593189366161823,
+        "median": 0.30490140337496996,
+        "type": "uniform:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchCodec",
+        "description": "random 10 seek()+next()",
+        "fps_median": 31.86583108909739,
+        "fps_p25": 32.422775044534646,
+        "fps_p75": 31.51582155415764,
+        "frame_count": 10,
+        "iqr": 0.008875773288309574,
+        "median": 0.313815759960562,
+        "type": "random:seek()+next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchCodec",
+        "description": "100 next()",
+        "fps_median": 478.59019817346393,
+        "fps_p25": 483.5137156807738,
+        "fps_p75": 469.99482336428133,
+        "frame_count": 100,
+        "iqr": 0.00594893516972661,
+        "median": 0.20894702896475792,
+        "type": "next()",
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
+    },
+    {
+        "decoder": "TorchVision[video_reader]",
         "description": "uniform 10 seek()+next()",
-        "fps": 7.77759549232789,
-        "fps_p25": 7.818750688897255,
-        "fps_p75": 7.708450468534736,
+        "fps_median": 5.863026312201653,
+        "fps_p25": 5.883880399939407,
+        "fps_p75": 5.821213575132222,
         "frame_count": 10,
-        "iqr": 0.018300878116860986,
-        "median": 1.2857444193214178,
+        "iqr": 0.01829617563635111,
+        "median": 1.705603807233274,
         "type": "uniform:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
-        "decoder": "TorchVision[backend=video_reader]",
+        "decoder": "TorchVision[video_reader]",
         "description": "random 10 seek()+next()",
-        "fps": 7.160186815967891,
-        "fps_p25": 7.1908539836642165,
-        "fps_p75": 7.0379416903234775,
+        "fps_median": 3.5804448256145283,
+        "fps_p25": 3.610017214846564,
+        "fps_p75": 3.573725565752598,
         "frame_count": 10,
-        "iqr": 0.030214559519663453,
-        "median": 1.3966116048395634,
+        "iqr": 0.028130420949310064,
+        "median": 2.7929490571841598,
         "type": "random:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
-        "decoder": "TorchVision[backend=video_reader]",
+        "decoder": "TorchVision[video_reader]",
         "description": "100 next()",
-        "fps": 842.1387064892538,
-        "fps_p25": 849.6200178567948,
-        "fps_p75": 831.8435513909374,
+        "fps_median": 220.60506211225706,
+        "fps_p25": 221.92824221952606,
+        "fps_p75": 219.83775682122163,
         "frame_count": 100,
-        "iqr": 0.0025152377784252167,
-        "median": 0.11874528415501118,
+        "iqr": 0.004284817026928067,
+        "median": 0.45329875499010086,
         "type": "next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "TorchAudio",
         "description": "uniform 10 seek()+next()",
-        "fps": 27.90513829975662,
-        "fps_p25": 28.13882301993776,
-        "fps_p75": 27.54551640075421,
+        "fps_median": 10.562701139154996,
+        "fps_p25": 10.594120999307123,
+        "fps_p75": 10.475383401305544,
         "frame_count": 10,
-        "iqr": 0.007654597284272313,
-        "median": 0.35835694102570415,
+        "iqr": 0.010699251666665077,
+        "median": 0.9467275338247418,
         "type": "uniform:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "TorchAudio",
         "description": "random 10 seek()+next()",
-        "fps": 25.684434182425214,
-        "fps_p25": 25.879457397198443,
-        "fps_p75": 25.267930621911752,
+        "fps_median": 7.143372898069971,
+        "fps_p25": 7.190431420792876,
+        "fps_p75": 6.984323268168379,
         "frame_count": 10,
-        "iqr": 0.009351701475679874,
-        "median": 0.38934087194502354,
+        "iqr": 0.041040806798264384,
+        "median": 1.3998989192768931,
         "type": "random:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "TorchAudio",
         "description": "100 next()",
-        "fps": 667.0015484089075,
-        "fps_p25": 673.7295038551645,
-        "fps_p75": 660.0931578201269,
+        "fps_median": 234.31507730203276,
+        "fps_p25": 235.5241203289182,
+        "fps_p75": 233.2609776710573,
         "frame_count": 100,
-        "iqr": 0.0030662475619465113,
-        "median": 0.14992468943819404,
+        "iqr": 0.004119404591619968,
+        "median": 0.42677578050643206,
         "type": "next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "Decord",
         "description": "uniform 10 seek()+next()",
-        "fps": 250.76314292043256,
-        "fps_p25": 255.79988799274403,
-        "fps_p75": 243.70338823712694,
+        "fps_median": 28.951774908715404,
+        "fps_p25": 29.60339324217526,
+        "fps_p75": 28.130013392739134,
         "frame_count": 10,
-        "iqr": 0.0019404292106628418,
-        "median": 0.03987826872617006,
+        "iqr": 0.017693073954433203,
+        "median": 0.3454019669443369,
         "type": "uniform:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "Decord",
         "description": "random 10 seek()+next()",
-        "fps": 108.95659719588485,
-        "fps_p25": 111.19152822111404,
-        "fps_p75": 106.52264411126707,
+        "fps_median": 23.55224702573544,
+        "fps_p25": 23.829998507803513,
+        "fps_p75": 23.327513783118945,
         "frame_count": 10,
-        "iqr": 0.003941844217479229,
-        "median": 0.09177966509014368,
+        "iqr": 0.00903920829296112,
+        "median": 0.424587938003242,
         "type": "random:seek()+next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "decoder": "Decord",
         "description": "100 next()",
-        "fps": 1159.3142395946288,
-        "fps_p25": 1169.7791224490422,
-        "fps_p75": 1140.3958820854073,
+        "fps_median": 526.5873947695661,
+        "fps_p25": 534.5632246100912,
+        "fps_p75": 514.0865116923063,
         "frame_count": 100,
-        "iqr": 0.0022026230581104755,
-        "median": 0.08625788986682892,
+        "iqr": 0.007451178273186088,
+        "median": 0.18990200106054544,
         "type": "next()",
-        "video": "/tmp/torchcodec_benchmarking_videos/640x480_10s_30fps_600gop_libx264_yuv420p.mp4"
+        "video": "/tmp/torchcodec_benchmarking_videos/nasa_960x540_206s_30fps_yuv420p.mp4"
     },
     {
         "cpu_count": 56,
diff --git a/benchmarks/decoders/generate_readme_data.py b/benchmarks/decoders/generate_readme_data.py
index 01d30b571..277cd3752 100644
--- a/benchmarks/decoders/generate_readme_data.py
+++ b/benchmarks/decoders/generate_readme_data.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import glob
 import json
 import os
 import platform
@@ -14,29 +13,35 @@ from benchmark_decoders_library import (
     DecordAccurateBatch,
     generate_videos,
+    retrieve_videos,
     run_benchmarks,
     TorchAudioDecoder,
     TorchCodecPublic,
     TorchVision,
 )
 
+NASA_URL = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
+
 
 def main() -> None:
     """Benchmarks the performance of a few video decoders on synthetic videos"""
-    resolutions = ["640x480"]
+    videos_dir_path = "/tmp/torchcodec_benchmarking_videos"
+    shutil.rmtree(videos_dir_path, ignore_errors=True)
+    os.makedirs(videos_dir_path)
+
+    resolutions = ["1280x720"]
     encodings = ["libx264"]
-    fpses = [30]
+    patterns = ["mandelbrot"]
+    fpses = [60]
     gop_sizes = [600]
-    durations = [10]
+    durations = [120]
     pix_fmts = ["yuv420p"]
     ffmpeg_path = "ffmpeg"
-    videos_dir_path = "/tmp/torchcodec_benchmarking_videos"
-    shutil.rmtree(videos_dir_path, ignore_errors=True)
-    os.makedirs(videos_dir_path)
 
     generate_videos(
         resolutions,
         encodings,
+        patterns,
         fpses,
         gop_sizes,
         durations,
@@ -44,17 +49,21 @@ def main() -> None:
         ffmpeg_path,
         videos_dir_path,
     )
-    video_files_paths = glob.glob(f"{videos_dir_path}/*.mp4")
+
+    urls_and_dest_paths = [
+        (NASA_URL, f"{videos_dir_path}/nasa_960x540_206s_30fps_yuv420p.mp4")
+    ]
+    retrieve_videos(urls_and_dest_paths)
 
     decoder_dict = {}
     decoder_dict["TorchCodec"] = TorchCodecPublic()
-    decoder_dict["TorchCodec[num_threads=1]"] = TorchCodecPublic(num_ffmpeg_threads=1)
-    decoder_dict["TorchVision[backend=video_reader]"] = TorchVision("video_reader")
+    decoder_dict["TorchVision[video_reader]"] = TorchVision("video_reader")
     decoder_dict["TorchAudio"] = TorchAudioDecoder()
     decoder_dict["Decord"] = DecordAccurateBatch()
 
     # These are the number of uniform seeks we do in the seek+decode benchmark.
     num_samples = 10
+    video_files_paths = list(Path(videos_dir_path).glob("*.mp4"))
     df_data = run_benchmarks(
         decoder_dict,
         video_files_paths,