diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 5d56c7f96..3ffa43603 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -11,6 +11,7 @@ def preprocess(i):
     if os_info['platform'] == 'windows':
         return {'return': 1, 'error': 'Windows is not supported in this script yet'}
     env = i['env']
+    state = i['state']
 
     if is_true(env.get('MLC_RUN_STATE_DOCKER', '')):
         return {'return': 0}
@@ -110,7 +111,8 @@ def preprocess(i):
                 shutil.rmtree(target_data_path)
             if not os.path.exists(tsv_file):
                 os.makedirs(target_data_path, exist_ok=True)
-                # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'")
+                # cmds.append("make download_data
+                # BENCHMARKS='stable-diffusion-xl'")
                 env['MLC_REQUIRE_COCO2014_DOWNLOAD'] = 'yes'
                 cmds.append(
                     f"""cp -r \\$MLC_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""")
@@ -154,7 +156,8 @@ def preprocess(i):
 
         if not os.path.exists(target_data_path) or not os.path.exists(
                 inference_cases_json_path) or not os.path.exists(calibration_cases_json_path):
-            # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']} {target_data_path}")
+            # cmds.append(f"ln -sf {env['MLC_DATASET_PATH']}
+            # {target_data_path}")
            cmds.append("make download_data BENCHMARKS='3d-unet'")
 
         model_path = os.path.join(
@@ -174,7 +177,8 @@ def preprocess(i):
         if not os.path.exists(target_data_path_base_dir):
             cmds.append(f"mkdir -p {target_data_path_base_dir}")
         if not os.path.exists(target_data_path):
-            # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']} {target_data_path}")
+            # cmds.append(f"ln -sf {env['MLC_DATASET_LIBRISPEECH_PATH']}
+            # {target_data_path}")
             cmds.append("make download_data BENCHMARKS='rnnt'")
 
         model_path = os.path.join(
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 00aa41969..1dcd75492 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -350,6 +350,7 @@ post_deps:
 
 # Variations to customize dependencies
 variations:
+  pre5.0: {}
   # MLPerf inference version
   v5.0:
     group: version
@@ -363,8 +364,6 @@ variations:
         tags: _for-nvidia-mlperf-inference-v5.0
       pycuda:
         version: "2024.1"
-      nvidia-inference-server:
-        tags: _mlcommons,_v5.0
   v4.1:
     group: version
     env:
@@ -434,16 +433,19 @@ variations:
         version: 0.3.27
 
   resnet50,v4.0:
-    - tags: get,generic-python-lib,_package.onnx
-      version: 1.13.1
+    deps:
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
 
   resnet50,v4.1:
-    - tags: get,generic-python-lib,_package.onnx
-      version: 1.13.1
+    deps:
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
 
   resnet50,v5.0:
-    - tags: get,generic-python-lib,_package.onnx
-      version: 1.17.0
+    deps:
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.17.0
 
   retinanet:
     group: model
@@ -595,6 +597,8 @@ variations:
         names:
           - onnx
         version: "1.17.0"
+      - tags: get,generic-python-lib,_package.onnx-graphsurgeon
+        version: "0.5.2"
      - tags: get,generic-python-lib,_package.numpy
        names:
          - numpy
@@ -1595,22 +1599,42 @@ variations:
     env:
       MLC_NVIDIA_CUSTOM_GPU: "yes"
 
-  rtx_a6000,resnet50,offline,run_harness:
+  rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
+    default_variations:
+      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+
+  rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.64
+
+  rtx_a6000,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:64"
 
batch_size."resnet50:64" - rtx_a6000,resnet50,server,run_harness: + rtx_a6000,pre5.0,resnet50,server,run_harness: default_variations: batch-size: batch_size.32 - rtx_a6000,retinanet,offline,run_harness: + rtx_a6000,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:32" + + rtx_a6000,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 - rtx_a6000,retinanet,server,run_harness: + rtx_a6000,v5.0,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + + rtx_a6000,pre5.0,retinanet,server,run_harness: default_variations: batch-size: batch_size.2 + rtx_a6000,v5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + rtx_a6000,bert_,offline,run_harness: default_variations: batch-size: batch_size.256 @@ -1619,14 +1643,22 @@ variations: default_variations: batch-size: batch_size.256 - rtx_a6000,3d-unet_,offline,run_harness: + rtx_a6000,pre5.0,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.8 + + rtx_a6000,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" - rtx_a6000,3d-unet_,server,run_harness: + rtx_a6000,pre5.0,3d-unet_,server,run_harness: default_variations: batch-size: batch_size.8 + rtx_a6000,v5.0,3d-unet_,server,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" + rtx_a6000,rnnt,offline,run_harness: default_variations: batch-size: batch_size.2048 @@ -1635,31 +1667,51 @@ variations: default_variations: batch-size: batch_size.512 - rtx_a6000,dlrm_,offline,run_harness: + rtx_a6000,pre5.0,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 + + rtx_a6000,v5.0,dlrm_,offline,run_harness: + default_variations: + batch-size: batch_size."dlrm-v2:1400" rtx_6000_ada: group: gpu-name env: MLC_NVIDIA_CUSTOM_GPU: "yes" - rtx_6000_ada,resnet50,offline,run_harness: + rtx_6000_ada,pre5.0,resnet50,offline,run_harness: default_variations: batch-size: batch_size.64 + + rtx_a6000_ada,v5.0,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size."resnet50:64" - rtx_6000_ada,resnet50,server,run_harness: + rtx_6000_ada,pre5.0,resnet50,server,run_harness: default_variations: batch-size: batch_size.32 + + rtx_a6000_ada,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:32" - rtx_6000_ada,retinanet,offline,run_harness: + rtx_6000_ada,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 - rtx_6000_ada,retinanet,server,run_harness: + rtx_a6000_ada,v5.0,retinanet,offline,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + + rtx_6000_ada,pre5.0,retinanet,server,run_harness: default_variations: batch-size: batch_size.2 + rtx_a6000_ada,v5.0,retinanet,server,run_harness: + default_variations: + batch-size: batch_size."retinanet:2" + rtx_6000_ada,bert_,offline,run_harness: default_variations: batch-size: batch_size.256 @@ -1668,14 +1720,22 @@ variations: default_variations: batch-size: batch_size.256 - rtx_6000_ada,3d-unet_,offline,run_harness: + rtx_6000_ada,pre5.0,3d-unet_,offline,run_harness: default_variations: batch-size: batch_size.8 - rtx_6000_ada,3d-unet_,server,run_harness: + rtx_a6000_ada,v5.0,3d-unet_,offline,run_harness: + default_variations: + batch-size: batch_size."3d-unet:8" + + rtx_6000_ada,pre5.0,3d-unet_,server,run_harness: default_variations: batch-size: batch_size.8 + rtx_a6000_ada,v5.0,3d-unet_,offline,run_harness: + default_variations: + 
batch-size: batch_size."3d-unet:8" + rtx_6000_ada,rnnt,offline,run_harness: default_variations: batch-size: batch_size.512 @@ -1684,45 +1744,73 @@ variations: default_variations: batch-size: batch_size.512 - rtx_6000_ada,dlrm_,offline,run_harness: + rtx_6000_ada,pre5.0,dlrm_,offline,run_harness: default_variations: batch-size: batch_size.1400 + rtx_a6000_ada,v5.0,dlrm_,offline,run_harness: + default_variations: + batch-size: batch_size."dlrm-v2:1400" + l4: group: gpu-name env: MLC_NVIDIA_CUSTOM_GPU: "yes" l4,sdxl,offline,run_harness: - default_variations: - batch-size: batch_size.1 env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 0.6 - - l4,sdxl,offline,run_harness,num-gpu.8: + + l4,pre5.0,sdxl,offline,run_harness: default_variations: batch-size: batch_size.1 + + l4,v5.0,sdxl,offline,run_harness: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + + l4,sdxl,offline,run_harness,num-gpu.8: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 4.8 - - l4,sdxl,server,run_harness,num-gpu.1: + + l4,pre5.0,sdxl,offline,run_harness,num-gpu.8: default_variations: batch-size: batch_size.1 + + l4,v5.0,sdxl,offline,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + + l4,sdxl,server,run_harness,num-gpu.1: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 0.55 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 - - l4,sdxl,server,run_harness,num-gpu.8: + + l4,pre5.0,sdxl,server,run_harness,num-gpu.1: default_variations: batch-size: batch_size.1 + + l4,v5.0,sdxl,server,run_harness,num-gpu.1: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + + l4,sdxl,server,run_harness,num-gpu.8: env: MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' MLC_MLPERF_LOADGEN_SERVER_TARGET_QPS: 5.05 MLC_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT: 0 + l4,pre5.0,sdxl,server,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size.1 + + l4,v5.0,sdxl,server,run_harness,num-gpu.8: + default_variations: + batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1" + l4,resnet50: default_env: MLC_MLPERF_LOADGEN_OFFLINE_TARGET_QPS: 10500 @@ -1731,16 +1819,20 @@ variations: MLC_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY: 1 l4,resnet50,offline,run_harness: - default_variations: - batch-size: batch_size.32 env: MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2" MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "1" MLC_MLPERF_NVIDIA_HARNESS_USE_GRAPHS: 'True' - l4,resnet50,server,run_harness: + l4,pre5.0,resnet50,offline,run_harness: default_variations: - batch-size: batch_size.16 + batch-size: batch_size.32 + + l4,v5.0,resnet50,offline,run_harness: + default_variations: + batch-size: batch_size."resnet50:32" + + l4,resnet50,server,run_harness: env: MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "9" MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2" @@ -1749,13 +1841,23 @@ variations: MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000 MLC_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: 'True' - l4,retinanet,offline,run_harness: + l4,pre5.0,resnet50,server,run_harness: default_variations: - batch-size: batch_size.2 + batch-size: batch_size.16 - l4,retinanet,server,run_harness: + l4,v5.0,resnet50,server,run_harness: + default_variations: + batch-size: batch_size."resnet50:16" + + l4,pre5.0,retinanet,offline,run_harness: default_variations: batch-size: batch_size.2 + + l4,v5.0,retinanet,offline,run_harness: 
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  l4,retinanet,server,run_harness:
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
@@ -1763,6 +1865,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 30000
       MLC_MLPERF_NVIDIA_HARNESS_WORKSPACE_SIZE: 20000000000
 
+  l4,pre5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+
+  l4,v5.0,retinanet,server,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
   l4,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.16
@@ -1776,10 +1886,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_SOFT_DROP: "1.0"
       MLC_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN: "True"
 
-  l4,3d-unet_,offline,run_harness:
+  l4,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1
 
+  l4,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:1"
+
   l4,rnnt,offline,run_harness:
     default_variations:
       batch-size: batch_size.512
@@ -1792,9 +1906,14 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES: "1024"
       MLC_MLPERF_NVIDIA_HARNESS_NUM_WARMUPS: "1024"
 
-  l4,dlrm_,offline,run_harness:
+  l4,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
+
+  l4,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm-v2:1400"
+
   t4:
     group: gpu-name
     env:
diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py
index 5eb368fb1..0912cb803 100644
--- a/script/app-mlperf-inference/customize.py
+++ b/script/app-mlperf-inference/customize.py
@@ -18,7 +18,16 @@ def preprocess(i):
 
     env = i['env']
     state = i['state']
+    logger = i['automation'].logger
+
     if env.get('MLC_MLPERF_IMPLEMENTATION', '') == 'nvidia':
+        if "nvidia" in env.get('MLC_CUDA_DEVICE_PROP_GPU_NAME', '').lower() and env.get(
+                'MLC_NVIDIA_GPU_NAME', '') == '':
+            # extract the Nvidia GPU model name automatically
+            env['MLC_NVIDIA_GPU_NAME'] = env['MLC_CUDA_DEVICE_PROP_GPU_NAME'].lower(
+            ).split()[-1].strip()
+            logger.info(
+                f"Extracted Nvidia GPU name: {env['MLC_NVIDIA_GPU_NAME']}")
         if env.get('MLC_NVIDIA_GPU_NAME', '') in [
                 "rtx_4090", "a100", "t4", "l4", "orin", "custom"]:
             env['MLC_NVIDIA_HARNESS_GPU_VARIATION'] = "_" + \
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index dcfea1431..0fff01d99 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1715,6 +1715,8 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_TEST_QUERY_COUNT: '100'
@@ -1733,6 +1735,8 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
 
@@ -1746,6 +1750,8 @@ variations:
       nvidia-inference-server:
         version: r3.0
         tags: _nvidia-only
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1766,6 +1772,8 @@ variations:
         tags: _v3.1
       nvidia-scratch-space:
         tags: _version.4_0-dev
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1784,6 +1792,8 @@ variations:
         tags: _ctuning
       intel-harness:
         tags: _v3.1
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1806,6 +1816,8 @@ variations:
         tags: _v4.0
       nvidia-scratch-space:
         tags: _version.4_1-dev
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1826,6 +1838,8 @@ variations:
         tags: _v4.1
       nvidia-scratch-space:
         tags: _version.4_1
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1848,7 +1862,9 @@ variations:
       intel-harness:
         tags: _v4.1
       inference-src:
-        version: r5.0
+        version: r5.0
+      nvidia-original-mlperf-inference:
+        tags: _pre5.0
       nvidia-scratch-space:
         tags: _version.5.0-dev
     default_env:
@@ -1876,6 +1892,8 @@ variations:
         tags: _version.5.0
       pycuda:
         version: "2024.1"
+      nvidia-harness:
+        tags: _v5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
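
Reviewer note (appended commentary, not part of the patch): the GPU-name
auto-detection added to script/app-mlperf-inference/customize.py keeps only
the last whitespace-separated token of the CUDA device property name. A
minimal standalone sketch of that behaviour; the function name and sample
device strings below are illustrative assumptions, not taken from the patch:

    def extract_gpu_name(device_prop_name: str) -> str:
        # Mirrors the new logic in preprocess(): lower-case the reported
        # device name and keep only its last whitespace-separated token.
        return device_prop_name.lower().split()[-1].strip()

    # The patch only runs this when the reported name contains "nvidia"
    # and MLC_NVIDIA_GPU_NAME is still unset.
    assert extract_gpu_name("NVIDIA L4") == "l4"
    assert extract_gpu_name("NVIDIA GeForce RTX 4090") == "4090"

Because only the final token survives, a multi-word model name such as
"NVIDIA GeForce RTX 4090" appears to reduce to "4090" rather than the
"rtx_4090" entry in the recognized-GPU list, so such GPUs may still need
MLC_NVIDIA_GPU_NAME set explicitly. Note also that the v5.0 batch-size
variations switch from a single integer to a per-component mapping, e.g.
batch_size."clip1:2,clip2:2,unet:2,vae:1" for SDXL versus batch_size.1
pre-5.0.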