From e0470f921ddded55c12a03b086b871585efd1595 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 11 Jun 2025 20:44:13 +0530
Subject: [PATCH 01/17] test commit

---
 script/app-mlperf-inference-nvidia/meta.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 1dcd75492..cdc545209 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1768,7 +1768,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1781,7 +1781,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1795,7 +1795,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.1:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,server,run_harness,num-gpu.8:
     env:
@@ -1809,7 +1809,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,resnet50:
     default_env:

From 8f1984d0ebb10b4c85c133c854da02d6df72d276 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 11 Jun 2025 21:57:50 +0530
Subject: [PATCH 02/17] handle batch size for v5.0

---
 script/app-mlperf-inference-nvidia/customize.py |  4 ++--
 script/app-mlperf-inference-nvidia/meta.yaml    | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 3ffa43603..c03f3f483 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -524,11 +524,11 @@ def preprocess(i):
 
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
-        run_config += f" --gpu_batch_size={gpu_batch_size}"
+        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(";",",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
-        run_config += f" --dla_batch_size={dla_batch_size}"
+        run_config += f" --dla_batch_size={dla_batch_size}".replace(";",",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index cdc545209..65315bdb3 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1601,7 +1601,7 @@ variations:
 
   rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
@@ -1768,7 +1768,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1781,7 +1781,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1795,7 +1795,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.1:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.8:
     env:
@@ -1809,7 +1809,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,resnet50:
     default_env:

From a2e8c5bc51ec3db0e6911ee6919b3766985b73d0 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Wed, 11 Jun 2025 16:28:15 +0000
Subject: [PATCH 03/17] [Automated Commit] Format Codebase [skip ci]

---
 script/app-mlperf-inference-nvidia/customize.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c03f3f483..c16e9e29d 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -524,11 +524,13 @@ def preprocess(i):
 
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
-        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(";",",")
+        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(
+            ";", ",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
-        run_config += f" --dla_batch_size={dla_batch_size}".replace(";",",")
+        run_config += f" --dla_batch_size={dla_batch_size}".replace(
+            ";", ",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:

From f998fee1455cd91ba9d11ba31fbd48628beb9945 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 00:33:49 +0530
Subject: [PATCH 04/17] fix hpcx issue

---
 script/app-mlperf-inference-nvidia/customize.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c03f3f483..c2b0fced2 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -712,11 +712,13 @@ def preprocess(i):
     if '+LD_LIBRARY_PATH' not in env:
         env['+LD_LIBRARY_PATH'] = []
 
+    hpcx_paths = []
     if os.path.exists("/opt/hpcx/ucx/lib"):
-        env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucx/lib")
-
+        hpcx_paths.append("/opt/hpcx/ucx/lib")
     if os.path.exists("/opt/hpcx/ucc/lib"):
-        env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucc/lib")
+        hpcx_paths.append("/opt/hpcx/ucc/lib")
+
+    env['+LD_LIBRARY_PATH'] = hpcx_paths + env['+LD_LIBRARY_PATH']
 
     # print(env)

From 97404ae2d9ac64cad436e712442803207b6085bc Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 01:22:25 +0530
Subject: [PATCH 05/17] fix retinanet calib dataset path

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c19af9a7e..c53050483 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -242,7 +242,7 @@ def preprocess(i):
                 'data',
                 'open-images-v6-mlperf',
                 'calibration',
-                'train')
+                'calibraion')
             if not os.path.exists(target_data_path_dir):
                 cmds.append(f"mkdir -p {target_data_path_dir}")
             target_data_path = os.path.join(target_data_path_dir, 'data')

From 3d98f73952d3606efc6260e249c48c2b7292edb4 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 01:28:35 +0530
Subject: [PATCH 06/17] fix typo

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c53050483..9f890e09a 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -242,7 +242,7 @@ def preprocess(i):
                 'data',
                 'open-images-v6-mlperf',
                 'calibration',
-                'calibraion')
+                'calibration')
             if not os.path.exists(target_data_path_dir):
                 cmds.append(f"mkdir -p {target_data_path_dir}")
             target_data_path = os.path.join(target_data_path_dir, 'data')

From a4ad94c21c0c3941115766d9910a4dca97db61aa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 13:26:23 +0530
Subject: [PATCH 07/17] support new retinanet model version

---
 script/app-mlperf-inference-nvidia/customize.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 9f890e09a..abc3699cb 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -261,11 +261,18 @@ def preprocess(i):
             model_path = os.path.join(
                 target_model_path_dir,
                 'retinanet-fpn-torch2.1-postprocessed.onnx')
+            alt_model_versions = ["2.2", "2.6"]
             alt_model_path = os.path.join(
                 target_model_path_dir,
                 'retinanet-fpn-torch2.2-postprocessed.onnx')
-            if not os.path.exists(model_path) and os.path.exists(alt_model_path):
-                cmds.append(f"ln -s {alt_model_path} {model_path}")
+            if not os.path.exists(model_path):
+                for alt_model_version in alt_model_versions:
+                    alt_model_path = os.path.join(
+                        target_model_path_dir,
+                        f'retinanet-fpn-torch{alt_model_version}-postprocessed.onnx')
+                    if os.path.exists(alt_model_path):
+                        cmds.append(f"ln -s {alt_model_path} {model_path}")
+                        break
 
             model_name = "retinanet"

From 32ff1c2c754e758ec697a304790901d462b70a92 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 17:28:31 +0530
Subject: [PATCH 08/17] deepseek r1 + nvidia

---
 script/app-mlperf-inference-nvidia/meta.yaml |  8 ++
 script/app-mlperf-inference/meta.yaml        | 89 +++++++++++++++----
 2 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 65315bdb3..3d24eeeff 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -319,6 +319,8 @@ prehook_deps:
       - 'yes'
       MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
       - 'yes'
+      MLC_MLPERF_INFERENCE_CODE_VERSION:
+      - "v5.0"
 
 # Install coco2014 dataset
   - enable_if_env:
@@ -366,6 +368,7 @@ variations:
       version: "2024.1"
   v4.1:
     group: version
+    base: pre5.0
    env:
      MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.1"
      MLC_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-FP8-quantized
@@ -375,6 +378,7 @@ variations:
 
   v4.1-dev:
     group: version
+    base: pre5.0
     default: true
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.0"
@@ -384,6 +388,7 @@ variations:
       tags: _for-nvidia-mlperf-inference-v4.0
 
   v4.0:
+    base: pre5.0
     group: version
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.0"
@@ -392,6 +397,7 @@ variations:
       pytorch:
         tags: _for-nvidia-mlperf-inference-v4.0
   v3.1:
+    base: pre5.0
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v3.1"
       MLC_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-07142023.pth
@@ -563,6 +569,8 @@ variations:
         version: "0.4.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
+      - tags: get,generic-python-lib,_package.nvidia-modelopt
+        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 0fff01d99..6dc5736b1 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -297,6 +297,10 @@ variations:
   reference,bert_:
     default_variations:
       backend: onnxruntime
+
+  reference,deepseek-r1:
+    default_variations:
+      backend: pytorch
 
   all-models,nvidia-original:
     docker:
@@ -948,7 +952,52 @@ variations:
       names:
       - llama3_1-405b
      - llama3-405b
+
+  deepseek-r1:
+    group:
+      model
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _deepseek-r1
+    env:
+      MLC_MODEL:
+        deepseek-r1
+    posthook_deps:
+      - enable_if_env:
+          MLC_MLPERF_LOADGEN_MODE:
+          - accuracy
+          - all
+          MLC_MLPERF_ACCURACY_RESULTS_DIR:
+          - 'on'
+        skip_if_env:
+          MLC_MLPERF_IMPLEMENTATION:
+          - nvidia
+        names:
+        - mlperf-accuracy-script
+        - deepseek-r1-accuracy-script
+        tags: run,accuracy,mlperf,_dataset_deepseek-r1
+    docker:
+      deps:
+        - tags: get,mlperf,preprocessed,dataset,deepseek-r1
+          enable_if_env:
+            MLC_USE_DATASET_FROM_HOST:
+            - 'yes'
+          names:
+          - deepseek-r1-preprocessed-dataset
+      mounts:
+      - "${{ MLC_PREPROCESSED_DATASET_DEEPSEEK_R1_VALIDATION_PATH }}:${{ MLC_PREPROCESSED_DATASET_DEEPSEEK_R1_VALIDATION_PATH }}"
+
+  deepseek-r1,pytorch:
+    docker:
+      image_name: nvcr.io/nvidia/pytorch:25.04-py3
+  deepseek-r1,sglang:
+    docker:
+      image_name: nvidia/cuda:12.6.0-devel-ubuntu22.04
+
+  deepseek-r1,vllm:
+    docker:
+      image_name: nvidia/cuda:12.6.0-devel-ubuntu22.04
 
   sdxl:
     group:
       model
@@ -981,6 +1030,8 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
+      MLC_MLPERF_INFERENCE_VERSION:
+      - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone
 
   sdxl,reference,float16:
@@ -1403,6 +1454,24 @@ variations:
       mlperf-inference-implementation:
         tags: _pytorch
 
+  vllm:
+    group: backend
+    env:
+      MLC_MLPERF_BACKEND:
+        vllm
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _vllm
+
+  sglang:
+    group: backend
+    env:
+      MLC_MLPERF_BACKEND:
+        sglang
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _sglang
+
   openshift:
     group: backend
     env:
@@ -1715,8 +1784,6 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_TEST_QUERY_COUNT: '100'
@@ -1735,8 +1802,6 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
 
@@ -1750,8 +1815,6 @@ variations:
       nvidia-inference-server:
         version: r3.0
         tags: _nvidia-only
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1772,8 +1835,6 @@ variations:
         tags: _v3.1
       nvidia-scratch-space:
         tags: _version.4_0-dev
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1792,8 +1853,6 @@ variations:
         tags: _ctuning
       intel-harness:
         tags: _v3.1
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1816,8 +1875,6 @@ variations:
         tags: _v4.0
       nvidia-scratch-space:
         tags: _version.4_1-dev
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1838,8 +1895,6 @@ variations:
         tags: _v4.1
       nvidia-scratch-space:
         tags: _version.4_1
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1862,9 +1917,7 @@ variations:
       intel-harness:
         tags: _v4.1
       inference-src:
-        version: r5.0
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
+        version: r5.0
       nvidia-scratch-space:
         tags: _version.5.0-dev
     default_env:
@@ -1892,8 +1945,6 @@ variations:
         tags: _version.5.0
       pycuda:
         version: "2024.1"
-      nvidia-harness:
-        tags: _v5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'

From 9784e10d695b4de16ced3834c7c000e1c25893aa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 17:39:49 +0530
Subject: [PATCH 09/17] skip sdxl model download to host until we handle it in
 download model script

---
 script/app-mlperf-inference/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 6dc5736b1..4749237e4 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1030,6 +1030,7 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
+    - skip_if_env:
       MLC_MLPERF_INFERENCE_VERSION:
       - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone

From 321d80417a110ca73f7b5934cbf35f6fd9b3fed9 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:35:19 +0530
Subject: [PATCH 10/17] Update logic for download model

---
 .../app-mlperf-inference-nvidia/customize.py | 56 ++++++++++++++-----
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index abc3699cb..1ae716d04 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -366,20 +366,50 @@ def preprocess(i):
             else:
                 cmds.append(f"make download_model BENCHMARKS='{model_name}'")
         elif "stable-diffusion" in env['MLC_MODEL']:
-            folders = ["clip1", "clip2", "unetxl", "vae"]
-            for folder in folders:
-                onnx_model_path = os.path.join(
-                    env['MLPERF_SCRATCH_PATH'],
-                    'models',
-                    'SDXL',
-                    'onnx_models',
-                    folder,
-                    'model.onnx')
-                if not os.path.exists(onnx_model_path):
+            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == '5.0':
+                # Define folder mappings for each model type
+                model_folders = {
+                    'onnx_models': ["clip1", "clip2", "unetxl", "vae"],
+                    'modelopt_models': ["unetxl.fp8", "vae.int8"]
+                }
+
+                model_found = True
+
+                # Check all required models across both directories
+                for model_type, folders in model_folders.items():
+                    for folder in folders:
+                        onnx_model_path = os.path.join(
+                            env['MLPERF_SCRATCH_PATH'],
+                            'models',
+                            'SDXL',
+                            model_type,
+                            folder,
+                            'model.onnx'
+                        )
+                        if not os.path.exists(onnx_model_path):
+                            model_found = False
+                            break
+                    if not model_found:
+                        break
+                if not model_found:
                     env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
-                    cmds.append(
-                        f"make download_model BENCHMARKS='{model_name}'")
-                    break
+                    cmds.append(f"make download_model BENCHMARKS='{model_name}'")
+            else:
+                folders = ["clip1", "clip2", "unetxl", "vae"]
+                for folder in folders:
+                    onnx_model_path = os.path.join(
+                        env['MLPERF_SCRATCH_PATH'],
+                        'models',
+                        'SDXL',
+                        'onnx_models',
+                        folder,
+                        'model.onnx')
+                    if not os.path.exists(onnx_model_path):
+                        env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
+                        cmds.append(
+                            f"make download_model BENCHMARKS='{model_name}'")
+                        break
+
             if scenario.lower() == "singlestream":
                 ammo_model_path = os.path.join(
                     env['MLPERF_SCRATCH_PATH'],

From 6ea7a0f5ce884daff44dca35dbc399ba63004810 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 12 Jun 2025 13:05:37 +0000
Subject: [PATCH 11/17] [Automated Commit] Format Codebase [skip ci]

---
 script/app-mlperf-inference-nvidia/customize.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 1ae716d04..2e7687e48 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -372,9 +372,9 @@ def preprocess(i):
                     'onnx_models': ["clip1", "clip2", "unetxl", "vae"],
                     'modelopt_models': ["unetxl.fp8", "vae.int8"]
                 }
-
+
                 model_found = True
-
+
                 # Check all required models across both directories
                 for model_type, folders in model_folders.items():
                     for folder in folders:
@@ -393,7 +393,8 @@ def preprocess(i):
                         break
                 if not model_found:
                     env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
-                    cmds.append(f"make download_model BENCHMARKS='{model_name}'")
+                    cmds.append(
+                        f"make download_model BENCHMARKS='{model_name}'")
             else:
                 folders = ["clip1", "clip2", "unetxl", "vae"]
@@ -409,7 +410,7 @@ def preprocess(i):
                         cmds.append(
                             f"make download_model BENCHMARKS='{model_name}'")
                         break
-
+
             if scenario.lower() == "singlestream":
                 ammo_model_path = os.path.join(

From 91627cd2b126e4bb0c2a8f094b04586e77a22c56 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:40:44 +0530
Subject: [PATCH 12/17] fix typo

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 2e7687e48..fc8a79624 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -366,7 +366,7 @@ def preprocess(i):
             else:
                 cmds.append(f"make download_model BENCHMARKS='{model_name}'")
         elif "stable-diffusion" in env['MLC_MODEL']:
-            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == '5.0':
+            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == 'v5.0':
                 # Define folder mappings for each model type
                 model_folders = {
                     'onnx_models': ["clip1", "clip2", "unetxl", "vae"],

From a9c6c15eb3440d2996b0f9c545080ae33b1e731a Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:43:19 +0530
Subject: [PATCH 13/17] fix typo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 3d24eeeff..1eec11011 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -314,11 +314,9 @@ prehook_deps:
     - sdxl-model
     - ml-model
     tags: get,ml-model,sdxl,_fp16,_rclone
-    skip_if_env:
+    skip_if_any_env:
      MLC_RUN_STATE_DOCKER:
      - 'yes'
-      MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
-      - 'yes'
      MLC_MLPERF_INFERENCE_CODE_VERSION:
      - "v5.0"

From 4727ddef67f871575db7141bd28306e5c1533a20 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 19:21:38 +0530
Subject: [PATCH 14/17] Updated dependencies 3d-unet 5.0

---
 script/app-mlperf-inference-nvidia/meta.yaml | 20 ++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 1eec11011..666beb347 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -647,10 +647,22 @@ variations:
       - tags: get,generic-python-lib,_package.nibabel
       - tags: get,generic-python-lib,_pandas
         version_max: "1.5.3"
-      - tags: get,generic-python-lib,_onnx-graphsurgeon
-        version: 0.3.27
-      - tags: get,generic-python-lib,_package.onnx
-        version: 1.13.1
+
+  3d-unet_,pre5.0:
+    deps:
+      - tags: get,generic-python-lib,_onnx-graphsurgeon
+        version: 0.3.27
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
+
+  3d-unet_,v5.0:
+    deps:
+      - tags: get,generic-python-lib,_package.onnx
+        names:
+        - onnx
+        version: "1.17.0"
+      - tags: get,generic-python-lib,_package.onnx-graphsurgeon
+        version: "0.5.2"
 
   3d-unet-99:
     group: model

From bf2528e503dedcfb63d727a68e402a51240beccb Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 22:25:15 +0530
Subject: [PATCH 15/17] fix typo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 666beb347..155c4fcc0 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -567,8 +567,6 @@ variations:
         version: "0.4.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
-      - tags: get,generic-python-lib,_package.nvidia-modelopt
-        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:
@@ -598,6 +596,8 @@ variations:
         version: "0.6.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
+      - tags: get,generic-python-lib,_package.nvidia-modelopt
+        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:

From d7d892ae720880f0d59a30f7ae05bff08569ffdb Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 23:04:08 +0530
Subject: [PATCH 16/17] control model download in official repo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 +---
 script/app-mlperf-inference/meta.yaml        | 3 ---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 155c4fcc0..e7820ea6d 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -314,11 +314,9 @@ prehook_deps:
     - sdxl-model
     - ml-model
     tags: get,ml-model,sdxl,_fp16,_rclone
-    skip_if_any_env:
+    skip_if_env:
      MLC_RUN_STATE_DOCKER:
      - 'yes'
-      MLC_MLPERF_INFERENCE_CODE_VERSION:
-      - "v5.0"
 
 # Install coco2014 dataset
   - enable_if_env:
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 4749237e4..ef5bf3287 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1030,9 +1030,6 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
-    - skip_if_env:
-      MLC_MLPERF_INFERENCE_VERSION:
-      - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone
 
   sdxl,reference,float16:

From 8ea3bfe0e6c18cc944c9ca2e985cbdbe1f4030fa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Fri, 13 Jun 2025 20:12:22 +0530
Subject: [PATCH 17/17] Update batch sizes for v5.0

---
 .../app-mlperf-inference-nvidia/customize.py |   4 +-
 script/app-mlperf-inference-nvidia/meta.yaml | 334 ++++++++++++++----
 2 files changed, 260 insertions(+), 78 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index fc8a79624..eddf02252 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -563,12 +563,12 @@ def preprocess(i):
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
         run_config += f" --gpu_batch_size={gpu_batch_size}".replace(
-            ";", ",")
+            "##", ",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
         run_config += f" --dla_batch_size={dla_batch_size}".replace(
-            ";", ",")
+            "##", ",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index e7820ea6d..e8f609253 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1248,66 +1248,126 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS: '2'
       MLC_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS: True
 
-  gpu_memory.80,num-gpus.2,llama2-70b,offline,run_harness:
+  gpu_memory.80,pre5.0,num-gpus.2,llama2-70b,offline,run_harness:
     default_variations:
       batch-size: batch_size.896
+
+  gpu_memory.80,v5.0,num-gpus.2,llama2-70b,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."llama2-70b:1024"
 
-  gpu_memory.16,gptj_,offline,run_harness:
+  gpu_memory.16,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
 
-  gpu_memory.24,gptj_,offline,run_harness:
+  gpu_memory.16,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:4"
+
+  gpu_memory.24,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.7
 
-  gpu_memory.32,gptj_,offline,run_harness:
+  gpu_memory.24,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:7"
+
+  gpu_memory.32,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.32,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:8"
 
-  gpu_memory.48,gptj_,offline,run_harness:
+  gpu_memory.48,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.14
 
-  gpu_memory.40,gptj_,offline,run_harness:
+  gpu_memory.48,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:14"
+
+  gpu_memory.40,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.10
 
-  gpu_memory.80,gptj_,offline,run_harness:
+  gpu_memory.40,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:10"
+
+  gpu_memory.80,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.32
+
+  gpu_memory.80,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:80"
 
-  gpu_memory.16,sdxl,offline,run_harness:
+  gpu_memory.16,pre5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+
+  gpu_memory.16,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
-  gpu_memory.24,sdxl,offline,run_harness:
+  gpu_memory.24,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.32,sdxl,offline,run_harness:
+
+  gpu_memory.24,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
+
+  gpu_memory.32,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.3
 
-  gpu_memory.80,sdxl,offline,run_harness:
+  gpu_memory.32,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:1##clip2:2##unet:2##vae:1"
+
+  gpu_memory.80,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.96,sdxl,offline,run_harness:
+
+  gpu_memory.80,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.96,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.96,sdxl,server,run_harness:
+
+  gpu_memory.96,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.96,pre5.0,sdxl,server,run_harness:
     default_variations:
       batch-size: batch_size.8
 
-  gpu_memory.80,sdxl,server,run_harness:
+  gpu_memory.96,v5.0,sdxl,server,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.80,pre5.0,sdxl,server,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.140,sdxl,offline,run_harness:
+
+  gpu_memory.80,v5.0,sdxl,server,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.140,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
 
+  gpu_memory.140,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
   gpu_memory.8,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.256
@@ -1336,38 +1396,70 @@ variations:
     default_variations:
       batch-size: batch_size.64
 
-  gpu_memory.8,resnet50,offline,run_harness:
+  gpu_memory.8,pre5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.64
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
+
+  gpu_memory.8,v5.0,resnet50,offline,run_harness:
     default_variations:
-      batch-size: batch_size.64
+      batch-size: batch_size."resnet50:64"
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.16,resnet50,offline,run_harness:
+  gpu_memory.16,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.1024
     env:
      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.40,resnet50,offline,run_harness:
+  gpu_memory.16,v5.0,resnet50,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2048
+      batch-size: batch_size."resnet50:1024"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.24,resnet50,offline,run_harness:
+  gpu_memory.24,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.64
 
-  gpu_memory.32,resnet50,offline,run_harness:
+  gpu_memory.24,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:64"
+
+  gpu_memory.32,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.48,resnet50,offline,run_harness:
+  gpu_memory.32,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.40,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.80,resnet50,offline,run_harness:
+  gpu_memory.40,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.48,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
+  gpu_memory.48,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.80,pre5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2048
+
+  gpu_memory.80,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
   num-gpus.#:
     group: num-gpus
     env:
       MLC_MLPERF_NVIDIA_HARNESS_NUM_GPUS: '#'
@@ -1395,36 +1487,68 @@ variations:
     default_variations:
       batch-size: batch_size.2
 
-  gpu_memory.8,retinanet,offline,run_harness:
+  gpu_memory.8,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.16,retinanet,offline,run_harness:
+
+  gpu_memory.8,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  gpu_memory.16,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.40,retinanet,offline,run_harness:
+
+  gpu_memory.16,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  gpu_memory.24,pre5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
+
+  gpu_memory.24,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
+
+  gpu_memory.32,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.32,retinanet,offline,run_harness:
+
+  gpu_memory.32,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:4"
+
+  gpu_memory.40,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.48,retinanet,offline,run_harness:
+
+  gpu_memory.40,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:4"
+
+
+  gpu_memory.48,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.24,retinanet,offline,run_harness:
+
+  gpu_memory.48,v5.0,retinanet,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2
-    env:
-      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
-      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
-
-  gpu_memory.80,retinanet,offline,run_harness:
+      batch-size: batch_size."retinanet:4"
+
+  gpu_memory.80,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.80,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:8"
 
   retinanet,server,run_harness:
     default_variations:
@@ -1461,63 +1585,121 @@ variations:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.8,3d-unet_,offline,run_harness:
+  gpu_memory.8,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.16,3d-unet_,offline,run_harness:
+
+  gpu_memory.8,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:4"
+
+  gpu_memory.16,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.40,3d-unet_,offline,run_harness:
+
+  gpu_memory.16,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:4"
+
+  gpu_memory.24,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.24,3d-unet_,offline,run_harness:
+
+  gpu_memory.24,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.32,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.80,3d-unet_,offline,run_harness:
+
+  gpu_memory.32,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.40,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.32,3d-unet_,offline,run_harness:
+
+  gpu_memory.40,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.48,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.48,3d-unet_,offline,run_harness:
+
+  gpu_memory.48,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.80,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.80,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
 
-  gpu_memory.16,dlrm_,offline,run_harness:
+  gpu_memory.16,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
-
-  gpu_memory.40,dlrm_,offline,run_harness:
+
+  gpu_memory.16,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+
+  gpu_memory.24,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
     env:
-      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
-
-  gpu_memory.24,dlrm_,offline,run_harness:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.30"
+
+  gpu_memory.24,v5.0,dlrm_,offline,run_harness:
+    default_variations:
-      batch-size: batch_size.1400
+      batch-size: batch_size."dlrm:1400"
     env:
       MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.30"
-
-  gpu_memory.32,dlrm_,offline,run_harness:
+
+  gpu_memory.32,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
-
-  gpu_memory.48,dlrm_,offline,run_harness:
+
+  gpu_memory.32,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+
+  gpu_memory.40,pre5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.1400
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
+
+  gpu_memory.40,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
+
+  gpu_memory.48,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
     env:
       MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.50"
-
-  gpu_memory.80,dlrm_,offline,run_harness:
+
+  gpu_memory.48,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.50"
+
+  gpu_memory.80,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
+
+  gpu_memory.80,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
 
   orin:
     group: gpu-name
@@ -1617,7 +1799,7 @@ variations:
 
   rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
@@ -1784,7 +1966,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1797,7 +1979,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1825,7 +2007,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,resnet50:
     default_env: