From e0470f921ddded55c12a03b086b871585efd1595 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 11 Jun 2025 20:44:13 +0530
Subject: [PATCH 01/17] test commit

---
 script/app-mlperf-inference-nvidia/meta.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 1dcd75492..cdc545209 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1768,7 +1768,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1781,7 +1781,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1795,7 +1795,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.1:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,sdxl,server,run_harness,num-gpu.8:
     env:
@@ -1809,7 +1809,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
 
   l4,resnet50:
     default_env:

From 8f1984d0ebb10b4c85c133c854da02d6df72d276 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Wed, 11 Jun 2025 21:57:50 +0530
Subject: [PATCH 02/17] handle batch size for v5.0

---
 script/app-mlperf-inference-nvidia/customize.py |  4 ++--
 script/app-mlperf-inference-nvidia/meta.yaml    | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 3ffa43603..c03f3f483 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -524,11 +524,11 @@ def preprocess(i):
 
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
-        run_config += f" --gpu_batch_size={gpu_batch_size}"
+        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(";",",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
-        run_config += f" --dla_batch_size={dla_batch_size}"
+        run_config += f" --dla_batch_size={dla_batch_size}".replace(";",",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index cdc545209..65315bdb3 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1601,7 +1601,7 @@ variations:
 
   rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
     default_variations:
-      batch-size: batch_size."clip1:2,clip2:2,unet:2,vae:1"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
@@ -1768,7 +1768,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1781,7 +1781,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1795,7 +1795,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.1:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.8:
     env:
@@ -1809,7 +1809,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size.\"clip1:2,clip2:2,unet:2,vae:1\"
+      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
 
   l4,resnet50:
     default_env:

From a2e8c5bc51ec3db0e6911ee6919b3766985b73d0 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Wed, 11 Jun 2025 16:28:15 +0000
Subject: [PATCH 03/17] [Automated Commit] Format Codebase [skip ci]

---
 script/app-mlperf-inference-nvidia/customize.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c03f3f483..c16e9e29d 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -524,11 +524,13 @@ def preprocess(i):
 
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
-        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(";",",")
+        run_config += f" --gpu_batch_size={gpu_batch_size}".replace(
+            ";", ",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
-        run_config += f" --dla_batch_size={dla_batch_size}".replace(";",",")
+        run_config += f" --dla_batch_size={dla_batch_size}".replace(
+            ";", ",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:

From f998fee1455cd91ba9d11ba31fbd48628beb9945 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 00:33:49 +0530
Subject: [PATCH 04/17] fix hpcx issue

---
 script/app-mlperf-inference-nvidia/customize.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c03f3f483..c2b0fced2 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -712,11 +712,13 @@ def preprocess(i):
     if '+LD_LIBRARY_PATH' not in env:
         env['+LD_LIBRARY_PATH'] = []
 
+    hpcx_paths = []
     if os.path.exists("/opt/hpcx/ucx/lib"):
-        env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucx/lib")
-
+        hpcx_paths.append("/opt/hpcx/ucx/lib")
     if os.path.exists("/opt/hpcx/ucc/lib"):
-        env['+LD_LIBRARY_PATH'].append("/opt/hpcx/ucc/lib")
+        hpcx_paths.append("/opt/hpcx/ucc/lib")
+
+    env['+LD_LIBRARY_PATH'] = hpcx_paths + env['+LD_LIBRARY_PATH']
 
     # print(env)

From 97404ae2d9ac64cad436e712442803207b6085bc Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 01:22:25 +0530
Subject: [PATCH 05/17] fix retinanet calib dataset path

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c19af9a7e..c53050483 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -242,7 +242,7 @@ def preprocess(i):
                 'data',
                 'open-images-v6-mlperf',
                 'calibration',
-                'train')
+                'calibraion')
             if not os.path.exists(target_data_path_dir):
                 cmds.append(f"mkdir -p {target_data_path_dir}")
             target_data_path = os.path.join(target_data_path_dir, 'data')

From 3d98f73952d3606efc6260e249c48c2b7292edb4 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 01:28:35 +0530
Subject: [PATCH 06/17] fix typo

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index c53050483..9f890e09a 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -242,7 +242,7 @@ def preprocess(i):
                 'data',
                 'open-images-v6-mlperf',
                 'calibration',
-                'calibraion')
+                'calibration')
             if not os.path.exists(target_data_path_dir):
                 cmds.append(f"mkdir -p {target_data_path_dir}")
             target_data_path = os.path.join(target_data_path_dir, 'data')

From a4ad94c21c0c3941115766d9910a4dca97db61aa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 13:26:23 +0530
Subject: [PATCH 07/17] support new retinanet model version

---
 script/app-mlperf-inference-nvidia/customize.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 9f890e09a..abc3699cb 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -261,11 +261,18 @@ def preprocess(i):
             model_path = os.path.join(
                 target_model_path_dir,
                 'retinanet-fpn-torch2.1-postprocessed.onnx')
+            alt_model_versions = ["2.2", "2.6"]
             alt_model_path = os.path.join(
                 target_model_path_dir,
                 'retinanet-fpn-torch2.2-postprocessed.onnx')
-            if not os.path.exists(model_path) and os.path.exists(alt_model_path):
-                cmds.append(f"ln -s {alt_model_path} {model_path}")
+            if not os.path.exists(model_path):
+                for alt_model_version in alt_model_versions:
+                    alt_model_path = os.path.join(
+                        target_model_path_dir,
+                        f'retinanet-fpn-torch{alt_model_version}-postprocessed.onnx')
+                    if os.path.exists(alt_model_path):
+                        cmds.append(f"ln -s {alt_model_path} {model_path}")
+                        break
 
             model_name = "retinanet"

From 32ff1c2c754e758ec697a304790901d462b70a92 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 17:28:31 +0530
Subject: [PATCH 08/17] deepseek r1 + nvidia

---
 script/app-mlperf-inference-nvidia/meta.yaml |  8 ++
 script/app-mlperf-inference/meta.yaml        | 89 +++++++++++++++----
 2 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 65315bdb3..3d24eeeff 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -319,6 +319,8 @@ prehook_deps:
       - 'yes'
       MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
       - 'yes'
+      MLC_MLPERF_INFERENCE_CODE_VERSION:
+      - "v5.0"
 
 # Install coco2014 dataset
   - enable_if_env:
@@ -366,6 +368,7 @@ variations:
       version: "2024.1"
   v4.1:
     group: version
+    base: pre5.0
    env:
      MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.1"
      MLC_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-FP8-quantized
@@ -375,6 +378,7 @@ variations:
 
   v4.1-dev:
     group: version
+    base: pre5.0
     default: true
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.0"
@@ -384,6 +388,7 @@ variations:
       tags: _for-nvidia-mlperf-inference-v4.0
 
   v4.0:
+    base: pre5.0
     group: version
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v4.0"
@@ -392,6 +397,7 @@ variations:
       pytorch:
         tags: _for-nvidia-mlperf-inference-v4.0
   v3.1:
+    base: pre5.0
     env:
       MLC_MLPERF_INFERENCE_CODE_VERSION: "v3.1"
       MLC_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-07142023.pth
@@ -563,6 +569,8 @@ variations:
         version: "0.4.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
+      - tags: get,generic-python-lib,_package.nvidia-modelopt
+        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 0fff01d99..6dc5736b1 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -297,6 +297,10 @@ variations:
   reference,bert_:
     default_variations:
       backend: onnxruntime
+
+  reference,deepseek-r1:
+    default_variations:
+      backend: pytorch
 
   all-models,nvidia-original:
     docker:
@@ -948,7 +952,52 @@ variations:
       names:
       - llama3_1-405b
      - llama3-405b
+
+  deepseek-r1:
+    group:
+      model
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _deepseek-r1
+    env:
+      MLC_MODEL:
+        deepseek-r1
+    posthook_deps:
+      - enable_if_env:
+          MLC_MLPERF_LOADGEN_MODE:
+          - accuracy
+          - all
+          MLC_MLPERF_ACCURACY_RESULTS_DIR:
+          - 'on'
+        skip_if_env:
+          MLC_MLPERF_IMPLEMENTATION:
+          - nvidia
+        names:
+        - mlperf-accuracy-script
+        - deepseek-r1-accuracy-script
+        tags: run,accuracy,mlperf,_dataset_deepseek-r1
+    docker:
+      deps:
+        - tags: get,mlperf,preprocessed,dataset,deepseek-r1
+          enable_if_env:
+            MLC_USE_DATASET_FROM_HOST:
+            - 'yes'
+          names:
+          - deepseek-r1-preprocessed-dataset
+      mounts:
+      - "${{ MLC_PREPROCESSED_DATASET_DEEPSEEK_R1_VALIDATION_PATH }}:${{ MLC_PREPROCESSED_DATASET_DEEPSEEK_R1_VALIDATION_PATH }}"
+
+  deepseek-r1,pytorch:
+    docker:
+      image_name: nvcr.io/nvidia/pytorch:25.04-py3
+  deepseek-r1,sglang:
+    docker:
+      image_name: nvidia/cuda:12.6.0-devel-ubuntu22.04
+
+  deepseek-r1,vllm:
+    docker:
+      image_name: nvidia/cuda:12.6.0-devel-ubuntu22.04
 
   sdxl:
     group:
       model
@@ -981,6 +1030,8 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
+      MLC_MLPERF_INFERENCE_VERSION:
+      - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone
 
   sdxl,reference,float16:
@@ -1403,6 +1454,24 @@ variations:
       mlperf-inference-implementation:
         tags: _pytorch
 
+  vllm:
+    group: backend
+    env:
+      MLC_MLPERF_BACKEND:
+        vllm
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _vllm
+
+  sglang:
+    group: backend
+    env:
+      MLC_MLPERF_BACKEND:
+        sglang
+    add_deps_recursive:
+      mlperf-inference-implementation:
+        tags: _sglang
+
   openshift:
     group: backend
     env:
@@ -1715,8 +1784,6 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_TEST_QUERY_COUNT: '100'
@@ -1735,8 +1802,6 @@ variations:
       nvidia-inference-server:
         version: r2.1
         tags: _custom
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     env:
       MLC_SKIP_SYS_UTILS: 'yes'
 
@@ -1750,8 +1815,6 @@ variations:
       nvidia-inference-server:
         version: r3.0
         tags: _nvidia-only
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1772,8 +1835,6 @@ variations:
         tags: _v3.1
       nvidia-scratch-space:
         tags: _version.4_0-dev
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1792,8 +1853,6 @@ variations:
         tags: _ctuning
       intel-harness:
         tags: _v3.1
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1816,8 +1875,6 @@ variations:
         tags: _v4.0
       nvidia-scratch-space:
         tags: _version.4_1-dev
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1838,8 +1895,6 @@ variations:
         tags: _v4.1
       nvidia-scratch-space:
         tags: _version.4_1
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'
@@ -1862,9 +1917,7 @@ variations:
       intel-harness:
         tags: _v4.1
       inference-src:
-        version: r5.0
-      nvidia-original-mlperf-inference:
-        tags: _pre5.0
+        version: r5.0
       nvidia-scratch-space:
         tags: _version.5.0-dev
     default_env:
@@ -1892,8 +1945,6 @@ variations:
         tags: _version.5.0
       pycuda:
         version: "2024.1"
-      nvidia-harness:
-        tags: _v5.0
     default_env:
       MLC_SKIP_SYS_UTILS: 'yes'
       MLC_REGENERATE_MEASURE_FILES: 'yes'

From 9784e10d695b4de16ced3834c7c000e1c25893aa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 17:39:49 +0530
Subject: [PATCH 09/17] skip sdxl model download to host until we handle it in
 download model script

---
 script/app-mlperf-inference/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 6dc5736b1..4749237e4 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1030,6 +1030,7 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
+    - skip_if_env:
       MLC_MLPERF_INFERENCE_VERSION:
       - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone

From 321d80417a110ca73f7b5934cbf35f6fd9b3fed9 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:35:19 +0530
Subject: [PATCH 10/17] Update logic for download model

---
 .../app-mlperf-inference-nvidia/customize.py | 56 ++++++++++++++-----
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index abc3699cb..1ae716d04 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -366,20 +366,50 @@ def preprocess(i):
             else:
                 cmds.append(f"make download_model BENCHMARKS='{model_name}'")
         elif "stable-diffusion" in env['MLC_MODEL']:
-            folders = ["clip1", "clip2", "unetxl", "vae"]
-            for folder in folders:
-                onnx_model_path = os.path.join(
-                    env['MLPERF_SCRATCH_PATH'],
-                    'models',
-                    'SDXL',
-                    'onnx_models',
-                    folder,
-                    'model.onnx')
-                if not os.path.exists(onnx_model_path):
+            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == '5.0':
+                # Define folder mappings for each model type
+                model_folders = {
+                    'onnx_models': ["clip1", "clip2", "unetxl", "vae"],
+                    'modelopt_models': ["unetxl.fp8", "vae.int8"]
+                }
+
+                model_found = True
+
+                # Check all required models across both directories
+                for model_type, folders in model_folders.items():
+                    for folder in folders:
+                        onnx_model_path = os.path.join(
+                            env['MLPERF_SCRATCH_PATH'],
+                            'models',
+                            'SDXL',
+                            model_type,
+                            folder,
+                            'model.onnx'
+                        )
+                        if not os.path.exists(onnx_model_path):
+                            model_found = False
+                            break
+                    if not model_found:
+                        break
+                if not model_found:
                     env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
-                    cmds.append(
-                        f"make download_model BENCHMARKS='{model_name}'")
-                    break
+                    cmds.append(f"make download_model BENCHMARKS='{model_name}'")
+            else:
+                folders = ["clip1", "clip2", "unetxl", "vae"]
+                for folder in folders:
+                    onnx_model_path = os.path.join(
+                        env['MLPERF_SCRATCH_PATH'],
+                        'models',
+                        'SDXL',
+                        'onnx_models',
+                        folder,
+                        'model.onnx')
+                    if not os.path.exists(onnx_model_path):
+                        env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
+                        cmds.append(
+                            f"make download_model BENCHMARKS='{model_name}'")
+                        break
+
             if scenario.lower() == "singlestream":
                 ammo_model_path = os.path.join(
                     env['MLPERF_SCRATCH_PATH'],

From 6ea7a0f5ce884daff44dca35dbc399ba63004810 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 12 Jun 2025 13:05:37 +0000
Subject: [PATCH 11/17] [Automated Commit] Format Codebase [skip ci]

---
 script/app-mlperf-inference-nvidia/customize.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 1ae716d04..2e7687e48 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -372,9 +372,9 @@ def preprocess(i):
                     'onnx_models': ["clip1", "clip2", "unetxl", "vae"],
                     'modelopt_models': ["unetxl.fp8", "vae.int8"]
                 }
-
+
                 model_found = True
-
+
                 # Check all required models across both directories
                 for model_type, folders in model_folders.items():
                     for folder in folders:
@@ -393,7 +393,8 @@ def preprocess(i):
                         break
                 if not model_found:
                     env['MLC_REQUIRE_SDXL_MODEL_DOWNLOAD'] = 'yes'
-                    cmds.append(f"make download_model BENCHMARKS='{model_name}'")
+                    cmds.append(
+                        f"make download_model BENCHMARKS='{model_name}'")
             else:
                 folders = ["clip1", "clip2", "unetxl", "vae"]
@@ -409,7 +410,7 @@ def preprocess(i):
                         cmds.append(
                             f"make download_model BENCHMARKS='{model_name}'")
                         break
-
+
             if scenario.lower() == "singlestream":
                 ammo_model_path = os.path.join(

From 91627cd2b126e4bb0c2a8f094b04586e77a22c56 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:40:44 +0530
Subject: [PATCH 12/17] fix typo

---
 script/app-mlperf-inference-nvidia/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index 2e7687e48..fc8a79624 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -366,7 +366,7 @@ def preprocess(i):
             else:
                 cmds.append(f"make download_model BENCHMARKS='{model_name}'")
         elif "stable-diffusion" in env['MLC_MODEL']:
-            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == '5.0':
+            if env.get('MLC_MLPERF_INFERENCE_CODE_VERSION') == 'v5.0':
                 # Define folder mappings for each model type
                 model_folders = {
                     'onnx_models': ["clip1", "clip2", "unetxl", "vae"],

From a9c6c15eb3440d2996b0f9c545080ae33b1e731a Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 18:43:19 +0530
Subject: [PATCH 13/17] fix typo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 3d24eeeff..1eec11011 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -314,11 +314,9 @@ prehook_deps:
     - sdxl-model
     - ml-model
     tags: get,ml-model,sdxl,_fp16,_rclone
-    skip_if_env:
+    skip_if_any_env:
      MLC_RUN_STATE_DOCKER:
      - 'yes'
-      MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
-      - 'yes'
      MLC_MLPERF_INFERENCE_CODE_VERSION:
      - "v5.0"

From 4727ddef67f871575db7141bd28306e5c1533a20 Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 19:21:38 +0530
Subject: [PATCH 14/17] Updated dependencies 3d-unet 5.0

---
 script/app-mlperf-inference-nvidia/meta.yaml | 20 ++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 1eec11011..666beb347 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -647,10 +647,22 @@ variations:
       - tags: get,generic-python-lib,_package.nibabel
       - tags: get,generic-python-lib,_pandas
         version_max: "1.5.3"
-      - tags: get,generic-python-lib,_onnx-graphsurgeon
-        version: 0.3.27
-      - tags: get,generic-python-lib,_package.onnx
-        version: 1.13.1
+
+  3d-unet_,pre5.0:
+    deps:
+      - tags: get,generic-python-lib,_onnx-graphsurgeon
+        version: 0.3.27
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
+
+  3d-unet_,v5.0:
+    deps:
+      - tags: get,generic-python-lib,_package.onnx
+        names:
+        - onnx
+        version: "1.17.0"
+      - tags: get,generic-python-lib,_package.onnx-graphsurgeon
+        version: "0.5.2"
 
   3d-unet-99:
     group: model

From bf2528e503dedcfb63d727a68e402a51240beccb Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 22:25:15 +0530
Subject: [PATCH 15/17] fix typo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 666beb347..155c4fcc0 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -567,8 +567,6 @@ variations:
         version: "0.4.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
-      - tags: get,generic-python-lib,_package.nvidia-modelopt
-        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:
@@ -598,6 +596,8 @@ variations:
         version: "0.6.0"
       - tags: get,generic-python-lib,_package.torchmetrics
         version: "1.0.3"
+      - tags: get,generic-python-lib,_package.nvidia-modelopt
+        version: "0.19.0"
       - tags: get,generic-python-lib,_package.typeguard
       - tags: get,generic-python-lib,_package.onnx
         names:

From d7d892ae720880f0d59a30f7ae05bff08569ffdb Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Thu, 12 Jun 2025 23:04:08 +0530
Subject: [PATCH 16/17] control model download in official repo

---
 script/app-mlperf-inference-nvidia/meta.yaml | 4 +---
 script/app-mlperf-inference/meta.yaml        | 3 ---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index 155c4fcc0..e7820ea6d 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -314,11 +314,9 @@ prehook_deps:
     - sdxl-model
     - ml-model
     tags: get,ml-model,sdxl,_fp16,_rclone
-    skip_if_any_env:
+    skip_if_env:
      MLC_RUN_STATE_DOCKER:
      - 'yes'
-      MLC_MLPERF_INFERENCE_CODE_VERSION:
-      - "v5.0"
 
 # Install coco2014 dataset
   - enable_if_env:
diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 4749237e4..ef5bf3287 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -1030,9 +1030,6 @@ variations:
       - 'yes'
       MLC_USE_MODEL_FROM_HOST:
       - 'yes'
-    - skip_if_env:
-      MLC_MLPERF_INFERENCE_VERSION:
-      - '5.0'
       tags: get,ml-model,sdxl,_fp16,_rclone
 
   sdxl,reference,float16:

From 8ea3bfe0e6c18cc944c9ca2e985cbdbe1f4030fa Mon Sep 17 00:00:00 2001
From: anandhu-eng
Date: Fri, 13 Jun 2025 20:12:22 +0530
Subject: [PATCH 17/17] Update batch sizes for v5.0

---
 .../app-mlperf-inference-nvidia/customize.py |   4 +-
 script/app-mlperf-inference-nvidia/meta.yaml | 334 ++++++++++++++----
 2 files changed, 260 insertions(+), 78 deletions(-)

diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py
index fc8a79624..eddf02252 100644
--- a/script/app-mlperf-inference-nvidia/customize.py
+++ b/script/app-mlperf-inference-nvidia/customize.py
@@ -563,12 +563,12 @@ def preprocess(i):
     gpu_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_GPU_BATCH_SIZE')
     if gpu_batch_size:
         run_config += f" --gpu_batch_size={gpu_batch_size}".replace(
-            ";", ",")
+            "##", ",")
 
     dla_batch_size = env.get('MLC_MLPERF_NVIDIA_HARNESS_DLA_BATCH_SIZE')
     if dla_batch_size:
         run_config += f" --dla_batch_size={dla_batch_size}".replace(
-            ";", ",")
+            "##", ",")
 
     input_format = env.get('MLC_MLPERF_NVIDIA_HARNESS_INPUT_FORMAT')
     if input_format:
diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml
index e7820ea6d..e8f609253 100644
--- a/script/app-mlperf-inference-nvidia/meta.yaml
+++ b/script/app-mlperf-inference-nvidia/meta.yaml
@@ -1248,66 +1248,126 @@ variations:
       MLC_MLPERF_NVIDIA_HARNESS_NUM_SORT_SEGMENTS: '2'
       MLC_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS: True
 
-  gpu_memory.80,num-gpus.2,llama2-70b,offline,run_harness:
+  gpu_memory.80,pre5.0,num-gpus.2,llama2-70b,offline,run_harness:
     default_variations:
       batch-size: batch_size.896
+
+  gpu_memory.80,v5.0,num-gpus.2,llama2-70b,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."llama2-70b:1024"
 
-  gpu_memory.16,gptj_,offline,run_harness:
+  gpu_memory.16,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
 
-  gpu_memory.24,gptj_,offline,run_harness:
+  gpu_memory.16,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:4"
+
+  gpu_memory.24,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.7
 
-  gpu_memory.32,gptj_,offline,run_harness:
+  gpu_memory.24,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:7"
+
+  gpu_memory.32,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.32,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:8"
 
-  gpu_memory.48,gptj_,offline,run_harness:
+  gpu_memory.48,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.14
 
-  gpu_memory.40,gptj_,offline,run_harness:
+  gpu_memory.48,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:14"
+
+  gpu_memory.40,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.10
 
-  gpu_memory.80,gptj_,offline,run_harness:
+  gpu_memory.40,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:10"
+
+  gpu_memory.80,pre5.0,gptj_,offline,run_harness:
     default_variations:
       batch-size: batch_size.32
+
+  gpu_memory.80,v5.0,gptj_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."gptj:80"
 
-  gpu_memory.16,sdxl,offline,run_harness:
+  gpu_memory.16,pre5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+
+  gpu_memory.16,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
-  gpu_memory.24,sdxl,offline,run_harness:
+  gpu_memory.24,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.32,sdxl,offline,run_harness:
+
+  gpu_memory.24,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
+
+  gpu_memory.32,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.3
 
-  gpu_memory.80,sdxl,offline,run_harness:
+  gpu_memory.32,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:1##clip2:2##unet:2##vae:1"
+
+  gpu_memory.80,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.96,sdxl,offline,run_harness:
+
+  gpu_memory.80,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.96,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.96,sdxl,server,run_harness:
+
+  gpu_memory.96,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.96,pre5.0,sdxl,server,run_harness:
     default_variations:
       batch-size: batch_size.8
 
-  gpu_memory.80,sdxl,server,run_harness:
+  gpu_memory.96,v5.0,sdxl,server,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.80,pre5.0,sdxl,server,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.140,sdxl,offline,run_harness:
+
+  gpu_memory.80,v5.0,sdxl,server,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
+  gpu_memory.140,pre5.0,sdxl,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
 
+  gpu_memory.140,v5.0,sdxl,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."clip1:64##clip2:64##unet:64##vae:8"
+
   gpu_memory.8,bert_,offline,run_harness:
     default_variations:
       batch-size: batch_size.256
@@ -1336,38 +1396,70 @@ variations:
     default_variations:
       batch-size: batch_size.64
 
-  gpu_memory.8,resnet50,offline,run_harness:
+  gpu_memory.8,pre5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.64
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
+
+  gpu_memory.8,v5.0,resnet50,offline,run_harness:
     default_variations:
-      batch-size: batch_size.64
+      batch-size: batch_size."resnet50:64"
     env:
       MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.16,resnet50,offline,run_harness:
+  gpu_memory.16,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.1024
     env:
      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.40,resnet50,offline,run_harness:
+  gpu_memory.16,v5.0,resnet50,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2048
+      batch-size: batch_size."resnet50:1024"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "4"
 
-  gpu_memory.24,resnet50,offline,run_harness:
+  gpu_memory.24,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.64
 
-  gpu_memory.32,resnet50,offline,run_harness:
+  gpu_memory.24,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:64"
+
+  gpu_memory.32,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.48,resnet50,offline,run_harness:
+  gpu_memory.32,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.40,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.80,resnet50,offline,run_harness:
+  gpu_memory.40,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.48,pre5.0,resnet50,offline,run_harness:
     default_variations:
       batch-size: batch_size.2048
 
+  gpu_memory.48,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
+  gpu_memory.80,pre5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2048
+
+  gpu_memory.80,v5.0,resnet50,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."resnet50:2048"
+
   num-gpus.#:
     group: num-gpus
     env:
       MLC_MLPERF_NVIDIA_HARNESS_NUM_GPUS: '#'
@@ -1395,36 +1487,68 @@ variations:
     default_variations:
       batch-size: batch_size.2
 
-  gpu_memory.8,retinanet,offline,run_harness:
+  gpu_memory.8,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.16,retinanet,offline,run_harness:
+
+  gpu_memory.8,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  gpu_memory.16,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.2
-
-  gpu_memory.40,retinanet,offline,run_harness:
+
+  gpu_memory.16,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+
+  gpu_memory.24,pre5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.2
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
+
+  gpu_memory.24,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:2"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
+      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
+
+  gpu_memory.32,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.32,retinanet,offline,run_harness:
+
+  gpu_memory.32,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:4"
+
+  gpu_memory.40,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.48,retinanet,offline,run_harness:
+
+  gpu_memory.40,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:4"
+
+
+  gpu_memory.48,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.24,retinanet,offline,run_harness:
+
+  gpu_memory.48,v5.0,retinanet,offline,run_harness:
     default_variations:
-      batch-size: batch_size.2
-    env:
-      MLC_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: "2"
-      MLC_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: "2"
-
-  gpu_memory.80,retinanet,offline,run_harness:
+      batch-size: batch_size."retinanet:4"
+
+  gpu_memory.80,pre5.0,retinanet,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.80,v5.0,retinanet,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."retinanet:8"
 
   retinanet,server,run_harness:
     default_variations:
@@ -1461,63 +1585,121 @@ variations:
     default_variations:
       batch-size: batch_size.2048
 
-  gpu_memory.8,3d-unet_,offline,run_harness:
+  gpu_memory.8,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.16,3d-unet_,offline,run_harness:
+
+  gpu_memory.8,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:4"
+
+  gpu_memory.16,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.4
-
-  gpu_memory.40,3d-unet_,offline,run_harness:
+
+  gpu_memory.16,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:4"
+
+  gpu_memory.24,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.24,3d-unet_,offline,run_harness:
+
+  gpu_memory.24,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.32,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.80,3d-unet_,offline,run_harness:
+
+  gpu_memory.32,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.40,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.32,3d-unet_,offline,run_harness:
+
+  gpu_memory.40,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.48,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
-
-  gpu_memory.48,3d-unet_,offline,run_harness:
+
+  gpu_memory.48,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
+
+  gpu_memory.80,pre5.0,3d-unet_,offline,run_harness:
     default_variations:
       batch-size: batch_size.8
+
+  gpu_memory.80,v5.0,3d-unet_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."3d-unet:8"
 
-  gpu_memory.16,dlrm_,offline,run_harness:
+  gpu_memory.16,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
-
-  gpu_memory.40,dlrm_,offline,run_harness:
+
+  gpu_memory.16,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+
+  gpu_memory.24,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
     env:
-      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
-
-  gpu_memory.24,dlrm_,offline,run_harness:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.30"
+
+  gpu_memory.24,v5.0,dlrm_,offline,run_harness:
+    default_variations:
-      batch-size: batch_size.1400
+      batch-size: batch_size."dlrm:1400"
     env:
       MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.30"
-
-  gpu_memory.32,dlrm_,offline,run_harness:
+
+  gpu_memory.32,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
-
-  gpu_memory.48,dlrm_,offline,run_harness:
+
+  gpu_memory.32,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+
+  gpu_memory.40,pre5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size.1400
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
+
+  gpu_memory.40,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.40"
+
+  gpu_memory.48,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
     env:
       MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.50"
-
-  gpu_memory.80,dlrm_,offline,run_harness:
+
+  gpu_memory.48,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
+    env:
+      MLC_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART: "0.50"
+
+  gpu_memory.80,pre5.0,dlrm_,offline,run_harness:
     default_variations:
       batch-size: batch_size.1400
+
+  gpu_memory.80,v5.0,dlrm_,offline,run_harness:
+    default_variations:
+      batch-size: batch_size."dlrm:1400"
 
   orin:
     group: gpu-name
@@ -1617,7 +1799,7 @@ variations:
 
   rtx_a6000,v5.0,sdxl,offline,run_harness,batch_size.1:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   rtx_a6000,pre5.0,resnet50,offline,run_harness:
     default_variations:
@@ -1784,7 +1966,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,sdxl,offline,run_harness,num-gpu.8:
     env:
@@ -1797,7 +1979,7 @@ variations:
 
   l4,v5.0,sdxl,offline,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,sdxl,server,run_harness,num-gpu.1:
     env:
@@ -1825,7 +2007,7 @@ variations:
 
   l4,v5.0,sdxl,server,run_harness,num-gpu.8:
     default_variations:
-      batch-size: batch_size."clip1:2;clip2:2;unet:2;vae:1"
+      batch-size: batch_size."clip1:2##clip2:2##unet:2##vae:1"
 
   l4,resnet50:
     default_env: