diff --git a/superbench/config/amd_mi100_hpe.yaml b/superbench/config/amd_mi100_hpe.yaml
index 98ab741ef..f3b27df09 100644
--- a/superbench/config/amd_mi100_hpe.yaml
+++ b/superbench/config/amd_mi100_hpe.yaml
@@ -75,15 +75,17 @@ superbench:
       parameters:
         block_devices: []
     gpu-sm-copy-bw:
-      enable: false
+      enable: true
       modes:
         - name: local
           proc_num: 32
-          prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
+          # Sweep GPU x NUMA pairs (assumes 8 GPUs, 4 NUMA nodes): rank%8 -> GPU, rank/8 -> NUMA node.
+          prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
           parallel: no
       parameters:
-        dtoh: true
-        htod: true
+        mem_type:
+          - dtoh
+          - htod
     gpt_models:
       <<: *default_pytorch_mode
       models:
diff --git a/superbench/config/azure_ndv4.yaml b/superbench/config/azure_ndv4.yaml
index 8d30e2af4..1a4defb2f 100644
--- a/superbench/config/azure_ndv4.yaml
+++ b/superbench/config/azure_ndv4.yaml
@@ -35,6 +35,26 @@ superbench:
       <<: *default_local_mode
     gemm-flops:
       <<: *default_local_mode
+    disk-benchmark:
+      enable: false
+      modes:
+        - name: local
+          proc_num: 1
+          parallel: no
+      parameters:
+        block_devices: []
+    gpu-sm-copy-bw:
+      enable: true
+      modes:
+        - name: local
+          proc_num: 32
+          # Sweep GPU x NUMA pairs (assumes 8 GPUs, 4 NUMA nodes): rank%8 -> GPU, rank/8 -> NUMA node.
+          prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
+          parallel: no
+      parameters:
+        mem_type:
+          - dtoh
+          - htod
     cudnn-function:
       <<: *default_local_mode
     cublas-function:
diff --git a/superbench/config/default.yaml b/superbench/config/default.yaml
index cb9ae5625..8fc6a0a59 100644
--- a/superbench/config/default.yaml
+++ b/superbench/config/default.yaml
@@ -61,15 +61,17 @@ superbench:
           prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -c $(({proc_rank}/2))
           parallel: yes
     gpu-sm-copy-bw:
-      enable: false
+      enable: true
       modes:
         - name: local
           proc_num: 32
-          prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}%4)) -m $(({proc_rank}%4))
+          # Sweep GPU x NUMA pairs (assumes 8 GPUs, 4 NUMA nodes): rank%8 -> GPU, rank/8 -> NUMA node.
+          prefix: CUDA_VISIBLE_DEVICES=$(({proc_rank}%8)) numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
           parallel: no
       parameters:
-        dtoh: true
-        htod: true
+        mem_type:
+          - dtoh
+          - htod
     kernel-launch:
       <<: *default_local_mode
     gemm-flops: