From 9841877445710561ef4df5986f8e53ca75ce29f7 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 3 Oct 2025 11:27:19 -0700
Subject: [PATCH 1/2] [no ci] Add deepseek-ai/DeepSeek-V3.2-Exp

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .../benchmarks/cuda/latency-tests.json        | 13 ++++++++++-
 .../benchmarks/cuda/serving-tests.json        | 22 ++++++++++++++++++-
 .../benchmarks/cuda/throughput-tests.json     | 14 +++++++++++-
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/vllm-benchmarks/benchmarks/cuda/latency-tests.json b/vllm-benchmarks/benchmarks/cuda/latency-tests.json
index 719b433..cb52668 100644
--- a/vllm-benchmarks/benchmarks/cuda/latency-tests.json
+++ b/vllm-benchmarks/benchmarks/cuda/latency-tests.json
@@ -85,7 +85,7 @@
         }
     },
     {
-        "test_name": "latency_deepseek_v3_tp8",
+        "test_name": "latency_deepseek_v3_1_tp8",
         "parameters": {
             "model": "deepseek-ai/DeepSeek-V3.1",
             "tensor_parallel_size": 8,
@@ -95,6 +95,17 @@
             "max_model_len": 8192
         }
     },
+    {
+        "test_name": "latency_deepseek_v3_2_tp8",
+        "parameters": {
+            "model": "deepseek-ai/DeepSeek-V3.2-Exp",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15,
+            "max_model_len": 8192
+        }
+    },
     {
         "test_name": "latency_deepseek_r1_tp8",
         "parameters": {
diff --git a/vllm-benchmarks/benchmarks/cuda/serving-tests.json b/vllm-benchmarks/benchmarks/cuda/serving-tests.json
index 1aaef17..491b5a1 100644
--- a/vllm-benchmarks/benchmarks/cuda/serving-tests.json
+++ b/vllm-benchmarks/benchmarks/cuda/serving-tests.json
@@ -494,7 +494,7 @@
         }
      },
      {
-        "test_name": "serving_deepseek_v3_tp8_random_in5k_out8k",
+        "test_name": "serving_deepseek_v3_1_tp8_random_in5k_out8k",
         "qps_list": [1, 4, 16, "inf"],
         "server_parameters": {
             "model": "deepseek-ai/DeepSeek-V3.1",
@@ -513,6 +513,26 @@
             "random_output_len": 8250
         }
     },
+    {
+        "test_name": "serving_deepseek_v3_2_tp8_random_in5k_out8k",
+        "qps_list": [1, 4, 16, "inf"],
+        "server_parameters": {
+            "model": "deepseek-ai/DeepSeek-V3.2-Exp",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy"
+        },
+        "client_parameters": {
+            "model": "deepseek-ai/DeepSeek-V3.2-Exp",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "num_prompts": 200,
+            "random_input_len": 5250,
+            "random_output_len": 8250
+        }
+    },
     {
         "test_name": "serving_deepseek_r1_tp8_random_in5k_out8k",
         "qps_list": [1, 4, 16, "inf"],
diff --git a/vllm-benchmarks/benchmarks/cuda/throughput-tests.json b/vllm-benchmarks/benchmarks/cuda/throughput-tests.json
index 9ff9cda..ecc5e27 100644
--- a/vllm-benchmarks/benchmarks/cuda/throughput-tests.json
+++ b/vllm-benchmarks/benchmarks/cuda/throughput-tests.json
@@ -93,7 +93,7 @@
         }
     },
     {
-        "test_name": "throughput_deepseek_v3_tp8",
+        "test_name": "throughput_deepseek_v3_1_tp8",
         "parameters": {
             "model": "deepseek-ai/DeepSeek-V3.1",
             "tensor_parallel_size": 8,
@@ -104,6 +104,18 @@
             "max_model_len": 8192
         }
     },
+    {
+        "test_name": "throughput_deepseek_v3_2_tp8",
+        "parameters": {
+            "model": "deepseek-ai/DeepSeek-V3.2-Exp",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "num_prompts": 200,
+            "backend": "vllm",
+            "max_model_len": 8192
+        }
+    },
     {
         "test_name": "throughput_deepseek_r1_tp8",
         "parameters": {

From c2bf6ab8d51f74f2a08cd1823b664fe2b74e0941 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Fri, 3 Oct 2025 11:35:26 -0700
Subject: [PATCH 2/2] [no ci] Run on B200

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 4934ebf..a4b35f8 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -116,6 +116,10 @@
         "linux.aws.a100",
         "linux.aws.h100",
     ],
+    "deepseek-ai/DeepSeek-V3.2-Exp": [
+        "linux.aws.a100",
+        "linux.aws.h100",
+    ],
     "deepseek-ai/DeepSeek-R1": [
         "linux.aws.a100",
         "linux.aws.h100",