From 9841877445710561ef4df5986f8e53ca75ce29f7 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 3 Oct 2025 11:27:19 -0700 Subject: [PATCH 1/2] [no ci] Add deepseek-ai/DeepSeek-V3.2-Exp Signed-off-by: Huy Do --- .../benchmarks/cuda/latency-tests.json | 13 ++++++++++- .../benchmarks/cuda/serving-tests.json | 22 ++++++++++++++++++- .../benchmarks/cuda/throughput-tests.json | 14 +++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/vllm-benchmarks/benchmarks/cuda/latency-tests.json b/vllm-benchmarks/benchmarks/cuda/latency-tests.json index 719b433..cb52668 100644 --- a/vllm-benchmarks/benchmarks/cuda/latency-tests.json +++ b/vllm-benchmarks/benchmarks/cuda/latency-tests.json @@ -85,7 +85,7 @@ } }, { - "test_name": "latency_deepseek_v3_tp8", + "test_name": "latency_deepseek_v3_1_tp8", "parameters": { "model": "deepseek-ai/DeepSeek-V3.1", "tensor_parallel_size": 8, @@ -95,6 +95,17 @@ "max_model_len": 8192 } }, + { + "test_name": "latency_deepseek_v3_2_tp8", + "parameters": { + "model": "deepseek-ai/DeepSeek-V3.2-Exp", + "tensor_parallel_size": 8, + "load_format": "dummy", + "num_iters_warmup": 5, + "num_iters": 15, + "max_model_len": 8192 + } + }, { "test_name": "latency_deepseek_r1_tp8", "parameters": { diff --git a/vllm-benchmarks/benchmarks/cuda/serving-tests.json b/vllm-benchmarks/benchmarks/cuda/serving-tests.json index 1aaef17..491b5a1 100644 --- a/vllm-benchmarks/benchmarks/cuda/serving-tests.json +++ b/vllm-benchmarks/benchmarks/cuda/serving-tests.json @@ -494,7 +494,7 @@ } }, { - "test_name": "serving_deepseek_v3_tp8_random_in5k_out8k", + "test_name": "serving_deepseek_v3_1_tp8_random_in5k_out8k", "qps_list": [1, 4, 16, "inf"], "server_parameters": { "model": "deepseek-ai/DeepSeek-V3.1", @@ -513,6 +513,26 @@ "random_output_len": 8250 } }, + { + "test_name": "serving_deepseek_v3_2_tp8_random_in5k_out8k", + "qps_list": [1, 4, 16, "inf"], + "server_parameters": { + "model": "deepseek-ai/DeepSeek-V3.2-Exp", + "tensor_parallel_size": 8, + "swap_space": 16, + "disable_log_stats": "", + "disable_log_requests": "", + "load_format": "dummy" + }, + "client_parameters": { + "model": "deepseek-ai/DeepSeek-V3.2-Exp", + "backend": "vllm", + "dataset_name": "random", + "num_prompts": 200, + "random_input_len": 5250, + "random_output_len": 8250 + } + }, { "test_name": "serving_deepseek_r1_tp8_random_in5k_out8k", "qps_list": [1, 4, 16, "inf"], diff --git a/vllm-benchmarks/benchmarks/cuda/throughput-tests.json b/vllm-benchmarks/benchmarks/cuda/throughput-tests.json index 9ff9cda..ecc5e27 100644 --- a/vllm-benchmarks/benchmarks/cuda/throughput-tests.json +++ b/vllm-benchmarks/benchmarks/cuda/throughput-tests.json @@ -93,7 +93,7 @@ } }, { - "test_name": "throughput_deepseek_v3_tp8", + "test_name": "throughput_deepseek_v3_1_tp8", "parameters": { "model": "deepseek-ai/DeepSeek-V3.1", "tensor_parallel_size": 8, @@ -104,6 +104,18 @@ "max_model_len": 8192 } }, + { + "test_name": "throughput_deepseek_v3_2_tp8", + "parameters": { + "model": "deepseek-ai/DeepSeek-V3.2-Exp", + "tensor_parallel_size": 8, + "load_format": "dummy", + "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json", + "num_prompts": 200, + "backend": "vllm", + "max_model_len": 8192 + } + }, { "test_name": "throughput_deepseek_r1_tp8", "parameters": { From c2bf6ab8d51f74f2a08cd1823b664fe2b74e0941 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Fri, 3 Oct 2025 11:35:26 -0700 Subject: [PATCH 2/2] [no ci] Run on B200 Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 4934ebf..a4b35f8 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -116,6 +116,10 @@ "linux.aws.a100", "linux.aws.h100", ], + "deepseek-ai/DeepSeek-V3.2-Exp": [ + "linux.aws.a100", + "linux.aws.h100", + ], "deepseek-ai/DeepSeek-R1": [ "linux.aws.a100", "linux.aws.h100",