diff --git a/benchmark.py b/benchmark.py
index af9e424a17..202a2cb012 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -351,7 +351,7 @@ def _run_service_test(example, service, test_suite_config, namespace):
     return output_folders


-def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False):
+def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False, output_dir=None):
     """Run the benchmark test for the specified helm chart and configuration.

     Args:
@@ -361,6 +361,7 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         node_num (int): The number of nodes of current deployment.
         llm_model (str): The LLM model to use for the test.
         report (bool): Whether to generate a report after the test.
+        output_dir (str): Directory to store the test output. If None, uses default directory.
     """
     # If llm_model is None or an empty string, set to default value
     if not llm_model:
@@ -377,7 +378,9 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         "deployment_type": "k8s",  # Default is "k8s", can also be "docker"
         "service_ip": None,  # Leave as None for k8s, specify for Docker
         "service_port": None,  # Leave as None for k8s, specify for Docker
-        "test_output_dir": os.getcwd() + "/benchmark_output",  # The directory to store the test output
+        "test_output_dir": (
+            output_dir if output_dir else os.getcwd() + "/benchmark_output"
+        ),  # Use output_dir if provided
         "node_num": node_num,
         "load_shape": {
             "name": parsed_data["load_shape_type"],
diff --git a/deploy.py b/deploy.py
index 619d19ddd9..bd3a8a87d5 100644
--- a/deploy.py
+++ b/deploy.py
@@ -49,12 +49,14 @@ def configure_replica(values, deploy_config):
     return values


-def get_output_filename(num_nodes, with_rerank, example_type, device, action_type):
+def get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size=None):
     """Generate output filename based on configuration."""
     rerank_suffix = "with-rerank-" if with_rerank else ""
     action_suffix = "deploy-" if action_type == 0 else "update-" if action_type == 1 else ""
+    # Only include batch_suffix if batch_size is not None
+    batch_suffix = f"batch{batch_size}-" if batch_size else ""

-    return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml"
+    return f"{example_type}-{rerank_suffix}{device}-{action_suffix}node{num_nodes}-{batch_suffix}values.yaml"


 def configure_resources(values, deploy_config):
@@ -117,6 +119,7 @@ def configure_resources(values, deploy_config):

 def configure_extra_cmd_args(values, deploy_config):
     """Configure extra command line arguments for services."""
+    batch_size = None
     for service_name, config in deploy_config["services"].items():
         if service_name == "llm":
             extra_cmd_args = []
@@ -130,6 +133,13 @@ def configure_extra_cmd_args(values, deploy_config):
                 batch_params = engine_params.get("batch_params", {})
                 token_params = engine_params.get("token_params", {})

+                # Get batch size based on engine type
+                if engine == "tgi":
+                    batch_size = batch_params.get("max_batch_size")
+                elif engine == "vllm":
+                    batch_size = batch_params.get("max_num_seqs")
+                batch_size = batch_size if batch_size and batch_size != "" else None
+
                 # Add all parameters that exist in batch_params
                 for param, value in batch_params.items():
                     if value is not None and value != "":
@@ -146,7 +156,7 @@
             values[engine]["extraCmdArgs"] = extra_cmd_args
             print(f"extraCmdArgs: {extra_cmd_args}")

-    return values
+    return values, batch_size


 def configure_models(values, deploy_config):
@@ -218,13 +228,13 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
     values = configure_rerank(values, with_rerank, deploy_config, example_type, node_selector or {})
     values = configure_replica(values, deploy_config)
     values = configure_resources(values, deploy_config)
-    values = configure_extra_cmd_args(values, deploy_config)
+    values, batch_size = configure_extra_cmd_args(values, deploy_config)
     values = configure_models(values, deploy_config)

     device = deploy_config.get("device", "unknown")

     # Generate and write YAML file
-    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type)
+    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size)
     yaml_string = yaml.dump(values, default_flow_style=False)

     filepath = os.path.join(chart_dir, filename)
@@ -694,6 +704,7 @@ def main():
             update_service(
                 args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path
             )
+            print(f"values_file_path: {values_file_path}")
             return
         except Exception as e:
             parser.error(f"Failed to update deployment: {str(e)}")
diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py
index aff65a33e0..f210f215dc 100644
--- a/deploy_and_benchmark.py
+++ b/deploy_and_benchmark.py
@@ -218,12 +218,9 @@ def main(yaml_file, target_node=None, test_mode="oob"):
             # Handle batch parameters
             batch_params = []
             if "batch_params" in engine_params:
-                batch_params = (
-                    engine_params["batch_params"].get("max_batch_size", [])
-                    if engine == "tgi"
-                    else engine_params["batch_params"].get("max_num_seqs", [])
-                )
-                param_name = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+                key = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+                batch_params = engine_params["batch_params"].get(key, [])
+                param_name = key
                 if not isinstance(batch_params, list):
                     batch_params = [batch_params]

@@ -240,11 +237,18 @@ def main(yaml_file, target_node=None, test_mode="oob"):
         interval = deploy_config.get("interval", 5)  # default 5s

         values_file_path = None
+        # Create benchmark output directory
+        benchmark_dir = os.path.join(os.getcwd(), "benchmark_output")
+        os.makedirs(benchmark_dir, exist_ok=True)
+
         for i, batch_param in enumerate(batch_params):
-            if test_mode == "tune":
-                print(f"\nProcessing {param_name}: {batch_param}")
-            else:
-                print("\nProcessing OOB deployment")
+            print(f"\nProcessing {test_mode} mode {param_name}: {batch_param}")
+            # Create subdirectory for this iteration with test mode in the name
+            iteration_dir = os.path.join(
+                benchmark_dir,
+                f"benchmark_{test_mode}_node{node}_batch{batch_param if batch_param is not None else 'default'}",
+            )
+            os.makedirs(iteration_dir, exist_ok=True)

             # Construct new deploy config
             new_deploy_config = construct_deploy_config(deploy_config, node, batch_param, test_mode)
@@ -279,6 +283,8 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                 if match:
                     values_file_path = match.group(1)
                     print(f"Captured values_file_path: {values_file_path}")
+                    # Copy values file to iteration directory
+                    shutil.copy2(values_file_path, iteration_dir)
                 else:
                     print("values_file_path not found in the output")

@@ -299,11 +305,21 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                         values_file_path,
                         "--update-service",
                     ]
-                    result = subprocess.run(cmd, check=True)
+                    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
                     if result.returncode != 0:
                         print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}")
                        break  # Skip remaining {param_name} for this node

+                    # Update values_file_path from the output
+                    match = re.search(r"values_file_path: (\S+)", result.stdout)
+                    if match:
+                        values_file_path = match.group(1)
+                        print(f"Updated values_file_path: {values_file_path}")
+                        # Copy values file to iteration directory
+                        shutil.copy2(values_file_path, iteration_dir)
+                    else:
+                        print("values_file_path not found in the output")
+
                     # Wait for deployment to be ready
                     print("\nWaiting for deployment to be ready...")
                     cmd = [
@@ -332,6 +348,7 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                         namespace=namespace,
                         node_num=node,
                         llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
+                        output_dir=iteration_dir,
                     )
                 else:
                     print(
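
For quick reference, a sketch of the values-file naming this patch introduces, derived from the new get_output_filename above. The "chatqna" and "gaudi" inputs are hypothetical examples, not values taken from the patch:

    # Naming pattern: {example_type}-{rerank_suffix}{device}-{action_suffix}node{num_nodes}-{batch_suffix}values.yaml
    from deploy import get_output_filename  # assumes deploy.py is importable

    # Deploy (action_type == 0) with rerank and an explicit batch size:
    print(get_output_filename(2, True, "chatqna", "gaudi", 0, batch_size=8))
    # -> chatqna-with-rerank-gaudi-deploy-node2-batch8-values.yaml

    # Update (action_type == 1) without rerank; batch_size=None drops the batch suffix:
    print(get_output_filename(1, False, "chatqna", "gaudi", 1))
    # -> chatqna-gaudi-update-node1-values.yaml

These filenames pair with the per-iteration benchmark_output/benchmark_{test_mode}_node{node}_batch{...} directories created in deploy_and_benchmark.py, so each copied values file records the configuration it was benchmarked under.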