Merged
7 changes: 5 additions & 2 deletions benchmark.py
@@ -351,7 +351,7 @@ def _run_service_test(example, service, test_suite_config, namespace):
     return output_folders
 
 
-def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False):
+def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False, output_dir=None):
     """Run the benchmark test for the specified helm chart and configuration.
 
     Args:
@@ -361,6 +361,7 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         node_num (int): The number of nodes of current deployment.
         llm_model (str): The LLM model to use for the test.
         report (bool): Whether to generate a report after the test.
+        output_dir (str): Directory to store the test output. If None, uses default directory.
     """
     # If llm_model is None or an empty string, set to default value
     if not llm_model:
@@ -377,7 +378,9 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         "deployment_type": "k8s",  # Default is "k8s", can also be "docker"
         "service_ip": None,  # Leave as None for k8s, specify for Docker
         "service_port": None,  # Leave as None for k8s, specify for Docker
-        "test_output_dir": os.getcwd() + "/benchmark_output",  # The directory to store the test output
+        "test_output_dir": (
+            output_dir if output_dir else os.getcwd() + "/benchmark_output"
+        ),  # Use output_dir if provided
         "node_num": node_num,
         "load_shape": {
             "name": parsed_data["load_shape_type"],
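
A minimal sketch of how the new output_dir fallback behaves (resolve_output_dir is a stand-in for illustration, not part of the PR; the paths are hypothetical):

    import os

    def resolve_output_dir(output_dir=None):
        # Mirrors the expression added to run_benchmark: an explicit directory
        # wins; otherwise fall back to ./benchmark_output under the CWD.
        return output_dir if output_dir else os.getcwd() + "/benchmark_output"

    print(resolve_output_dir())             # e.g. /home/user/benchmark_output
    print(resolve_output_dir("/tmp/run1"))  # /tmp/run1
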
21 changes: 16 additions & 5 deletions deploy.py
@@ -49,12 +49,14 @@ def configure_replica(values, deploy_config):
     return values
 
 
-def get_output_filename(num_nodes, with_rerank, example_type, device, action_type):
+def get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size=None):
    """Generate output filename based on configuration."""
     rerank_suffix = "with-rerank-" if with_rerank else ""
     action_suffix = "deploy-" if action_type == 0 else "update-" if action_type == 1 else ""
+    # Only include batch_suffix if batch_size is not None
+    batch_suffix = f"batch{batch_size}-" if batch_size else ""
 
-    return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml"
+    return f"{example_type}-{rerank_suffix}{device}-{action_suffix}node{num_nodes}-{batch_suffix}values.yaml"
 
 
 def configure_resources(values, deploy_config):
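
For illustration, with hypothetical arguments num_nodes=2, with_rerank=True, example_type="chatqna", device="gaudi", action_type=0 and batch_size=32, the rename changes the output from

    chatqna-2-gaudi-deploy-with-rerank-values.yaml

to

    chatqna-with-rerank-gaudi-deploy-node2-batch32-values.yaml

and when batch_size is None the batch segment is simply omitted.
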
@@ -117,6 +119,7 @@ def configure_resources(values, deploy_config):
 
 def configure_extra_cmd_args(values, deploy_config):
     """Configure extra command line arguments for services."""
+    batch_size = None
     for service_name, config in deploy_config["services"].items():
         if service_name == "llm":
             extra_cmd_args = []
@@ -130,6 +133,13 @@ def configure_extra_cmd_args(values, deploy_config):
             batch_params = engine_params.get("batch_params", {})
             token_params = engine_params.get("token_params", {})
 
+            # Get batch size based on engine type
+            if engine == "tgi":
+                batch_size = batch_params.get("max_batch_size")
+            elif engine == "vllm":
+                batch_size = batch_params.get("max_num_seqs")
+            batch_size = batch_size if batch_size and batch_size != "" else None
+
             # Add all parameters that exist in batch_params
             for param, value in batch_params.items():
                 if value is not None and value != "":
@@ -146,7 +156,7 @@ def configure_extra_cmd_args(values, deploy_config):
             values[engine]["extraCmdArgs"] = extra_cmd_args
             print(f"extraCmdArgs: {extra_cmd_args}")
 
-    return values
+    return values, batch_size
 
 
 def configure_models(values, deploy_config):
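
A short sketch of the new batch-size extraction in isolation (the engine_params dict below is hypothetical):

    # Hypothetical config mirroring the keys read above.
    engine = "vllm"
    engine_params = {"batch_params": {"max_num_seqs": 64}}

    batch_size = None
    if engine == "tgi":
        batch_size = engine_params["batch_params"].get("max_batch_size")
    elif engine == "vllm":
        batch_size = engine_params["batch_params"].get("max_num_seqs")
    batch_size = batch_size if batch_size and batch_size != "" else None
    print(batch_size)  # 64; stays None for other engines or empty values

Callers now unpack the returned tuple, as the generate_helm_values hunk below shows.
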
@@ -218,13 +228,13 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
     values = configure_rerank(values, with_rerank, deploy_config, example_type, node_selector or {})
     values = configure_replica(values, deploy_config)
     values = configure_resources(values, deploy_config)
-    values = configure_extra_cmd_args(values, deploy_config)
+    values, batch_size = configure_extra_cmd_args(values, deploy_config)
     values = configure_models(values, deploy_config)
 
     device = deploy_config.get("device", "unknown")
 
     # Generate and write YAML file
-    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type)
+    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size)
     yaml_string = yaml.dump(values, default_flow_style=False)
 
     filepath = os.path.join(chart_dir, filename)
Expand Down Expand Up @@ -694,6 +704,7 @@ def main():
update_service(
args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path
)
print(f"values_file_path: {values_file_path}")
return
except Exception as e:
parser.error(f"Failed to update deployment: {str(e)}")
39 changes: 28 additions & 11 deletions deploy_and_benchmark.py
@@ -218,12 +218,9 @@ def main(yaml_file, target_node=None, test_mode="oob"):
         # Handle batch parameters
         batch_params = []
         if "batch_params" in engine_params:
-            batch_params = (
-                engine_params["batch_params"].get("max_batch_size", [])
-                if engine == "tgi"
-                else engine_params["batch_params"].get("max_num_seqs", [])
-            )
-            param_name = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+            key = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+            batch_params = engine_params["batch_params"].get(key, [])
+            param_name = key
 
         if not isinstance(batch_params, list):
             batch_params = [batch_params]
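
The replacement is behavior-preserving; for a hypothetical vllm entry it reduces to:

    # Hypothetical engine_params for a vllm service.
    engine = "vllm"
    engine_params = {"batch_params": {"max_num_seqs": [32, 64, 128]}}

    key = "max_batch_size" if engine == "tgi" else "max_num_seqs"
    batch_params = engine_params["batch_params"].get(key, [])  # [32, 64, 128]
    param_name = key                                           # "max_num_seqs"
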
@@ -240,11 +237,18 @@ def main(yaml_file, target_node=None, test_mode="oob"):
         interval = deploy_config.get("interval", 5)  # default 5s
 
         values_file_path = None
+        # Create benchmark output directory
+        benchmark_dir = os.path.join(os.getcwd(), "benchmark_output")
+        os.makedirs(benchmark_dir, exist_ok=True)
+
         for i, batch_param in enumerate(batch_params):
-            if test_mode == "tune":
-                print(f"\nProcessing {param_name}: {batch_param}")
-            else:
-                print("\nProcessing OOB deployment")
+            print(f"\nProcessing {test_mode} mode {param_name}: {batch_param}")
+            # Create subdirectory for this iteration with test mode in the name
+            iteration_dir = os.path.join(
+                benchmark_dir,
+                f"benchmark_{test_mode}_node{node}_batch{batch_param if batch_param is not None else 'default'}",
+            )
+            os.makedirs(iteration_dir, exist_ok=True)
 
             # Construct new deploy config
             new_deploy_config = construct_deploy_config(deploy_config, node, batch_param, test_mode)
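
With hypothetical values test_mode="tune", node=2 and batch_param=32, the per-iteration directory is benchmark_output/benchmark_tune_node2_batch32; an OOB run with batch_param=None falls back to benchmark_output/benchmark_oob_node2_batchdefault:

    # Hypothetical inputs reproducing the name built above.
    test_mode, node, batch_param = "tune", 2, 32
    name = f"benchmark_{test_mode}_node{node}_batch{batch_param if batch_param is not None else 'default'}"
    print(name)  # benchmark_tune_node2_batch32
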
@@ -279,6 +283,8 @@ def main(yaml_file, target_node=None, test_mode="oob"):
             if match:
                 values_file_path = match.group(1)
                 print(f"Captured values_file_path: {values_file_path}")
+                # Copy values file to iteration directory
+                shutil.copy2(values_file_path, iteration_dir)
             else:
                 print("values_file_path not found in the output")

@@ -299,11 +305,21 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                     values_file_path,
                     "--update-service",
                 ]
-                result = subprocess.run(cmd, check=True)
+                result = subprocess.run(cmd, check=True, capture_output=True, text=True)
                 if result.returncode != 0:
                     print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}")
                     break  # Skip remaining {param_name} for this node
 
+                # Update values_file_path from the output
+                match = re.search(r"values_file_path: (\S+)", result.stdout)
+                if match:
+                    values_file_path = match.group(1)
+                    print(f"Updated values_file_path: {values_file_path}")
+                    # Copy values file to iteration directory
+                    shutil.copy2(values_file_path, iteration_dir)
+                else:
+                    print("values_file_path not found in the output")
+
                 # Wait for deployment to be ready
                 print("\nWaiting for deployment to be ready...")
                 cmd = [
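
One subtlety worth noting: because check=True makes subprocess.run raise CalledProcessError on a nonzero exit, the returncode guard above only ever sees successful runs; failures surface as exceptions instead. A sketch of the semantics:

    import subprocess

    try:
        result = subprocess.run(["false"], check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # Nonzero exits land here; e.returncode and e.stdout carry the details.
        print(f"update failed with exit code {e.returncode}")
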
@@ -332,6 +348,7 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                        namespace=namespace,
                        node_num=node,
                        llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
+                        output_dir=iteration_dir,
                    )
                else:
                    print(
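
Taken together, each iteration now leaves a self-contained folder under benchmark_output, pairing the copied values.yaml with the benchmark results written there via output_dir. A hypothetical layout for a two-point tune sweep:

    benchmark_output/
        benchmark_tune_node2_batch32/
            chatqna-gaudi-update-node2-batch32-values.yaml
            ... benchmark results ...
        benchmark_tune_node2_batch64/
            chatqna-gaudi-update-node2-batch64-values.yaml
            ... benchmark results ...
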