diff --git a/benchmark.py b/benchmark.py
index af9e424a17..202a2cb012 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -351,7 +351,7 @@ def _run_service_test(example, service, test_suite_config, namespace):
     return output_folders


-def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False):
+def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model=None, report=False, output_dir=None):
     """Run the benchmark test for the specified helm chart and configuration.

     Args:
@@ -361,6 +361,7 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         node_num (int): The number of nodes of current deployment.
         llm_model (str): The LLM model to use for the test.
         report (bool): Whether to generate a report after the test.
+        output_dir (str): Directory to store the test output. If None, uses default directory.
     """
     # If llm_model is None or an empty string, set to default value
     if not llm_model:
@@ -377,7 +378,9 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
         "deployment_type": "k8s",  # Default is "k8s", can also be "docker"
         "service_ip": None,  # Leave as None for k8s, specify for Docker
         "service_port": None,  # Leave as None for k8s, specify for Docker
-        "test_output_dir": os.getcwd() + "/benchmark_output",  # The directory to store the test output
+        "test_output_dir": (
+            output_dir if output_dir else os.getcwd() + "/benchmark_output"
+        ),  # Use output_dir if provided
         "node_num": node_num,
         "load_shape": {
             "name": parsed_data["load_shape_type"],
diff --git a/deploy.py b/deploy.py
index 619d19ddd9..bd3a8a87d5 100644
--- a/deploy.py
+++ b/deploy.py
@@ -49,12 +49,14 @@ def configure_replica(values, deploy_config):
     return values


-def get_output_filename(num_nodes, with_rerank, example_type, device, action_type):
+def get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size=None):
     """Generate output filename based on configuration."""
     rerank_suffix = "with-rerank-" if with_rerank else ""
     action_suffix = "deploy-" if action_type == 0 else "update-" if action_type == 1 else ""
+    # Only include batch_suffix if batch_size is not None
+    batch_suffix = f"batch{batch_size}-" if batch_size else ""

-    return f"{example_type}-{num_nodes}-{device}-{action_suffix}{rerank_suffix}values.yaml"
+    return f"{example_type}-{rerank_suffix}{device}-{action_suffix}node{num_nodes}-{batch_suffix}values.yaml"


 def configure_resources(values, deploy_config):
@@ -117,6 +119,7 @@ def configure_resources(values, deploy_config):

 def configure_extra_cmd_args(values, deploy_config):
     """Configure extra command line arguments for services."""
+    batch_size = None
     for service_name, config in deploy_config["services"].items():
         if service_name == "llm":
             extra_cmd_args = []
@@ -130,6 +133,13 @@ def configure_extra_cmd_args(values, deploy_config):
                 batch_params = engine_params.get("batch_params", {})
                 token_params = engine_params.get("token_params", {})

+                # Get batch size based on engine type
+                if engine == "tgi":
+                    batch_size = batch_params.get("max_batch_size")
+                elif engine == "vllm":
+                    batch_size = batch_params.get("max_num_seqs")
+                batch_size = batch_size if batch_size and batch_size != "" else None
+
                 # Add all parameters that exist in batch_params
                 for param, value in batch_params.items():
                     if value is not None and value != "":
@@ -146,7 +156,7 @@
             values[engine]["extraCmdArgs"] = extra_cmd_args
             print(f"extraCmdArgs: {extra_cmd_args}")

-    return values
+    return values, batch_size


 def configure_models(values, deploy_config):
@@ -218,13 +228,13 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
     values = configure_rerank(values, with_rerank, deploy_config, example_type, node_selector or {})
     values = configure_replica(values, deploy_config)
     values = configure_resources(values, deploy_config)
-    values = configure_extra_cmd_args(values, deploy_config)
+    values, batch_size = configure_extra_cmd_args(values, deploy_config)
     values = configure_models(values, deploy_config)

     device = deploy_config.get("device", "unknown")

     # Generate and write YAML file
-    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type)
+    filename = get_output_filename(num_nodes, with_rerank, example_type, device, action_type, batch_size)
     yaml_string = yaml.dump(values, default_flow_style=False)

     filepath = os.path.join(chart_dir, filename)
@@ -694,6 +704,7 @@ def main():
             update_service(
                 args.chart_name, args.chart_name, args.namespace, hw_values_file, args.user_values, values_file_path
             )
+            print(f"values_file_path: {values_file_path}")
             return
         except Exception as e:
             parser.error(f"Failed to update deployment: {str(e)}")
diff --git a/deploy_and_benchmark.py b/deploy_and_benchmark.py
index aff65a33e0..f210f215dc 100644
--- a/deploy_and_benchmark.py
+++ b/deploy_and_benchmark.py
@@ -218,12 +218,9 @@ def main(yaml_file, target_node=None, test_mode="oob"):
             # Handle batch parameters
             batch_params = []
             if "batch_params" in engine_params:
-                batch_params = (
-                    engine_params["batch_params"].get("max_batch_size", [])
-                    if engine == "tgi"
-                    else engine_params["batch_params"].get("max_num_seqs", [])
-                )
-                param_name = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+                key = "max_batch_size" if engine == "tgi" else "max_num_seqs"
+                batch_params = engine_params["batch_params"].get(key, [])
+                param_name = key
                 if not isinstance(batch_params, list):
                     batch_params = [batch_params]

@@ -240,11 +237,18 @@ def main(yaml_file, target_node=None, test_mode="oob"):
         interval = deploy_config.get("interval", 5)  # default 5s

         values_file_path = None
+        # Create benchmark output directory
+        benchmark_dir = os.path.join(os.getcwd(), "benchmark_output")
+        os.makedirs(benchmark_dir, exist_ok=True)
+
         for i, batch_param in enumerate(batch_params):
-            if test_mode == "tune":
-                print(f"\nProcessing {param_name}: {batch_param}")
-            else:
-                print("\nProcessing OOB deployment")
+            print(f"\nProcessing {test_mode} mode {param_name}: {batch_param}")
+            # Create subdirectory for this iteration with test mode in the name
+            iteration_dir = os.path.join(
+                benchmark_dir,
+                f"benchmark_{test_mode}_node{node}_batch{batch_param if batch_param is not None else 'default'}",
+            )
+            os.makedirs(iteration_dir, exist_ok=True)

             # Construct new deploy config
             new_deploy_config = construct_deploy_config(deploy_config, node, batch_param, test_mode)
@@ -279,6 +283,8 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                 if match:
                     values_file_path = match.group(1)
                     print(f"Captured values_file_path: {values_file_path}")
+                    # Copy values file to iteration directory
+                    shutil.copy2(values_file_path, iteration_dir)
                 else:
                     print("values_file_path not found in the output")

@@ -299,11 +305,21 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                         values_file_path,
                         "--update-service",
                     ]
-                    result = subprocess.run(cmd, check=True)
+                    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
                     if result.returncode != 0:
                         print(f"Update failed for {node} nodes configuration with {param_name} {batch_param}")
                        break  # Skip remaining {param_name} for this node

+                    # Update values_file_path from the output
+                    match = re.search(r"values_file_path: (\S+)", result.stdout)
+                    if match:
+                        values_file_path = match.group(1)
+                        print(f"Updated values_file_path: {values_file_path}")
+                        # Copy values file to iteration directory
+                        shutil.copy2(values_file_path, iteration_dir)
+                    else:
+                        print("values_file_path not found in the output")
+
                     # Wait for deployment to be ready
                     print("\nWaiting for deployment to be ready...")
                     cmd = [
@@ -332,6 +348,7 @@ def main(yaml_file, target_node=None, test_mode="oob"):
                         namespace=namespace,
                         node_num=node,
                         llm_model=deploy_config.get("services", {}).get("llm", {}).get("model_id", ""),
+                        output_dir=iteration_dir,
                     )
                 else:
                     print(
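
For quick reference, a sketch of the values-file naming this patch introduces, derived from the new get_output_filename above. The "chatqna" and "gaudi" inputs are hypothetical examples, not values taken from the patch:

    # Naming pattern: {example_type}-{rerank_suffix}{device}-{action_suffix}node{num_nodes}-{batch_suffix}values.yaml
    from deploy import get_output_filename  # assumes deploy.py is importable

    # Deploy (action_type == 0) with rerank and an explicit batch size:
    print(get_output_filename(2, True, "chatqna", "gaudi", 0, batch_size=8))
    # -> chatqna-with-rerank-gaudi-deploy-node2-batch8-values.yaml

    # Update (action_type == 1) without rerank; batch_size=None drops the batch suffix:
    print(get_output_filename(1, False, "chatqna", "gaudi", 1))
    # -> chatqna-gaudi-update-node1-values.yaml

These filenames pair with the per-iteration benchmark_output/benchmark_{test_mode}_node{node}_batch{...} directories created in deploy_and_benchmark.py, so each copied values file records the configuration it was benchmarked under.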