diff --git a/config/rhaiis/defaults.yaml b/config/rhaiis/defaults.yaml new file mode 100644 index 0000000..6327038 --- /dev/null +++ b/config/rhaiis/defaults.yaml @@ -0,0 +1,44 @@ +# Global Defaults & Accelerator-Specific Settings +# These are merged with model configs at runtime using inheritance: +# defaults → accelerator → model → model.accelerator_overrides → scenario + +# Global defaults applied to all deployments +defaults: + deploy: + namespace: forge + replicas: 1 + cpu_request: "4" + memory_request: "16Gi" + storage_source: hf + storage_path: model-pvc-2 + + vllm_args: + gpu-memory-utilization: 0.9 + trust-remote-code: true + disable-log-requests: true + uvicorn-log-level: debug + tensor-parallel-size: 1 # Also determines num_gpus for deployment + + guidellm: + rate_type: concurrent + max_seconds: 300 + +# Accelerator-specific overrides +# Selected via --accelerator flag or auto-detected from cluster +accelerators: + nvidia: + image: quay.io/aipcc/rhaiis/cuda-ubi9:3.4.0-ea.2-1773886296 + vllm_args: {} + env_vars: {} + + amd: + image: quay.io/aipcc/rhaiis/rocm-ubi9:3.2.5-1766067105 + vllm_args: + num-scheduler-steps: 8 + env_vars: + VLLM_ROCM_USE_AITER: "1" + + # Future accelerators + # gaudi: + # image: ... 
+ # vllm_args: {} diff --git a/config/rhaiis/models.yaml b/config/rhaiis/models.yaml new file mode 100644 index 0000000..29bd2d7 --- /dev/null +++ b/config/rhaiis/models.yaml @@ -0,0 +1,315 @@ +# Model Registry +# Models only specify what's DIFFERENT from defaults.yaml +# Accelerator-specific settings go in accelerator_overrides section +# +# Resolution order: +# defaults.yaml → accelerators[accel] → models[model] → models[model].accelerator_overrides[accel] + +models: + # === Small Test Models === + qwen-0.6b: + name: "Qwen3-0.6B" + hf_model_id: "Qwen/Qwen3-0.6B" + supported_workloads: [balanced, short, long-prompt] + + # === Llama 3.3 Family === + llama-3.3-70b: + name: "Llama-3.3-70B-Instruct" + hf_model_id: "meta-llama/Llama-3.3-70B-Instruct" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced, short] + + llama-3.3-70b-fp8: + name: "Llama-3.3-70B-Instruct-FP8" + hf_model_id: "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic" + vllm_args: + tensor-parallel-size: 4 + kv-cache-dtype: fp8 + supported_workloads: [balanced, short, long-prompt] + + llama-3.3-70b-w8a8: + name: "Llama-3.3-70B-Instruct-W8A8" + hf_model_id: "RedHatAI/Llama-3.3-70B-Instruct-quantized.w8a8" + vllm_args: + tensor-parallel-size: 4 + max-model-len: 32768 + supported_workloads: [balanced, short] + + # === Llama 3.1 Family === + llama-3.1-8b: + name: "Llama-3.1-8B-Instruct" + hf_model_id: "meta-llama/Llama-3.1-8B-Instruct" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short, long-prompt] + + llama-3.1-8b-fp8: + name: "Llama-3.1-8B-Instruct-FP8" + hf_model_id: "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8-dynamic" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short, long-prompt] + + llama-3.1-405b-fp8: + name: "Llama-3.1-405B-Instruct-FP8" + hf_model_id: "RedHatAI/Meta-Llama-3.1-405B-Instruct-FP8-dynamic" + vllm_args: + tensor-parallel-size: 8 + kv-cache-dtype: fp8 + supported_workloads: [balanced] + + # === Llama 4 Family === + 
llama-4-scout-fp8: + name: "Llama-4-Scout-17B-16E-FP8" + hf_model_id: "RedHatAI/Llama-4-Scout-17B-16E-Instruct-FP8-dynamic" + vllm_args: + tensor-parallel-size: 2 + kv-cache-dtype: fp8 + supported_workloads: [balanced, short] + + llama-4-maverick-fp8: + name: "Llama-4-Maverick-17B-128E-FP8" + hf_model_id: "RedHatAI/Llama-4-Maverick-17B-128E-Instruct-FP8" + vllm_args: + tensor-parallel-size: 8 + kv-cache-dtype: fp8 + supported_workloads: [balanced] + + llama-4-maverick-w4a16: + name: "Llama-4-Maverick-17B-128E-W4A16" + hf_model_id: "RedHatAI/Llama-4-Maverick-17B-128E-Instruct-quantized.w4a16" + vllm_args: + tensor-parallel-size: 8 + enable-expert-parallel: true + supported_workloads: [balanced] + + llama-4-scout-w4a16: + name: "Llama-4-Scout-17B-16E-W4A16" + hf_model_id: "RedHatAI/Llama-4-Scout-17B-16E-Instruct-quantized.w4a16" + vllm_args: + tensor-parallel-size: 2 + supported_workloads: [balanced, short] + + # === Qwen Family === + qwen-235b-fp8: + name: "Qwen3-235B-A22B-FP8" + hf_model_id: "RedHatAI/Qwen3-235B-A22B-FP8-dynamic" + aliases: [qwen-235b, qwen3-moe] + vllm_args: + tensor-parallel-size: 4 + max-model-len: 16384 + gpu-memory-utilization: 0.95 + enable-expert-parallel: true + supported_workloads: [balanced, short] + + qwen-235b-instruct: + name: "Qwen3-235B-A22B-Instruct" + hf_model_id: "Qwen/Qwen3-235B-A22B-Instruct-2507" + vllm_args: + tensor-parallel-size: 4 + max-model-len: 16384 + gpu-memory-utilization: 0.95 + # AMD needs AITER disabled for this model + accelerator_overrides: + amd: + env_vars: + VLLM_ROCM_USE_AITER: "0" + supported_workloads: [balanced] + + qwen-30b-a3b: + name: "Qwen3-30B-A3B-Instruct" + hf_model_id: "Qwen/Qwen3-30B-A3B-Instruct-2507" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + qwen-next-80b-a3b: + name: "Qwen3-Next-80B-A3B-Instruct" + hf_model_id: "Qwen/Qwen3-Next-80B-A3B-Instruct" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced] + + qwen-vl-30b-a3b: + name: 
"Qwen3-VL-30B-A3B-Instruct" + hf_model_id: "Qwen/Qwen3-VL-30B-A3B-Instruct" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced] + + qwen-25-7b: + name: "Qwen2.5-7B-Instruct" + hf_model_id: "Qwen/Qwen2.5-7B-Instruct" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short, long-prompt] + + # === DeepSeek === + deepseek-r1: + name: "DeepSeek-R1-0528" + hf_model_id: "deepseek-ai/DeepSeek-R1-0528" + aliases: [deepseek, r1] + vllm_args: + tensor-parallel-size: 8 + max-model-len: 16384 + gpu-memory-utilization: 0.95 + # AMD needs different AITER settings + accelerator_overrides: + amd: + env_vars: + VLLM_ROCM_USE_AITER: "0" + supported_workloads: [balanced] + + deepseek-r1-w4a16: + name: "DeepSeek-R1-0528-W4A16" + hf_model_id: "RedHatAI/DeepSeek-R1-0528-quantized.w4a16" + vllm_args: + tensor-parallel-size: 8 + max-model-len: 16384 + supported_workloads: [balanced] + + # === GPT-OSS === + gpt-oss-120b: + name: "GPT-OSS-120B" + hf_model_id: "openai/gpt-oss-120b" + vllm_args: + max-model-len: 16384 + gpu-memory-utilization: 0.95 + supported_workloads: [balanced, short] + + gpt-oss-120b-fp8: + name: "GPT-OSS-120B-FP8" + hf_model_id: "RedHatAI/gpt-oss-120b-FP8-dynamic" + vllm_args: + max-model-len: 16384 + gpu-memory-utilization: 0.95 + supported_workloads: [balanced, short] + + gpt-oss-20b: + name: "GPT-OSS-20B" + hf_model_id: "openai/gpt-oss-20b" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + # === Mistral Family === + mistral-small-24b: + name: "Mistral-Small-3.1-24B" + hf_model_id: "mistralai/Mistral-Small-3.1-24B-Instruct-2503" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + mistral-small-24b-fp8: + name: "Mistral-Small-3.1-24B-FP8" + hf_model_id: "RedHatAI/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + mixtral-8x22b: + name: "Mixtral-8x22B-Instruct" + hf_model_id: 
"mistralai/Mixtral-8x22B-Instruct-v0.1" + aliases: [mixtral] + vllm_args: + tensor-parallel-size: 4 + max-model-len: 16384 + gpu-memory-utilization: 0.95 + supported_workloads: [balanced, short] + + mixtral-8x7b: + name: "Mixtral-8x7B-Instruct" + hf_model_id: "mistralai/Mixtral-8x7B-Instruct-v0.1" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + mistral-7b: + name: "Mistral-7B-Instruct" + hf_model_id: "mistralai/Mistral-7B-Instruct-v0.3" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + ministral-14b: + name: "Ministral-3-14B-Instruct" + hf_model_id: "mistralai/Ministral-3-14B-Instruct-2512" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced, short] + + ministral-14b-fp8: + name: "Ministral-3-14B-Instruct-FP8" + hf_model_id: "RedHatAI/Ministral-3-14B-Instruct-2512" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced, short] + + # === Granite Family === + granite-3.1-8b: + name: "Granite-3.1-8B-Instruct" + hf_model_id: "ibm-granite/granite-3.1-8b-instruct" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short, long-prompt] + + granite-3.1-8b-fp8: + name: "Granite-3.1-8B-Instruct-FP8" + hf_model_id: "RedHatAI/granite-3.1-8b-instruct-fp8-dynamic" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short, long-prompt] + + # === Phi Family === + phi-4: + name: "Phi-4" + hf_model_id: "microsoft/phi-4" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + phi-4-fp8: + name: "Phi-4-FP8" + hf_model_id: "RedHatAI/phi-4-FP8-dynamic" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced, short] + + # === Gemma Family === + gemma-2-9b: + name: "Gemma-2-9B-IT" + hf_model_id: "google/gemma-2-9b-it" + vllm_args: + max-model-len: 8192 + supported_workloads: [balanced, short] + + gemma-2-9b-fp8: + name: "Gemma-2-9B-IT-FP8" + hf_model_id: "RedHatAI/gemma-2-9b-it-FP8" + vllm_args: + max-model-len: 8192 
+ supported_workloads: [balanced, short] + + # === Nemotron Family === + nemotron-70b: + name: "Nemotron-70B-Instruct" + hf_model_id: "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF" + vllm_args: + tensor-parallel-size: 2 + max-model-len: 16384 + supported_workloads: [balanced] + + nemotron-70b-fp8: + name: "Nemotron-70B-Instruct-FP8" + hf_model_id: "RedHatAI/Llama-3.1-Nemotron-70B-Instruct-HF-FP8-dynamic" + vllm_args: + max-model-len: 16384 + supported_workloads: [balanced] + + nemotron-nano-30b-fp8: + name: "Nemotron-3-Nano-30B-FP8" + hf_model_id: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8" + vllm_args: + tensor-parallel-size: 4 + supported_workloads: [balanced, short] diff --git a/config/rhaiis/workloads.yaml b/config/rhaiis/workloads.yaml new file mode 100644 index 0000000..aa7bd0b --- /dev/null +++ b/config/rhaiis/workloads.yaml @@ -0,0 +1,64 @@ +# GuideLLM Workload Profiles +# These define benchmark parameters - can be changed WITHOUT restarting vLLM +# Only GuideLLM needs to restart with new workload config + +workloads: + # === Standard Workloads === + balanced: + name: "Balanced" + description: "Balanced prompt and output tokens (1k/1k)" + guidellm: + data: "prompt_tokens=1000,output_tokens=1000" + rates: [1, 50, 100, 200] + max_seconds: 180 + + short: + name: "Short" + description: "Short prompt and output (256/256)" + guidellm: + data: "prompt_tokens=256,output_tokens=256" + rates: [1, 50, 100, 200] + max_seconds: 120 + + long-prompt: + name: "Long Prompt" + description: "Long prompt, standard output (8k/1k)" + guidellm: + data: "prompt_tokens=8000,output_tokens=1000" + rate_type: "concurrent" + rates: [1, 25, 50, 100] + max_seconds: 300 + # Requires separate deployment with larger context + vllm_args: + max-model-len: 10000 + + very-long-prompt: + name: "Very Long Prompt" + description: "Very long prompt (16k/1k)" + guidellm: + data: "prompt_tokens=16000,output_tokens=1000" + rate_type: "concurrent" + rates: [1, 10, 25, 50] + max_seconds: 600 + # Requires 
separate deployment with larger context + vllm_args: + max-model-len: 20000 + + # === Advanced Workloads === + heterogeneous: + name: "Heterogeneous" + description: "Mixed token distributions simulating real traffic" + guidellm: + data: "ADD ME" + rate_type: "concurrent" + rates: [1, 25, 50] + max_seconds: 300 + + multi-turn: + name: "Multi-Turn" + description: "Multi-turn conversation with context reuse" + guidellm: + data: "multi_turn" + rate_type: "concurrent" + rates: [1, 10, 25] + max_seconds: 600 \ No newline at end of file diff --git a/projects/core/ci_entrypoint/run_ci.py b/projects/core/ci_entrypoint/run_ci.py index f55bf24..3b569f4 100755 --- a/projects/core/ci_entrypoint/run_ci.py +++ b/projects/core/ci_entrypoint/run_ci.py @@ -496,10 +496,13 @@ def execute_project_operation(project: str, operation: str, args: tuple, verbose sys.exit(1) -@click.command() +@click.command(context_settings=dict( + ignore_unknown_options=True, + allow_interspersed_args=False, +)) @click.argument('project', required=False) @click.argument('operation', required=False) -@click.argument('args', nargs=-1) +@click.argument('args', nargs=-1, type=click.UNPROCESSED) @click.option('--verbose', '-v', is_flag=True, help='Enable verbose output', default=True) @click.option('--dry-run', is_flag=True, help='Show what would be executed without running it') def main(project, operation, args, verbose, dry_run): diff --git a/projects/core/scenarios/__init__.py b/projects/core/scenarios/__init__.py new file mode 100644 index 0000000..240864a --- /dev/null +++ b/projects/core/scenarios/__init__.py @@ -0,0 +1,34 @@ +"""Declarative scenario generation with config inheritance. + +Generate benchmark scenarios from YAML configuration using matrix expansion. +Supports accelerator-specific settings via inheritance chain. 
+ +Example: + from projects.core.scenarios import ConfigLoader, ScenarioGenerator + + # Load config with accelerator-specific inheritance + loader = ConfigLoader("config/", accelerator="nvidia") + model = loader.load_model("llama-3.3-70b-fp8") + + # Generate scenarios from matrix + gen = ScenarioGenerator("config/projects/rhaiis.yaml", config_loader=loader) + gen.load() + + for scenario in gen.expand(): + print(scenario.scenario_id) # e.g., llama-70b-fp8_balanced_direct_tp4 +""" + +from .config import ScenarioConfig +from .config_loader import ConfigLoader, ResolvedModelConfig, ResolvedWorkloadConfig +from .generator import DeploymentGroup, ExpandedScenario, ParsedConfig, ScenarioGenerator + +__all__ = [ + "ConfigLoader", + "DeploymentGroup", + "ExpandedScenario", + "ParsedConfig", + "ResolvedModelConfig", + "ResolvedWorkloadConfig", + "ScenarioConfig", + "ScenarioGenerator", +] diff --git a/projects/core/scenarios/config.py b/projects/core/scenarios/config.py new file mode 100644 index 0000000..3039574 --- /dev/null +++ b/projects/core/scenarios/config.py @@ -0,0 +1,110 @@ +"""Scenario configuration dataclasses.""" + +import re +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class ScenarioConfig: + """ + Single expanded scenario ready for execution. + + Generated by ScenarioGenerator from matrix expansion of scenarios.yaml. + Contains all configuration needed for a single benchmark run. 
+ """ + + # Identity + scenario_id: str # qwen3-0-6b_balanced_direct_tp1 + model_id: str # Qwen/Qwen3-0.6B + model_short: str # qwen3-0-6b + + # Matrix dimensions + workload: str # balanced, short, long-context + routing: str # direct, prefix-estimation + tensor_parallel: int # 1, 2, 4, 8 + + # Deployment config + deployment_name: str # K8s resource name (sanitized) + namespace: str = "forge" + replicas: int = 1 + + # vLLM runtime args (merged from common + model + workload) + runtime_args: dict[str, Any] = field(default_factory=dict) + + # Workload config (from workloads section) + workload_config: dict[str, Any] = field(default_factory=dict) + + # Routing config (from routing section) + routing_config: dict[str, Any] = field(default_factory=dict) + + # Model-specific env_vars + env_vars: dict[str, str] = field(default_factory=dict) + + # Metadata + description: str | None = None + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "scenario_id": self.scenario_id, + "model_id": self.model_id, + "model_short": self.model_short, + "workload": self.workload, + "routing": self.routing, + "tensor_parallel": self.tensor_parallel, + "deployment_name": self.deployment_name, + "namespace": self.namespace, + "replicas": self.replicas, + "runtime_args": self.runtime_args, + "config": { + "workload_config": self.workload_config, + "routing_config": self.routing_config, + }, + "env_vars": self.env_vars, + "description": self.description, + } + + @staticmethod + def sanitize_name(name: str, max_len: int = 42) -> str: + """ + Sanitize name for K8s resource naming. + + Rules: + - Lowercase + - Replace / and _ with - + - Remove dots + - Truncate to max_len + """ + return ( + name.lower().replace("/", "-").replace("_", "-").replace(".", "") + )[:max_len] + + @staticmethod + def shorten_model_name(model_id: str) -> str: + """ + Create short model name for scenario_id. 
+ + Examples: + - Qwen/Qwen3-0.6B -> qwen3-0-6b + - openai/gpt-oss-120b -> gpt-oss-120b + - RedHatAI/gpt-oss-120b-FP8-dynamic -> gpt-oss-120b-fp8 + """ + # Take last part after / + name = model_id.split("/")[-1] + + # Lowercase + name = name.lower() + + # Remove common suffixes + for suffix in ["-instruct", "-dynamic", "-chat"]: + if name.endswith(suffix): + name = name[: -len(suffix)] + + # Replace dots with dashes + name = name.replace(".", "-") + + # Truncate version numbers like -2507 + name = re.sub(r"-\d{4}$", "", name) + + return name diff --git a/projects/core/scenarios/config_loader.py b/projects/core/scenarios/config_loader.py new file mode 100644 index 0000000..f7d9d8d --- /dev/null +++ b/projects/core/scenarios/config_loader.py @@ -0,0 +1,312 @@ +"""Config loading with inheritance and accelerator support. + +Resolution order: + defaults.yaml (base) + ↓ merge + defaults.yaml.accelerators[accelerator] + ↓ merge + models.yaml[model] + ↓ merge + models.yaml[model].accelerator_overrides[accelerator] + ↓ merge + scenarios/*.yaml.defaults + ↓ merge + scenarios/*.yaml.runs[].overrides +""" + +from copy import deepcopy +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import yaml + + +def deep_merge(base: dict, override: dict) -> dict: + """ + Deep merge two dictionaries. + + Values in override take precedence. Nested dicts are merged recursively. + Lists are replaced (not merged). 
+ """ + result = deepcopy(base) + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = deep_merge(result[key], value) + else: + result[key] = deepcopy(value) + return result + + +@dataclass +class ResolvedModelConfig: + """Fully resolved model configuration after inheritance.""" + + key: str + name: str + hf_model_id: str + deploy: dict[str, Any] = field(default_factory=dict) + vllm_args: dict[str, Any] = field(default_factory=dict) + env_vars: dict[str, str] = field(default_factory=dict) + supported_workloads: list[str] = field(default_factory=list) + aliases: list[str] = field(default_factory=list) + + @property + def num_gpus(self) -> int: + """Number of GPUs from deploy config or tensor_parallel.""" + return self.deploy.get("num_gpus", self.vllm_args.get("tensor-parallel-size", 1)) + + @property + def tensor_parallel(self) -> int: + """Tensor parallel size from vllm_args.""" + return self.vllm_args.get("tensor-parallel-size", 1) + + +@dataclass +class ResolvedWorkloadConfig: + """Resolved workload configuration.""" + + key: str + name: str + description: str = "" + guidellm: dict[str, Any] = field(default_factory=dict) + max_seconds: int = 300 + vllm_args: dict[str, Any] = field(default_factory=dict) # Workload-specific overrides + + +class ConfigLoader: + """ + Load and resolve configurations with inheritance. + + Usage: + loader = ConfigLoader('config/', accelerator='nvidia') + model_config = loader.load_model('llama-3.3-70b-fp8') + workload_config = loader.load_workload('balanced') + """ + + def __init__( + self, + config_dir: str | Path, + accelerator: str = "nvidia", + ): + """ + Initialize config loader. 
+ + Args: + config_dir: Directory containing defaults.yaml, models.yaml, workloads.yaml + accelerator: Accelerator type ('nvidia', 'amd') + """ + self.config_dir = Path(config_dir) + self.accelerator = accelerator + + # Cache loaded configs + self._defaults: dict[str, Any] | None = None + self._models: dict[str, Any] | None = None + self._workloads: dict[str, Any] | None = None + + @property + def defaults(self) -> dict[str, Any]: + """Load and cache defaults.yaml.""" + if self._defaults is None: + defaults_path = self.config_dir / "defaults.yaml" + if defaults_path.exists(): + with open(defaults_path) as f: + self._defaults = yaml.safe_load(f) or {} + else: + self._defaults = {} + return self._defaults + + @property + def models(self) -> dict[str, Any]: + """Load and cache models.yaml.""" + if self._models is None: + models_path = self.config_dir / "models.yaml" + if models_path.exists(): + with open(models_path) as f: + data = yaml.safe_load(f) or {} + self._models = data.get("models", {}) + else: + self._models = {} + return self._models + + @property + def workloads(self) -> dict[str, Any]: + """Load and cache workloads.yaml.""" + if self._workloads is None: + workloads_path = self.config_dir / "workloads.yaml" + if workloads_path.exists(): + with open(workloads_path) as f: + data = yaml.safe_load(f) or {} + self._workloads = data.get("workloads", {}) + else: + self._workloads = {} + return self._workloads + + def get_accelerator_defaults(self) -> dict[str, Any]: + """Get accelerator-specific defaults.""" + accelerators = self.defaults.get("accelerators", {}) + return accelerators.get(self.accelerator, {}) + + def get_global_defaults(self) -> dict[str, Any]: + """Get global defaults (deploy, vllm_args, guidellm).""" + return self.defaults.get("defaults", {}) + + def load_model(self, model_key: str) -> ResolvedModelConfig: + """ + Load and resolve a model configuration. 
+ + Applies inheritance: + defaults → accelerator_defaults → model → model.accelerator_overrides + + Args: + model_key: Model key from models.yaml, alias, or HuggingFace ID + + Returns: + Fully resolved model configuration + + Raises: + KeyError: If model not found + """ + model_data = self._find_model(model_key) + if model_data is None: + raise KeyError(f"Model '{model_key}' not found in registry") + + actual_key, raw_config = model_data + + # Start with global defaults + global_defaults = self.get_global_defaults() + base_deploy = global_defaults.get("deploy", {}) + base_vllm_args = global_defaults.get("vllm_args", {}) + + # Merge accelerator defaults + accel_defaults = self.get_accelerator_defaults() + accel_vllm_args = accel_defaults.get("vllm_args", {}) + accel_env_vars = accel_defaults.get("env_vars", {}) + + # Merge model config + model_deploy = raw_config.get("deploy", {}) + model_vllm_args = raw_config.get("vllm_args", {}) + model_env_vars = raw_config.get("env_vars", {}) + + # Merge accelerator overrides from model + accel_overrides = raw_config.get("accelerator_overrides", {}).get(self.accelerator, {}) + override_vllm_args = accel_overrides.get("vllm_args", {}) + override_env_vars = accel_overrides.get("env_vars", {}) + + # Build final config through inheritance chain + final_deploy = deep_merge(base_deploy, model_deploy) + final_vllm_args = deep_merge( + deep_merge(deep_merge(base_vllm_args, accel_vllm_args), model_vllm_args), + override_vllm_args, + ) + final_env_vars = deep_merge( + deep_merge(accel_env_vars, model_env_vars), + override_env_vars, + ) + + return ResolvedModelConfig( + key=actual_key, + name=raw_config.get("name", actual_key), + hf_model_id=raw_config.get("hf_model_id", actual_key), + deploy=final_deploy, + vllm_args=final_vllm_args, + env_vars=final_env_vars, + supported_workloads=raw_config.get("supported_workloads", []), + aliases=raw_config.get("aliases", []), + ) + + def load_workload(self, workload_key: str) -> 
ResolvedWorkloadConfig: + """ + Load and resolve a workload configuration. + + Args: + workload_key: Workload key from workloads.yaml + + Returns: + Resolved workload configuration + + Raises: + KeyError: If workload not found + """ + if workload_key not in self.workloads: + raise KeyError(f"Workload '{workload_key}' not found") + + raw_config = self.workloads[workload_key] + + # Merge with guidellm defaults + global_defaults = self.get_global_defaults() + base_guidellm = global_defaults.get("guidellm", {}) + workload_guidellm = raw_config.get("guidellm", {}) + final_guidellm = deep_merge(base_guidellm, workload_guidellm) + + return ResolvedWorkloadConfig( + key=workload_key, + name=raw_config.get("name", workload_key), + description=raw_config.get("description", ""), + guidellm=final_guidellm, + max_seconds=raw_config.get("max_seconds", base_guidellm.get("max_seconds", 300)), + vllm_args=raw_config.get("vllm_args", {}), + ) + + def load_scenario(self, scenario_path: str | Path) -> dict[str, Any]: + """ + Load a scenario file and resolve its defaults. + + Args: + scenario_path: Path to scenario YAML file + + Returns: + Parsed scenario data with resolved defaults + """ + scenario_path = Path(scenario_path) + + with open(scenario_path) as f: + data = yaml.safe_load(f) or {} + + # Merge scenario defaults with global defaults + global_defaults = self.get_global_defaults() + scenario_defaults = data.get("defaults", {}) + data["_resolved_defaults"] = deep_merge(global_defaults, scenario_defaults) + + # Add accelerator info + data["_accelerator"] = self.accelerator + data["_accelerator_config"] = self.get_accelerator_defaults() + + return data + + def _find_model(self, model_key: str) -> tuple[str, dict[str, Any]] | None: + """ + Find model by key, alias, or HuggingFace ID. 
+ + Returns: + Tuple of (actual_key, config) or None if not found + """ + # Try exact key match + if model_key in self.models: + return (model_key, self.models[model_key]) + + # Try alias match + for key, config in self.models.items(): + aliases = config.get("aliases", []) + if model_key in aliases: + return (key, config) + + # Try HuggingFace ID match + for key, config in self.models.items(): + if config.get("hf_model_id") == model_key: + return (key, config) + + return None + + def list_models(self) -> list[str]: + """List all model keys.""" + return list(self.models.keys()) + + def list_workloads(self) -> list[str]: + """List all workload keys.""" + return list(self.workloads.keys()) + + def get_image(self) -> str: + """Get container image for current accelerator.""" + accel_config = self.get_accelerator_defaults() + return accel_config.get("image", "") diff --git a/projects/core/scenarios/generator.py b/projects/core/scenarios/generator.py new file mode 100644 index 0000000..9199cf6 --- /dev/null +++ b/projects/core/scenarios/generator.py @@ -0,0 +1,676 @@ +"""Declarative scenario generation with split configuration and inheritance. + +NOTE: This module is not currently wired to any CLI. The RHAIIS CLI uses +ConfigLoader directly with --model and --workloads flags. This generator +is available for future batch/matrix scenario execution if needed. + +Supports: +1. Split config files: defaults.yaml, models.yaml, workloads.yaml +2. Deploy-once pattern: Deploy vLLM once, run multiple workloads +3. Config inheritance: defaults → accelerator → model → scenario +4. 
Matrix expansion: model × workloads × tensor_parallel + +Example scenario format (if wired to CLI): +```yaml +scenarios: + - model: qwen-0.6b # Key from models.yaml + workloads: [balanced, short] # Keys from workloads.yaml + tensor_parallel: [1] +``` +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from itertools import product +from pathlib import Path +from typing import TYPE_CHECKING, Any + +import yaml + +from .config import ScenarioConfig + +if TYPE_CHECKING: + from .config_loader import ConfigLoader + + +@dataclass +class ModelConfig: + """Model configuration from models.yaml.""" + + key: str # qwen-0.6b + name: str # Qwen3-0.6B + hf_model_id: str # Qwen/Qwen3-0.6B + aliases: list[str] = field(default_factory=list) + vllm_args: dict[str, Any] = field(default_factory=dict) + env_vars: dict[str, str] = field(default_factory=dict) + supported_workloads: list[str] = field(default_factory=list) + + @classmethod + def from_dict(cls, key: str, data: dict[str, Any]) -> "ModelConfig": + return cls( + key=key, + name=data.get("name", key), + hf_model_id=data.get("hf_model_id", key), + aliases=data.get("aliases", []), + vllm_args=data.get("vllm_args", {}), + env_vars=data.get("env_vars", {}), + supported_workloads=data.get("supported_workloads", []), + ) + + +@dataclass +class WorkloadConfig: + """Workload configuration from workloads.yaml.""" + + key: str # balanced + name: str # Balanced + description: str = "" + guidellm: dict[str, Any] = field(default_factory=dict) + max_seconds: int = 300 + vllm_args: dict[str, Any] = field(default_factory=dict) # Workload-specific overrides + + @classmethod + def from_dict(cls, key: str, data: dict[str, Any]) -> "WorkloadConfig": + return cls( + key=key, + name=data.get("name", key), + description=data.get("description", ""), + guidellm=data.get("guidellm", {}), + max_seconds=data.get("max_seconds", 300), + vllm_args=data.get("vllm_args", {}), + ) + + +@dataclass +class 
DeploymentGroup: + """ + A group of workloads to run on a single vLLM deployment. + + Deploy vLLM once -> Run all workloads -> Cleanup + + Workloads with different vllm_args get separate deployment groups. + """ + + model: ModelConfig + tensor_parallel: int + routing: str + workloads: list[WorkloadConfig] + routing_config: dict[str, Any] = field(default_factory=dict) + namespace: str = "forge" + vllm_args_override: dict[str, Any] = field(default_factory=dict) # From workload + + @property + def deployment_id(self) -> str: + """Unique ID for this deployment.""" + base = f"{self.model.key}_{self.routing}_tp{self.tensor_parallel}" + if self.vllm_args_override: + # Add hash suffix for workload-specific vllm_args + override_hash = hash(frozenset(self.vllm_args_override.items())) % 10000 + return f"{base}_wl{override_hash}" + return base + + @property + def deployment_name(self) -> str: + """K8s resource name.""" + return ScenarioConfig.sanitize_name(self.model.key) + + @property + def merged_vllm_args(self) -> dict[str, Any]: + """Model vllm_args merged with workload overrides.""" + merged = dict(self.model.vllm_args) + merged.update(self.vllm_args_override) + return merged + + +@dataclass +class ExpandedScenario: + """A single expanded scenario from matrix.""" + + model_id: str # HuggingFace ID + model_key: str # Key from models.yaml + model_short: str # Short name for display + workload: str # balanced, short, etc. + routing: str # direct, prefix-estimation, etc. 
+ tensor_parallel: int # TP size + runtime_args: dict[str, Any] # Merged runtime args + workload_config: dict[str, Any] # Workload settings + routing_config: dict[str, Any] # Routing settings + deploy_config: dict[str, Any] # Deployment settings + + @property + def scenario_id(self) -> str: + """Generate deterministic scenario ID.""" + return f"{self.model_short}_{self.workload}_{self.routing}_tp{self.tensor_parallel}" + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "model_id": self.model_id, + "model_key": self.model_key, + "model_short": self.model_short, + "workload": self.workload, + "routing": self.routing, + "tensor_parallel": self.tensor_parallel, + "scenario_id": self.scenario_id, + "runtime_args": self.runtime_args, + "workload_config": self.workload_config, + "routing_config": self.routing_config, + "deploy_config": self.deploy_config, + } + + def to_scenario_config(self, namespace: str = "forge") -> ScenarioConfig: + """Convert to ScenarioConfig for workflow execution.""" + return ScenarioConfig( + scenario_id=self.scenario_id, + model_id=self.model_id, + model_short=self.model_short, + workload=self.workload, + routing=self.routing, + tensor_parallel=self.tensor_parallel, + deployment_name=ScenarioConfig.sanitize_name(self.model_short), + namespace=namespace, + replicas=self.deploy_config.get("replicas", 1), + runtime_args=self.runtime_args, + workload_config=self.workload_config, + routing_config=self.routing_config, + ) + + +@dataclass +class ParsedConfig: + """Parsed scenario YAML configuration.""" + + name: str + description: str + target_cluster: str = "" + # Common defaults + common: dict[str, Any] = field(default_factory=dict) + # Workload definitions (inline or from workloads.yaml) + workloads: dict[str, WorkloadConfig] = field(default_factory=dict) + # Routing definitions + routing: dict[str, dict[str, Any]] = field(default_factory=dict) + # Model registry (from models.yaml) + models: 
dict[str, ModelConfig] = field(default_factory=dict) + # New: Scenario list (references model keys) + scenarios: list[dict[str, Any]] = field(default_factory=list) + # Legacy: Explicit run list + runs: list[dict[str, Any]] = field(default_factory=list) + + +class ScenarioGenerator: + """ + Generate scenarios from declarative configuration. + + Supports: + - Split config: defaults.yaml + models.yaml + workloads.yaml + scenarios/*.yaml + - Deploy-once pattern: Group workloads under single deployment + - Config inheritance via ConfigLoader: defaults → accelerator → model → scenario + - Legacy inline: All config in single file + """ + + def __init__( + self, + scenarios_path: str | Path | None = None, + models_path: str | Path | None = None, + workloads_path: str | Path | None = None, + config_dir: str | Path | None = None, + config_loader: ConfigLoader | None = None, + accelerator: str = "nvidia", + ): + """ + Initialize generator. + + Args: + scenarios_path: Path to scenarios/*.yaml file + models_path: Path to models.yaml (optional, auto-detected from config_dir) + workloads_path: Path to workloads.yaml (optional, auto-detected) + config_dir: Config directory (auto-detects models.yaml, workloads.yaml) + config_loader: Optional ConfigLoader for inheritance-based resolution + accelerator: Accelerator type ('nvidia', 'amd') for inheritance + """ + self.scenarios_path = Path(scenarios_path) if scenarios_path else None + self.models_path = Path(models_path) if models_path else None + self.workloads_path = Path(workloads_path) if workloads_path else None + self.accelerator = accelerator + + # Auto-detect config paths from config_dir or scenarios_path parent + if config_dir: + config_dir = Path(config_dir) + elif scenarios_path: + # Handle scenarios in subdirectory: config/projects/rhaiis.yaml -> config/ + scenarios_parent = Path(scenarios_path).parent + if scenarios_parent.name == "projects": + config_dir = scenarios_parent.parent + else: + config_dir = scenarios_parent + 
+ self.config_dir = config_dir + + if config_dir: + if not self.models_path and (config_dir / "models.yaml").exists(): + self.models_path = config_dir / "models.yaml" + if not self.workloads_path and (config_dir / "workloads.yaml").exists(): + self.workloads_path = config_dir / "workloads.yaml" + + # Use provided ConfigLoader or create one if config_dir is available + self.config_loader = config_loader + if not self.config_loader and config_dir: + from .config_loader import ConfigLoader + self.config_loader = ConfigLoader(config_dir, accelerator=accelerator) + + self.config: ParsedConfig | None = None + + def load(self, path: str | Path | None = None) -> ParsedConfig: + """ + Load and parse scenario configuration. + + Loads from: + 1. models.yaml (if exists) -> model registry + 2. workloads.yaml (if exists) -> workload profiles + 3. scenarios-*.yaml -> scenario definitions + + Args: + path: Optional path override for scenarios file + + Returns: + Parsed configuration + """ + scenarios_path = Path(path) if path else self.scenarios_path + if not scenarios_path: + raise ValueError("No scenarios config path provided") + + # Load models registry + models: dict[str, ModelConfig] = {} + if self.models_path and self.models_path.exists(): + with open(self.models_path) as f: + models_data = yaml.safe_load(f) + for key, data in models_data.get("models", {}).items(): + models[key] = ModelConfig.from_dict(key, data) + + # Load workloads + workloads: dict[str, WorkloadConfig] = {} + if self.workloads_path and self.workloads_path.exists(): + with open(self.workloads_path) as f: + workloads_data = yaml.safe_load(f) + for key, data in workloads_data.get("workloads", {}).items(): + workloads[key] = WorkloadConfig.from_dict(key, data) + + # Load scenarios + with open(scenarios_path) as f: + data = yaml.safe_load(f) + + # Merge inline workloads (if any) with loaded workloads + for key, wl_data in data.get("workloads", {}).items(): + if key not in workloads: + workloads[key] = 
WorkloadConfig.from_dict(key, wl_data) + + # Merge inline models with loaded models + # Supports both new format (hf_model_id, vllm_args) and legacy (runtime_args) + for model_key, model_data in data.get("models", {}).items(): + if model_key not in models: + # Check for new format fields + hf_model_id = model_data.get("hf_model_id", model_key) + vllm_args = model_data.get("vllm_args") or model_data.get("runtime_args", {}) + name = model_data.get("name") or model_data.get("deploy", {}).get("name", model_key) + + models[model_key] = ModelConfig.from_dict( + key=model_key, + data={ + "hf_model_id": hf_model_id, + "name": name, + "vllm_args": vllm_args, + "env_vars": model_data.get("env_vars", {}), + }, + ) + + self.config = ParsedConfig( + name=data.get("name", scenarios_path.stem), + description=data.get("description", ""), + target_cluster=data.get("target_cluster", ""), + common=data.get("common", {}), + workloads=workloads, + routing=data.get("routing", {}), + models=models, + scenarios=data.get("scenarios", []), + runs=data.get("runs", []), + ) + + return self.config + + def expand(self) -> list[ExpandedScenario]: + """ + Expand all scenarios into individual benchmark runs. 
+ + Returns: + List of ExpandedScenario objects + """ + if not self.config: + raise RuntimeError("Must call load() first") + + expanded = [] + + # New format: scenarios list with model key references + for scenario_def in self.config.scenarios: + scenarios = self._expand_scenario_def(scenario_def) + expanded.extend(scenarios) + + # Legacy format: models with inline matrix + for model_id, model_config in self.config.models.items(): + if isinstance(model_config, ModelConfig): + continue # Skip, already processed via scenarios + # Legacy dict format + matrix = model_config.get("matrix", {}) if isinstance(model_config, dict) else {} + if matrix: + scenarios = self._expand_legacy_model_matrix(model_id, model_config) + expanded.extend(scenarios) + + # Explicit runs (no matrix expansion) + for run in self.config.runs: + scenario = self._create_from_run(run) + if scenario: + expanded.append(scenario) + + return expanded + + def expand_grouped(self) -> list[DeploymentGroup]: + """ + Expand scenarios grouped by deployment. + + Returns groups where each group shares a single vLLM deployment. + Deploy once -> Run all workloads in group -> Cleanup + + Uses ConfigLoader when available for full inheritance chain. 
+ + Returns: + List of DeploymentGroup objects + """ + if not self.config: + raise RuntimeError("Must call load() first") + + groups: dict[str, DeploymentGroup] = {} + + for scenario_def in self.config.scenarios: + model_key = scenario_def.get("model") + if not model_key or model_key not in self.config.models: + continue + + workload_keys = scenario_def.get("workloads", ["balanced"]) + routings = scenario_def.get("routing", ["direct"]) + tp_values = scenario_def.get("tensor_parallel", [1]) + namespace = self.config.common.get("namespace", "forge") + + # Use ConfigLoader for resolved model config if available + if self.config_loader: + try: + resolved_model = self.config_loader.load_model(model_key) + # Create a ModelConfig-compatible object with resolved values + model = ModelConfig( + key=resolved_model.key, + name=resolved_model.name, + hf_model_id=resolved_model.hf_model_id, + aliases=resolved_model.aliases, + vllm_args=resolved_model.vllm_args, + env_vars=resolved_model.env_vars, + supported_workloads=resolved_model.supported_workloads, + ) + except KeyError: + model = self.config.models[model_key] + else: + model = self.config.models[model_key] + + # Create groups for each (model, routing, tp, vllm_args) combination + # Workloads with different vllm_args get separate deployment groups + for routing, tp in product(routings, tp_values): + # Group workloads by their vllm_args + workloads_by_vllm_args: dict[tuple, list[WorkloadConfig]] = {} + + for wl_key in workload_keys: + if wl_key not in self.config.workloads: + continue + wl = self.config.workloads[wl_key] + # Create hashable key from vllm_args + vllm_args_key = tuple(sorted(wl.vllm_args.items())) if wl.vllm_args else () + if vllm_args_key not in workloads_by_vllm_args: + workloads_by_vllm_args[vllm_args_key] = [] + workloads_by_vllm_args[vllm_args_key].append(wl) + + # Create a deployment group for each unique vllm_args + for vllm_args_key, workloads in workloads_by_vllm_args.items(): + vllm_args_override = 
dict(vllm_args_key) if vllm_args_key else {} + + # Include vllm_args hash in group_id for uniqueness + if vllm_args_override: + override_hash = hash(vllm_args_key) % 10000 + group_id = f"{model_key}_{routing}_tp{tp}_wl{override_hash}" + else: + group_id = f"{model_key}_{routing}_tp{tp}" + + if group_id not in groups: + groups[group_id] = DeploymentGroup( + model=model, + tensor_parallel=tp, + routing=routing, + workloads=workloads, + routing_config=self.config.routing.get(routing, {}), + namespace=namespace, + vllm_args_override=vllm_args_override, + ) + else: + # Add more workloads to existing group + for wl in workloads: + if wl not in groups[group_id].workloads: + groups[group_id].workloads.append(wl) + + return list(groups.values()) + + def _expand_scenario_def(self, scenario_def: dict[str, Any]) -> list[ExpandedScenario]: + """Expand a scenario definition from the new format. + + Uses ConfigLoader when available for full inheritance chain: + defaults → accelerator → model → model.accelerator_overrides → scenario + """ + model_key = scenario_def.get("model") + if not model_key or model_key not in self.config.models: + return [] + + workload_keys = scenario_def.get("workloads", ["balanced"]) + routings = scenario_def.get("routing", ["direct"]) + tp_values = scenario_def.get("tensor_parallel", [1]) + vllm_args_override = scenario_def.get("vllm_args_override", {}) + + scenarios = [] + namespace = self.config.common.get("namespace", "forge") + + # Use ConfigLoader for full inheritance if available + if self.config_loader: + try: + resolved_model = self.config_loader.load_model(model_key) + model_id = resolved_model.hf_model_id + base_vllm_args = dict(resolved_model.vllm_args) + env_vars = dict(resolved_model.env_vars) + deploy_config_base = dict(resolved_model.deploy) + except KeyError: + # Fall back to basic model config + model = self.config.models[model_key] + model_id = model.hf_model_id + base_vllm_args = dict(model.vllm_args) + env_vars = dict(model.env_vars) 
+ deploy_config_base = {} + else: + model = self.config.models[model_key] + model_id = model.hf_model_id + base_vllm_args = dict(model.vllm_args) + env_vars = dict(model.env_vars) + deploy_config_base = {} + + for workload_key, routing, tp in product(workload_keys, routings, tp_values): + # Resolve workload config + if self.config_loader: + try: + resolved_workload = self.config_loader.load_workload(workload_key) + workload_guidellm = resolved_workload.guidellm + except KeyError: + workload_config = self.config.workloads.get(workload_key) + if not workload_config: + continue + workload_guidellm = workload_config.guidellm + else: + workload_config = self.config.workloads.get(workload_key) + if not workload_config: + continue + workload_guidellm = workload_config.guidellm + + # Build runtime args with inheritance + runtime_args = dict(base_vllm_args) + runtime_args["tensor-parallel-size"] = tp + runtime_args.update(vllm_args_override) + + scenario = ExpandedScenario( + model_id=model_id, + model_key=model_key, + model_short=self._shorten_model_name(model_key), + workload=workload_key, + routing=routing, + tensor_parallel=tp, + runtime_args=runtime_args, + workload_config=workload_guidellm, + routing_config=self.config.routing.get(routing, {}), + deploy_config={ + "namespace": namespace, + "replicas": self.config.common.get("replicas", 1), + "num_gpus": deploy_config_base.get("num_gpus", tp), + "env_vars": env_vars, + }, + ) + scenarios.append(scenario) + + return scenarios + + def _expand_legacy_model_matrix( + self, + model_id: str, + model_config: dict[str, Any], + ) -> list[ExpandedScenario]: + """Expand a model's matrix (legacy inline format).""" + matrix = model_config.get("matrix", {}) + deploy_config = model_config.get("deploy", {}) + + workloads = matrix.get("workloads", ["balanced"]) + routings = matrix.get("routing", ["direct"]) + tp_values = matrix.get("tensor-parallel-size", [1]) + + common_runtime = self.config.common.get("runtime_args", {}) + 
model_runtime = model_config.get("runtime_args", {}) + + scenarios = [] + + for workload, routing, tp in product(workloads, routings, tp_values): + runtime_args = dict(common_runtime) + runtime_args.update(model_runtime) + runtime_args["tensor-parallel-size"] = tp + + workload_config = self.config.workloads.get(workload) + wl_dict = workload_config.guidellm if workload_config else {} + + routing_config = self.config.routing.get(routing, {}) + + model_short = self._shorten_model_name(model_id) + + scenario_deploy_config = dict(deploy_config) + scenario_deploy_config["num_gpus"] = tp + + scenario = ExpandedScenario( + model_id=model_id, + model_key=model_id, + model_short=model_short, + workload=workload, + routing=routing, + tensor_parallel=tp, + runtime_args=runtime_args, + workload_config=wl_dict, + routing_config=routing_config, + deploy_config=scenario_deploy_config, + ) + scenarios.append(scenario) + + return scenarios + + def _create_from_run(self, run: dict[str, Any]) -> ExpandedScenario | None: + """Create scenario from explicit run definition.""" + model_key = run.get("model") + if not model_key: + return None + + model = self.config.models.get(model_key) + if not model: + return None + + workload = run.get("workload", "balanced") + routing = run.get("routing", "direct") + tp = run.get("tensor_parallel", 1 if isinstance(model, ModelConfig) else 1) + + runtime_args = dict(model.vllm_args) if isinstance(model, ModelConfig) else {} + runtime_args.update(run.get("runtime_args_override", {})) + runtime_args["tensor-parallel-size"] = tp + + workload_config = self.config.workloads.get(workload) + + return ExpandedScenario( + model_id=model.hf_model_id if isinstance(model, ModelConfig) else model_key, + model_key=model_key, + model_short=self._shorten_model_name(model_key), + workload=workload, + routing=routing, + tensor_parallel=tp, + runtime_args=runtime_args, + workload_config=workload_config.guidellm if workload_config else {}, + 
routing_config=self.config.routing.get(routing, {}), + deploy_config={ + "namespace": self.config.common.get("namespace", "forge"), + "replicas": 1, + }, + ) + + @staticmethod + def _shorten_model_name(model_id: str) -> str: + """Create short model name from model key or HuggingFace ID.""" + name = model_id.split("/")[-1].lower() + name = re.sub(r"-instruct.*", "", name) + name = re.sub(r"-dynamic$", "", name) + name = re.sub(r"-a\d+b", "", name) + name = re.sub(r"[^a-z0-9]+", "-", name) + name = name.strip("-") + if len(name) > 40: + name = name[:40].rstrip("-") + return name + + def summary(self) -> str: + """Generate summary of scenarios.""" + if not self.config: + return "No config loaded" + + expanded = self.expand() + groups = self.expand_grouped() + + lines = [ + f"Scenario Config: {self.config.name}", + f"Description: {self.config.description}", + f"Target Cluster: {self.config.target_cluster or '(not set)'}", + f"Models: {len(self.config.models)}", + f"Workloads: {len(self.config.workloads)}", + f"Deployment Groups: {len(groups)}", + f"Total Benchmark Runs: {len(expanded)}", + "", + ] + + # Show deployment groups + lines.append("Deployment Groups (deploy once, run N workloads):") + for group in groups: + wl_names = ", ".join(wl.key for wl in group.workloads) + lines.append(f" {group.deployment_id}:") + lines.append(f" Model: {group.model.hf_model_id}") + lines.append(f" Workloads: [{wl_names}]") + + return "\n".join(lines) diff --git a/projects/core/steps/__init__.py b/projects/core/steps/__init__.py new file mode 100644 index 0000000..ea3cf43 --- /dev/null +++ b/projects/core/steps/__init__.py @@ -0,0 +1,14 @@ +"""Shared workflow steps for all projects. 
+ +These steps can be imported and used by any project: + from projects.core.steps import RunGuideLLMStep, CollectArtifactsStep +""" + +from .artifacts import CleanupDeploymentStep, CollectArtifactsStep +from .guidellm import RunGuideLLMStep + +__all__ = [ + "CleanupDeploymentStep", + "CollectArtifactsStep", + "RunGuideLLMStep", +] diff --git a/projects/core/steps/artifacts.py b/projects/core/steps/artifacts.py new file mode 100644 index 0000000..9f1f5f2 --- /dev/null +++ b/projects/core/steps/artifacts.py @@ -0,0 +1,250 @@ +"""Artifact collection step - shared by all projects.""" + +import logging +import subprocess +from pathlib import Path +from typing import TYPE_CHECKING + +from projects.core.workflow import StepResult, WorkflowStep + +if TYPE_CHECKING: + from projects.core.workflow import WorkflowContext + +logger = logging.getLogger(__name__) + + +class CollectArtifactsStep(WorkflowStep): + """ + Collect logs, events, and pod status for debugging. + + Always runs as a finally step to capture artifacts regardless + of success or failure. Does not fail the workflow if collection + fails - just logs warnings. + + Can be customized per project: + - rhaiis: app_label="vllm" + - llm_d: app_label="epp" + """ + + def __init__( + self, + app_label: str = "vllm", + namespace: str | None = None, + collect_events: bool = True, + collect_pod_logs: bool = True, + collect_pod_describe: bool = True, + name: str | None = None, + ): + """ + Initialize artifact collection step. 
+ + Args: + app_label: Kubernetes app label to filter pods (e.g., "vllm", "epp") + namespace: Kubernetes namespace (uses current context if None) + collect_events: Whether to collect namespace events + collect_pod_logs: Whether to collect pod logs + collect_pod_describe: Whether to collect pod descriptions + name: Optional step name + """ + super().__init__(name=name or "collect_artifacts") + self.app_label = app_label + self.namespace = namespace + self.collect_events = collect_events + self.collect_pod_logs = collect_pod_logs + self.collect_pod_describe = collect_pod_describe + + def execute(self, ctx: "WorkflowContext") -> StepResult: + """Collect artifacts from cluster.""" + step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}" + step_dir.mkdir(parents=True, exist_ok=True) + collected_files: list[str] = [] + warnings: list[str] = [] + + ns_args = ["-n", self.namespace] if self.namespace else [] + + # Collect pod logs + if self.collect_pod_logs: + log_file = step_dir / "app_logs.txt" + result = self._run_oc( + ["logs", "-l", f"app={self.app_label}", "--tail=1000", *ns_args], + log_file, + ) + if result: + collected_files.append(str(log_file)) + else: + warnings.append(f"Failed to collect logs for app={self.app_label}") + + # Collect pod descriptions + if self.collect_pod_describe: + describe_file = step_dir / "pod_describe.txt" + result = self._run_oc( + ["describe", "pods", "-l", f"app={self.app_label}", *ns_args], + describe_file, + ) + if result: + collected_files.append(str(describe_file)) + else: + warnings.append(f"Failed to describe pods for app={self.app_label}") + + # Collect events + if self.collect_events: + events_file = step_dir / "events.txt" + result = self._run_oc( + ["get", "events", "--sort-by=.lastTimestamp", *ns_args], + events_file, + ) + if result: + collected_files.append(str(events_file)) + else: + warnings.append("Failed to collect events") + + # Collect pod status + status_file = step_dir / "pod_status.txt" + 
result = self._run_oc( + ["get", "pods", "-l", f"app={self.app_label}", "-o", "wide", *ns_args], + status_file, + ) + if result: + collected_files.append(str(status_file)) + + message = f"Collected {len(collected_files)} artifacts" + if warnings: + message += f" ({len(warnings)} warnings)" + for w in warnings: + logger.warning(w) + + return StepResult( + success=True, # Never fail - this is a finally step + message=message, + artifacts=collected_files, + ) + + def _run_oc(self, args: list[str], output_file: Path) -> bool: + """ + Run oc command and write output to file. + + Returns True if successful, False otherwise. + """ + try: + cmd = ["oc", *args] + logger.debug(f"Running: {' '.join(cmd)}") + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=60, + ) + + # Write output regardless of exit code + with open(output_file, "w") as f: + f.write(f"# Command: oc {' '.join(args)}\n") + f.write(f"# Exit code: {result.returncode}\n\n") + if result.stdout: + f.write(result.stdout) + if result.stderr: + f.write(f"\n# STDERR:\n{result.stderr}") + + return result.returncode == 0 + + except subprocess.TimeoutExpired: + logger.warning(f"Command timed out: oc {' '.join(args)}") + return False + except FileNotFoundError: + logger.warning("oc command not found") + return False + except Exception as e: + logger.warning(f"Error running oc: {e}") + return False + + +class CleanupDeploymentStep(WorkflowStep): + """ + Clean up Kubernetes/KServe deployment resources. + + Runs as a finally step to ensure resources are cleaned up + even on failure. Handles both standard K8s deployments and + KServe InferenceService/ServingRuntime resources. + """ + + def __init__( + self, + deployment_name: str, + namespace: str | None = None, + delete_service: bool = True, + delete_route: bool = True, + use_kserve: bool = True, + name: str | None = None, + ): + """ + Initialize cleanup step. 
+ + Args: + deployment_name: Name of the deployment/InferenceService to delete + namespace: Kubernetes namespace (uses current context if None) + delete_service: Also delete the associated service + delete_route: Also delete the associated route + use_kserve: Delete KServe resources (InferenceService, ServingRuntime) + name: Optional step name + """ + super().__init__(name=name or "cleanup") + self.deployment_name = deployment_name + self.namespace = namespace + self.delete_service = delete_service + self.delete_route = delete_route + self.use_kserve = use_kserve + + def execute(self, ctx: "WorkflowContext") -> StepResult: + """Delete deployment and related resources.""" + ns_args = ["-n", self.namespace] if self.namespace else [] + deleted: list[str] = [] + errors: list[str] = [] + + # Delete KServe resources first (they manage the underlying deployments) + if self.use_kserve: + if self._delete_resource("inferenceservice", self.deployment_name, ns_args): + deleted.append(f"inferenceservice/{self.deployment_name}") + if self._delete_resource("servingruntime", self.deployment_name, ns_args): + deleted.append(f"servingruntime/{self.deployment_name}") + + # Delete standard deployment (if not using KServe or as fallback) + if self._delete_resource("deployment", self.deployment_name, ns_args): + deleted.append(f"deployment/{self.deployment_name}") + + # Delete service + if self.delete_service: + if self._delete_resource("service", self.deployment_name, ns_args): + deleted.append(f"service/{self.deployment_name}") + + # Delete route + if self.delete_route: + if self._delete_resource("route", self.deployment_name, ns_args): + deleted.append(f"route/{self.deployment_name}") + + message = f"Deleted: {', '.join(deleted)}" if deleted else "Nothing deleted" + if errors: + message += f" (errors: {len(errors)})" + + return StepResult( + success=True, # Never fail - this is a finally step + message=message, + data={"deleted": deleted, "errors": errors}, + ) + + def 
_delete_resource(self, kind: str, name: str, ns_args: list[str]) -> bool: + """Delete a Kubernetes resource. Returns True if successful.""" + try: + cmd = ["oc", "delete", kind, name, "--ignore-not-found", *ns_args] + logger.debug(f"Running: {' '.join(cmd)}") + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=60, + ) + return result.returncode == 0 + + except Exception as e: + logger.warning(f"Error deleting {kind}/{name}: {e}") + return False diff --git a/projects/core/steps/guidellm.py b/projects/core/steps/guidellm.py new file mode 100644 index 0000000..2688f10 --- /dev/null +++ b/projects/core/steps/guidellm.py @@ -0,0 +1,343 @@ +"""GuideLLM benchmark step - runs as a pod on the cluster.""" + +import json +import logging +import subprocess +import time +import uuid +from typing import TYPE_CHECKING + +from projects.core.workflow import StepResult, WorkflowStep + +if TYPE_CHECKING: + from projects.core.workflow import WorkflowContext + +logger = logging.getLogger(__name__) + +# Default GuideLLM image from llm-d-bench +DEFAULT_GUIDELLM_IMAGE = "ghcr.io/openshift-psap/llm-d-bench/guidellm:latest" + + +class RunGuideLLMStep(WorkflowStep): + """ + Run GuideLLM benchmark as a pod on the cluster. + + Deploys a GuideLLM pod in the same namespace as the inference service, + waits for completion, and collects results. + """ + + def __init__( + self, + endpoint: str, + model: str, + namespace: str = "forge", + workload: str = "balanced", + max_requests: int | None = None, + max_seconds: int = 120, + rate: str = "1,50,100", + rate_type: str = "concurrent", + guidellm_image: str | None = None, + output_file: str = "guidellm_results.json", + name: str | None = None, + ): + """ + Initialize GuideLLM step. 
+ + Args: + endpoint: Inference endpoint URL (e.g., http://vllm-svc:8080/v1) + model: Model name as deployed + namespace: Kubernetes namespace where to run the benchmark pod + workload: GuideLLM workload type (balanced, heterogeneous, multiturn) + or explicit format: "prompt_tokens=1000,output_tokens=1000" + max_requests: Maximum number of requests to send + max_seconds: Maximum benchmark duration in seconds per rate + rate: Comma-separated rates to test (e.g., "1,50,100") + rate_type: Rate type - "concurrent" or "synchronous" + guidellm_image: GuideLLM container image + output_file: Name of output file in artifact directory + name: Optional step name + """ + super().__init__(name=name or "benchmark") + self.endpoint = endpoint + self.model = model + self.namespace = namespace + self.workload = workload + self.max_requests = max_requests + self.max_seconds = max_seconds + self.rate = rate + self.rate_type = rate_type + self.guidellm_image = guidellm_image or DEFAULT_GUIDELLM_IMAGE + self.output_file = output_file + # Use model name in pod name for easier correlation with inference pods + model_short = model.split("/")[-1].lower().replace(".", "-").replace("_", "-")[:20] + self.pod_name = f"guidellm-{model_short}-{uuid.uuid4().hex[:6]}" + + def execute(self, ctx: "WorkflowContext") -> StepResult: + """Run GuideLLM benchmark as a pod.""" + step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}" + step_dir.mkdir(parents=True, exist_ok=True) + + # Convert workload to GuideLLM data format + data = self._workload_to_data(self.workload) + + # Generate pod YAML + pod_yaml = self._generate_pod_yaml(data) + yaml_path = step_dir / "guidellm-pod.yaml" + yaml_path.write_text(pod_yaml) + + logger.info(f"Creating GuideLLM pod: {self.pod_name}") + print(f"Creating GuideLLM pod: {self.pod_name} in namespace {self.namespace}") + + # Create the pod + try: + result = subprocess.run( + ["oc", "apply", "-f", str(yaml_path)], + capture_output=True, + text=True, + 
timeout=60, + ) + if result.returncode != 0: + return StepResult.fail( + f"Failed to create GuideLLM pod: {result.stderr}", + error=RuntimeError(result.stderr), + ) + except Exception as e: + return StepResult.fail(f"Failed to create pod: {e}", error=e) + + # Wait for pod to complete + # Calculate generous timeout: max_seconds per rate, plus 30min overhead for startup/warmup + num_rates = len(self.rate.split(",")) + timeout = (self.max_seconds * num_rates) + 1800 # 30min overhead + wait_result = self._wait_for_pod_completion(timeout) + + # Collect logs regardless of outcome + self._collect_pod_logs(step_dir) + + # Cleanup pod + self._delete_pod() + + if not wait_result["success"]: + return StepResult.fail( + f"GuideLLM pod failed: {wait_result['message']}", + error=RuntimeError(wait_result["message"]), + ) + + return StepResult.ok( + f"GuideLLM completed in {wait_result.get('duration', 0):.1f}s", + pod_name=self.pod_name, + ) + + def _workload_to_data(self, workload: str) -> str: + """Convert workload name to GuideLLM data format.""" + workload_map = { + "balanced": "prompt_tokens=1000,output_tokens=1000", + "short": "prompt_tokens=256,output_tokens=256", + "long-prompt": "prompt_tokens=8000,output_tokens=1000", + "very-long-prompt": "prompt_tokens=16000,output_tokens=1000", + "heterogeneous": "emulated", + "multi-turn": "multi_turn", + } + return workload_map.get(workload, workload) + + def _generate_pod_yaml(self, data: str) -> str: + """Generate GuideLLM pod YAML.""" + # Build guidellm args + args = [ + "--target", self.endpoint, + "--model", self.model, + "--rate", self.rate, + "--rate-type", self.rate_type, + "--data", data, + "--max-seconds", str(self.max_seconds), + "--backend-type", "openai_http", + ] + + if self.max_requests: + args.extend(["--max-requests", str(self.max_requests)]) + + # Build command as shell script: run benchmark, signal completion, sleep for rsync + guidellm_cmd = f"python3 -m benchmark.main {' '.join(args)}" + + return f"""apiVersion: 
v1 +kind: Pod +metadata: + name: {self.pod_name} + namespace: {self.namespace} + labels: + app: guidellm-benchmark + forge-run: "true" +spec: + restartPolicy: Never + containers: + - name: guidellm + image: {self.guidellm_image} + imagePullPolicy: Always + command: + - /bin/sh + - -c + - | + {guidellm_cmd} + echo "BENCHMARK_COMPLETE" + echo "Sleeping 30s for artifact collection..." + sleep 30 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: storage-config + key: HF_TOKEN + optional: true + - name: GUIDELLM__REQUEST_TIMEOUT + value: "6000" + - name: GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL + value: "INFO" + - name: HF_HOME + value: /tmp/.huggingface + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "4" + memory: "8Gi" + volumeMounts: + - name: results-volume + mountPath: /benchmark-results + volumes: + - name: results-volume + emptyDir: {{}} + # Avoid GPU nodes - run on infra/worker nodes + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: nvidia.com/gpu + operator: DoesNotExist +""" + + def _wait_for_pod_completion(self, timeout: int) -> dict: + """Wait for benchmark to complete (watching for BENCHMARK_COMPLETE marker in logs). + + The pod runs benchmark, prints BENCHMARK_COMPLETE, then sleeps for 30s. + We detect completion via the marker while pod is still running, + allowing rsync to work before the pod exits. 
+ """ + start_time = time.monotonic() + poll_interval = 10 + + print(f"Waiting for GuideLLM benchmark to complete (timeout: {timeout}s)...") + + while time.monotonic() - start_time < timeout: + try: + # First check pod phase + phase_result = subprocess.run( + [ + "oc", "get", "pod", self.pod_name, + "-n", self.namespace, + "-o", "jsonpath={.status.phase}", + ], + capture_output=True, + text=True, + timeout=15, + ) + phase = phase_result.stdout.strip() + + if phase == "Failed": + return {"success": False, "message": "Pod failed"} + + if phase == "Error": + return {"success": False, "message": "Pod error"} + + # Check logs for BENCHMARK_COMPLETE marker + if phase in ("Running", "Succeeded"): + log_result = subprocess.run( + ["oc", "logs", self.pod_name, "-n", self.namespace, "--tail=50"], + capture_output=True, + text=True, + timeout=15, + ) + if "BENCHMARK_COMPLETE" in log_result.stdout: + duration = time.monotonic() - start_time + print(f"GuideLLM benchmark completed in {duration:.1f}s") + return {"success": True, "duration": duration} + + # Also handle case where pod already Succeeded (marker might have been missed) + if phase == "Succeeded": + duration = time.monotonic() - start_time + print(f"GuideLLM pod completed in {duration:.1f}s") + return {"success": True, "duration": duration} + + # Still running, no marker yet + elapsed = int(time.monotonic() - start_time) + if elapsed % 60 == 0: # Print every minute + print(f" GuideLLM running... 
({elapsed}s elapsed, phase={phase})") + + except subprocess.TimeoutExpired: + pass + except Exception as e: + logger.warning(f"Error checking pod status: {e}") + + time.sleep(poll_interval) + + return {"success": False, "message": f"Timeout after {timeout}s"} + + def _collect_pod_logs(self, step_dir): + """Collect logs and results from the GuideLLM pod.""" + # Collect logs + try: + result = subprocess.run( + ["oc", "logs", self.pod_name, "-n", self.namespace], + capture_output=True, + text=True, + timeout=60, + ) + (step_dir / "guidellm_logs.txt").write_text(result.stdout) + if result.stderr: + (step_dir / "guidellm_stderr.txt").write_text(result.stderr) + + print(f"GuideLLM logs saved to {step_dir}/guidellm_logs.txt") + + except Exception as e: + logger.warning(f"Failed to collect pod logs: {e}") + + # Copy results from pod before it's deleted (use rsync for large files) + try: + results_dir = step_dir / "results" + results_dir.mkdir(exist_ok=True) + + # Use oc rsync for efficient transfer of large files (up to 300MB) + result = subprocess.run( + [ + "oc", "rsync", + f"{self.pod_name}:/benchmark-results/", + str(results_dir), + "-n", self.namespace, + "--progress", + ], + capture_output=True, + text=True, + timeout=600, # 10 min timeout for large files + ) + if result.returncode == 0: + print(f"GuideLLM results synced to {results_dir}/") + else: + logger.warning(f"Failed to rsync results: {result.stderr}") + + except subprocess.TimeoutExpired: + logger.warning("Timeout copying results (>10 min)") + except Exception as e: + logger.warning(f"Failed to copy results from pod: {e}") + + def _delete_pod(self): + """Delete the GuideLLM pod.""" + try: + subprocess.run( + ["oc", "delete", "pod", self.pod_name, "-n", self.namespace, "--ignore-not-found"], + capture_output=True, + timeout=30, + ) + print(f"Cleaned up GuideLLM pod: {self.pod_name}") + except Exception as e: + logger.warning(f"Failed to delete pod: {e}") diff --git a/projects/core/utils/__init__.py 
b/projects/core/utils/__init__.py new file mode 100644 index 0000000..5390081 --- /dev/null +++ b/projects/core/utils/__init__.py @@ -0,0 +1,16 @@ +"""Core utilities for workflow steps. + +Reusable utilities that can be imported by any project (rhaiis, llm-d, etc.). + +Example: + from projects.core.utils import OC, RetryConfig + + oc = OC(namespace="forge") + result = oc.get("pods", "-l", "app=vllm") + if result.success: + print(result.stdout) +""" + +from .oc import OC, OCResult, RetryConfig + +__all__ = ["OC", "OCResult", "RetryConfig"] diff --git a/projects/core/utils/oc.py b/projects/core/utils/oc.py new file mode 100644 index 0000000..19b9d0f --- /dev/null +++ b/projects/core/utils/oc.py @@ -0,0 +1,529 @@ +"""OpenShift CLI wrapper with built-in retry for transient failures. + +Provides a clean, method-based API for oc commands with automatic retry +on transient network errors, API server unavailability, etc. + +Example: + from projects.core.utils import OC, RetryConfig + + # Basic usage + oc = OC(namespace="forge") + result = oc.get("pods") + if result.success: + print(result.stdout) + + # With custom retry config + oc = OC(namespace="forge", retry=RetryConfig(max_retries=5)) + result = oc.apply("-f", "manifest.yaml") + + # Without namespace (uses current context) + oc = OC() + result = oc.get("namespaces") +""" + +import logging +import subprocess +import time +from dataclasses import dataclass, field +from typing import Any + +logger = logging.getLogger(__name__) + +# Error patterns that indicate transient failures worth retrying +TRANSIENT_ERROR_PATTERNS = [ + "connection refused", + "connection reset", + "connection timed out", + "unable to connect", + "no route to host", + "temporary failure", + "service unavailable", + "server is currently unable", + "etcdserver: request timed out", + "context deadline exceeded", + "the server was unable to return a response", + "unexpected eof", + "i/o timeout", + "tls handshake timeout", + "net/http: request canceled", 
+ "client rate limiter", + "too many requests", + "throttling", + "apiserver not ready", +] + + +@dataclass +class RetryConfig: + """Configuration for retry behavior. + + Attributes: + max_retries: Maximum number of retry attempts (default: 3) + initial_delay: Initial delay between retries in seconds (default: 1.0) + max_delay: Maximum delay between retries in seconds (default: 30.0) + backoff_multiplier: Multiplier for exponential backoff (default: 2.0) + retry_on_timeout: Whether to retry on subprocess timeout (default: True) + """ + + max_retries: int = 3 + initial_delay: float = 1.0 + max_delay: float = 30.0 + backoff_multiplier: float = 2.0 + retry_on_timeout: bool = True + + +@dataclass +class OCResult: + """Result of an oc command execution. + + Attributes: + success: Whether the command succeeded (returncode == 0) + returncode: Command exit code + stdout: Standard output as string + stderr: Standard error as string + command: The command that was executed + attempts: Number of attempts made (1 = no retries needed) + duration: Total execution time including retries in seconds + """ + + success: bool + returncode: int + stdout: str = "" + stderr: str = "" + command: list[str] = field(default_factory=list) + attempts: int = 1 + duration: float = 0.0 + + @classmethod + def from_completed_process( + cls, + result: subprocess.CompletedProcess, + command: list[str], + attempts: int = 1, + duration: float = 0.0, + ) -> "OCResult": + """Create OCResult from subprocess.CompletedProcess.""" + return cls( + success=result.returncode == 0, + returncode=result.returncode, + stdout=result.stdout if result.stdout else "", + stderr=result.stderr if result.stderr else "", + command=command, + attempts=attempts, + duration=duration, + ) + + @classmethod + def from_error( + cls, + error: Exception, + command: list[str], + attempts: int = 1, + duration: float = 0.0, + ) -> "OCResult": + """Create failed OCResult from exception.""" + return cls( + success=False, + returncode=-1, 
+ stdout="", + stderr=str(error), + command=command, + attempts=attempts, + duration=duration, + ) + + +def _is_transient_error(stderr: str, returncode: int) -> bool: + """Check if error is likely transient and worth retrying.""" + if returncode == 0: + return False + stderr_lower = stderr.lower() + return any(pattern in stderr_lower for pattern in TRANSIENT_ERROR_PATTERNS) + + +class OC: + """OpenShift CLI wrapper with built-in retry. + + Provides a clean, method-based API for common oc operations. + All methods automatically retry on transient failures. + + Args: + namespace: Default namespace for commands (optional) + retry: Retry configuration (uses defaults if None) + timeout: Default command timeout in seconds (default: 60) + + Example: + oc = OC(namespace="forge") + + # Get pods + result = oc.get("pods") + result = oc.get("pods", "-l", "app=vllm") + result = oc.get("pod", "my-pod", "-o", "yaml") + + # Apply manifests + result = oc.apply("-f", "manifest.yaml") + result = oc.apply("-f", "-", input=yaml_content) + + # Delete resources + result = oc.delete("pod", "my-pod") + result = oc.delete("pod", "my-pod", "--ignore-not-found") + + # Logs + result = oc.logs("my-pod") + result = oc.logs("my-pod", "-c", "container", "--tail=100") + + # Exec + result = oc.exec("my-pod", "--", "curl", "localhost:8080/health") + + # Raw command + result = oc.run("get", "pods", "-A") + """ + + def __init__( + self, + namespace: str | None = None, + retry: RetryConfig | None = None, + timeout: int = 60, + ): + self.namespace = namespace + self.retry = retry or RetryConfig() + self.timeout = timeout + + def _build_cmd(self, args: list[str], namespace: str | None = None) -> list[str]: + """Build full oc command with namespace.""" + cmd = ["oc"] + + # Use provided namespace, fall back to instance default + ns = namespace if namespace is not None else self.namespace + if ns: + cmd.extend(["-n", ns]) + + cmd.extend(args) + return cmd + + def _run_with_retry( + self, + cmd: list[str], + 
timeout: int | None = None, + input: str | None = None, + ) -> OCResult: + """Execute command with retry on transient failures.""" + timeout = timeout if timeout is not None else self.timeout + delay = self.retry.initial_delay + attempts = 0 + start_time = time.monotonic() + + last_result: subprocess.CompletedProcess | None = None + last_error: Exception | None = None + + for attempt in range(self.retry.max_retries + 1): + attempts = attempt + 1 + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + input=input, + ) + + # Success + if result.returncode == 0: + duration = time.monotonic() - start_time + if attempts > 1: + logger.info( + f"Command succeeded on attempt {attempts}: {' '.join(cmd[:4])}" + ) + return OCResult.from_completed_process( + result, cmd, attempts, duration + ) + + # Check if transient error + if _is_transient_error(result.stderr, result.returncode): + last_result = result + if attempt < self.retry.max_retries: + logger.warning( + f"Transient error (attempt {attempts}/{self.retry.max_retries + 1}), " + f"retrying in {delay:.1f}s: {' '.join(cmd[:4])}..." + ) + time.sleep(delay) + delay = min(delay * self.retry.backoff_multiplier, self.retry.max_delay) + continue + + # Non-transient error or exhausted retries + duration = time.monotonic() - start_time + return OCResult.from_completed_process(result, cmd, attempts, duration) + + except subprocess.TimeoutExpired as e: + last_error = e + if self.retry.retry_on_timeout and attempt < self.retry.max_retries: + logger.warning( + f"Timeout (attempt {attempts}/{self.retry.max_retries + 1}), " + f"retrying in {delay:.1f}s: {' '.join(cmd[:4])}..." 
+ ) + time.sleep(delay) + delay = min(delay * self.retry.backoff_multiplier, self.retry.max_delay) + continue + + duration = time.monotonic() - start_time + return OCResult.from_error(e, cmd, attempts, duration) + + except FileNotFoundError as e: + # oc command not found - don't retry + duration = time.monotonic() - start_time + return OCResult.from_error( + Exception("oc command not found. Is OpenShift CLI installed?"), + cmd, + attempts, + duration, + ) + + except Exception as e: + # Unexpected error - don't retry + duration = time.monotonic() - start_time + return OCResult.from_error(e, cmd, attempts, duration) + + # Exhausted retries + duration = time.monotonic() - start_time + if last_error: + return OCResult.from_error(last_error, cmd, attempts, duration) + if last_result: + return OCResult.from_completed_process(last_result, cmd, attempts, duration) + + return OCResult.from_error( + Exception("Unexpected state after retries"), + cmd, + attempts, + duration, + ) + + def run(self, *args: str, namespace: str | None = None, timeout: int | None = None, input: str | None = None) -> OCResult: + """Run arbitrary oc command. + + Args: + *args: Command arguments (e.g., "get", "pods", "-o", "yaml") + namespace: Override namespace for this command + timeout: Override timeout for this command + input: Input to pass to stdin + + Returns: + OCResult with command output + """ + cmd = self._build_cmd(list(args), namespace) + return self._run_with_retry(cmd, timeout, input) + + def get(self, resource: str, *args: str, namespace: str | None = None, timeout: int | None = None) -> OCResult: + """Get Kubernetes resources. 
+ + Args: + resource: Resource type (e.g., "pods", "deployments") + *args: Additional arguments (name, selectors, output format) + namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult with resource data + + Examples: + oc.get("pods") + oc.get("pods", "-l", "app=vllm") + oc.get("pod", "my-pod", "-o", "yaml") + oc.get("pods", "-o", "jsonpath={.items[*].metadata.name}") + """ + return self.run("get", resource, *args, namespace=namespace, timeout=timeout) + + def apply(self, *args: str, namespace: str | None = None, timeout: int | None = None, input: str | None = None) -> OCResult: + """Apply configuration to resources. + + Args: + *args: Apply arguments (e.g., "-f", "manifest.yaml") + namespace: Override namespace + timeout: Override timeout + input: YAML content to apply via stdin (use with "-f", "-") + + Returns: + OCResult + + Examples: + oc.apply("-f", "manifest.yaml") + oc.apply("-f", "-", input=yaml_content) + """ + return self.run("apply", *args, namespace=namespace, timeout=timeout, input=input) + + def delete(self, resource: str, name: str = "", *args: str, namespace: str | None = None, timeout: int | None = None) -> OCResult: + """Delete resources. + + Args: + resource: Resource type + name: Resource name (optional for label selectors) + *args: Additional arguments (--ignore-not-found, etc.) + namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult + + Examples: + oc.delete("pod", "my-pod") + oc.delete("pod", "my-pod", "--ignore-not-found") + oc.delete("pods", "-l", "app=test") + """ + if name: + return self.run("delete", resource, name, *args, namespace=namespace, timeout=timeout) + return self.run("delete", resource, *args, namespace=namespace, timeout=timeout) + + def logs(self, pod: str, *args: str, namespace: str | None = None, timeout: int | None = None) -> OCResult: + """Get pod logs. + + Args: + pod: Pod name + *args: Additional arguments (-c container, --tail, --since, etc.) 
+ namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult with logs in stdout + + Examples: + oc.logs("my-pod") + oc.logs("my-pod", "-c", "sidecar") + oc.logs("my-pod", "--tail=100") + """ + return self.run("logs", pod, *args, namespace=namespace, timeout=timeout) + + def exec(self, pod: str, *args: str, namespace: str | None = None, timeout: int | None = None) -> OCResult: + """Execute command in pod. + + Args: + pod: Pod name + *args: Command to execute (use "--" separator) + namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult with command output + + Examples: + oc.exec("my-pod", "--", "curl", "localhost:8080/health") + oc.exec("my-pod", "-c", "container", "--", "cat", "/etc/config") + """ + return self.run("exec", pod, *args, namespace=namespace, timeout=timeout) + + def describe(self, resource: str, name: str = "", *args: str, namespace: str | None = None, timeout: int | None = None) -> OCResult: + """Describe resources. + + Args: + resource: Resource type + name: Resource name (optional) + *args: Additional arguments + namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult with description + """ + if name: + return self.run("describe", resource, name, *args, namespace=namespace, timeout=timeout) + return self.run("describe", resource, *args, namespace=namespace, timeout=timeout) + + def wait( + self, + resource: str, + name: str, + condition: str, + timeout_seconds: int = 300, + namespace: str | None = None, + ) -> OCResult: + """Wait for resource condition. 
+ + Args: + resource: Resource type + name: Resource name + condition: Condition to wait for (e.g., "condition=Ready") + timeout_seconds: Wait timeout in seconds + namespace: Override namespace + + Returns: + OCResult + + Example: + oc.wait("pod", "my-pod", "condition=Ready", timeout_seconds=120) + """ + return self.run( + "wait", + f"{resource}/{name}", + f"--for={condition}", + f"--timeout={timeout_seconds}s", + namespace=namespace, + timeout=timeout_seconds + 10, # Give subprocess a bit more time + ) + + def rollout_status( + self, + resource: str, + name: str, + timeout_seconds: int = 300, + namespace: str | None = None, + ) -> OCResult: + """Check rollout status. + + Args: + resource: Resource type (deployment, statefulset, etc.) + name: Resource name + timeout_seconds: Timeout for rollout + namespace: Override namespace + + Returns: + OCResult + """ + return self.run( + "rollout", + "status", + f"{resource}/{name}", + f"--timeout={timeout_seconds}s", + namespace=namespace, + timeout=timeout_seconds + 10, + ) + + def rsync( + self, + source: str, + dest: str, + *args: str, + namespace: str | None = None, + timeout: int | None = None, + ) -> OCResult: + """Rsync files to/from pod. + + Args: + source: Source path (pod:path or local path) + dest: Destination path + *args: Additional rsync arguments + namespace: Override namespace + timeout: Override timeout + + Returns: + OCResult + + Example: + oc.rsync("my-pod:/data/", "./local/") + oc.rsync("./local/", "my-pod:/data/", "--progress") + """ + return self.run("rsync", source, dest, *args, namespace=namespace, timeout=timeout) + + def create_namespace(self, name: str) -> OCResult: + """Create namespace if it doesn't exist. 
+
+        Args:
+            name: Namespace name
+
+        Returns:
+            OCResult
+        """
+        # Plain `oc apply` is already idempotent: it creates the Namespace on
+        # first run and is a no-op afterwards. (Previous comment claimed a
+        # --dry-run was used here; it is not.)
+        # NOTE(review): namespace=None falls back to self.namespace inside
+        # _build_cmd, so `-n` may still be passed. Harmless for the
+        # cluster-scoped Namespace kind, but confirm the intent.
+        yaml_content = f"""apiVersion: v1
+kind: Namespace
+metadata:
+  name: {name}
+"""
+        return self.apply("-f", "-", input=yaml_content, namespace=None)
diff --git a/projects/core/workflow/__init__.py b/projects/core/workflow/__init__.py
new file mode 100644
index 0000000..757ab19
--- /dev/null
+++ b/projects/core/workflow/__init__.py
@@ -0,0 +1,37 @@
+"""Forge workflow engine.
+
+A simple, testable workflow engine for sequential step execution
+with finally/cleanup blocks. Integrates with the existing DSL patterns.
+
+Example usage:
+    from projects.core.workflow import Workflow, WorkflowContext, WorkflowStep, StepResult
+
+    class MyStep(WorkflowStep):
+        def execute(self, ctx: WorkflowContext) -> StepResult:
+            # Do work...
+            return StepResult.ok("Done")
+
+    class MyWorkflow(Workflow):
+        def define_steps(self):
+            self.add_step(MyStep())
+            self.add_finally(CleanupStep())
+
+    ctx = WorkflowContext.from_environment()
+    workflow = MyWorkflow(ctx)
+    result = workflow.execute()
+"""
+
+from .context import WorkflowContext
+from .executor import SequentialExecutor
+from .step import StepResult, StepStatus, WorkflowStep
+from .workflow import Workflow, WorkflowResult
+
+__all__ = [
+    "SequentialExecutor",
+    "StepResult",
+    "StepStatus",
+    "Workflow",
+    "WorkflowContext",
+    "WorkflowResult",
+    "WorkflowStep",
+]
diff --git a/projects/core/workflow/context.py b/projects/core/workflow/context.py
new file mode 100644
index 0000000..418dd78
--- /dev/null
+++ b/projects/core/workflow/context.py
@@ -0,0 +1,158 @@
+"""Workflow execution context."""
+
+import os
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+import projects.core.library.env as env
+
+
+@dataclass
+class WorkflowContext:
+    """
+    Runtime context for workflow execution.
+
+    Holds run-specific state: UUID, artifact directories, config, and env vars.
+    Created once per workflow execution and passed to all steps.
+    Integrates with the existing env.ARTIFACT_DIR system.
+    """
+
+    run_uuid: str
+    artifact_dir: Path
+    config: dict[str, Any] = field(default_factory=dict)
+    env_vars: dict[str, str] = field(default_factory=dict)
+    start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+    # Current step tracking
+    step_number: int = 0
+    current_step_name: str = ""
+
+    @classmethod
+    def from_environment(
+        cls,
+        artifact_base: str | None = None,
+        config: dict[str, Any] | None = None,
+    ) -> "WorkflowContext":
+        """
+        Create context from environment variables.
+
+        Reads FORGE_* environment variables and creates artifact directory.
+        Integrates with env.init() if artifact_base not provided.
+
+        Args:
+            artifact_base: Base path for artifacts (uses env.ARTIFACT_DIR if not set)
+            config: Optional config dict to merge
+
+        Returns:
+            Initialized WorkflowContext
+        """
+        run_uuid = str(uuid.uuid4())
+
+        # Collect FORGE_* env vars
+        env_vars = {k: v for k, v in os.environ.items() if k.startswith("FORGE_")}
+
+        # Resolve the artifact base: explicit argument wins, then the existing
+        # env.ARTIFACT_DIR, then lazily initialize the env system. (Collapses
+        # the three previously duplicated mkdir branches into one path;
+        # behavior is unchanged.)
+        if artifact_base:
+            base = Path(artifact_base)
+        else:
+            if not env.ARTIFACT_DIR:
+                # Initialize env system
+                env.init()
+            base = env.ARTIFACT_DIR
+        artifact_dir = base / run_uuid
+        artifact_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create _meta subdirectory
+        meta_dir = artifact_dir / "_meta"
+        meta_dir.mkdir(exist_ok=True)
+
+        return cls(
+            run_uuid=run_uuid,
+            artifact_dir=artifact_dir,
+            config=config or {},
+            env_vars=env_vars,
+        )
+
+    def get_step_artifact_dir(self, step_name: str) -> Path:
+        """
+        Get artifact directory for a specific step.
+ + Creates numbered directory like: 001__deploy/ + + Args: + step_name: Name of the step + + Returns: + Path to step's artifact directory + """ + self.step_number += 1 + self.current_step_name = step_name + step_dir = self.artifact_dir / f"{self.step_number:03d}__{step_name}" + step_dir.mkdir(exist_ok=True) + return step_dir + + def get_env(self, key: str, default: str | None = None) -> str | None: + """ + Get environment variable with FORGE_ prefix. + + Args: + key: Variable name (with or without FORGE_ prefix) + default: Default value if not found + + Returns: + Environment variable value or default + """ + if not key.startswith("FORGE_"): + key = f"FORGE_{key}" + return self.env_vars.get(key, default) + + def write_metadata(self, args: dict[str, Any] | None = None) -> Path: + """ + Write run metadata to _meta/metadata.yaml. + + Args: + args: CLI arguments to include + + Returns: + Path to metadata file + """ + meta_path = self.artifact_dir / "_meta" / "metadata.yaml" + metadata = { + "run_uuid": self.run_uuid, + "start_time": self.start_time.isoformat(), + "env_vars": self.env_vars, + "config": self.config, + "args": args or {}, + } + with open(meta_path, "w") as f: + yaml.safe_dump(metadata, f, default_flow_style=False) + return meta_path + + def write_restart_script(self, command: str) -> Path: + """ + Write restart script to _meta/restart.sh. 
+ + Args: + command: Full command to replay this run + + Returns: + Path to restart script + """ + restart_path = self.artifact_dir / "_meta" / "restart.sh" + script = f"""#!/bin/bash +# Restart script for run {self.run_uuid} +# Generated at {self.start_time.isoformat()} + +{command} +""" + with open(restart_path, "w") as f: + f.write(script) + restart_path.chmod(0o755) + return restart_path diff --git a/projects/core/workflow/executor.py b/projects/core/workflow/executor.py new file mode 100644 index 0000000..d9cdaab --- /dev/null +++ b/projects/core/workflow/executor.py @@ -0,0 +1,132 @@ +"""Workflow executors.""" + +import logging +import time +from datetime import datetime, timezone +from typing import TYPE_CHECKING + +from .step import StepResult +from .workflow import WorkflowResult + +if TYPE_CHECKING: + from .workflow import Workflow + +logger = logging.getLogger(__name__) + + +class SequentialExecutor: + """ + Execute workflow steps sequentially with finally block support. + + Execution flow: + 1. Run normal steps in order until completion or failure + 2. On failure, skip remaining normal steps + 3. Always run finally steps, even if normal steps failed + 4. Collect all results and return WorkflowResult + """ + + def execute(self, workflow: "Workflow") -> WorkflowResult: + """ + Execute the workflow. 
+
+        Args:
+            workflow: Workflow instance to execute
+
+        Returns:
+            WorkflowResult with all step outcomes
+        """
+        start_time = datetime.now(timezone.utc)
+        step_results: dict[str, StepResult] = {}
+        failed_step: str | None = None
+        original_error: Exception | None = None
+
+        ctx = workflow.ctx
+        logger.info(f"Starting workflow run {ctx.run_uuid}")
+
+        # Run normal steps
+        for step in workflow.steps:
+            step_name = step.name
+            logger.info(f"Running step: {step_name}")
+
+            # Called for its side effects: creates the step's artifact
+            # directory and advances ctx.step_number. Steps fetch their own
+            # directory from ctx, so the return value is not needed here.
+            ctx.get_step_artifact_dir(step_name)
+
+            # Record the wall-clock start *before* executing. (Bug fix: the
+            # previous code stamped result.start_time after execute()
+            # returned, recording the end time as the start.)
+            step_started_at = datetime.now(timezone.utc)
+            step_start = time.monotonic()
+            try:
+                result = step.execute(ctx)
+                result.duration_seconds = time.monotonic() - step_start
+                result.start_time = step_started_at
+                result.end_time = datetime.now(timezone.utc)
+                step_results[step_name] = result
+
+                if not result.success:
+                    logger.error(f"Step {step_name} failed: {result.message}")
+                    failed_step = step_name
+                    original_error = result.error
+                    break
+                logger.info(f"Step {step_name} completed in {result.duration_seconds:.2f}s")
+
+            except Exception as e:
+                duration = time.monotonic() - step_start
+                logger.exception(f"Step {step_name} raised exception")
+                step_results[step_name] = StepResult(
+                    success=False,
+                    message=f"Exception: {e}",
+                    error=e,
+                    duration_seconds=duration,
+                )
+                failed_step = step_name
+                original_error = e
+                break
+
+        # Run finally steps (always)
+        finally_errors: list[Exception] = []
+        for step in workflow.finally_steps:
+            step_name = step.name
+            logger.info(f"Running finally step: {step_name}")
+
+            ctx.get_step_artifact_dir(step_name)
+
+            step_started_at = datetime.now(timezone.utc)
+            step_start = time.monotonic()
+            try:
+                result = step.execute(ctx)
+                result.duration_seconds = time.monotonic() - step_start
+                result.start_time = step_started_at
+                result.end_time = datetime.now(timezone.utc)
+                step_results[step_name] = result
+
+                if not result.success:
+                    logger.warning(f"Finally step {step_name} failed: {result.message}")
+                    # Don't break - continue with other finally steps
+                    if result.error:
+                        finally_errors.append(result.error)
+                else:
+                    logger.info(f"Finally step {step_name} completed in {result.duration_seconds:.2f}s")
+
+            except Exception as e:
+                duration = time.monotonic() - step_start
+                logger.exception(f"Finally step {step_name} raised exception")
+                step_results[step_name] = StepResult(
+                    success=False,
+                    message=f"Exception: {e}",
+                    error=e,
+                    duration_seconds=duration,
+                )
+                finally_errors.append(e)
+                # Continue with other finally steps
+
+        end_time = datetime.now(timezone.utc)
+        duration = (end_time - start_time).total_seconds()
+
+        # Finally-step failures do not flip workflow success (cleanup is
+        # best-effort), but surface them instead of dropping the collected
+        # errors silently.
+        if finally_errors:
+            logger.warning(f"{len(finally_errors)} finally step(s) raised errors")
+
+        workflow_success = failed_step is None
+        logger.info(
+            f"Workflow completed: success={workflow_success}, "
+            f"duration={duration:.2f}s, failed_step={failed_step}"
+        )
+
+        return WorkflowResult(
+            success=workflow_success,
+            step_results=step_results,
+            failed_step=failed_step,
+            duration_seconds=duration,
+            run_uuid=ctx.run_uuid,
+            start_time=start_time,
+            end_time=end_time,
+        )
diff --git a/projects/core/workflow/step.py b/projects/core/workflow/step.py
new file mode 100644
index 0000000..908bba6
--- /dev/null
+++ b/projects/core/workflow/step.py
@@ -0,0 +1,100 @@
+"""Workflow step protocol and result types."""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from .context import WorkflowContext
+
+
+class StepStatus(Enum):
+    """Step execution status."""
+
+    PENDING = "pending"
+    RUNNING = "running"
+    SUCCESS = "success"
+    FAILED = "failed"
+    SKIPPED = "skipped"
+
+
+@dataclass
+class StepResult:
+    """
+    Result of a single step execution.
+
+    Attributes:
+        success: Whether the step completed successfully
+        message: Human-readable status message
+        error: Exception if step failed
+        artifacts: Paths to artifacts produced by this step
+        data: Arbitrary output data for downstream steps
+        duration_seconds: Execution time in seconds
+    """
+
+    success: bool
+    message: str = ""
+    error: Exception | None = None
+    artifacts: list[str] = field(default_factory=list)
+    data: dict[str, Any] = field(default_factory=dict)
+    duration_seconds: float = 0.0
+    start_time: datetime | None = None
+    end_time: datetime | None = None
+
+    @classmethod
+    def ok(cls, message: str = "Success", **data: Any) -> "StepResult":
+        """Create a successful result."""
+        return cls(success=True, message=message, data=data)
+
+    @classmethod
+    def fail(cls, message: str, error: Exception | None = None) -> "StepResult":
+        """Create a failed result."""
+        return cls(success=False, message=message, error=error)
+
+
+class WorkflowStep(ABC):
+    """
+    Abstract base class for workflow steps.
+
+    Implement execute() to define step behavior.
+    Step name defaults to the lowercased class name (minus any 'Step'
+    suffix) if not provided.
+    """
+
+    def __init__(self, name: str | None = None):
+        """
+        Initialize step.
+
+        Args:
+            name: Optional step name (defaults to class name)
+        """
+        self._name = name
+
+    @property
+    def name(self) -> str:
+        """Get step name."""
+        if self._name:
+            return self._name
+        # Default to the class name, lowercased. NOTE: this is NOT a
+        # CamelCase -> snake_case conversion (as a previous comment claimed):
+        # no underscores are inserted, e.g. 'DeployVLLMStep' -> 'deployvllm'.
+        class_name = self.__class__.__name__
+        # Remove 'Step' suffix if present
+        if class_name.endswith("Step"):
+            class_name = class_name[:-4]
+        # Convert to lowercase
+        return class_name.lower()
+
+    @abstractmethod
+    def execute(self, ctx: "WorkflowContext") -> StepResult:
+        """
+        Execute the step.
+ + Args: + ctx: Workflow execution context + + Returns: + StepResult indicating success/failure and any outputs + """ + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(name={self.name!r})" diff --git a/projects/core/workflow/workflow.py b/projects/core/workflow/workflow.py new file mode 100644 index 0000000..0bd3ce7 --- /dev/null +++ b/projects/core/workflow/workflow.py @@ -0,0 +1,129 @@ +"""Base workflow class with step registration.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime +from typing import TYPE_CHECKING + +from .step import StepResult, WorkflowStep + +if TYPE_CHECKING: + from .context import WorkflowContext + + +@dataclass +class WorkflowResult: + """ + Result of a complete workflow execution. + + Attributes: + success: Whether all steps completed successfully + step_results: Results from each step, keyed by step name + failed_step: Name of first step that failed (if any) + duration_seconds: Total execution time + run_uuid: UUID of this workflow run + """ + + success: bool + step_results: dict[str, StepResult] = field(default_factory=dict) + failed_step: str | None = None + duration_seconds: float = 0.0 + run_uuid: str = "" + start_time: datetime | None = None + end_time: datetime | None = None + + +class Workflow(ABC): + """ + Base class for defining workflows with steps and finally blocks. + + Subclasses implement define_steps() to register steps. + Steps run sequentially; finally steps always run regardless of failure. + + Example: + class BenchmarkWorkflow(Workflow): + def define_steps(self): + self.add_step(DeployVLLMStep(model=..., vllm_image=..., runtime_args=...)) + self.add_step(RunGuideLLMStep(...)) + self.add_finally(CollectArtifactsStep()) + self.add_finally(CleanupDeploymentStep()) + """ + + def __init__(self, ctx: "WorkflowContext"): + """ + Initialize workflow with context. 
+ + Args: + ctx: Workflow execution context + """ + self.ctx = ctx + self._steps: list[WorkflowStep] = [] + self._finally_steps: list[WorkflowStep] = [] + self._defined = False + + def add_step(self, step: WorkflowStep) -> None: + """ + Add a step to the workflow. + + Steps run in order of registration. If a step fails, + remaining steps are skipped and finally steps run. + + Args: + step: WorkflowStep instance to add + """ + self._steps.append(step) + + def add_finally(self, step: WorkflowStep) -> None: + """ + Add a finally step that always runs. + + Finally steps run in order after all normal steps complete + or after a step failure. They run even if previous finally + steps fail. + + Args: + step: WorkflowStep instance to add + """ + self._finally_steps.append(step) + + @abstractmethod + def define_steps(self) -> None: + """ + Define workflow steps. + + Override this method to register steps via add_step() + and add_finally(). + """ + + @property + def steps(self) -> list[WorkflowStep]: + """Get registered steps.""" + self._ensure_defined() + return self._steps + + @property + def finally_steps(self) -> list[WorkflowStep]: + """Get registered finally steps.""" + self._ensure_defined() + return self._finally_steps + + def _ensure_defined(self) -> None: + """Ensure define_steps() has been called.""" + if not self._defined: + self.define_steps() + self._defined = True + + def execute(self) -> WorkflowResult: + """ + Execute the workflow. + + Runs all steps sequentially, then runs finally steps. + Uses SequentialExecutor internally. 
+ + Returns: + WorkflowResult with step outcomes + """ + from .executor import SequentialExecutor + + executor = SequentialExecutor() + return executor.execute(self) diff --git a/projects/rhaiis/IMPLEMENTATION.md b/projects/rhaiis/IMPLEMENTATION.md new file mode 100644 index 0000000..1cae22a --- /dev/null +++ b/projects/rhaiis/IMPLEMENTATION.md @@ -0,0 +1,488 @@ +# RHAIIS Benchmark Implementation + +RHAIIS benchmarking system built on Forge's workflow engine (projects/core/workflow/). + +## Architecture + +``` +CLI/CI Entry Points + │ + ▼ +┌─────────────────────────────────────┐ +│ test_rhaiis.py │ ← Orchestration layer +│ (run_test, run_prepare, run_cleanup)│ +└─────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ ConfigLoader │ ← Config inheritance +│ defaults → accelerator → model │ +└─────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ BenchmarkWorkflow │ ← Workflow definition +│ (deploy → wait → benchmark) │ +└─────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────┐ +│ WorkflowStep implementations │ ← Step execution +│ DeployVLLM, WaitForReady, │ +│ RunGuideLLM, CollectArtifacts │ +└─────────────────────────────────────┘ +``` + +## Entry Points + +### CLI (`cli.py`) +```bash +# Single model + workload +PYTHONPATH=. 
python3 projects/rhaiis/orchestration/cli.py test \ + --model llama-3.3-70b-fp8 --workload balanced --accelerator nvidia + +# Deploy-once: multiple workloads without restarting vLLM +cli.py test --model qwen-0.6b --workloads balanced,short,long-prompt +``` + +### CI (`ci.py`) +```bash +# Env var driven (for FOURNOS jobs) +FORGE_MODEL=qwen-0.6b FORGE_WORKLOADS=balanced,short \ + python3 projects/rhaiis/orchestration/ci.py test +``` + +## Config Structure + +Project-specific configs allow different projects (rhaiis, llm-d) to have their own settings: + +``` +config/ +├── rhaiis/ +│ ├── defaults.yaml # Global defaults + accelerator settings +│ ├── models.yaml # Model registry (HF IDs, vllm_args, env_vars) +│ └── workloads.yaml # GuideLLM profiles (rates, max_seconds) +└── llm-d/ # (future) llm-d specific configs + └── ... +``` + +### Inheritance Chain +``` +defaults.yaml (base vllm_args, deploy settings) + ↓ merge +accelerators[nvidia|amd] (image, vllm_args, env_vars) + ↓ merge +models[model] (hf_model_id, vllm_args, env_vars) + ↓ merge +models[model].accelerator_overrides[accelerator] (vllm_args, env_vars) +``` + +## Environment Variables + +Env vars are passed to the vLLM pod and follow the same inheritance chain: + +### Accelerator-level (`defaults.yaml`) +```yaml +accelerators: + nvidia: + env_vars: + TORCH_CUDA_ARCH_LIST: "9.0" # All NVIDIA models + amd: + env_vars: + VLLM_ROCM_USE_AITER: "1" # All AMD models +``` + +### Model-level (`models.yaml`) +```yaml +models: + my-model: + env_vars: + VLLM_MXFP4_USE_MARLIN: "1" # This model on all accelerators +``` + +### Model + Accelerator specific (`models.yaml`) +```yaml +models: + deepseek-r1: + accelerator_overrides: + amd: + env_vars: + VLLM_ROCM_USE_AITER: "0" # Override AMD default for this model + nvidia: + env_vars: + TORCH_CUDA_ARCH_LIST: "9.0" +``` + +## Core Interfaces - Class Diagram + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ FORGE WORKFLOW ENGINE │ 
+└─────────────────────────────────────────────────────────────────────────────────┘ + +┌──────────────────────────────┐ ┌──────────────────────────────┐ +│ «abstract» WorkflowStep │ │ WorkflowContext │ +├──────────────────────────────┤ ├──────────────────────────────┤ +│ - _name: str | None │ │ + run_uuid: str │ +├──────────────────────────────┤ │ + artifact_dir: Path │ +│ + name: str {property} │ │ + config: dict │ +│ + execute(ctx) → StepResult │◄────────│ + env_vars: dict │ +│ «abstract» │ uses │ + start_time: datetime │ +└──────────────────────────────┘ │ + step_number: int │ + △ │ + current_step_name: str │ + │ inherits ├──────────────────────────────┤ + │ │ + from_environment() → ctx │ + ┌───────┴───────┐ │ + get_step_artifact_dir() │ + │ │ │ + get_env(key) → str │ + ▼ ▼ │ + write_metadata() │ +┌────────────┐ ┌────────────┐ │ + write_restart_script() │ +│ Core Steps │ │ Project │ └──────────────────────────────┘ +│ │ │ Steps │ +├────────────┤ ├────────────┤ +│RunGuideLLM │ │DeployVLLM │ +│Collect │ │WaitForReady│ +│Artifacts │ │DeployHelm │ +│Cleanup │ │ConfigureEPP│ +│Deployment │ │... │ +└────────────┘ └────────────┘ + + +┌──────────────────────────────┐ +│ «abstract» Workflow │ ┌──────────────────────────────┐ +├──────────────────────────────┤ │ StepResult │ +│ + ctx: WorkflowContext │ ├──────────────────────────────┤ +│ - _steps: list[WorkflowStep] │ │ + success: bool │ +│ - _finally_steps: list[...] 
│ │ + message: str │ +│ - _defined: bool │ │ + error: Exception | None │ +├──────────────────────────────┤ │ + artifacts: list[str] │ +│ + add_step(step) │ │ + data: dict │ +│ + add_finally(step) │ │ + duration_seconds: float │ +│ + define_steps() «abstract» │ ├──────────────────────────────┤ +│ + steps: list {property} │ │ + ok(message) → StepResult │ +│ + finally_steps: list {prop} │ │ + fail(message) → StepResult │ +│ + execute() → WorkflowResult │ └──────────────────────────────┘ +└──────────────────────────────┘ △ + │ │ returns + │ uses │ + ▼ ┌─────────────┴────────────────┐ +┌──────────────────────────────┐ │ SequentialExecutor │ +│ WorkflowResult │ ├──────────────────────────────┤ +├──────────────────────────────┤ │ │ +│ + success: bool │◄────────┤ + execute(workflow) │ +│ + step_results: dict │ returns │ → WorkflowResult │ +│ + failed_step: str | None │ │ │ +│ + duration_seconds: float │ │ Execution Flow: │ +│ + run_uuid: str │ │ 1. Run steps sequentially │ +│ + start_time: datetime │ │ 2. Stop on first failure │ +│ + end_time: datetime │ │ 3. Always run finally_steps │ +└──────────────────────────────┘ │ 4. Collect all StepResults │ + └──────────────────────────────┘ +``` + +### Concrete Workflow Implementations + +``` + △ inherits from Workflow + │ + ┌───────┴────────────────────┐ + │ │ + ▼ ▼ +┌────────────────────┐ ┌────────────────────┐ +│ BenchmarkWorkflow │ │ LlmdBenchmark │ +│ (RHAIIS) │ │ Workflow (llm-d) │ +├────────────────────┤ ├────────────────────┤ +│ + model: str │ │ + model: str │ +│ + workload: str │ │ + routing_mode: str│ +│ + vllm_image: str │ │ + helmfile_path │ +│ + namespace: str │ │ + namespace: str │ +├────────────────────┤ ├────────────────────┤ +│ define_steps(): │ │ define_steps(): │ +│ ├─ DeployVLLMStep │ │ ├─ DeployHelmStep │ +│ ├─ WaitForReady │ │ ├─ ConfigureEPP │ +│ ├─ RunGuideLLM │ │ ├─ WaitForGateway │ +│ ├─ [finally] │ │ ├─ RunGuideLLM │ +│ │ CollectArtif. │ │ ├─ [finally] │ +│ └─ CleanupDeploy │ │ │ CollectArtif. 
│ +└────────────────────┘ │ └─ HelmCleanup │ + └────────────────────┘ +``` + +### Execution Sequence Diagram + +``` +┌──────────┐ ┌──────────────┐ ┌────────────────────┐ ┌──────────────┐ +│ Client │ │ Workflow │ │ SequentialExecutor │ │ WorkflowStep │ +└────┬─────┘ └──────┬───────┘ └─────────┬──────────┘ └──────┬───────┘ + │ │ │ │ + │ execute() │ │ │ + │─────────────────>│ │ │ + │ │ │ │ + │ │ execute(self) │ │ + │ │──────────────────────>│ │ + │ │ │ │ + │ │ │ ┌─────────────────┐ │ + │ │ │ │ For each step: │ │ + │ │ │ └────────┬────────┘ │ + │ │ │ │ │ + │ │ │ execute(ctx) │ + │ │ │──────────────────────>│ + │ │ │ │ + │ │ │ StepResult │ + │ │ │<──────────────────────│ + │ │ │ │ │ + │ │ │ ┌────────┴────────┐ │ + │ │ │ │ if !success: │ │ + │ │ │ │ break loop │ │ + │ │ │ └────────┬────────┘ │ + │ │ │ │ │ + │ │ │ ┌────────┴────────┐ │ + │ │ │ │ For each │ │ + │ │ │ │ finally_step: │ │ + │ │ │ └────────┬────────┘ │ + │ │ │ │ │ + │ │ │ execute(ctx) │ + │ │ │──────────────────────>│ + │ │ │ │ + │ │ │ StepResult │ + │ │ │<──────────────────────│ + │ │ │ (continue even │ + │ │ │ if failed) │ + │ │ │ │ + │ │ WorkflowResult │ │ + │ │<──────────────────────│ │ + │ │ │ │ + │ WorkflowResult │ │ │ + │<─────────────────│ │ │ + │ │ │ │ +``` + +### Dependency Graph + +``` + ┌─────────────────┐ + │ WorkflowContext │ + └────────┬────────┘ + │ + created by│from_environment() + │ + ▼ +┌──────────────┐ ┌─────────────────┐ ┌────────────────────┐ +│ WorkflowStep │◄───│ Workflow │───►│ SequentialExecutor │ +│ (ABC) │ │ (ABC) │ │ │ +└──────┬───────┘ └────────┬────────┘ └─────────┬──────────┘ + │ │ │ + │ implements │ implements │ produces + ▼ ▼ ▼ +┌──────────────┐ ┌─────────────────┐ ┌────────────────────┐ +│ Concrete │ │ Concrete │ │ WorkflowResult │ +│ Steps │ │ Workflows │ │ + StepResults │ +│ │ │ │ │ │ +│ DeployVLLM │ │ BenchmarkWF │ │ { │ +│ RunGuideLLM │ │ PrepareWF │ │ success: bool │ +│ CollectArtif │ │ CleanupWF │ │ step_results: {} │ +│ WaitForReady │ │ LlmdBenchmarkWF │ │ 
failed_step: str │ +│ DeployHelm │ │ │ │ } │ +└──────────────┘ └─────────────────┘ └────────────────────┘ +``` + +### Key Relationships + +| Relationship | Type | Description | +|-------------|------|-------------| +| `Workflow` → `WorkflowContext` | composition | Workflow holds a context instance | +| `Workflow` → `WorkflowStep` | aggregation | Workflow contains list of steps | +| `WorkflowStep.execute()` → `WorkflowContext` | dependency | Steps receive context as parameter | +| `WorkflowStep.execute()` → `StepResult` | returns | Steps return result objects | +| `SequentialExecutor.execute()` → `Workflow` | uses | Executor runs a workflow | +| `SequentialExecutor.execute()` → `WorkflowResult` | returns | Executor returns final result | +| Concrete Steps → `WorkflowStep` | inheritance | All steps extend the abstract class | +| Concrete Workflows → `Workflow` | inheritance | All workflows extend the abstract class | + +--- + +## Core Interfaces - Code + +### WorkflowStep +```python +class WorkflowStep(ABC): + """Base class for all workflow steps.""" + + def __init__(self, name: str | None = None): + self._name = name # Defaults to class name if not provided + + @abstractmethod + def execute(self, ctx: WorkflowContext) -> StepResult: + """Execute step, return success/failure with data.""" + +@dataclass +class StepResult: + success: bool + message: str = "" + error: Exception | None = None + artifacts: list[str] = field(default_factory=list) + data: dict[str, Any] = field(default_factory=dict) + duration_seconds: float = 0.0 +``` + +### Workflow +```python +class Workflow(ABC): + def add_step(self, step: WorkflowStep): ... + def add_finally(self, step: WorkflowStep): ... 
# Always runs + + @abstractmethod + def define_steps(self) -> None: + """Register steps via add_step() and add_finally().""" +``` + +### WorkflowContext +```python +@dataclass +class WorkflowContext: + run_uuid: str + artifact_dir: Path + config: dict + env_vars: dict # FORGE_* env vars +``` + +### SequentialExecutor +The executor runs steps with these guarantees: +```python +class SequentialExecutor: + """ + Execution flow: + 1. Run normal steps in order until completion or failure + 2. On failure, skip remaining normal steps + 3. Always run finally steps, even if normal steps failed + 4. Finally steps continue even if previous finally steps fail + 5. Collect all results and return WorkflowResult + """ +``` + +## Reliability and Safety + +### Current Reliability Features + +| Feature | Status | Description | +|---------|--------|-------------| +| Finally steps | ✅ | Cleanup always runs, even on failure | +| Exception handling | ✅ | Unhandled exceptions caught, logged, step marked failed | +| Artifact collection | ✅ | Each step gets its own artifact directory | +| Duration tracking | ✅ | Execution time recorded per step | +| Transient retry | ✅ | OC wrapper retries network errors with backoff | + +### Execution Guarantees + +``` +Step 1 (deploy) ──success──► Step 2 (wait) ──success──► Step 3 (benchmark) + │ │ │ + │ failure │ failure │ failure + ▼ ▼ ▼ + Finally 1 (collect) ─────► Finally 2 (cleanup) ─────► Return Result + (always runs) (always runs) +``` + +### Transient Errors Handled by OC Wrapper + +- Connection refused / reset / timed out +- Service unavailable +- API server not ready +- etcd timeout +- Rate limiting (too many requests) +- TLS handshake timeout + +### Safety Considerations + +| Aspect | Implementation | +|--------|---------------| +| Resource cleanup | Finally steps delete InferenceService/ServingRuntime | +| Namespace isolation | All resources created in specified namespace | +| Resource labeling | Resources labeled with `app={deployment_name}` 
for easy identification | +| Idempotent apply | Uses `oc apply` (not `create`) for idempotency | +| Orphan prevention | Cleanup step uses `--ignore-not-found` | + +### Known Limitations + +| Limitation | Mitigation | +|------------|------------| +| No checkpointing | Re-run from beginning on failure | +| No step timeout enforcement | Use subprocess timeout in OC wrapper | +| No parallel step execution | Use deploy-once pattern to minimize overhead | +| No circuit breaker | Relies on retry exhaustion | + +## RHAIIS Steps + +| Step | Location | Purpose | +|------|----------|---------| +| `DeployVLLMStep` | `rhaiis/workflows/steps/deploy.py` | Create KServe ServingRuntime + InferenceService | +| `WaitForReadyStep` | `rhaiis/workflows/steps/deploy.py` | Wait for ISVC ready + health check | +| `RunGuideLLMStep` | `core/steps/guidellm.py` | Run GuideLLM as pod, collect results | +| `CollectArtifactsStep` | `core/steps/artifacts.py` | Gather logs, events, pod status | +| `CleanupDeploymentStep` | `core/steps/artifacts.py` | Delete ISVC/ServingRuntime | + +## BenchmarkWorkflow + +```python +class BenchmarkWorkflow(Workflow): + def define_steps(self): + self.add_step(DeployVLLMStep(...)) + self.add_step(WaitForReadyStep(...)) + self.add_step(RunGuideLLMStep(...)) + self.add_finally(CollectArtifactsStep(...)) # Always runs + self.add_finally(CleanupDeploymentStep(...)) # Always runs +``` + +## Deploy-Once Pattern + +For multiple workloads with same vLLM config, deploys once and runs GuideLLM multiple times: + +```python +# test_rhaiis.py::_run_multi_workload() +for workload in workloads: + # Group by vllm_args (workloads with different vllm_args get separate deployments) + # Run GuideLLM for each workload without restarting vLLM +``` + +## Artifact Structure + +``` +artifacts/{run_uuid}/ +├── _meta/ +│ └── metadata.yaml +├── 001__deploy/ +│ └── kserve.yaml +├── 002__wait/ +├── 003__benchmark_balanced/ +│ ├── guidellm_logs.txt +│ └── results/ +│ └── benchmark_results.json 
+├── 004__collect_artifacts/
+│   ├── app_logs.txt
+│   ├── pod_describe.txt
+│   └── events.txt
+└── 005__cleanup/
+```
+
+## Running Unit Tests
+
+```bash
+# Activate your virtualenv and run from the forge repository root
+source .venv/bin/activate
+cd <path-to-forge-repo>
+PYTHONPATH=. python -m pytest tests/ --ignore=tests/llm_d -v
+
+# Or run specific test files
+PYTHONPATH=. pytest tests/core/utils/test_oc.py -v  # OC wrapper tests
+PYTHONPATH=. pytest tests/core/scenarios/test_config_loader.py -v  # ConfigLoader tests
+PYTHONPATH=. pytest tests/rhaiis/ -v  # RHAIIS tests
+```
+
+## Key Design Decisions
+
+1. **KServe RawDeployment**: Uses ServingRuntime + InferenceService for RHOAI compatibility
+2. **Pod-based GuideLLM**: Runs benchmark as pod inside cluster (not local)
+3. **Finally steps**: CollectArtifacts and Cleanup always run, even on failure
+4. **Config inheritance**: Minimizes duplication, accelerator-specific overrides where needed
+5. **num_gpus = tensor-parallel-size**: Single source of truth for GPU count
+6. **Project-specific configs**: Each project (rhaiis, llm-d) has its own config directory
+7. **Env vars inheritance**: Supports accelerator → model → model.accelerator_overrides chain
diff --git a/projects/rhaiis/LLM_D_EXTENSIBILITY_REPORT.md b/projects/rhaiis/LLM_D_EXTENSIBILITY_REPORT.md
new file mode 100644
index 0000000..342cdf9
--- /dev/null
+++ b/projects/rhaiis/LLM_D_EXTENSIBILITY_REPORT.md
@@ -0,0 +1,232 @@
+# llm-d Integration Extensibility Report
+
+Analysis of how the current RHAIIS workflow implementation can be extended to support llm-d benchmarking use cases.
+
+## Executive Summary
+
+The current workflow engine (`projects/core/workflow/`) is **highly extensible** for llm-d. The abstract `WorkflowStep` and `Workflow` interfaces allow llm-d to define its own deployment steps while reusing shared steps (GuideLLM, artifact collection). Key differences are in the **deployment layer**, not the workflow engine itself.
+ +## llm-d vs RHAIIS Comparison + +| Aspect | RHAIIS | llm-d | +|--------|--------|-------| +| **Deployment** | KServe (ServingRuntime + InferenceService) | Helm + Helmfile (model-service, gaie-scheduler, infra) | +| **Networking** | KServe service (`{name}-predictor.{ns}`) | Gateway API + HTTPRoute | +| **Routing** | Direct to vLLM pod | EPP router with scheduling strategies | +| **Scheduling** | None (direct inference) | GAIE scheduler (prefix-aware, disaggregated) | +| **Config** | YAML (models.yaml, defaults.yaml) | Helmfile values + routing configs | + +## Reusable Components + +### Fully Reusable (No Changes) +``` +projects/core/ +├── workflow/ +│ ├── step.py # WorkflowStep abstract class +│ ├── workflow.py # Workflow abstract class +│ ├── context.py # WorkflowContext +│ └── executor.py # SequentialExecutor +├── steps/ +│ ├── guidellm.py # RunGuideLLMStep (endpoint agnostic) +│ └── artifacts.py # CollectArtifactsStep, CleanupDeploymentStep +└── scenarios/ + └── config_loader.py # ConfigLoader (model + workload resolution) +``` + +### Requires llm-d-Specific Implementation +``` +projects/llm_d/ +├── workflows/ +│ ├── steps/ +│ │ ├── deploy_helm.py # DeployHelmStep (model-service, gaie, infra) +│ │ ├── configure_epp.py # ConfigureEPPStep (routing strategy) +│ │ └── wait_gateway.py # WaitForGatewayStep +│ ├── benchmark.py # LlmdBenchmarkWorkflow +│ └── prepare.py # LlmdPrepareWorkflow (install operators) +└── orchestration/ + ├── cli.py # llm-d CLI (similar structure to RHAIIS) + └── test_llmd.py # llm-d orchestration (run_test, etc.) +``` + +## Proposed llm-d Step Implementations + +### 1. DeployHelmStep +```python +class DeployHelmStep(WorkflowStep): + """Deploy llm-d components via Helm/Helmfile.""" + + def __init__( + self, + model: str, + routing_mode: str, # direct, prefix-estimation, pd-disaggregation + helmfile_path: str, + namespace: str, + ): + ... 
+ + def execute(self, ctx: WorkflowContext) -> StepResult: + # helmfile apply -f {helmfile_path} --state-values-set model={model} + # Returns: gateway_url, epp_endpoint +``` + +### 2. ConfigureEPPStep +```python +class ConfigureEPPStep(WorkflowStep): + """Configure EPP routing strategy.""" + + def __init__( + self, + routing_mode: str, + epp_namespace: str, + ): + ... + + def execute(self, ctx: WorkflowContext) -> StepResult: + # Patch EPP ConfigMap with routing config + # Wait for EPP pods to reload +``` + +### 3. WaitForGatewayStep +```python +class WaitForGatewayStep(WorkflowStep): + """Wait for K8s Gateway + HTTPRoute to be ready.""" + + def __init__( + self, + gateway_name: str, + namespace: str, + ): + ... + + def execute(self, ctx: WorkflowContext) -> StepResult: + # Check Gateway status + # Verify HTTPRoute attached + # Health check endpoint +``` + +## Proposed LlmdBenchmarkWorkflow + +```python +class LlmdBenchmarkWorkflow(Workflow): + """llm-d benchmark: deploy via Helm → configure EPP → run GuideLLM → cleanup.""" + + def __init__( + self, + ctx: WorkflowContext, + model: str, + routing_mode: str, # direct, prefix-estimation, prefix-precise, pd-disaggregation + workload: str, + namespace: str, + ): + ... 
+ + def define_steps(self): + # Deploy model-service + GAIE scheduler + infra via Helm + self.add_step(DeployHelmStep( + model=self.model, + routing_mode=self.routing_mode, + helmfile_path=self._get_helmfile(), + namespace=self.namespace, + )) + + # Configure EPP routing strategy + self.add_step(ConfigureEPPStep( + routing_mode=self.routing_mode, + epp_namespace=self.namespace, + )) + + # Wait for Gateway API + HTTPRoute + self.add_step(WaitForGatewayStep( + gateway_name=f"{self.model}-gateway", + namespace=self.namespace, + )) + + # Run GuideLLM (reused from core) + gateway_endpoint = f"http://{self.model}-gateway.{self.namespace}.svc:8080/v1" + self.add_step(RunGuideLLMStep( + endpoint=gateway_endpoint, + model=self.model, + namespace=self.namespace, + workload=self.workload, + )) + + # Cleanup (reused from core, modified for Helm) + self.add_finally(CollectArtifactsStep( + app_label="llm-d", + namespace=self.namespace, + )) + self.add_finally(HelmCleanupStep( + namespace=self.namespace, + )) +``` + +## Config Structure for llm-d + +```yaml +# config/llm-d/defaults.yaml +defaults: + deploy: + namespace: llm-d + helmfile_path: deploy/llm-d/helmfile.yaml + + routing: + default_mode: direct + modes: + direct: {} + prefix-estimation: + scheduler: gaie + prefix_cache: redis + pd-disaggregation: + prefill_replicas: 2 + decode_replicas: 4 + +# config/llm-d/models.yaml +models: + llama-3.1-8b: + hf_model_id: meta-llama/Llama-3.1-8B-Instruct + supported_routing: [direct, prefix-estimation] + helm_values: + vllm: + tensor_parallel: 1 +``` + +## Implementation Roadmap + +### Phase 1: Step Implementations (2-3 days) +- [ ] `DeployHelmStep` - Helm/Helmfile deployment +- [ ] `WaitForGatewayStep` - Gateway API readiness +- [ ] `HelmCleanupStep` - Helm uninstall + +### Phase 2: EPP Integration (2-3 days) +- [ ] `ConfigureEPPStep` - Routing strategy configuration +- [ ] EPP config templates (prefix-estimation, pd-disaggregation) + +### Phase 3: Workflow + CLI (1-2 days) +- [ ] 
`LlmdBenchmarkWorkflow` +- [ ] `projects/llm_d/orchestration/cli.py` +- [ ] `projects/llm_d/orchestration/test_llmd.py` + +### Phase 4: Config + Testing (1-2 days) +- [ ] llm-d config files (defaults, models, routing modes) +- [ ] Integration tests + +## Gaps in Current Implementation + +| Gap | Impact | Resolution | +|-----|--------|------------| +| No Helm support | High | Create `DeployHelmStep` | +| No Gateway API support | High | Create `WaitForGatewayStep` | +| No EPP routing config | High | Create `ConfigureEPPStep` | +| RHAIIS-specific in deploy.py | Low | Already isolated in `rhaiis/workflows/steps/` | +| GuideLLM assumes OpenAI endpoint | None | Already generic (`--target` flag) | + +## Conclusion + +The workflow engine architecture is **well-suited** for llm-d extension: + +1. **Clean separation**: Core workflow engine (`projects/core/`) is deployment-agnostic +2. **Step abstraction**: New steps (Helm, Gateway) implement same `WorkflowStep` interface +3. **Reusable components**: GuideLLM, artifact collection work unchanged +4. **Config system**: `ConfigLoader` can be extended with llm-d-specific configs + +Estimated effort: **6-10 developer days** for full llm-d integration with routing modes. 
diff --git a/projects/rhaiis/__init__.py b/projects/rhaiis/__init__.py new file mode 100644 index 0000000..d90b9a4 --- /dev/null +++ b/projects/rhaiis/__init__.py @@ -0,0 +1 @@ +"""RHAIIS benchmarking project.""" diff --git a/projects/rhaiis/orchestration/__init__.py b/projects/rhaiis/orchestration/__init__.py new file mode 100644 index 0000000..29fa342 --- /dev/null +++ b/projects/rhaiis/orchestration/__init__.py @@ -0,0 +1,5 @@ +"""RHAIIS orchestration module.""" + +from .ci import ci + +__all__ = ["ci"] diff --git a/projects/rhaiis/orchestration/ci.py b/projects/rhaiis/orchestration/ci.py new file mode 100644 index 0000000..3a9837b --- /dev/null +++ b/projects/rhaiis/orchestration/ci.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""RHAIIS CI Operations - Minimal CLI for FOURNOS jobs. + +Most configuration comes from config/ directory via ConfigLoader. +FOURNOS only needs to set a few key parameters: + FORGE_MODEL - Model key to benchmark + FORGE_WORKLOADS - Comma-separated workloads (optional) + +For interactive use with detailed CLI options, use cli.py instead. +""" + +import os +import sys +import types +from pathlib import Path + +import click + +from projects.core.workflow import WorkflowContext + +from . 
import test_rhaiis + +# Default config directory +DEFAULT_CONFIG_DIR = Path(__file__).parent.parent.parent.parent / "config" / "rhaiis" + + +@click.group() +@click.pass_context +def ci(ctx): + """RHAIIS CI Operations for FOURNOS.""" + ctx.ensure_object(types.SimpleNamespace) + + +@ci.command() +@click.option("--dry-run", is_flag=True, help="Print what would be done without executing") +@click.pass_context +def prepare(ctx, dry_run: bool): + """Prepare phase - Install operators (RHOAI, NFD, GPU).""" + rhoai_version = os.environ.get("FORGE_RHOAI_VERSION", "2.19") + + if dry_run: + click.echo("[DRY-RUN] Prepare phase") + click.echo(f"[DRY-RUN] RHOAI Version: {rhoai_version}") + click.echo("[DRY-RUN] Would install: NFD, GPU Operator, RHOAI") + return + + workflow_ctx = WorkflowContext.from_environment() + workflow_ctx.write_metadata({ + "command": "prepare", + "rhoai_version": rhoai_version, + }) + + exit_code = test_rhaiis.run_prepare(workflow_ctx, rhoai_version) + sys.exit(exit_code) + + +@ci.command() +@click.option("--dry-run", is_flag=True, help="Print what would be done without executing") +@click.pass_context +def test(ctx, dry_run: bool): + """Test phase - Run vLLM benchmark. + + Configuration from config/ directory. 
FOURNOS sets: + FORGE_MODEL - Model key (e.g., qwen-0.6b) + FORGE_WORKLOADS - Comma-separated workloads (optional) + """ + workflow_ctx = WorkflowContext.from_environment() + + # Key parameters from FOURNOS + model = os.environ.get("FORGE_MODEL") + workloads_str = os.environ.get("FORGE_WORKLOADS") + + # Parse workloads + workloads = None + if workloads_str: + workloads = [w.strip() for w in workloads_str.split(",")] + + # Log + click.echo("RHAIIS CI Test") + click.echo(f" Model: {model}") + if workloads: + click.echo(f" Workloads: {workloads}") + + # All other config comes from config/ directory via ConfigLoader + exit_code = test_rhaiis.run_test( + ctx=workflow_ctx, + model=model, + workloads=workloads, + config_dir=DEFAULT_CONFIG_DIR, + dry_run=dry_run, + ) + sys.exit(exit_code) + + +@ci.command() +@click.option("--dry-run", is_flag=True, help="Print what would be done without executing") +@click.pass_context +def cleanup(ctx, dry_run: bool): + """Cleanup phase - Remove deployments and resources.""" + namespace = os.environ.get("FORGE_NAMESPACE", "forge") + + if dry_run: + click.echo("[DRY-RUN] Cleanup phase") + click.echo(f"[DRY-RUN] Namespace: {namespace}") + click.echo("[DRY-RUN] Would delete: InferenceServices, ServingRuntimes") + return + + workflow_ctx = WorkflowContext.from_environment() + workflow_ctx.write_metadata({ + "command": "cleanup", + "namespace": namespace, + }) + + exit_code = test_rhaiis.run_cleanup(workflow_ctx, namespace) + sys.exit(exit_code) + + +if __name__ == "__main__": + ci() diff --git a/projects/rhaiis/orchestration/cli.py b/projects/rhaiis/orchestration/cli.py new file mode 100644 index 0000000..4e62de1 --- /dev/null +++ b/projects/rhaiis/orchestration/cli.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +"""RHAIIS CLI - Detailed CLI for interactive/manual use. + +This CLI provides full control over benchmark parameters via command-line options. +For FOURNOS jobs that read config from files, use ci.py instead. 
+ +Examples: + # Single workload + cli.py test --model qwen-0.6b --workload balanced + + # Multiple workloads (deploy vLLM once) + cli.py test --model qwen-0.6b --workloads balanced,short,heterogeneous + + # AMD accelerator + cli.py test --model qwen-0.6b --accelerator amd +""" + +import sys +from pathlib import Path + +import click + +from projects.core.workflow import WorkflowContext + +from . import test_rhaiis + +# Default config directory +DEFAULT_CONFIG_DIR = Path(__file__).parent.parent.parent.parent / "config" / "rhaiis" + + +@click.group() +def cli(): + """RHAIIS CLI - Interactive benchmark commands.""" + + +@cli.command() +@click.option( + "--rhoai-version", + envvar="FORGE_RHOAI_VERSION", + default="2.19", + help="RHOAI operator version to install", +) +@click.option( + "--dry-run", + is_flag=True, + help="Print what would be done without executing", +) +def prepare(rhoai_version: str, dry_run: bool): + """Install operators (RHOAI, NFD, GPU) on OpenShift.""" + ctx = WorkflowContext.from_environment() + ctx.write_metadata({"command": "prepare", "rhoai_version": rhoai_version}) + + if dry_run: + click.echo(f"[DRY-RUN] Would install RHOAI {rhoai_version}") + click.echo(f"[DRY-RUN] Artifacts would be written to: {ctx.artifact_dir}") + return + + exit_code = test_rhaiis.run_prepare(ctx, rhoai_version) + sys.exit(exit_code) + + +@cli.command() +@click.option( + "--model", + envvar="FORGE_MODEL", + default=None, + help="Model key or HuggingFace ID (e.g., qwen-0.6b or Qwen/Qwen3-0.6B)", +) +@click.option( + "--workload", + envvar="FORGE_WORKLOAD", + default=None, + help="Single workload: balanced, heterogeneous, multiturn, etc.", +) +@click.option( + "--workloads", + envvar="FORGE_WORKLOADS", + default=None, + help="Comma-separated workloads to run WITHOUT restarting vLLM (e.g., balanced,short,heterogeneous)", +) +@click.option( + "--config-dir", + type=click.Path(exists=True, path_type=Path), + default=None, + help="Config directory containing defaults.yaml, 
models.yaml, workloads.yaml", +) +@click.option( + "--accelerator", + envvar="FORGE_ACCELERATOR", + type=click.Choice(["nvidia", "amd"]), + default="nvidia", + help="Accelerator type for config inheritance (nvidia, amd)", +) +@click.option( + "--vllm-image", + envvar="FORGE_VLLM_IMAGE", + help="vLLM container image to use (overrides accelerator default)", +) +@click.option( + "--tensor-parallel", + envvar="FORGE_TENSOR_PARALLEL", + type=int, + default=None, + help="Tensor parallelism override (default: from model config)", +) +@click.option( + "--max-requests", + envvar="FORGE_MAX_REQUESTS", + type=int, + default=None, + help="Maximum requests for GuideLLM benchmark (default: from config)", +) +@click.option( + "--namespace", + envvar="FORGE_NAMESPACE", + default=None, + help="Kubernetes namespace for deployment (default: from config)", +) +@click.option( + "--dry-run", + is_flag=True, + help="Print what would be done without executing", +) +def test( + model: str | None, + workload: str | None, + workloads: str | None, + config_dir: Path | None, + accelerator: str, + vllm_image: str | None, + tensor_parallel: int | None, + max_requests: int | None, + namespace: str | None, + dry_run: bool, +): + """Run benchmark: deploy vLLM -> run GuideLLM -> collect artifacts. + + \b + Config inheritance (defaults -> accelerator -> model): + defaults.yaml provides base settings + accelerator (nvidia/amd) provides accelerator-specific overrides + model config provides model-specific settings + CLI flags override everything + + \b + Modes of operation: + 1. Single workload: --model X --workload balanced + 2. 
Multiple workloads: --model X --workloads balanced,short,heterogeneous + (deploys vLLM once, runs GuideLLM multiple times) + """ + ctx = WorkflowContext.from_environment() + config_dir = config_dir or DEFAULT_CONFIG_DIR + + # Parse workloads list + workload_list = workloads.split(",") if workloads else None + + # Validate inputs (skip for dry-run) + if not dry_run and not model: + click.echo("Error: Must specify --model", err=True) + sys.exit(1) + + exit_code = test_rhaiis.run_test( + ctx=ctx, + model=model, + workload=workload, + workloads=workload_list, + config_dir=config_dir, + accelerator=accelerator, + vllm_image=vllm_image, + tensor_parallel=tensor_parallel, + max_requests=max_requests, + namespace=namespace, + dry_run=dry_run, + ) + sys.exit(exit_code) + + +@cli.command() +@click.option( + "--namespace", + envvar="FORGE_NAMESPACE", + default="forge", + help="Kubernetes namespace to clean up", +) +@click.option( + "--dry-run", + is_flag=True, + help="Print what would be done without executing", +) +def cleanup(namespace: str, dry_run: bool): + """Uninstall operators and cleanup resources.""" + ctx = WorkflowContext.from_environment() + ctx.write_metadata({"command": "cleanup", "namespace": namespace}) + + if dry_run: + click.echo(f"[DRY-RUN] Would clean up namespace: {namespace}") + return + + exit_code = test_rhaiis.run_cleanup(ctx, namespace) + sys.exit(exit_code) + + +if __name__ == "__main__": + cli() diff --git a/projects/rhaiis/orchestration/test_rhaiis.py b/projects/rhaiis/orchestration/test_rhaiis.py new file mode 100644 index 0000000..13ec5b6 --- /dev/null +++ b/projects/rhaiis/orchestration/test_rhaiis.py @@ -0,0 +1,407 @@ +"""RHAIIS Benchmark Implementation. + +Shared logic for running vLLM benchmarks. 
Used by both: +- ci.py (minimal CLI for FOURNOS jobs) +- cli.py (detailed CLI for interactive use) +""" + +import logging +import sys +import time +from pathlib import Path + +import click + +from projects.core.scenarios import ConfigLoader +from projects.core.workflow import WorkflowContext +from projects.rhaiis.workflows import BenchmarkWorkflow, CleanupWorkflow, PrepareWorkflow + +logger = logging.getLogger(__name__) + +# Default config directory +DEFAULT_CONFIG_DIR = Path(__file__).parent.parent.parent.parent / "config" / "rhaiis" + + +def _dry_run_test( + model: str | None, + workload: str | None, + workloads: list[str] | None, + config_loader, + accelerator: str, + vllm_image: str | None, + tensor_parallel: int | None, + max_requests: int | None, + namespace: str | None, + ctx, +) -> int: + """Show what would be executed without running.""" + global_defaults = config_loader.get_global_defaults() + resolved_namespace = namespace or global_defaults.get("deploy", {}).get("namespace", "forge") + resolved_max_requests = max_requests or global_defaults.get("guidellm", {}).get("max_requests", 100) + resolved_image = vllm_image or config_loader.get_image() + + click.echo(f"[DRY-RUN] Model: {model}") + click.echo(f"[DRY-RUN] Accelerator: {accelerator}") + click.echo(f"[DRY-RUN] Namespace: {resolved_namespace}") + click.echo(f"[DRY-RUN] Image: {resolved_image}") + click.echo(f"[DRY-RUN] Max requests: {resolved_max_requests}") + click.echo(f"[DRY-RUN] Artifacts: {ctx.artifact_dir}") + + if model: + try: + resolved = config_loader.load_model(model) + click.echo(f"\n[DRY-RUN] Resolved model config ({model}):") + click.echo(f" HF Model ID: {resolved.hf_model_id}") + click.echo(f" num_gpus: {resolved.num_gpus}") + click.echo(f" tensor_parallel: {tensor_parallel or resolved.tensor_parallel}") + click.echo(f" vllm_args: {resolved.vllm_args}") + if resolved.env_vars: + click.echo(f" env_vars: {resolved.env_vars}") + except KeyError: + click.echo(f"\n[DRY-RUN] Model '{model}' 
not in registry, will use defaults") + + if workloads: + click.echo(f"\n[DRY-RUN] Workloads (deploy-once): {workloads}") + click.echo("[DRY-RUN] vLLM will be deployed ONCE, GuideLLM runs for each workload") + elif workload: + click.echo(f"[DRY-RUN] Workload: {workload}") + else: + click.echo("[DRY-RUN] Workload: balanced (default)") + + return 0 + + +def run_prepare(ctx: WorkflowContext, rhoai_version: str) -> int: + """Run prepare phase - install operators.""" + workflow = PrepareWorkflow(ctx, rhoai_version=rhoai_version) + result = workflow.execute() + + if result.success: + click.echo(f"Prepare completed successfully in {result.duration_seconds:.1f}s") + return 0 + else: + click.echo(f"Prepare failed at step: {result.failed_step}", err=True) + return 1 + + +def run_test( + ctx: WorkflowContext, + model: str | None = None, + workload: str | None = None, + workloads: list[str] | None = None, + config_dir: Path | None = None, + accelerator: str = "nvidia", + vllm_image: str | None = None, + tensor_parallel: int | None = None, + max_requests: int | None = None, + namespace: str | None = None, + dry_run: bool = False, +) -> int: + """Run benchmark test phase. 
def run_test(
    ctx: WorkflowContext,
    model: str | None = None,
    workload: str | None = None,
    workloads: list[str] | None = None,
    config_dir: Path | None = None,
    accelerator: str = "nvidia",
    vllm_image: str | None = None,
    tensor_parallel: int | None = None,
    max_requests: int | None = None,
    namespace: str | None = None,
    dry_run: bool = False,
) -> int:
    """Run benchmark test phase.

    Args:
        ctx: Workflow context
        model: Model key or HuggingFace ID
        workload: Single workload name
        workloads: List of workloads (deploy-once pattern)
        config_dir: Config directory path
        accelerator: Accelerator type (nvidia, amd)
        vllm_image: Container image override
        tensor_parallel: TP override
        max_requests: Max GuideLLM requests
        namespace: K8s namespace
        dry_run: Print what would be done without executing

    Returns:
        Exit code (0 = success)
    """
    config_dir = config_dir or DEFAULT_CONFIG_DIR

    # Initialize ConfigLoader for inheritance-based config resolution
    config_loader = ConfigLoader(config_dir, accelerator=accelerator)

    # Dry-run: show resolved config and exit
    if dry_run:
        return _dry_run_test(
            model=model,
            workload=workload,
            workloads=workloads,
            config_loader=config_loader,
            accelerator=accelerator,
            vllm_image=vllm_image,
            tensor_parallel=tensor_parallel,
            max_requests=max_requests,
            namespace=namespace,
            ctx=ctx,
        )

    # Get defaults from config
    global_defaults = config_loader.get_global_defaults()
    default_namespace = global_defaults.get("deploy", {}).get("namespace", "forge")
    default_max_requests = global_defaults.get("guidellm", {}).get("max_requests", 100)

    # Apply defaults. BUGFIX: max_requests uses an explicit None check so a
    # caller-supplied 0 is not silently replaced by the config default
    # ("or" treats 0 as unset). Namespace keeps "or": "" is never valid.
    namespace = namespace or default_namespace
    if max_requests is None:
        max_requests = default_max_requests

    # Get accelerator-specific image if not overridden
    if not vllm_image:
        vllm_image = config_loader.get_image()

    args = {
        "command": "test",
        "model": model,
        "workload": workload,
        "workloads": workloads,
        "accelerator": accelerator,
        "vllm_image": vllm_image,
        "tensor_parallel": tensor_parallel,
        "max_requests": max_requests,
        "namespace": namespace,
    }
    ctx.write_metadata(args)

    # Mode 1: Multiple workloads (deploy-once pattern)
    if workloads and model:
        click.echo(f"Deploy-once mode: {model} with workloads {workloads}")
        click.echo(f"Accelerator: {accelerator}")
        return _run_multi_workload(
            ctx, model, workloads, vllm_image,
            tensor_parallel, max_requests, namespace, config_loader
        )

    # Mode 2: Single workload
    elif model:
        single_workload = workload or "balanced"

        # Resolve model config for vllm_args and env_vars; fall back to
        # treating `model` as a raw HuggingFace ID if not in the registry.
        resolved_tp = tensor_parallel
        resolved_vllm_args = {}
        resolved_env_vars = {}
        resolved_model_id = model

        try:
            resolved_model = config_loader.load_model(model)
            resolved_model_id = resolved_model.hf_model_id
            resolved_vllm_args = dict(resolved_model.vllm_args)
            resolved_env_vars = dict(resolved_model.env_vars)
            if resolved_tp is None:
                resolved_tp = resolved_model.tensor_parallel
            click.echo(f"Using resolved model config: {resolved_model.key}")
            click.echo(f" HF Model ID: {resolved_model_id}")
            click.echo(f" vLLM args: {resolved_vllm_args}")
            if resolved_env_vars:
                click.echo(f" Env vars: {resolved_env_vars}")
        except KeyError:
            if resolved_tp is None:
                resolved_tp = 1
            click.echo(f"Model not in registry, using defaults for: {model}")

        workflow = BenchmarkWorkflow(
            ctx,
            model=resolved_model_id,
            workload=single_workload,
            vllm_image=vllm_image,
            runtime_args=resolved_vllm_args,
            tensor_parallel=resolved_tp,
            max_requests=max_requests,
            namespace=namespace,
            env_vars=resolved_env_vars,
        )
        result = workflow.execute()

        if result.success:
            click.echo(f"\nBenchmark completed successfully in {result.duration_seconds:.1f}s")
            click.echo(f"Artifacts: {result.run_uuid}")
            return 0
        else:
            click.echo(f"\nBenchmark failed at step: {result.failed_step}", err=True)
            return 1

    else:
        click.echo("Error: Must specify --model", err=True)
        return 1


def run_cleanup(ctx: WorkflowContext, namespace: str) -> int:
    """Run cleanup phase. Returns 0 on success, 1 if the workflow reported errors."""
    workflow = CleanupWorkflow(ctx, namespace=namespace)
    result = workflow.execute()

    if result.success:
        click.echo(f"Cleanup completed in {result.duration_seconds:.1f}s")
        return 0
    else:
        click.echo("Cleanup had errors (check logs)", err=True)
        return 1
def _run_multi_workload(
    ctx: WorkflowContext,
    model: str,
    workload_list: list[str],
    vllm_image: str | None,
    tensor_parallel: int | None,
    max_requests: int,
    namespace: str,
    config_loader: ConfigLoader,
) -> int:
    """Run multiple workloads with deploy-once optimization.

    Groups workloads by their vllm_args - workloads with different vllm_args
    get separate deployment groups (requires vLLM restart). Within a group,
    vLLM is deployed once and GuideLLM runs per workload.

    Returns:
        Exit code (0 = all workloads succeeded, 1 = any failure)
    """
    from projects.core.steps import CollectArtifactsStep, CleanupDeploymentStep, RunGuideLLMStep
    from projects.rhaiis.workflows.steps import DeployVLLMStep, WaitForReadyStep

    # Load model config; fall back to treating `model` as a raw HF model ID.
    try:
        resolved = config_loader.load_model(model)
        hf_model_id = resolved.hf_model_id
        base_vllm_args = dict(resolved.vllm_args)
        env_vars = dict(resolved.env_vars)
        model_tp = resolved.tensor_parallel
        click.echo(f"Using resolved model config: {resolved.key}")
        click.echo(f" HF Model ID: {hf_model_id}")
        click.echo(f" vLLM args: {base_vllm_args}")
        if env_vars:
            click.echo(f" Env vars: {env_vars}")
    except KeyError:
        hf_model_id = model
        base_vllm_args = {}
        env_vars = {}
        model_tp = 1
        click.echo(f"Model not in registry, using defaults for: {model}")

    tensor_parallel = tensor_parallel if tensor_parallel is not None else model_tp
    # NOTE(review): duplicates BenchmarkWorkflow._sanitize_name - keep in sync.
    deployment_name = hf_model_id.split("/")[-1].lower().replace(".", "-").replace("_", "-")[:42]

    # Group workloads by their vllm_args.
    # Workloads with same vllm_args share a deployment, different vllm_args
    # get separate deployments. Unknown workloads fall into the no-override group.
    workload_groups: dict[tuple, list[str]] = {}
    for wl_key in workload_list:
        try:
            wl_config = config_loader.load_workload(wl_key)
            vllm_args_key = tuple(sorted(wl_config.vllm_args.items())) if wl_config.vllm_args else ()
        except KeyError:
            vllm_args_key = ()
        # Idiomatic grouping: setdefault replaces the manual membership check.
        workload_groups.setdefault(vllm_args_key, []).append(wl_key)

    num_groups = len(workload_groups)
    if num_groups > 1:
        click.echo(f"\nWorkloads grouped into {num_groups} deployment groups (different vllm_args)")

    failed = False
    total_workloads = len(workload_list)
    workload_idx = 0
    # Hoisted: the predictor endpoint depends only on deployment_name/namespace,
    # which are invariant across groups (KServe RawDeployment service naming).
    endpoint = f"http://{deployment_name}-predictor.{namespace}.svc.cluster.local:8080/v1"

    for group_idx, (vllm_args_key, group_workloads) in enumerate(workload_groups.items(), 1):
        vllm_args_override = dict(vllm_args_key) if vllm_args_key else {}

        # Merge model vllm_args with workload override (override wins)
        merged_vllm_args = dict(base_vllm_args)
        merged_vllm_args.update(vllm_args_override)

        if num_groups > 1:
            click.echo(f"\n=== Deployment Group {group_idx}/{num_groups} ===")
            click.echo(f"Workloads: {group_workloads}")
            if vllm_args_override:
                click.echo(f"vllm_args override: {vllm_args_override}")

        click.echo(f"Deploying vLLM for {hf_model_id}...")

        # Deploy
        ctx.step_number += 1
        ctx.current_step_name = "deploy"
        deploy_step = DeployVLLMStep(
            model=hf_model_id,
            deployment_name=deployment_name,
            vllm_image=vllm_image,
            tensor_parallel=tensor_parallel,
            namespace=namespace,
            runtime_args=merged_vllm_args,
            env_vars=env_vars,
        )
        deploy_result = deploy_step.execute(ctx)
        if not deploy_result.success:
            click.echo(f"Deployment failed: {deploy_result.message}", err=True)
            return 1

        # Wait
        ctx.step_number += 1
        ctx.current_step_name = "wait"
        wait_step = WaitForReadyStep(
            deployment_name=deployment_name,
            namespace=namespace,
            timeout_seconds=3600,
        )
        wait_result = wait_step.execute(ctx)
        if not wait_result.success:
            click.echo(f"Wait failed: {wait_result.message}", err=True)
            CleanupDeploymentStep(deployment_name, namespace).execute(ctx)
            return 1

        click.echo("vLLM deployed successfully!")

        # Run workloads in this group
        for idx, wl in enumerate(group_workloads, 1):
            workload_idx += 1
            click.echo(f"\n--- Workload {workload_idx}/{total_workloads}: {wl} ---")

            # Load workload config to get max_seconds and rates
            try:
                wl_config = config_loader.load_workload(wl)
                wl_max_seconds = wl_config.guidellm.get("max_seconds", 300)
                wl_rates = wl_config.guidellm.get("rates", [1, 50, 100])
                wl_rate_str = ",".join(str(r) for r in wl_rates)
            except KeyError:
                wl_max_seconds = 300
                wl_rate_str = "1,50,100"

            ctx.step_number += 1
            ctx.current_step_name = f"benchmark_{wl}"
            guidellm_step = RunGuideLLMStep(
                endpoint=endpoint,
                model=hf_model_id,
                namespace=namespace,
                workload=wl,
                max_requests=max_requests,
                max_seconds=wl_max_seconds,
                rate=wl_rate_str,
            )
            result = guidellm_step.execute(ctx)

            if not result.success:
                click.echo(f"Workload {wl} failed: {result.message}", err=True)
                failed = True
                break
            else:
                click.echo(f"Workload {wl} completed successfully")
                # Give vLLM a moment to drain before the next workload hits it
                if idx < len(group_workloads):
                    click.echo("Waiting 5s for in-flight requests to drain...")
                    time.sleep(5)

        # Cleanup this deployment before starting next group
        ctx.step_number += 1
        ctx.current_step_name = "collect_artifacts"
        click.echo("\nCollecting artifacts...")
        CollectArtifactsStep(app_label=deployment_name, namespace=namespace).execute(ctx)

        ctx.step_number += 1
        ctx.current_step_name = "cleanup"
        click.echo("Cleaning up deployment...")
        CleanupDeploymentStep(deployment_name, namespace).execute(ctx)

        if failed:
            break

    if failed:
        return 1
    else:
        click.echo(f"\nAll {total_workloads} workloads completed successfully!")
        return 0
"""RHAIIS benchmark workflow.

Deploy vLLM -> Run GuideLLM -> Collect Artifacts
"""

from projects.core.steps import CleanupDeploymentStep, CollectArtifactsStep, RunGuideLLMStep
from projects.core.workflow import Workflow, WorkflowContext
from projects.rhaiis.workflows.steps import DeployVLLMStep, WaitForReadyStep


class BenchmarkWorkflow(Workflow):
    """
    RHAIIS benchmark workflow: deploy vLLM, run benchmark, cleanup.

    Steps:
    1. deploy: Deploy vLLM serving
    2. wait: Wait for deployment to be ready
    3. benchmark: Run GuideLLM benchmark

    Finally:
    1. collect_artifacts: Collect logs and events
    2. cleanup: Delete deployment
    """

    def __init__(
        self,
        ctx: WorkflowContext,
        model: str,
        workload: str = "balanced",
        vllm_image: str = "",
        runtime_args: dict | None = None,
        tensor_parallel: int = 1,
        max_requests: int = 100,
        namespace: str = "forge",
        env_vars: dict | None = None,
    ):
        """
        Initialize benchmark workflow.

        Args:
            ctx: Workflow context
            model: HuggingFace model ID
            workload: GuideLLM workload type
            vllm_image: vLLM container image (from config)
            runtime_args: vLLM runtime arguments (from config)
            tensor_parallel: Number of GPUs for tensor parallelism
            max_requests: Maximum requests for benchmark
            namespace: Kubernetes namespace
            env_vars: Environment variables for vLLM (from config)
        """
        super().__init__(ctx)
        self.model = model
        self.workload = workload
        self.tensor_parallel = tensor_parallel
        self.max_requests = max_requests
        self.namespace = namespace
        # Fall back to the VLLM_IMAGE env var when no image was configured.
        self.vllm_image = vllm_image or ctx.get_env("VLLM_IMAGE", "")
        self.runtime_args = {} if runtime_args is None else runtime_args
        self.env_vars = {} if env_vars is None else env_vars
        # K8s resource name derived from the model ID
        self.deployment_name = self._sanitize_name(model)

    def define_steps(self):
        """Define workflow steps."""
        deploy = DeployVLLMStep(
            model=self.model,
            deployment_name=self.deployment_name,
            vllm_image=self.vllm_image,
            runtime_args=self.runtime_args,
            tensor_parallel=self.tensor_parallel,
            namespace=self.namespace,
            env_vars=self.env_vars,
        )

        # 3600s = 1 hour, enough for large models to load
        wait = WaitForReadyStep(
            deployment_name=self.deployment_name,
            namespace=self.namespace,
            timeout_seconds=3600,
        )

        # KServe RawDeployment mode creates a service named {name}-predictor
        endpoint = f"http://{self.deployment_name}-predictor.{self.namespace}.svc.cluster.local:8080/v1"
        benchmark = RunGuideLLMStep(
            endpoint=endpoint,
            model=self.model,
            namespace=self.namespace,
            workload=self.workload,
            max_requests=self.max_requests,
        )

        for step in (deploy, wait, benchmark):
            self.add_step(step)

        # Always-run steps: artifact collection, then deployment teardown
        self.add_finally(
            CollectArtifactsStep(
                app_label=self.deployment_name,
                namespace=self.namespace,
            )
        )
        self.add_finally(
            CleanupDeploymentStep(
                deployment_name=self.deployment_name,
                namespace=self.namespace,
            )
        )

    @staticmethod
    def _sanitize_name(name: str) -> str:
        """Sanitize model name for K8s resource naming."""
        return name.split("/")[-1].lower().replace(".", "-").replace("_", "-")[:42]


"""RHAIIS cleanup workflow - remove deployments and optionally operators."""


class CleanupWorkflow(Workflow):
    """
    Cleanup RHAIIS resources.

    Removes deployments from the benchmark namespace.
    Optionally can remove operators (not enabled by default).
    """

    def __init__(
        self,
        ctx: WorkflowContext,
        namespace: str = "forge",
        remove_operators: bool = False,
    ):
        """
        Initialize cleanup workflow.

        Args:
            ctx: Workflow context
            namespace: Namespace to clean up
            remove_operators: Whether to also remove operators
        """
        super().__init__(ctx)
        self.namespace = namespace
        self.remove_operators = remove_operators
+ + Args: + ctx: Workflow context + namespace: Namespace to clean up + remove_operators: Whether to also remove operators + """ + super().__init__(ctx) + self.namespace = namespace + self.remove_operators = remove_operators + + def define_steps(self): + """Define cleanup steps.""" + self.add_step( + CleanupNamespaceStep( + namespace=self.namespace, + delete_namespace=False, # Keep namespace, delete contents + ) + ) diff --git a/projects/rhaiis/workflows/prepare.py b/projects/rhaiis/workflows/prepare.py new file mode 100644 index 0000000..fb7f957 --- /dev/null +++ b/projects/rhaiis/workflows/prepare.py @@ -0,0 +1,36 @@ +"""RHAIIS prepare workflow - install operators.""" + +from projects.core.workflow import Workflow, WorkflowContext +from projects.rhaiis.workflows.steps import ( + InstallGPUOperatorStep, + InstallNFDOperatorStep, + InstallRHOAIOperatorStep, +) + + +class PrepareWorkflow(Workflow): + """ + Prepare cluster for RHAIIS benchmarking. + + Installs required operators: + 1. NFD (Node Feature Discovery) Operator + 2. GPU Operator (NVIDIA or AMD) + 3. RHOAI (Red Hat OpenShift AI) Operator + """ + + def __init__(self, ctx: WorkflowContext, rhoai_version: str = "2.19"): + """ + Initialize prepare workflow. 
+ + Args: + ctx: Workflow context + rhoai_version: RHOAI operator version + """ + super().__init__(ctx) + self.rhoai_version = rhoai_version + + def define_steps(self): + """Define operator installation steps.""" + self.add_step(InstallNFDOperatorStep()) + self.add_step(InstallGPUOperatorStep()) + self.add_step(InstallRHOAIOperatorStep(version=self.rhoai_version)) diff --git a/projects/rhaiis/workflows/steps/__init__.py b/projects/rhaiis/workflows/steps/__init__.py new file mode 100644 index 0000000..9d7c8f4 --- /dev/null +++ b/projects/rhaiis/workflows/steps/__init__.py @@ -0,0 +1,14 @@ +"""RHAIIS-specific workflow steps.""" + +from .cleanup import CleanupNamespaceStep +from .deploy import DeployVLLMStep, WaitForReadyStep +from .operators import InstallGPUOperatorStep, InstallNFDOperatorStep, InstallRHOAIOperatorStep + +__all__ = [ + "CleanupNamespaceStep", + "DeployVLLMStep", + "InstallGPUOperatorStep", + "InstallNFDOperatorStep", + "InstallRHOAIOperatorStep", + "WaitForReadyStep", +] diff --git a/projects/rhaiis/workflows/steps/cleanup.py b/projects/rhaiis/workflows/steps/cleanup.py new file mode 100644 index 0000000..661811b --- /dev/null +++ b/projects/rhaiis/workflows/steps/cleanup.py @@ -0,0 +1,101 @@ +"""Cleanup steps for RHAIIS.""" + +import logging +import subprocess +from typing import TYPE_CHECKING + +from projects.core.workflow import StepResult, WorkflowStep + +if TYPE_CHECKING: + from projects.core.workflow import WorkflowContext + +logger = logging.getLogger(__name__) + + +class CleanupNamespaceStep(WorkflowStep): + """Clean up all resources in a namespace.""" + + def __init__( + self, + namespace: str, + delete_namespace: bool = False, + name: str | None = None, + ): + """ + Initialize cleanup step. 
+ + Args: + namespace: Namespace to clean up + delete_namespace: Whether to delete the namespace itself + name: Optional step name + """ + super().__init__(name=name or "cleanup_namespace") + self.namespace = namespace + self.delete_namespace = delete_namespace + + def execute(self, ctx: "WorkflowContext") -> StepResult: + """Delete all resources in namespace.""" + deleted_resources: list[str] = [] + errors: list[str] = [] + + # Resource types to delete (KServe resources first, then standard K8s) + resource_types = [ + "inferenceservice", + "servingruntime", + "deployment", + "service", + "route", + "configmap", + "secret", + "pod", + ] + + for resource_type in resource_types: + try: + result = subprocess.run( + [ + "oc", "delete", resource_type, + "--all", + "-n", self.namespace, + "--ignore-not-found", + ], + capture_output=True, + text=True, + timeout=120, + ) + + if result.returncode == 0: + deleted_resources.append(resource_type) + else: + errors.append(f"{resource_type}: {result.stderr}") + + except subprocess.TimeoutExpired: + errors.append(f"{resource_type}: timeout") + except Exception as e: + errors.append(f"{resource_type}: {e}") + + # Optionally delete namespace + if self.delete_namespace: + try: + result = subprocess.run( + ["oc", "delete", "namespace", self.namespace, "--ignore-not-found"], + capture_output=True, + text=True, + timeout=300, + ) + if result.returncode == 0: + deleted_resources.append(f"namespace/{self.namespace}") + except Exception as e: + errors.append(f"namespace: {e}") + + message = f"Cleaned up {len(deleted_resources)} resource types" + if errors: + message += f" ({len(errors)} errors)" + for err in errors: + logger.warning(f"Cleanup error: {err}") + + return StepResult( + success=True, # Don't fail on cleanup errors + message=message, + data={"deleted": deleted_resources, "errors": errors}, + ) diff --git a/projects/rhaiis/workflows/steps/deploy.py b/projects/rhaiis/workflows/steps/deploy.py new file mode 100644 index 
"""RHAIIS vLLM deployment steps using KServe (ServingRuntime + InferenceService)."""

import logging
import subprocess
import time
import uuid
from typing import TYPE_CHECKING, Any

from projects.core.workflow import StepResult, WorkflowStep

if TYPE_CHECKING:
    from projects.core.workflow import WorkflowContext

logger = logging.getLogger(__name__)


class DeployVLLMStep(WorkflowStep):
    """
    Deploy vLLM serving on OpenShift using KServe.

    Creates a ServingRuntime and InferenceService for vLLM model serving.
    This is the recommended deployment method for RHAIIS/RHOAI.
    """

    def __init__(
        self,
        model: str,
        deployment_name: str,
        vllm_image: str,
        runtime_args: dict[str, Any],
        namespace: str = "forge",
        tensor_parallel: int | None = None,
        replicas: int = 1,
        accelerator: str = "nvidia",
        storage_source: str = "hf",
        storage_path: str | None = None,
        cpu_request: str = "4",
        memory_request: str = "16Gi",
        env_vars: dict[str, str] | None = None,
        name: str | None = None,
    ):
        """
        Initialize vLLM deployment step.

        Args:
            model: HuggingFace model ID (e.g., Qwen/Qwen3-0.6B)
            deployment_name: Name for K8s resources (ServingRuntime, InferenceService)
            vllm_image: Container image for vLLM (from config)
            runtime_args: vLLM runtime arguments (from config, includes all vllm_args)
            namespace: Kubernetes namespace
            tensor_parallel: Override tensor parallelism (default: from runtime_args)
            replicas: Number of replicas (minReplicas)
            accelerator: GPU accelerator type ("nvidia" or "amd")
            storage_source: Model storage source ("hf" for HuggingFace, "s3", "pvc")
            storage_path: Storage path (PVC name for hf, bucket path for s3)
            cpu_request: CPU request
            memory_request: Memory request
            env_vars: Environment variables (from config)
            name: Optional step name
        """
        super().__init__(name=name or "deploy")
        self.model = model
        self.deployment_name = deployment_name
        self.accelerator = accelerator.lower()
        self.vllm_image = vllm_image
        self.namespace = namespace
        self.replicas = replicas
        self.cpu_request = cpu_request
        self.memory_request = memory_request
        self.storage_source = storage_source
        self.storage_path = storage_path

        # Copy so later mutation of the caller's dict cannot affect this step
        self.runtime_args = dict(runtime_args)

        # tensor_parallel: explicit override wins, else runtime_args, else 1
        self.tensor_parallel = tensor_parallel or self.runtime_args.get("tensor-parallel-size", 1)

        self.env_vars = env_vars or {}
        # Short random tag used as a label to correlate resources with this run
        self.deployment_uuid = str(uuid.uuid4())[:8]

    def execute(self, ctx: "WorkflowContext") -> StepResult:
        """Deploy vLLM to OpenShift using KServe."""
        step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}"
        step_dir.mkdir(parents=True, exist_ok=True)

        # Generate KServe YAML (ServingRuntime + InferenceService); keep a copy
        # in the step's artifact directory for post-mortem debugging.
        kserve_yaml = self._generate_kserve_yaml()
        yaml_path = step_dir / "kserve.yaml"
        yaml_path.write_text(kserve_yaml)

        # Ensure namespace exists (best-effort; `oc apply` is idempotent).
        # BUGFIX: removed a preceding `oc create namespace --dry-run=client -o yaml`
        # call whose output was discarded - it created nothing and had no effect.
        subprocess.run(
            ["oc", "apply", "-f", "-"],
            input=f"apiVersion: v1\nkind: Namespace\nmetadata:\n name: {self.namespace}\n",
            capture_output=True,
            text=True,
        )

        # Apply KServe resources
        try:
            result = subprocess.run(
                ["oc", "apply", "-f", str(yaml_path)],
                capture_output=True,
                text=True,
                timeout=60,
            )

            if result.returncode != 0:
                return StepResult.fail(
                    f"Failed to apply KServe resources: {result.stderr}",
                    error=RuntimeError(result.stderr),
                )

            logger.info(f"Deployed InferenceService {self.deployment_name} to {self.namespace}")
            return StepResult.ok(
                f"Deployed {self.deployment_name}",
                deployment_name=self.deployment_name,
                namespace=self.namespace,
                deployment_uuid=self.deployment_uuid,
            )

        except subprocess.TimeoutExpired as e:
            return StepResult.fail("Deployment timed out", error=e)
        except Exception as e:
            return StepResult.fail(f"Deployment error: {e}", error=e)

    def _generate_kserve_yaml(self) -> str:
        """Generate KServe ServingRuntime and InferenceService YAML."""
        # Build vLLM args
        args_lines = self._build_args_lines()

        # Build env vars
        env_lines = self._build_env_lines()

        # Shared memory volume (always needed for vLLM)
        volume_mounts = """
    volumeMounts:
    - name: shared-memory
      mountPath: /dev/shm"""
        volumes = """
  volumes:
  - name: shared-memory
    emptyDir:
      medium: Memory
      sizeLimit: 8Gi"""

        # GPU resource type based on accelerator
        gpu_resource = "nvidia.com/gpu" if self.accelerator == "nvidia" else "amd.com/gpu"

        # Storage URI based on source
        storage_uri = self._build_storage_uri()

        # NOTE(review): the prometheus.io/port annotation advertises 8000 while
        # the container listens on 8080 - confirm the intended metrics port.
        return f"""---
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    opendatahub.io/template-display-name: ServingRuntime for vLLM | Forge
  labels:
    opendatahub.io/dashboard: "true"
  name: {self.deployment_name}
  namespace: {self.namespace}
spec:
  builtInAdapter:
    modelLoadingTimeoutMillis: 300000
  imagePullSecrets:
  - name: npalaska-image-pull
  containers:
  - command:
    - python3
    - -m
    - vllm.entrypoints.openai.api_server
    args:
{args_lines}
    env:
{env_lines}
    image: "{self.vllm_image}"
    name: kserve-container
    ports:
    - containerPort: 8080
      protocol: TCP{volume_mounts}
  multiModel: false
  supportedModelFormats:
  - autoSelect: true
    name: pytorch{volumes}
---
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/path: "/metrics"
    prometheus.io/port: "8000"
    serving.kserve.io/deploymentMode: RawDeployment
    serving.kserve.io/enable-prometheus-scraping: "true"
    storage.kserve.io/readonly: "false"
  labels:
    opendatahub.io/dashboard: "true"
    deployment_uuid: {self.deployment_uuid}
    app: {self.deployment_name}
  name: {self.deployment_name}
  namespace: {self.namespace}
spec:
  predictor:
    minReplicas: {self.replicas}
    model:
      resources:
        limits:
          {gpu_resource}: "{self.tensor_parallel}"
        requests:
          {gpu_resource}: "{self.tensor_parallel}"
          cpu: "{self.cpu_request}"
          memory: "{self.memory_request}"
      runtime: {self.deployment_name}
      modelFormat:
        name: pytorch
      storageUri: {storage_uri}
    serviceAccountName: sa
"""

    def _build_args_lines(self) -> str:
        """Build vLLM command line arguments as YAML list entries."""
        lines = []

        # Model argument depends on storage source: HF pulls by ID, other
        # sources mount the model at /mnt/models and alias the served name.
        if self.storage_source == "hf":
            lines.append(f"    - --model={self.model}")
        else:
            lines.append("    - --model=/mnt/models")
            lines.append(f"    - --served-model-name={self.model}")

        lines.append("    - --port=8080")

        # Add runtime args; booleans become bare flags (only when truthy)
        for key, val in self.runtime_args.items():
            if isinstance(val, bool):
                if val:
                    lines.append(f"    - --{key}")
            else:
                lines.append(f"    - --{key}={val}")

        return "\n".join(lines)

    def _build_env_lines(self) -> str:
        """Build environment variables as YAML list entries."""
        lines = []

        # HuggingFace storage source env vars (caches under the mounted PVC;
        # HF token comes from the storage-config secret)
        if self.storage_source == "hf":
            lines.extend([
                "    - name: HF_HUB_OFFLINE",
                '      value: "0"',
                "    - name: HOME",
                "      value: /mnt/models",
                "    - name: HF_HOME",
                "      value: /mnt/models",
                "    - name: VLLM_CACHE_DIR",
                "      value: /mnt/models/.cache/vllm",
                "    - name: HF_DATASETS_CACHE",
                "      value: /mnt/models/.cache/huggingface/datasets",
                "    - name: HF_TOKEN",
                "      valueFrom:",
                "        secretKeyRef:",
                "          name: storage-config",
                "          key: HF_TOKEN",
            ])

        # Additional env vars from config
        for key, val in self.env_vars.items():
            lines.append(f"    - name: {key}")
            lines.append(f'      value: "{val}"')

        # Empty YAML list when nothing to set
        return "\n".join(lines) if lines else "    []"

    def _build_storage_uri(self) -> str:
        """Build storage URI for InferenceService."""
        if self.storage_source == "hf":
            # Use PVC for HuggingFace models (model-pvc-2 is the default on H200)
            pvc_name = self.storage_path or "model-pvc-2"
            return f"pvc://{pvc_name}"
        elif self.storage_path:
            return f"{self.storage_source}://{self.storage_path}"
        else:
            return f"{self.storage_source}://{self.model}"


class WaitForReadyStep(WorkflowStep):
    """Wait for InferenceService to become ready."""

    def __init__(
        self,
        deployment_name: str,
        namespace: str = "forge",
        timeout_seconds: int = 3600,
        poll_interval: int = 10,
        name: str | None = None,
    ):
        """
        Initialize wait step.

        Args:
            deployment_name: Name of InferenceService to wait for
            namespace: Kubernetes namespace
            timeout_seconds: Maximum wait time
            poll_interval: Seconds between status checks
            name: Optional step name
        """
        super().__init__(name=name or "wait")
        self.deployment_name = deployment_name
        self.namespace = namespace
        self.timeout_seconds = timeout_seconds
        self.poll_interval = poll_interval
+ ) + + start_time = time.monotonic() + last_status_print = 0 + + while time.monotonic() - start_time < self.timeout_seconds: + elapsed = int(time.monotonic() - start_time) + + # Print status every 30 seconds + if elapsed - last_status_print >= 30: + click.echo(f" Still waiting... ({elapsed}s elapsed)") + last_status_print = elapsed + try: + # Check InferenceService status + result = subprocess.run( + [ + "oc", "get", "inferenceservice", + self.deployment_name, + "-n", self.namespace, + "-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}", + ], + capture_output=True, + text=True, + timeout=15, + ) + + if result.returncode == 0 and result.stdout.strip() == "True": + elapsed = time.monotonic() - start_time + logger.info(f"InferenceService ready in {elapsed:.1f}s") + + # Get the service URL + url_result = subprocess.run( + [ + "oc", "get", "inferenceservice", + self.deployment_name, + "-n", self.namespace, + "-o", "jsonpath={.status.url}", + ], + capture_output=True, + text=True, + ) + + # Health check: verify vLLM endpoint is actually responding + endpoint = f"http://{self.deployment_name}-predictor.{self.namespace}.svc.cluster.local:8080" + health_ok = self._wait_for_health_check(endpoint) + if not health_ok: + return StepResult.fail( + f"InferenceService ready but health check failed after {self.timeout_seconds}s" + ) + + total_elapsed = time.monotonic() - start_time + return StepResult.ok( + f"InferenceService ready and healthy in {total_elapsed:.1f}s", + ready_time_seconds=elapsed, + health_check_time_seconds=total_elapsed - elapsed, + service_url=url_result.stdout.strip() if url_result.returncode == 0 else None, + ) + + # Also check underlying deployment for debugging + deploy_result = subprocess.run( + [ + "oc", "rollout", "status", + f"deployment/{self.deployment_name}-predictor", + "-n", self.namespace, + "--timeout=5s", + ], + capture_output=True, + text=True, + timeout=10, + ) + + if deploy_result.returncode == 0: + logger.debug("Underlying 
deployment is ready, waiting for InferenceService...") + + except subprocess.TimeoutExpired: + pass # Continue waiting + except Exception as e: + logger.warning(f"Error checking status: {e}") + + time.sleep(self.poll_interval) + + # Timeout - collect debug info + self._log_debug_info() + + return StepResult.fail( + f"InferenceService not ready after {self.timeout_seconds}s" + ) + + def _log_debug_info(self): + """Log debug information on timeout.""" + try: + # Get InferenceService status + result = subprocess.run( + ["oc", "get", "inferenceservice", self.deployment_name, "-n", self.namespace, "-o", "yaml"], + capture_output=True, + text=True, + ) + if result.returncode == 0: + logger.error(f"InferenceService status:\n{result.stdout}") + + # Get pod status + result = subprocess.run( + ["oc", "get", "pods", "-l", f"serving.kserve.io/inferenceservice={self.deployment_name}", + "-n", self.namespace], + capture_output=True, + text=True, + ) + if result.returncode == 0: + logger.error(f"Pod status:\n{result.stdout}") + except Exception as e: + logger.warning(f"Failed to collect debug info: {e}") + + def _wait_for_health_check(self, endpoint: str, timeout: int = 120, interval: int = 5) -> bool: + """ + Wait for vLLM health endpoint to respond. + + Uses oc exec to curl the health endpoint from within the cluster. 
+ + Args: + endpoint: vLLM service endpoint URL + timeout: Maximum wait time in seconds + interval: Seconds between health check attempts + + Returns: + True if health check passes, False on timeout + """ + import click + + click.echo(f"Verifying vLLM health check at {endpoint}/health ...") + + start_time = time.monotonic() + + while time.monotonic() - start_time < timeout: + try: + # Get a pod name to exec into for health check + pod_result = subprocess.run( + [ + "oc", "get", "pods", + "-l", f"serving.kserve.io/inferenceservice={self.deployment_name}", + "-n", self.namespace, + "-o", "jsonpath={.items[0].metadata.name}", + ], + capture_output=True, + text=True, + timeout=10, + ) + + if pod_result.returncode != 0 or not pod_result.stdout.strip(): + logger.warning("No pod found for health check") + time.sleep(interval) + continue + + pod_name = pod_result.stdout.strip() + + # Try /health endpoint via localhost (pod-internal) + health_result = subprocess.run( + [ + "oc", "exec", pod_name, + "-n", self.namespace, + "-c", "kserve-container", + "--", + "curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", + "http://localhost:8080/health", + ], + capture_output=True, + text=True, + timeout=15, + ) + + elapsed = int(time.monotonic() - start_time) + + if health_result.returncode == 0 and health_result.stdout.strip() == "200": + click.echo(f" Health check passed ({elapsed}s)") + return True + else: + click.echo(f" Health check pending... 
"""Operator installation steps for RHAIIS."""

import logging
import subprocess
from typing import TYPE_CHECKING

from projects.core.workflow import StepResult, WorkflowStep

if TYPE_CHECKING:
    from projects.core.workflow import WorkflowContext

logger = logging.getLogger(__name__)


def _ensure_namespace(namespace: str) -> None:
    """Best-effort `oc create namespace`; AlreadyExists errors are ignored.

    Shared by all operator-install steps so namespace handling stays
    consistent across them.
    """
    subprocess.run(
        ["oc", "create", "namespace", namespace],
        capture_output=True,
    )


class InstallNFDOperatorStep(WorkflowStep):
    """Install Node Feature Discovery operator."""

    def __init__(self, name: str | None = None):
        super().__init__(name=name or "install_nfd")

    def execute(self, ctx: "WorkflowContext") -> StepResult:
        """Install NFD operator.

        Writes the OLM Subscription manifest into the step's artifact
        directory, ensures the openshift-nfd namespace exists, then
        applies the manifest.
        """
        step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}"

        subscription_yaml = """
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: nfd
  namespace: openshift-nfd
spec:
  channel: stable
  name: nfd
  source: redhat-operators
  sourceNamespace: openshift-marketplace
"""
        yaml_path = step_dir / "nfd-subscription.yaml"
        yaml_path.write_text(subscription_yaml)

        # BUG FIX: previous code ran
        #   oc create namespace openshift-nfd --dry-run=client -o yaml
        # which only renders the manifest and never creates the namespace.
        _ensure_namespace("openshift-nfd")

        # BUG FIX: the apply result was discarded, so a failed install
        # still reported success; now failures are surfaced like the
        # GPU/RHOAI steps do.
        if not self._run_oc(["apply", "-f", str(yaml_path)]):
            return StepResult.fail("Failed to install NFD operator")

        return StepResult.ok("NFD operator subscription created")

    def _run_oc(self, args: list[str]) -> bool:
        """Run an oc command; True on exit code 0, False on any failure."""
        try:
            result = subprocess.run(
                ["oc", *args],
                capture_output=True,
                text=True,
                timeout=60,
            )
            return result.returncode == 0
        except Exception as e:
            logger.warning(f"oc command failed: {e}")
            return False


class InstallGPUOperatorStep(WorkflowStep):
    """Install NVIDIA GPU operator."""

    def __init__(self, name: str | None = None):
        super().__init__(name=name or "install_gpu")

    def execute(self, ctx: "WorkflowContext") -> StepResult:
        """Install GPU operator: write Subscription manifest, ensure
        namespace, apply, and fail the step on apply errors."""
        step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}"

        subscription_yaml = """
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: gpu-operator-certified
  namespace: nvidia-gpu-operator
spec:
  channel: v24.6
  name: gpu-operator-certified
  source: certified-operators
  sourceNamespace: openshift-marketplace
"""
        yaml_path = step_dir / "gpu-subscription.yaml"
        yaml_path.write_text(subscription_yaml)

        # Create namespace first (best-effort).
        _ensure_namespace("nvidia-gpu-operator")

        result = subprocess.run(
            ["oc", "apply", "-f", str(yaml_path)],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            return StepResult.fail(f"Failed to install GPU operator: {result.stderr}")

        return StepResult.ok("GPU operator subscription created")


class InstallRHOAIOperatorStep(WorkflowStep):
    """Install Red Hat OpenShift AI operator."""

    def __init__(self, version: str = "2.19", name: str | None = None):
        super().__init__(name=name or "install_rhoai")
        self.version = version

    def execute(self, ctx: "WorkflowContext") -> StepResult:
        """Install RHOAI operator at the configured version."""
        step_dir = ctx.artifact_dir / f"{ctx.step_number:03d}__{ctx.current_step_name}"

        # OLM channel naming convention for RHOAI: stable-<major.minor>.
        channel = f"stable-{self.version}"

        subscription_yaml = f"""
apiVersion: operators.coreos.com/v1alpha1
kind: Subscription
metadata:
  name: rhods-operator
  namespace: redhat-ods-operator
spec:
  channel: {channel}
  name: rhods-operator
  source: redhat-operators
  sourceNamespace: openshift-marketplace
"""
        yaml_path = step_dir / "rhoai-subscription.yaml"
        yaml_path.write_text(subscription_yaml)

        # Create namespace first (best-effort).
        _ensure_namespace("redhat-ods-operator")

        result = subprocess.run(
            ["oc", "apply", "-f", str(yaml_path)],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            return StepResult.fail(f"Failed to install RHOAI operator: {result.stderr}")

        return StepResult.ok(f"RHOAI operator {self.version} subscription created")
class TestConfigLoader:
    """Tests for ConfigLoader inheritance and resolution."""

    @pytest.fixture
    def config_dir(self, tmp_path):
        """Create a config directory with defaults, models, and workloads."""
        # defaults.yaml — global defaults plus per-accelerator settings;
        # AMD deliberately carries extra vllm_args/env_vars so inheritance
        # differences between accelerators are observable in the tests.
        defaults = {
            "defaults": {
                "deploy": {
                    "namespace": "forge",
                    "replicas": 1,
                    "cpu_request": "4",
                    "memory_request": "16Gi",
                },
                "vllm_args": {
                    "gpu-memory-utilization": 0.9,
                    "trust-remote-code": True,
                    "tensor-parallel-size": 1,  # Also determines num_gpus
                },
                "guidellm": {
                    "max_requests": 100,
                    "rate_type": "concurrent",
                },
            },
            "accelerators": {
                "nvidia": {
                    "image": "quay.io/rhaiis/cuda:latest",
                    "vllm_args": {},
                    "env_vars": {},
                },
                "amd": {
                    "image": "quay.io/rhaiis/rocm:latest",
                    "vllm_args": {
                        "num-scheduler-steps": 8,
                    },
                    "env_vars": {
                        "VLLM_ROCM_USE_AITER": "1",
                    },
                },
            },
        }
        (tmp_path / "defaults.yaml").write_text(yaml.safe_dump(defaults))

        # models.yaml — each model exercises a different resolution feature:
        # plain override, aliases, accelerator_overrides, model-level env_vars.
        models = {
            "models": {
                "qwen-0.6b": {
                    "name": "Qwen3-0.6B",
                    "hf_model_id": "Qwen/Qwen3-0.6B",
                    "vllm_args": {
                        "max-model-len": 8192,
                    },
                    "supported_workloads": ["balanced", "short"],
                },
                "llama-70b-fp8": {
                    "name": "Llama-3.3-70B-FP8",
                    "hf_model_id": "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic",
                    "aliases": ["llama-70b", "llama-fp8"],
                    "vllm_args": {
                        "tensor-parallel-size": 4,
                        "max-model-len": 32768,
                        "kv-cache-dtype": "fp8",
                    },
                    "supported_workloads": ["balanced", "short", "long-prompt"],
                },
                "deepseek-r1": {
                    "name": "DeepSeek-R1",
                    "hf_model_id": "deepseek-ai/DeepSeek-R1-0528",
                    "vllm_args": {
                        "tensor-parallel-size": 8,
                    },
                    "accelerator_overrides": {
                        "amd": {
                            "env_vars": {
                                "VLLM_ROCM_USE_AITER": "0",
                            },
                        },
                    },
                },
                # Model with env_vars that apply to all accelerators
                "model-with-env": {
                    "name": "Model With Env",
                    "hf_model_id": "test/model-with-env",
                    "env_vars": {
                        "VLLM_MXFP4_USE_MARLIN": "1",
                        "CUSTOM_VAR": "model-value",
                    },
                },
                # Model with both model-level and accelerator-specific env_vars
                "model-with-overrides": {
                    "name": "Model With Overrides",
                    "hf_model_id": "test/model-with-overrides",
                    "env_vars": {
                        "SHARED_VAR": "model-default",
                        "MODEL_ONLY_VAR": "from-model",
                    },
                    "accelerator_overrides": {
                        "nvidia": {
                            "env_vars": {
                                "TORCH_CUDA_ARCH_LIST": "9.0",
                                "SHARED_VAR": "nvidia-override",
                            },
                        },
                        "amd": {
                            "env_vars": {
                                "SHARED_VAR": "amd-override",
                            },
                        },
                    },
                },
            },
        }
        (tmp_path / "models.yaml").write_text(yaml.safe_dump(models))

        # workloads.yaml — long-prompt variants carry vllm_args overrides
        # used by the grouping tests elsewhere in this file.
        workloads = {
            "workloads": {
                "balanced": {
                    "name": "Balanced",
                    "description": "Balanced prompt and output (1k/1k)",
                    "guidellm": {
                        "data": "prompt_tokens=1000,output_tokens=1000",
                        "rates": [1, 50, 100],
                    },
                    "max_seconds": 180,
                },
                "short": {
                    "name": "Short",
                    "description": "Short prompt and output (256/256)",
                    "guidellm": {
                        "data": "prompt_tokens=256,output_tokens=256",
                    },
                    "max_seconds": 120,
                },
                "long-prompt": {
                    "name": "Long Prompt",
                    "description": "Long prompt (8k/1k) - requires larger context",
                    "guidellm": {
                        "data": "prompt_tokens=8000,output_tokens=1000",
                    },
                    "max_seconds": 300,
                    "vllm_args": {
                        "max-model-len": 10000,
                    },
                },
                "very-long-prompt": {
                    "name": "Very Long Prompt",
                    "description": "Very long prompt (16k/1k)",
                    "guidellm": {
                        "data": "prompt_tokens=16000,output_tokens=1000",
                    },
                    "max_seconds": 600,
                    "vllm_args": {
                        "max-model-len": 20000,
                    },
                },
            },
        }
        (tmp_path / "workloads.yaml").write_text(yaml.safe_dump(workloads))

        return tmp_path

    def test_load_model_basic(self, config_dir):
        """ConfigLoader loads model with defaults applied."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        model = loader.load_model("qwen-0.6b")

        assert isinstance(model, ResolvedModelConfig)
        assert model.key == "qwen-0.6b"
        assert model.name == "Qwen3-0.6B"
        assert model.hf_model_id == "Qwen/Qwen3-0.6B"

    def test_defaults_inheritance(self, config_dir):
        """Model inherits from global defaults."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        model = loader.load_model("qwen-0.6b")

        # From defaults
        assert model.vllm_args["gpu-memory-utilization"] == 0.9
        assert model.vllm_args["trust-remote-code"] is True

        # From model config (overrides default)
        assert model.vllm_args["max-model-len"] == 8192

    def test_accelerator_nvidia_defaults(self, config_dir):
        """NVIDIA accelerator uses correct settings."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        model = loader.load_model("qwen-0.6b")

        # NVIDIA has no special vllm_args or env_vars
        assert "num-scheduler-steps" not in model.vllm_args
        assert model.env_vars == {}

    def test_accelerator_amd_defaults(self, config_dir):
        """AMD accelerator applies accelerator-specific settings."""
        loader = ConfigLoader(config_dir, accelerator="amd")
        model = loader.load_model("qwen-0.6b")

        # AMD accelerator defaults
        assert model.vllm_args["num-scheduler-steps"] == 8
        assert model.env_vars["VLLM_ROCM_USE_AITER"] == "1"

    def test_accelerator_overrides_in_model(self, config_dir):
        """Model-specific accelerator overrides take precedence."""
        # DeepSeek needs AITER disabled on AMD
        loader = ConfigLoader(config_dir, accelerator="amd")
        model = loader.load_model("deepseek-r1")

        # Model accelerator_override takes precedence over accelerator defaults
        assert model.env_vars["VLLM_ROCM_USE_AITER"] == "0"

    def test_model_level_env_vars(self, config_dir):
        """Model-level env_vars apply to all accelerators."""
        # Test on NVIDIA
        nvidia_loader = ConfigLoader(config_dir, accelerator="nvidia")
        model_nvidia = nvidia_loader.load_model("model-with-env")

        assert model_nvidia.env_vars["VLLM_MXFP4_USE_MARLIN"] == "1"
        assert model_nvidia.env_vars["CUSTOM_VAR"] == "model-value"

        # Test on AMD - same model env_vars plus AMD accelerator defaults
        amd_loader = ConfigLoader(config_dir, accelerator="amd")
        model_amd = amd_loader.load_model("model-with-env")

        assert model_amd.env_vars["VLLM_MXFP4_USE_MARLIN"] == "1"
        assert model_amd.env_vars["CUSTOM_VAR"] == "model-value"
        # Also gets AMD accelerator defaults
        assert model_amd.env_vars["VLLM_ROCM_USE_AITER"] == "1"

    def test_env_vars_inheritance_chain(self, config_dir):
        """Env vars follow inheritance: accelerator → model → model.accelerator_overrides."""
        # NVIDIA: accelerator has no env_vars, model has some, model.accelerator_overrides adds CUDA arch
        nvidia_loader = ConfigLoader(config_dir, accelerator="nvidia")
        model_nvidia = nvidia_loader.load_model("model-with-overrides")

        assert model_nvidia.env_vars["MODEL_ONLY_VAR"] == "from-model"
        assert model_nvidia.env_vars["TORCH_CUDA_ARCH_LIST"] == "9.0"
        # SHARED_VAR: nvidia override wins over model default
        assert model_nvidia.env_vars["SHARED_VAR"] == "nvidia-override"

        # AMD: accelerator has AITER, model has its vars, model.accelerator_overrides overrides SHARED_VAR
        amd_loader = ConfigLoader(config_dir, accelerator="amd")
        model_amd = amd_loader.load_model("model-with-overrides")

        assert model_amd.env_vars["MODEL_ONLY_VAR"] == "from-model"
        assert model_amd.env_vars["VLLM_ROCM_USE_AITER"] == "1"  # From AMD accelerator defaults
        # SHARED_VAR: amd override wins over model default
        assert model_amd.env_vars["SHARED_VAR"] == "amd-override"
        # No CUDA arch on AMD
        assert "TORCH_CUDA_ARCH_LIST" not in model_amd.env_vars

    def test_model_alias_lookup(self, config_dir):
        """ConfigLoader finds model by alias."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        model = loader.load_model("llama-70b")

        # Alias resolves back to the canonical key.
        assert model.key == "llama-70b-fp8"
        assert model.hf_model_id == "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"

    def test_model_hf_id_lookup(self, config_dir):
        """ConfigLoader finds model by HuggingFace ID."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        model = loader.load_model("Qwen/Qwen3-0.6B")

        assert model.key == "qwen-0.6b"

    def test_model_not_found(self, config_dir):
        """ConfigLoader raises KeyError for unknown model."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")

        with pytest.raises(KeyError, match="not found"):
            loader.load_model("nonexistent-model")

    def test_num_gpus_property(self, config_dir):
        """ResolvedModelConfig.num_gpus returns correct value."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")

        # Small model - 1 GPU
        small = loader.load_model("qwen-0.6b")
        assert small.num_gpus == 1

        # Large model - 4 GPUs
        large = loader.load_model("llama-70b-fp8")
        assert large.num_gpus == 4

    def test_tensor_parallel_property(self, config_dir):
        """ResolvedModelConfig.tensor_parallel returns correct value."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")

        # Default TP=1
        small = loader.load_model("qwen-0.6b")
        assert small.tensor_parallel == 1

        # TP=4 from model config
        large = loader.load_model("llama-70b-fp8")
        assert large.tensor_parallel == 4

    def test_load_workload(self, config_dir):
        """ConfigLoader loads workload with guidellm defaults merged."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        workload = loader.load_workload("balanced")

        assert isinstance(workload, ResolvedWorkloadConfig)
        assert workload.key == "balanced"
        assert workload.name == "Balanced"
        assert workload.max_seconds == 180

        # Guidellm config merged with defaults
        assert workload.guidellm["data"] == "prompt_tokens=1000,output_tokens=1000"
        assert workload.guidellm["rate_type"] == "concurrent"  # From defaults
        assert workload.guidellm["rates"] == [1, 50, 100]  # From workload

    def test_load_workload_without_vllm_args(self, config_dir):
        """Workload without vllm_args has empty dict."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        workload = loader.load_workload("balanced")

        assert workload.vllm_args == {}

    def test_load_workload_with_vllm_args(self, config_dir):
        """Workload with vllm_args returns the override."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        workload = loader.load_workload("long-prompt")

        assert workload.key == "long-prompt"
        assert workload.vllm_args == {"max-model-len": 10000}

    def test_workload_not_found(self, config_dir):
        """ConfigLoader raises KeyError for unknown workload."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")

        with pytest.raises(KeyError, match="not found"):
            loader.load_workload("nonexistent")

    def test_get_image(self, config_dir):
        """ConfigLoader returns correct image for accelerator."""
        nvidia_loader = ConfigLoader(config_dir, accelerator="nvidia")
        assert nvidia_loader.get_image() == "quay.io/rhaiis/cuda:latest"

        amd_loader = ConfigLoader(config_dir, accelerator="amd")
        assert amd_loader.get_image() == "quay.io/rhaiis/rocm:latest"

    def test_list_models(self, config_dir):
        """ConfigLoader.list_models returns all model keys."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        models = loader.list_models()

        assert "qwen-0.6b" in models
        assert "llama-70b-fp8" in models
        assert "deepseek-r1" in models
        assert "model-with-env" in models
        assert "model-with-overrides" in models
        assert len(models) == 5

    def test_list_workloads(self, config_dir):
        """ConfigLoader.list_workloads returns all workload keys."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")
        workloads = loader.list_workloads()

        assert "balanced" in workloads
        assert "short" in workloads
        assert "long-prompt" in workloads
        assert "very-long-prompt" in workloads
        assert len(workloads) == 4

    def test_caching(self, config_dir):
        """ConfigLoader caches loaded configs."""
        loader = ConfigLoader(config_dir, accelerator="nvidia")

        # Access defaults twice - should be same object (identity check
        # proves the YAML is parsed once, not re-read per access).
        defaults1 = loader.defaults
        defaults2 = loader.defaults
        assert defaults1 is defaults2

        # Same for models
        models1 = loader.models
        models2 = loader.models
        assert models1 is models2
class TestConfigLoaderScenarios:
    """Tests for ConfigLoader scenario loading."""

    @pytest.fixture
    def full_config_dir(self, tmp_path):
        """Create full config directory with scenarios."""
        # Create base configs (minimal defaults/models/workloads so the
        # scenario file can reference them).
        defaults = {
            "defaults": {
                "deploy": {"namespace": "forge"},
                "vllm_args": {"gpu-memory-utilization": 0.9},
                "guidellm": {"max_requests": 100},
            },
            "accelerators": {
                "nvidia": {"image": "cuda:latest"},
            },
        }
        (tmp_path / "defaults.yaml").write_text(yaml.safe_dump(defaults))

        models = {
            "models": {
                "test-model": {
                    "hf_model_id": "test/model",
                    "vllm_args": {"max-model-len": 4096},
                },
            },
        }
        (tmp_path / "models.yaml").write_text(yaml.safe_dump(models))

        workloads = {
            "workloads": {
                "balanced": {"guidellm": {"data": "1k"}},
            },
        }
        (tmp_path / "workloads.yaml").write_text(yaml.safe_dump(workloads))

        # Create scenarios directory
        scenarios_dir = tmp_path / "scenarios"
        scenarios_dir.mkdir()

        # Scenario-level defaults override the globals (namespace here).
        scenario = {
            "name": "test-scenario",
            "defaults": {
                "deploy": {
                    "namespace": "test-ns",
                },
            },
            "scenarios": [
                {
                    "model": "test-model",
                    "workloads": ["balanced"],
                },
            ],
        }
        (scenarios_dir / "test.yaml").write_text(yaml.safe_dump(scenario))

        return tmp_path

    def test_load_scenario(self, full_config_dir):
        """ConfigLoader loads scenario with resolved defaults."""
        loader = ConfigLoader(full_config_dir, accelerator="nvidia")
        scenario = loader.load_scenario(full_config_dir / "scenarios" / "test.yaml")

        assert scenario["name"] == "test-scenario"
        assert scenario["_accelerator"] == "nvidia"

        # Resolved defaults merge global + scenario
        resolved = scenario["_resolved_defaults"]
        assert resolved["deploy"]["namespace"] == "test-ns"  # From scenario
        assert resolved["vllm_args"]["gpu-memory-utilization"] == 0.9  # From global
class TestDeepMerge:
    """Tests for the deep_merge utility function."""

    def test_basic_merge(self):
        """Flat keys: override wins, new keys are added."""
        from projects.core.scenarios.config_loader import deep_merge

        lhs = {"a": 1, "b": 2}
        rhs = {"b": 3, "c": 4}
        assert deep_merge(lhs, rhs) == {"a": 1, "b": 3, "c": 4}

    def test_nested_merge(self):
        """Nested dictionaries are merged recursively."""
        from projects.core.scenarios.config_loader import deep_merge

        merged = deep_merge(
            {"outer": {"a": 1, "b": 2}},
            {"outer": {"b": 3, "c": 4}},
        )
        assert merged == {"outer": {"a": 1, "b": 3, "c": 4}}

    def test_lists_replaced(self):
        """Lists are replaced wholesale, never merged element-wise."""
        from projects.core.scenarios.config_loader import deep_merge

        merged = deep_merge({"items": [1, 2, 3]}, {"items": [4, 5]})
        assert merged == {"items": [4, 5]}

    def test_no_mutation(self):
        """deep_merge leaves both input dicts untouched."""
        from projects.core.scenarios.config_loader import deep_merge

        lhs = {"a": {"b": 1}}
        rhs = {"a": {"c": 2}}

        merged = deep_merge(lhs, rhs)

        # Inputs unchanged …
        assert lhs == {"a": {"b": 1}}
        assert rhs == {"a": {"c": 2}}
        # … while the result carries both halves.
        assert merged == {"a": {"b": 1, "c": 2}}
class TestWorkloadVllmArgsGrouping:
    """Tests for workload-specific vllm_args and deployment grouping."""

    @pytest.fixture
    def config_dir_with_vllm_args(self, tmp_path):
        """Create config directory with workloads that have vllm_args."""
        # defaults.yaml
        defaults = {
            "defaults": {
                "deploy": {"namespace": "forge"},
                "vllm_args": {"gpu-memory-utilization": 0.9, "max-model-len": 4096},
                "guidellm": {"max_requests": 100},
            },
            "accelerators": {
                "nvidia": {"image": "cuda:latest"},
            },
        }
        (tmp_path / "defaults.yaml").write_text(yaml.safe_dump(defaults))

        # models.yaml
        models = {
            "models": {
                "test-model": {
                    "hf_model_id": "test/model",
                    "vllm_args": {"trust-remote-code": True},
                },
            },
        }
        (tmp_path / "models.yaml").write_text(yaml.safe_dump(models))

        # workloads.yaml - some with vllm_args, some without
        workloads = {
            "workloads": {
                "balanced": {
                    "name": "Balanced",
                    "guidellm": {"data": "1k/1k"},
                },
                "short": {
                    "name": "Short",
                    "guidellm": {"data": "256/256"},
                },
                "long-prompt": {
                    "name": "Long Prompt",
                    "guidellm": {"data": "8k/1k"},
                    "vllm_args": {"max-model-len": 10000},
                },
                "very-long-prompt": {
                    "name": "Very Long Prompt",
                    "guidellm": {"data": "16k/1k"},
                    "vllm_args": {"max-model-len": 20000},
                },
            },
        }
        (tmp_path / "workloads.yaml").write_text(yaml.safe_dump(workloads))

        # scenarios/test.yaml
        scenarios_dir = tmp_path / "scenarios"
        scenarios_dir.mkdir()
        scenario = {
            "name": "test-scenario",
            "scenarios": [
                {
                    "model": "test-model",
                    "workloads": ["balanced", "short", "long-prompt", "very-long-prompt"],
                },
            ],
        }
        (scenarios_dir / "test.yaml").write_text(yaml.safe_dump(scenario))

        return tmp_path

    def test_workload_config_has_vllm_args(self, config_dir_with_vllm_args):
        """WorkloadConfig parses vllm_args from config."""
        from projects.core.scenarios.generator import WorkloadConfig

        wl = WorkloadConfig.from_dict("long-prompt", {
            "name": "Long Prompt",
            "guidellm": {"data": "8k/1k"},
            "vllm_args": {"max-model-len": 10000},
        })

        assert wl.vllm_args == {"max-model-len": 10000}

    def test_workload_config_empty_vllm_args(self, config_dir_with_vllm_args):
        """WorkloadConfig without vllm_args has empty dict."""
        from projects.core.scenarios.generator import WorkloadConfig

        wl = WorkloadConfig.from_dict("balanced", {
            "name": "Balanced",
            "guidellm": {"data": "1k/1k"},
        })

        assert wl.vllm_args == {}

    def test_deployment_group_merged_vllm_args(self, config_dir_with_vllm_args):
        """DeploymentGroup.merged_vllm_args combines model and workload args."""
        from projects.core.scenarios.generator import DeploymentGroup, ModelConfig

        model = ModelConfig(
            key="test-model",
            name="Test Model",
            hf_model_id="test/model",
            vllm_args={"gpu-memory-utilization": 0.9, "max-model-len": 4096},
        )
        group = DeploymentGroup(
            model=model,
            tensor_parallel=1,
            routing="direct",
            workloads=[],
            vllm_args_override={"max-model-len": 10000},
        )

        merged = group.merged_vllm_args

        # Model args preserved
        assert merged["gpu-memory-utilization"] == 0.9
        # Workload override wins
        assert merged["max-model-len"] == 10000

    def test_expand_grouped_separates_by_vllm_args(self, config_dir_with_vllm_args):
        """ScenarioGenerator groups workloads by vllm_args."""
        from projects.core.scenarios.generator import ScenarioGenerator

        gen = ScenarioGenerator(
            scenarios_path=config_dir_with_vllm_args / "scenarios" / "test.yaml",
            config_dir=config_dir_with_vllm_args,
            accelerator="nvidia",
        )
        gen.load()
        groups = gen.expand_grouped()

        # Should have 3 groups:
        # 1. balanced + short (no vllm_args)
        # 2. long-prompt (max-model-len: 10000)
        # 3. very-long-prompt (max-model-len: 20000)
        assert len(groups) == 3

        # Find each group by its vllm_args
        no_override_group = None
        long_prompt_group = None
        very_long_group = None

        for g in groups:
            if not g.vllm_args_override:
                no_override_group = g
            elif g.vllm_args_override.get("max-model-len") == 10000:
                long_prompt_group = g
            elif g.vllm_args_override.get("max-model-len") == 20000:
                very_long_group = g

        # Group without override has balanced + short
        assert no_override_group is not None
        assert len(no_override_group.workloads) == 2
        assert {w.key for w in no_override_group.workloads} == {"balanced", "short"}

        # long-prompt group
        assert long_prompt_group is not None
        assert len(long_prompt_group.workloads) == 1
        assert long_prompt_group.workloads[0].key == "long-prompt"
        assert long_prompt_group.vllm_args_override == {"max-model-len": 10000}

        # very-long-prompt group
        assert very_long_group is not None
        assert len(very_long_group.workloads) == 1
        assert very_long_group.workloads[0].key == "very-long-prompt"
        assert very_long_group.vllm_args_override == {"max-model-len": 20000}

    def test_same_vllm_args_same_group(self, tmp_path):
        """Workloads with identical vllm_args share a deployment group."""
        # Create config where two workloads have same vllm_args
        defaults = {
            "defaults": {"vllm_args": {}},
            "accelerators": {"nvidia": {"image": "cuda:latest"}},
        }
        (tmp_path / "defaults.yaml").write_text(yaml.safe_dump(defaults))

        models = {
            "models": {
                "test-model": {"hf_model_id": "test/model"},
            },
        }
        (tmp_path / "models.yaml").write_text(yaml.safe_dump(models))

        workloads = {
            "workloads": {
                "long-a": {
                    "guidellm": {"data": "a"},
                    "vllm_args": {"max-model-len": 10000},
                },
                "long-b": {
                    "guidellm": {"data": "b"},
                    "vllm_args": {"max-model-len": 10000},  # Same as long-a
                },
            },
        }
        (tmp_path / "workloads.yaml").write_text(yaml.safe_dump(workloads))

        scenarios_dir = tmp_path / "scenarios"
        scenarios_dir.mkdir()
        scenario = {
            "name": "test",
            "scenarios": [{"model": "test-model", "workloads": ["long-a", "long-b"]}],
        }
        (scenarios_dir / "test.yaml").write_text(yaml.safe_dump(scenario))

        from projects.core.scenarios.generator import ScenarioGenerator

        gen = ScenarioGenerator(
            scenarios_path=tmp_path / "scenarios" / "test.yaml",
            config_dir=tmp_path,
        )
        gen.load()
        groups = gen.expand_grouped()

        # Both workloads have same vllm_args -> 1 group
        assert len(groups) == 1
        assert len(groups[0].workloads) == 2
        assert {w.key for w in groups[0].workloads} == {"long-a", "long-b"}
class TestScenarioGenerator:
    """Tests for ScenarioGenerator."""

    @pytest.fixture
    def sample_config_path(self):
        """Create a sample scenarios.yaml file using new format."""
        # NOTE(review): NamedTemporaryFile(delete=False) is never unlinked,
        # so each test run leaves a stray temp file — consider tmp_path.
        config = {
            "name": "test-scenarios",
            "description": "Test scenario configuration",
            "common": {
                "namespace": "forge",
                "runtime_args": {
                    "dtype": "auto",
                    "gpu-memory-utilization": 0.9,
                },
            },
            "workloads": {
                "balanced": {
                    "description": "Balanced workload",
                    "guidellm": {"max_requests": 100},
                },
                "short": {
                    "description": "Short workload",
                    "guidellm": {"max_requests": 50},
                },
            },
            "routing": {
                "direct": {"mode": "direct"},
            },
            # New format: models section with model definitions
            "models": {
                "qwen-0.6b": {
                    "hf_model_id": "Qwen/Qwen3-0.6B",
                    "name": "qwen3-0-6b",
                    "vllm_args": {"max-model-len": 4096},
                },
            },
            # New format: scenarios list references model keys
            "scenarios": [
                {
                    "model": "qwen-0.6b",
                    "workloads": ["balanced", "short"],
                    "routing": ["direct"],
                    "tensor_parallel": [1, 2],
                },
            ],
        }

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False
        ) as f:
            yaml.safe_dump(config, f)
        yield Path(f.name)

    def test_load_config(self, sample_config_path):
        """Generator loads and parses YAML config."""
        gen = ScenarioGenerator(sample_config_path)
        config = gen.load()

        assert config.name == "test-scenarios"
        assert config.description == "Test scenario configuration"
        assert "qwen-0.6b" in config.models

    def test_matrix_expansion(self, sample_config_path):
        """Matrix expansion produces correct number of scenarios."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()

        # 2 workloads × 1 routing × 2 TP = 4 scenarios
        assert len(scenarios) == 4

    def test_expanded_scenario_ids(self, sample_config_path):
        """Expanded scenarios have deterministic IDs."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()
        scenario_ids = [s.scenario_id for s in scenarios]

        # Check expected scenario IDs (model_short derived from model key)
        assert "qwen-0-6b_balanced_direct_tp1" in scenario_ids
        assert "qwen-0-6b_balanced_direct_tp2" in scenario_ids
        assert "qwen-0-6b_short_direct_tp1" in scenario_ids
        assert "qwen-0-6b_short_direct_tp2" in scenario_ids

    def test_runtime_args_merging(self, sample_config_path):
        """Runtime args come from model vllm_args + tensor_parallel."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()

        for s in scenarios:
            # Model-specific vllm_args
            assert s.runtime_args["max-model-len"] == 4096
            # TP from matrix
            assert s.runtime_args["tensor-parallel-size"] == s.tensor_parallel

    def test_workload_config_applied(self, sample_config_path):
        """Workload config is available in workload_config."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()

        for s in scenarios:
            # Workload guidellm config is in workload_config
            assert "max_requests" in s.workload_config

    def test_deploy_config_num_gpus(self, sample_config_path):
        """Deploy config num_gpus matches tensor-parallel-size."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()

        for s in scenarios:
            assert s.deploy_config["num_gpus"] == s.tensor_parallel

    def test_summary(self, sample_config_path):
        """Summary produces readable output."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        summary = gen.summary()

        assert "test-scenarios" in summary
        assert "qwen-0.6b" in summary  # Model key appears in deployment groups
        assert "Total Benchmark Runs: 4" in summary

    def test_to_scenario_config(self, sample_config_path):
        """ExpandedScenario converts to ScenarioConfig."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()
        scenario_config = scenarios[0].to_scenario_config(namespace="test-ns")

        assert isinstance(scenario_config, ScenarioConfig)
        assert scenario_config.namespace == "test-ns"
        assert scenario_config.model_id == scenarios[0].model_id

    def test_to_dict(self, sample_config_path):
        """ExpandedScenario serializes to dict."""
        gen = ScenarioGenerator(sample_config_path)
        gen.load()

        scenarios = gen.expand()
        d = scenarios[0].to_dict()

        assert "model_id" in d
        assert "scenario_id" in d
        assert "runtime_args" in d
        assert d["model_id"] == scenarios[0].model_id
assert "scenario_id" in d + assert "runtime_args" in d + assert d["model_id"] == scenarios[0].model_id + + +class TestScenarioConfig: + """Tests for ScenarioConfig utilities.""" + + def test_sanitize_name(self): + """sanitize_name produces K8s-compatible names.""" + # Dots are removed for K8s compatibility + assert ScenarioConfig.sanitize_name("Qwen/Qwen3-0.6B") == "qwen-qwen3-06b" + assert ScenarioConfig.sanitize_name("test_name") == "test-name" + assert ( + ScenarioConfig.sanitize_name("very-long-name" * 10, max_len=20) + == "very-long-namevery-l" + ) + + def test_shorten_model_name(self): + """shorten_model_name extracts short name.""" + assert ScenarioConfig.shorten_model_name("Qwen/Qwen3-0.6B") == "qwen3-0-6b" + assert ( + ScenarioConfig.shorten_model_name("openai/gpt-oss-120b") == "gpt-oss-120b" + ) + assert ( + ScenarioConfig.shorten_model_name("RedHatAI/model-instruct") + == "model" + ) + assert ( + ScenarioConfig.shorten_model_name("org/model-dynamic") == "model" + ) + + +class TestExplicitRuns: + """Tests for explicit run definitions (no matrix).""" + + @pytest.fixture + def explicit_runs_config(self): + """Config with explicit runs instead of matrix.""" + config = { + "name": "explicit-runs", + "common": {"namespace": "forge"}, + "workloads": {"balanced": {"guidellm": {"max_requests": 100}}}, + "routing": {"direct": {"mode": "direct"}}, + "models": { + "test-model": { + "hf_model_id": "test/model", + "name": "test-model", + "vllm_args": {"extra": "value"}, + }, + }, + "runs": [ + { + "model": "test-model", + "workload": "balanced", + "routing": "direct", + "tensor_parallel": 4, + }, + ], + } + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".yaml", delete=False + ) as f: + yaml.safe_dump(config, f) + yield Path(f.name) + + def test_explicit_run_expansion(self, explicit_runs_config): + """Explicit runs expand without matrix.""" + gen = ScenarioGenerator(explicit_runs_config) + gen.load() + + scenarios = gen.expand() + + assert len(scenarios) == 1 
+ s = scenarios[0] + assert s.model_id == "test/model" + assert s.workload == "balanced" + assert s.tensor_parallel == 4 + assert s.runtime_args["tensor-parallel-size"] == 4 + assert s.runtime_args["extra"] == "value" diff --git a/tests/core/steps/__init__.py b/tests/core/steps/__init__.py new file mode 100644 index 0000000..67b31d7 --- /dev/null +++ b/tests/core/steps/__init__.py @@ -0,0 +1 @@ +"""Tests for shared steps.""" diff --git a/tests/core/steps/test_artifacts.py b/tests/core/steps/test_artifacts.py new file mode 100644 index 0000000..6f4bfeb --- /dev/null +++ b/tests/core/steps/test_artifacts.py @@ -0,0 +1,122 @@ +"""Unit tests for artifact collection steps.""" + +import subprocess +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from projects.core.steps import CleanupDeploymentStep, CollectArtifactsStep +from projects.core.workflow import WorkflowContext + + +class TestCollectArtifactsStep: + """Tests for CollectArtifactsStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("collect_artifacts") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = CollectArtifactsStep() + assert step.name == "collect_artifacts" + + def test_custom_app_label(self): + """Step accepts custom app label.""" + step = CollectArtifactsStep(app_label="custom-app") + assert step.app_label == "custom-app" + + @patch("subprocess.run") + def test_collects_logs(self, mock_run, context): + """CollectArtifactsStep collects pod logs.""" + mock_run.return_value = MagicMock( + returncode=0, stdout="log output", stderr="" + ) + + step = CollectArtifactsStep(app_label="test-app") + result = step.execute(context) + + assert result.success + # Verify oc logs was 
called + calls = [str(c) for c in mock_run.call_args_list] + assert any("logs" in str(c) for c in calls) + + @patch("subprocess.run") + def test_never_fails(self, mock_run, context): + """CollectArtifactsStep never fails the workflow.""" + mock_run.return_value = MagicMock( + returncode=1, stdout="", stderr="command failed" + ) + + step = CollectArtifactsStep() + result = step.execute(context) + + # Should succeed even if oc commands fail + assert result.success + + +class TestCleanupDeploymentStep: + """Tests for CleanupDeploymentStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("cleanup") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = CleanupDeploymentStep(deployment_name="test") + assert step.name == "cleanup" + + @patch("subprocess.run") + def test_deletes_deployment(self, mock_run, context): + """CleanupDeploymentStep deletes deployment.""" + mock_run.return_value = MagicMock(returncode=0) + + step = CleanupDeploymentStep(deployment_name="test-deploy") + result = step.execute(context) + + assert result.success + # Check deployment was deleted + calls = [str(c) for c in mock_run.call_args_list] + assert any("deployment" in str(c) for c in calls) + + @patch("subprocess.run") + def test_deletes_service_and_route(self, mock_run, context): + """CleanupDeploymentStep deletes associated resources.""" + mock_run.return_value = MagicMock(returncode=0) + + step = CleanupDeploymentStep( + deployment_name="test", + delete_service=True, + delete_route=True, + ) + result = step.execute(context) + + assert result.success + + @patch("subprocess.run") + def test_never_fails(self, mock_run, context): + """CleanupDeploymentStep never fails the workflow.""" + mock_run.return_value = 
MagicMock(returncode=1) + + step = CleanupDeploymentStep(deployment_name="test") + result = step.execute(context) + + # Should succeed even if deletes fail + assert result.success diff --git a/tests/core/steps/test_guidellm.py b/tests/core/steps/test_guidellm.py new file mode 100644 index 0000000..867878c --- /dev/null +++ b/tests/core/steps/test_guidellm.py @@ -0,0 +1,117 @@ +"""Unit tests for GuideLLM step.""" + +import subprocess +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from projects.core.steps import RunGuideLLMStep +from projects.core.workflow import WorkflowContext + + +class TestRunGuideLLMStep: + """Tests for RunGuideLLMStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("benchmark") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + ) + assert step.name == "benchmark" + + def test_custom_step_name(self): + """Step accepts custom name.""" + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + name="custom-benchmark", + ) + assert step.name == "custom-benchmark" + + @patch("subprocess.run") + def test_execute_success(self, mock_run, context): + """RunGuideLLMStep executes successfully.""" + # Create mock output file + step_dir = context.artifact_dir / f"{context.step_number:03d}__{context.current_step_name}" + step_dir.mkdir(parents=True, exist_ok=True) + output_file = step_dir / "guidellm_results.json" + output_file.write_text('{"results": []}') + + # Mock responses for: oc apply, get phase, get logs (with marker), get logs (collect), delete + mock_run.side_effect = [ + MagicMock(returncode=0, 
stdout="pod created", stderr=""), # oc apply + MagicMock(returncode=0, stdout="Running", stderr=""), # get phase + MagicMock(returncode=0, stdout="BENCHMARK_COMPLETE", stderr=""), # get logs (marker check) + MagicMock(returncode=0, stdout="benchmark logs", stderr=""), # collect logs + MagicMock(returncode=0, stdout="pod deleted", stderr=""), # delete pod + ] + + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + workload="balanced", + max_requests=10, + ) + result = step.execute(context) + + assert result.success + assert mock_run.called + + @patch("subprocess.run") + def test_execute_failure(self, mock_run, context): + """RunGuideLLMStep handles failure.""" + mock_run.return_value = MagicMock( + returncode=1, stdout="", stderr="benchmark failed" + ) + + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + ) + result = step.execute(context) + + assert not result.success + assert "failed" in result.message.lower() + + @patch("subprocess.run") + def test_handles_timeout(self, mock_run, context): + """RunGuideLLMStep handles timeout.""" + mock_run.side_effect = subprocess.TimeoutExpired("guidellm", 60) + + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + max_seconds=60, + ) + result = step.execute(context) + + assert not result.success + assert "timed out" in result.message.lower() + + @patch("subprocess.run") + def test_handles_missing_command(self, mock_run, context): + """RunGuideLLMStep handles missing guidellm command.""" + mock_run.side_effect = FileNotFoundError("guidellm not found") + + step = RunGuideLLMStep( + endpoint="http://localhost:8080/v1", + model="test-model", + ) + result = step.execute(context) + + assert not result.success + assert "not found" in result.message.lower() diff --git a/tests/core/utils/__init__.py b/tests/core/utils/__init__.py new file mode 100644 index 0000000..96c5c33 --- /dev/null +++ b/tests/core/utils/__init__.py @@ -0,0 
+1 @@ +"""Tests for core utilities.""" diff --git a/tests/core/utils/test_oc.py b/tests/core/utils/test_oc.py new file mode 100644 index 0000000..bd24016 --- /dev/null +++ b/tests/core/utils/test_oc.py @@ -0,0 +1,496 @@ +"""Unit tests for OC wrapper with retry logic.""" + +import subprocess +from unittest.mock import MagicMock, patch + +import pytest + +from projects.core.utils import OC, OCResult, RetryConfig + + +class TestRetryConfig: + """Tests for RetryConfig dataclass.""" + + def test_default_values(self): + """RetryConfig has sensible defaults.""" + config = RetryConfig() + + assert config.max_retries == 3 + assert config.initial_delay == 1.0 + assert config.max_delay == 30.0 + assert config.backoff_multiplier == 2.0 + assert config.retry_on_timeout is True + + def test_custom_values(self): + """RetryConfig accepts custom values.""" + config = RetryConfig( + max_retries=5, + initial_delay=0.5, + max_delay=60.0, + backoff_multiplier=3.0, + retry_on_timeout=False, + ) + + assert config.max_retries == 5 + assert config.initial_delay == 0.5 + assert config.max_delay == 60.0 + assert config.backoff_multiplier == 3.0 + assert config.retry_on_timeout is False + + +class TestOCResult: + """Tests for OCResult dataclass.""" + + def test_from_completed_process_success(self): + """OCResult created from successful subprocess.""" + mock_result = MagicMock(spec=subprocess.CompletedProcess) + mock_result.returncode = 0 + mock_result.stdout = "pod/my-pod created" + mock_result.stderr = "" + + result = OCResult.from_completed_process( + mock_result, + command=["oc", "apply", "-f", "test.yaml"], + attempts=1, + duration=0.5, + ) + + assert result.success is True + assert result.returncode == 0 + assert result.stdout == "pod/my-pod created" + assert result.stderr == "" + assert result.attempts == 1 + assert result.duration == 0.5 + + def test_from_completed_process_failure(self): + """OCResult created from failed subprocess.""" + mock_result = 
MagicMock(spec=subprocess.CompletedProcess) + mock_result.returncode = 1 + mock_result.stdout = "" + mock_result.stderr = "error: resource not found" + + result = OCResult.from_completed_process( + mock_result, + command=["oc", "get", "pod", "missing"], + attempts=3, + duration=5.0, + ) + + assert result.success is False + assert result.returncode == 1 + assert result.stderr == "error: resource not found" + assert result.attempts == 3 + + def test_from_error(self): + """OCResult created from exception.""" + error = subprocess.TimeoutExpired(cmd=["oc", "get", "pods"], timeout=30) + + result = OCResult.from_error( + error, + command=["oc", "get", "pods"], + attempts=4, + duration=120.0, + ) + + assert result.success is False + assert result.returncode == -1 + assert "timed out" in result.stderr.lower() or "timeout" in result.stderr.lower() + assert result.attempts == 4 + + +class TestOC: + """Tests for OC wrapper class.""" + + def test_init_defaults(self): + """OC initializes with sensible defaults.""" + oc = OC() + + assert oc.namespace is None + assert oc.timeout == 60 + assert isinstance(oc.retry, RetryConfig) + + def test_init_with_namespace(self): + """OC accepts namespace.""" + oc = OC(namespace="forge") + + assert oc.namespace == "forge" + + def test_init_with_custom_retry(self): + """OC accepts custom retry config.""" + config = RetryConfig(max_retries=10) + oc = OC(retry=config) + + assert oc.retry.max_retries == 10 + + def test_build_cmd_with_namespace(self): + """Commands include namespace when set.""" + oc = OC(namespace="forge") + cmd = oc._build_cmd(["get", "pods"]) + + assert cmd == ["oc", "-n", "forge", "get", "pods"] + + def test_build_cmd_without_namespace(self): + """Commands work without namespace.""" + oc = OC() + cmd = oc._build_cmd(["get", "namespaces"]) + + assert cmd == ["oc", "get", "namespaces"] + + def test_build_cmd_namespace_override(self): + """Namespace can be overridden per command.""" + oc = OC(namespace="default") + cmd = 
oc._build_cmd(["get", "pods"], namespace="other") + + assert cmd == ["oc", "-n", "other", "get", "pods"] + + @patch("subprocess.run") + def test_get_success(self, mock_run): + """get() returns successful result.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout="NAME READY STATUS\nmy-pod 1/1 Running", + stderr="", + ) + + oc = OC(namespace="forge") + result = oc.get("pods") + + assert result.success is True + assert "my-pod" in result.stdout + mock_run.assert_called_once() + call_args = mock_run.call_args + assert "oc" in call_args[0][0] + assert "-n" in call_args[0][0] + assert "forge" in call_args[0][0] + assert "get" in call_args[0][0] + assert "pods" in call_args[0][0] + + @patch("subprocess.run") + def test_get_with_selector(self, mock_run): + """get() passes additional arguments.""" + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + oc = OC(namespace="forge") + oc.get("pods", "-l", "app=vllm", "-o", "yaml") + + call_args = mock_run.call_args[0][0] + assert "-l" in call_args + assert "app=vllm" in call_args + assert "-o" in call_args + assert "yaml" in call_args + + @patch("subprocess.run") + def test_apply_success(self, mock_run): + """apply() works with file path.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout="pod/my-pod created", + stderr="", + ) + + oc = OC(namespace="forge") + result = oc.apply("-f", "manifest.yaml") + + assert result.success is True + call_args = mock_run.call_args[0][0] + assert "apply" in call_args + assert "-f" in call_args + assert "manifest.yaml" in call_args + + @patch("subprocess.run") + def test_apply_with_stdin(self, mock_run): + """apply() accepts input via stdin.""" + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + oc = OC(namespace="forge") + yaml_content = "apiVersion: v1\nkind: Pod\n..." 
+ oc.apply("-f", "-", input=yaml_content) + + call_args = mock_run.call_args + assert call_args.kwargs.get("input") == yaml_content + + @patch("subprocess.run") + def test_delete_success(self, mock_run): + """delete() deletes resources.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout='pod "my-pod" deleted', + stderr="", + ) + + oc = OC(namespace="forge") + result = oc.delete("pod", "my-pod") + + assert result.success is True + call_args = mock_run.call_args[0][0] + assert "delete" in call_args + assert "pod" in call_args + assert "my-pod" in call_args + + @patch("subprocess.run") + def test_delete_with_ignore_not_found(self, mock_run): + """delete() passes extra flags.""" + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + oc = OC(namespace="forge") + oc.delete("pod", "my-pod", "--ignore-not-found") + + call_args = mock_run.call_args[0][0] + assert "--ignore-not-found" in call_args + + @patch("subprocess.run") + def test_logs_success(self, mock_run): + """logs() retrieves pod logs.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout="INFO: Server started\nINFO: Ready", + stderr="", + ) + + oc = OC(namespace="forge") + result = oc.logs("my-pod") + + assert result.success is True + assert "Server started" in result.stdout + call_args = mock_run.call_args[0][0] + assert "logs" in call_args + assert "my-pod" in call_args + + @patch("subprocess.run") + def test_logs_with_container(self, mock_run): + """logs() accepts container flag.""" + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + + oc = OC(namespace="forge") + oc.logs("my-pod", "-c", "sidecar", "--tail=100") + + call_args = mock_run.call_args[0][0] + assert "-c" in call_args + assert "sidecar" in call_args + assert "--tail=100" in call_args + + @patch("subprocess.run") + def test_exec_success(self, mock_run): + """exec() runs command in pod.""" + mock_run.return_value = MagicMock( + returncode=0, + stdout='{"status": "healthy"}', + 
stderr="", + ) + + oc = OC(namespace="forge") + result = oc.exec("my-pod", "--", "curl", "localhost:8080/health") + + assert result.success is True + call_args = mock_run.call_args[0][0] + assert "exec" in call_args + assert "my-pod" in call_args + assert "--" in call_args + assert "curl" in call_args + + +class TestOCRetry: + """Tests for OC retry behavior.""" + + @patch("time.sleep") + @patch("subprocess.run") + def test_retry_on_transient_error(self, mock_run, mock_sleep): + """OC retries on transient network errors.""" + # First call fails with connection error, second succeeds + mock_run.side_effect = [ + MagicMock(returncode=1, stdout="", stderr="connection refused"), + MagicMock(returncode=0, stdout="success", stderr=""), + ] + + oc = OC(namespace="forge", retry=RetryConfig(max_retries=3, initial_delay=0.1)) + result = oc.get("pods") + + assert result.success is True + assert result.attempts == 2 + assert mock_run.call_count == 2 + mock_sleep.assert_called_once() # Slept once between retries + + @patch("time.sleep") + @patch("subprocess.run") + def test_no_retry_on_permanent_error(self, mock_run, mock_sleep): + """OC does not retry on non-transient errors.""" + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="error: resource not found", + ) + + oc = OC(namespace="forge", retry=RetryConfig(max_retries=3)) + result = oc.get("pod", "nonexistent") + + assert result.success is False + assert result.attempts == 1 + assert mock_run.call_count == 1 + mock_sleep.assert_not_called() # No sleep - didn't retry + + @patch("time.sleep") + @patch("subprocess.run") + def test_retry_exhausted(self, mock_run, mock_sleep): + """OC returns failure after exhausting retries.""" + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="connection timed out", + ) + + oc = OC(namespace="forge", retry=RetryConfig(max_retries=2, initial_delay=0.1)) + result = oc.get("pods") + + assert result.success is False + assert result.attempts == 3 # 
Initial + 2 retries + assert mock_run.call_count == 3 + assert mock_sleep.call_count == 2 # Slept between each retry + + @patch("time.sleep") + @patch("subprocess.run") + def test_retry_on_timeout(self, mock_run, mock_sleep): + """OC retries on subprocess timeout.""" + mock_run.side_effect = [ + subprocess.TimeoutExpired(cmd=["oc"], timeout=30), + MagicMock(returncode=0, stdout="success", stderr=""), + ] + + oc = OC( + namespace="forge", + retry=RetryConfig(max_retries=3, retry_on_timeout=True, initial_delay=0.1), + ) + result = oc.get("pods") + + assert result.success is True + assert result.attempts == 2 + + @patch("time.sleep") + @patch("subprocess.run") + def test_no_retry_on_timeout_when_disabled(self, mock_run, mock_sleep): + """OC does not retry timeout when disabled.""" + mock_run.side_effect = subprocess.TimeoutExpired(cmd=["oc"], timeout=30) + + oc = OC( + namespace="forge", + retry=RetryConfig(max_retries=3, retry_on_timeout=False), + ) + result = oc.get("pods") + + assert result.success is False + assert result.attempts == 1 + mock_sleep.assert_not_called() + + @patch("time.sleep") + @patch("subprocess.run") + def test_exponential_backoff(self, mock_run, mock_sleep): + """OC uses exponential backoff between retries.""" + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="service unavailable", + ) + + oc = OC( + namespace="forge", + retry=RetryConfig( + max_retries=3, + initial_delay=1.0, + backoff_multiplier=2.0, + max_delay=10.0, + ), + ) + oc.get("pods") + + # Check backoff delays: 1.0, 2.0, 4.0 + calls = mock_sleep.call_args_list + assert len(calls) == 3 + assert calls[0][0][0] == 1.0 + assert calls[1][0][0] == 2.0 + assert calls[2][0][0] == 4.0 + + @patch("time.sleep") + @patch("subprocess.run") + def test_max_delay_cap(self, mock_run, mock_sleep): + """OC caps delay at max_delay.""" + mock_run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="service unavailable", + ) + + oc = OC( + namespace="forge", + 
retry=RetryConfig( + max_retries=5, + initial_delay=10.0, + backoff_multiplier=2.0, + max_delay=15.0, + ), + ) + oc.get("pods") + + # Delays should be: 10.0, 15.0 (capped), 15.0, 15.0, 15.0 + calls = mock_sleep.call_args_list + assert calls[0][0][0] == 10.0 + assert calls[1][0][0] == 15.0 # Capped + assert calls[2][0][0] == 15.0 + assert calls[3][0][0] == 15.0 + assert calls[4][0][0] == 15.0 + + +class TestOCTransientErrorDetection: + """Tests for transient error detection.""" + + @pytest.mark.parametrize( + "stderr", + [ + "connection refused", + "Connection reset by peer", + "Unable to connect to the server", + "no route to host", + "etcdserver: request timed out", + "context deadline exceeded", + "the server was unable to return a response", + "unexpected EOF", + "i/o timeout", + "TLS handshake timeout", + "Service Unavailable", + "too many requests", + ], + ) + @patch("time.sleep") + @patch("subprocess.run") + def test_transient_error_patterns(self, mock_run, mock_sleep, stderr): + """OC recognizes various transient error patterns.""" + mock_run.side_effect = [ + MagicMock(returncode=1, stdout="", stderr=stderr), + MagicMock(returncode=0, stdout="success", stderr=""), + ] + + oc = OC(retry=RetryConfig(max_retries=1, initial_delay=0.1)) + result = oc.get("pods") + + assert result.success is True + assert mock_run.call_count == 2, f"Should retry for: {stderr}" + + @pytest.mark.parametrize( + "stderr", + [ + "error: resource not found", + "Error: pod not found", + "forbidden: User cannot get resource", + "invalid: spec.containers: Required value", + ], + ) + @patch("time.sleep") + @patch("subprocess.run") + def test_non_transient_error_patterns(self, mock_run, mock_sleep, stderr): + """OC does not retry non-transient errors.""" + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr=stderr) + + oc = OC(retry=RetryConfig(max_retries=3)) + result = oc.get("pods") + + assert result.success is False + assert mock_run.call_count == 1, f"Should not retry 
for: {stderr}" + mock_sleep.assert_not_called() diff --git a/tests/core/workflow/__init__.py b/tests/core/workflow/__init__.py new file mode 100644 index 0000000..a659d00 --- /dev/null +++ b/tests/core/workflow/__init__.py @@ -0,0 +1 @@ +"""Tests for workflow engine.""" diff --git a/tests/core/workflow/test_context.py b/tests/core/workflow/test_context.py new file mode 100644 index 0000000..6940a17 --- /dev/null +++ b/tests/core/workflow/test_context.py @@ -0,0 +1,106 @@ +"""Unit tests for WorkflowContext.""" + +import os +import tempfile +from pathlib import Path + +import pytest + +from projects.core.workflow import WorkflowContext + + +class TestWorkflowContext: + """Tests for WorkflowContext.""" + + @pytest.fixture + def temp_artifact_dir(self): + """Create temporary artifact directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + def test_from_environment_creates_uuid(self, temp_artifact_dir): + """Context generates a unique run UUID.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + assert ctx.run_uuid is not None + assert len(ctx.run_uuid) == 36 # UUID format + + def test_from_environment_creates_artifact_dir(self, temp_artifact_dir): + """Context creates artifact directory.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + assert ctx.artifact_dir.exists() + assert ctx.artifact_dir.is_dir() + assert (ctx.artifact_dir / "_meta").exists() + + def test_from_environment_captures_forge_vars(self, temp_artifact_dir, monkeypatch): + """Context captures FORGE_* environment variables.""" + monkeypatch.setenv("FORGE_MODEL", "test-model") + monkeypatch.setenv("FORGE_VLLM_IMAGE", "test-image") + monkeypatch.setenv("OTHER_VAR", "should-not-capture") + + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + assert ctx.env_vars["FORGE_MODEL"] == "test-model" + assert ctx.env_vars["FORGE_VLLM_IMAGE"] == "test-image" + assert "OTHER_VAR" not in 
ctx.env_vars + + def test_get_env_with_prefix(self, temp_artifact_dir, monkeypatch): + """get_env works with FORGE_ prefix.""" + monkeypatch.setenv("FORGE_MODEL", "my-model") + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + # With prefix + assert ctx.get_env("FORGE_MODEL") == "my-model" + # Without prefix (auto-added) + assert ctx.get_env("MODEL") == "my-model" + + def test_get_env_default(self, temp_artifact_dir): + """get_env returns default for missing vars.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + assert ctx.get_env("NONEXISTENT") is None + assert ctx.get_env("NONEXISTENT", "default-value") == "default-value" + + def test_get_step_artifact_dir(self, temp_artifact_dir): + """get_step_artifact_dir creates numbered directories.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + dir1 = ctx.get_step_artifact_dir("deploy") + dir2 = ctx.get_step_artifact_dir("benchmark") + dir3 = ctx.get_step_artifact_dir("cleanup") + + assert dir1.name == "001__deploy" + assert dir2.name == "002__benchmark" + assert dir3.name == "003__cleanup" + + assert dir1.exists() + assert dir2.exists() + assert dir3.exists() + + def test_write_metadata(self, temp_artifact_dir): + """write_metadata creates YAML file.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.config = {"key": "value"} + + path = ctx.write_metadata(args={"model": "test"}) + + assert path.exists() + content = path.read_text() + assert "run_uuid" in content + assert "model: test" in content + + def test_write_restart_script(self, temp_artifact_dir): + """write_restart_script creates executable script.""" + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + command = "python run.py --model test" + path = ctx.write_restart_script(command) + + assert path.exists() + assert os.access(path, os.X_OK) # Executable + + content = path.read_text() + assert 
"#!/bin/bash" in content + assert command in content + assert ctx.run_uuid in content diff --git a/tests/core/workflow/test_executor.py b/tests/core/workflow/test_executor.py new file mode 100644 index 0000000..5d4b129 --- /dev/null +++ b/tests/core/workflow/test_executor.py @@ -0,0 +1,220 @@ +"""Unit tests for SequentialExecutor.""" + +import tempfile +from pathlib import Path + +import pytest + +from projects.core.workflow import ( + SequentialExecutor, + StepResult, + Workflow, + WorkflowContext, + WorkflowStep, +) + + +class PassingStep(WorkflowStep): + """A step that always succeeds.""" + + def __init__(self, name: str, record: list[str]): + super().__init__(name=name) + self.record = record + + def execute(self, ctx: WorkflowContext) -> StepResult: + self.record.append(f"executed:{self.name}") + return StepResult.ok(f"Step {self.name} passed") + + +class FailingStep(WorkflowStep): + """A step that always fails.""" + + def __init__(self, name: str, record: list[str]): + super().__init__(name=name) + self.record = record + + def execute(self, ctx: WorkflowContext) -> StepResult: + self.record.append(f"executed:{self.name}") + return StepResult.fail(f"Step {self.name} failed") + + +class ExceptionStep(WorkflowStep): + """A step that raises an exception.""" + + def __init__(self, name: str, record: list[str]): + super().__init__(name=name) + self.record = record + + def execute(self, ctx: WorkflowContext) -> StepResult: + self.record.append(f"executed:{self.name}") + raise RuntimeError(f"Step {self.name} exploded") + + +class TestSequentialExecutor: + """Tests for SequentialExecutor.""" + + @pytest.fixture + def temp_artifact_dir(self): + """Create temporary artifact directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + """Create workflow context with temp artifact dir.""" + return WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + def 
test_steps_run_in_order(self, context): + """Steps execute in registration order.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + self.add_step(PassingStep("first", record)) + self.add_step(PassingStep("second", record)) + self.add_step(PassingStep("third", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert result.success + assert record == ["executed:first", "executed:second", "executed:third"] + assert "first" in result.step_results + assert "second" in result.step_results + assert "third" in result.step_results + + def test_finally_runs_on_success(self, context): + """Finally steps run after successful completion.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + self.add_step(PassingStep("main", record)) + self.add_finally(PassingStep("cleanup", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert result.success + assert record == ["executed:main", "executed:cleanup"] + + def test_finally_runs_on_failure(self, context): + """Finally steps execute even when normal steps fail.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + self.add_step(PassingStep("first", record)) + self.add_step(FailingStep("failing", record)) + self.add_step(PassingStep("skipped", record)) + self.add_finally(PassingStep("cleanup1", record)) + self.add_finally(PassingStep("cleanup2", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert not result.success + assert result.failed_step == "failing" + # "skipped" should NOT be in the record + assert record == [ + "executed:first", + "executed:failing", + "executed:cleanup1", + "executed:cleanup2", + ] + + def test_finally_runs_on_exception(self, context): + """Finally steps run even when a step raises an exception.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + 
self.add_step(PassingStep("first", record)) + self.add_step(ExceptionStep("exploding", record)) + self.add_step(PassingStep("skipped", record)) + self.add_finally(PassingStep("cleanup", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert not result.success + assert result.failed_step == "exploding" + assert "exploding" in result.step_results + assert result.step_results["exploding"].error is not None + assert record == ["executed:first", "executed:exploding", "executed:cleanup"] + + def test_all_finally_steps_run_even_if_one_fails(self, context): + """All finally steps run even if one fails.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + self.add_step(PassingStep("main", record)) + self.add_finally(FailingStep("cleanup1", record)) + self.add_finally(PassingStep("cleanup2", record)) + self.add_finally(ExceptionStep("cleanup3", record)) + self.add_finally(PassingStep("cleanup4", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + # Main workflow succeeded, finally failures don't affect overall success + assert result.success + assert record == [ + "executed:main", + "executed:cleanup1", + "executed:cleanup2", + "executed:cleanup3", + "executed:cleanup4", + ] + + def test_empty_workflow(self, context): + """Empty workflow completes successfully.""" + + class TestWorkflow(Workflow): + def define_steps(self): + pass + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert result.success + assert len(result.step_results) == 0 + + def test_duration_tracking(self, context): + """Workflow tracks total duration.""" + record: list[str] = [] + + class TestWorkflow(Workflow): + def define_steps(self): + self.add_step(PassingStep("step1", record)) + + workflow = TestWorkflow(context) + result = workflow.execute() + + assert result.duration_seconds >= 0 + assert result.start_time is not None + assert result.end_time is not None + assert result.run_uuid == 
context.run_uuid + + +class TestStepResult: + """Tests for StepResult helper methods.""" + + def test_ok_result(self): + """StepResult.ok creates successful result.""" + result = StepResult.ok("All good", foo="bar") + + assert result.success + assert result.message == "All good" + assert result.data == {"foo": "bar"} + assert result.error is None + + def test_fail_result(self): + """StepResult.fail creates failed result.""" + error = ValueError("bad input") + result = StepResult.fail("Something went wrong", error=error) + + assert not result.success + assert result.message == "Something went wrong" + assert result.error is error diff --git a/tests/rhaiis/__init__.py b/tests/rhaiis/__init__.py new file mode 100644 index 0000000..0197469 --- /dev/null +++ b/tests/rhaiis/__init__.py @@ -0,0 +1 @@ +"""Tests for RHAIIS project.""" diff --git a/tests/rhaiis/test_ci.py b/tests/rhaiis/test_ci.py new file mode 100644 index 0000000..cf7275f --- /dev/null +++ b/tests/rhaiis/test_ci.py @@ -0,0 +1,82 @@ +"""Unit tests for RHAIIS CI CLI.""" + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest +from click.testing import CliRunner + +from projects.rhaiis.orchestration.ci import ci + + +class TestCiPrepare: + """Tests for ci prepare command.""" + + @pytest.fixture + def runner(self): + return CliRunner() + + def test_prepare_dry_run(self, runner): + """prepare --dry-run shows what would be done.""" + result = runner.invoke(ci, ["prepare", "--dry-run"]) + + assert result.exit_code == 0 + assert "DRY-RUN" in result.output + assert "RHOAI" in result.output + + +class TestCiTest: + """Tests for ci test command.""" + + @pytest.fixture + def runner(self): + return CliRunner(env={"FORGE_ARTIFACT_DIR": "/tmp/artifacts"}) + + def test_test_dry_run(self, runner): + """test --dry-run shows model from env.""" + result = runner.invoke( + ci, ["test", "--dry-run"], + env={"FORGE_MODEL": "test/model", "FORGE_ARTIFACT_DIR": "/tmp/artifacts"} + ) + + assert 
result.exit_code == 0 + assert "test/model" in result.output + + def test_test_dry_run_with_workloads(self, runner): + """test --dry-run shows workloads from env.""" + result = runner.invoke( + ci, ["test", "--dry-run"], + env={ + "FORGE_MODEL": "test/model", + "FORGE_WORKLOADS": "balanced,heterogeneous", + "FORGE_ARTIFACT_DIR": "/tmp/artifacts", + } + ) + + assert result.exit_code == 0 + assert "balanced" in result.output or "heterogeneous" in result.output + +class TestCiCleanup: + """Tests for ci cleanup command.""" + + @pytest.fixture + def runner(self): + return CliRunner() + + def test_cleanup_dry_run(self, runner): + """cleanup --dry-run shows what would be done.""" + result = runner.invoke(ci, ["cleanup", "--dry-run"]) + + assert result.exit_code == 0 + assert "DRY-RUN" in result.output + + def test_cleanup_with_namespace(self, runner): + """cleanup accepts custom namespace via env.""" + result = runner.invoke( + ci, ["cleanup", "--dry-run"], + env={"FORGE_NAMESPACE": "custom-ns"} + ) + + assert result.exit_code == 0 + assert "custom-ns" in result.output diff --git a/tests/rhaiis/test_operators.py b/tests/rhaiis/test_operators.py new file mode 100644 index 0000000..16d575e --- /dev/null +++ b/tests/rhaiis/test_operators.py @@ -0,0 +1,178 @@ +"""Unit tests for RHAIIS operator installation steps.""" + +import subprocess +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from projects.core.workflow import WorkflowContext +from projects.rhaiis.workflows.steps import ( + InstallGPUOperatorStep, + InstallNFDOperatorStep, + InstallRHOAIOperatorStep, +) + + +class TestInstallNFDOperatorStep: + """Tests for InstallNFDOperatorStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + 
ctx.get_step_artifact_dir("install_nfd") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = InstallNFDOperatorStep() + assert step.name == "install_nfd" + + @patch("subprocess.run") + def test_execute_success(self, mock_run, context): + """InstallNFDOperatorStep executes successfully.""" + mock_run.return_value = MagicMock(returncode=0, stdout="created", stderr="") + + step = InstallNFDOperatorStep() + result = step.execute(context) + + assert result.success + assert "NFD" in result.message + + @patch("subprocess.run") + def test_creates_subscription_yaml(self, mock_run, context): + """InstallNFDOperatorStep creates subscription YAML.""" + mock_run.return_value = MagicMock(returncode=0) + + step = InstallNFDOperatorStep() + result = step.execute(context) + + # Check YAML file was created + step_dir = context.artifact_dir / f"{context.step_number:03d}__{context.current_step_name}" + yaml_file = step_dir / "nfd-subscription.yaml" + assert yaml_file.exists() + content = yaml_file.read_text() + assert "openshift-nfd" in content + + +class TestInstallGPUOperatorStep: + """Tests for InstallGPUOperatorStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("install_gpu") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = InstallGPUOperatorStep() + assert step.name == "install_gpu" + + @patch("subprocess.run") + def test_execute_success(self, mock_run, context): + """InstallGPUOperatorStep executes successfully.""" + mock_run.return_value = MagicMock(returncode=0, stdout="created", stderr="") + + step = InstallGPUOperatorStep() + result = step.execute(context) + + assert result.success + assert "GPU" in result.message + + @patch("subprocess.run") + def 
test_execute_failure(self, mock_run, context): + """InstallGPUOperatorStep handles failure.""" + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="error") + + step = InstallGPUOperatorStep() + result = step.execute(context) + + assert not result.success + + @patch("subprocess.run") + def test_creates_subscription_yaml(self, mock_run, context): + """InstallGPUOperatorStep creates subscription YAML.""" + mock_run.return_value = MagicMock(returncode=0) + + step = InstallGPUOperatorStep() + step.execute(context) + + step_dir = context.artifact_dir / f"{context.step_number:03d}__{context.current_step_name}" + yaml_file = step_dir / "gpu-subscription.yaml" + assert yaml_file.exists() + content = yaml_file.read_text() + assert "gpu-operator-certified" in content + + +class TestInstallRHOAIOperatorStep: + """Tests for InstallRHOAIOperatorStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("install_rhoai") + return ctx + + def test_step_name(self): + """Step has correct default name.""" + step = InstallRHOAIOperatorStep() + assert step.name == "install_rhoai" + + def test_custom_version(self): + """Step accepts custom RHOAI version.""" + step = InstallRHOAIOperatorStep(version="2.20") + assert step.version == "2.20" + + @patch("subprocess.run") + def test_execute_success(self, mock_run, context): + """InstallRHOAIOperatorStep executes successfully.""" + mock_run.return_value = MagicMock(returncode=0, stdout="created", stderr="") + + step = InstallRHOAIOperatorStep(version="2.19") + result = step.execute(context) + + assert result.success + assert "RHOAI" in result.message + assert "2.19" in result.message + + @patch("subprocess.run") + def test_execute_failure(self, mock_run, context): + """InstallRHOAIOperatorStep 
handles failure.""" + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="error") + + step = InstallRHOAIOperatorStep() + result = step.execute(context) + + assert not result.success + + @patch("subprocess.run") + def test_creates_subscription_yaml_with_channel(self, mock_run, context): + """InstallRHOAIOperatorStep creates subscription with correct channel.""" + mock_run.return_value = MagicMock(returncode=0) + + step = InstallRHOAIOperatorStep(version="2.19") + step.execute(context) + + step_dir = context.artifact_dir / f"{context.step_number:03d}__{context.current_step_name}" + yaml_file = step_dir / "rhoai-subscription.yaml" + assert yaml_file.exists() + content = yaml_file.read_text() + assert "stable-2.19" in content + assert "rhods-operator" in content diff --git a/tests/rhaiis/test_steps.py b/tests/rhaiis/test_steps.py new file mode 100644 index 0000000..2623db1 --- /dev/null +++ b/tests/rhaiis/test_steps.py @@ -0,0 +1,265 @@ +"""Unit tests for RHAIIS workflow steps.""" + +import subprocess +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from projects.core.workflow import WorkflowContext +from projects.rhaiis.workflows.steps import ( + CleanupNamespaceStep, + DeployVLLMStep, + WaitForReadyStep, +) + + +class TestDeployVLLMStep: + """Tests for DeployVLLMStep (KServe-based).""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + # Pre-increment step number to simulate workflow execution + ctx.get_step_artifact_dir("deploy") + return ctx + + @pytest.fixture + def default_runtime_args(self): + """Default runtime args for tests.""" + return { + "gpu-memory-utilization": 0.9, + "max-model-len": 4096, + "tensor-parallel-size": 1, + } + + def test_generates_kserve_yaml(self, context, 
default_runtime_args): + """DeployVLLMStep generates valid KServe YAML.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args={**default_runtime_args, "tensor-parallel-size": 2}, + tensor_parallel=2, + namespace="test-ns", + ) + + yaml_content = step._generate_kserve_yaml() + + assert "apiVersion: serving.kserve.io/v1alpha1" in yaml_content + assert "kind: ServingRuntime" in yaml_content + assert "apiVersion: serving.kserve.io/v1beta1" in yaml_content + assert "kind: InferenceService" in yaml_content + assert "name: test-deploy" in yaml_content + assert "namespace: test-ns" in yaml_content + assert 'nvidia.com/gpu: "2"' in yaml_content + + def test_generates_shared_memory_for_tp(self, context, default_runtime_args): + """DeployVLLMStep includes shared memory volume for tensor parallel > 1.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args={**default_runtime_args, "tensor-parallel-size": 4}, + tensor_parallel=4, + ) + + yaml_content = step._generate_kserve_yaml() + + assert "shared-memory" in yaml_content + assert "/dev/shm" in yaml_content + assert "sizeLimit: 8Gi" in yaml_content + + def test_shared_memory_always_present(self, context, default_runtime_args): + """DeployVLLMStep includes shared memory even for tensor parallel = 1.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args=default_runtime_args, + tensor_parallel=1, + ) + + yaml_content = step._generate_kserve_yaml() + + # Shared memory is always required for vLLM + assert "shared-memory" in yaml_content + + def test_amd_accelerator(self, context, default_runtime_args): + """DeployVLLMStep uses AMD GPU resources.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args=default_runtime_args, + accelerator="amd", + 
tensor_parallel=1, + ) + + yaml_content = step._generate_kserve_yaml() + + assert "amd.com/gpu" in yaml_content + + def test_hf_storage_source(self, context, default_runtime_args): + """DeployVLLMStep configures HuggingFace storage.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args=default_runtime_args, + storage_source="hf", + storage_path="models-pvc", + ) + + yaml_content = step._generate_kserve_yaml() + + assert "HF_TOKEN" in yaml_content + assert "HF_HOME" in yaml_content + assert "pvc://models-pvc" in yaml_content + + def test_custom_runtime_args(self, context): + """DeployVLLMStep includes custom runtime args.""" + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args={"enable-prefix-caching": True, "max-num-seqs": 256}, + ) + + yaml_content = step._generate_kserve_yaml() + + assert "--enable-prefix-caching" in yaml_content + assert "--max-num-seqs=256" in yaml_content + + @patch("subprocess.run") + def test_execute_success(self, mock_run, context, default_runtime_args): + """DeployVLLMStep executes successfully.""" + mock_run.return_value = MagicMock(returncode=0, stdout="created", stderr="") + + step = DeployVLLMStep( + model="test/model", + deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args=default_runtime_args, + ) + result = step.execute(context) + + assert result.success + assert mock_run.called + + @patch("subprocess.run") + def test_execute_failure(self, mock_run, context, default_runtime_args): + """DeployVLLMStep handles apply failure.""" + # First two calls (namespace creation) succeed, third (apply) fails + mock_run.side_effect = [ + MagicMock(returncode=0), # namespace dry-run + MagicMock(returncode=0), # namespace apply + MagicMock(returncode=1, stdout="", stderr="error applying"), # kserve apply + ] + + step = DeployVLLMStep( + model="test/model", + 
deployment_name="test-deploy", + vllm_image="test/image:v1", + runtime_args=default_runtime_args, + ) + result = step.execute(context) + + assert not result.success + assert "error applying" in result.message + + +class TestWaitForReadyStep: + """Tests for WaitForReadyStep (InferenceService).""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("wait") + return ctx + + @patch("subprocess.run") + def test_wait_success_immediate(self, mock_run, context): + """WaitForReadyStep succeeds when InferenceService is ready.""" + # Order: status check -> URL -> get pod name -> health check + mock_run.side_effect = [ + MagicMock(returncode=0, stdout="True", stderr=""), # status check + MagicMock(returncode=0, stdout="http://test.svc", stderr=""), # URL + MagicMock(returncode=0, stdout="test-pod-abc", stderr=""), # get pod name + MagicMock(returncode=0, stdout="200", stderr=""), # health check curl + ] + + step = WaitForReadyStep( + deployment_name="test", + timeout_seconds=30, + poll_interval=1, + ) + result = step.execute(context) + + assert result.success + assert "ready" in result.message.lower() + assert result.data.get("service_url") == "http://test.svc" + + @patch("subprocess.run") + @patch("time.sleep") + def test_wait_timeout(self, mock_sleep, mock_run, context): + """WaitForReadyStep fails on timeout.""" + mock_run.return_value = MagicMock(returncode=0, stdout="False", stderr="") + + step = WaitForReadyStep( + deployment_name="test", + timeout_seconds=2, + poll_interval=1, + ) + result = step.execute(context) + + assert not result.success + assert "not ready" in result.message.lower() + + +class TestCleanupNamespaceStep: + """Tests for CleanupNamespaceStep.""" + + @pytest.fixture + def temp_artifact_dir(self): + with 
tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + ctx = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + ctx.get_step_artifact_dir("cleanup") + return ctx + + @patch("subprocess.run") + def test_cleanup_success(self, mock_run, context): + """CleanupNamespaceStep cleans up resources.""" + mock_run.return_value = MagicMock(returncode=0) + + step = CleanupNamespaceStep(namespace="test-ns") + result = step.execute(context) + + # Cleanup step never fails + assert result.success + + @patch("subprocess.run") + def test_cleanup_with_errors(self, mock_run, context): + """CleanupNamespaceStep handles errors gracefully.""" + mock_run.return_value = MagicMock(returncode=1, stderr="not found") + + step = CleanupNamespaceStep(namespace="test-ns") + result = step.execute(context) + + # Still succeeds - cleanup is best effort + assert result.success diff --git a/tests/rhaiis/test_workflows.py b/tests/rhaiis/test_workflows.py new file mode 100644 index 0000000..92cbbcc --- /dev/null +++ b/tests/rhaiis/test_workflows.py @@ -0,0 +1,118 @@ +"""Unit tests for RHAIIS workflows.""" + +import tempfile +from pathlib import Path + +import pytest + +from projects.core.workflow import WorkflowContext +from projects.rhaiis.workflows import BenchmarkWorkflow, CleanupWorkflow, PrepareWorkflow + + +class TestBenchmarkWorkflow: + """Tests for BenchmarkWorkflow.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + return WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + def test_workflow_defines_steps(self, context): + """BenchmarkWorkflow defines required steps.""" + workflow = BenchmarkWorkflow( + ctx=context, + model="Qwen/Qwen3-0.6B", + workload="balanced", + ) + + # Force step definition + workflow._ensure_defined() + + # Should have deploy, 
wait, benchmark steps + step_names = [s.name for s in workflow.steps] + assert "deploy" in step_names + assert "wait" in step_names + assert "benchmark" in step_names + + # Should have finally steps + finally_names = [s.name for s in workflow.finally_steps] + assert "collect_artifacts" in finally_names + assert "cleanup" in finally_names + + def test_workflow_uses_custom_image(self, context): + """BenchmarkWorkflow uses custom vLLM image.""" + custom_image = "custom/vllm:latest" + workflow = BenchmarkWorkflow( + ctx=context, + model="test/model", + vllm_image=custom_image, + ) + + assert workflow.vllm_image == custom_image + + def test_workflow_sanitizes_deployment_name(self, context): + """BenchmarkWorkflow sanitizes deployment name.""" + workflow = BenchmarkWorkflow( + ctx=context, + model="Qwen/Qwen3-0.6B-Instruct", + ) + + # Should be lowercase, no special chars + assert workflow.deployment_name == "qwen3-0-6b-instruct" + + def test_workflow_uses_env_image(self, temp_artifact_dir, monkeypatch): + """BenchmarkWorkflow uses FORGE_VLLM_IMAGE from env.""" + monkeypatch.setenv("FORGE_VLLM_IMAGE", "env/vllm:test") + context = WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + workflow = BenchmarkWorkflow(ctx=context, model="test/model") + + assert workflow.vllm_image == "env/vllm:test" + + +class TestPrepareWorkflow: + """Tests for PrepareWorkflow.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + return WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + def test_prepare_defines_operator_steps(self, context): + """PrepareWorkflow defines operator installation steps.""" + workflow = PrepareWorkflow(ctx=context, rhoai_version="2.19") + workflow._ensure_defined() + + step_names = [s.name for s in workflow.steps] + assert "install_nfd" in step_names + assert "install_gpu" in step_names + 
assert "install_rhoai" in step_names + + +class TestCleanupWorkflow: + """Tests for CleanupWorkflow.""" + + @pytest.fixture + def temp_artifact_dir(self): + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + @pytest.fixture + def context(self, temp_artifact_dir): + return WorkflowContext.from_environment(artifact_base=str(temp_artifact_dir)) + + def test_cleanup_defines_steps(self, context): + """CleanupWorkflow defines cleanup steps.""" + workflow = CleanupWorkflow(ctx=context, namespace="test-ns") + workflow._ensure_defined() + + step_names = [s.name for s in workflow.steps] + assert "cleanup_namespace" in step_names