diff --git a/src/agentex/lib/cli/commands/agents.py b/src/agentex/lib/cli/commands/agents.py index ea7e9889..5dfb5986 100644 --- a/src/agentex/lib/cli/commands/agents.py +++ b/src/agentex/lib/cli/commands/agents.py @@ -19,6 +19,11 @@ InputDeployOverrides, deploy_agent, ) +from agentex.lib.sdk.config.validation import ( + validate_manifest_and_environments, + EnvironmentsValidationError, + generate_helpful_error_message +) from agentex.lib.cli.utils.cli_utils import handle_questionary_cancellation from agentex.lib.cli.utils.kubectl_utils import ( check_and_switch_cluster_context, @@ -243,18 +248,18 @@ def deploy( cluster: str = typer.Option( ..., help="Target cluster name (must match kubectl context)" ), + environment: str = typer.Option( + ..., help="Environment name (dev, prod, etc.) - must be defined in environments.yaml" + ), manifest: str = typer.Option("manifest.yaml", help="Path to the manifest file"), namespace: str | None = typer.Option( None, - help="Kubernetes namespace to deploy to (required in non-interactive mode)", + help="Override Kubernetes namespace (defaults to namespace from environments.yaml)", ), tag: str | None = typer.Option(None, help="Override the image tag for deployment"), repository: str | None = typer.Option( None, help="Override the repository for deployment" ), - override_file: str | None = typer.Option( - None, help="Path to override configuration file" - ), interactive: bool = typer.Option( True, "--interactive/--no-interactive", help="Enable interactive prompts" ), @@ -272,45 +277,43 @@ def deploy( console.print(f"[red]Error:[/red] Manifest file not found: {manifest}") raise typer.Exit(1) - # In non-interactive mode, require namespace - if not interactive and not namespace: - console.print( - "[red]Error:[/red] --namespace is required in non-interactive mode" + # Validate manifest and environments configuration + try: + _, environments_config = validate_manifest_and_environments( + str(manifest_path), + 
required_environment=environment ) + agent_env_config = environments_config.get_config_for_env(environment) + console.print(f"[green]✓[/green] Environment config validated: {environment}") + + except EnvironmentsValidationError as e: + error_msg = generate_helpful_error_message(e, "Environment validation failed") + console.print(f"[red]Configuration Error:[/red]\n{error_msg}") + raise typer.Exit(1) + except Exception as e: + console.print(f"[red]Error:[/red] Failed to validate configuration: {e}") raise typer.Exit(1) - - # Get namespace if not provided (only in interactive mode) - if not namespace: - namespace = questionary.text( - "Enter Kubernetes namespace:", default="default" - ).ask() - namespace = handle_questionary_cancellation(namespace, "namespace input") - - if not namespace: - console.print("Deployment cancelled") - raise typer.Exit(0) - - # Validate override file exists if provided - if override_file: - override_path = Path(override_file) - if not override_path.exists(): - console.print( - f"[red]Error:[/red] Override file not found: {override_file}" - ) - raise typer.Exit(1) # Load manifest for credential validation manifest_obj = AgentManifest.from_yaml(str(manifest_path)) + # Use namespace from environment config if not overridden + if not namespace: + namespace_from_config = agent_env_config.kubernetes.namespace if agent_env_config.kubernetes else None + if namespace_from_config: + console.print(f"[blue]ℹ[/blue] Using namespace from environments.yaml: {namespace_from_config}") + namespace = namespace_from_config + else: + raise DeploymentError(f"No namespace found in environments.yaml for environment: {environment}, and not passed in as --namespace") + # Confirm deployment (only in interactive mode) console.print("\n[bold]Deployment Summary:[/bold]") console.print(f" Manifest: {manifest}") + console.print(f" Environment: {environment}") console.print(f" Cluster: {cluster}") console.print(f" Namespace: {namespace}") if tag: console.print(f" Image 
Tag: {tag}") - if override_file: - console.print(f" Override File: {override_file}") if interactive: proceed = questionary.confirm("Proceed with deployment?").ask() @@ -339,7 +342,7 @@ def deploy( cluster_name=cluster, namespace=namespace, deploy_overrides=deploy_overrides, - override_file_path=override_file, + environment_name=environment, ) # Use the already loaded manifest object diff --git a/src/agentex/lib/cli/commands/init.py b/src/agentex/lib/cli/commands/init.py index 9b2ed600..2de197ae 100644 --- a/src/agentex/lib/cli/commands/init.py +++ b/src/agentex/lib/cli/commands/init.py @@ -65,6 +65,7 @@ def create_project_structure( ".dockerignore.j2": ".dockerignore", "manifest.yaml.j2": "manifest.yaml", "README.md.j2": "README.md", + "environments.yaml.j2": "environments.yaml", } # Add package management file based on uv choice diff --git a/src/agentex/lib/cli/handlers/deploy_handlers.py b/src/agentex/lib/cli/handlers/deploy_handlers.py index 0d64aa26..4ea50a89 100644 --- a/src/agentex/lib/cli/handlers/deploy_handlers.py +++ b/src/agentex/lib/cli/handlers/deploy_handlers.py @@ -8,14 +8,14 @@ from pydantic import BaseModel, Field from rich.console import Console -from agentex.lib.cli.utils.auth_utils import _encode_principal_context from agentex.lib.cli.utils.exceptions import DeploymentError, HelmError +from agentex.lib.sdk.config.environment_config import AgentEnvironmentConfig from agentex.lib.cli.utils.kubectl_utils import check_and_switch_cluster_context from agentex.lib.cli.utils.path_utils import calculate_docker_acp_module, PathResolutionError from agentex.lib.environment_variables import EnvVarKeys from agentex.lib.sdk.config.agent_config import AgentConfig from agentex.lib.sdk.config.agent_manifest import AgentManifest -from agentex.lib.sdk.config.deployment_config import ClusterConfig + from agentex.lib.utils.logging import make_logger logger = make_logger(__name__) @@ -76,25 +76,6 @@ def add_helm_repo() -> None: raise HelmError(f"Failed to add helm 
repository: {e}") from e -def load_override_config(override_file_path: str | None = None) -> ClusterConfig | None: - """Load override configuration from specified file path""" - if not override_file_path: - return None - - override_path = Path(override_file_path) - if not override_path.exists(): - raise DeploymentError(f"Override file not found: {override_file_path}") - - try: - with open(override_path) as f: - config_data = yaml.safe_load(f) - return ClusterConfig(**config_data) if config_data else None - except Exception as e: - raise DeploymentError( - f"Failed to load override config from {override_file_path}: {e}" - ) from e - - def convert_env_vars_dict_to_list(env_vars: dict[str, str]) -> list[dict[str, str]]: """Convert a dictionary of environment variables to a list of dictionaries""" @@ -116,13 +97,13 @@ def add_acp_command_to_helm_values(helm_values: dict[str, Any], manifest: AgentM def merge_deployment_configs( manifest: AgentManifest, - cluster_config: ClusterConfig | None, + agent_env_config: AgentEnvironmentConfig | None, deploy_overrides: InputDeployOverrides, manifest_path: str, ) -> dict[str, Any]: agent_config: AgentConfig = manifest.agent - """Merge global deployment config with cluster-specific overrides into helm values""" + """Merge global deployment config with environment-specific overrides into helm values""" if not manifest.deployment: raise DeploymentError("No deployment configuration found in manifest") @@ -185,18 +166,27 @@ def merge_deployment_configs( "taskQueue": temporal_config.queue_name, } - # Collect all environment variables with conflict detection + # Collect all environment variables with proper precedence + # Priority: manifest -> environments.yaml -> secrets (highest) all_env_vars: dict[str, str] = {} secret_env_vars: list[dict[str, str]] = [] - # Start with agent_config env vars + # Start with agent_config env vars from manifest if agent_config.env: all_env_vars.update(agent_config.env) + + # Override with environment 
config env vars if they exist + if agent_env_config and agent_env_config.helm_overrides and "env" in agent_env_config.helm_overrides: + env_overrides = agent_env_config.helm_overrides["env"] + if isinstance(env_overrides, list): + # Convert list format to dict for easier merging + env_override_dict: dict[str, str] = {} + for env_var in env_overrides: + if isinstance(env_var, dict) and "name" in env_var and "value" in env_var: + env_override_dict[str(env_var["name"])] = str(env_var["value"]) + all_env_vars.update(env_override_dict) + - # Add auth principal env var if manifest principal is set - encoded_principal = _encode_principal_context(manifest) - if encoded_principal: - all_env_vars[EnvVarKeys.AUTH_PRINCIPAL_B64.value] = encoded_principal # Handle credentials and check for conflicts if agent_config.credentials: @@ -228,57 +218,23 @@ def merge_deployment_configs( } ) - # Apply cluster-specific overrides - if cluster_config: - if cluster_config.image: - if cluster_config.image.repository: - helm_values["global"]["image"]["repository"] = ( - cluster_config.image.repository - ) - if cluster_config.image.tag: - helm_values["global"]["image"]["tag"] = cluster_config.image.tag - - if cluster_config.replicaCount is not None: - helm_values["replicaCount"] = cluster_config.replicaCount - - if cluster_config.resources: - if cluster_config.resources.requests: - helm_values["resources"]["requests"].update( - { - "cpu": cluster_config.resources.requests.cpu, - "memory": cluster_config.resources.requests.memory, - } - ) - if cluster_config.resources.limits: - helm_values["resources"]["limits"].update( - { - "cpu": cluster_config.resources.limits.cpu, - "memory": cluster_config.resources.limits.memory, - } - ) - - # Handle cluster env vars with conflict detection - if cluster_config.env: - # Convert cluster env list to dict for easier conflict detection - cluster_env_dict = {env_var["name"]: env_var["value"] for env_var in cluster_config.env} - - # Check for conflicts with 
secret env vars - for secret_env_var in secret_env_vars: - if secret_env_var["name"] in cluster_env_dict: - logger.warning( - f"Environment variable '{secret_env_var['name']}' is defined in both " - f"cluster config env and secretEnvVars. The secret value will take precedence." - ) - del cluster_env_dict[secret_env_var["name"]] - - # Update all_env_vars with cluster overrides - all_env_vars.update(cluster_env_dict) - - # Apply additional arbitrary overrides - if cluster_config.additional_overrides: - _deep_merge(helm_values, cluster_config.additional_overrides) + # Apply agent environment configuration overrides + if agent_env_config: + # Add auth principal env var if environment config is set + if agent_env_config.auth: + from agentex.lib.cli.utils.auth_utils import _encode_principal_context_from_env_config + encoded_principal = _encode_principal_context_from_env_config(agent_env_config.auth) + logger.info(f"Encoding auth principal from {agent_env_config.auth}") + if encoded_principal: + all_env_vars[EnvVarKeys.AUTH_PRINCIPAL_B64.value] = encoded_principal + else: + raise DeploymentError(f"Auth principal unable to be encoded for agent_env_config: {agent_env_config}") + + if agent_env_config.helm_overrides: + _deep_merge(helm_values, agent_env_config.helm_overrides) # Set final environment variables + # Environment variable precedence: manifest -> environments.yaml -> secrets (highest) if all_env_vars: helm_values["env"] = convert_env_vars_dict_to_list(all_env_vars) @@ -295,7 +251,7 @@ def merge_deployment_configs( # Handle image pull secrets if manifest.deployment and manifest.deployment.imagePullSecrets: pull_secrets = [ - pull_secret.to_dict() + pull_secret.model_dump() for pull_secret in manifest.deployment.imagePullSecrets ] helm_values["global"]["imagePullSecrets"] = pull_secrets @@ -333,7 +289,7 @@ def deploy_agent( cluster_name: str, namespace: str, deploy_overrides: InputDeployOverrides, - override_file_path: str | None = None, + environment_name: str | 
None = None, ) -> None: """Deploy an agent using helm""" @@ -345,21 +301,23 @@ def deploy_agent( check_and_switch_cluster_context(cluster_name) manifest = AgentManifest.from_yaml(file_path=manifest_path) - override_config = load_override_config(override_file_path) - # Provide feedback about override configuration - if override_config: - console.print(f"[green]✓[/green] Using override config: {override_file_path}") - else: - console.print( - "[yellow]ℹ[/yellow] No override config specified, using global defaults" - ) + # Load agent environment configuration + agent_env_config = None + if environment_name: + manifest_dir = Path(manifest_path).parent + environments_config = manifest.load_environments_config(manifest_dir) + if environments_config: + agent_env_config = environments_config.get_config_for_env(environment_name) + console.print(f"[green]✓[/green] Using environment config: {environment_name}") + else: + console.print(f"[yellow]⚠[/yellow] No environments.yaml found, skipping environment-specific config") # Add helm repository/update add_helm_repo() # Merge configurations - helm_values = merge_deployment_configs(manifest, override_config, deploy_overrides, manifest_path) + helm_values = merge_deployment_configs(manifest, agent_env_config, deploy_overrides, manifest_path) # Create values file values_file = create_helm_values_file(helm_values) diff --git a/src/agentex/lib/cli/handlers/run_handlers.py b/src/agentex/lib/cli/handlers/run_handlers.py index 228709ad..cfc49e61 100644 --- a/src/agentex/lib/cli/handlers/run_handlers.py +++ b/src/agentex/lib/cli/handlers/run_handlers.py @@ -6,7 +6,6 @@ from rich.console import Console from rich.panel import Panel -from agentex.lib.cli.utils.auth_utils import _encode_principal_context from agentex.lib.cli.handlers.cleanup_handlers import ( cleanup_agent_workflows, should_cleanup_on_restart @@ -374,10 +373,13 @@ def create_agent_environment(manifest: AgentManifest) -> dict[str, str]: "ACP_PORT": 
str(manifest.local_development.agent.port), } - # Add authorization principal if set + # Add authorization principal if set - for local development, auth is optional + from agentex.lib.cli.utils.auth_utils import _encode_principal_context encoded_principal = _encode_principal_context(manifest) if encoded_principal: env_vars[EnvVarKeys.AUTH_PRINCIPAL_B64] = encoded_principal + else: + logger.info("No auth principal configured - agent will run without authentication context") # Add description if available if manifest.agent.description: diff --git a/src/agentex/lib/cli/templates/default/deploy/example.yaml.j2 b/src/agentex/lib/cli/templates/default/deploy/example.yaml.j2 deleted file mode 100644 index d4874d5b..00000000 --- a/src/agentex/lib/cli/templates/default/deploy/example.yaml.j2 +++ /dev/null @@ -1,55 +0,0 @@ -# Example Override Configuration -# ============================= -# This file shows how to override deployment settings from the global defaults in manifest.yaml. -# You can create multiple override files for different environments (e.g., staging.yaml, prod.yaml, dev.yaml) -# Only specify values that differ from the global defaults in manifest.yaml - -# Override image tag -# image: -# tag: "v1.2.3" - -# Override replica count -# replicaCount: 2 - -# Environment-specific environment variables -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" - -# Override resource requirements -# resources: -# requests: -# cpu: "250m" -# memory: "512Mi" -# limits: -# cpu: "500m" -# memory: "1Gi" - -# Advanced: Additional helm chart value overrides -# Use this for any helm chart values not covered by the simple options above -# additional_overrides: -# autoscaling: -# enabled: true -# minReplicas: 2 -# maxReplicas: 10 -# targetCPUUtilizationPercentage: 70 -# -# service: -# type: LoadBalancer -# -# ingress: -# enabled: true -# annotations: -# kubernetes.io/ingress.class: "nginx" -# hosts: -# - host: {{ agent_name }}.example.com -# 
paths: -# - path: / -# pathType: Prefix - -# To use this configuration: -# 1. Copy this file to a new file (e.g., staging.yaml, prod.yaml) -# 2. Uncomment and modify the values you want to override -# 3. Deploy with: agentex agents deploy --cluster your-cluster --namespace your-namespace --override-file staging.yaml \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default/environments.yaml.j2 b/src/agentex/lib/cli/templates/default/environments.yaml.j2 new file mode 100644 index 00000000..f802776f --- /dev/null +++ b/src/agentex/lib/cli/templates/default/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicaCount: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent.
+# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default/manifest.yaml.j2 index 6f7df041..8ab39b61 100644 --- a/src/agentex/lib/cli/templates/default/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/default/manifest.yaml.j2 @@ -75,6 +75,10 @@ agent: # Optional: Credentials mapping # Maps Kubernetes secrets to environment variables # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url # credentials: # - env_var_name: OPENAI_API_KEY # secret_name: openai-api-key diff --git "a/src/agentex/lib/cli/templates/deploy/Screenshot 2025-03-19 at 10.36.57\342\200\257AM.png" "b/src/agentex/lib/cli/templates/deploy/Screenshot 2025-03-19 at 10.36.57\342\200\257AM.png" deleted file mode 100644 index ef18c6e8..00000000 Binary files "a/src/agentex/lib/cli/templates/deploy/Screenshot 2025-03-19 at 10.36.57\342\200\257AM.png" and /dev/null differ diff --git a/src/agentex/lib/cli/templates/deploy/example.yaml.j2 b/src/agentex/lib/cli/templates/deploy/example.yaml.j2 deleted file mode 100644 index d4874d5b..00000000 --- a/src/agentex/lib/cli/templates/deploy/example.yaml.j2 +++ /dev/null @@ -1,55 +0,0 @@ -# Example Override Configuration -# ============================= -# This file shows how to override deployment settings from the global defaults in manifest.yaml. 
-# You can create multiple override files for different environments (e.g., staging.yaml, prod.yaml, dev.yaml) -# Only specify values that differ from the global defaults in manifest.yaml - -# Override image tag -# image: -# tag: "v1.2.3" - -# Override replica count -# replicaCount: 2 - -# Environment-specific environment variables -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" - -# Override resource requirements -# resources: -# requests: -# cpu: "250m" -# memory: "512Mi" -# limits: -# cpu: "500m" -# memory: "1Gi" - -# Advanced: Additional helm chart value overrides -# Use this for any helm chart values not covered by the simple options above -# additional_overrides: -# autoscaling: -# enabled: true -# minReplicas: 2 -# maxReplicas: 10 -# targetCPUUtilizationPercentage: 70 -# -# service: -# type: LoadBalancer -# -# ingress: -# enabled: true -# annotations: -# kubernetes.io/ingress.class: "nginx" -# hosts: -# - host: {{ agent_name }}.example.com -# paths: -# - path: / -# pathType: Prefix - -# To use this configuration: -# 1. Copy this file to a new file (e.g., staging.yaml, prod.yaml) -# 2. Uncomment and modify the values you want to override -# 3. Deploy with: agentex agents deploy --cluster your-cluster --namespace your-namespace --override-file staging.yaml \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync/deploy/example.yaml.j2 b/src/agentex/lib/cli/templates/sync/deploy/example.yaml.j2 deleted file mode 100644 index d4874d5b..00000000 --- a/src/agentex/lib/cli/templates/sync/deploy/example.yaml.j2 +++ /dev/null @@ -1,55 +0,0 @@ -# Example Override Configuration -# ============================= -# This file shows how to override deployment settings from the global defaults in manifest.yaml. 
-# You can create multiple override files for different environments (e.g., staging.yaml, prod.yaml, dev.yaml) -# Only specify values that differ from the global defaults in manifest.yaml - -# Override image tag -# image: -# tag: "v1.2.3" - -# Override replica count -# replicaCount: 2 - -# Environment-specific environment variables -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" - -# Override resource requirements -# resources: -# requests: -# cpu: "250m" -# memory: "512Mi" -# limits: -# cpu: "500m" -# memory: "1Gi" - -# Advanced: Additional helm chart value overrides -# Use this for any helm chart values not covered by the simple options above -# additional_overrides: -# autoscaling: -# enabled: true -# minReplicas: 2 -# maxReplicas: 10 -# targetCPUUtilizationPercentage: 70 -# -# service: -# type: LoadBalancer -# -# ingress: -# enabled: true -# annotations: -# kubernetes.io/ingress.class: "nginx" -# hosts: -# - host: {{ agent_name }}.example.com -# paths: -# - path: / -# pathType: Prefix - -# To use this configuration: -# 1. Copy this file to a new file (e.g., staging.yaml, prod.yaml) -# 2. Uncomment and modify the values you want to override -# 3. Deploy with: agentex agents deploy --cluster your-cluster --namespace your-namespace --override-file staging.yaml \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync/environments.yaml.j2 new file mode 100644 index 00000000..73924abd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicaCount: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/temporal/deploy/example.yaml.j2 b/src/agentex/lib/cli/templates/temporal/deploy/example.yaml.j2 deleted file mode 100644 index d4874d5b..00000000 --- a/src/agentex/lib/cli/templates/temporal/deploy/example.yaml.j2 +++ /dev/null @@ -1,55 +0,0 @@ -# Example Override Configuration -# ============================= -# This file shows how to override deployment settings from the global defaults in manifest.yaml.
-# You can create multiple override files for different environments (e.g., staging.yaml, prod.yaml, dev.yaml) -# Only specify values that differ from the global defaults in manifest.yaml - -# Override image tag -# image: -# tag: "v1.2.3" - -# Override replica count -# replicaCount: 2 - -# Environment-specific environment variables -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" - -# Override resource requirements -# resources: -# requests: -# cpu: "250m" -# memory: "512Mi" -# limits: -# cpu: "500m" -# memory: "1Gi" - -# Advanced: Additional helm chart value overrides -# Use this for any helm chart values not covered by the simple options above -# additional_overrides: -# autoscaling: -# enabled: true -# minReplicas: 2 -# maxReplicas: 10 -# targetCPUUtilizationPercentage: 70 -# -# service: -# type: LoadBalancer -# -# ingress: -# enabled: true -# annotations: -# kubernetes.io/ingress.class: "nginx" -# hosts: -# - host: {{ agent_name }}.example.com -# paths: -# - path: / -# pathType: Prefix - -# To use this configuration: -# 1. Copy this file to a new file (e.g., staging.yaml, prod.yaml) -# 2. Uncomment and modify the values you want to override -# 3. Deploy with: agentex agents deploy --cluster your-cluster --namespace your-namespace --override-file staging.yaml \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal/environments.yaml.j2 b/src/agentex/lib/cli/templates/temporal/environments.yaml.j2 new file mode 100644 index 00000000..ef33d9b2 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal/environments.yaml.j2 @@ -0,0 +1,64 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicaCount: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + # This is used to override the global helm values.yaml file in the agentex-agent helm charts + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: true + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 index 20f76369..5ec8cc5e 100644 --- a/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 +++ b/src/agentex/lib/cli/templates/temporal/manifest.yaml.j2 @@ -92,7 +92,10 @@ agent: # Optional: Credentials mapping # Maps Kubernetes secrets to environment variables # Common credentials include: - #
credentials: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url # - env_var_name: OPENAI_API_KEY # secret_name: openai-api-key # secret_key: api-key diff --git a/src/agentex/lib/cli/utils/auth_utils.py b/src/agentex/lib/cli/utils/auth_utils.py index a2d3279a..9ffbc6be 100644 --- a/src/agentex/lib/cli/utils/auth_utils.py +++ b/src/agentex/lib/cli/utils/auth_utils.py @@ -1,16 +1,56 @@ import base64 import json +from typing import Any, Dict from agentex.lib.sdk.config.agent_manifest import AgentManifest +from agentex.lib.sdk.config.environment_config import AgentAuthConfig +# DEPRECATED: Old function for backward compatibility +# Will be removed in future version +def _encode_principal_context(manifest: AgentManifest) -> str | None: + """ + DEPRECATED: This function is deprecated as AgentManifest no longer contains auth. + Use _encode_principal_context_from_env_config instead. + + This function is kept temporarily for backward compatibility during migration. + """ + # AgentManifest no longer has auth field - this will always return None + return None -# Base 64 encode principal dictionary -def _encode_principal_context(manifest: AgentManifest): - if manifest.auth is None: + +def _encode_principal_context_from_env_config(auth_config: "AgentAuthConfig | None") -> str | None: + """ + Encode principal context from environment configuration. 
+ + Args: + auth_config: AgentAuthConfig containing principal configuration + + Returns: + Base64-encoded JSON string of the principal, or None if no principal + """ + if auth_config is None: + return None + + principal = auth_config.principal + if not principal: return None - principal = manifest.auth.principal - if principal is None: + json_str = json.dumps(principal, separators=(',', ':')) + encoded_bytes = base64.b64encode(json_str.encode('utf-8')) + return encoded_bytes.decode('utf-8') + + +def _encode_principal_dict(principal: Dict[str, Any]) -> str | None: + """ + Encode principal dictionary directly. + + Args: + principal: Dictionary containing principal configuration + + Returns: + Base64-encoded JSON string of the principal, or None if principal is empty + """ + if not principal: return None json_str = json.dumps(principal, separators=(',', ':')) diff --git a/src/agentex/lib/sdk/config/agent_manifest.py b/src/agentex/lib/sdk/config/agent_manifest.py index 1f317b02..83737093 100644 --- a/src/agentex/lib/sdk/config/agent_manifest.py +++ b/src/agentex/lib/sdk/config/agent_manifest.py @@ -15,7 +15,8 @@ from agentex.lib.sdk.config.agent_config import AgentConfig from agentex.lib.sdk.config.build_config import BuildConfig -from agentex.lib.sdk.config.deployment_config import DeploymentConfig, AuthenticationConfig +from agentex.lib.sdk.config.environment_config import AgentEnvironmentsConfig +from agentex.lib.sdk.config.deployment_config import DeploymentConfig from agentex.lib.sdk.config.local_development_config import LocalDevelopmentConfig from agentex.lib.utils.logging import make_logger from agentex.lib.utils.model_utils import BaseModel @@ -36,7 +37,7 @@ class AgentManifest(BaseModel): deployment: DeploymentConfig | None = Field( default=None, description="Deployment configuration for the agent" ) - auth: AuthenticationConfig | None = Field(default=None, description="Authentication configuration") + def context_manager(self, build_context_root: Path) -> 
def load_environments_config(self, manifest_dir: Path) -> "AgentEnvironmentsConfig | None":
    """Return the environments configuration stored next to the manifest.

    Delegates to ``load_environments_config_from_manifest_dir`` from the
    ``environment_config`` module.

    Args:
        manifest_dir: Directory containing manifest.yaml

    Returns:
        AgentEnvironmentsConfig if environments.yaml exists, None otherwise

    Raises:
        ValueError: If environments.yaml exists but is invalid
    """
    # Deferred to call time to avoid circular imports
    from agentex.lib.sdk.config import environment_config as _environment_config

    loader = _environment_config.load_environments_config_from_manifest_dir
    return loader(manifest_dir)
class AgentAuthConfig(BaseModel):
    """Authentication configuration for an agent in a specific environment."""

    principal: Dict[str, Any] = Field(
        ...,
        description="Principal configuration for agent authorization and registration"
    )

    @field_validator('principal')
    @classmethod
    def validate_principal_required_fields(cls, v: Any) -> Dict[str, Any]:
        """Reject a principal that is not a dictionary.

        Only the container type is checked here; no individual key is
        required by this validator.

        Raises:
            ValueError: If ``v`` is not a ``dict``.
        """
        if not isinstance(v, dict):
            raise ValueError("Principal must be a dictionary")
        return v


class AgentKubernetesConfig(BaseModel):
    """Kubernetes configuration for an agent in a specific environment."""

    namespace: str = Field(
        ...,
        description="Kubernetes namespace where the agent will be deployed"
    )

    @field_validator('namespace')
    @classmethod
    def validate_namespace_format(cls, v: str) -> str:
        """Normalize and validate the namespace.

        The value is stripped of surrounding whitespace and lowercased, then
        checked against the allowed character set and length limit.

        Returns:
            The normalized (stripped, lowercased) namespace.

        Raises:
            ValueError: If the namespace is blank, contains characters other
                than ASCII letters, digits, hyphens and periods, or is longer
                than 63 characters.
        """
        if not v or not v.strip():
            raise ValueError("Namespace cannot be empty")

        namespace = v.strip().lower()
        without_separators = namespace.replace('-', '').replace('.', '')
        # str.isalnum() is Unicode-aware and would accept characters such as
        # 'é' or '²'; require ASCII so the check matches the error message.
        if not (namespace.isascii() and without_separators.isalnum()):
            raise ValueError(
                f"Namespace '{v}' must contain only lowercase letters, numbers, "
                "hyphens, and periods"
            )

        if len(namespace) > 63:
            raise ValueError(f"Namespace '{v}' cannot exceed 63 characters")

        return namespace
class AgentEnvironmentConfig(BaseModel):
    """Complete configuration for an agent in a specific environment."""

    # Optional: an environment may omit Kubernetes settings entirely.
    kubernetes: AgentKubernetesConfig | None = Field(
        default=None,
        description="Kubernetes deployment configuration"
    )
    auth: AgentAuthConfig = Field(
        ...,
        description="Authentication and authorization configuration"
    )
    helm_overrides: Dict[str, Any] = Field(
        default_factory=dict,
        description="Helm chart value overrides for environment-specific tuning"
    )


class AgentEnvironmentsConfig(UtilsBaseModel):
    """All environment configurations for an agent."""

    schema_version: str = Field(
        default="v1",
        description="Schema version for validation and compatibility"
    )
    environments: Dict[str, AgentEnvironmentConfig] = Field(
        ...,
        description="Environment-specific configurations (dev, prod, etc.)"
    )

    @field_validator('schema_version')
    @classmethod
    def validate_schema_version(cls, v: str) -> str:
        """Ensure the schema version is one of the supported versions.

        Raises:
            ValueError: If ``v`` is not a supported version.
        """
        supported_versions = ['v1']
        if v not in supported_versions:
            raise ValueError(
                f"Schema version '{v}' not supported. "
                f"Supported versions: {', '.join(supported_versions)}"
            )
        return v

    @field_validator('environments')
    @classmethod
    def validate_environments_not_empty(cls, v: Dict[str, AgentEnvironmentConfig]) -> Dict[str, AgentEnvironmentConfig]:
        """Ensure at least one environment is defined.

        Raises:
            ValueError: If the mapping is empty.
        """
        if not v:
            raise ValueError("At least one environment must be defined")
        return v

    def get_config_for_env(self, env_name: str) -> AgentEnvironmentConfig:
        """Get configuration for a specific environment.

        Args:
            env_name: Name of the environment (e.g., 'dev', 'prod')

        Returns:
            AgentEnvironmentConfig for the specified environment

        Raises:
            ValueError: If environment is not found
        """
        if env_name not in self.environments:
            available_envs = ', '.join(self.environments.keys())
            raise ValueError(
                f"Environment '{env_name}' not found in environments.yaml. "
                f"Available environments: {available_envs}"
            )
        return self.environments[env_name]

    def list_environments(self) -> list[str]:
        """Get list of all configured environment names."""
        return list(self.environments.keys())

    @classmethod
    @override
    def from_yaml(cls, file_path: str) -> "AgentEnvironmentsConfig":
        """Load configuration from environments.yaml file.

        Args:
            file_path: Path to environments.yaml file

        Returns:
            Parsed and validated AgentEnvironmentsConfig

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file is invalid or doesn't validate
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"environments.yaml not found: {file_path}")

        try:
            # Explicit UTF-8: without it the decoding depends on the platform
            # locale and non-ASCII content can load differently per machine.
            with open(path, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)

            if not data:
                raise ValueError("environments.yaml file is empty")

            return cls.model_validate(data)

        except yaml.YAMLError as e:
            raise ValueError(f"Invalid YAML format in {file_path}: {e}") from e
        except Exception as e:
            # Every other failure (including the empty-file error above) is
            # reported as a ValueError that names the file.
            raise ValueError(f"Failed to load environments.yaml from {file_path}: {e}") from e


def load_environments_config_from_manifest_dir(manifest_dir: Path) -> AgentEnvironmentsConfig | None:
    """Helper function to load environments.yaml from same directory as manifest.yaml.

    Args:
        manifest_dir: Directory containing manifest.yaml

    Returns:
        AgentEnvironmentsConfig if environments.yaml exists, None otherwise

    Raises:
        ValueError: If environments.yaml exists but is invalid
    """
    environments_file = manifest_dir / "environments.yaml"
    if not environments_file.exists():
        return None

    return AgentEnvironmentsConfig.from_yaml(str(environments_file))
class ConfigValidationError(Exception):
    """Exception raised when configuration validation fails."""

    def __init__(self, message: str, file_path: Optional[str] = None):
        # Path of the file the error refers to, or None when not supplied.
        self.file_path = file_path
        super().__init__(message)


class EnvironmentsValidationError(ConfigValidationError):
    """Exception raised when environments.yaml validation fails."""


def validate_environments_config(
    environments_config: "AgentEnvironmentsConfig",
    required_environments: Optional[List[str]] = None
) -> None:
    """
    Validate an environments configuration.

    First checks that every required environment is present, then validates
    each configured environment individually.

    Args:
        environments_config: The loaded environments configuration
        required_environments: List of environment names that must be present

    Raises:
        EnvironmentsValidationError: If a required environment is missing or
            any environment fails validation
    """
    if required_environments:
        missing_envs: List[str] = [
            name for name in required_environments
            if name not in environments_config.environments
        ]
        if missing_envs:
            available_envs = list(environments_config.environments.keys())
            raise EnvironmentsValidationError(
                f"Missing required environments: {', '.join(missing_envs)}. "
                f"Available environments: {', '.join(available_envs)}"
            )

    for env_name, env_config in environments_config.environments.items():
        try:
            _validate_single_environment_config(env_name, env_config)
        except Exception as e:
            # Re-raise with the environment name so the user knows which
            # entry of environments.yaml is at fault.
            raise EnvironmentsValidationError(
                f"Environment '{env_name}' configuration error: {str(e)}"
            ) from e


def _validate_single_environment_config(env_name: str, env_config: "AgentEnvironmentConfig") -> None:
    """
    Validate a single environment configuration.

    Args:
        env_name: Name of the environment
        env_config: AgentEnvironmentConfig instance

    Raises:
        ValueError: If the namespace starts or ends with a hyphen or period,
            or the auth principal has no non-empty 'user_id'
    """
    # Validate namespace naming conventions if kubernetes config exists
    if env_config.kubernetes and env_config.kubernetes.namespace:
        namespace = env_config.kubernetes.namespace

        if namespace != namespace.lower():
            logger.warning(
                f"Namespace '{namespace}' contains uppercase letters. "
                "Kubernetes namespaces should be lowercase."
            )

        if namespace.startswith('-') or namespace.endswith('-'):
            raise ValueError(
                f"Namespace '{namespace}' cannot start or end with hyphens"
            )

        # Periods are accepted inside a namespace, but a name must not begin
        # or end with one either.
        if namespace.startswith('.') or namespace.endswith('.'):
            raise ValueError(
                f"Namespace '{namespace}' cannot start or end with periods"
            )

    # Validate auth principal
    principal = env_config.auth.principal
    if not principal.get('user_id'):
        raise ValueError("Auth principal must contain non-empty 'user_id'")

    # Advisory only: warn when the user_id carries no environment indicator.
    user_id = principal['user_id']
    if isinstance(user_id, str):
        user_id_lower = user_id.lower()
        indicators = ('dev', 'prod', 'staging', env_name)
        # A distinct loop name avoids shadowing the env_name parameter.
        if not any(indicator.lower() in user_id_lower for indicator in indicators):
            logger.warning(
                f"User ID '{user_id}' doesn't contain environment indicator. "
                f"Consider including '{env_name}' in the user_id for clarity."
            )

    # Validate helm overrides if present
    if env_config.helm_overrides:
        _validate_helm_overrides(env_config.helm_overrides)
def _validate_helm_overrides(helm_overrides: Dict[str, Any]) -> None:
    """
    Inspect helm overrides for suspicious resource specifications.

    Only the ``resources.requests`` and ``resources.limits`` mappings are
    examined. A ``cpu`` or ``memory`` value that is not a string produces a
    logged warning; nothing is raised and nothing is modified.

    Args:
        helm_overrides: Dictionary of helm overrides
    """
    if 'resources' not in helm_overrides:
        return

    resources = helm_overrides['resources']
    if not isinstance(resources, dict):
        return

    for section in ('requests', 'limits'):
        if section not in resources:
            continue

        spec: Any = resources[section]
        if not isinstance(spec, dict):
            continue

        for name, amount in spec.items():
            if name not in ('cpu', 'memory'):
                continue
            if isinstance(amount, str):
                continue
            logger.warning(
                f"Resource {name} should be a string (e.g., '500m', '1Gi'), "
                f"got {type(amount).__name__}: {amount}"
            )
def validate_environments_yaml_file(file_path: str) -> AgentEnvironmentsConfig:
    """
    Parse environments.yaml and run the environment validation on it.

    Args:
        file_path: Path to environments.yaml file

    Returns:
        Validated AgentEnvironmentsConfig

    Raises:
        EnvironmentsValidationError: If the file is missing, cannot be
            loaded, or fails validation
    """
    try:
        loaded_config = AgentEnvironmentsConfig.from_yaml(file_path)
        validate_environments_config(loaded_config)
    except FileNotFoundError:
        # A missing file gets a dedicated message that explains how to
        # create it; the original traceback is suppressed on purpose.
        missing_file_message = (
            f"environments.yaml not found: {file_path}\n\n"
            "💡 To create one:\n"
            " agentex agents init-environments\n\n"
            "📋 Why required:\n"
            " Environment-specific settings (auth, namespace, resources)\n"
            " must be separated from global manifest for proper isolation."
        )
        raise EnvironmentsValidationError(
            missing_file_message,
            file_path=file_path
        ) from None
    except Exception as e:
        # Any other failure is reported with the file path attached.
        invalid_file_message = f"Invalid environments.yaml file: {str(e)}"
        raise EnvironmentsValidationError(
            invalid_file_message,
            file_path=file_path
        ) from e
    else:
        return loaded_config
def validate_manifest_and_environments(
    manifest_path: str,
    required_environment: Optional[str] = None
) -> "tuple[str, AgentEnvironmentsConfig]":
    """
    Validate both manifest.yaml and environments.yaml files together.

    environments.yaml is looked up in the same directory as the manifest.

    Args:
        manifest_path: Path to manifest.yaml file
        required_environment: Specific environment that must be present

    Returns:
        Tuple of (manifest_path, environments_config)

    Raises:
        ConfigValidationError: If validation fails
    """
    manifest_file = Path(manifest_path)
    if not manifest_file.exists():
        raise ConfigValidationError(f"Manifest file not found: {manifest_path}")

    # Look for environments.yaml in same directory
    environments_file = manifest_file.parent / "environments.yaml"
    environments_config = validate_environments_yaml_file(str(environments_file))

    # Validate specific environment if requested
    if required_environment:
        validate_environments_config(
            environments_config,
            required_environments=[required_environment]
        )

    return manifest_path, environments_config


def generate_helpful_error_message(error: Exception, context: str = "") -> str:
    """
    Generate helpful error message with troubleshooting tips.

    Tips are selected from the exception type and from keywords found in the
    message. At most one tips section is appended.

    Args:
        error: The original exception
        context: Additional context about where the error occurred

    Returns:
        Formatted error message with troubleshooting tips
    """
    base_msg = str(error)

    if context:
        base_msg = f"{context}: {base_msg}"

    if isinstance(error, FileNotFoundError):
        if "environments.yaml" in base_msg:
            base_msg += (
                "\n\n🔧 Troubleshooting:\n"
                "1. Create environments.yaml: agentex agents init-environments\n"
                "2. Check file location: should be next to manifest.yaml\n"
                "3. Verify file permissions"
            )
        return base_msg

    lowered = base_msg.lower()

    # A missing-file error that was re-raised as another exception type
    # already explains how to create the file, and its text mentions
    # "namespace"; without this guard it would receive unrelated namespace
    # tips from the keyword match below.
    if "environments.yaml not found" in lowered:
        return base_msg

    if "user_id" in lowered:
        base_msg += (
            "\n\n💡 Auth Principal Tips:\n"
            "- user_id should be unique per environment\n"
            "- Include environment name (e.g., 'dev_my_agent')\n"
            "- Use consistent naming convention across agents"
        )
    elif "namespace" in lowered:
        base_msg += (
            "\n\n🏷️ Namespace Tips:\n"
            "- Use lowercase letters, numbers, and hyphens only\n"
            "- Include team and environment (e.g., 'team-dev-agent')\n"
            "- Keep under 63 characters"
        )

    return base_msg