def _is_safe_url_token(value) -> bool:
    """Return True if *value* is a non-empty ASCII string of letters, digits and hyphens."""
    return (
        isinstance(value, str)
        and value.isascii()
        and value.replace("-", "").isalnum()
    )


def get_user_from_aws_alb_jwt(
    encoded_jwt: Optional[str],
    expected_alb_arn: str,
    aws_region: str,
    key_fetch_timeout: float = 5.0,
) -> Optional[str]:
    """Validate an AWS ALB JWT and return the ``email`` claim from its payload.

    The function fails closed: every validation problem is logged and
    reported to the caller as ``None``; no exception escapes.

    Args:
        encoded_jwt: The JWT taken from the ``x-amzn-oidc-data`` header.
        expected_alb_arn: ARN of the Application Load Balancer that must
            appear as ``signer`` in the JWT header.
        aws_region: AWS region of the ALB (e.g. ``'us-east-1'``); used to
            build the public-key endpoint host name.
        key_fetch_timeout: Seconds to wait for the public-key endpoint
            before giving up.

    Returns:
        The user's email address, or ``None`` if the token is missing,
        malformed, signed by an unexpected ALB, fails verification, or
        carries no ``email`` claim.

    NOTE(review): the key download uses the blocking ``requests`` client
    and runs on every call; when invoked from async middleware this stalls
    the event loop for the duration of the request. Consider caching keys
    per ``kid`` or moving the call to a worker thread.
    """
    # Same logger object as the module-level ``logger`` (same name).
    log = logging.getLogger(__name__)

    if not encoded_jwt:
        return None

    # Step 1: decode the (still unverified) JWT header to obtain the key ID
    # and signer. JWT segments are base64url; the urlsafe decoder also
    # accepts the standard alphabet. Existing padding is stripped and the
    # exact amount re-added so both padded and unpadded tokens decode.
    try:
        header_segment = encoded_jwt.split(".", 1)[0].rstrip("=")
        padded_segment = header_segment + "=" * (-len(header_segment) % 4)
        jwt_header = json.loads(
            base64.urlsafe_b64decode(padded_segment).decode("utf-8")
        )
    except (AttributeError, TypeError, ValueError) as exc:
        # binascii.Error, UnicodeDecodeError and JSONDecodeError are all
        # ValueError subclasses.
        log.warning("ALB JWT rejected: malformed header (%s)", exc)
        return None

    if not isinstance(jwt_header, dict):
        log.warning("ALB JWT rejected: header is not a JSON object")
        return None

    kid = jwt_header.get("kid")
    received_alb_arn = jwt_header.get("signer")

    # Step 2: the signer must be the load balancer we expect.
    if received_alb_arn != expected_alb_arn:
        log.warning(
            "ALB JWT rejected: invalid signer ARN. Expected %r, got %r",
            expected_alb_arn,
            received_alb_arn,
        )
        return None

    # The kid comes from an unverified header and is placed in a URL, so
    # anything that could alter the path, query or host is refused.
    if not _is_safe_url_token(kid):
        log.warning("ALB JWT rejected: missing or unsafe key id %r", kid)
        return None
    if not _is_safe_url_token(aws_region):
        log.error("ALB JWT validation misconfigured: bad region %r", aws_region)
        return None

    try:
        # Step 3: fetch the public key from the regional endpoint.
        url = f"https://public-keys.auth.elb.{aws_region}.amazonaws.com/{kid}"
        response = requests.get(url, timeout=key_fetch_timeout)
        # Do not hand an HTTP error body to the verifier as if it were a key.
        response.raise_for_status()

        # Step 4: verify the signature and standard claims (such as
        # expiration) with PyJWT. The ALB signs with ES256. Audience and
        # issuer checks stay disabled, as before.
        payload = jwt.decode(
            encoded_jwt,
            response.text,
            algorithms=["ES256"],
            options={"verify_aud": False, "verify_iss": False},
        )
    except jwt.ExpiredSignatureError:
        log.warning("ALB JWT rejected: token has expired")
        return None
    except jwt.InvalidTokenError as exc:
        log.warning("ALB JWT rejected: invalid token - %s", exc)
        return None
    except requests.exceptions.RequestException as exc:
        log.error("Error fetching ALB public key: %s", exc)
        return None
    except Exception:
        # Authentication boundary: fail closed, but keep the traceback.
        log.exception("Unexpected error while validating ALB JWT")
        return None

    # Step 5: extract the email address from the verified payload.
    email_address = payload.get("email")
    if not email_address:
        log.warning("ALB JWT rejected: 'email' claim not found in payload")
        return None
    return email_address


def get_user_from_header(x_email_header: Optional[str]) -> Optional[str]:
    """Extract user email from authentication header value.

    Returns ``None`` when the header is missing or empty, otherwise the
    header value with surrounding whitespace removed.
    """
    if not x_email_header:
        return None
    return x_email_header.strip()
False, auth_header_name: str = "X-User-Email", + auth_header_type: str = "email-string", + auth_aws_expected_alb_arn: str = "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/...", + auth_aws_region: str = "us-east-1", proxy_secret_enabled: bool = False, proxy_secret_header: str = "X-Proxy-Secret", proxy_secret: str = None, @@ -30,6 +34,9 @@ def __init__( super().__init__(app) self.debug_mode = debug_mode self.auth_header_name = auth_header_name + self.auth_header_type = auth_header_type + self.auth_aws_expected_alb_arn = auth_aws_expected_alb_arn + self.auth_aws_region = auth_aws_region self.proxy_secret_enabled = proxy_secret_enabled self.proxy_secret_header = proxy_secret_header self.proxy_secret = proxy_secret @@ -83,17 +90,22 @@ async def dispatch(self, request: Request, call_next) -> Response: user_email = None if self.debug_mode: # In debug mode, honor auth header if provided, otherwise use config test user - x_email_header = request.headers.get(self.auth_header_name) - if x_email_header: - user_email = get_user_from_header(x_email_header) + x_auth_header = request.headers.get(self.auth_header_name) + if x_auth_header: + user_email = get_user_from_header(x_auth_header) else: # Get test user from config config_manager = app_factory.get_config_manager() user_email = config_manager.app_settings.test_user # logger.info(f"Debug mode: using user {user_email}") else: - x_email_header = request.headers.get(self.auth_header_name) - user_email = get_user_from_header(x_email_header) + x_auth_header = request.headers.get(self.auth_header_name) + + # Extract the user's email, depending on the datatype of auth header + if self.auth_header_type == "aws-alb-jwt": # Amazon Application Load Balancer + user_email = get_user_from_aws_alb_jwt(x_auth_header, self.auth_aws_expected_alb_arn, self.auth_aws_region) + else: + user_email = get_user_from_header(x_auth_header) if not user_email: # Distinguish between API endpoints (return 401) and browser 
endpoints (redirect) @@ -108,4 +120,4 @@ async def dispatch(self, request: Request, call_next) -> Response: request.state.user_email = user_email response = await call_next(request) - return response \ No newline at end of file + return response diff --git a/backend/main.py b/backend/main.py index eecdb5b..7bf2ce1 100644 --- a/backend/main.py +++ b/backend/main.py @@ -127,6 +127,9 @@ async def lifespan(app: FastAPI): AuthMiddleware, debug_mode=config.app_settings.debug_mode, auth_header_name=config.app_settings.auth_user_header, + auth_header_type=config.app_settings.auth_user_header_type, + auth_aws_expected_alb_arn=config.app_settings.auth_aws_expected_alb_arn, + auth_aws_region=config.app_settings.auth_aws_region, proxy_secret_enabled=config.app_settings.feature_proxy_secret_enabled, proxy_secret_header=config.app_settings.proxy_secret_header, proxy_secret=config.app_settings.proxy_secret, diff --git a/backend/modules/config/config_manager.py b/backend/modules/config/config_manager.py index d2d16d6..75d044a 100644 --- a/backend/modules/config/config_manager.py +++ b/backend/modules/config/config_manager.py @@ -212,6 +212,27 @@ def agent_mode_available(self) -> bool: description="HTTP header name to extract authenticated username from reverse proxy", validation_alias="AUTH_USER_HEADER" ) + + # Authentication header configuration + auth_user_header_type: str = Field( + default="email-string", + description="The datatype stored in AUTH_USER_HEADER", + validation_alias="AUTH_USER_HEADER_TYPE" + ) + + # Authentication AWS expected ALB ARN + auth_aws_expected_alb_arn: str = Field( + default="arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/...", + description="The expected AWS ALB ARN", + validation_alias="AUTH_AWS_EXPECTED_ALB_ARN" + ) + + # Authentication AWS region + auth_aws_region: str = Field( + default="us-east-1", + description="The AWS region", + validation_alias="AUTH_AWS_REGION" + ) # Proxy secret authentication 
configuration feature_proxy_secret_enabled: bool = Field( @@ -659,4 +680,4 @@ def get_llm_config() -> LLMConfig: def get_mcp_config() -> MCPConfig: """Get MCP configuration.""" - return config_manager.mcp_config \ No newline at end of file + return config_manager.mcp_config diff --git a/docs/02_admin_guide.md b/docs/02_admin_guide.md index 2ab0cfd..df34825 100644 --- a/docs/02_admin_guide.md +++ b/docs/02_admin_guide.md @@ -349,6 +349,17 @@ The intended flow for user authentication in a production environment is as foll The backend application reads this header to identify the user. The header name is configurable via the `AUTH_USER_HEADER` environment variable (default: `X-User-Email`). This allows flexibility for different reverse proxy setups that may use different header names (e.g., `X-Authenticated-User`, `X-Remote-User`). This model is secure only if the backend is not directly exposed to the internet, ensuring that all requests are processed by the proxy first. +If using AWS Application Load Balancer (ALB) as the Auth Service, the following authentication configuration should be used: + +``` + AUTH_USER_HEADER=x-amzn-oidc-data + AUTH_USER_HEADER_TYPE=aws-alb-jwt + AUTH_AWS_EXPECTED_ALB_ARN=arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/... + AUTH_AWS_REGION=us-east-1 +``` + +This configuration will decode the base64-encoded JWT passed in the x-amzn-oidc-data header, validate it, and extract the user's email address from the validated JWT. + ### Development Behavior In a local development environment (when `DEBUG_MODE=true` in the `.env` file), the system falls back to using a default `test@test.com` user if the configured authentication header is not present.