From 90d742e56487ebb083c26559440dc6710f61d375 Mon Sep 17 00:00:00 2001 From: Kevin Pedretti Date: Thu, 20 Nov 2025 14:36:06 -0700 Subject: [PATCH 1/4] WIP: adding support for AWS Application Load Balancer JWT auth --- backend/core/auth.py | 67 +++++++++++++++++++++++- backend/core/middleware.py | 23 +++++--- backend/main.py | 3 ++ backend/modules/config/config_manager.py | 23 +++++++- docs/02_admin_guide.md | 11 ++++ 5 files changed, 119 insertions(+), 8 deletions(-) diff --git a/backend/core/auth.py b/backend/core/auth.py index 37b1b61..bf79b83 100644 --- a/backend/core/auth.py +++ b/backend/core/auth.py @@ -60,8 +60,73 @@ async def is_user_in_group(user_id: str, group_id: str) -> bool: return group_id in user_groups +def get_user_from_aws_alb_jwt(encoded_jwt, expected_alb_arn, aws_region): + """ + Validates the AWS ALB JWT and parses the email address from the payload. + + Args: + encoded_jwt (str): The JWT from the x-amzn-oidc-data header. + expected_alb_arn (str): The ARN of your Application Load Balancer. + aws_region (str): The AWS region where your ALB is located (e.g., 'us-east-1'). + + Returns: + str: The user's email address, or None if validation fails. + """ + try: + # Step 1: Decode the JWT header to get the key ID (kid) and signer + jwt_headers_encoded = encoded_jwt.split('.')[0] + # JWTs use base64url encoding, not standard base64 + # Add padding if missing, as Python's b64decode expects it + jwt_headers_decoded = base64.b64decode(jwt_headers_encoded + '===').decode("utf-8") + decoded_json_headers = json.loads(jwt_headers_decoded) + kid = decoded_json_headers['kid'] + received_alb_arn = decoded_json_headers.get('signer') + + # Step 2: Validate the signer matches the expected ALB ARN + if received_alb_arn != expected_alb_arn: + print(f"Error: Invalid signer ARN. Expected {expected_alb_arn}, got {received_alb_arn}") + return None + + # Step 3: Get the public key from the regional endpoint + url = f'https://public-keys.auth.elb.{aws_region}.amazonaws.com/{kid}' + req = requests.get(url) + pub_key = req.text + + # Step 4: Validate the signature and claims using PyJWT + # The decode method handles signature verification and standard claims (like expiration) + # The ALB uses ES256 algorithm + payload = jwt.decode( + encoded_jwt, + pub_key, + algorithms=['ES256'], + # Optional: Add audience or issuer validation if needed, though ALB handles most standard claims validation + options={"verify_aud": False, "verify_iss": False} + ) + + # Step 5: Extract the email address from the payload + email_address = payload.get('email') + if email_address: + return email_address + else: + print("Error: 'email' claim not found in JWT payload.") + return None + + except jwt.ExpiredSignatureError: + print("Error: Token has expired.") + return None + except jwt.InvalidTokenError as e: + print(f"Error: Invalid token - {e}") + return None + except requests.exceptions.RequestException as e: + print(f"Error fetching public key: {e}") + return None + except Exception as e: + print(f"An unexpected error occurred: {e}") + return None + + def get_user_from_header(x_email_header: Optional[str]) -> Optional[str]: """Extract user email from authentication header value.""" if not x_email_header: return None - return x_email_header.strip() \ No newline at end of file + return x_email_header.strip() diff --git a/backend/core/middleware.py b/backend/core/middleware.py index c389c93..8f5ee05 100644 --- a/backend/core/middleware.py +++ b/backend/core/middleware.py @@ -22,6 +22,9 @@ def __init__( app, debug_mode: bool = False, auth_header_name: str = "X-User-Email", + auth_header_type: str = "email-string", + auth_aws_expected_alb_arn: str = "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/...", + auth_aws_region: str = "us-east-1", proxy_secret_enabled: bool = False, proxy_secret_header: str = "X-Proxy-Secret", proxy_secret: str = None, @@ -30,6 +33,9 @@ def __init__( super().__init__(app) self.debug_mode = debug_mode self.auth_header_name = auth_header_name + self.auth_header_type = auth_header_type + self.auth_aws_expected_alb_arn = auth_aws_expected_alb_arn + self.auth_aws_region = auth_aws_region self.proxy_secret_enabled = proxy_secret_enabled self.proxy_secret_header = proxy_secret_header self.proxy_secret = proxy_secret @@ -83,17 +89,22 @@ async def dispatch(self, request: Request, call_next) -> Response: user_email = None if self.debug_mode: # In debug mode, honor auth header if provided, otherwise use config test user - x_email_header = request.headers.get(self.auth_header_name) - if x_email_header: - user_email = get_user_from_header(x_email_header) + x_auth_header = request.headers.get(self.auth_header_name) + if x_auth_header: + user_email = get_user_from_header(x_auth_header) else: # Get test user from config config_manager = app_factory.get_config_manager() user_email = config_manager.app_settings.test_user # logger.info(f"Debug mode: using user {user_email}") else: - x_email_header = request.headers.get(self.auth_header_name) - user_email = get_user_from_header(x_email_header) + x_auth_header = request.headers.get(self.auth_header_name) + + # Extract the user's email, depending on the datatype of auth header + if self.auth_header_type = "aws-alb-jwt": # Amazon Application Load Balancer + user_email = get_user_from_aws_alb_jwt(x_auth_header, self.auth_aws_expected_alb_arn, self.auth_aws_region) + else + user_email = get_user_from_header(x_auth_header) if not user_email: # Distinguish between API endpoints (return 401) and browser endpoints (redirect) @@ -108,4 +119,4 @@ async def dispatch(self, request: Request, call_next) -> Response: request.state.user_email = user_email response = await call_next(request) - return response \ No newline at end of file + return response diff --git a/backend/main.py b/backend/main.py index eecdb5b..7bf2ce1 100644 --- a/backend/main.py +++ b/backend/main.py @@ -127,6 +127,9 @@ async def lifespan(app: FastAPI): AuthMiddleware, debug_mode=config.app_settings.debug_mode, auth_header_name=config.app_settings.auth_user_header, + auth_header_type=config.app_settings.auth_user_header_type, + auth_aws_expected_alb_arn=config.app_settings.auth_aws_expected_alb_arn, + auth_aws_region=config.app_settings.auth_aws_region, proxy_secret_enabled=config.app_settings.feature_proxy_secret_enabled, proxy_secret_header=config.app_settings.proxy_secret_header, proxy_secret=config.app_settings.proxy_secret, diff --git a/backend/modules/config/config_manager.py b/backend/modules/config/config_manager.py index d2d16d6..75d044a 100644 --- a/backend/modules/config/config_manager.py +++ b/backend/modules/config/config_manager.py @@ -212,6 +212,27 @@ def agent_mode_available(self) -> bool: description="HTTP header name to extract authenticated username from reverse proxy", validation_alias="AUTH_USER_HEADER" ) + + # Authentication header configuration + auth_user_header_type: str = Field( + default="email-string", + description="The datatype stored in AUTH_USER_HEADER", + validation_alias="AUTH_USER_HEADER_TYPE" + ) + + # Authentication AWS expected ALB ARN + auth_aws_expected_alb_arn: str = Field( + default="arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/...", + description="The expected AWS ALB ARN", + validation_alias="AUTH_AWS_EXPECTED_ALB_ARN" + ) + + # Authentication AWS region + auth_aws_region: str = Field( + default="us-east-1", + description="The AWS region", + validation_alias="AUTH_AWS_REGION" + ) # Proxy secret authentication configuration feature_proxy_secret_enabled: bool = Field( @@ -659,4 +680,4 @@ def get_llm_config() -> LLMConfig: def get_mcp_config() -> MCPConfig: """Get MCP configuration.""" - return config_manager.mcp_config \ No newline at end of file + return config_manager.mcp_config diff --git a/docs/02_admin_guide.md b/docs/02_admin_guide.md index 2ab0cfd..d9255f8 100644 --- a/docs/02_admin_guide.md +++ b/docs/02_admin_guide.md @@ -349,6 +349,17 @@ The intended flow for user authentication in a production environment is as foll The backend application reads this header to identify the user. The header name is configurable via the `AUTH_USER_HEADER` environment variable (default: `X-User-Email`). This allows flexibility for different reverse proxy setups that may use different header names (e.g., `X-Authenticated-User`, `X-Remote-User`). This model is secure only if the backend is not directly exposed to the internet, ensuring that all requests are processed by the proxy first. +If using AWS Application Load Balancer (ALB) as the Auth Service, the following authentication configuration should be used: + +``` + AUTH_USER_HEADER="x-amzn-oidc-data" + AUTH_USER_HEADER_TYPE="aws-alb-jwt" + AUTH_AWS_EXPECTED_ALB_ARN="arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/..." + AUTH_AWS_REGION="us-east-1" +``` + +This configuration will decode the base64-encoded JWT passed in the x-amzn-oidc-data header, validate it, and extract the user's email address from the validated JWT. + ### Development Behavior In a local development environment (when `DEBUG_MODE=true` in the `.env` file), the system falls back to using a default `test@test.com` user if the configured authentication header is not present. From c85c720535006b00d1bb26aa3bc8f130c0b7ecc0 Mon Sep 17 00:00:00 2001 From: Kevin Pedretti Date: Thu, 20 Nov 2025 15:38:04 -0700 Subject: [PATCH 2/4] fixup code bugs --- backend/core/auth.py | 6 ++++++ backend/core/middleware.py | 4 ++-- docs/02_admin_guide.md | 8 ++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/core/auth.py b/backend/core/auth.py index bf79b83..78a1736 100644 --- a/backend/core/auth.py +++ b/backend/core/auth.py @@ -6,6 +6,12 @@ import httpx from modules.config.config_manager import config_manager +import jwt +import requests +import base64 +import json +import time + logger = logging.getLogger(__name__) diff --git a/backend/core/middleware.py b/backend/core/middleware.py index 8f5ee05..5ff321b 100644 --- a/backend/core/middleware.py +++ b/backend/core/middleware.py @@ -101,9 +101,9 @@ async def dispatch(self, request: Request, call_next) -> Response: x_auth_header = request.headers.get(self.auth_header_name) # Extract the user's email, depending on the datatype of auth header - if self.auth_header_type = "aws-alb-jwt": # Amazon Application Load Balancer + if self.auth_header_type == "aws-alb-jwt": # Amazon Application Load Balancer user_email = get_user_from_aws_alb_jwt(x_auth_header, self.auth_aws_expected_alb_arn, self.auth_aws_region) - else + else: user_email = get_user_from_header(x_auth_header) if not user_email: diff --git a/docs/02_admin_guide.md b/docs/02_admin_guide.md index d9255f8..df34825 100644 --- a/docs/02_admin_guide.md +++ b/docs/02_admin_guide.md @@ -352,10 +352,10 @@ The backend application reads this header to identify the user. The header name If using AWS Application Load Balancer (ALB) as the Auth Service, the following authentication configuration should be used: ``` - AUTH_USER_HEADER="x-amzn-oidc-data" - AUTH_USER_HEADER_TYPE="aws-alb-jwt" - AUTH_AWS_EXPECTED_ALB_ARN="arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/..." - AUTH_AWS_REGION="us-east-1" + AUTH_USER_HEADER=x-amzn-oidc-data + AUTH_USER_HEADER_TYPE=aws-alb-jwt + AUTH_AWS_EXPECTED_ALB_ARN=arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/your-alb-name/... + AUTH_AWS_REGION=us-east-1 ``` This configuration will decode the base64-encoded JWT passed in the x-amzn-oidc-data header, validate it, and extract the user's email address from the validated JWT. From 047ccf7204648701515fbf58d89fbdba0f78fb4c Mon Sep 17 00:00:00 2001 From: Kevin Pedretti Date: Thu, 20 Nov 2025 15:45:36 -0700 Subject: [PATCH 3/4] code bug fixup --- backend/core/middleware.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/core/middleware.py b/backend/core/middleware.py index 5ff321b..d045db0 100644 --- a/backend/core/middleware.py +++ b/backend/core/middleware.py @@ -8,6 +8,7 @@ from starlette.responses import Response from core.auth import get_user_from_header +from core.auth import get_user_from_aws_alb_jwt from core.capabilities import verify_file_token from infrastructure.app_factory import app_factory From 0e9e7e391ee6b6ab524df024a83a6ffd256780c6 Mon Sep 17 00:00:00 2001 From: Kevin Pedretti Date: Thu, 20 Nov 2025 15:58:47 -0700 Subject: [PATCH 4/4] fixup split on none bug --- backend/core/auth.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/core/auth.py b/backend/core/auth.py index 78a1736..1862163 100644 --- a/backend/core/auth.py +++ b/backend/core/auth.py @@ -78,6 +78,8 @@ def get_user_from_aws_alb_jwt(encoded_jwt, expected_alb_arn, aws_region): Returns: str: The user's email address, or None if validation fails. """ + if not encoded_jwt: + return None try: # Step 1: Decode the JWT header to get the key ID (kid) and signer jwt_headers_encoded = encoded_jwt.split('.')[0]