diff --git a/CHANGELOG.md b/CHANGELOG.md index ffcc79c1..217b0a10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add scope configuration - Improve **k8s/backup** logging format with detailed error messages and fix suggestions - Add unit tests for **k8s/backup** module (backup_templates and s3 operations) +- Add ALB capacity validation on scope creation. Requires additional AWS permissions: `elasticloadbalancing:DescribeLoadBalancers`, `elasticloadbalancing:DescribeListeners`, `elasticloadbalancing:DescribeRules` +- Add ALB target group capacity validation on deployment. Requires additional AWS permission: `elasticloadbalancing:DescribeTargetGroups` ## [1.10.1] - 2026-02-13 - Hotfix on wait_deployment_iteration diff --git a/k8s/deployment/tests/validate_alb_target_group_capacity.bats b/k8s/deployment/tests/validate_alb_target_group_capacity.bats new file mode 100644 index 00000000..3e6fb50d --- /dev/null +++ b/k8s/deployment/tests/validate_alb_target_group_capacity.bats @@ -0,0 +1,343 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for validate_alb_target_group_capacity +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/deployment/validate_alb_target_group_capacity" + + export ALB_NAME="k8s-nullplatform-internet-facing" + export REGION="us-east-1" + export ALB_MAX_TARGET_GROUPS="98" + + # Base CONTEXT + export CONTEXT='{ + "providers": {} + }' + + # Mock aws - default: ALB with 40 target groups + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "40" + return 0 + ;; + esac + } + export -f aws +} + +teardown() { + unset -f aws +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "validate_alb_target_group_capacity: success when under capacity" { + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB target group capacity for 'k8s-nullplatform-internet-facing'..." 
+ assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 98)" + assert_contains "$output" "✅ ALB target group capacity validated: 40/98" +} + +@test "validate_alb_target_group_capacity: displays debug info" { + export LOG_LEVEL="debug" + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB: k8s-nullplatform-internet-facing | Region: us-east-1 | Max target groups: 98" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" +} + +# ============================================================================= +# Capacity exceeded +# ============================================================================= +@test "validate_alb_target_group_capacity: fails when at capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "98" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 98/98" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Too many services or deployments are attached to this ALB" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Remove unused deployments or services from the ALB" + assert_contains "$output" "Increase ALB_MAX_TARGET_GROUPS in values.yaml or scope-configurations provider (AWS limit is 100)" + assert_contains "$output" "Request an AWS service quota increase for target groups per ALB" + assert_contains "$output" "Consider using a separate ALB for additional deployments" +} + +@test "validate_alb_target_group_capacity: fails when over capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo 
"arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "100" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 100/98" +} + +# ============================================================================= +# Configuration via get_config_value +# ============================================================================= +@test "validate_alb_target_group_capacity: uses default ALB_MAX_TARGET_GROUPS of 98" { + unset ALB_MAX_TARGET_GROUPS + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 98)" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from env var" { + export ALB_MAX_TARGET_GROUPS="30" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from scope-configurations provider" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"30"}}}}' + export ALB_MAX_TARGET_GROUPS="98" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: ALB_MAX_TARGET_GROUPS from container-orchestration provider" { + export CONTEXT='{"providers":{"container-orchestration":{"balancer":{"alb_max_target_groups":"30"}}}}' + export ALB_MAX_TARGET_GROUPS="98" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached target group capacity: 40/30" +} + +@test "validate_alb_target_group_capacity: 
scope-configurations takes priority over container-orchestration" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"100"}},"container-orchestration":{"balancer":{"alb_max_target_groups":"30"}}}}' + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 100)" +} + +@test "validate_alb_target_group_capacity: provider takes priority over env var" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_target_groups":"100"}}}}' + export ALB_MAX_TARGET_GROUPS="30" + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 40 target groups (max: 100)" + assert_contains "$output" "✅ ALB target group capacity validated: 40/100" +} + +# ============================================================================= +# AWS API errors +# ============================================================================= +@test "validate_alb_target_group_capacity: fails when describe-load-balancers fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "An error occurred (LoadBalancerNotFound)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to find load balancer 'k8s-nullplatform-internet-facing' in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may not exist or the agent lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the ALB exists: aws elbv2 describe-load-balancers --names k8s-nullplatform-internet-facing --region us-east-1" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeLoadBalancers" +} + +@test "validate_alb_target_group_capacity: fails when ALB ARN is None" { + aws() { + case "$*" in + 
*"describe-load-balancers"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" +} + +@test "validate_alb_target_group_capacity: fails when describe-target-groups fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe target groups for ALB 'k8s-nullplatform-internet-facing'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack permissions to describe target groups" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeTargetGroups" +} + +# ============================================================================= +# Edge cases +# ============================================================================= +@test "validate_alb_target_group_capacity: handles zero target groups" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "0" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 0 target groups (max: 98)" + assert_contains "$output" "✅ ALB target group capacity validated: 0/98" +} + +@test "validate_alb_target_group_capacity: passes at exactly one below capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + 
;; + *"describe-target-groups"*) + echo "97" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "✅ ALB target group capacity validated: 97/98" +} + +@test "validate_alb_target_group_capacity: fails when target group count is non-numeric" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-target-groups"*) + echo "WARNING: something unexpected" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Unexpected non-numeric target group count from ALB" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + assert_contains "$output" "📋 Received value: WARNING: something unexpected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The AWS CLI returned an unexpected response format" +} + +@test "validate_alb_target_group_capacity: fails when ALB_MAX_TARGET_GROUPS is non-numeric" { + export ALB_MAX_TARGET_GROUPS="abc" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB_MAX_TARGET_GROUPS must be a numeric value, got: 'abc'" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Set a numeric value in values.yaml or scope-configurations provider" +} + +@test "validate_alb_target_group_capacity: empty ALB ARN response triggers error" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" +} diff --git a/k8s/deployment/validate_alb_target_group_capacity b/k8s/deployment/validate_alb_target_group_capacity new file mode 100755 index 00000000..e7d50cb2 
--- /dev/null
+++ b/k8s/deployment/validate_alb_target_group_capacity
@@ -0,0 +1,133 @@
+#!/bin/bash
+#
+# Checks that the ALB named by $ALB_NAME still has room for more target groups.
+#
+# Inputs:
+#   ALB_NAME, REGION       Read from the environment; required (set -u).
+#   ALB_MAX_TARGET_GROUPS  Resolved by get_config_value from the env var, the
+#                          scope-configurations / container-orchestration
+#                          providers, or the default of 98.
+#
+# Exit status: 0 when the target group count is below the maximum; 1 when the
+# maximum is not numeric, an AWS CLI call fails, or capacity has been reached.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../utils/get_config_value"
+
+ALB_MAX_TARGET_GROUPS=$(get_config_value \
+  --env ALB_MAX_TARGET_GROUPS \
+  --provider '.providers["scope-configurations"].networking.alb_max_target_groups' \
+  --provider '.providers["container-orchestration"].balancer.alb_max_target_groups' \
+  --default "98"
+)
+
+if ! [[ "$ALB_MAX_TARGET_GROUPS" =~ ^[0-9]+$ ]]; then
+  log error "❌ ALB_MAX_TARGET_GROUPS must be a numeric value, got: '$ALB_MAX_TARGET_GROUPS'"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Set a numeric value in values.yaml or scope-configurations provider"
+  log error ""
+  exit 1
+fi
+
+# Force base 10: a zero-padded value such as "098" passes the regex above, but
+# bash would otherwise read it as octal and the capacity comparison below would
+# error out instead of comparing.
+ALB_MAX_TARGET_GROUPS=$((10#$ALB_MAX_TARGET_GROUPS))
+
+log info "🔍 Validating ALB target group capacity for '$ALB_NAME'..."
+log debug "📋 ALB: $ALB_NAME | Region: $REGION | Max target groups: $ALB_MAX_TARGET_GROUPS"
+
+# Get the ALB ARN. stderr is captured too so the CLI's own message can be
+# reported when the call fails.
+ALB_ARN=$(aws elbv2 describe-load-balancers \
+  --names "$ALB_NAME" \
+  --region "$REGION" \
+  --query 'LoadBalancers[0].LoadBalancerArn' \
+  --output text \
+  --no-paginate 2>&1) || {
+  log error "❌ Failed to find load balancer '$ALB_NAME' in region '$REGION'"
+  log error "📋 AWS CLI output: $ALB_ARN"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The load balancer may not exist or the agent lacks permissions"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Verify the ALB exists: aws elbv2 describe-load-balancers --names $ALB_NAME --region $REGION"
+  log error " • Check IAM permissions for elbv2:DescribeLoadBalancers"
+  log error ""
+  exit 1
+}
+
+if [[ -z "$ALB_ARN" ]] || [[ "$ALB_ARN" == "None" ]]; then
+  log error "❌ Load balancer '$ALB_NAME' not found in region '$REGION'"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The load balancer name may be incorrect or it was deleted"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • List available ALBs: aws elbv2 describe-load-balancers --region $REGION"
+  log error " • Check the balancer name in values.yaml or scope-configurations provider"
+  log error ""
+  exit 1
+fi
+
+log debug "📋 ALB ARN: $ALB_ARN"
+
+# Count target groups attached to this ALB
+TARGET_GROUP_COUNT=$(aws elbv2 describe-target-groups \
+  --load-balancer-arn "$ALB_ARN" \
+  --region "$REGION" \
+  --query 'length(TargetGroups)' \
+  --output text \
+  --no-paginate 2>&1) || {
+  log error "❌ Failed to describe target groups for ALB '$ALB_NAME'"
+  log error "📋 AWS CLI output: $TARGET_GROUP_COUNT"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The agent may lack permissions to describe target groups"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Check IAM permissions for elbv2:DescribeTargetGroups"
+  log error ""
+  exit 1
+}
+
+if ! [[ "$TARGET_GROUP_COUNT" =~ ^[0-9]+$ ]]; then
+  log error "❌ Unexpected non-numeric target group count from ALB"
+  log error "📋 ALB ARN: $ALB_ARN"
+  log error "📋 Received value: $TARGET_GROUP_COUNT"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The AWS CLI returned an unexpected response format"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Verify AWS CLI version and credentials are correct"
+  log error " • Run manually: aws elbv2 describe-target-groups --load-balancer-arn $ALB_ARN --region $REGION --query 'length(TargetGroups)'"
+  log error ""
+  exit 1
+fi
+
+# Same base-10 normalization as for the maximum (guards against "08"/"09").
+TARGET_GROUP_COUNT=$((10#$TARGET_GROUP_COUNT))
+
+log info "📋 ALB '$ALB_NAME' has $TARGET_GROUP_COUNT target groups (max: $ALB_MAX_TARGET_GROUPS)"
+
+if (( TARGET_GROUP_COUNT >= ALB_MAX_TARGET_GROUPS )); then
+  log error "❌ ALB '$ALB_NAME' has reached target group capacity: $TARGET_GROUP_COUNT/$ALB_MAX_TARGET_GROUPS"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " Too many services or deployments are attached to this ALB"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Remove unused deployments or services from the ALB"
+  log error " • Increase ALB_MAX_TARGET_GROUPS in values.yaml or scope-configurations provider (AWS limit is 100)"
+  log error " • Request an AWS service quota increase for target groups per ALB"
+  log error " • Consider using a separate ALB for additional deployments"
+  log error ""
+  exit 1
+fi
+
+log info "✅ ALB target group capacity validated: $TARGET_GROUP_COUNT/$ALB_MAX_TARGET_GROUPS"
diff --git a/k8s/deployment/workflows/initial.yaml b/k8s/deployment/workflows/initial.yaml index b0b7f230..22032272 100644 --- a/k8s/deployment/workflows/initial.yaml +++ b/k8s/deployment/workflows/initial.yaml @@ -28,6 +28,9 @@ steps: type: environment - name: BLUE_DEPLOYMENT_ID type: environment + - name: validate alb target group capacity + type: script + file: "$SERVICE_PATH/deployment/validate_alb_target_group_capacity" - name: route traffic type: script file: "$SERVICE_PATH/deployment/networking/gateway/route_traffic" diff --git a/k8s/scope/tests/validate_alb_capacity.bats b/k8s/scope/tests/validate_alb_capacity.bats new file mode 100644 index 00000000..547ee639 --- /dev/null +++ b/k8s/scope/tests/validate_alb_capacity.bats @@ -0,0 +1,451 @@ +#!/usr/bin/env bats +# ============================================================================= +# Unit tests for validate_alb_capacity +# ============================================================================= + +setup() { + export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + source "$PROJECT_ROOT/testing/assertions.sh" + log() { if [ "$1" = "error" ]; then echo "$2" >&2; else echo "$2"; fi; } + export -f log + source "$PROJECT_ROOT/k8s/utils/get_config_value" + + export SCRIPT="$PROJECT_ROOT/k8s/scope/validate_alb_capacity" + + export ALB_NAME="k8s-nullplatform-internet-facing" + export REGION="us-east-1" + export ALB_MAX_CAPACITY="75" + + # Base CONTEXT + export CONTEXT='{ + "providers": {} + }' + + # Mock aws - default: ALB with 2 listeners, 30 rules each + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/k8s-nullplatform-internet-facing/abc123/listener1 arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/k8s-nullplatform-internet-facing/abc123/listener2" + return 0 + ;; + *"describe-rules"*) + echo "30" + return 0 + ;; + esac + } + export -f aws +} + +teardown() { + unset -f aws +} + +# ============================================================================= +# Success flow +# ============================================================================= +@test "validate_alb_capacity: success when under capacity" { + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "🔍 Validating ALB capacity for 'k8s-nullplatform-internet-facing'..." 
+ assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 60/75 rules" +} + +@test "validate_alb_capacity: displays debug info" { + export LOG_LEVEL="debug" + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB: k8s-nullplatform-internet-facing | Region: us-east-1 | Max capacity: 75 rules" + assert_contains "$output" "📋 ALB ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123" +} + +@test "validate_alb_capacity: success with single listener" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "10" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 10 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 10/75 rules" +} + +# ============================================================================= +# Capacity exceeded +# ============================================================================= +@test "validate_alb_capacity: fails when at capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "75" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 
75/75 rules" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "Too many scopes or ingress rules are configured on this ALB" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Remove unused scopes or ingress rules from the ALB" + assert_contains "$output" "Increase ALB_MAX_CAPACITY in values.yaml or container-orchestration provider (AWS limit is 100 per listener)" + assert_contains "$output" "Request an AWS service quota increase for rules per ALB listener" + assert_contains "$output" "Consider using a separate ALB for additional scopes" +} + +@test "validate_alb_capacity: fails when over capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "90" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 90/75 rules" +} + +# ============================================================================= +# Configuration via get_config_value +# ============================================================================= +@test "validate_alb_capacity: uses default ALB_MAX_CAPACITY of 75" { + unset ALB_MAX_CAPACITY + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 75)" +} + +@test "validate_alb_capacity: ALB_MAX_CAPACITY from env var" { + export ALB_MAX_CAPACITY="50" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + +@test "validate_alb_capacity: ALB_MAX_CAPACITY from scope-configurations provider" { + 
export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"50"}}}}' + export ALB_MAX_CAPACITY="75" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + +@test "validate_alb_capacity: provider takes priority over env var" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"100"}}}}' + export ALB_MAX_CAPACITY="50" + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 100)" + assert_contains "$output" "✅ ALB capacity validated: 60/100 rules" +} + +@test "validate_alb_capacity: ALB_MAX_CAPACITY from container-orchestration provider" { + export CONTEXT='{"providers":{"container-orchestration":{"balancer":{"alb_capacity_threshold":"50"}}}}' + export ALB_MAX_CAPACITY="75" + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ ALB 'k8s-nullplatform-internet-facing' has reached capacity: 60/50 rules" +} + +@test "validate_alb_capacity: scope-configurations takes priority over container-orchestration" { + export CONTEXT='{"providers":{"scope-configurations":{"networking":{"alb_max_capacity":"100"}},"container-orchestration":{"balancer":{"alb_capacity_threshold":"50"}}}}' + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 60 rules (max capacity: 100)" +} + +# ============================================================================= +# AWS API errors +# ============================================================================= +@test "validate_alb_capacity: fails when describe-load-balancers fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "An error occurred (LoadBalancerNotFound)" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal 
"$status" "1" + assert_contains "$output" "❌ Failed to find load balancer 'k8s-nullplatform-internet-facing' in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer may not exist or the agent lacks permissions" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify the ALB exists: aws elbv2 describe-load-balancers --names k8s-nullplatform-internet-facing --region us-east-1" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeLoadBalancers" +} + +@test "validate_alb_capacity: fails when ALB ARN is None" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The load balancer name may be incorrect or it was deleted" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "List available ALBs: aws elbv2 describe-load-balancers --region us-east-1" + assert_contains "$output" "Check the balancer name in values.yaml or scope-configurations provider" +} + +@test "validate_alb_capacity: fails when describe-listeners fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe listeners for ALB 'k8s-nullplatform-internet-facing'" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack permissions to describe listeners" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for 
elbv2:DescribeListeners" +} + +@test "validate_alb_capacity: skips when no listeners found" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "None" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "⚠️ No listeners found on ALB 'k8s-nullplatform-internet-facing', skipping capacity check" +} + +@test "validate_alb_capacity: fails when describe-rules fails" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "Access Denied" >&2 + return 1 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Failed to describe rules for listener" + assert_contains "$output" "📋 Listener ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The agent may lack permissions to describe rules" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Check IAM permissions for elbv2:DescribeRules" +} + +# ============================================================================= +# Edge cases +# ============================================================================= +@test "validate_alb_capacity: handles zero rules" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + 
*"describe-rules"*) + echo "0" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "📋 ALB 'k8s-nullplatform-internet-facing' has 0 rules (max capacity: 75)" + assert_contains "$output" "✅ ALB capacity validated: 0/75 rules" +} + +@test "validate_alb_capacity: passes at exactly one below capacity" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "74" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "0" + assert_contains "$output" "✅ ALB capacity validated: 74/75 rules" +} + +@test "validate_alb_capacity: fails when rule count is non-numeric" { + aws() { + case "$*" in + *"describe-load-balancers"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/alb/abc123" + return 0 + ;; + *"describe-listeners"*) + echo "arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + return 0 + ;; + *"describe-rules"*) + echo "WARNING: something unexpected" + return 0 + ;; + esac + } + export -f aws + + run bash "$SCRIPT" + + assert_equal "$status" "1" + assert_contains "$output" "❌ Unexpected non-numeric rule count from listener" + assert_contains "$output" "📋 Listener ARN: arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/alb/abc123/listener1" + assert_contains "$output" "📋 Received value: WARNING: something unexpected" + assert_contains "$output" "💡 Possible causes:" + assert_contains "$output" "The AWS CLI returned an unexpected response format" + assert_contains "$output" "🔧 How to fix:" + assert_contains "$output" "Verify AWS CLI version and credentials are correct" +} + +@test "validate_alb_capacity: fails when ALB_MAX_CAPACITY is 
non-numeric" {
+  export ALB_MAX_CAPACITY="abc"
+
+  run bash "$SCRIPT"
+
+  assert_equal "$status" "1"
+  assert_contains "$output" "❌ ALB_MAX_CAPACITY must be a numeric value, got: 'abc'"
+  assert_contains "$output" "🔧 How to fix:"
+  assert_contains "$output" "Set a numeric value in values.yaml or scope-configurations provider"
+}
+
+@test "validate_alb_capacity: empty ALB ARN response triggers error" {
+  aws() {
+    case "$*" in
+      *"describe-load-balancers"*)
+        echo ""
+        return 0
+        ;;
+    esac
+  }
+  export -f aws
+
+  run bash "$SCRIPT"
+
+  assert_equal "$status" "1"
+  assert_contains "$output" "❌ Load balancer 'k8s-nullplatform-internet-facing' not found in region 'us-east-1'"
+}
diff --git a/k8s/scope/validate_alb_capacity b/k8s/scope/validate_alb_capacity
new file mode 100755
index 00000000..b431787b
--- /dev/null
+++ b/k8s/scope/validate_alb_capacity
@@ -0,0 +1,170 @@
+#!/bin/bash
+#
+# Validate that the scope's ALB still has room for additional listener rules.
+#
+# Sums the non-default rules of every listener on the ALB and exits 1 once the
+# total has reached ALB_MAX_CAPACITY. Exits 0 when the ALB is below that limit
+# or has no listeners at all.
+#
+# Environment:
+#   ALB_NAME          name of the load balancer to inspect (required)
+#   REGION            AWS region of the load balancer (required)
+#   ALB_MAX_CAPACITY  rule limit; resolved through get_config_value from the
+#                     env var or the provider config, default 75
+#
+# NOTE(review): log is not defined in this file; it is presumably provided by
+# the caller or by the sourced utils - confirm.
+#
+# NOTE(review): the IAM hints below say "elbv2:<Action>", while the CHANGELOG
+# lists the permissions as "elasticloadbalancing:<Action>" - confirm wording.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../utils/get_config_value"
+
+ALB_MAX_CAPACITY=$(get_config_value \
+  --env ALB_MAX_CAPACITY \
+  --provider '.providers["scope-configurations"].networking.alb_max_capacity' \
+  --provider '.providers["container-orchestration"].balancer.alb_capacity_threshold' \
+  --default "75"
+)
+
+if ! [[ "$ALB_MAX_CAPACITY" =~ ^[0-9]+$ ]]; then
+  log error "❌ ALB_MAX_CAPACITY must be a numeric value, got: '$ALB_MAX_CAPACITY'"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Set a numeric value in values.yaml or scope-configurations provider"
+  log error ""
+  exit 1
+fi
+
+# Force base 10. The check above accepts leading zeros, and bash arithmetic
+# would otherwise read "075" as octal 61 and abort on "08" or "09".
+ALB_MAX_CAPACITY=$((10#$ALB_MAX_CAPACITY))
+
+log info "🔍 Validating ALB capacity for '$ALB_NAME'..."
+log debug "📋 ALB: $ALB_NAME | Region: $REGION | Max capacity: $ALB_MAX_CAPACITY rules"
+
+# stderr is captured together with stdout so that, when the call fails, the
+# AWS CLI error text is available in the variable and can be logged.
+ALB_ARN=$(aws elbv2 describe-load-balancers \
+  --names "$ALB_NAME" \
+  --region "$REGION" \
+  --query 'LoadBalancers[0].LoadBalancerArn' \
+  --output text \
+  --no-paginate 2>&1) || {
+  log error "❌ Failed to find load balancer '$ALB_NAME' in region '$REGION'"
+  log error "📋 AWS CLI output: $ALB_ARN"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The load balancer may not exist or the agent lacks permissions"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Verify the ALB exists: aws elbv2 describe-load-balancers --names $ALB_NAME --region $REGION"
+  log error " • Check IAM permissions for elbv2:DescribeLoadBalancers"
+  log error ""
+  exit 1
+}
+
+# With --output text a null query result is printed as the literal "None".
+if [[ -z "$ALB_ARN" ]] || [[ "$ALB_ARN" == "None" ]]; then
+  log error "❌ Load balancer '$ALB_NAME' not found in region '$REGION'"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The load balancer name may be incorrect or it was deleted"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • List available ALBs: aws elbv2 describe-load-balancers --region $REGION"
+  log error " • Check the balancer name in values.yaml or scope-configurations provider"
+  log error ""
+  exit 1
+fi
+
+log debug "📋 ALB ARN: $ALB_ARN"
+
+LISTENER_ARNS=$(aws elbv2 describe-listeners \
+  --load-balancer-arn "$ALB_ARN" \
+  --region "$REGION" \
+  --query 'Listeners[].ListenerArn' \
+  --output text \
+  --no-paginate 2>&1) || {
+  log error "❌ Failed to describe listeners for ALB '$ALB_NAME'"
+  log error "📋 AWS CLI output: $LISTENER_ARNS"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " The agent may lack permissions to describe listeners"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Check IAM permissions for elbv2:DescribeListeners"
+  log error ""
+  exit 1
+}
+
+if [[ -z "$LISTENER_ARNS" ]] || [[ "$LISTENER_ARNS" == "None" ]]; then
+  log warn "⚠️ No listeners found on ALB '$ALB_NAME', skipping capacity check"
+  exit 0
+fi
+
+# Count rules across all listeners (excluding default rules)
+TOTAL_RULES=0
+
+# Unquoted on purpose: the text output separates the ARNs with whitespace.
+for LISTENER_ARN in $LISTENER_ARNS; do
+  RULE_COUNT=$(aws elbv2 describe-rules \
+    --listener-arn "$LISTENER_ARN" \
+    --region "$REGION" \
+    --query 'length(Rules[?!IsDefault])' \
+    --output text \
+    --no-paginate 2>&1) || {
+    log error "❌ Failed to describe rules for listener"
+    log error "📋 Listener ARN: $LISTENER_ARN"
+    log error "📋 AWS CLI output: $RULE_COUNT"
+    log error ""
+    log error "💡 Possible causes:"
+    log error " The agent may lack permissions to describe rules"
+    log error ""
+    log error "🔧 How to fix:"
+    log error " • Check IAM permissions for elbv2:DescribeRules"
+    log error ""
+    exit 1
+  }
+
+  if ! [[ "$RULE_COUNT" =~ ^[0-9]+$ ]]; then
+    log error "❌ Unexpected non-numeric rule count from listener"
+    log error "📋 Listener ARN: $LISTENER_ARN"
+    log error "📋 Received value: $RULE_COUNT"
+    log error ""
+    log error "💡 Possible causes:"
+    log error " The AWS CLI returned an unexpected response format"
+    log error ""
+    log error "🔧 How to fix:"
+    log error " • Verify AWS CLI version and credentials are correct"
+    log error " • Run manually: aws elbv2 describe-rules --listener-arn $LISTENER_ARN --region $REGION --query 'length(Rules[?!IsDefault])'"
+    log error ""
+    exit 1
+  fi
+
+  # 10# keeps a zero-padded count from being parsed as octal.
+  TOTAL_RULES=$((TOTAL_RULES + 10#$RULE_COUNT))
+done
+
+log info "📋 ALB '$ALB_NAME' has $TOTAL_RULES rules (max capacity: $ALB_MAX_CAPACITY)"
+
+# Reaching the limit already counts as full: >= rather than >.
+if ((TOTAL_RULES >= ALB_MAX_CAPACITY)); then
+  log error "❌ ALB '$ALB_NAME' has reached capacity: $TOTAL_RULES/$ALB_MAX_CAPACITY rules"
+  log error ""
+  log error "💡 Possible causes:"
+  log error " Too many scopes or ingress rules are configured on this ALB"
+  log error ""
+  log error "🔧 How to fix:"
+  log error " • Remove unused scopes or ingress rules from the ALB"
+  log error " • Increase ALB_MAX_CAPACITY in values.yaml or container-orchestration provider (AWS limit is 100 per listener)"
+  log error " • Request an AWS service quota increase for rules per ALB listener"
+  log error " • Consider using a separate ALB for additional scopes"
+  log error ""
+  exit 1
+fi
+
+log info "✅ ALB capacity validated: $TOTAL_RULES/$ALB_MAX_CAPACITY rules"
diff --git a/k8s/scope/workflows/create.yaml b/k8s/scope/workflows/create.yaml
index 6eace188..9c0f3006 100644
--- a/k8s/scope/workflows/create.yaml
+++ b/k8s/scope/workflows/create.yaml
@@ -22,6 +22,9 @@ steps:
         type: environment
       - name: OUTPUT_DIR
         type: environment
+  - name: validate alb capacity
+    type: script
+    file: "$SERVICE_PATH/scope/validate_alb_capacity"
   - name: iam
     type: workflow
     steps:
diff --git a/k8s/values.yaml b/k8s/values.yaml
index 841f8f7c..6998b0fa 100644
--- a/k8s/values.yaml
+++ b/k8s/values.yaml
@@ -10,6 +10,8 @@ configuration:
   USE_ACCOUNT_SLUG: false
   DNS_TYPE: route53 # Available values route53 | azure | external_dns
   ALB_RECONCILIATION_ENABLED: false
+  ALB_MAX_CAPACITY: 75
+  ALB_MAX_TARGET_GROUPS: 98
   DEPLOYMENT_MAX_WAIT_IN_SECONDS: 600
   DEPLOYMENT_TEMPLATE: "$SERVICE_PATH/deployment/templates/deployment.yaml.tpl"
   SECRET_TEMPLATE: "$SERVICE_PATH/deployment/templates/secret.yaml.tpl"