From b0a55c2f4bc8703c2991c10090b56d44a0a86a35 Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Fri, 15 May 2026 16:46:21 -0300 Subject: [PATCH 01/10] feat(cert_manager): add Azure SP auth support Adds azure_client_secret variable for environments using service principal auth instead of workload identity, and wires it into the Azure DNS01 solver configuration in the ClusterIssuer. --- .../commons/cert_manager/.terraform.lock.hcl | 17 ++++++++++++++++ infrastructure/commons/cert_manager/locals.tf | 20 ++++++++++--------- infrastructure/commons/cert_manager/main.tf | 14 ++++++++++++- .../commons/cert_manager/providers.tf | 4 ++++ .../cert_manager_azure_values.tmpl.yaml | 6 +++++- .../commons/cert_manager/variables.tf | 7 +++++++ 6 files changed, 57 insertions(+), 11 deletions(-) diff --git a/infrastructure/commons/cert_manager/.terraform.lock.hcl b/infrastructure/commons/cert_manager/.terraform.lock.hcl index 8251ed77..7cf21362 100644 --- a/infrastructure/commons/cert_manager/.terraform.lock.hcl +++ b/infrastructure/commons/cert_manager/.terraform.lock.hcl @@ -17,3 +17,20 @@ provider "registry.opentofu.org/hashicorp/helm" { "zh:f6fe7ecfafc344f4e6aecacf5ae12ac73b94389b9679dcd0f04fc5ff45bdc066", ] } + +provider "registry.opentofu.org/hashicorp/kubernetes" { + version = "2.38.0" + constraints = "~> 2.0" + hashes = [ + "h1:ems+O2dA7atxMWpbtqIrsH7Oa+u+ERWSfpMaFnZPbh0=", + "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", + "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", + "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", + "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", + "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", + "zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", + "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", + "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", + 
"zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", + ] +} diff --git a/infrastructure/commons/cert_manager/locals.tf b/infrastructure/commons/cert_manager/locals.tf index 5a4ad10b..acd8a2ec 100644 --- a/infrastructure/commons/cert_manager/locals.tf +++ b/infrastructure/commons/cert_manager/locals.tf @@ -19,12 +19,14 @@ locals { } : {}, var.cloud_provider == "azure" ? { - enabled = true - subscription_id = var.azure_subscription_id - resource_group_name = var.azure_resource_group_name - client_id = var.azure_client_id - tenant_id = var.azure_tenant_id - hosted_zone_name = var.azure_hosted_zone_name + enabled = true + subscription_id = var.azure_subscription_id + resource_group_name = var.azure_resource_group_name + client_id = var.azure_client_id + tenant_id = var.azure_tenant_id + hosted_zone_name = var.azure_hosted_zone_name + use_workload_identity = var.azure_client_secret == "" + client_secret_secret_name = var.azure_client_secret != "" ? "azure-cert-manager-sp" : "" } : {}, var.cloud_provider == "aws" ? { @@ -67,9 +69,9 @@ locals { "eks.amazonaws.com/role-arn" = var.aws_sa_arn } - azure = { + azure = var.azure_client_secret == "" ? { "azure.workload.identity/client-id" = var.azure_client_id - } + } : {} oci = { "oci.oraclecloud.com/workload-identity-principal" = var.oci_sa_ocid @@ -87,7 +89,7 @@ locals { lookup(local.annotations_by_provider, var.cloud_provider, {}) ) } - podLabels = var.cloud_provider == "azure" ? { + podLabels = var.cloud_provider == "azure" && var.azure_client_secret == "" ? 
{ "azure.workload.identity/use" = "true" } : {} dns01RecursiveNameservers = "8.8.8.8:53,1.1.1.1:53" diff --git a/infrastructure/commons/cert_manager/main.tf b/infrastructure/commons/cert_manager/main.tf index 284dbc7e..961fa9e9 100644 --- a/infrastructure/commons/cert_manager/main.tf +++ b/infrastructure/commons/cert_manager/main.tf @@ -26,6 +26,18 @@ resource "helm_release" "cert_manager" { } +resource "kubernetes_secret_v1" "azure_cert_manager_sp" { + count = var.cloud_provider == "azure" && var.azure_client_secret != "" ? 1 : 0 + metadata { + name = "azure-cert-manager-sp" + namespace = var.cert_manager_namespace + } + data = { + "client-secret" = var.azure_client_secret + } + depends_on = [helm_release.cert_manager] +} + resource "helm_release" "cert_manager_config" { name = "cert-manager-config" repository = "https://nullplatform.github.io/helm-charts" @@ -53,7 +65,7 @@ resource "helm_release" "cert_manager_config" { local.cert_manager_provider_values, ] - depends_on = [helm_release.cert_manager] + depends_on = [helm_release.cert_manager, kubernetes_secret_v1.azure_cert_manager_sp] } #########webhook oci############ diff --git a/infrastructure/commons/cert_manager/providers.tf b/infrastructure/commons/cert_manager/providers.tf index 94681f0d..ce4189a7 100644 --- a/infrastructure/commons/cert_manager/providers.tf +++ b/infrastructure/commons/cert_manager/providers.tf @@ -4,5 +4,9 @@ terraform { source = "hashicorp/helm" version = "~> 3.0" } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } } } diff --git a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml index 9cd81552..bf96eb72 100644 --- a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml +++ b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml @@ -4,4 +4,8 @@ azure: clientID: "${client_id}" tenantID: 
"${tenant_id}" hostedZoneName: "${hosted_zone_name}" - useWorkloadIdentity: true + useWorkloadIdentity: ${use_workload_identity} +%{ if !use_workload_identity ~} + clientSecret: + secretName: "${client_secret_secret_name}" +%{ endif ~} diff --git a/infrastructure/commons/cert_manager/variables.tf b/infrastructure/commons/cert_manager/variables.tf index 4cb92446..7a129d88 100644 --- a/infrastructure/commons/cert_manager/variables.tf +++ b/infrastructure/commons/cert_manager/variables.tf @@ -34,6 +34,13 @@ variable "azure_client_id" { default = "" } +variable "azure_client_secret" { + description = "The Azure service principal client secret. When set, cert-manager uses SP auth (clientSecretSecretRef). When empty, workload identity (managedIdentity) is used." + type = string + sensitive = true + default = "" +} + variable "private_domain_name" { description = "The private domain name for internal certificate issuance" From e94218d770de1173ea1c0dd13a9f050b30e0de34 Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Fri, 15 May 2026 17:23:17 -0300 Subject: [PATCH 02/10] =?UTF-8?q?refactor(cert=5Fmanager):=20remove=20SP?= =?UTF-8?q?=20auth=20=E2=80=94=20always=20use=20workload=20identity=20for?= =?UTF-8?q?=20Azure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infrastructure/commons/cert_manager/locals.tf | 20 +++++++++---------- infrastructure/commons/cert_manager/main.tf | 14 +------------ .../cert_manager_azure_values.tmpl.yaml | 6 +----- .../commons/cert_manager/variables.tf | 8 -------- 4 files changed, 11 insertions(+), 37 deletions(-) diff --git a/infrastructure/commons/cert_manager/locals.tf b/infrastructure/commons/cert_manager/locals.tf index acd8a2ec..5a4ad10b 100644 --- a/infrastructure/commons/cert_manager/locals.tf +++ b/infrastructure/commons/cert_manager/locals.tf @@ -19,14 +19,12 @@ locals { } : {}, var.cloud_provider == "azure" ? 
{ - enabled = true - subscription_id = var.azure_subscription_id - resource_group_name = var.azure_resource_group_name - client_id = var.azure_client_id - tenant_id = var.azure_tenant_id - hosted_zone_name = var.azure_hosted_zone_name - use_workload_identity = var.azure_client_secret == "" - client_secret_secret_name = var.azure_client_secret != "" ? "azure-cert-manager-sp" : "" + enabled = true + subscription_id = var.azure_subscription_id + resource_group_name = var.azure_resource_group_name + client_id = var.azure_client_id + tenant_id = var.azure_tenant_id + hosted_zone_name = var.azure_hosted_zone_name } : {}, var.cloud_provider == "aws" ? { @@ -69,9 +67,9 @@ locals { "eks.amazonaws.com/role-arn" = var.aws_sa_arn } - azure = var.azure_client_secret == "" ? { + azure = { "azure.workload.identity/client-id" = var.azure_client_id - } : {} + } oci = { "oci.oraclecloud.com/workload-identity-principal" = var.oci_sa_ocid @@ -89,7 +87,7 @@ locals { lookup(local.annotations_by_provider, var.cloud_provider, {}) ) } - podLabels = var.cloud_provider == "azure" && var.azure_client_secret == "" ? { + podLabels = var.cloud_provider == "azure" ? { "azure.workload.identity/use" = "true" } : {} dns01RecursiveNameservers = "8.8.8.8:53,1.1.1.1:53" diff --git a/infrastructure/commons/cert_manager/main.tf b/infrastructure/commons/cert_manager/main.tf index 961fa9e9..284dbc7e 100644 --- a/infrastructure/commons/cert_manager/main.tf +++ b/infrastructure/commons/cert_manager/main.tf @@ -26,18 +26,6 @@ resource "helm_release" "cert_manager" { } -resource "kubernetes_secret_v1" "azure_cert_manager_sp" { - count = var.cloud_provider == "azure" && var.azure_client_secret != "" ? 
1 : 0 - metadata { - name = "azure-cert-manager-sp" - namespace = var.cert_manager_namespace - } - data = { - "client-secret" = var.azure_client_secret - } - depends_on = [helm_release.cert_manager] -} - resource "helm_release" "cert_manager_config" { name = "cert-manager-config" repository = "https://nullplatform.github.io/helm-charts" @@ -65,7 +53,7 @@ resource "helm_release" "cert_manager_config" { local.cert_manager_provider_values, ] - depends_on = [helm_release.cert_manager, kubernetes_secret_v1.azure_cert_manager_sp] + depends_on = [helm_release.cert_manager] } #########webhook oci############ diff --git a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml index bf96eb72..9cd81552 100644 --- a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml +++ b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml @@ -4,8 +4,4 @@ azure: clientID: "${client_id}" tenantID: "${tenant_id}" hostedZoneName: "${hosted_zone_name}" - useWorkloadIdentity: ${use_workload_identity} -%{ if !use_workload_identity ~} - clientSecret: - secretName: "${client_secret_secret_name}" -%{ endif ~} + useWorkloadIdentity: true diff --git a/infrastructure/commons/cert_manager/variables.tf b/infrastructure/commons/cert_manager/variables.tf index 7a129d88..e4b67e6a 100644 --- a/infrastructure/commons/cert_manager/variables.tf +++ b/infrastructure/commons/cert_manager/variables.tf @@ -34,14 +34,6 @@ variable "azure_client_id" { default = "" } -variable "azure_client_secret" { - description = "The Azure service principal client secret. When set, cert-manager uses SP auth (clientSecretSecretRef). When empty, workload identity (managedIdentity) is used." 
- type = string - sensitive = true - default = "" -} - - variable "private_domain_name" { description = "The private domain name for internal certificate issuance" type = string From 13bbec018bd392e95561cbd6b623899a4712a93d Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Fri, 15 May 2026 16:46:46 -0300 Subject: [PATCH 03/10] feat(external_dns): add Azure DNS provider support Adds azure_* variables and wires Azure credentials into the external-dns helm values. Supports both service principal and workload identity flows. --- infrastructure/commons/external_dns/locals.tf | 8 ++++---- infrastructure/commons/external_dns/secret.tf | 8 +++++++- infrastructure/commons/external_dns/variables.tf | 7 +++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/infrastructure/commons/external_dns/locals.tf b/infrastructure/commons/external_dns/locals.tf index f646eab4..8901fdf6 100644 --- a/infrastructure/commons/external_dns/locals.tf +++ b/infrastructure/commons/external_dns/locals.tf @@ -92,13 +92,13 @@ locals { provider = { name = "azure" } serviceAccount = { create = true - annotations = { + annotations = var.azure_client_secret == "" ? { "azure.workload.identity/client-id" = var.azure_client_id - } + } : {} } - podLabels = { + podLabels = var.azure_client_secret == "" ? { "azure.workload.identity/use" = "true" - } + } : {} extraVolumes = [ { name = "azure-config" diff --git a/infrastructure/commons/external_dns/secret.tf b/infrastructure/commons/external_dns/secret.tf index 7f1a4d64..09a33c49 100644 --- a/infrastructure/commons/external_dns/secret.tf +++ b/infrastructure/commons/external_dns/secret.tf @@ -19,7 +19,13 @@ resource "kubernetes_secret_v1" "external_dns_azure_config" { } data = { - "azure.json" = jsonencode({ + "azure.json" = var.azure_client_secret != "" ? 
jsonencode({ + tenantId = var.azure_tenant_id + subscriptionId = var.azure_subscription_id + resourceGroup = var.azure_resource_group + aadClientId = var.azure_client_id + aadClientSecret = var.azure_client_secret + }) : jsonencode({ tenantId = var.azure_tenant_id subscriptionId = var.azure_subscription_id resourceGroup = var.azure_resource_group diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index 3b81fe10..79a1df3d 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -176,3 +176,10 @@ variable "azure_tenant_id" { default = "" } +variable "azure_client_secret" { + description = "Azure SP client secret. When set, uses service principal auth instead of workload identity." + type = string + sensitive = true + default = "" +} + From 6b6d75b47546d325695cc6044c417b59b4e84244 Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Fri, 15 May 2026 17:39:36 -0300 Subject: [PATCH 04/10] =?UTF-8?q?refactor(external=5Fdns):=20remove=20SP?= =?UTF-8?q?=20auth=20=E2=80=94=20always=20use=20workload=20identity=20for?= =?UTF-8?q?=20Azure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- infrastructure/commons/external_dns/locals.tf | 8 ++++---- infrastructure/commons/external_dns/secret.tf | 8 +------- infrastructure/commons/external_dns/variables.tf | 6 ------ 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/infrastructure/commons/external_dns/locals.tf b/infrastructure/commons/external_dns/locals.tf index 8901fdf6..f646eab4 100644 --- a/infrastructure/commons/external_dns/locals.tf +++ b/infrastructure/commons/external_dns/locals.tf @@ -92,13 +92,13 @@ locals { provider = { name = "azure" } serviceAccount = { create = true - annotations = var.azure_client_secret == "" ? 
{ + annotations = { "azure.workload.identity/client-id" = var.azure_client_id - } : {} + } } - podLabels = var.azure_client_secret == "" ? { + podLabels = { "azure.workload.identity/use" = "true" - } : {} + } extraVolumes = [ { name = "azure-config" diff --git a/infrastructure/commons/external_dns/secret.tf b/infrastructure/commons/external_dns/secret.tf index 09a33c49..7f1a4d64 100644 --- a/infrastructure/commons/external_dns/secret.tf +++ b/infrastructure/commons/external_dns/secret.tf @@ -19,13 +19,7 @@ resource "kubernetes_secret_v1" "external_dns_azure_config" { } data = { - "azure.json" = var.azure_client_secret != "" ? jsonencode({ - tenantId = var.azure_tenant_id - subscriptionId = var.azure_subscription_id - resourceGroup = var.azure_resource_group - aadClientId = var.azure_client_id - aadClientSecret = var.azure_client_secret - }) : jsonencode({ + "azure.json" = jsonencode({ tenantId = var.azure_tenant_id subscriptionId = var.azure_subscription_id resourceGroup = var.azure_resource_group diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index 79a1df3d..c214e17b 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -176,10 +176,4 @@ variable "azure_tenant_id" { default = "" } -variable "azure_client_secret" { - description = "Azure SP client secret. When set, uses service principal auth instead of workload identity." - type = string - sensitive = true - default = "" -} From 2025224aee0ac1a955f745151b2b9b916bc9877b Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Fri, 15 May 2026 16:47:15 -0300 Subject: [PATCH 05/10] fix(istio): default istiod to 2 replicas to avoid PDB blocking node drains The istiod chart installs a PDB with minAvailable=1. A single replica istiod blocks node drains (e.g. during EKS AMI upgrades). Setting both replicaCount and autoscaleMin to 2 prevents the HPA from scaling back to 1. 
--- infrastructure/commons/istio/variables.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infrastructure/commons/istio/variables.tf b/infrastructure/commons/istio/variables.tf index ca6a3257..cc29466b 100644 --- a/infrastructure/commons/istio/variables.tf +++ b/infrastructure/commons/istio/variables.tf @@ -21,9 +21,9 @@ variable "istiod_version" { } variable "istiod_replicas" { - description = "Number of istiod replicas. Default is 1 to preserve the previous behavior of this module for existing consumers; set to 2 (recommended) to let the pilot deployment tolerate node drains — the istiod chart installs a PodDisruptionBudget with minAvailable=1, and a single-replica istiod therefore blocks node rolling updates (e.g. EKS AMI bumps). This value is applied to both pilot.replicaCount and pilot.autoscaleMin; without the autoscaleMin override, the HPA (enabled by default with autoscaleMin=1) would scale back to 1 replica shortly after install." + description = "Number of istiod replicas. Set to 2+ to avoid PDB blocking node drains. Applied to both pilot.replicaCount and pilot.autoscaleMin to prevent the HPA from scaling back to 1." type = number - default = 1 + default = 2 validation { condition = var.istiod_replicas >= 1 From 20239d95ec88af95215f18eaee98875f4566240e Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Mon, 18 May 2026 13:48:50 -0300 Subject: [PATCH 06/10] =?UTF-8?q?feat(azure/wi):=20make=20workload=20ident?= =?UTF-8?q?ity=20opt-out=20=E2=80=94=20enabled=20by=20default,=20configura?= =?UTF-8?q?ble?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add azure_workload_identity_enabled (default: true) to cert_manager and external_dns. When false, WI annotations and pod labels are omitted, azure_client_id is not required, and useWorkloadIdentityExtension is set to false in the azure.json secret. 
--- infrastructure/commons/cert_manager/locals.tf | 19 ++++++++++--------- .../cert_manager_azure_values.tmpl.yaml | 4 +++- .../commons/cert_manager/validation.tf | 4 ++-- .../commons/cert_manager/variables.tf | 6 ++++++ infrastructure/commons/external_dns/locals.tf | 8 ++++---- infrastructure/commons/external_dns/secret.tf | 2 +- .../commons/external_dns/validation.tf | 4 ++-- .../commons/external_dns/variables.tf | 8 +++++++- 8 files changed, 35 insertions(+), 20 deletions(-) diff --git a/infrastructure/commons/cert_manager/locals.tf b/infrastructure/commons/cert_manager/locals.tf index 5a4ad10b..38d658f2 100644 --- a/infrastructure/commons/cert_manager/locals.tf +++ b/infrastructure/commons/cert_manager/locals.tf @@ -19,12 +19,13 @@ locals { } : {}, var.cloud_provider == "azure" ? { - enabled = true - subscription_id = var.azure_subscription_id - resource_group_name = var.azure_resource_group_name - client_id = var.azure_client_id - tenant_id = var.azure_tenant_id - hosted_zone_name = var.azure_hosted_zone_name + enabled = true + subscription_id = var.azure_subscription_id + resource_group_name = var.azure_resource_group_name + client_id = var.azure_client_id + tenant_id = var.azure_tenant_id + hosted_zone_name = var.azure_hosted_zone_name + use_workload_identity = var.azure_workload_identity_enabled } : {}, var.cloud_provider == "aws" ? { @@ -67,9 +68,9 @@ locals { "eks.amazonaws.com/role-arn" = var.aws_sa_arn } - azure = { + azure = var.azure_workload_identity_enabled ? { "azure.workload.identity/client-id" = var.azure_client_id - } + } : {} oci = { "oci.oraclecloud.com/workload-identity-principal" = var.oci_sa_ocid @@ -87,7 +88,7 @@ locals { lookup(local.annotations_by_provider, var.cloud_provider, {}) ) } - podLabels = var.cloud_provider == "azure" ? { + podLabels = var.cloud_provider == "azure" && var.azure_workload_identity_enabled ? 
{ "azure.workload.identity/use" = "true" } : {} dns01RecursiveNameservers = "8.8.8.8:53,1.1.1.1:53" diff --git a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml index 9cd81552..6aa091a3 100644 --- a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml +++ b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml @@ -1,7 +1,9 @@ azure: subscriptionID: "${subscription_id}" resourceGroupName: "${resource_group_name}" - clientID: "${client_id}" tenantID: "${tenant_id}" hostedZoneName: "${hosted_zone_name}" +%{ if use_workload_identity ~} + clientID: "${client_id}" useWorkloadIdentity: true +%{ endif ~} diff --git a/infrastructure/commons/cert_manager/validation.tf b/infrastructure/commons/cert_manager/validation.tf index ed657ff9..d0acddc4 100644 --- a/infrastructure/commons/cert_manager/validation.tf +++ b/infrastructure/commons/cert_manager/validation.tf @@ -17,8 +17,8 @@ resource "terraform_data" "provider_validation" { error_message = "aws_region is required when cloud_provider is 'aws'." } precondition { - condition = var.cloud_provider != "azure" || length(var.azure_client_id) > 0 - error_message = "azure_client_id is required when cloud_provider is 'azure'." + condition = var.cloud_provider != "azure" || !var.azure_workload_identity_enabled || length(var.azure_client_id) > 0 + error_message = "azure_client_id is required when cloud_provider is 'azure' and azure_workload_identity_enabled is true." 
} precondition { condition = var.cloud_provider != "azure" || length(var.azure_subscription_id) > 0 diff --git a/infrastructure/commons/cert_manager/variables.tf b/infrastructure/commons/cert_manager/variables.tf index e4b67e6a..14e6b1f2 100644 --- a/infrastructure/commons/cert_manager/variables.tf +++ b/infrastructure/commons/cert_manager/variables.tf @@ -34,6 +34,12 @@ variable "azure_client_id" { default = "" } +variable "azure_workload_identity_enabled" { + description = "Enable Workload Identity for Azure DNS solver. When false, WI annotations and labels are omitted and azure_client_id is not required." + type = bool + default = true +} + variable "private_domain_name" { description = "The private domain name for internal certificate issuance" type = string diff --git a/infrastructure/commons/external_dns/locals.tf b/infrastructure/commons/external_dns/locals.tf index f646eab4..3081d79b 100644 --- a/infrastructure/commons/external_dns/locals.tf +++ b/infrastructure/commons/external_dns/locals.tf @@ -92,13 +92,13 @@ locals { provider = { name = "azure" } serviceAccount = { create = true - annotations = { + annotations = var.azure_workload_identity_enabled ? { "azure.workload.identity/client-id" = var.azure_client_id - } + } : {} } - podLabels = { + podLabels = var.azure_workload_identity_enabled ? 
{ "azure.workload.identity/use" = "true" - } + } : {} extraVolumes = [ { name = "azure-config" diff --git a/infrastructure/commons/external_dns/secret.tf b/infrastructure/commons/external_dns/secret.tf index 7f1a4d64..3f878ff6 100644 --- a/infrastructure/commons/external_dns/secret.tf +++ b/infrastructure/commons/external_dns/secret.tf @@ -23,7 +23,7 @@ resource "kubernetes_secret_v1" "external_dns_azure_config" { tenantId = var.azure_tenant_id subscriptionId = var.azure_subscription_id resourceGroup = var.azure_resource_group - useWorkloadIdentityExtension = true + useWorkloadIdentityExtension = var.azure_workload_identity_enabled }) } diff --git a/infrastructure/commons/external_dns/validation.tf b/infrastructure/commons/external_dns/validation.tf index 6b531498..98752948 100644 --- a/infrastructure/commons/external_dns/validation.tf +++ b/infrastructure/commons/external_dns/validation.tf @@ -29,8 +29,8 @@ resource "terraform_data" "provider_validation" { error_message = "oci_region is required when dns_provider_name is 'oci'." } precondition { - condition = var.dns_provider_name != "azure" || length(var.azure_client_id) > 0 - error_message = "azure_client_id is required when dns_provider_name is 'azure'." + condition = var.dns_provider_name != "azure" || !var.azure_workload_identity_enabled || length(var.azure_client_id) > 0 + error_message = "azure_client_id is required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true." 
} precondition { condition = var.dns_provider_name != "azure" || length(var.azure_subscription_id) > 0 diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index c214e17b..00ead36f 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -152,8 +152,14 @@ variable "dns_provider_name" { # AZURE CONFIGURATION ############################################################################### +variable "azure_workload_identity_enabled" { + description = "Enable Workload Identity for Azure DNS provider. When false, WI annotations and labels are omitted and azure_client_id is not required." + type = bool + default = true +} + variable "azure_client_id" { - description = "Client ID of the Azure Managed Identity for Workload Identity (required when dns_provider_name is 'azure')" + description = "Client ID of the Azure Managed Identity for Workload Identity (required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true)" type = string default = "" } From b3177dfb38fa986e25cc10b363ed91fe45ae769f Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Mon, 18 May 2026 20:19:09 -0300 Subject: [PATCH 07/10] feat(azure/wi): add Service Principal fallback when workload identity is disabled cert_manager and external_dns now support both auth modes for Azure: - WI (default): azure_workload_identity_enabled=true, uses SA annotation + pod label - SP (opt-out): azure_workload_identity_enabled=false, requires azure_client_secret azure_client_id is now always required for Azure regardless of auth mode. 
--- infrastructure/commons/cert_manager/locals.tf | 1 + .../cert_manager_azure_values.tmpl.yaml | 4 +++- .../commons/cert_manager/validation.tf | 8 ++++++-- .../commons/cert_manager/variables.tf | 9 ++++++++- infrastructure/commons/external_dns/secret.tf | 18 ++++++++++++------ .../commons/external_dns/validation.tf | 8 ++++++-- .../commons/external_dns/variables.tf | 9 ++++++++- 7 files changed, 44 insertions(+), 13 deletions(-) diff --git a/infrastructure/commons/cert_manager/locals.tf b/infrastructure/commons/cert_manager/locals.tf index 38d658f2..8fcbd0c8 100644 --- a/infrastructure/commons/cert_manager/locals.tf +++ b/infrastructure/commons/cert_manager/locals.tf @@ -23,6 +23,7 @@ locals { subscription_id = var.azure_subscription_id resource_group_name = var.azure_resource_group_name client_id = var.azure_client_id + client_secret = var.azure_client_secret tenant_id = var.azure_tenant_id hosted_zone_name = var.azure_hosted_zone_name use_workload_identity = var.azure_workload_identity_enabled diff --git a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml index 6aa091a3..6ee80671 100644 --- a/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml +++ b/infrastructure/commons/cert_manager/templates/cert_manager_azure_values.tmpl.yaml @@ -3,7 +3,9 @@ azure: resourceGroupName: "${resource_group_name}" tenantID: "${tenant_id}" hostedZoneName: "${hosted_zone_name}" -%{ if use_workload_identity ~} clientID: "${client_id}" +%{ if use_workload_identity ~} useWorkloadIdentity: true +%{ else ~} + clientSecret: "${client_secret}" %{ endif ~} diff --git a/infrastructure/commons/cert_manager/validation.tf b/infrastructure/commons/cert_manager/validation.tf index d0acddc4..c5388f48 100644 --- a/infrastructure/commons/cert_manager/validation.tf +++ b/infrastructure/commons/cert_manager/validation.tf @@ -17,8 +17,12 @@ resource 
"terraform_data" "provider_validation" { error_message = "aws_region is required when cloud_provider is 'aws'." } precondition { - condition = var.cloud_provider != "azure" || !var.azure_workload_identity_enabled || length(var.azure_client_id) > 0 - error_message = "azure_client_id is required when cloud_provider is 'azure' and azure_workload_identity_enabled is true." + condition = var.cloud_provider != "azure" || length(var.azure_client_id) > 0 + error_message = "azure_client_id is required when cloud_provider is 'azure'." + } + precondition { + condition = var.cloud_provider != "azure" || var.azure_workload_identity_enabled || length(var.azure_client_secret) > 0 + error_message = "azure_client_secret is required when cloud_provider is 'azure' and azure_workload_identity_enabled is false." } precondition { condition = var.cloud_provider != "azure" || length(var.azure_subscription_id) > 0 diff --git a/infrastructure/commons/cert_manager/variables.tf b/infrastructure/commons/cert_manager/variables.tf index 14e6b1f2..dc0db0d9 100644 --- a/infrastructure/commons/cert_manager/variables.tf +++ b/infrastructure/commons/cert_manager/variables.tf @@ -35,11 +35,18 @@ variable "azure_client_id" { } variable "azure_workload_identity_enabled" { - description = "Enable Workload Identity for Azure DNS solver. When false, WI annotations and labels are omitted and azure_client_id is not required." + description = "Enable Workload Identity for Azure DNS solver. When false, Service Principal auth is used and azure_client_secret is required." type = bool default = true } +variable "azure_client_secret" { + description = "Azure AD client secret for Service Principal auth (required when cloud_provider is 'azure' and azure_workload_identity_enabled is false)." 
+ type = string + sensitive = true + default = "" +} + variable "private_domain_name" { description = "The private domain name for internal certificate issuance" type = string diff --git a/infrastructure/commons/external_dns/secret.tf b/infrastructure/commons/external_dns/secret.tf index 3f878ff6..934437e3 100644 --- a/infrastructure/commons/external_dns/secret.tf +++ b/infrastructure/commons/external_dns/secret.tf @@ -19,12 +19,18 @@ resource "kubernetes_secret_v1" "external_dns_azure_config" { } data = { - "azure.json" = jsonencode({ - tenantId = var.azure_tenant_id - subscriptionId = var.azure_subscription_id - resourceGroup = var.azure_resource_group - useWorkloadIdentityExtension = var.azure_workload_identity_enabled - }) + "azure.json" = jsonencode(merge( + { + tenantId = var.azure_tenant_id + subscriptionId = var.azure_subscription_id + resourceGroup = var.azure_resource_group + useWorkloadIdentityExtension = var.azure_workload_identity_enabled + }, + var.azure_workload_identity_enabled ? {} : { + aadClientId = var.azure_client_id + aadClientSecret = var.azure_client_secret + } + )) } depends_on = [kubernetes_namespace_v1.external_dns] diff --git a/infrastructure/commons/external_dns/validation.tf b/infrastructure/commons/external_dns/validation.tf index 98752948..679b0aaa 100644 --- a/infrastructure/commons/external_dns/validation.tf +++ b/infrastructure/commons/external_dns/validation.tf @@ -29,8 +29,12 @@ resource "terraform_data" "provider_validation" { error_message = "oci_region is required when dns_provider_name is 'oci'." } precondition { - condition = var.dns_provider_name != "azure" || !var.azure_workload_identity_enabled || length(var.azure_client_id) > 0 - error_message = "azure_client_id is required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true." + condition = var.dns_provider_name != "azure" || length(var.azure_client_id) > 0 + error_message = "azure_client_id is required when dns_provider_name is 'azure'." 
+ } + precondition { + condition = var.dns_provider_name != "azure" || var.azure_workload_identity_enabled || length(var.azure_client_secret) > 0 + error_message = "azure_client_secret is required when dns_provider_name is 'azure' and azure_workload_identity_enabled is false." } precondition { condition = var.dns_provider_name != "azure" || length(var.azure_subscription_id) > 0 diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index 00ead36f..4082cdb7 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -153,11 +153,18 @@ variable "dns_provider_name" { ############################################################################### variable "azure_workload_identity_enabled" { - description = "Enable Workload Identity for Azure DNS provider. When false, WI annotations and labels are omitted and azure_client_id is not required." + description = "Enable Workload Identity for Azure DNS provider. When false, Service Principal auth is used and azure_client_secret is required." type = bool default = true } +variable "azure_client_secret" { + description = "Azure AD client secret for Service Principal auth (required when dns_provider_name is 'azure' and azure_workload_identity_enabled is false)." + type = string + sensitive = true + default = "" +} + variable "azure_client_id" { description = "Client ID of the Azure Managed Identity for Workload Identity (required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true)" type = string From 9ae5e7bf052186fb2de8e9d2bda88ddc5e32c0c3 Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Mon, 18 May 2026 20:30:20 -0300 Subject: [PATCH 08/10] feat(azure/wi): require federated credential ID when workload identity is enabled When azure_workload_identity_enabled=true (default), callers must pass azure_federated_credential_id from module.iam. 
This enforces that the Azure AD federation exists before the Helm release runs, and creates the correct apply-time dependency ordering automatically via Tofu references. --- .../tests/cert_manager_azure.tftest.hcl | 11 +++++----- .../cert_manager_cross_provider.tftest.hcl | 22 ++++++++++--------- .../commons/cert_manager/validation.tf | 4 ++++ .../commons/cert_manager/variables.tf | 6 +++++ .../commons/external_dns/validation.tf | 4 ++++ .../commons/external_dns/variables.tf | 6 +++++ 6 files changed, 38 insertions(+), 15 deletions(-) diff --git a/infrastructure/commons/cert_manager/tests/cert_manager_azure.tftest.hcl b/infrastructure/commons/cert_manager/tests/cert_manager_azure.tftest.hcl index d59de64d..595e2f0c 100644 --- a/infrastructure/commons/cert_manager/tests/cert_manager_azure.tftest.hcl +++ b/infrastructure/commons/cert_manager/tests/cert_manager_azure.tftest.hcl @@ -5,11 +5,12 @@ variables { hosted_zone_name = "myorg.nullimplementation.com" account_slug = "myorg" private_domain_name = "myorg.nullimplementation.com" - azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" - azure_subscription_id = "00000000-0000-0000-0000-000000000000" - azure_resource_group_name = "rg-test" - azure_tenant_id = "11111111-2222-3333-4444-555555555555" - azure_hosted_zone_name = "myorg.nullimplementation.com" + azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + azure_federated_credential_id = "/subscriptions/00000000/resourceGroups/rg-test/providers/Microsoft.ManagedIdentity/userAssignedIdentities/cert-manager/federatedIdentityCredentials/cert-manager-federated" + azure_subscription_id = "00000000-0000-0000-0000-000000000000" + azure_resource_group_name = "rg-test" + azure_tenant_id = "11111111-2222-3333-4444-555555555555" + azure_hosted_zone_name = "myorg.nullimplementation.com" } # Validates Azure provider config plans successfully diff --git a/infrastructure/commons/cert_manager/tests/cert_manager_cross_provider.tftest.hcl 
b/infrastructure/commons/cert_manager/tests/cert_manager_cross_provider.tftest.hcl index 452ab66e..e34b0ae7 100644 --- a/infrastructure/commons/cert_manager/tests/cert_manager_cross_provider.tftest.hcl +++ b/infrastructure/commons/cert_manager/tests/cert_manager_cross_provider.tftest.hcl @@ -23,11 +23,12 @@ run "gcp_vars_not_required_for_azure" { hosted_zone_name = "myorg.example.com" account_slug = "myorg" private_domain_name = "myorg.example.com" - azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" - azure_subscription_id = "00000000-0000-0000-0000-000000000000" - azure_resource_group_name = "rg-test" - azure_tenant_id = "11111111-2222-3333-4444-555555555555" - azure_hosted_zone_name = "myorg.example.com" + azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + azure_federated_credential_id = "/subscriptions/00000000/resourceGroups/rg-test/providers/Microsoft.ManagedIdentity/userAssignedIdentities/cert-manager/federatedIdentityCredentials/cert-manager-federated" + azure_subscription_id = "00000000-0000-0000-0000-000000000000" + azure_resource_group_name = "rg-test" + azure_tenant_id = "11111111-2222-3333-4444-555555555555" + azure_hosted_zone_name = "myorg.example.com" # gcp_sa_email and project_id intentionally left empty } @@ -126,11 +127,12 @@ run "oci_webhook_not_deployed_for_azure" { hosted_zone_name = "myorg.example.com" account_slug = "myorg" private_domain_name = "myorg.example.com" - azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" - azure_subscription_id = "00000000-0000-0000-0000-000000000000" - azure_resource_group_name = "rg-test" - azure_tenant_id = "11111111-2222-3333-4444-555555555555" - azure_hosted_zone_name = "myorg.example.com" + azure_client_id = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + azure_federated_credential_id = "/subscriptions/00000000/resourceGroups/rg-test/providers/Microsoft.ManagedIdentity/userAssignedIdentities/cert-manager/federatedIdentityCredentials/cert-manager-federated" + azure_subscription_id = 
"00000000-0000-0000-0000-000000000000" + azure_resource_group_name = "rg-test" + azure_tenant_id = "11111111-2222-3333-4444-555555555555" + azure_hosted_zone_name = "myorg.example.com" } assert { diff --git a/infrastructure/commons/cert_manager/validation.tf b/infrastructure/commons/cert_manager/validation.tf index c5388f48..a17eebfe 100644 --- a/infrastructure/commons/cert_manager/validation.tf +++ b/infrastructure/commons/cert_manager/validation.tf @@ -20,6 +20,10 @@ resource "terraform_data" "provider_validation" { condition = var.cloud_provider != "azure" || length(var.azure_client_id) > 0 error_message = "azure_client_id is required when cloud_provider is 'azure'." } + precondition { + condition = var.cloud_provider != "azure" || !var.azure_workload_identity_enabled || length(var.azure_federated_credential_id) > 0 + error_message = "azure_federated_credential_id is required when cloud_provider is 'azure' and azure_workload_identity_enabled is true. Use module.iam to create the federated identity credential and pass its id output." + } precondition { condition = var.cloud_provider != "azure" || var.azure_workload_identity_enabled || length(var.azure_client_secret) > 0 error_message = "azure_client_secret is required when cloud_provider is 'azure' and azure_workload_identity_enabled is false." diff --git a/infrastructure/commons/cert_manager/variables.tf b/infrastructure/commons/cert_manager/variables.tf index dc0db0d9..fa185408 100644 --- a/infrastructure/commons/cert_manager/variables.tf +++ b/infrastructure/commons/cert_manager/variables.tf @@ -40,6 +40,12 @@ variable "azure_workload_identity_enabled" { default = true } +variable "azure_federated_credential_id" { + description = "Resource ID of the Azure federated identity credential for cert-manager (required when cloud_provider is 'azure' and azure_workload_identity_enabled is true). Pass module.iam_cert_manager.id to enforce dependency ordering." 
+ type = string + default = "" +} + variable "azure_client_secret" { description = "Azure AD client secret for Service Principal auth (required when cloud_provider is 'azure' and azure_workload_identity_enabled is false)." type = string diff --git a/infrastructure/commons/external_dns/validation.tf b/infrastructure/commons/external_dns/validation.tf index 679b0aaa..d34cf990 100644 --- a/infrastructure/commons/external_dns/validation.tf +++ b/infrastructure/commons/external_dns/validation.tf @@ -32,6 +32,10 @@ resource "terraform_data" "provider_validation" { condition = var.dns_provider_name != "azure" || length(var.azure_client_id) > 0 error_message = "azure_client_id is required when dns_provider_name is 'azure'." } + precondition { + condition = var.dns_provider_name != "azure" || !var.azure_workload_identity_enabled || length(var.azure_federated_credential_id) > 0 + error_message = "azure_federated_credential_id is required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true. Use module.iam to create the federated identity credential and pass its id output." + } precondition { condition = var.dns_provider_name != "azure" || var.azure_workload_identity_enabled || length(var.azure_client_secret) > 0 error_message = "azure_client_secret is required when dns_provider_name is 'azure' and azure_workload_identity_enabled is false." diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index 4082cdb7..bcc815cd 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -158,6 +158,12 @@ variable "azure_workload_identity_enabled" { default = true } +variable "azure_federated_credential_id" { + description = "Resource ID of the Azure federated identity credential for external-dns (required when dns_provider_name is 'azure' and azure_workload_identity_enabled is true). 
Pass module.iam_external_dns.id to enforce dependency ordering." + type = string + default = "" +} + variable "azure_client_secret" { description = "Azure AD client secret for Service Principal auth (required when dns_provider_name is 'azure' and azure_workload_identity_enabled is false)." type = string From b9d696b0bd45e3e5726fc9fa6520e040d5c4bd9a Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Wed, 20 May 2026 10:49:01 -0300 Subject: [PATCH 09/10] chore(cert_manager): drop unused kubernetes provider declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cert_manager only manages helm_release resources and a terraform_data for validation — no kubernetes_* resources. Removing the unused provider keeps the dependency graph minimal and drops the kubernetes provider lock hash from the module. --- .../commons/cert_manager/.terraform.lock.hcl | 17 ----------------- .../commons/cert_manager/providers.tf | 4 ---- 2 files changed, 21 deletions(-) diff --git a/infrastructure/commons/cert_manager/.terraform.lock.hcl b/infrastructure/commons/cert_manager/.terraform.lock.hcl index 7cf21362..8251ed77 100644 --- a/infrastructure/commons/cert_manager/.terraform.lock.hcl +++ b/infrastructure/commons/cert_manager/.terraform.lock.hcl @@ -17,20 +17,3 @@ provider "registry.opentofu.org/hashicorp/helm" { "zh:f6fe7ecfafc344f4e6aecacf5ae12ac73b94389b9679dcd0f04fc5ff45bdc066", ] } - -provider "registry.opentofu.org/hashicorp/kubernetes" { - version = "2.38.0" - constraints = "~> 2.0" - hashes = [ - "h1:ems+O2dA7atxMWpbtqIrsH7Oa+u+ERWSfpMaFnZPbh0=", - "zh:1096b41c4e5b2ee6c1980916fb9a8579bc1892071396f7a9432be058aabf3cbc", - "zh:2959fde9ae3d1deb5e317df0d7b02ea4977951ee6b9c4beb083c148ca8f3681c", - "zh:5082f98fcb3389c73339365f7df39fc6912bf2bd1a46d5f97778f441a67fd337", - "zh:620fd5d0fbc2d7a24ac6b420a4922e6093020358162a62fa8cbd37b2bac1d22e", - "zh:7f47c2de179bba35d759147c53082cad6c3449d19b0ec0c5a4ca8db5b06393e1", - 
"zh:89c3aa2a87e29febf100fd21cead34f9a4c0e6e7ae5f383b5cef815c677eb52a", - "zh:96eecc9f94938a0bc35b8a63d2c4a5f972395e44206620db06760b730d0471fc", - "zh:e15567c1095f898af173c281b66bffdc4f3068afdd9f84bb5b5b5521d9f29584", - "zh:ecc6b912629734a9a41a7cf1c4c73fb13b4b510afc9e7b2e0011d290bcd6d77f", - ] -} diff --git a/infrastructure/commons/cert_manager/providers.tf b/infrastructure/commons/cert_manager/providers.tf index ce4189a7..94681f0d 100644 --- a/infrastructure/commons/cert_manager/providers.tf +++ b/infrastructure/commons/cert_manager/providers.tf @@ -4,9 +4,5 @@ terraform { source = "hashicorp/helm" version = "~> 3.0" } - kubernetes = { - source = "hashicorp/kubernetes" - version = "~> 2.0" - } } } From 5aec1fdbb4fa331e8cdcbac130febd73f22b13fa Mon Sep 17 00:00:00 2001 From: Gonzalo Rojas Date: Wed, 20 May 2026 10:49:13 -0300 Subject: [PATCH 10/10] style(external_dns): trim trailing blank line in variables.tf --- infrastructure/commons/external_dns/variables.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/infrastructure/commons/external_dns/variables.tf b/infrastructure/commons/external_dns/variables.tf index bcc815cd..f89f113c 100644 --- a/infrastructure/commons/external_dns/variables.tf +++ b/infrastructure/commons/external_dns/variables.tf @@ -195,4 +195,3 @@ variable "azure_tenant_id" { default = "" } -