From 5a4b330503d89e6c0afe5f68b24126fcb0371498 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Thu, 13 Nov 2025 10:17:17 -0500 Subject: [PATCH 1/3] feat(hypershift): add DNS cleanup for AKS test workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add explicit DNS cleanup step to prevent DNS record leaks in Azure AKS test workflows. The issue was identified when the DNS zone hit the 10,000 record limit due to leaked records from external-dns. Root cause: - HyperShift cluster deletion triggers external-dns cleanup - AKS management cluster gets deleted immediately after - external-dns controller is killed before it can process deletion events - DNS records accumulate over time in shared DNS zones Solution: Query the management cluster for all HostedClusters and extract their infraIDs, then clean up DNS records matching those infraIDs. This approach: - Works for e2e tests (multiple clusters with different infraIDs) - Works for conformance tests (single cluster) - Only cleans up DNS records from THIS test run - Safe for shared DNS zones (won't delete concurrent test records) Implementation: 1. Query: kubectl get hostedclusters --all-namespaces -o jsonpath='{.spec.infraID}' 2. Find DNS records containing any of those infraIDs 3. 
Delete only those matching records Example: - HostedCluster has infraID: autoscaling-9hpz5 - DNS records: api-autoscaling-9hpz5, a-api-autoscaling-9hpz5-external-dns - Cleanup: Deletes records containing 'autoscaling-9hpz5' - Preserves: Records from other tests (different infraIDs) Updated workflows: - hypershift-azure-aks-conformance (single cluster) - hypershift-azure-aks-e2e (multiple clusters) - 8 specialized deprovision chains Fixes: CNTRLPLANE-1857 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ...aks-hypershift-base-deprovision-chain.yaml | 1 + ...hypershift-byo-vnet-deprovision-chain.yaml | 1 + ...s-hypershift-cilium-deprovision-chain.yaml | 1 + ...ster-recovery-infra-deprovision-chain.yaml | 1 + ...ift-ephemeral-creds-deprovision-chain.yaml | 1 + ...tcd-disk-encryption-deprovision-chain.yaml | 1 + ...shift-heterogeneous-deprovision-chain.yaml | 1 + ...-registry-overrides-deprovision-chain.yaml | 1 + ...rshift-azure-aks-conformance-workflow.yaml | 1 + .../hypershift-azure-aks-e2e-workflow.yaml | 1 + .../azure/cleanup-external-dns/OWNERS | 1 + ...ift-azure-cleanup-external-dns-commands.sh | 145 ++++++++++++++++++ ...ure-cleanup-external-dns-ref.metadata.json | 23 +++ ...rshift-azure-cleanup-external-dns-ref.yaml | 29 ++++ 14 files changed, 208 insertions(+) create mode 120000 ci-operator/step-registry/hypershift/azure/cleanup-external-dns/OWNERS create mode 100755 ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh create mode 100644 ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.metadata.json create mode 100644 ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.yaml diff --git 
a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/base/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-base-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/base/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-base-deprovision-chain.yaml index 29447a2075777..6f6c4668f9aa4 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/base/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-base-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/base/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-base-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/byo-vnet/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-byo-vnet-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/byo-vnet/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-byo-vnet-deprovision-chain.yaml index 695fda1e3f55d..4a3b67a9f80ef 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/byo-vnet/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-byo-vnet-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/byo-vnet/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-byo-vnet-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: 
cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/cilium/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-cilium-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/cilium/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-cilium-deprovision-chain.yaml index 0e563fc940dd0..87d100b30697f 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/cilium/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-cilium-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/cilium/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-cilium-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: cilium-dump - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/disaster-recovery/infra/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-disaster-recovery-infra-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/disaster-recovery/infra/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-disaster-recovery-infra-deprovision-chain.yaml index 27422d0838041..42b3ddfdb58c4 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/disaster-recovery/infra/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-disaster-recovery-infra-deprovision-chain.yaml +++ 
b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/disaster-recovery/infra/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-disaster-recovery-infra-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/ephemeral-creds/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-ephemeral-creds-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/ephemeral-creds/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-ephemeral-creds-deprovision-chain.yaml index 74c43c6e9e602..7300b5c176dcc 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/ephemeral-creds/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-ephemeral-creds-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/ephemeral-creds/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-ephemeral-creds-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - ref: azure-deprovision-role-assignment - ref: azure-deprovision-sp-and-custom-role - chain: cucushift-installer-rehearse-azure-aks-deprovision diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/etcd-disk-encryption/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-etcd-disk-encryption-deprovision-chain.yaml 
b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/etcd-disk-encryption/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-etcd-disk-encryption-deprovision-chain.yaml index 099a4a344bea0..6b0471eb66888 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/etcd-disk-encryption/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-etcd-disk-encryption-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/etcd-disk-encryption/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-etcd-disk-encryption-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/heterogeneous/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-heterogeneous-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/heterogeneous/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-heterogeneous-deprovision-chain.yaml index 05b78f64e4b61..5f8a5c8e8439d 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/heterogeneous/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-heterogeneous-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/heterogeneous/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-heterogeneous-deprovision-chain.yaml @@ -3,6 +3,7 @@ chain: steps: - chain: hypershift-dump - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git 
a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/registry-overrides/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-registry-overrides-deprovision-chain.yaml b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/registry-overrides/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-registry-overrides-deprovision-chain.yaml index 48b9c519594c1..af1a4a034ffc2 100644 --- a/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/registry-overrides/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-registry-overrides-deprovision-chain.yaml +++ b/ci-operator/step-registry/cucushift/installer/rehearse/azure/aks/hypershift/registry-overrides/deprovision/cucushift-installer-rehearse-azure-aks-hypershift-registry-overrides-deprovision-chain.yaml @@ -4,6 +4,7 @@ chain: - chain: hypershift-dump - chain: hypershift-dump-azure - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: - name: HYPERSHIFT_AZURE_LOCATION diff --git a/ci-operator/step-registry/hypershift/azure/aks/conformance/hypershift-azure-aks-conformance-workflow.yaml b/ci-operator/step-registry/hypershift/azure/aks/conformance/hypershift-azure-aks-conformance-workflow.yaml index ad2dd715fffc5..c53225acbd274 100644 --- a/ci-operator/step-registry/hypershift/azure/aks/conformance/hypershift-azure-aks-conformance-workflow.yaml +++ b/ci-operator/step-registry/hypershift/azure/aks/conformance/hypershift-azure-aks-conformance-workflow.yaml @@ -34,6 +34,7 @@ workflow: post: - chain: hypershift-dump - chain: hypershift-azure-destroy + - ref: hypershift-azure-cleanup-external-dns - chain: cucushift-installer-rehearse-azure-aks-deprovision env: USE_HYPERSHIFT_AZURE_CREDS: "true" diff --git a/ci-operator/step-registry/hypershift/azure/aks/e2e/hypershift-azure-aks-e2e-workflow.yaml 
b/ci-operator/step-registry/hypershift/azure/aks/e2e/hypershift-azure-aks-e2e-workflow.yaml
index e2aa78799e131..cbd7c36149cf1 100644
--- a/ci-operator/step-registry/hypershift/azure/aks/e2e/hypershift-azure-aks-e2e-workflow.yaml
+++ b/ci-operator/step-registry/hypershift/azure/aks/e2e/hypershift-azure-aks-e2e-workflow.yaml
@@ -27,6 +27,7 @@ workflow:
     test:
     - ref: hypershift-azure-run-e2e
     post:
+    - ref: hypershift-azure-cleanup-external-dns
     - chain: cucushift-installer-rehearse-azure-aks-deprovision
     env:
       AKS_NODE_COUNT: "6"
diff --git a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/OWNERS b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/OWNERS
new file mode 120000
index 0000000000000..8c272259fbba9
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/OWNERS
@@ -0,0 +1 @@
+../../OWNERS
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh
new file mode 100755
index 0000000000000..20096b94a3686
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+echo "$(date -u --rfc-3339=seconds) - Starting external-dns cleanup"
+
+# Check if external DNS domain is configured
+if [[ -z "${HYPERSHIFT_EXTERNAL_DNS_DOMAIN:-}" ]]; then
+    echo "HYPERSHIFT_EXTERNAL_DNS_DOMAIN is not set, skipping DNS cleanup"
+    exit 0
+fi
+
+# Try to get infraIDs from HostedClusters on the management cluster first
+if [[ -f "${SHARED_DIR}/mgmt_kubeconfig" ]]; then
+    export KUBECONFIG="${SHARED_DIR}/mgmt_kubeconfig"
+elif [[ -f "${SHARED_DIR}/kubeconfig" ]]; then
+    export KUBECONFIG="${SHARED_DIR}/kubeconfig"
+fi
+
+INFRA_IDS=""
+if [[ -n "${KUBECONFIG:-}" ]]; then
+    echo "Querying management cluster for HostedClusters..."
+    INFRA_IDS=$(kubectl get hostedclusters --all-namespaces -o jsonpath='{range .items[*]}{.spec.infraID}{"\n"}{end}' 2>/dev/null || echo "")
+fi
+
+# For e2e tests, HostedClusters are destroyed as part of test teardown before cleanup runs
+# Fall back to extracting infraIDs from the test output logs
+if [[ -z "${INFRA_IDS}" ]]; then
+    echo "No HostedClusters found on management cluster"
+    echo "Attempting to extract infraIDs from test artifacts..."
+
+    # Look for the hypershift-azure-run-e2e test logs
+    E2E_LOG="${ARTIFACT_DIR}/../hypershift-azure-run-e2e/build-log.txt"
+    if [[ -f "${E2E_LOG}" ]]; then
+        # Extract infraIDs from log entries like "Successfully created hostedcluster e2e-clusters-xxx/create-cluster-abc123"
+        # HyperShift infraIDs follow pattern: test-name-xxxxx (lowercase alphanumeric + hyphens)
+        INFRA_IDS=$(grep -oE "(create-cluster|autoscaling|azure-scheduler|cilium-connectivity|control-plane-upgrade|konnectivity|node-pool|private|etcd)-[a-z0-9]{5,6}" "${E2E_LOG}" 2>/dev/null | sort -u || echo "")
+    fi
+fi
+
+if [[ -z "${INFRA_IDS}" ]]; then
+    echo "No infraIDs found to clean up"
+    exit 0
+fi
+
+echo "Found infraIDs to clean up:"
+echo "${INFRA_IDS}"
+
+# Set the parameters we'll need as env vars
+AZURE_AUTH_LOCATION="${CLUSTER_PROFILE_DIR}/osServicePrincipal.json"
+if [[ "${USE_HYPERSHIFT_AZURE_CREDS}" == "true" ]]; then
+    AZURE_AUTH_LOCATION="/etc/hypershift-ci-jobs-azurecreds/credentials.json"
+fi
+
+AZURE_AUTH_CLIENT_ID="$(<"${AZURE_AUTH_LOCATION}" jq -r .clientId)"
+AZURE_AUTH_CLIENT_SECRET="$(<"${AZURE_AUTH_LOCATION}" jq -r .clientSecret)"
+AZURE_AUTH_TENANT_ID="$(<"${AZURE_AUTH_LOCATION}" jq -r .tenantId)"
+AZURE_AUTH_SUBSCRIPTION_ID="$(<"${AZURE_AUTH_LOCATION}" jq -r .subscriptionId)"
+
+# Log in with az
+az cloud set --name AzureCloud
+az login --service-principal -u "${AZURE_AUTH_CLIENT_ID}" -p "${AZURE_AUTH_CLIENT_SECRET}" --tenant "${AZURE_AUTH_TENANT_ID}" --output none
+az account set --subscription "${AZURE_AUTH_SUBSCRIPTION_ID}"
+
+echo "Logged into Azure successfully"
+
+# Check if the DNS zone exists
+if ! az network dns zone show --resource-group "${EXTERNAL_DNS_ZONE_RESOURCE_GROUP}" --name "${HYPERSHIFT_EXTERNAL_DNS_DOMAIN}" &>/dev/null; then
+    echo "DNS zone ${HYPERSHIFT_EXTERNAL_DNS_DOMAIN} not found in resource group ${EXTERNAL_DNS_ZONE_RESOURCE_GROUP}, nothing to clean up"
+    exit 0
+fi
+
+echo "Listing DNS records matching infraIDs in zone ${HYPERSHIFT_EXTERNAL_DNS_DOMAIN}"
+
+# List all DNS records that match any of the infraIDs
+# External-DNS creates records with patterns like:
+# - api-{infraID}.{zone}
+# - *.apps-{infraID}.{zone}
+# - a-api-{infraID}-external-dns (TXT ownership records)
+# - a-ignition-{infraID}-external-dns (TXT ownership records)
+# An infraID must appear as a whole "-"/"."-delimited token, so abc12 never matches another run's abc123
+
+# Convert infraIDs to a JSON array for jq
+INFRA_IDS_JSON=$(echo "${INFRA_IDS}" | jq -R -s 'split("\n") | map(select(length > 0))')
+
+RECORDS_JSON=$(az network dns record-set list \
+    --resource-group "${EXTERNAL_DNS_ZONE_RESOURCE_GROUP}" \
+    --zone-name "${HYPERSHIFT_EXTERNAL_DNS_DOMAIN}" \
+    --output json | \
+    jq --argjson infra_ids "${INFRA_IDS_JSON}" '
+    [.[] |
+      select(.type | endswith("/SOA") | not) |
+      select(.type | endswith("/NS") | not) |
+      select(
+        ($infra_ids | length) > 0 and
+        (.name as $name | $infra_ids | any(. as $id | ("-" + ($name | split(".") | join("-")) + "-") | contains("-" + $id + "-")))
+      ) |
+      {name: .name, type: (.type | split("/")[-1])}
+    ]')
+
+RECORD_COUNT=$(echo "${RECORDS_JSON}" | jq 'length')
+echo "Found ${RECORD_COUNT} DNS record(s) to delete"
+
+if [[ "${RECORD_COUNT}" -eq 0 ]]; then
+    echo "No DNS records found matching infraIDs, cleanup complete"
+    exit 0
+fi
+
+# Display the records that will be deleted
+echo "Records to be deleted:"
+echo "${RECORDS_JSON}" | jq -r '.[] | "\(.type) \(.name)"'
+
+# Delete each record
+DELETED_COUNT=0
+FAILED_COUNT=0
+
+while read -r record; do
+    NAME=$(echo "$record" | jq -r '.name')
+    TYPE=$(echo "$record" | jq -r '.type' | tr '[:upper:]' '[:lower:]')
+
+    echo "Deleting ${TYPE} record: ${NAME}"
+
+    if az network dns record-set "${TYPE}" delete \
+        --resource-group "${EXTERNAL_DNS_ZONE_RESOURCE_GROUP}" \
+        --zone-name "${HYPERSHIFT_EXTERNAL_DNS_DOMAIN}" \
+        --name "${NAME}" \
+        --yes; then
+        DELETED_COUNT=$((DELETED_COUNT + 1))
+        echo "Successfully deleted ${TYPE} record: ${NAME}"
+    else
+        FAILED_COUNT=$((FAILED_COUNT + 1))
+        echo "WARNING: Failed to delete ${TYPE} record: ${NAME}"
+    fi
+done < <(echo "${RECORDS_JSON}" | jq -c '.[]')
+
+echo "$(date -u --rfc-3339=seconds) - DNS cleanup complete"
+echo "Summary: Deleted ${DELETED_COUNT} record(s), Failed ${FAILED_COUNT} record(s)"
+
+if [[ "${FAILED_COUNT}" -gt 0 ]]; then
+    echo "WARNING: Some DNS records failed to delete, but continuing with deprovision"
+fi
+
+exit 0
diff --git a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.metadata.json b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.metadata.json
new file mode 100644
index 0000000000000..1b7fbef79f9cc
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.metadata.json
@@ -0,0 +1,23 @@
+{
+  "path":
"hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.yaml", + "owners": { + "approvers": [ + "csrwng", + "enxebre", + "sjenning", + "imain", + "davidvossel", + "LiangquanLi930", + "bryan-cox", + "jparrill" + ], + "reviewers": [ + "csrwng", + "enxebre", + "sjenning", + "imain", + "bryan-cox", + "jparrill" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.yaml b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.yaml new file mode 100644 index 0000000000000..0075fb5a7b7d9 --- /dev/null +++ b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-ref.yaml @@ -0,0 +1,29 @@ +ref: + as: hypershift-azure-cleanup-external-dns + from: upi-installer + commands: hypershift-azure-cleanup-external-dns-commands.sh + resources: + requests: + cpu: 10m + memory: 100Mi + timeout: 10m0s + grace_period: 1m0s + env: + - name: USE_HYPERSHIFT_AZURE_CREDS + default: "false" + documentation: "If true, use hypershift azure credentials" + - name: HYPERSHIFT_EXTERNAL_DNS_DOMAIN + default: "" + documentation: "The external DNS domain used by the cluster (e.g., aks-e2e.hypershift.azure.devcluster.openshift.com)" + - name: EXTERNAL_DNS_ZONE_RESOURCE_GROUP + default: "os4-common" + documentation: "The resource group containing the external DNS zone" + credentials: + - mount_path: /etc/hypershift-ci-jobs-azurecreds + name: hypershift-ci-jobs-azurecreds + namespace: test-credentials + documentation: |- + This step cleans up DNS records created by external-dns for a HyperShift cluster. + It deletes DNS records in the external DNS zone that match the cluster name pattern. + This prevents DNS record leaks when the AKS cluster (and external-dns controller) + is deleted before it can clean up the records. 
From 35b20360c397a6404fd4204e8a5f24eafcb8ad1d Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Mon, 17 Nov 2025 13:54:59 -0500 Subject: [PATCH 2/3] feat(hypershift): add e2e-aks-override CI job for CPO override testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dedicated e2e-aks-override job to test Control Plane Operator (CPO) overrides on Azure AKS platform. This complements the existing e2e-aws-override job. Key features: - Triggered only when overrides.yaml is modified (run_if_changed) - Sets TEST_CPO_OVERRIDE=1 to enable override testing - Uses same workflow as regular AKS tests (hypershift-azure-aks-e2e) - Paired with runTests field in overrides.yaml for granular control This allows testing Azure CPO overrides independently from AWS overrides, saving CI resources by skipping tests for platforms not being modified. Related: - hypershift PR #7206 (runTests field implementation) - CNTRLPLANE-1893 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../hypershift/openshift-hypershift-main.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml index 6576bd63d2409..8a6d3bc2e538b 100644 --- a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml +++ b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml @@ -178,6 +178,18 @@ tests: HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_SKU: "" HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_VERSION: "" workflow: hypershift-azure-aks-e2e +- as: e2e-aks-override + capabilities: + - build-tmpfs + run_if_changed: (/overrides\.yaml$) + steps: + cluster_profile: hypershift + env: + AUTH_THROUGH_CERTS: "true" + ENABLE_HYPERSHIFT_CERT_ROTATION_SCALE: "true" + HYPERSHIFT_AZURE_LOCATION: centralus + TEST_CPO_OVERRIDE: "1" + workflow: hypershift-azure-aks-e2e - as: e2e-aks-4-20 
skip_if_only_changed: (^(\.tekton|\.github|\.claude|docs|examples|enhancements|contrib|\.cursor)/)|(^[A-Z]+\.md$)|((^|/)OWNERS$)|(/overrides\.yaml$)|(^renovate\.json$)|(/\.testcoverage\.yml$)|(^\.gitlint$)|(^\.gitignore$) steps: From d3fda234886eb756e8ed5c4a058214733709248d Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Wed, 19 Nov 2025 04:09:56 -0500 Subject: [PATCH 3/3] Add debug logging to DNS cleanup script to diagnose artifact path issues --- .../hypershift-azure-cleanup-external-dns-commands.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh index 20096b94a3686..4dd3004e41688 100755 --- a/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh +++ b/ci-operator/step-registry/hypershift/azure/cleanup-external-dns/hypershift-azure-cleanup-external-dns-commands.sh @@ -33,10 +33,19 @@ if [[ -z "${INFRA_IDS}" ]]; then # Look for the hypershift-azure-run-e2e test logs E2E_LOG="${ARTIFACT_DIR}/../hypershift-azure-run-e2e/build-log.txt" + echo "DEBUG: ARTIFACT_DIR=${ARTIFACT_DIR}" + echo "DEBUG: Looking for e2e log at: ${E2E_LOG}" + if [[ -f "${E2E_LOG}" ]]; then + echo "DEBUG: Found e2e log file, extracting infraIDs..." 
# Extract infraIDs from log entries like "Successfully created hostedcluster e2e-clusters-xxx/create-cluster-abc123" # HyperShift infraIDs follow pattern: test-name-xxxxx (lowercase alphanumeric + hyphens) INFRA_IDS=$(grep -oE "(create-cluster|autoscaling|azure-scheduler|cilium-connectivity|control-plane-upgrade|konnectivity|node-pool|private|etcd)-[a-z0-9]{5,6}" "${E2E_LOG}" 2>/dev/null | sort -u || echo "") + echo "DEBUG: Extracted infraIDs: ${INFRA_IDS}" + else + echo "DEBUG: E2E log file not found at expected location" + echo "DEBUG: Listing parent directory contents..." + ls -la "${ARTIFACT_DIR}/.." 2>/dev/null || echo "DEBUG: Could not list parent directory" fi fi