From f2bf419c080552189fd728f4bd62e87720d2e941 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 10:06:50 -0400
Subject: [PATCH 01/12] feat(ci): add e2e-azure-v2-self-managed CI job for
 self-managed Azure

Add CI workflow, step registry chains, and job config for running
self-managed Azure e2e v2 tests using nested management cluster pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift/openshift-hypershift-main.yaml |  10 ++
 .../openshift-hypershift-main-presubmits.yaml |  83 +++++++++
 .../azure/create-selfmanaged-guests/OWNERS    |  11 ++
 ...ate-selfmanaged-guests-chain.metadata.json |  17 ++
 ...azure-create-selfmanaged-guests-chain.yaml | 168 ++++++++++++++++++
 .../azure/destroy-selfmanaged-guests/OWNERS   |  11 ++
 ...roy-selfmanaged-guests-chain.metadata.json |  17 ++
 ...zure-destroy-selfmanaged-guests-chain.yaml |  45 +++++
 .../azure/e2e/v2-self-managed/OWNERS          |  11 ++
 ...e2e-v2-self-managed-workflow.metadata.json |  17 ++
 ...ft-azure-e2e-v2-self-managed-workflow.yaml |  39 ++++
 .../azure/run-e2e-v2-selfmanaged/OWNERS       |  11 ++
 ...run-e2e-v2-selfmanaged-chain.metadata.json |  17 ++
 ...ft-azure-run-e2e-v2-selfmanaged-chain.yaml |  63 +++++++
 14 files changed, 520 insertions(+)
 create mode 100644 ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/OWNERS
 create mode 100644 ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.metadata.json
 create mode 100644 ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
 create mode 100644 ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/OWNERS
 create mode 100644 ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.metadata.json
 create mode 100644 ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
 create mode 100644 ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/OWNERS
 create mode 100644 ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.metadata.json
 create mode 100644 ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.yaml
 create mode 100644 ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/OWNERS
 create mode 100644 ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.metadata.json
 create mode 100644 ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml

diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
index a1b4cdf514d85..af7d7a6bc1562 100644
--- a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
+++ b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
@@ -175,6 +175,16 @@ tests:
       HYPERSHIFT_AZURE_LOCATION: centralus
       TEST_CPO_OVERRIDE: "1"
     workflow: hypershift-azure-aks-e2e
+- always_run: false
+  as: e2e-azure-v2-self-managed
+  optional: true
+  pipeline_run_if_changed: ^(test/e2e/v2/selfmanagedazure|test/e2e/util)
+  steps:
+    cluster_profile: hypershift-azure
+    env:
+      ENABLE_HYPERSHIFT_CERT_ROTATION_SCALE: "true"
+      HYPERSHIFT_AZURE_LOCATION: centralus
+    workflow: hypershift-azure-e2e-v2-self-managed
 - always_run: false
   as: e2e-aws
   capabilities:
diff --git a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
index 78aff6632a2bf..7ca1dd89726dc 100644
--- a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
+++ b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
@@ -1305,6 +1305,89 @@ presubmits:
         secret:
           secretName: result-aggregator
     trigger: (?m)^/test( | .* )e2e-azure-self-managed,?($|\s.*)
+  - agent: kubernetes
+    always_run: false
+    annotations:
+      pipeline_run_if_changed: ^(test/e2e/v2/selfmanagedazure|test/e2e/util)
+    branches:
+    - ^main$
+    - ^main-
+    cluster: build01
+    context: ci/prow/e2e-azure-v2-self-managed
+    decorate: true
+    labels:
+      ci-operator.openshift.io/cloud: hypershift-azure
+      ci-operator.openshift.io/cloud-cluster-profile: hypershift-azure
+      ci.openshift.io/generator: prowgen
+      pj-rehearse.openshift.io/can-be-rehearsed: "true"
+    name: pull-ci-openshift-hypershift-main-e2e-azure-v2-self-managed
+    optional: true
+    rerun_command: /test e2e-azure-v2-self-managed
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --lease-server-credentials-file=/etc/boskos/credentials
+        - --report-credentials-file=/etc/report/credentials
+        - --secret-dir=/secrets/ci-pull-credentials
+        - --target=e2e-azure-v2-self-managed
+        command:
+        - ci-operator
+        env:
+        - name: HTTP_SERVER_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+        image: quay-proxy.ci.openshift.org/openshift/ci:ci_ci-operator_latest
+        imagePullPolicy: Always
+        name: ""
+        ports:
+        - containerPort: 8080
+          name: http
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /etc/boskos
+          name: boskos
+          readOnly: true
+        - mountPath: /secrets/ci-pull-credentials
+          name: ci-pull-credentials
+          readOnly: true
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: boskos
+        secret:
+          items:
+          - key: credentials
+            path: credentials
+          secretName: boskos-credentials
+      - name: ci-pull-credentials
+        secret:
+          secretName: ci-pull-credentials
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
+    trigger: (?m)^/test( | .* )e2e-azure-v2-self-managed,?($|\s.*)
   - agent: kubernetes
     always_run: false
     branches:
diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/OWNERS b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/OWNERS
new file mode 100644
index 0000000000000..6c30e7a30d980
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/OWNERS
@@ -0,0 +1,11 @@
+approvers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
+options: {}
+reviewers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.metadata.json b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.metadata.json
new file mode 100644
index 0000000000000..2ce0fb6bae6f2
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.metadata.json
@@ -0,0 +1,17 @@
+{
+	"path": "hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml",
+	"owners": {
+		"approvers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		],
+		"reviewers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
new file mode 100644
index 0000000000000..b135067740adf
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -0,0 +1,168 @@
+chain:
+  as: hypershift-azure-create-selfmanaged-guests
+  steps:
+  - as: create-guests
+    cli: latest
+    env:
+    - name: HYPERSHIFT_NODE_COUNT
+      default: "3"
+      documentation: "The number of nodes per guest cluster."
+    - name: HYPERSHIFT_BASE_DOMAIN
+      default: "hcp-sm-azure.azure.devcluster.openshift.com"
+      documentation: "The cluster's FQDN will be a subdomain of the base domain."
+    - name: HYPERSHIFT_AZURE_LOCATION
+      default: "centralus"
+      documentation: "Specifies the Azure location of the clusters."
+    - name: HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_PUBLISHER
+      default: ""
+      documentation: "The Azure Marketplace image publisher."
+    - name: HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_OFFER
+      default: ""
+      documentation: "The Azure Marketplace image offer."
+    - name: HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_SKU
+      default: ""
+      documentation: "The Azure Marketplace image SKU."
+    - name: HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_VERSION
+      default: ""
+      documentation: "The Azure Marketplace image version."
+    commands: |-
+      set -exuo pipefail
+
+      # Use the nested management cluster kubeconfig
+      export KUBECONFIG="${SHARED_DIR}/management_cluster_kubeconfig"
+
+      # Generate unique cluster names from job ID
+      PUBLIC_NAME="$(echo -n "${PROW_JOB_ID}-pub"|sha256sum|cut -c-20)"
+      PRIVATE_NAME="$(echo -n "${PROW_JOB_ID}-prv"|sha256sum|cut -c-20)"
+      OAUTH_LB_NAME="$(echo -n "${PROW_JOB_ID}-oau"|sha256sum|cut -c-20)"
+
+      # Self-managed Azure credentials
+      AZURE_CREDS="/etc/hypershift-ci-jobs-self-managed-azure/credentials.json"
+      AZURE_OIDC_ISSUER_URL="https://smazure.blob.core.windows.net/smazure"
+      AZURE_SA_TOKEN_ISSUER_KEY_PATH="/etc/hypershift-ci-jobs-self-managed-azure-e2e/serviceaccount-signer.private"
+      AZURE_WORKLOAD_IDENTITIES_FILE="/etc/hypershift-ci-jobs-self-managed-azure-e2e/workload-identities.json"
+
+      PULL_SECRET_PATH="/etc/ci-pull-credentials/.dockerconfigjson"
+
+      RELEASE_IMAGE="${RELEASE_IMAGE_LATEST}"
+      HC_LOCATION="${HYPERSHIFT_AZURE_LOCATION:-centralus}"
+
+      # Read private NAT subnet ID from SHARED_DIR (written by setup-private-link step)
+      AZURE_PRIVATE_NAT_SUBNET_ID=""
+      if [[ -f "${SHARED_DIR}/azure_private_nat_subnet_id" ]]; then
+        AZURE_PRIVATE_NAT_SUBNET_ID="$(cat "${SHARED_DIR}/azure_private_nat_subnet_id")"
+      fi
+
+      # Marketplace image flags
+      MARKETPLACE_ARGS=""
+      if [[ -n "${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_PUBLISHER:-}" ]]; then
+        MARKETPLACE_ARGS="--marketplace-publisher=${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_PUBLISHER} --marketplace-offer=${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_OFFER}"
+        if [[ -n "${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_SKU:-}" ]]; then
+          MARKETPLACE_ARGS="${MARKETPLACE_ARGS} --marketplace-sku=${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_SKU}"
+        elif [[ -f "${SHARED_DIR}/azure-marketplace-image-sku" ]]; then
+          MARKETPLACE_ARGS="${MARKETPLACE_ARGS} --marketplace-sku=$(cat "${SHARED_DIR}/azure-marketplace-image-sku")"
+        fi
+        if [[ -n "${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_VERSION:-}" ]]; then
+          MARKETPLACE_ARGS="${MARKETPLACE_ARGS} --marketplace-version=${HYPERSHIFT_AZURE_MARKETPLACE_IMAGE_VERSION}"
+        elif [[ -f "${SHARED_DIR}/azure-marketplace-image-version" ]]; then
+          MARKETPLACE_ARGS="${MARKETPLACE_ARGS} --marketplace-version=$(cat "${SHARED_DIR}/azure-marketplace-image-version")"
+        fi
+      fi
+
+      # Common flags for all self-managed clusters
+      COMMON_FLAGS="--node-pool-replicas=${HYPERSHIFT_NODE_COUNT} \
+        --base-domain=${HYPERSHIFT_BASE_DOMAIN} \
+        --pull-secret=${PULL_SECRET_PATH} \
+        --azure-creds=${AZURE_CREDS} \
+        --location=${HC_LOCATION} \
+        --release-image=${RELEASE_IMAGE} \
+        --oidc-issuer-url=${AZURE_OIDC_ISSUER_URL} \
+        --sa-token-issuer-private-key-path=${AZURE_SA_TOKEN_ISSUER_KEY_PATH} \
+        --workload-identities-file=${AZURE_WORKLOAD_IDENTITIES_FILE} \
+        --generate-ssh \
+        ${MARKETPLACE_ARGS}"
+
+      # Create public cluster
+      echo "$(date) Creating public self-managed cluster: ${PUBLIC_NAME}"
+      /usr/bin/hypershift create cluster azure \
+        --name="${PUBLIC_NAME}" \
+        ${COMMON_FLAGS} &
+      PUBLIC_PID=$!
+
+      # Create private cluster
+      PRIVATE_EXTRA=""
+      if [[ -n "${AZURE_PRIVATE_NAT_SUBNET_ID}" ]]; then
+        PRIVATE_EXTRA="${PRIVATE_EXTRA} --endpoint-access-private-nat-subnet-id=${AZURE_PRIVATE_NAT_SUBNET_ID}"
+      fi
+      echo "$(date) Creating private self-managed cluster: ${PRIVATE_NAME}"
+      /usr/bin/hypershift create cluster azure \
+        --name="${PRIVATE_NAME}" \
+        --endpoint-access=Private \
+        ${COMMON_FLAGS} \
+        ${PRIVATE_EXTRA} &
+      PRIVATE_PID=$!
+
+      # Create OAuth LoadBalancer cluster
+      echo "$(date) Creating OAuth LB self-managed cluster: ${OAUTH_LB_NAME}"
+      /usr/bin/hypershift create cluster azure \
+        --name="${OAUTH_LB_NAME}" \
+        --oauth-publishing-strategy=LoadBalancer \
+        ${COMMON_FLAGS} &
+      OAUTH_LB_PID=$!
+
+      # Wait for create commands to complete
+      echo "$(date) Waiting for cluster create commands to finish..."
+      wait ${PUBLIC_PID}
+      echo "$(date) Public cluster create command completed"
+      wait ${PRIVATE_PID}
+      echo "$(date) Private cluster create command completed"
+      wait ${OAUTH_LB_PID}
+      echo "$(date) OAuth LB cluster create command completed"
+
+      # Wait for clusters to become available
+      echo "$(date) Waiting for public cluster to become available..."
+      oc wait --timeout=30m --for=condition=Available --namespace=clusters "hostedcluster/${PUBLIC_NAME}"
+      echo "$(date) Public cluster is available"
+
+      echo "$(date) Waiting for private cluster to become available..."
+      oc wait --timeout=30m --for=condition=Available --namespace=clusters "hostedcluster/${PRIVATE_NAME}"
+      echo "$(date) Private cluster is available"
+
+      echo "$(date) Waiting for OAuth LB cluster to become available..."
+      oc wait --timeout=30m --for=condition=Available --namespace=clusters "hostedcluster/${OAUTH_LB_NAME}"
+      echo "$(date) OAuth LB cluster is available"
+
+      # Wait for clusteroperators on each cluster
+      for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
+        echo "$(date) Creating kubeconfig for ${CLUSTER}"
+        bin/hypershift create kubeconfig --namespace=clusters --name="${CLUSTER}" > "${SHARED_DIR}/nested_kubeconfig_${CLUSTER}"
+        echo "$(date) Waiting for clusteroperators on ${CLUSTER}..."
+        KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m || true
+      done
+
+      # Write cluster names to shared dir
+      echo "${PUBLIC_NAME}" > "${SHARED_DIR}/cluster-name-public"
+      echo "${PRIVATE_NAME}" > "${SHARED_DIR}/cluster-name-private"
+      echo "${OAUTH_LB_NAME}" > "${SHARED_DIR}/cluster-name-oauth-lb"
+
+      echo "$(date) All self-managed guest clusters are ready"
+    from: hypershift-operator
+    grace_period: 5m0s
+    resources:
+      requests:
+        cpu: 100m
+        memory: 100Mi
+    timeout: 60m0s
+    credentials:
+    - mount_path: /etc/ci-pull-credentials
+      name: ci-pull-credentials
+      namespace: test-credentials
+    - mount_path: /etc/hypershift-ci-jobs-self-managed-azure
+      name: hypershift-ci-jobs-self-managed-azure
+      namespace: test-credentials
+    - mount_path: /etc/hypershift-ci-jobs-self-managed-azure-e2e
+      name: hypershift-ci-jobs-self-managed-azure-e2e
+      namespace: test-credentials
+    dependencies:
+    - name: "release:latest"
+      env: RELEASE_IMAGE_LATEST
diff --git a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/OWNERS b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/OWNERS
new file mode 100644
index 0000000000000..6c30e7a30d980
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/OWNERS
@@ -0,0 +1,11 @@
+approvers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
+options: {}
+reviewers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
diff --git a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.metadata.json b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.metadata.json
new file mode 100644
index 0000000000000..6e7260c7b30f9
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.metadata.json
@@ -0,0 +1,17 @@
+{
+	"path": "hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml",
+	"owners": {
+		"approvers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		],
+		"reviewers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
new file mode 100644
index 0000000000000..b39188327a0c8
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
@@ -0,0 +1,45 @@
+chain:
+  as: hypershift-azure-destroy-selfmanaged-guests
+  steps:
+  - as: destroy-guests
+    best_effort: true
+    cli: latest
+    env:
+    - name: HYPERSHIFT_AZURE_LOCATION
+      default: "centralus"
+      documentation: "Specifies the Azure location of the clusters."
+    commands: |-
+      set -xuo pipefail
+
+      # Use the nested management cluster kubeconfig
+      export KUBECONFIG="${SHARED_DIR}/management_cluster_kubeconfig"
+
+      AZURE_CREDS="/etc/hypershift-ci-jobs-self-managed-azure/credentials.json"
+      HC_LOCATION="${HYPERSHIFT_AZURE_LOCATION:-centralus}"
+
+      # Re-derive cluster names from job ID (same logic as create step)
+      PUBLIC_NAME="$(echo -n "${PROW_JOB_ID}-pub"|sha256sum|cut -c-20)"
+      PRIVATE_NAME="$(echo -n "${PROW_JOB_ID}-prv"|sha256sum|cut -c-20)"
+      OAUTH_LB_NAME="$(echo -n "${PROW_JOB_ID}-oau"|sha256sum|cut -c-20)"
+
+      # Destroy the clusters one at a time; errexit is off and each failure is only logged, so the loop always visits all three
+      for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
+        echo "$(date) Destroying self-managed cluster: ${CLUSTER}"
+        /usr/bin/hypershift destroy cluster azure \
+          --azure-creds="${AZURE_CREDS}" \
+          --name="${CLUSTER}" \
+          --location="${HC_LOCATION}" \
+          --cluster-grace-period=40m || echo "$(date) WARNING: Failed to destroy cluster ${CLUSTER}"
+        echo "$(date) Finished destroying cluster ${CLUSTER}"
+      done
+    from: hypershift-operator
+    grace_period: 5m0s
+    resources:
+      requests:
+        cpu: 100m
+        memory: 100Mi
+    timeout: 150m0s  # three sequential destroys, each given a 40m --cluster-grace-period, plus headroom
+    credentials:
+    - mount_path: /etc/hypershift-ci-jobs-self-managed-azure
+      name: hypershift-ci-jobs-self-managed-azure
+      namespace: test-credentials
diff --git a/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/OWNERS b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/OWNERS
new file mode 100644
index 0000000000000..6c30e7a30d980
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/OWNERS
@@ -0,0 +1,11 @@
+approvers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
+options: {}
+reviewers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
diff --git a/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.metadata.json b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.metadata.json
new file mode 100644
index 0000000000000..6bbdd6096ef08
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.metadata.json
@@ -0,0 +1,17 @@
+{
+	"path": "hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.yaml",
+	"owners": {
+		"approvers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		],
+		"reviewers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.yaml b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.yaml
new file mode 100644
index 0000000000000..4362a59bcb253
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/e2e/v2-self-managed/hypershift-azure-e2e-v2-self-managed-workflow.yaml
@@ -0,0 +1,39 @@
+workflow:
+  as: hypershift-azure-e2e-v2-self-managed
+  documentation: |-
+    The HyperShift Azure e2e v2 self-managed workflow provisions a nested management
+    cluster on self-managed Azure infrastructure, installs the HyperShift operator, creates
+    three self-managed Azure guest clusters (public, private, OAuth LoadBalancer), and runs
+    the v2 Ginkgo test suite against each. Each test spec produces a separate JUnit XML
+    entry for Sippy.
+
+    The HyperShift launch capability is currently supported by the HyperShift
+    team. For now, please direct all questions and comments to:
+
+    - Alberto Lamela (agarcial@redhat.com)
+    - Seth Jennings (sjenning@redhat.com)
+    - Cesar Wong (cewong@redhat.com)
+    - Bryan Cox (brcox@redhat.com)
+
+    Learn more about HyperShift here: https://github.com/openshift/hypershift
+
+    Track HyperShift's development here: https://issues.redhat.com/projects/CNTRLPLANE/summary
+  steps:
+    pre:
+    - ref: ipi-install-rbac
+    - chain: hypershift-setup-nested-management-cluster
+    - ref: hypershift-azure-setup-private-link
+    - ref: hypershift-install
+    - chain: hypershift-azure-create-selfmanaged-guests
+    test:
+    - chain: hypershift-azure-run-e2e-v2-selfmanaged
+    post:
+    - ref: hypershift-analyze-e2e-failure
+    - chain: hypershift-azure-destroy-selfmanaged-guests
+    - chain: hypershift-destroy-nested-management-cluster
+    env:
+      CLOUD_PROVIDER: "Azure"
+      HYPERSHIFT_NODE_COUNT: "3"
+      HYPERSHIFT_AZURE_LOCATION: "centralus"
+      AZURE_SELF_MANAGED: "true"
+      HYPERSHIFT_EXTERNAL_DNS_DOMAIN: "aks-e2e.hypershift.azure.devcluster.openshift.com"
diff --git a/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/OWNERS b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/OWNERS
new file mode 100644
index 0000000000000..6c30e7a30d980
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/OWNERS
@@ -0,0 +1,11 @@
+approvers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
+options: {}
+reviewers:
+- csrwng
+- enxebre
+- sjenning
+- bryan-cox
diff --git a/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.metadata.json b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.metadata.json
new file mode 100644
index 0000000000000..e2104c39cc8da
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.metadata.json
@@ -0,0 +1,17 @@
+{
+	"path": "hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml",
+	"owners": {
+		"approvers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		],
+		"reviewers": [
+			"csrwng",
+			"enxebre",
+			"sjenning",
+			"bryan-cox"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml
new file mode 100644
index 0000000000000..5da816b993776
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml
@@ -0,0 +1,63 @@
+chain:
+  as: hypershift-azure-run-e2e-v2-selfmanaged
+  steps:
+  - as: tests
+    cli: latest
+    commands: |-
+      set -xuo pipefail
+
+      # Use the nested management cluster kubeconfig
+      export KUBECONFIG="${SHARED_DIR}/management_cluster_kubeconfig"
+      export EVENTUALLY_VERBOSE="false"
+
+      # Export private NAT subnet ID for private topology tests
+      AZURE_PRIVATE_NAT_SUBNET_ID=""
+      if [[ -f "${SHARED_DIR}/azure_private_nat_subnet_id" ]]; then
+        AZURE_PRIVATE_NAT_SUBNET_ID="$(cat "${SHARED_DIR}/azure_private_nat_subnet_id")"
+      fi
+      export AZURE_PRIVATE_NAT_SUBNET_ID
+
+      PUBLIC_NAME="$(cat "${SHARED_DIR}/cluster-name-public")"
+      PRIVATE_NAME="$(cat "${SHARED_DIR}/cluster-name-private")"
+      OAUTH_LB_NAME="$(cat "${SHARED_DIR}/cluster-name-oauth-lb")"
+
+      OVERALL_EXIT=0
+
+      # Run public cluster tests
+      echo "$(date) Running public cluster tests against ${PUBLIC_NAME}..."
+      E2E_HOSTED_CLUSTER_NAME="${PUBLIC_NAME}" \
+      E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
+      bin/test-e2e-self-managed-azure \
+        --ginkgo.label-filter="self-managed-azure-public" \
+        --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_public.xml" \
+        --ginkgo.v || OVERALL_EXIT=1
+      echo "$(date) Public cluster tests finished"
+
+      # Run private topology tests
+      echo "$(date) Running private topology tests against ${PRIVATE_NAME}..."
+      E2E_HOSTED_CLUSTER_NAME="${PRIVATE_NAME}" \
+      E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
+      bin/test-e2e-self-managed-azure \
+        --ginkgo.label-filter="self-managed-azure-private" \
+        --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_private.xml" \
+        --ginkgo.v || OVERALL_EXIT=1
+      echo "$(date) Private topology tests finished"
+
+      # Run OAuth LoadBalancer tests
+      echo "$(date) Running OAuth LB tests against ${OAUTH_LB_NAME}..."
+      E2E_HOSTED_CLUSTER_NAME="${OAUTH_LB_NAME}" \
+      E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
+      bin/test-e2e-self-managed-azure \
+        --ginkgo.label-filter="self-managed-azure-oauth-lb" \
+        --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_oauth_lb.xml" \
+        --ginkgo.v || OVERALL_EXIT=1
+      echo "$(date) OAuth LB tests finished"
+
+      exit ${OVERALL_EXIT}
+    timeout: 45m
+    grace_period: 5m
+    from: hypershift-tests
+    resources:
+      requests:
+        cpu: "1"
+        memory: 200Mi

From 0d74efd7ca24c1fdc99e0ead250241928372d946 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 12:17:16 -0400
Subject: [PATCH 02/12] fix(ci): address CodeRabbit review findings

- Use /usr/bin/hypershift instead of bin/hypershift for consistency
- Log warning on clusterversion wait timeout instead of silent || true
- Wait for all background cluster create PIDs before failing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ...ift-azure-create-selfmanaged-guests-chain.yaml | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index b135067740adf..a1553c9202e16 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -112,12 +112,17 @@ chain:
 
       # Wait for create commands to complete
       echo "$(date) Waiting for cluster create commands to finish..."
-      wait ${PUBLIC_PID}
+      FAILED=0
+      wait ${PUBLIC_PID} || FAILED=1
       echo "$(date) Public cluster create command completed"
-      wait ${PRIVATE_PID}
+      wait ${PRIVATE_PID} || FAILED=1
       echo "$(date) Private cluster create command completed"
-      wait ${OAUTH_LB_PID}
+      wait ${OAUTH_LB_PID} || FAILED=1
       echo "$(date) OAuth LB cluster create command completed"
+      if [[ ${FAILED} -ne 0 ]]; then
+        echo "$(date) ERROR: One or more cluster create commands failed"
+        exit 1
+      fi
 
       # Wait for clusters to become available
       echo "$(date) Waiting for public cluster to become available..."
@@ -135,9 +140,9 @@ chain:
       # Wait for clusteroperators on each cluster
       for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
         echo "$(date) Creating kubeconfig for ${CLUSTER}"
-        bin/hypershift create kubeconfig --namespace=clusters --name="${CLUSTER}" > "${SHARED_DIR}/nested_kubeconfig_${CLUSTER}"
+        /usr/bin/hypershift create kubeconfig --namespace=clusters --name="${CLUSTER}" > "${SHARED_DIR}/nested_kubeconfig_${CLUSTER}"
         echo "$(date) Waiting for clusteroperators on ${CLUSTER}..."
-        KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m || true
+        KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m || echo "$(date) WARNING: clusterversion wait timed out for ${CLUSTER}, continuing anyway"
       done
 
       # Write cluster names to shared dir

From 8ac08aa063f33a42976511d1721959ebfc9789e1 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 12:24:18 -0400
Subject: [PATCH 03/12] fix(ci): increase create-selfmanaged-guests step
 timeout to 120m

The step runs sequential 30m waits for cluster availability and
clusterversion across 3 clusters, so 60m was too tight.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift-azure-create-selfmanaged-guests-chain.yaml       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index a1553c9202e16..5e6e71825e683 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -157,7 +157,7 @@ chain:
       requests:
         cpu: 100m
         memory: 100Mi
-    timeout: 60m0s
+    timeout: 120m0s
     credentials:
     - mount_path: /etc/ci-pull-credentials
       name: ci-pull-credentials

From 1c45c36fdf6f64b7370d41d9af9295260dfa3d80 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 12:41:58 -0400
Subject: [PATCH 04/12] fix(ci): address CodeRabbit review - fail fast on
 missing prereqs

- Fail if azure_private_nat_subnet_id file is missing instead of
  silently omitting the private-link flag
- Fail the step if any cluster doesn't reach clusterversion Available
  instead of warning and continuing to e2e

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ...azure-create-selfmanaged-guests-chain.yaml | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index 5e6e71825e683..cf0e3ab9063e3 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -48,10 +48,11 @@ chain:
       HC_LOCATION="${HYPERSHIFT_AZURE_LOCATION:-centralus}"
 
       # Read private NAT subnet ID from SHARED_DIR (written by setup-private-link step)
-      AZURE_PRIVATE_NAT_SUBNET_ID=""
-      if [[ -f "${SHARED_DIR}/azure_private_nat_subnet_id" ]]; then
-        AZURE_PRIVATE_NAT_SUBNET_ID="$(cat "${SHARED_DIR}/azure_private_nat_subnet_id")"
+      if [[ ! -s "${SHARED_DIR}/azure_private_nat_subnet_id" ]]; then
+        echo "$(date) ERROR: azure_private_nat_subnet_id is required for the private guest cluster"
+        exit 1
       fi
+      AZURE_PRIVATE_NAT_SUBNET_ID="$(cat "${SHARED_DIR}/azure_private_nat_subnet_id")"
 
       # Marketplace image flags
       MARKETPLACE_ARGS=""
@@ -90,10 +91,7 @@ chain:
       PUBLIC_PID=$!
 
       # Create private cluster
-      PRIVATE_EXTRA=""
-      if [[ -n "${AZURE_PRIVATE_NAT_SUBNET_ID}" ]]; then
-        PRIVATE_EXTRA="${PRIVATE_EXTRA} --endpoint-access-private-nat-subnet-id=${AZURE_PRIVATE_NAT_SUBNET_ID}"
-      fi
+      PRIVATE_EXTRA="--endpoint-access-private-nat-subnet-id=${AZURE_PRIVATE_NAT_SUBNET_ID}"
       echo "$(date) Creating private self-managed cluster: ${PRIVATE_NAME}"
       /usr/bin/hypershift create cluster azure \
         --name="${PRIVATE_NAME}" \
@@ -138,12 +136,19 @@ chain:
       echo "$(date) OAuth LB cluster is available"
 
       # Wait for clusteroperators on each cluster
+      FAILED_READY=0
       for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
         echo "$(date) Creating kubeconfig for ${CLUSTER}"
         /usr/bin/hypershift create kubeconfig --namespace=clusters --name="${CLUSTER}" > "${SHARED_DIR}/nested_kubeconfig_${CLUSTER}"
         echo "$(date) Waiting for clusteroperators on ${CLUSTER}..."
-        KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m || echo "$(date) WARNING: clusterversion wait timed out for ${CLUSTER}, continuing anyway"
+        if ! KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m; then
+          echo "$(date) ERROR: clusterversion wait timed out for ${CLUSTER}"
+          FAILED_READY=1
+        fi
       done
+      if [[ ${FAILED_READY} -ne 0 ]]; then
+        exit 1
+      fi
 
       # Write cluster names to shared dir
       echo "${PUBLIC_NAME}" > "${SHARED_DIR}/cluster-name-public"

From 2703af268211c3523a215a626b49ede505d55b03 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 13:38:35 -0400
Subject: [PATCH 05/12] fix(ci): use shared e2e v2 binary for self-managed
 Azure tests

Update the run chain to use bin/test-e2e-v2 (shared binary) instead of
bin/test-e2e-self-managed-azure (separate binary). Azure tests now live
in the shared test/e2e/v2/tests/ package and self-select via Ginkgo
label filters and capability-based Skip() logic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml      | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml
index 5da816b993776..a86a749313096 100644
--- a/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/run-e2e-v2-selfmanaged/hypershift-azure-run-e2e-v2-selfmanaged-chain.yaml
@@ -27,7 +27,7 @@ chain:
       echo "$(date) Running public cluster tests against ${PUBLIC_NAME}..."
       E2E_HOSTED_CLUSTER_NAME="${PUBLIC_NAME}" \
       E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
-      bin/test-e2e-self-managed-azure \
+      bin/test-e2e-v2 \
         --ginkgo.label-filter="self-managed-azure-public" \
         --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_public.xml" \
         --ginkgo.v || OVERALL_EXIT=1
@@ -37,7 +37,7 @@ chain:
       echo "$(date) Running private topology tests against ${PRIVATE_NAME}..."
       E2E_HOSTED_CLUSTER_NAME="${PRIVATE_NAME}" \
       E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
-      bin/test-e2e-self-managed-azure \
+      bin/test-e2e-v2 \
         --ginkgo.label-filter="self-managed-azure-private" \
         --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_private.xml" \
         --ginkgo.v || OVERALL_EXIT=1
@@ -47,7 +47,7 @@ chain:
       echo "$(date) Running OAuth LB tests against ${OAUTH_LB_NAME}..."
       E2E_HOSTED_CLUSTER_NAME="${OAUTH_LB_NAME}" \
       E2E_HOSTED_CLUSTER_NAMESPACE=clusters \
-      bin/test-e2e-self-managed-azure \
+      bin/test-e2e-v2 \
         --ginkgo.label-filter="self-managed-azure-oauth-lb" \
         --ginkgo.junit-report="${ARTIFACT_DIR}/junit_self_managed_azure_oauth_lb.xml" \
         --ginkgo.v || OVERALL_EXIT=1

From bcb8b366fbd0073b22b3a4d41742d1c544d85178 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 13:48:22 -0400
Subject: [PATCH 06/12] fix(ci): preserve destroy failure signal and fix binary
 path

Address CodeRabbit review on destroy chain:
- Replace || echo pattern with failed flag + exit non-zero to surface
  leaked resources when cluster destroy fails
- Use /usr/bin/hypershift for path consistency with create chain

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ...rshift-azure-destroy-selfmanaged-guests-chain.yaml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
index b39188327a0c8..5c45c3a62e36c 100644
--- a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
@@ -22,16 +22,21 @@ chain:
       PRIVATE_NAME="$(echo -n "${PROW_JOB_ID}-prv"|sha256sum|cut -c-20)"
       OAUTH_LB_NAME="$(echo -n "${PROW_JOB_ID}-oau"|sha256sum|cut -c-20)"
 
-      # Destroy all clusters, continuing on errors
+      # Destroy all clusters, continue per-cluster, but preserve failure signal
+      FAILED=0
       for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
         echo "$(date) Destroying self-managed cluster: ${CLUSTER}"
-        bin/hypershift destroy cluster azure \
+        if ! /usr/bin/hypershift destroy cluster azure \
           --azure-creds="${AZURE_CREDS}" \
           --name="${CLUSTER}" \
           --location="${HC_LOCATION}" \
-          --cluster-grace-period=40m || echo "$(date) WARNING: Failed to destroy cluster ${CLUSTER}"
+          --cluster-grace-period=40m; then
+          echo "$(date) WARNING: Failed to destroy cluster ${CLUSTER}" >&2
+          FAILED=1
+        fi
         echo "$(date) Finished destroying cluster ${CLUSTER}"
       done
+      exit "${FAILED}"
     from: hypershift-operator
     grace_period: 5m0s
     resources:

From 4c143dee84bc8f9f21ef0d23814665dbc34f7d12 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 13:54:09 -0400
Subject: [PATCH 07/12] fix: update pipeline_run_if_changed path for v2
 self-managed Azure job

The selfmanagedazure package was removed and tests now live in the
shared v2 binary at test/e2e/v2/tests/hosted_cluster_azure_test.go.
Update the path filter to match the new location.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../config/openshift/hypershift/openshift-hypershift-main.yaml  | 2 +-
 .../hypershift/openshift-hypershift-main-presubmits.yaml        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
index af7d7a6bc1562..5b19e166d6850 100644
--- a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
+++ b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
@@ -178,7 +178,7 @@ tests:
 - always_run: false
   as: e2e-azure-v2-self-managed
   optional: true
-  pipeline_run_if_changed: ^(test/e2e/v2/selfmanagedazure|test/e2e/util)
+  pipeline_run_if_changed: ^(test/e2e/v2/tests/hosted_cluster_azure|test/e2e/util)
   steps:
     cluster_profile: hypershift-azure
     env:
diff --git a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
index 7ca1dd89726dc..0c274d6d0dc0c 100644
--- a/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
+++ b/ci-operator/jobs/openshift/hypershift/openshift-hypershift-main-presubmits.yaml
@@ -1308,7 +1308,7 @@ presubmits:
   - agent: kubernetes
     always_run: false
     annotations:
-      pipeline_run_if_changed: ^(test/e2e/v2/selfmanagedazure|test/e2e/util)
+      pipeline_run_if_changed: ^(test/e2e/v2/tests/hosted_cluster_azure|test/e2e/util)
     branches:
     - ^main$
     - ^main-

From 06805ce6b2de78cd08798e895bb0287b3eda119b Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 17:11:31 -0400
Subject: [PATCH 08/12] fix: increase destroy-guests timeout to 2h for
 sequential Azure cluster teardown

The 30m timeout is insufficient for destroying 3 Azure clusters sequentially,
especially when each cluster has a 40m grace period. The first destroy alone
can consume the full 30m budget, leaving the remaining clusters orphaned.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift-azure-destroy-selfmanaged-guests-chain.yaml      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
index 5c45c3a62e36c..bb2a630462520 100644
--- a/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/destroy-selfmanaged-guests/hypershift-azure-destroy-selfmanaged-guests-chain.yaml
@@ -43,7 +43,7 @@ chain:
       requests:
         cpu: 100m
         memory: 100Mi
-    timeout: 30m0s
+    timeout: 2h0m0s
     credentials:
     - mount_path: /etc/hypershift-ci-jobs-self-managed-azure
       name: hypershift-ci-jobs-self-managed-azure

From 988b0a8683dcc0e2ce259fa126cb3ca160f8f519 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Mon, 13 Apr 2026 17:42:23 -0400
Subject: [PATCH 09/12] fix: use management-side version rollout check instead
 of guest clusterversion

Mirror the AWS/GCP v2 create pattern: poll
hostedcluster.status.version.history[].state via the management API
instead of connecting to the guest API with oc wait clusterversion.

The previous approach created a guest kubeconfig and ran
`oc wait clusterversion/version` from the CI pod, which:
- Cannot resolve .hypershift.local DNS for private clusters
- Requires guest API network access the CI pod doesn't have

The new approach polls the HC resource on the management cluster,
which works for all endpoint access modes, and emits per-cluster
JUnit XML artifacts with diagnostic info on failure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ...azure-create-selfmanaged-guests-chain.yaml | 46 ++++++++++++++++---
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index cf0e3ab9063e3..afd9a65cca460 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -135,15 +135,49 @@ chain:
       oc wait --timeout=30m --for=condition=Available --namespace=clusters "hostedcluster/${OAUTH_LB_NAME}"
       echo "$(date) OAuth LB cluster is available"
 
-      # Wait for clusteroperators on each cluster
+      # Wait for version rollout to complete on each cluster (via management API, same as AWS/GCP v2)
       FAILED_READY=0
       for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
-        echo "$(date) Creating kubeconfig for ${CLUSTER}"
-        /usr/bin/hypershift create kubeconfig --namespace=clusters --name="${CLUSTER}" > "${SHARED_DIR}/nested_kubeconfig_${CLUSTER}"
-        echo "$(date) Waiting for clusteroperators on ${CLUSTER}..."
-        if ! KUBECONFIG="${SHARED_DIR}/nested_kubeconfig_${CLUSTER}" oc wait clusterversion/version --for='condition=Available=True' --timeout=30m; then
-          echo "$(date) ERROR: clusterversion wait timed out for ${CLUSTER}"
+        echo "$(date) Waiting for version rollout on ${CLUSTER}..."
+        set +e
+        CLUSTER_CHECK="${CLUSTER}" timeout 30m bash -c '
+          until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
+            sleep 15
+          done
+        '
+        ROLLOUT_RC=$?
+        set -e
+        if [[ ${ROLLOUT_RC} -ne 0 ]]; then
+          echo "$(date) ERROR: version rollout timed out for ${CLUSTER}"
+          cat << EOF > "${ARTIFACT_DIR}/junit_hosted_cluster_${CLUSTER}.xml"
+      <?xml version="1.0" encoding="UTF-8"?>
+      <testsuite name="hypershift install ${CLUSTER}" tests="1" failures="1">
+        <testcase name="hosted cluster version rollout succeeds">
+          <failure message="hosted cluster version rollout never completed">
+            <![CDATA[
+      error: hosted cluster version rollout never completed for ${CLUSTER}
+      Degraded: $(oc get -n clusters hostedcluster/${CLUSTER} -o jsonpath='{.status.conditions[?(@.type=="Degraded")].message}')
+      ClusterVersionSucceeding: $(oc get -n clusters hostedcluster/${CLUSTER} -o jsonpath='{.status.conditions[?(@.type=="ClusterVersionSucceeding")].message}')
+            ]]>
+          </failure>
+        </testcase>
+      </testsuite>
+      EOF
           FAILED_READY=1
+        else
+          echo "$(date) Version rollout completed for ${CLUSTER}"
+          cat << EOF > "${ARTIFACT_DIR}/junit_hosted_cluster_${CLUSTER}.xml"
+      <?xml version="1.0" encoding="UTF-8"?>
+      <testsuite name="hypershift install ${CLUSTER}" tests="1" failures="0">
+        <testcase name="hosted cluster version rollout succeeds">
+          <system-out>
+            <![CDATA[
+      info: hosted cluster version rollout completed successfully for ${CLUSTER}
+            ]]>
+          </system-out>
+        </testcase>
+      </testsuite>
+      EOF
         fi
       done
       if [[ ${FAILED_READY} -ne 0 ]]; then

From 4a53e9d28a2e9e3e6a15a43f3c2b46ca6f37c157 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Tue, 14 Apr 2026 06:34:00 -0400
Subject: [PATCH 10/12] fix(ci): parallelize version rollout checks and
 increase timeout to 45m

All 3 cluster version rollout checks were running sequentially with 30m
timeouts each. Azure self-managed 3-node clusters need more than 30m for
all cluster operators to become available after the HC reaches Available
state. The sequential approach could also consume up to 90m of the 120m
step timeout budget just for version rollout polling.

Changes:
- Increase per-cluster version rollout timeout from 30m to 45m
- Run all 3 version rollout checks in parallel (background subshells)
- Wait for each individually and generate per-cluster JUnit XML artifacts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 ...azure-create-selfmanaged-guests-chain.yaml | 44 ++++++++++++++-----
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index afd9a65cca460..d6cf45a5c56ed 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -135,18 +135,41 @@ chain:
       oc wait --timeout=30m --for=condition=Available --namespace=clusters "hostedcluster/${OAUTH_LB_NAME}"
       echo "$(date) OAuth LB cluster is available"
 
-      # Wait for version rollout to complete on each cluster (via management API, same as AWS/GCP v2)
+      # Wait for version rollout to complete on all clusters in parallel (via management API, same as AWS/GCP v2)
+      echo "$(date) Starting parallel version rollout checks..."
+      set +e
+
+      echo "$(date) Waiting for version rollout on ${PUBLIC_NAME}..."
+      CLUSTER_CHECK="${PUBLIC_NAME}" timeout 45m bash -c '
+        until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
+          sleep 15
+        done
+      ' &
+      ROLLOUT_PID_PUB=$!
+
+      echo "$(date) Waiting for version rollout on ${PRIVATE_NAME}..."
+      CLUSTER_CHECK="${PRIVATE_NAME}" timeout 45m bash -c '
+        until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
+          sleep 15
+        done
+      ' &
+      ROLLOUT_PID_PRV=$!
+
+      echo "$(date) Waiting for version rollout on ${OAUTH_LB_NAME}..."
+      CLUSTER_CHECK="${OAUTH_LB_NAME}" timeout 45m bash -c '
+        until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
+          sleep 15
+        done
+      ' &
+      ROLLOUT_PID_OAU=$!
+
+      echo "$(date) Waiting for all version rollout checks to complete..."
       FAILED_READY=0
-      for CLUSTER in "${PUBLIC_NAME}" "${PRIVATE_NAME}" "${OAUTH_LB_NAME}"; do
-        echo "$(date) Waiting for version rollout on ${CLUSTER}..."
-        set +e
-        CLUSTER_CHECK="${CLUSTER}" timeout 30m bash -c '
-          until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
-            sleep 15
-          done
-        '
+      for CLUSTER_PID in "${PUBLIC_NAME}:${ROLLOUT_PID_PUB}" "${PRIVATE_NAME}:${ROLLOUT_PID_PRV}" "${OAUTH_LB_NAME}:${ROLLOUT_PID_OAU}"; do
+        CLUSTER="${CLUSTER_PID%%:*}"
+        PID="${CLUSTER_PID##*:}"
+        wait ${PID}
         ROLLOUT_RC=$?
-        set -e
         if [[ ${ROLLOUT_RC} -ne 0 ]]; then
           echo "$(date) ERROR: version rollout timed out for ${CLUSTER}"
           cat << EOF > "${ARTIFACT_DIR}/junit_hosted_cluster_${CLUSTER}.xml"
@@ -180,6 +203,7 @@ chain:
       EOF
         fi
       done
+      set -e
       if [[ ${FAILED_READY} -ne 0 ]]; then
         exit 1
       fi

From 31f16c3908f2ddbef8950a269bca92c897304952 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Tue, 14 Apr 2026 06:54:55 -0400
Subject: [PATCH 11/12] fix(ci): add --assign-service-principal-roles and
 --dns-zone-rg-name

Root cause: the create-guests chain was passing --workload-identities-file
but NOT --assign-service-principal-roles. Without this flag, no Azure RBAC
role assignments are created for the workload identities on the managed
resource group. This caused all guest cluster operators (ingress, dns,
storage, etc.) to fail with 403 AuthorizationFailed when accessing Azure
APIs, preventing version rollout from ever completing.

The v1 e2e flow (test/e2e/util/options.go) sets both
AssignServicePrincipalRoles=true and DNSZoneRGName="os4-common".

Evidence from CVO logs:
- ingress: "The client 'd33639a4-...' does not have authorization to
  perform action 'Microsoft.Network/loadBalancers/read'"
- storage: "Waiting for the DaemonSet to deploy the CSI Node Service"
- 11 cluster operators stuck not-available, version.history.state=Partial

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift-azure-create-selfmanaged-guests-chain.yaml       | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index d6cf45a5c56ed..13299dc6cb804 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -80,6 +80,8 @@ chain:
         --oidc-issuer-url=${AZURE_OIDC_ISSUER_URL} \
         --sa-token-issuer-private-key-path=${AZURE_SA_TOKEN_ISSUER_KEY_PATH} \
         --workload-identities-file=${AZURE_WORKLOAD_IDENTITIES_FILE} \
+        --assign-service-principal-roles \
+        --dns-zone-rg-name=os4-common \
         --generate-ssh \
         ${MARKETPLACE_ARGS}"
 

From fb808fe4ed380fdfe539d8c9cbe1875f62931d00 Mon Sep 17 00:00:00 2001
From: Bryan Cox <brcox@redhat.com>
Date: Tue, 14 Apr 2026 12:04:40 -0400
Subject: [PATCH 12/12] fix(ci): increase private cluster version rollout
 timeout to 60m

Private topology clusters require additional time for data-plane
operator convergence because PrivateLink infrastructure (PLS creation,
Private Endpoint, DNS zone provisioning, and DNS propagation) must
complete before operators like ingress, dns, console, and storage can
reach the management cluster APIs through the PrivateLink tunnel.

Evidence from rehearsal 2044008339955781632:
- Public cluster: version rollout completed in ~6 min
- OAuth LB cluster: version rollout completed
- Private cluster: timed out at 45m with 11 operators not-available
  (console, dns, image-registry, ingress, insights,
  kube-storage-version-migrator, monitoring, node-tuning,
  openshift-samples, service-ca, storage)
- Cluster was NOT degraded (RBAC fix working), confirming this is a
  PrivateLink networking convergence timing issue

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../hypershift-azure-create-selfmanaged-guests-chain.yaml  | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
index 13299dc6cb804..4e85a4cb7ccbe 100644
--- a/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
+++ b/ci-operator/step-registry/hypershift/azure/create-selfmanaged-guests/hypershift-azure-create-selfmanaged-guests-chain.yaml
@@ -149,8 +149,11 @@ chain:
       ' &
       ROLLOUT_PID_PUB=$!
 
-      echo "$(date) Waiting for version rollout on ${PRIVATE_NAME}..."
-      CLUSTER_CHECK="${PRIVATE_NAME}" timeout 45m bash -c '
+      # Private clusters need extra time: PrivateLink setup (PLS creation, Private Endpoint,
+      # DNS zone provisioning, DNS propagation) adds significant latency before data-plane
+      # operators can converge through the PrivateLink tunnel.
+      echo "$(date) Waiting for version rollout on ${PRIVATE_NAME} (60m timeout for PrivateLink)..."
+      CLUSTER_CHECK="${PRIVATE_NAME}" timeout 60m bash -c '
         until [[ "$(oc get -n clusters hostedcluster/${CLUSTER_CHECK} -o jsonpath='"'"'{.status.version.history[?(@.state!="")].state}'"'"')" = "Completed" ]]; do
           sleep 15
         done