privilegedescalation · privilegedescalation-engineer · May 4, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/.github/scripts/ci-health-check.sh b/.github/scripts/ci-health-check.sh
@@ -12,6 +12,11 @@
 #   - excludes "headlamp-agent-skills" (skills bundle, not a Headlamp plugin)
 # If discovery fails (network error, GH_TOKEN missing, API outage), we fall
 # back to a hardcoded list so the health check still produces a useful report.
+#
+# Failure Categories:
+#   - code: runs on main with conclusion "failure" (Release and E2E Tests workflows excluded)
+#   - infra: runs on any branch with conclusion startup_failure or timed_out
+#   - pending: action_required runs on any branch (awaiting review/approval) - informational only
 set -euo pipefail
 
 ORG="privilegedescalation"
@@ -44,6 +49,7 @@ echo ""
 
 failures=0
 warnings=0
+process_pending=0
 
 for repo in "${PLUGIN_REPOS[@]}"; do
   echo "--- ${repo} ---"
@@ -57,18 +63,40 @@ for repo in "${PLUGIN_REPOS[@]}"; do
     continue
   fi
 
-  # Count CI failures on main — exclude E2E and Release (tracked separately below)
-  main_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
   total=$(echo "$runs" | jq 'length')
 
-  if [ "$main_failures" -gt 0 ]; then
-    echo "  FAIL: ${main_failures} CI failure(s) in last ${total} runs on main:"
+  # Categorize the fetched runs by conclusion:
+  # - code failures: "failure" on main, excluding the Release and E2E Tests workflows
+  # - infra failures: startup_failure or timed_out, on any branch
+  # - process pending: action_required, on any branch
+
+  code_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
+  infra_failures=$(echo "$runs" | jq '[.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out")] | length')
+  action_required=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')
+
+  if [ "$code_failures" -gt 0 ]; then
+    echo "  FAIL (code): ${code_failures} CI failure(s) in last ${total} runs on main:"
     echo "$runs" | jq -r '.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests") | "    - \(.name) (\(.updatedAt))"'
     ((failures++)) || true
-  else
+  fi
+
+  if [ "$infra_failures" -gt 0 ]; then
+    echo "  FAIL (infra): ${infra_failures} infrastructure failure(s):"
+    echo "$runs" | jq -r '.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out") | "    - \(.name): \(.conclusion) (\(.updatedAt))"'
+    ((failures++)) || true
+  fi
+
+  if [ "$code_failures" -eq 0 ] && [ "$infra_failures" -eq 0 ]; then
     echo "  OK: CI passing on main"
   fi
 
+  # Process pending — informational only (awaiting review/approval)
+  if [ "$action_required" -gt 0 ]; then
+    echo "  INFO: ${action_required} workflow run(s) awaiting action (dual approval, review, etc.):"
+    echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | "    - \(.name) on \(.headBranch) (\(.updatedAt))"'
+    ((process_pending++)) || true
+  fi
+
   # Surface E2E test failures as warnings (infra blocker: RBAC not yet applied — PRI-494)
   e2e_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .name=="E2E Tests" and .conclusion=="failure")] | length')
   if [ "$e2e_failures" -gt 0 ]; then
@@ -83,15 +111,6 @@ for repo in "${PLUGIN_REPOS[@]}"; do
     ((warnings++)) || true
   fi
 
-  # Check for action_required — GitHub's "Require approval for first-time contributors" setting
-  # blocks workflow runs from GitHub App bot accounts. This is a CI pipeline blocker (see PRI-44).
-  action_required_count=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')
-  if [ "$action_required_count" -gt 0 ]; then
-    echo "  FAIL: ${action_required_count} workflow run(s) with action_required (GitHub App PR approval blocked):"
-    echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | "    - \(.name) on \(.headBranch) (\(.updatedAt))"'
-    ((failures++)) || true
-  fi
-
   # Check latest release
   latest_release=$(gh api "repos/${ORG}/${repo}/releases" --jq '.[0].tag_name // "none"' 2>/dev/null || echo "error")
   echo "  Latest release: ${latest_release}"
@@ -103,7 +122,8 @@ echo "=== Summary ==="
 echo "Repos scanned: ${#PLUGIN_REPOS[@]}"
 echo "With failures: ${failures}"
 echo "With warnings: ${warnings}"
+echo "With pending approval: ${process_pending}"
 
 if [ "$failures" -gt 0 ]; then
   exit 1
-fi
+fi
diff --git a/.github/workflows/dual-approval-check.yaml b/.github/workflows/dual-approval-check.yaml
@@ -1,22 +1,5 @@
 name: Dual Approval Check
 
-# Reusable workflow: verifies that both the CTO and QA bot accounts
-# have approved a pull request. Plugin repos call this on
-# pull_request_review events to get a required GitHub status check.
-#
-# Usage in a plugin repo's workflow:
-#
-#   on:
-#     pull_request_review:
-#       types: [submitted, dismissed]
-#     pull_request:
-#       types: [opened, reopened, synchronize]
-#
-#   jobs:
-#     dual-approval:
-#       uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
-#       secrets: inherit
-
 on:
   workflow_call:
     inputs:
@@ -50,8 +33,8 @@ jobs:
           PR_NUMBER: ${{ inputs.pr_number }}
           REPO: ${{ github.repository }}
         run: |
-          if [ -z "${PR_NUMBER}" ]; then
-            echo "::notice::No PR number in context (dismissed review?). Skipping dual approval check — no action needed."
+          if [ -z "${PR_NUMBER}" ] || [ "${PR_NUMBER}" = "null" ]; then
+            echo "::notice::No PR number in context (dismissed review or workflow_call without pr_number). Skipping dual approval check — no action needed."
             exit 0
           fi
 
@@ -62,11 +45,16 @@ jobs:
             -H "Accept: application/vnd.github.v3+json" \
             "https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}/reviews")
 
+          if ! echo "${REVIEWS}" | jq -e 'type == "array"' >/dev/null 2>&1; then  # rejects empty, null, or an API error object
+            echo "::error::Could not fetch reviews for PR #${PR_NUMBER}: response is not a JSON array of reviews."
+            exit 1
+          fi
+
           CTO_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${CTO_REVIEWER}" \
-            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
+            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')
 
           QA_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${QA_REVIEWER}" \
-            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
+            '[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')
 
           echo "CTO (${CTO_REVIEWER}) approved: ${CTO_APPROVED}"
           echo "QA (${QA_REVIEWER}) approved: ${QA_APPROVED}"
@@ -82,4 +70,4 @@ jobs:
               echo "  Missing: QA approval from ${QA_REVIEWER}"
             fi
             exit 1
-          fi
+          fi
diff --git a/.github/workflows/plugin-e2e.yaml b/.github/workflows/plugin-e2e.yaml
@@ -13,32 +13,87 @@ on:
         required: false
         type: string
         default: 'v0.40.1'
+      e2e-namespace:
+        description: 'Namespace for E2E Headlamp deployment'
+        required: false
+        type: string
+        default: 'headlamp-dev'
+      plugin-name:
+        description: 'Plugin name used for ConfigMap naming and mount path (e.g. headlamp-kube-vip)'
+        required: true
+        type: string
 
 jobs:
   e2e:
     runs-on: runners-privilegedescalation
     timeout-minutes: 15
 
     env:
-      E2E_NAMESPACE: headlamp-dev
+      E2E_NAMESPACE: ${{ inputs.e2e-namespace }}
       E2E_RELEASE: headlamp-e2e
       HEADLAMP_VERSION: ${{ inputs.headlamp-version }}
+      PLUGIN_NAME: ${{ inputs.plugin-name }}
 
     steps:
       - name: Checkout
         uses: actions/checkout@v6
 
-      - name: Setup Node.js
+      - name: Detect package manager
+        id: pkg-manager
+        run: |
+          if [ -f "pnpm-lock.yaml" ]; then
+            echo "manager=pnpm" >> $GITHUB_OUTPUT
+            PM=$(python3 -c "import json,sys; d=json.load(open('package.json')); print('true' if d.get('packageManager','').startswith('pnpm@') else 'false')" 2>/dev/null || echo "false")
+            echo "has_package_manager=$PM" >> $GITHUB_OUTPUT
+          else
+            echo "manager=npm" >> $GITHUB_OUTPUT
+            echo "has_package_manager=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Setup Node
         uses: actions/setup-node@v6
         with:
           node-version: ${{ inputs.node-version }}
-          cache: 'npm'
+          cache: ${{ steps.pkg-manager.outputs.manager == 'npm' && 'npm' || '' }}
+
+      - name: Setup pnpm (Corepack, respects packageManager field)
+        if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'true'
+        run: |
+          npm install -g corepack
+          corepack enable pnpm
+          corepack prepare $(node -p "require('./package.json').packageManager") --activate
+
+      - name: Setup pnpm (version latest, no packageManager field)
+        if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'false'
+        uses: pnpm/action-setup@v5
+        with:
+          run_install: false
+          version: latest
+
+      - name: Get pnpm store directory
+        id: pnpm-store
+        if: steps.pkg-manager.outputs.manager == 'pnpm'
+        run: echo "dir=$(pnpm store path --silent)" >> $GITHUB_OUTPUT
+
+      - name: Cache pnpm store
+        if: steps.pkg-manager.outputs.manager == 'pnpm'
+        uses: actions/cache@v5
+        with:
+          path: ${{ steps.pnpm-store.outputs.dir }}
+          key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }}
+          restore-keys: |
+            ${{ runner.os }}-pnpm-
 
       - name: Setup kubectl
         uses: azure/setup-kubectl@v4
 
       - name: Install dependencies
-        run: npm ci
+        run: |
+          if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
+            pnpm install --frozen-lockfile
+          else
+            npm ci
+          fi
 
       - name: Build plugin
         run: npx @kinvolk/headlamp-plugin build
@@ -55,11 +110,37 @@ jobs:
             exit 1
           fi
 
+      - name: Restore Playwright browser cache
+        uses: actions/cache@v5
+        id: playwright-cache
+        with:
+          path: ~/.cache/ms-playwright
+          key: ${{ runner.os }}-playwright-${{ hashFiles('pnpm-lock.yaml', 'package-lock.json') }}  # root lockfiles only; '**/package.json' would also hash node_modules after install
+          restore-keys: |
+            ${{ runner.os }}-playwright-
+
       - name: Install Playwright browsers
-        run: npx playwright install --with-deps chromium
+        if: steps.playwright-cache.outputs.cache-hit != 'true'
+        run: |
+          PM="${{ steps.pkg-manager.outputs.manager }}"
+          echo "Cache miss — installing Playwright browsers (attempt 1/3)..."
+          if [ "$PM" = "pnpm" ]; then
+            pnpm exec playwright install --with-deps chromium || \
+            (echo "Attempt 2/3..." && sleep 5 && pnpm exec playwright install --with-deps chromium) || \
+            (echo "Attempt 3/3..." && sleep 10 && pnpm exec playwright install --with-deps chromium)
+          else
+            npx playwright install --with-deps chromium || \
+            (echo "Attempt 2/3..." && sleep 5 && npx playwright install --with-deps chromium) || \
+            (echo "Attempt 3/3..." && sleep 10 && npx playwright install --with-deps chromium)
+          fi
 
       - name: Run E2E tests
-        run: npm run e2e
+        run: |
+          if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
+            pnpm run e2e
+          else
+            npm run e2e
+          fi
         env:
           HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
           HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
@@ -93,4 +174,3 @@ jobs:
           name: test-results
           path: test-results/
           retention-days: 7
-