Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions .github/scripts/ci-health-check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
# - excludes "headlamp-agent-skills" (skills bundle, not a Headlamp plugin)
# If discovery fails (network error, GH_TOKEN missing, API outage), we fall
# back to a hardcoded list so the health check still produces a useful report.
#
# Failure Categories:
# - code: test/lint/build/typecheck failures on main
# - infra: startup_failure, timed_out, runner issues
# - pending: action_required (awaiting review/approval) - informational only
set -euo pipefail

ORG="privilegedescalation"
Expand Down Expand Up @@ -44,6 +49,7 @@ echo ""

failures=0
warnings=0
process_pending=0

for repo in "${PLUGIN_REPOS[@]}"; do
echo "--- ${repo} ---"
Expand All @@ -57,18 +63,40 @@ for repo in "${PLUGIN_REPOS[@]}"; do
continue
fi

# Count CI failures on main — exclude E2E and Release (tracked separately below)
main_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
total=$(echo "$runs" | jq 'length')

if [ "$main_failures" -gt 0 ]; then
echo " FAIL: ${main_failures} CI failure(s) in last ${total} runs on main:"
# Categorize failures:
# - code failures: test/lint/build on main
# - infra failures: startup_failure, timed_out
# - process pending: action_required

code_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests")] | length')
infra_failures=$(echo "$runs" | jq '[.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out")] | length')
action_required=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')

if [ "$code_failures" -gt 0 ]; then
echo " FAIL (code): ${code_failures} CI failure(s) in last ${total} runs on main:"
echo "$runs" | jq -r '.[] | select(.headBranch=="main" and .conclusion=="failure" and .name!="Release" and .name!="E2E Tests") | " - \(.name) (\(.updatedAt))"'
((failures++)) || true
else
fi

if [ "$infra_failures" -gt 0 ]; then
echo " FAIL (infra): ${infra_failures} infrastructure failure(s):"
echo "$runs" | jq -r '.[] | select(.conclusion=="startup_failure" or .conclusion=="timed_out") | " - \(.name): \(.conclusion) (\(.updatedAt))"'
((failures++)) || true
fi

if [ "$code_failures" -eq 0 ] && [ "$infra_failures" -eq 0 ]; then
echo " OK: CI passing on main"
fi

# Process pending — informational only (awaiting review/approval)
if [ "$action_required" -gt 0 ]; then
echo " INFO: ${action_required} workflow run(s) awaiting action (dual approval, review, etc.):"
echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | " - \(.name) on \(.headBranch) (\(.updatedAt))"'
((process_pending++)) || true
fi

# Surface E2E test failures as warnings (infra blocker: RBAC not yet applied — PRI-494)
e2e_failures=$(echo "$runs" | jq '[.[] | select(.headBranch=="main" and .name=="E2E Tests" and .conclusion=="failure")] | length')
if [ "$e2e_failures" -gt 0 ]; then
Expand All @@ -83,15 +111,6 @@ for repo in "${PLUGIN_REPOS[@]}"; do
((warnings++)) || true
fi

# Check for action_required — GitHub's "Require approval for first-time contributors" setting
# blocks workflow runs from GitHub App bot accounts. This is a CI pipeline blocker (see PRI-44).
action_required_count=$(echo "$runs" | jq '[.[] | select(.conclusion=="action_required")] | length')
if [ "$action_required_count" -gt 0 ]; then
echo " FAIL: ${action_required_count} workflow run(s) with action_required (GitHub App PR approval blocked):"
echo "$runs" | jq -r '.[] | select(.conclusion=="action_required") | " - \(.name) on \(.headBranch) (\(.updatedAt))"'
((failures++)) || true
fi

# Check latest release
latest_release=$(gh api "repos/${ORG}/${repo}/releases" --jq '.[0].tag_name // "none"' 2>/dev/null || echo "error")
echo " Latest release: ${latest_release}"
Expand All @@ -103,7 +122,8 @@ echo "=== Summary ==="
echo "Repos scanned: ${#PLUGIN_REPOS[@]}"
echo "With failures: ${failures}"
echo "With warnings: ${warnings}"
echo "With pending approval: ${process_pending}"

if [ "$failures" -gt 0 ]; then
exit 1
fi
fi
32 changes: 10 additions & 22 deletions .github/workflows/dual-approval-check.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,5 @@
name: Dual Approval Check

# Reusable workflow: verifies that both the CTO and QA bot accounts
# have approved a pull request. Plugin repos call this on
# pull_request_review events to get a required GitHub status check.
#
# Usage in a plugin repo's workflow:
#
# on:
# pull_request_review:
# types: [submitted, dismissed]
# pull_request:
# types: [opened, reopened, synchronize]
#
# jobs:
# dual-approval:
# uses: privilegedescalation/.github/.github/workflows/dual-approval-check.yaml@main
# secrets: inherit

on:
workflow_call:
inputs:
Expand Down Expand Up @@ -50,8 +33,8 @@ jobs:
PR_NUMBER: ${{ inputs.pr_number }}
REPO: ${{ github.repository }}
run: |
if [ -z "${PR_NUMBER}" ]; then
echo "::notice::No PR number in context (dismissed review?). Skipping dual approval check — no action needed."
if [ -z "${PR_NUMBER}" ] || [ "${PR_NUMBER}" = "null" ]; then
echo "::notice::No PR number in context (dismissed review or workflow_call without pr_number). Skipping dual approval check — no action needed."
exit 0
fi

Expand All @@ -62,11 +45,16 @@ jobs:
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}/reviews")

if [ -z "${REVIEWS}" ] || [ "${REVIEWS}" = "null" ]; then
echo "::warning::Could not fetch reviews for PR #${PR_NUMBER}. Assuming no approvals yet."
exit 1
fi

CTO_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${CTO_REVIEWER}" \
'[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
'[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')

QA_APPROVED=$(echo "${REVIEWS}" | jq -r --arg user "${QA_REVIEWER}" \
'[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | .state == "APPROVED"')
'[.[] | select(.user.login == $user or .user.login == ($user + "[bot]"))] | last | if .state then .state == "APPROVED" else false end')

echo "CTO (${CTO_REVIEWER}) approved: ${CTO_APPROVED}"
echo "QA (${QA_REVIEWER}) approved: ${QA_APPROVED}"
Expand All @@ -82,4 +70,4 @@ jobs:
echo " Missing: QA approval from ${QA_REVIEWER}"
fi
exit 1
fi
fi
94 changes: 87 additions & 7 deletions .github/workflows/plugin-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,87 @@ on:
required: false
type: string
default: 'v0.40.1'
e2e-namespace:
description: 'Namespace for E2E Headlamp deployment'
required: false
type: string
default: 'headlamp-dev'
plugin-name:
description: 'Plugin name used for ConfigMap naming and mount path (e.g. headlamp-kube-vip)'
required: true
type: string

jobs:
e2e:
runs-on: runners-privilegedescalation
timeout-minutes: 15

env:
E2E_NAMESPACE: headlamp-dev
E2E_NAMESPACE: ${{ inputs.e2e-namespace }}
E2E_RELEASE: headlamp-e2e
HEADLAMP_VERSION: ${{ inputs.headlamp-version }}
PLUGIN_NAME: ${{ inputs.plugin-name }}

steps:
- name: Checkout
uses: actions/checkout@v6

- name: Setup Node.js
- name: Detect package manager
id: pkg-manager
run: |
if [ -f "pnpm-lock.yaml" ]; then
echo "manager=pnpm" >> $GITHUB_OUTPUT
PM=$(python3 -c "import json,sys; d=json.load(open('package.json')); print('true' if d.get('packageManager','').startswith('pnpm@') else 'false')" 2>/dev/null || echo "false")
echo "has_package_manager=$PM" >> $GITHUB_OUTPUT
else
echo "manager=npm" >> $GITHUB_OUTPUT
echo "has_package_manager=false" >> $GITHUB_OUTPUT
fi

- name: Setup Node
uses: actions/setup-node@v6
with:
node-version: ${{ inputs.node-version }}
cache: 'npm'
cache: ${{ steps.pkg-manager.outputs.manager == 'npm' && 'npm' || '' }}

- name: Setup pnpm (Corepack, respects packageManager field)
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'true'
run: |
npm install -g corepack
corepack enable pnpm
corepack prepare $(node -p "require('./package.json').packageManager") --activate

- name: Setup pnpm (version latest, no packageManager field)
if: steps.pkg-manager.outputs.manager == 'pnpm' && steps.pkg-manager.outputs.has_package_manager == 'false'
uses: pnpm/action-setup@v5
with:
run_install: false
version: latest

- name: Get pnpm store directory
id: pnpm-store
if: steps.pkg-manager.outputs.manager == 'pnpm'
run: echo "dir=$(pnpm store path --silent)" >> $GITHUB_OUTPUT

- name: Cache pnpm store
if: steps.pkg-manager.outputs.manager == 'pnpm'
uses: actions/cache@v5
with:
path: ${{ steps.pnpm-store.outputs.dir }}
key: ${{ runner.os }}-pnpm-${{ hashFiles('**/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-

- name: Setup kubectl
uses: azure/setup-kubectl@v4

- name: Install dependencies
run: npm ci
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm install --frozen-lockfile
else
npm ci
fi

- name: Build plugin
run: npx @kinvolk/headlamp-plugin build
Expand All @@ -55,11 +110,37 @@ jobs:
exit 1
fi

- name: Restore Playwright browser cache
uses: actions/cache@v5
id: playwright-cache
with:
path: ~/.cache/ms-playwright
key: ${{ runner.os }}-playwright-${{ hashFiles('**/package.json') }}
restore-keys: |
${{ runner.os }}-playwright-

- name: Install Playwright browsers
run: npx playwright install --with-deps chromium
if: steps.playwright-cache.outputs.cache-hit != 'true'
run: |
PM="${{ steps.pkg-manager.outputs.manager }}"
echo "Cache miss — installing Playwright browsers (attempt 1/3)..."
if [ "$PM" = "pnpm" ]; then
pnpm exec playwright install --with-deps chromium || \
(echo "Attempt 2/3..." && sleep 5 && pnpm exec playwright install --with-deps chromium) || \
(echo "Attempt 3/3..." && sleep 10 && pnpm exec playwright install --with-deps chromium)
else
npx playwright install --with-deps chromium || \
(echo "Attempt 2/3..." && sleep 5 && npx playwright install --with-deps chromium) || \
(echo "Attempt 3/3..." && sleep 10 && npx playwright install --with-deps chromium)
fi

- name: Run E2E tests
run: npm run e2e
run: |
if [ "${{ steps.pkg-manager.outputs.manager }}" = "pnpm" ]; then
pnpm run e2e
else
npm run e2e
fi
env:
HEADLAMP_URL: ${{ env.HEADLAMP_URL }}
HEADLAMP_TOKEN: ${{ env.HEADLAMP_TOKEN }}
Expand Down Expand Up @@ -93,4 +174,3 @@ jobs:
name: test-results
path: test-results/
retention-days: 7

Loading