From 120c676a76046458278bcf31e30d1cbe5cbe5bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 10:59:47 +0200 Subject: [PATCH 01/23] feat: add autonomous E2E CI failure fix workflow with skills and Playwright agents Add 6 AI agent skills and /fix-e2e command for autonomous E2E CI failure investigation and fix workflow. Skills: e2e-parse-ci-failure, e2e-deploy-rhdh, e2e-reproduce-failure, e2e-diagnose-and-fix, e2e-verify-fix, e2e-submit-and-review. Key features: - Playwright Test Agents (healer/generator/planner) with MCP server - Auto-generate .env via local-test-setup.sh --env with secure file handling - Sourcebot/Context7 optional with gh/clone fallbacks - OCP vs K8s job differentiation for deploy-only mode - Playwright MCP for parsing HTML test reports - All skills managed via rulesync, synced to OpenCode, Claude Code, and Cursor Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 167 ++++++++ .claude/rules/e2e-fix-workflow.md | 349 +++++++++++++++++ .claude/skills/e2e-deploy-rhdh/SKILL.md | 232 ++++++++++++ .claude/skills/e2e-diagnose-and-fix/SKILL.md | 250 ++++++++++++ .claude/skills/e2e-parse-ci-failure/SKILL.md | 202 ++++++++++ .claude/skills/e2e-reproduce-failure/SKILL.md | 194 ++++++++++ .claude/skills/e2e-submit-and-review/SKILL.md | 316 ++++++++++++++++ .claude/skills/e2e-verify-fix/SKILL.md | 147 ++++++++ .cursor/commands/fix-e2e.md | 164 ++++++++ .cursor/rules/e2e-fix-workflow.mdc | 352 +++++++++++++++++ .cursor/skills/e2e-deploy-rhdh/SKILL.md | 230 ++++++++++++ .cursor/skills/e2e-diagnose-and-fix/SKILL.md | 248 ++++++++++++ .cursor/skills/e2e-parse-ci-failure/SKILL.md | 200 ++++++++++ .cursor/skills/e2e-reproduce-failure/SKILL.md | 192 ++++++++++ .cursor/skills/e2e-submit-and-review/SKILL.md | 314 ++++++++++++++++ .cursor/skills/e2e-verify-fix/SKILL.md | 145 +++++++ .gitignore | 3 + .opencode/command/fix-e2e.md | 167 ++++++++ .opencode/memories/e2e-fix-workflow.md | 349 +++++++++++++++++ 
.opencode/skill/e2e-deploy-rhdh/SKILL.md | 232 ++++++++++++ .opencode/skill/e2e-diagnose-and-fix/SKILL.md | 250 ++++++++++++ .opencode/skill/e2e-parse-ci-failure/SKILL.md | 202 ++++++++++ .../skill/e2e-reproduce-failure/SKILL.md | 194 ++++++++++ .../skill/e2e-submit-and-review/SKILL.md | 316 ++++++++++++++++ .opencode/skill/e2e-verify-fix/SKILL.md | 147 ++++++++ .rulesync/commands/fix-e2e.md | 169 +++++++++ .rulesync/rules/e2e-fix-workflow.md | 355 ++++++++++++++++++ .rulesync/skills/e2e-deploy-rhdh/SKILL.md | 234 ++++++++++++ .../skills/e2e-diagnose-and-fix/SKILL.md | 252 +++++++++++++ .../skills/e2e-parse-ci-failure/SKILL.md | 204 ++++++++++ .../skills/e2e-reproduce-failure/SKILL.md | 202 ++++++++++ .../skills/e2e-submit-and-review/SKILL.md | 318 ++++++++++++++++ .rulesync/skills/e2e-verify-fix/SKILL.md | 163 ++++++++ e2e-tests/.gitignore | 9 + e2e-tests/local-test-setup.sh | 70 +++- e2e-tests/playwright.config.ts | 3 + rulesync.jsonc | 3 +- 37 files changed, 7541 insertions(+), 3 deletions(-) create mode 100644 .claude/commands/fix-e2e.md create mode 100644 .claude/rules/e2e-fix-workflow.md create mode 100644 .claude/skills/e2e-deploy-rhdh/SKILL.md create mode 100644 .claude/skills/e2e-diagnose-and-fix/SKILL.md create mode 100644 .claude/skills/e2e-parse-ci-failure/SKILL.md create mode 100644 .claude/skills/e2e-reproduce-failure/SKILL.md create mode 100644 .claude/skills/e2e-submit-and-review/SKILL.md create mode 100644 .claude/skills/e2e-verify-fix/SKILL.md create mode 100644 .cursor/commands/fix-e2e.md create mode 100644 .cursor/rules/e2e-fix-workflow.mdc create mode 100644 .cursor/skills/e2e-deploy-rhdh/SKILL.md create mode 100644 .cursor/skills/e2e-diagnose-and-fix/SKILL.md create mode 100644 .cursor/skills/e2e-parse-ci-failure/SKILL.md create mode 100644 .cursor/skills/e2e-reproduce-failure/SKILL.md create mode 100644 .cursor/skills/e2e-submit-and-review/SKILL.md create mode 100644 .cursor/skills/e2e-verify-fix/SKILL.md create mode 100644 
.opencode/command/fix-e2e.md create mode 100644 .opencode/memories/e2e-fix-workflow.md create mode 100644 .opencode/skill/e2e-deploy-rhdh/SKILL.md create mode 100644 .opencode/skill/e2e-diagnose-and-fix/SKILL.md create mode 100644 .opencode/skill/e2e-parse-ci-failure/SKILL.md create mode 100644 .opencode/skill/e2e-reproduce-failure/SKILL.md create mode 100644 .opencode/skill/e2e-submit-and-review/SKILL.md create mode 100644 .opencode/skill/e2e-verify-fix/SKILL.md create mode 100644 .rulesync/commands/fix-e2e.md create mode 100644 .rulesync/rules/e2e-fix-workflow.md create mode 100644 .rulesync/skills/e2e-deploy-rhdh/SKILL.md create mode 100644 .rulesync/skills/e2e-diagnose-and-fix/SKILL.md create mode 100644 .rulesync/skills/e2e-parse-ci-failure/SKILL.md create mode 100644 .rulesync/skills/e2e-reproduce-failure/SKILL.md create mode 100644 .rulesync/skills/e2e-submit-and-review/SKILL.md create mode 100644 .rulesync/skills/e2e-verify-fix/SKILL.md diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md new file mode 100644 index 0000000000..f76b4f044f --- /dev/null +++ b/.claude/commands/fix-e2e.md @@ -0,0 +1,167 @@ +--- +description: >- + Autonomously investigate and fix a failing RHDH E2E CI test. Accepts a Prow + job URL or Jira ticket ID. Deploys RHDH, reproduces the failure, fixes the + test using Playwright agents, and submits a PR with Qodo review. +--- +# Fix E2E CI Failure + +Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failing RHDH E2E test. + +## Input + +`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +- **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Jira ticket ID**: `RHIDP-XXXX` +- **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` + +## Workflow + +Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. 
+ +### Phase 1: Parse CI Failure + +**Skill**: `e2e-parse-ci-failure` + +Parse the input to extract: +- Failing test name and spec file path +- Playwright project name +- Release branch (main, release-1.9, etc.) +- Platform (OCP, AKS, EKS, GKE) +- Deployment method (Helm, Operator) +- Error type and message +- local-run.sh job name parameter + +**Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. + +### Phase 2: Setup Fix Branch + +First, check the current branch: + +```bash +git branch --show-current +``` + +- **On `main` or `release-*`**: You're on a base branch — create a feature branch from the matching upstream release branch: + ```bash + git fetch upstream + git checkout -b fix/e2e-<description> upstream/<release-branch> + ``` + If a Jira ticket was provided, include the ticket ID in the branch name: + `fix/RHIDP-XXXX-e2e-<description>` + +- **On any other branch** (e.g., `fix/e2e-*`): You're likely already on a feature branch. **Ask the user** whether to: + 1. Use the current branch as-is + 2. Create a new branch from the upstream release branch + +### Phase 3: Deploy RHDH + +**Skill**: `e2e-deploy-rhdh` + +Deploy RHDH to a cluster using `e2e-tests/local-run.sh`. CLI mode requires **all three** flags (`-j`, `-r`, `-t`): + +**OCP jobs** — use `-s` (deploy-only) to skip automated test execution so you can run the specific failing test manually: +```bash +cd e2e-tests +./local-run.sh -j <full-job-name> -r <image-repo> -t <image-tag> -s +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`. These jobs require the full execution pipeline and do not support deploy-only mode: +```bash +cd e2e-tests +./local-run.sh -j <full-job-name> -r <image-repo> -t <image-tag> +``` + +Use the **full Prow CI job name** for `-j` (not shortened names). 
+ +Select the image repo and tag based on the release branch: +- `main` → `-r rhdh-community/rhdh -t next` +- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` +- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` + +After deployment completes, set up the local test environment (run this from the repository root): +```bash +source e2e-tests/local-test-setup.sh +``` + +**Decision gate**: Before attempting deployment, verify cluster connectivity (`oc whoami`). If no cluster is available, **ask the user for explicit approval** before skipping this phase — do not skip silently. If deployment fails, the `e2e-deploy-rhdh` skill has error recovery procedures. If deployment cannot be recovered after investigation, report the deployment issue and stop. + +### Phase 4: Reproduce Failure + +**Skill**: `e2e-reproduce-failure` + +Run the specific failing test to confirm it reproduces locally. Use `--project=any-test` to avoid running the smoke test dependency — it matches any spec file without extra overhead: + +```bash +cd e2e-tests +yarn playwright test <spec-file> --project=any-test --retries=0 --workers=1 +``` + +**Decision gates**: +- **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. +- **Consistent failure**: Proceed to Phase 5 +- **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability +- **Cannot reproduce** (passes in all 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. + +### Phase 5: Diagnose and Fix + +**Skill**: `e2e-diagnose-and-fix` + +Analyze the failure and implement a fix: + +1. **Classify the failure**: locator drift, timing, assertion mismatch, data dependency, platform-specific, deployment config +2. 
**Use Playwright Test Agents**: Invoke the healer agent (`@playwright-test-healer`) for automated test repair — it can debug the test, inspect the UI, generate locators, and edit the code +3. **Follow Playwright best practices**: Consult the `playwright-locators` and `ci-e2e-testing` project rules. Use semantic role-based locators (`getByRole`, `getByLabel`), auto-waiting assertions, Page Object Model, component annotations. Fetch official Playwright best practices via Context7 or https://playwright.dev/docs/best-practices if needed +4. **Cross-repo investigation**: If the issue is in deployment config, search `rhdh-operator` and `rhdh-chart` repos. Use Sourcebot or Context7 if available; otherwise fall back to `gh search code` or clone the repo locally and grep + +**Decision gate**: If the analysis reveals a product bug (not a test issue), you must be **absolutely certain** before marking a test with `test.fixme()`. The Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with `// TODO:` linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Proceed to Phase 6 with the `test.fixme()` change + +### Phase 6: Verify Fix + +**Skill**: `e2e-verify-fix` + +Verify the fix: +1. Run the fixed test once — must pass +2. Run 5 times — must pass 5/5 +3. Run code quality checks: `yarn tsc:check`, `yarn lint:check`, `yarn prettier:check` +4. Fix any lint/formatting issues + +**Decision gate**: If the test still fails or is flaky, return to Phase 5 and iterate. If verification cannot be run (no cluster, environment issues), **ask the user for explicit approval** before proceeding without it. 
+ +### Phase 7: Submit PR and Handle Review + +**Skill**: `e2e-submit-and-review` + +1. **Resolve pre-commit hooks**: Run `yarn install` in all relevant workspaces (root, `e2e-tests/`, `.ci/`) before committing +2. **Commit**: Stage changes, commit with conventional format +3. **Push**: `git push -u origin <branch-name>` +4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head <github-username>:<branch-name> --base <release-branch>` +5. **Trigger Qodo review**: Comment `/agentic_review` on the PR +6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +7. **Address feedback**: Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test <presubmit-job-name>` to trigger the presubmit job matching the platform and deployment method from Phase 1 +9. **Monitor CI**: Watch CI checks with `gh pr checks` + +### Final Report + +After all phases complete, produce a summary: + +``` +E2E Fix Summary: +- Input: <Prow URL or Jira ticket> +- Test: <test name> (<spec file>) +- Branch: <branch name> +- Root cause: <summary> +- Fix: <summary> +- Verification: <results> +- PR: <PR URL> +- CI Status: <status> +- Qodo Review: <status> +``` diff --git a/.claude/rules/e2e-fix-workflow.md b/.claude/rules/e2e-fix-workflow.md new file mode 100644 index 0000000000..885604d1e0 --- /dev/null +++ b/.claude/rules/e2e-fix-workflow.md @@ -0,0 +1,349 @@ +# E2E Test Fix Workflow + +This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. 
+ +## Table of Contents + +- [Workflow Overview](#workflow-overview) +- [Parsing CI Failures](#parsing-ci-failures) +- [Branch and Deployment Setup](#branch-and-deployment-setup) +- [Reproducing Failures](#reproducing-failures) +- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) +- [Playwright Test Agents](#playwright-test-agents) +- [Verification and PR Submission](#verification-and-pr-submission) + +## Workflow Overview + +The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: + +1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +2. **Setup Fix Branch** — Create a branch from the correct upstream release branch +3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** — Confirm the failure reproduces locally +5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents +6. **Verify Fix** — Run the test multiple times and check code quality +7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI + +Every phase except branch setup (Phase 2) has a corresponding skill (in `.opencode/skill/`, `.claude/skills/`, and `.cursor/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. + +**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. + +## Parsing CI Failures + +### Prow URL Structure + +``` +https://prow.ci.openshift.org/view/gs/test-platform-results/logs/<job-name>/<build-id> +``` + +Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: +``` +✘ [] › /.spec.ts: +``` + +### Job Name Mapping Tables + +#### Job Name → Release Branch + +| Pattern in job name | Release branch | +|---------------------|---------------| +| `*-rhdh-main-*` | `main` | +| `*-rhdh-release-1.9-*` | `release-1.9` | +| `*-rhdh-release-1.8-*` | `release-1.8` | + +#### Job Name → Platform and Deployment Method + +| Pattern | Platform | Method | +|---------|----------|--------| +| `*ocp*helm*` | OCP | Helm | +| `*ocp*operator*` | OCP | Operator | +| `*aks*helm*` | AKS | Helm | +| `*aks*operator*` | AKS | Operator | +| `*eks*helm*` | EKS | Helm | +| `*eks*operator*` | EKS | Operator | +| `*gke*helm*` | GKE | Helm | +| `*gke*operator*` | GKE | Operator | +| `*osd-gcp*` | OSD-GCP | Helm/Operator | + +#### Job Name → Playwright Projects + +| Job pattern | Projects | +|-------------|----------| +| `*ocp*helm*nightly*` (not upgrade) | `showcase`, `showcase-rbac`, `showcase-runtime`, `showcase-sanity-plugins`, `showcase-localization-*` | +| `*ocp*helm*upgrade*` | `showcase-upgrade` | +| `*ocp*operator*nightly*` (not auth) | `showcase-operator`, `showcase-operator-rbac` | +| `*ocp*operator*auth-providers*` | `showcase-auth-providers` | +| `*ocp*helm*pull*` | `showcase`, `showcase-rbac` | +| `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | +| `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | + +#### Job Name → local-run.sh `-j` Parameter + +Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. 
+ +**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` +**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` + +Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. + +#### Release Branch → Image Repo and Tag + +| Branch | `-r` (image repo) | `-t` (image tag) | +|--------|-------------------|-------------------| +| `main` | `rhdh-community/rhdh` | `next` | +| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | +| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | + +## Branch and Deployment Setup + +### Branch Creation + +Always create branches from upstream, never from local copies: + +```bash +git fetch upstream +git checkout -b fix/e2e- upstream/ +``` + +### Deployment via local-run.sh + +CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. + +```bash +cd e2e-tests +# OCP jobs: use -s to deploy only, then run tests manually +./local-run.sh -j -r -t -s +# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required +./local-run.sh -j -r -t +``` + +Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
+ +After deployment, source the test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +### Deployment Error Recovery + +| Error | Investigation | Common Fix | +|-------|--------------|------------| +| CrashLoopBackOff | `oc logs <pod> -n <namespace> --previous` | Fix ConfigMap, plugin config, or secrets | +| ImagePullBackOff | `oc describe pod <pod> -n <namespace>` | Verify image exists, check pull secrets | +| Helm failure | `helm status <release> -n <namespace>` | Check values against `.ci/pipelines/value_files/` | +| Operator failure | `oc get backstage -n <namespace>` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | + +For config issues, search these repos for reference: +- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts +- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults + +## Reproducing Failures + +### Test Execution + +```bash +cd e2e-tests +yarn playwright test <spec-file> --project=<project> --retries=0 --workers=1 +``` + +### Flakiness Detection + +If the first run passes or the failure looks intermittent, run the test 10 times and classify the result: +- **10/10 pass** → cannot reproduce (check environment differences) +- **Mixed results** → flaky (focus on reliability improvements) +- **0/10 pass** → consistent failure + +### Debugging Modes + +```bash +# Headed (visible browser) +yarn playwright test <spec-file> --project=<project> --headed + +# Debug (Playwright Inspector) +yarn playwright test <spec-file> --project=<project> --debug + +# View trace +yarn playwright show-trace test-results/<test-output-dir>/trace.zip +``` + +## Diagnosing and Fixing Tests + +### Failure Classification + +1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors +2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals +3. **Assertion mismatch** — Expected values changed → update test data or report product bug +4. **Data dependency** — Test data missing → add proper setup/teardown +5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional +6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` + +### RHDH Coding Conventions (Required) + +**Locators** — Always use semantic role-based locators: +```typescript +// GOOD +page.getByRole('button', { name: 'Create' }) +page.getByRole('heading', { name: 'Catalog' }) +page.getByText('No results found') + +// BAD — deprecated CSS selectors +page.locator('.MuiButton-root') +page.locator('[data-testid="..."]') +``` + +**Component annotations** — Every spec file must have: +```typescript +test.beforeAll(async ({}, testInfo) => { + testInfo.annotations.push({ + type: 'component', + description: 'your_component_name', + }); +}); +``` + +**Retry patterns** for async assertions: +```typescript +await expect(async () => { + await page.reload(); + await expect(page.getByText('entity')).toBeVisible(); +}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); +``` + +**Conditional skips**: +```typescript +import { skipIfJobName } from '../utils/helper'; +import * as constants from '../utils/constants'; +skipIfJobName(constants.GKE_JOBS); +``` + +**Forbidden patterns**: +- `page.waitForNetworkIdle()` / `networkidle` +- Raw CSS class selectors (`.MuiButton-root`) +- `page.waitForTimeout()` for synchronization +- Hardcoded secrets or credentials + +### Key Utility Classes + +| Class | Path | Purpose | +|-------|------|---------| +| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | +| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | +| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | +| `KubeClient` | `utils/kube-client.ts` | K8s resource management | +| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | +| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | + +### Product Bug Decision + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product 
bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Do **not** change assertions to match broken behavior +4. Proceed with the `test.fixme()` change + +## Playwright Test Agents + +The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. + +### Available Agents + +| Agent | Mode | Purpose | +|-------|------|---------| +| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | +| `playwright-test-generator` | subagent | Create new test code from a test plan | +| `playwright-test-planner` | subagent | Explore app and create test plans | + +### Healer Agent Usage (Primary for Fixes) + +The healer agent is the primary tool for test repair: +1. Runs tests with `test_run` to identify failures +2. Debugs with `test_debug` to step through failing tests +3. Inspects UI state via `browser_snapshot`, `browser_console_messages` +4. Generates correct locators with `browser_generate_locator` +5. Edits test code with `edit`/`write` tools +6. Re-runs tests to verify the fix + +Invoke with: `@playwright-test-healer Fix the failing test in ` + +## Verification and PR Submission + +### Verification Checklist + +1. Single test run passes +2. 5 consecutive runs pass (stability) +3. `yarn tsc:check` passes +4. `yarn lint:check` passes +5. 
`yarn prettier:check` passes + +### Pre-Commit Hooks + +Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: + +```bash +yarn install # Root workspace +cd e2e-tests && yarn install && cd .. # If e2e-tests files changed +cd .ci && yarn install && cd .. # If .ci files changed +``` + +### PR Creation + +Always create PRs as **drafts**: + +```bash +git push -u origin +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base +``` + +### Qodo Review + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. + +### Trigger Affected CI Job + +After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: + +```bash +# List available presubmit jobs +gh pr comment --repo redhat-developer/rhdh --body "/test ?" + +# Trigger the matching presubmit job +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
+ +### CI Monitoring + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` + +## Reference Files + +| Category | Key files | +|----------|-----------| +| Playwright config | `e2e-tests/playwright.config.ts` | +| Project names (SOT) | `e2e-tests/playwright/projects.json` | +| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | +| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | +| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | +| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | +| ConfigMaps | `.ci/pipelines/resources/config_map/` | +| Helm values | `.ci/pipelines/value_files/` | +| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | +| Environment vars | `.ci/pipelines/env_variables.sh` | +| Local test runner | `e2e-tests/local-run.sh` | +| Local test env | `e2e-tests/local-test-setup.sh` | +| Qodo config | `.pr_agent.toml` | diff --git a/.claude/skills/e2e-deploy-rhdh/SKILL.md b/.claude/skills/e2e-deploy-rhdh/SKILL.md new file mode 100644 index 0000000000..eb5a92eede --- /dev/null +++ b/.claude/skills/e2e-deploy-rhdh/SKILL.md @@ -0,0 +1,232 @@ +--- +name: e2e-deploy-rhdh +description: >- + Deploy RHDH to an OpenShift cluster using local-run.sh for E2E test execution, + with autonomous error recovery for deployment failures +--- +# Deploy RHDH + +Deploy Red Hat Developer Hub to a cluster for E2E test execution using the existing `local-run.sh` workflow. + +## When to Use + +Use this skill when you need a running RHDH instance to reproduce and fix a test failure. 
+ +## Prerequisites + +Before running the deployment, verify these tools are installed: + +```bash +# Required tools (local-run.sh checks these automatically) +podman --version # Container runtime +oc version # OpenShift CLI +kubectl version --client # Kubernetes CLI +vault --version # HashiCorp Vault (for secrets) +jq --version # JSON processor +curl --version # HTTP client +rsync --version # File sync +bc --version # Calculator (for resource checks) +``` + +### Podman Machine Requirements + +The podman machine must be running with adequate resources: + +```bash +podman machine inspect | jq '.Resources' +# Requires: >= 8GB RAM, >= 4 CPUs +``` + +If resources are insufficient: +```bash +podman machine stop +podman machine set --memory 8192 --cpus 4 +podman machine start +``` + +## Deployment Using local-run.sh + +The primary deployment method uses `e2e-tests/local-run.sh`, which handles everything: +Vault authentication, cluster service account setup, RHDH deployment, and test execution. + +### Execution Rules + +**CRITICAL — deployment is a long-running operation:** + +1. **Never run `local-run.sh` in the background.** Operator installations can take 20-30 minutes. Use the Bash tool with `timeout: 600000` (10 minutes) and if it times out, **check the container log** — do NOT assume failure. +2. **Before starting a deployment, check for existing containers:** + ```bash + podman ps --format "{{.Names}} {{.Status}}" | grep -i rhdh-e2e-runner + ``` + If a deployment container is already running, **wait for it to finish** instead of starting a new one. Monitor via the container log: + ```bash + tail -f e2e-tests/.local-test/container.log + ``` +3. **Never launch concurrent deployments.** Two deployments to the same cluster will race and both fail. If a deployment appears stuck, check the container log and cluster state before deciding it failed. +4. **How to detect actual failure vs slow progress:** The operator install script outputs detailed debug logs. 
If the container log shows active progress (timestamps advancing), the deployment is still running. Only consider it failed if: + - The podman container has exited (`podman ps` shows no running container) + - AND the container log shows an error message (e.g., "Failed install RHDH Operator") + +### CLI Mode (Preferred) + +**CRITICAL**: CLI mode requires **all three** flags (`-j`, `-r`, `-t`). If `-r` is omitted, the script falls into interactive mode and will hang in automated contexts. + +```bash +cd e2e-tests +./local-run.sh -j -r -t [-s] +``` + +**Example — OCP job** (deploy-only with `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s +``` + +**Example — K8s job (AKS/EKS/GKE)** (full execution, no `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next +``` + +**Parameters:** +- `-j / --job`: The **full Prow CI job name** extracted from the Prow URL. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match, so the full name works correctly. Example: `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly` +- `-r / --repo`: Image repository (**required** for CLI mode — without it the script enters interactive mode) +- `-t / --tag`: Image tag (e.g., `1.9`, `next`) +- `-s / --skip-tests`: Deploy only, skip test execution. **OCP jobs only** — K8s jobs (AKS, EKS, GKE) do not support this flag and require the full execution pipeline + +**WARNING**: Do NOT use shortened job names like `nightly-ocp-helm` for `-j` — these do not match the glob patterns in `openshift-ci-tests.sh`. + +### Image Selection + +Refer to the `e2e-fix-workflow` rule for the release branch to image repo/tag mapping table. 
+ +### Deploy-Only Mode (OCP Jobs Only) + +For OCP jobs, deploy without running tests so you can run specific tests manually: + +```bash +./local-run.sh -j -r -t -s +``` + +**Note**: K8s jobs (AKS, EKS, GKE) do not support deploy-only mode. They require the full execution pipeline — run without `-s`. + +### What local-run.sh Does + +1. **Validates prerequisites**: Checks all required tools and podman resources +2. **Verifies the image**: Checks the image exists on quay.io via the Quay API +3. **Pulls the runner image**: `quay.io/rhdh-community/rhdh-e2e-runner:main` +4. **Authenticates to Vault**: OIDC-based login for secrets +5. **Sets up cluster access**: Creates `rhdh-local-tester` service account with cluster-admin, generates 48h token +6. **Copies the repo**: Syncs the local repo to `.local-test/rhdh/` (excludes node_modules) +7. **Runs a Podman container**: Executes `container-init.sh` inside the runner image, which: + - Fetches all Vault secrets to `/tmp/secrets/` + - Logs into the cluster + - Sets platform-specific environment variables + - Runs `.ci/pipelines/openshift-ci-tests.sh` for deployment + +### Post-Deployment: Setting Up for Manual Testing + +After `local-run.sh` completes (with `-s` for OCP jobs, or after full execution for K8s jobs), set up the environment for headed Playwright testing: + +```bash +# Source the test setup (choose 'showcase' or 'rbac') +source e2e-tests/local-test-setup.sh showcase +# or +source e2e-tests/local-test-setup.sh rbac +``` + +This exports: +- `BASE_URL` — The RHDH instance URL +- `K8S_CLUSTER_URL` — Cluster API server URL +- `K8S_CLUSTER_TOKEN` — Fresh service account token +- All Vault secrets as environment variables + +Verify RHDH is accessible: +```bash +curl -sSk "$BASE_URL" -o /dev/null -w "%{http_code}" +# Should return 200 +``` + +## Deployment Error Recovery + +### Common Deployment Failures + +#### CrashLoopBackOff + +**Symptoms**: Pod repeatedly crashes and restarts. 
+ +**Investigation**: +```bash +# Check pod status +oc get pods -n +# Check pod logs +oc logs -n --previous +# Check events +oc get events -n --sort-by=.lastTimestamp +``` + +**Common causes and fixes**: +1. **Missing ConfigMap**: The app-config ConfigMap wasn't created → check `.ci/pipelines/resources/config_map/` for the correct template +2. **Bad plugin configuration**: A dynamic plugin is misconfigured → check `dynamic-plugins-config` ConfigMap against `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +3. **Missing secrets**: Required secrets not mounted → verify secrets exist in the namespace +4. **Node.js errors**: Check for JavaScript errors in logs that indicate code issues + +#### ImagePullBackOff + +**Investigation**: +```bash +oc describe pod -n | grep -A5 "Events" +``` + +**Common causes**: +1. **Image doesn't exist**: Verify on quay.io: `curl -s 'https://quay.io/api/v1/repository/rhdh/rhdh-hub-rhel9/tag/?filter_tag_name=like:'` +2. **Pull secret missing**: Check `namespace::setup_image_pull_secret` in `.ci/pipelines/lib/namespace.sh` +3. **Registry auth**: Ensure the pull secret has correct credentials + +#### Helm Install Failure + +**Investigation**: +```bash +helm list -n +helm status -n +``` + +**Common causes**: +1. **Values file error**: Check merged values against `.ci/pipelines/value_files/values_showcase.yaml` +2. **Chart version mismatch**: Verify chart version with `helm::get_chart_version` from `.ci/pipelines/lib/helm.sh` + +#### Operator Deployment Failure + +**Investigation**: +```bash +oc get backstage -n +oc describe backstage -n +oc get csv -n # Check operator subscription status +``` + +**Common causes**: +1. **Backstage CR misconfigured**: Compare against `.ci/pipelines/resources/rhdh-operator/rhdh-start.yaml` +2. **Operator not installed**: Check CatalogSource and Subscription +3. 
**CRD not ready**: Wait for CRD with `k8s_wait::crd` pattern from `.ci/pipelines/lib/k8s-wait.sh` + +### Cross-Repo Investigation + +When deployment issues stem from the operator or chart, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search `rhdh-operator` and `rhdh-chart` repos for specific error patterns or configuration keys +2. **Context7** (if available): query `redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` for docs and code snippets +3. **Fallback — `gh search code`**: `gh search code '' --repo redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` +4. **Fallback — local clone**: clone the repo into a temp directory and grep for the pattern + +Key areas to look for: +- **rhdh-operator**: Backstage CR configuration, CatalogSource setup, operator installation scripts +- **rhdh-chart**: Helm values schema, chart templates, default configurations + +## Reference Files + +- Main deployment scripts: `.ci/pipelines/openshift-ci-tests.sh`, `.ci/pipelines/utils.sh` +- Library scripts: `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh`, `.ci/pipelines/lib/k8s-wait.sh`, `.ci/pipelines/lib/testing.sh` +- Helm values: `.ci/pipelines/value_files/` +- ConfigMaps: `.ci/pipelines/resources/config_map/` +- Operator CRs: `.ci/pipelines/resources/rhdh-operator/` +- Environment variables: `.ci/pipelines/env_variables.sh` diff --git a/.claude/skills/e2e-diagnose-and-fix/SKILL.md b/.claude/skills/e2e-diagnose-and-fix/SKILL.md new file mode 100644 index 0000000000..16b12ea2ee --- /dev/null +++ b/.claude/skills/e2e-diagnose-and-fix/SKILL.md @@ -0,0 +1,250 @@ +--- +name: e2e-diagnose-and-fix +description: >- + Analyze a failing E2E test, determine root cause, and fix it using Playwright + Test Agents and RHDH project conventions +--- +# Diagnose and Fix + +Analyze the root cause of a failing E2E test and implement a fix following RHDH 
project conventions. + +## When to Use + +Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. + +## MANDATORY: Always Use the Playwright Healer Agent + +**The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. + +### Healer Initialization + +Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. Use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. + +### Environment Setup for Healer + +The healer agent needs a `.env` file in `e2e-tests/` with all required environment variables (BASE_URL, K8S_CLUSTER_TOKEN, vault secrets, etc.). Generate it by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +The `.env` file is gitignored — never commit it. To regenerate (e.g. after token expiry), re-run the command above. + +### Invoking the Healer + +Invoke the healer agent via the Task tool with `subagent_type: general`: + +``` +Task: "You are the Playwright Test Healer agent. Run the failing test, debug it, inspect the UI, and fix the code. 
+Working directory: /e2e-tests +Test: --project=any-test -g '' +Run command: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g ''" +``` + +The healer will autonomously: +1. Run the test and identify the failure +2. Examine error screenshots and error-context.md +3. Debug the test step-by-step using Playwright Inspector +4. Inspect the live UI via page snapshots +5. Generate correct locators and fix assertions +6. Edit the test code +7. Re-run to verify the fix + +### When to Supplement with Manual Diagnosis + +After the healer has run, supplement with manual investigation only for: +- **Data dependency failures** (category 4): The healer may not know how to create missing test data +- **Platform-specific failures** (category 5): The healer doesn't have context about platform differences +- **Deployment configuration issues** (category 6): The healer cannot modify ConfigMaps or Helm values +- **Product bugs**: When the healer confirms the test is correct but the application behavior is wrong + +## Failure Pattern Recognition + +### 1. Locator Drift + +**Symptoms**: `Error: locator.click: Error: strict mode violation` or `Timeout waiting for selector` or element not found errors. + +**Cause**: The UI has changed and selectors no longer match. + +**Fix approach**: +- Invoke the Playwright healer agent (`@playwright-test-healer`) — it will replay the test, inspect the current UI via page snapshots, generate updated locators, and edit the code automatically +- If the healer cannot resolve it, manually update to semantic role-based locators (see project rules) +- Verify the updated locator works by re-running the test + +### 2. Timing / Race Condition + +**Symptoms**: Test passes sometimes, fails sometimes. Errors like `Timeout 10000ms exceeded` or assertions failing on stale data. + +**Cause**: Test acts before the UI is ready, or waits are insufficient. 
+ +**Fix approach**: +- Invoke the Playwright healer agent first — it can identify timing issues by stepping through the test and observing UI state transitions +- If manual fixes are needed: replace `page.waitForTimeout()` with proper waits: `expect(locator).toBeVisible()`, `page.waitForLoadState()` +- Use `expect().toPass()` with retry intervals for inherently async checks: + ```typescript + await expect(async () => { + const text = await page.locator('.count').textContent(); + expect(Number(text)).toBeGreaterThan(0); + }).toPass({ intervals: [1000, 2000, 5000], timeout: 30_000 }); + ``` +- Increase action/navigation timeouts if the operation is legitimately slow +- Use `Common.waitForLoad()` utility before interacting with the page after navigation + +### 3. Assertion Mismatch + +**Symptoms**: `expect(received).toBe(expected)` with clearly different values. + +**Cause**: The expected value has changed due to a product change, data change, or environment difference. + +**Fix approach**: +- Determine if the change is intentional (check recent commits to the release branch) +- If intentional: update the expected value in the test or test data +- If unintentional: this may be a product bug — but you must first exhaust all other possibilities using the Playwright healer agent. Only after the healer confirms the test is correct and the application behavior is wrong should you mark it with `test.fixme()` (see the "Decision: Product Bug vs Test Issue" section below) + +### 4. Data Dependency + +**Symptoms**: Test fails because expected entities, users, or resources don't exist. + +**Cause**: Test data assumptions no longer hold (GitHub repos deleted, Keycloak users changed, catalog entities removed). 
+ +**Fix approach**: +- Update test data in `e2e-tests/playwright/support/test-data/` or `e2e-tests/playwright/data/` +- Ensure test creates its own data in `beforeAll`/`beforeEach` and cleans up in `afterAll`/`afterEach` +- Use `APIHelper` for programmatic setup (GitHub API, Backstage catalog API) + +### 5. Platform-Specific Failure + +**Symptoms**: Test passes on OCP but fails on GKE/AKS/EKS, or vice versa. + +**Cause**: Platform differences (Routes vs Ingress, different auth, different network policies). + +**Fix approach**: +- Add conditional skip if the test is inherently platform-specific: + ```typescript + import { skipIfJobName, skipIfIsOpenShift } from '../utils/helper'; + // Skip on GKE + skipIfJobName(constants.GKE_JOBS); + // Skip on non-OpenShift + skipIfIsOpenShift('false'); + ``` +- Or add platform-specific logic within the test using `process.env.IS_OPENSHIFT`, `process.env.CONTAINER_PLATFORM` + +### 6. Deployment Configuration Issue + +**Symptoms**: RHDH itself is broken (500 errors, missing plugins, wrong behavior). + +**Cause**: ConfigMap or Helm values are incorrect for this test scenario. + +**Fix approach**: +- Check the ConfigMaps: `.ci/pipelines/resources/config_map/app-config-rhdh.yaml` and `app-config-rhdh-rbac.yaml` +- Check Helm values: `.ci/pipelines/value_files/` +- Check dynamic plugins config: `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +- Search `rhdh-operator` and `rhdh-chart` repos for configuration reference (use Sourcebot, Context7, `gh search code`, or a local clone — whichever is available) +- Fix the deployment configuration rather than the test code + +## Playwright Test Agents Reference + +The Playwright Test Agents are initialized via `npx playwright init-agents --loop=opencode` (see initialization section above). This creates an MCP server and agent definitions in `e2e-tests/opencode.json`. 
+ +### Healer Agent (MANDATORY for All Fixes) + +The healer agent is the **primary and mandatory** tool for fixing failing tests. It has access to: + +- **`test_run`**: Run tests and identify failures +- **`test_debug`**: Step through failing tests with the Playwright Inspector +- **`browser_snapshot`**: Capture accessibility snapshots of the live UI +- **`browser_console_messages`**: Read browser console logs +- **`browser_network_requests`**: Monitor network requests +- **`browser_generate_locator`**: Generate correct locators from the live UI +- **`edit`/`write`**: Edit test code directly + +The healer autonomously cycles through: run → debug → inspect → fix → re-run until the test passes. + +### Planner Agent (For Understanding Complex Scenarios) + +Use `@playwright-test-planner` when you need to understand a complex user flow before fixing a test. It explores the app and maps out the interaction patterns. + +### Generator Agent (For Creating New Test Steps) + +Use `@playwright-test-generator` when a test needs major rework and you need to generate new test steps from a plan. + +## Coding Conventions + +Every fix **must** follow Playwright best practices. Before writing or modifying test code, consult these resources in order: + +1. **Project rules** (always available locally): + - `playwright-locators` rule — locator priority, anti-patterns, assertions, Page Objects, DataGrid handling + - `ci-e2e-testing` rule — test structure, component annotations, project configuration, CI scripts + +2. **Official Playwright docs** (fetch via Context7 if available, otherwise use web): + - Best practices: https://playwright.dev/docs/best-practices + - Locators guide: https://playwright.dev/docs/locators + - Assertions: https://playwright.dev/docs/test-assertions + - Auto-waiting: https://playwright.dev/docs/actionability + +### Key requirements + +- **Locators**: always prefer `getByRole()`, `getByLabel()`, `getByPlaceholder()` over CSS/XPath selectors. 
Never use MUI class names (`.MuiButton-label`, `.MuiDataGrid-*`). +- **Assertions**: use Playwright's auto-waiting assertions (`expect(locator).toBeVisible()`) — never use manual `waitForSelector()` or `waitForTimeout()`. +- **Component annotations**: every `*.spec.ts` file must have a `component` annotation in `test.beforeAll`. +- **Page Object Model**: return `Locator` objects from page classes, not raw strings or elements. +- **No `force: true`**: if a click requires `force`, the locator or timing is wrong — fix the root cause. +- **No `waitForNetworkIdle()`**: use proper load-state waits or assertion-based waiting instead. + +## Cross-Repo Investigation + +When the issue is in RHDH deployment/config rather than test code, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search repos for specific error patterns or configuration keys +2. **Context7** (if available): query repos for docs and code snippets +3. **Fallback — `gh search code`**: e.g. `gh search code '' --repo redhat-developer/rhdh-operator` +4. **Fallback — local clone**: clone the repo into a temp directory and grep + +### rhdh-operator (`redhat-developer/rhdh-operator`) +- Backstage CR specification and defaults +- CatalogSource configuration +- Operator installation scripts (especially `install-rhdh-catalog-source.sh`) + +### rhdh-chart (`redhat-developer/rhdh-chart`) +- Helm values.yaml schema and defaults +- Chart templates for Deployments, Services, ConfigMaps +- Default dynamic plugin configurations + +### Other Repositories +- **backstage/backstage**: For upstream Backstage API changes +- **redhat-developer/red-hat-developers-documentation-rhdh**: For documentation on expected behavior + +## Decision: Product Bug vs Test Issue + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug before marking a test this way. Follow this checklist: + +1. 
**Run the Playwright healer agent** — it must confirm that the test logic is correct and the application behavior is wrong +2. **Verify manually** — inspect the live UI, check network responses, and confirm the product is genuinely broken (not a stale cache, missing data, or environment-specific issue) +3. **Check recent commits** — search the release branch for recent product changes that could explain the behavior change +4. **Ask the user for confirmation** before applying `test.fixme()` — do not decide unilaterally + +Only after all of the above confirm a product bug: + +1. **File a Jira bug** in the `RHDHBUGS` project (or update the existing ticket) documenting the product regression +2. **Mark the test with `test.fixme()`**, preceded by a `// TODO:` comment linking to the Jira ticket: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Button no longer visible after version upgrade'); + ``` +3. **Do not change the test assertions** to match broken behavior +4. **Proceed to `e2e-submit-and-review`** with the `test.fixme()` change diff --git a/.claude/skills/e2e-parse-ci-failure/SKILL.md b/.claude/skills/e2e-parse-ci-failure/SKILL.md new file mode 100644 index 0000000000..3c1f75659d --- /dev/null +++ b/.claude/skills/e2e-parse-ci-failure/SKILL.md @@ -0,0 +1,202 @@ +--- +name: e2e-parse-ci-failure +description: >- + Parse a Prow CI job URL or Jira ticket to extract E2E test failure details + including test name, spec file, release branch, platform, and error messages +--- +# Parse CI Failure + +Extract structured failure context from a Prow job URL or Jira ticket for an RHDH E2E CI failure. + +## When to Use + +Use this skill when you receive a failing Prow job URL (e.g., `https://prow.ci.openshift.org/view/gs/...`), a Jira ticket ID (e.g., `RHIDP-XXXX`), or a Jira URL (e.g., `https://redhat.atlassian.net/browse/RHIDP-XXXX`) for an E2E test failure and need to extract all relevant details before starting a fix. 
+ +## Input Detection + +- **Playwright report URL**: URL ending in `index.html` (with optional `#?testId=...` fragment) — use Playwright MCP if available (see "Playwright Report Parsing" below), otherwise fall back to build log parsing +- **Prow URL**: Starts with `https://prow.ci.openshift.org/` — parse the job page and build log +- **Jira ticket ID**: Matches pattern `RHIDP-\d+` or similar — use Jira MCP tools to read the ticket +- **Jira URL**: Starts with `https://redhat.atlassian.net/browse/` — extract the ticket ID from the URL path (e.g., `RHIDP-XXXX` from `https://redhat.atlassian.net/browse/RHIDP-XXXX`) and then use Jira MCP tools to read the ticket + +## Prow URL Parsing + +### URL Structure + +Prow job URLs follow two patterns: + +- **Periodic/postsubmit**: `https://prow.ci.openshift.org/view/gs/test-platform-results/logs//` +- **Presubmit (PR)**: `https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/redhat-developer_rhdh///` + +Extract `` and `` from the URL path. These are the two key values needed for all derivations. + +### GCS URL Derivation + +Convert the Prow URL to a GCS artifacts URL by replacing the prefix: + +``` +Prow: https://prow.ci.openshift.org/view/gs/test-platform-results/logs// +GCS: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ +``` + +For presubmit jobs, use `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +Key artifacts within the GCS directory: +- **Build log**: `/build-log.txt` +- **JUnit XML**: `/artifacts/junit-results/results.xml` +- **Playwright report**: `/artifacts/playwright-report/` + +Fetch the Prow job page with WebFetch to find the job status and artifact links, then fetch the build log for test failure details. 
+ +### Extracting Test Failures from Build Log + +Search the build log for these Playwright output patterns: + +``` +# Failing test line (primary source for test name, spec file, and project): + ✘ [] › /.spec.ts: + +# Error details (immediately after the failure line): + Error: + expect(received).toBeVisible() + Locator: + +# Summary (at the end of the log): + X failed + X passed + X skipped +``` + +Also check JUnit XML for `` elements with `` children as a fallback. + +## Playwright Report Parsing + +When the URL points to a Playwright HTML report (`index.html`, optionally with `#?testId=...`), use Playwright MCP if available — navigate with `browser_navigate`, then `browser_snapshot` to extract test name, spec file, error, steps, retries, screenshots, and traces from the accessibility tree. Derive job metadata (``, ``, ``) from the URL path segments. + +If Playwright MCP is not available, derive the `build-log.txt` URL from the report URL and fall back to build log parsing. + +## Jira Ticket Parsing + +Use Jira MCP tools to read the ticket. Extract: + +1. **Prow job URLs** from the description or comments — then parse them using the Prow URL Parsing steps above. +2. **Test names, spec file paths, error messages, or stack traces** from the description, comments, or attachments. +3. **`affects version`** field — map to release branch (e.g., `1.10` → `main`, `1.9` → `release-1.9`, `1.8` → `release-1.8`). +4. **`component`** field for additional context (e.g., "E2E Tests", "CI/CD"). + +## Job Name Mapping + +Refer to the **e2e-fix-workflow** rule for all mapping tables: job name to release branch, job name to platform and deployment method, job name to Playwright projects, release branch to image repo/tag, and job name to `local-run.sh` `-j` parameter. Those tables are the single source of truth and should not be duplicated here. 
+ +When parsing a job name, apply those mapping tables to derive: release branch, platform, deployment method, Playwright projects, and `local-run.sh` flags (`-j`, `-r`, `-t`). + +## Fields Requiring Build Log Access + +Not all output fields can be derived from the Prow URL alone. The following table clarifies what requires fetching the build log or artifacts: + +| Field | Source | Derivable from URL alone? | +|-------|--------|---------------------------| +| Job name | URL path segment | Yes | +| Build ID | URL path segment | Yes | +| Release branch | Job name pattern match | Yes | +| Platform | Job name pattern match | Yes | +| Deployment method | Job name pattern match | Yes | +| Playwright projects | Job name pattern match | Yes | +| `local-run.sh` flags (`-j`, `-r`, `-t`) | Job name + release branch | Yes | +| GCS artifacts URL | Constructed from URL | Yes | +| Test name | Build log Playwright output | No — requires build log | +| Spec file | Build log Playwright output | No — requires build log | +| Specific Playwright project (of failing test) | Build log `[project]` prefix | No — requires build log | +| Error type | Build log error details | No — requires build log | +| Error message | Build log error details | No — requires build log | +| Failure count / pass count | Build log summary line | No — requires build log | + +## Output + +Produce the following structured output with four sections. + +### 1. Structured Summary + +``` +- Test name: +- Spec file: +- Playwright project: +- Release branch:
+- Platform: +- Deployment method: +- Error type: +- Error message: +- Prow URL: +- Jira ticket: +``` + +### 2. Derivation Details + +Show how each field was derived with the matching pattern. This makes the reasoning transparent and auditable. + +``` +| Field | Value | Derivation | +|--------------------|------------------------------|-----------------------------------------------------------| +| Job name | | Extracted from URL path segment | +| Build ID | | Extracted from URL path segment | +| Release branch | | Pattern `*-rhdh--*` matched in job name | +| Platform | | Pattern `**` matched in job name | +| Deployment method | | Pattern `**` matched in job name | +| Playwright project | | `[]` prefix in failing test line | +| Image repo (-r) | | Release branch `` maps to `` | +| Image tag (-t) | | Release branch `` maps to `` | +| Test name | | Parsed from `✘` line in build log | +| Spec file | | Parsed from `✘` line in build log | +| Error type | | Classified from error message pattern | +``` + +### 3. GCS Artifacts Location + +Derive and present the GCS artifacts URLs constructed from the Prow URL: + +``` +GCS Artifacts Base: + https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ + +Build Log: + /artifacts//build-log.txt + +JUnit Results: + /artifacts//artifacts/junit-results/results.xml + +Playwright Report: + /artifacts//artifacts/playwright-report/ +``` + +For presubmit (PR) jobs, the base path uses `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +### 4. local-run.sh Command + +Provide the full command ready to copy-paste, with a flag breakdown. 
+ +**OCP jobs** — use `-s` for deploy-only mode: +``` +cd e2e-tests +./local-run.sh -j -r -t -s + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +| -s | (no value) | Deploy only, skip running tests | +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`; full execution is required: +``` +cd e2e-tests +./local-run.sh -j -r -t + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +``` diff --git a/.claude/skills/e2e-reproduce-failure/SKILL.md b/.claude/skills/e2e-reproduce-failure/SKILL.md new file mode 100644 index 0000000000..17a8728bfc --- /dev/null +++ b/.claude/skills/e2e-reproduce-failure/SKILL.md @@ -0,0 +1,194 @@ +--- +name: e2e-reproduce-failure +description: >- + Run a specific failing E2E test against a deployed RHDH instance to confirm + the failure and determine if it is consistent or flaky +--- +# Reproduce Failure + +Run the failing test locally against a deployed RHDH instance to confirm the failure and classify it. + +## When to Use + +Use this skill after deploying RHDH (via `e2e-deploy-rhdh`) when you need to verify the test failure reproduces locally before attempting a fix. 
+ +## Prerequisites + +- RHDH deployed and accessible (BASE_URL set) +- Environment configured via `source e2e-tests/local-test-setup.sh ` +- Node.js 22 and Yarn available +- Playwright browsers installed (`cd e2e-tests && yarn install && yarn playwright install chromium`) + +## Environment Setup + +### Source the Test Environment + +```bash +# For non-RBAC tests (showcase, showcase-k8s, showcase-operator, etc.) +source e2e-tests/local-test-setup.sh showcase + +# For RBAC tests (showcase-rbac, showcase-rbac-k8s, showcase-operator-rbac) +source e2e-tests/local-test-setup.sh rbac +``` + +This exports all required environment variables: `BASE_URL`, `K8S_CLUSTER_URL`, `K8S_CLUSTER_TOKEN`, and all Vault secrets. + +### Verify Environment + +```bash +echo "BASE_URL: $BASE_URL" +curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" +``` + +## MANDATORY: Use the Playwright Healer Agent for Reproduction + +Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. + +### Healer Initialization (First Time Only) + +Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +### Environment Setup + +Generate the `.env` file by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +To regenerate (e.g. after token expiry), re-run the command above. + +### Project Selection + +When running specific test files or test cases, use `--project=any-test` to avoid running the smoke test dependency. 
The `any-test` project matches any spec file without extra overhead: + +```bash +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +### Running via Healer Agent + +Invoke the healer agent via the Task tool: + +``` +Task: "You are the Playwright Test Healer agent. Run the following test to reproduce a CI failure. +Working directory: /e2e-tests +Test: --project=any-test -g '' +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If the test fails, examine the error output, screenshots in test-results/, and error-context.md. +Report: pass/fail, exact error message, what the UI shows at the point of failure." +``` + +### Fallback: Direct Execution + +If the healer agent is unavailable, run tests directly: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Examples:** +```bash +# A specific spec file +yarn playwright test playwright/e2e/plugins/topology/topology.spec.ts --project=any-test --retries=0 --workers=1 + +# A specific test by name +yarn playwright test -g "should display topology" --project=any-test --retries=0 --workers=1 +``` + +### Headed / Debug Mode + +For visual debugging when manual investigation is needed: + +```bash +# Headed mode (visible browser) +yarn playwright test --project=any-test --retries=0 --workers=1 --headed + +# Debug mode (Playwright Inspector, step-by-step) +yarn playwright test --project=any-test --retries=0 --workers=1 --debug +``` + +## Flakiness Detection + +If the first run **passes** (doesn't reproduce the failure), run multiple times to check for flakiness: + +```bash +cd e2e-tests + +# Run 10 times and track results +PASS=0; FAIL=0 +for i in $(seq 1 10); do + echo "=== Run $i ===" + if yarn playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Results: $PASS passed, $FAIL failed out of 10 runs" +``` + +## Result 
Classification + +### Consistent Failure +- **Definition**: Fails every time (10/10 runs fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill +- **Confidence**: High — the fix can be verified reliably + +### Flaky +- **Definition**: Fails some runs but not all (e.g., 3/10 fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill, focus on reliability improvements +- **Typical causes**: Race conditions, timing dependencies, state leaks between tests, external service variability + +### Cannot Reproduce +- **Definition**: Passes all runs locally (0/10 fail) +- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Investigation**: Check environment differences between local and CI: + - **Cluster version**: CI may use a different OCP version (check the cluster pool version) + - **Image version**: CI may use a different RHDH image + - **Resource constraints**: CI clusters may have fewer resources + - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Network**: CI clusters are in `us-east-2` AWS region + - **External services**: GitHub API rate limits, Keycloak availability + +## Artifact Collection + +### Playwright Traces + +After a test failure, traces are saved in `e2e-tests/test-results/`: + +```bash +# View a trace +yarn playwright show-trace test-results//trace.zip +``` + +### HTML Report + +```bash +# Generate and open the HTML report +yarn playwright show-report +``` + +### Screenshots and Videos + +On failure, screenshots and videos are saved in `test-results//`: +- `test-failed-1.png` — Screenshot at failure point +- `video.webm` — Full test recording (if video is enabled) + +## Test Project Reference + +Refer to the e2e-fix-workflow rule for the Playwright project → config map mapping.
diff --git a/.claude/skills/e2e-submit-and-review/SKILL.md b/.claude/skills/e2e-submit-and-review/SKILL.md new file mode 100644 index 0000000000..5aedb84414 --- /dev/null +++ b/.claude/skills/e2e-submit-and-review/SKILL.md @@ -0,0 +1,316 @@ +--- +name: e2e-submit-and-review +description: >- + Create a PR for an E2E test fix, trigger Qodo agentic review, address review + comments, and monitor CI results +--- +# Submit and Review + +Create a pull request for the E2E test fix, trigger automated review, address feedback, and verify CI passes. + +## When to Use + +Use this skill after verifying the fix (via `e2e-verify-fix`) when all tests pass and code quality checks are clean. + +## Step 0: Resolve Pre-Commit Hooks + +Before committing, ensure all related workspaces have their dependencies installed so pre-commit hooks (lint-staged, rulesync, etc.) pass: + +```bash +# Root workspace +yarn install + +# If e2e-tests files were changed +cd e2e-tests && yarn install && cd .. + +# If .ci files were changed +cd .ci && yarn install && cd .. +``` + +If a pre-commit hook fails during commit, fix the issue and create a **new** commit — do not amend. 
+ +## Step 1: Commit Changes + +### Stage and Commit + +```bash +# Stage only relevant files +git add e2e-tests/ +git add .ci/ # Only if deployment config was changed + +# Commit with a descriptive message +git commit -m "fix(e2e): + + +- What test was failing +- What the root cause was +- How it was fixed" +``` + +### Commit Message Convention + +Follow the conventional commit format: +- `fix(e2e): fix flaky topology test timeout` +- `fix(e2e): update RBAC page locators after UI redesign` +- `fix(e2e): add retry logic for catalog entity refresh` +- `fix(e2e): skip orchestrator test on GKE platform` + +If a Jira ticket exists, reference it: +- `fix(e2e): fix topology locator drift [RHIDP-1234]` + +## Step 2: Push to Fork + +Push the fix branch to the fork (origin): + +```bash +git push -u origin +``` + +Example: +```bash +git push -u origin fix/e2e-topology-locator +# or +git push -u origin fix/RHIDP-1234-e2e-topology-locator +``` + +## Step 3: Create Pull Request + +Create a PR against the upstream `redhat-developer/rhdh` repository. + +**Dynamic username extraction** -- Always derive the GitHub username from the fork remote at runtime rather than hardcoding it. This makes the workflow portable across any contributor's environment: + +```bash +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +``` + +Then create the PR as a **draft** (always use `--draft`): +```bash +gh pr create \ + --draft \ + --repo redhat-developer/rhdh \ + --head "${GITHUB_USER}:" \ + --base \ + --title "fix(e2e): " \ + --body "$(cat <<'EOF' +## Summary +- <1-2 bullet points explaining what was fixed and why> + +## Test Results +- Local verification: 5/5 passes +- Code quality: lint, tsc, prettier all pass + +## Related +- Prow job: +- Jira: +EOF +)" +``` + +**Important**: Always use `--repo redhat-developer/rhdh` and `--head :` for cross-fork PRs. 
Never hardcode the GitHub username -- always extract it dynamically from the origin remote URL so this workflow works for any contributor. + +### PR Description Guidelines + +Keep it concise: +- What test was failing +- What the root cause was +- How it was fixed +- Link to the original failing CI job or Jira ticket + +## Step 4: Trigger Qodo Agentic Review + +After the PR is created, trigger an agentic review from Qodo (PR-Agent): + +```bash +# Get the PR number from the create output, then comment +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The Qodo bot will: +1. Analyze the code changes +2. Post review comments with suggestions +3. Optionally approve or request changes + +Note: The `.pr_agent.toml` in the repo configures Qodo with: +- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos +- Auto-review, auto-describe, and auto-improve on PR creation +- Scoped to `e2e-tests` folder changes + +## Step 5: Wait for and Address Qodo Review + +### Poll for Review Comments + +Check for Qodo review completion (it typically takes 1-3 minutes): + +```bash +# Check for Qodo bot comments +gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' + +# Check for inline comments +gh api repos/redhat-developer/rhdh/pulls//comments \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' +``` + +### Address Review Comments + +For each review comment: + +1. **Code suggestions**: If the suggestion improves the code, apply it: + ```bash + # Make the change locally + # Then stage only the changed files and commit + git add + git commit -m "fix(e2e): address review feedback" + git push + ``` + **Never use `git add -A` or `git add .`** — always stage specific files to avoid committing `.env`, test artifacts, or other local-only files. + +2. 
**Style/convention issues**: Fix them per project conventions + +3. **False positives**: If a suggestion is incorrect, explain why in a reply: + ```bash + gh api repos/redhat-developer/rhdh/pulls//comments//replies \ + -f body="This is intentional because " + ``` + +4. **Questions**: Answer them with context from the codebase + +## Step 6: Trigger Affected CI Job + +After addressing Qodo review feedback (and pushing any follow-up commits), trigger the presubmit E2E job that corresponds to the originally failing CI job. Presubmit job names differ from periodic/nightly names but cover the same platform and deployment method. + +**CRITICAL**: Never guess or construct presubmit job names. Always discover them from the `openshift-ci` bot response as described below. + +### Step 6a: Request Available Jobs + +Comment `/test ?` on the PR to request the list of available presubmit jobs: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test ?" +``` + +### Step 6b: Wait for the Bot Response + +Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: + +```bash +# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) +for i in $(seq 1 10); do + BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ + --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') + if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then + echo "Bot response received:" + echo "$BOT_RESPONSE" + break + fi + echo "Waiting for openshift-ci bot response (attempt $i/10)..." + sleep 30 +done +``` + +If no response is received after 5 minutes, ask the user for guidance. + +### Step 6c: Select the Right Job from the Bot Response + +Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. 
Use these patterns to identify the right job: + +| Original failure pattern | Look for presubmit containing | +|--------------------------|-------------------------------| +| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | +| `*ocp*operator*nightly*` | `*ocp*operator*` | +| `*aks*helm*` | `*aks*helm*` | +| `*eks*helm*` | `*eks*helm*` | +| `*gke*helm*` | `*gke*helm*` | + +**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. + +If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. + +### Step 6d: Trigger the Job + +Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. + +## Step 7: Monitor CI Status + +### Watch CI Checks + +After pushing changes, monitor the CI pipeline: + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Or check manually: +```bash +gh pr checks --repo redhat-developer/rhdh +``` + +CI check types (Prow E2E jobs, lint checks, build checks, etc.) are documented in the project CI rules. Use `gh pr checks` output to identify which specific check failed. + +### If CI Fails + +1. **E2E test failure**: Check the Prow job logs, determine if it's the same test or a different one +2. **Lint failure**: Run `yarn lint:fix` locally, commit and push +3. **Build failure**: Check TypeScript errors with `yarn tsc:check` +4. 
**Unrelated failure**: Comment on the PR noting it's an unrelated failure, optionally `/retest` to re-trigger + +### Re-trigger CI + +If a CI check needs to be re-run: +```bash +# Re-run all failed Prow jobs (`/retest` takes no job name) +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# Re-run one specific job (use an exact job name from the bot's `/test ?` list) +gh pr comment --repo redhat-developer/rhdh --body "/test <job-name>" +``` + +## Step 8: Final Status Report + +After CI passes (or all issues are addressed), produce a final report: + +``` +PR Status Report: +- PR: +- Branch: -> +- CI Status: PASS / PENDING / FAIL +- Qodo Review: Addressed / Pending +- Files changed: +- Action items: +``` + +## Quick Reference: PR Workflow Commands + +```bash +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') + +# Create draft PR (always use --draft) +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base + +# Trigger Qodo review +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" + +# List available presubmit jobs (Step 6a) +gh pr comment --repo redhat-developer/rhdh --body "/test ?" 
+ +# Wait for openshift-ci bot response (Step 6b) -- poll until bot replies with job list + +# Trigger specific presubmit job (Step 6d) -- ONLY use a job name from the bot's response +gh pr comment --repo redhat-developer/rhdh --body "/test " + +# Check CI status +gh pr checks --repo redhat-developer/rhdh + +# Re-trigger tests +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# View PR +gh pr view --repo redhat-developer/rhdh --web +``` diff --git a/.claude/skills/e2e-verify-fix/SKILL.md b/.claude/skills/e2e-verify-fix/SKILL.md new file mode 100644 index 0000000000..8a24f1f442 --- /dev/null +++ b/.claude/skills/e2e-verify-fix/SKILL.md @@ -0,0 +1,147 @@ +--- +name: e2e-verify-fix +description: >- + Verify an E2E test fix by running the test multiple times and checking code + quality +--- +# Verify Fix + +Verify that the test fix works reliably and passes all code quality checks. + +## When to Use + +Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm the fix works before submitting a PR. + +## MANDATORY: Use the Playwright Healer Agent for Verification + +Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. + +### Healer Initialization + +If not already initialized in this session, use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. + +## Verification Steps + +### 1. Single Run Verification via Healer + +Invoke the healer agent to run the fixed test once: + +``` +Task: "You are the Playwright Test Healer agent. 
Verify a fix by running the test once. +Working directory: /e2e-tests +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If it passes, report success. If it fails, examine the error and report what went wrong." +``` + +If it fails, go back to `e2e-diagnose-and-fix` and iterate — use the healer agent there too for the fix. + +### 2. Multi-Run Stability Check + +Run the test 5 times consecutively to verify no flakiness was introduced: + +```bash +cd e2e-tests +set -a && source .env && set +a +PASS=0; FAIL=0 +for i in $(seq 1 5); do + echo "=== Stability run $i/5 ===" + if npx playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Stability results: $PASS/5 passed" +``` + +**Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. + +**IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. + +### 3. Code Quality Checks + +Run all code quality checks in the e2e-tests workspace: + +```bash +cd e2e-tests + +# TypeScript compilation +yarn tsc:check + +# ESLint +yarn lint:check + +# Prettier formatting +yarn prettier:check +``` + +Fix any issues found: + +```bash +# Auto-fix lint issues +yarn lint:fix + +# Auto-fix formatting +yarn prettier:fix +``` + +### 4. 
Optional: Full Project Regression Check + +If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 +``` + +This is optional for isolated spec file changes but recommended for changes to: +- `e2e-tests/playwright/utils/` (utility classes) +- `e2e-tests/playwright/support/` (page objects, selectors) +- `e2e-tests/playwright/data/` (shared test data) +- `playwright.config.ts` (configuration) + +### 5. Review the Diff + +Before submitting, review all changes: + +```bash +git diff +git diff --stat +``` + +Verify: +- Only intended files were changed +- No secrets or credentials were added +- No unrelated changes were included +- Component annotations are present in any new/modified spec files +- Semantic selectors are used (no deprecated CSS class selectors) + +## Result Summary + +After verification, produce a summary: + +``` +Fix Verification Results: +- Test: () +- Single run: PASS +- Stability (5 runs): 5/5 PASS +- TypeScript: PASS +- ESLint: PASS +- Prettier: PASS +- Files changed: +- Ready for PR: YES/NO +``` diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md new file mode 100644 index 0000000000..4acb89c839 --- /dev/null +++ b/.cursor/commands/fix-e2e.md @@ -0,0 +1,164 @@ +--- +description: Autonomously investigate and fix a failing RHDH E2E CI test. Accepts a Prow job URL or Jira ticket ID. Deploys RHDH, reproduces the failure, fixes the test using Playwright agents, and submits a PR with Qodo review. +--- +# Fix E2E CI Failure + +Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failing RHDH E2E test. + +## Input + +`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +- **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Jira ticket ID**: `RHIDP-XXXX` +- **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` + +## Workflow + +Execute the following phases in order. 
Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. + +### Phase 1: Parse CI Failure + +**Skill**: `e2e-parse-ci-failure` + +Parse the input to extract: +- Failing test name and spec file path +- Playwright project name +- Release branch (main, release-1.9, etc.) +- Platform (OCP, AKS, EKS, GKE) +- Deployment method (Helm, Operator) +- Error type and message +- local-run.sh job name parameter + +**Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. + +### Phase 2: Setup Fix Branch + +First, check the current branch: + +```bash +git branch --show-current +``` + +- **On `main` or `release-*`**: You're on a base branch — create a feature branch using the skill: + ```bash + git fetch upstream + git checkout -b fix/e2e- upstream/ + ``` + If a Jira ticket was provided, include the ticket ID in the branch name: + `fix/RHIDP-XXXX-e2e-` + +- **On any other branch** (e.g., `fix/e2e-*`): You're likely already on a feature branch. **Ask the user** whether to: + 1. Use the current branch as-is + 2. Create a new branch from the upstream release branch + +### Phase 3: Deploy RHDH + +**Skill**: `e2e-deploy-rhdh` + +Deploy RHDH to a cluster using `e2e-tests/local-run.sh`. CLI mode requires **all three** flags (`-j`, `-r`, `-t`): + +**OCP jobs** — use `-s` (deploy-only) to skip automated test execution so you can run the specific failing test manually: +```bash +cd e2e-tests +./local-run.sh -j -r -t -s +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`. These jobs require the full execution pipeline and do not support deploy-only mode: +```bash +cd e2e-tests +./local-run.sh -j -r -t +``` + +Use the **full Prow CI job name** for `-j` (not shortened names). 
+ +Select the image repo and tag based on the release branch: +- `main` → `-r rhdh-community/rhdh -t next` +- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` +- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` + +After deployment completes, set up the local test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +**Decision gate**: Before attempting deployment, verify cluster connectivity (`oc whoami`). If no cluster is available, **ask the user for explicit approval** before skipping this phase — do not skip silently. If deployment fails, the `e2e-deploy-rhdh` skill has error recovery procedures. If deployment cannot be recovered after investigation, report the deployment issue and stop. + +### Phase 4: Reproduce Failure + +**Skill**: `e2e-reproduce-failure` + +Run the specific failing test to confirm it reproduces locally. Use `--project=any-test` to avoid running the smoke test dependency — it matches any spec file without extra overhead: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Decision gates**: +- **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. +- **Consistent failure**: Proceed to Phase 5 +- **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability +- **Cannot reproduce** (passes every time after 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. + +### Phase 5: Diagnose and Fix + +**Skill**: `e2e-diagnose-and-fix` + +Analyze the failure and implement a fix: + +1. **Classify the failure**: locator drift, timing, assertion mismatch, data dependency, platform-specific, deployment config +2. 
**Use Playwright Test Agents**: Invoke the healer agent (`@playwright-test-healer`) for automated test repair — it can debug the test, inspect the UI, generate locators, and edit the code +3. **Follow Playwright best practices**: Consult the `playwright-locators` and `ci-e2e-testing` project rules. Use semantic role-based locators (`getByRole`, `getByLabel`), auto-waiting assertions, Page Object Model, component annotations. Fetch official Playwright best practices via Context7 or https://playwright.dev/docs/best-practices if needed +4. **Cross-repo investigation**: If the issue is in deployment config, search `rhdh-operator` and `rhdh-chart` repos. Use Sourcebot or Context7 if available; otherwise fall back to `gh search code` or clone the repo locally and grep + +**Decision gate**: If the analysis reveals a product bug (not a test issue), you must be **absolutely certain** before marking a test with `test.fixme()`. The Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with `// TODO:` linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Proceed to Phase 6 with the `test.fixme()` change + +### Phase 6: Verify Fix + +**Skill**: `e2e-verify-fix` + +Verify the fix: +1. Run the fixed test once — must pass +2. Run 5 times — must pass 5/5 +3. Run code quality checks: `yarn tsc:check`, `yarn lint:check`, `yarn prettier:check` +4. Fix any lint/formatting issues + +**Decision gate**: If the test still fails or is flaky, return to Phase 5 and iterate. If verification cannot be run (no cluster, environment issues), **ask the user for explicit approval** before proceeding without it. 
+ +### Phase 7: Submit PR and Handle Review + +**Skill**: `e2e-submit-and-review` + +1. **Resolve pre-commit hooks**: Run `yarn install` in all relevant workspaces (root, `e2e-tests/`, `.ci/`) before committing +2. **Commit**: Stage changes, commit with conventional format +3. **Push**: `git push -u origin ` +4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` +5. **Trigger Qodo review**: Comment `/agentic_review` on the PR +6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +7. **Address feedback**: Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +9. **Monitor CI**: Watch CI checks with `gh pr checks` + +### Final Report + +After all phases complete, produce a summary: + +``` +E2E Fix Summary: +- Input: +- Test: () +- Branch: +- Root cause: +- Fix: +- Verification: +- PR: +- CI Status: +- Qodo Review: +``` diff --git a/.cursor/rules/e2e-fix-workflow.mdc b/.cursor/rules/e2e-fix-workflow.mdc new file mode 100644 index 0000000000..4b7993096d --- /dev/null +++ b/.cursor/rules/e2e-fix-workflow.mdc @@ -0,0 +1,352 @@ +--- +--- + +# E2E Test Fix Workflow + +This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. 
+ +## Table of Contents + +- [Workflow Overview](#workflow-overview) +- [Parsing CI Failures](#parsing-ci-failures) +- [Branch and Deployment Setup](#branch-and-deployment-setup) +- [Reproducing Failures](#reproducing-failures) +- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) +- [Playwright Test Agents](#playwright-test-agents) +- [Verification and PR Submission](#verification-and-pr-submission) + +## Workflow Overview + +The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: + +1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +2. **Setup Fix Branch** — Create a branch from the correct upstream release branch +3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** — Confirm the failure reproduces locally +5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents +6. **Verify Fix** — Run the test multiple times and check code quality +7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI + +Every phase except branch setup (Phase 2) has a corresponding skill (in `.opencode/skill/`, `.claude/skills/`, and `.cursor/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. + +**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. + +## Parsing CI Failures + +### Prow URL Structure + +``` +https://prow.ci.openshift.org/view/gs/test-platform-results/logs// +``` + +Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: +``` +✘ [] › /.spec.ts: +``` + +### Job Name Mapping Tables + +#### Job Name → Release Branch + +| Pattern in job name | Release branch | +|---------------------|---------------| +| `*-rhdh-main-*` | `main` | +| `*-rhdh-release-1.9-*` | `release-1.9` | +| `*-rhdh-release-1.8-*` | `release-1.8` | + +#### Job Name → Platform and Deployment Method + +| Pattern | Platform | Method | +|---------|----------|--------| +| `*ocp*helm*` | OCP | Helm | +| `*ocp*operator*` | OCP | Operator | +| `*aks*helm*` | AKS | Helm | +| `*aks*operator*` | AKS | Operator | +| `*eks*helm*` | EKS | Helm | +| `*eks*operator*` | EKS | Operator | +| `*gke*helm*` | GKE | Helm | +| `*gke*operator*` | GKE | Operator | +| `*osd-gcp*` | OSD-GCP | Helm/Operator | + +#### Job Name → Playwright Projects + +| Job pattern | Projects | +|-------------|----------| +| `*ocp*helm*nightly*` (not upgrade) | `showcase`, `showcase-rbac`, `showcase-runtime`, `showcase-sanity-plugins`, `showcase-localization-*` | +| `*ocp*helm*upgrade*` | `showcase-upgrade` | +| `*ocp*operator*nightly*` (not auth) | `showcase-operator`, `showcase-operator-rbac` | +| `*ocp*operator*auth-providers*` | `showcase-auth-providers` | +| `*ocp*helm*pull*` | `showcase`, `showcase-rbac` | +| `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | +| `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | + +#### Job Name → local-run.sh `-j` Parameter + +Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. 
+ +**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` +**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` + +Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. + +#### Release Branch → Image Repo and Tag + +| Branch | `-r` (image repo) | `-t` (image tag) | +|--------|-------------------|-------------------| +| `main` | `rhdh-community/rhdh` | `next` | +| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | +| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | + +## Branch and Deployment Setup + +### Branch Creation + +Always create branches from upstream, never from local copies: + +```bash +git fetch upstream +git checkout -b fix/e2e- upstream/ +``` + +### Deployment via local-run.sh + +CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. + +```bash +cd e2e-tests +# OCP jobs: use -s to deploy only, then run tests manually +./local-run.sh -j -r -t -s +# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required +./local-run.sh -j -r -t +``` + +Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
+ +After deployment, source the test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +### Deployment Error Recovery + +| Error | Investigation | Common Fix | +|-------|--------------|------------| +| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | +| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | +| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | +| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | + +For config issues, search these repos for reference: +- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts +- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults + +## Reproducing Failures + +### Test Execution + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 --workers=1 +``` + +### Flakiness Detection + +If the test passes on first run, repeat 10 times: +- **10/10 pass** → cannot reproduce (check environment differences) +- **Mixed results** → flaky (focus on reliability improvements) +- **0/10 pass** → consistent failure + +### Debugging Modes + +```bash +# Headed (visible browser) +yarn playwright test --project= --headed + +# Debug (Playwright Inspector) +yarn playwright test --project= --debug + +# View trace +yarn playwright show-trace test-results//trace.zip +``` + +## Diagnosing and Fixing Tests + +### Failure Classification + +1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors +2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals +3. **Assertion mismatch** — Expected values changed → update test data or report product bug +4. **Data dependency** — Test data missing → add proper setup/teardown +5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional +6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` + +### RHDH Coding Conventions (Required) + +**Locators** — Always use semantic role-based locators: +```typescript +// GOOD +page.getByRole('button', { name: 'Create' }) +page.getByRole('heading', { name: 'Catalog' }) +page.getByText('No results found') + +// BAD — deprecated CSS selectors +page.locator('.MuiButton-root') +page.locator('[data-testid="..."]') +``` + +**Component annotations** — Every spec file must have: +```typescript +test.beforeAll(async ({}, testInfo) => { + testInfo.annotations.push({ + type: 'component', + description: 'your_component_name', + }); +}); +``` + +**Retry patterns** for async assertions: +```typescript +await expect(async () => { + await page.reload(); + await expect(page.getByText('entity')).toBeVisible(); +}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); +``` + +**Conditional skips**: +```typescript +import { skipIfJobName } from '../utils/helper'; +import * as constants from '../utils/constants'; +skipIfJobName(constants.GKE_JOBS); +``` + +**Forbidden patterns**: +- `page.waitForNetworkIdle()` / `networkidle` +- Raw CSS class selectors (`.MuiButton-root`) +- `page.waitForTimeout()` for synchronization +- Hardcoded secrets or credentials + +### Key Utility Classes + +| Class | Path | Purpose | +|-------|------|---------| +| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | +| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | +| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | +| `KubeClient` | `utils/kube-client.ts` | K8s resource management | +| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | +| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | + +### Product Bug Decision + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product 
bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Do **not** change assertions to match broken behavior +4. Proceed with the `test.fixme()` change + +## Playwright Test Agents + +The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. + +### Available Agents + +| Agent | Mode | Purpose | +|-------|------|---------| +| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | +| `playwright-test-generator` | subagent | Create new test code from a test plan | +| `playwright-test-planner` | subagent | Explore app and create test plans | + +### Healer Agent Usage (Primary for Fixes) + +The healer agent is the primary tool for test repair: +1. Runs tests with `test_run` to identify failures +2. Debugs with `test_debug` to step through failing tests +3. Inspects UI state via `browser_snapshot`, `browser_console_messages` +4. Generates correct locators with `browser_generate_locator` +5. Edits test code with `edit`/`write` tools +6. Re-runs tests to verify the fix + +Invoke with: `@playwright-test-healer Fix the failing test in ` + +## Verification and PR Submission + +### Verification Checklist + +1. Single test run passes +2. 5 consecutive runs pass (stability) +3. `yarn tsc:check` passes +4. `yarn lint:check` passes +5. 
`yarn prettier:check` passes + +### Pre-Commit Hooks + +Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: + +```bash +yarn install # Root workspace +cd e2e-tests && yarn install && cd .. # If e2e-tests files changed +cd .ci && yarn install && cd .. # If .ci files changed +``` + +### PR Creation + +Always create PRs as **drafts**: + +```bash +git push -u origin +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base +``` + +### Qodo Review + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. + +### Trigger Affected CI Job + +After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: + +```bash +# List available presubmit jobs +gh pr comment --repo redhat-developer/rhdh --body "/test ?" + +# Trigger the matching presubmit job +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
+ +### CI Monitoring + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` + +## Reference Files + +| Category | Key files | +|----------|-----------| +| Playwright config | `e2e-tests/playwright.config.ts` | +| Project names (SOT) | `e2e-tests/playwright/projects.json` | +| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | +| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | +| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | +| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | +| ConfigMaps | `.ci/pipelines/resources/config_map/` | +| Helm values | `.ci/pipelines/value_files/` | +| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | +| Environment vars | `.ci/pipelines/env_variables.sh` | +| Local test runner | `e2e-tests/local-run.sh` | +| Local test env | `e2e-tests/local-test-setup.sh` | +| Qodo config | `.pr_agent.toml` | diff --git a/.cursor/skills/e2e-deploy-rhdh/SKILL.md b/.cursor/skills/e2e-deploy-rhdh/SKILL.md new file mode 100644 index 0000000000..0fe7cdaed1 --- /dev/null +++ b/.cursor/skills/e2e-deploy-rhdh/SKILL.md @@ -0,0 +1,230 @@ +--- +name: e2e-deploy-rhdh +description: Deploy RHDH to an OpenShift cluster using local-run.sh for E2E test execution, with autonomous error recovery for deployment failures +--- +# Deploy RHDH + +Deploy Red Hat Developer Hub to a cluster for E2E test execution using the existing `local-run.sh` workflow. + +## When to Use + +Use this skill when you need a running RHDH instance to reproduce and fix a test failure. 
+ +## Prerequisites + +Before running the deployment, verify these tools are installed: + +```bash +# Required tools (local-run.sh checks these automatically) +podman --version # Container runtime +oc version # OpenShift CLI +kubectl version --client # Kubernetes CLI +vault --version # HashiCorp Vault (for secrets) +jq --version # JSON processor +curl --version # HTTP client +rsync --version # File sync +bc --version # Calculator (for resource checks) +``` + +### Podman Machine Requirements + +The podman machine must be running with adequate resources: + +```bash +podman machine inspect | jq '.Resources' +# Requires: >= 8GB RAM, >= 4 CPUs +``` + +If resources are insufficient: +```bash +podman machine stop +podman machine set --memory 8192 --cpus 4 +podman machine start +``` + +## Deployment Using local-run.sh + +The primary deployment method uses `e2e-tests/local-run.sh`, which handles everything: +Vault authentication, cluster service account setup, RHDH deployment, and test execution. + +### Execution Rules + +**CRITICAL — deployment is a long-running operation:** + +1. **Never run `local-run.sh` in the background.** Operator installations can take 20-30 minutes. Use the Bash tool with `timeout: 600000` (10 minutes) and if it times out, **check the container log** — do NOT assume failure. +2. **Before starting a deployment, check for existing containers:** + ```bash + podman ps --format "{{.Names}} {{.Status}}" | grep -i rhdh-e2e-runner + ``` + If a deployment container is already running, **wait for it to finish** instead of starting a new one. Monitor via the container log: + ```bash + tail -f e2e-tests/.local-test/container.log + ``` +3. **Never launch concurrent deployments.** Two deployments to the same cluster will race and both fail. If a deployment appears stuck, check the container log and cluster state before deciding it failed. +4. **How to detect actual failure vs slow progress:** The operator install script outputs detailed debug logs. 
If the container log shows active progress (timestamps advancing), the deployment is still running. Only consider it failed if: + - The podman container has exited (`podman ps` shows no running container) + - AND the container log shows an error message (e.g., "Failed install RHDH Operator") + +### CLI Mode (Preferred) + +**CRITICAL**: CLI mode requires **all three** flags (`-j`, `-r`, `-t`). If `-r` is omitted, the script falls into interactive mode and will hang in automated contexts. + +```bash +cd e2e-tests +./local-run.sh -j <full-job-name> -r <image-repo> -t <image-tag> [-s] +``` + +**Example — OCP job** (deploy-only with `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s +``` + +**Example — K8s job (AKS/EKS/GKE)** (full execution, no `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next +``` + +**Parameters:** +- `-j / --job`: The **full Prow CI job name** extracted from the Prow URL. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match, so the full name works correctly. Example: `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly` +- `-r / --repo`: Image repository (**required** for CLI mode — without it the script enters interactive mode) +- `-t / --tag`: Image tag (e.g., `1.9`, `next`) +- `-s / --skip-tests`: Deploy only, skip test execution. **OCP jobs only** — K8s jobs (AKS, EKS, GKE) do not support this flag and require the full execution pipeline + +**WARNING**: Do NOT use shortened job names like `nightly-ocp-helm` for `-j` — these do not match the glob patterns in `openshift-ci-tests.sh`. + +### Image Selection + +Refer to the `e2e-fix-workflow` rule for the release branch to image repo/tag mapping table. 
+ +### Deploy-Only Mode (OCP Jobs Only) + +For OCP jobs, deploy without running tests so you can run specific tests manually: + +```bash +./local-run.sh -j -r -t -s +``` + +**Note**: K8s jobs (AKS, EKS, GKE) do not support deploy-only mode. They require the full execution pipeline — run without `-s`. + +### What local-run.sh Does + +1. **Validates prerequisites**: Checks all required tools and podman resources +2. **Verifies the image**: Checks the image exists on quay.io via the Quay API +3. **Pulls the runner image**: `quay.io/rhdh-community/rhdh-e2e-runner:main` +4. **Authenticates to Vault**: OIDC-based login for secrets +5. **Sets up cluster access**: Creates `rhdh-local-tester` service account with cluster-admin, generates 48h token +6. **Copies the repo**: Syncs the local repo to `.local-test/rhdh/` (excludes node_modules) +7. **Runs a Podman container**: Executes `container-init.sh` inside the runner image, which: + - Fetches all Vault secrets to `/tmp/secrets/` + - Logs into the cluster + - Sets platform-specific environment variables + - Runs `.ci/pipelines/openshift-ci-tests.sh` for deployment + +### Post-Deployment: Setting Up for Manual Testing + +After `local-run.sh` completes (with `-s` for OCP jobs, or after full execution for K8s jobs), set up the environment for headed Playwright testing: + +```bash +# Source the test setup (choose 'showcase' or 'rbac') +source e2e-tests/local-test-setup.sh showcase +# or +source e2e-tests/local-test-setup.sh rbac +``` + +This exports: +- `BASE_URL` — The RHDH instance URL +- `K8S_CLUSTER_URL` — Cluster API server URL +- `K8S_CLUSTER_TOKEN` — Fresh service account token +- All Vault secrets as environment variables + +Verify RHDH is accessible: +```bash +curl -sSk "$BASE_URL" -o /dev/null -w "%{http_code}" +# Should return 200 +``` + +## Deployment Error Recovery + +### Common Deployment Failures + +#### CrashLoopBackOff + +**Symptoms**: Pod repeatedly crashes and restarts. 
+ +**Investigation**: +```bash +# Check pod status +oc get pods -n <namespace> +# Check pod logs +oc logs <pod-name> -n <namespace> --previous +# Check events +oc get events -n <namespace> --sort-by=.lastTimestamp +``` + +**Common causes and fixes**: +1. **Missing ConfigMap**: The app-config ConfigMap wasn't created → check `.ci/pipelines/resources/config_map/` for the correct template +2. **Bad plugin configuration**: A dynamic plugin is misconfigured → check `dynamic-plugins-config` ConfigMap against `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +3. **Missing secrets**: Required secrets not mounted → verify secrets exist in the namespace +4. **Node.js errors**: Check for JavaScript errors in logs that indicate code issues + +#### ImagePullBackOff + +**Investigation**: +```bash +oc describe pod <pod-name> -n <namespace> | grep -A5 "Events" +``` + +**Common causes**: +1. **Image doesn't exist**: Verify on quay.io: `curl -s 'https://quay.io/api/v1/repository/rhdh/rhdh-hub-rhel9/tag/?filter_tag_name=like:<tag>'` +2. **Pull secret missing**: Check `namespace::setup_image_pull_secret` in `.ci/pipelines/lib/namespace.sh` +3. **Registry auth**: Ensure the pull secret has correct credentials + +#### Helm Install Failure + +**Investigation**: +```bash +helm list -n <namespace> +helm status <release-name> -n <namespace> +``` + +**Common causes**: +1. **Values file error**: Check merged values against `.ci/pipelines/value_files/values_showcase.yaml` +2. **Chart version mismatch**: Verify chart version with `helm::get_chart_version` from `.ci/pipelines/lib/helm.sh` + +#### Operator Deployment Failure + +**Investigation**: +```bash +oc get backstage -n <namespace> +oc describe backstage <backstage-name> -n <namespace> +oc get csv -n <namespace> # Check operator install (ClusterServiceVersion) status +``` + +**Common causes**: +1. **Backstage CR misconfigured**: Compare against `.ci/pipelines/resources/rhdh-operator/rhdh-start.yaml` +2. **Operator not installed**: Check CatalogSource and Subscription +3. 
**CRD not ready**: Wait for CRD with `k8s_wait::crd` pattern from `.ci/pipelines/lib/k8s-wait.sh` + +### Cross-Repo Investigation + +When deployment issues stem from the operator or chart, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search `rhdh-operator` and `rhdh-chart` repos for specific error patterns or configuration keys +2. **Context7** (if available): query `redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` for docs and code snippets +3. **Fallback — `gh search code`**: `gh search code '' --repo redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` +4. **Fallback — local clone**: clone the repo into a temp directory and grep for the pattern + +Key areas to look for: +- **rhdh-operator**: Backstage CR configuration, CatalogSource setup, operator installation scripts +- **rhdh-chart**: Helm values schema, chart templates, default configurations + +## Reference Files + +- Main deployment scripts: `.ci/pipelines/openshift-ci-tests.sh`, `.ci/pipelines/utils.sh` +- Library scripts: `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh`, `.ci/pipelines/lib/k8s-wait.sh`, `.ci/pipelines/lib/testing.sh` +- Helm values: `.ci/pipelines/value_files/` +- ConfigMaps: `.ci/pipelines/resources/config_map/` +- Operator CRs: `.ci/pipelines/resources/rhdh-operator/` +- Environment variables: `.ci/pipelines/env_variables.sh` diff --git a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md new file mode 100644 index 0000000000..f0a1726192 --- /dev/null +++ b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md @@ -0,0 +1,248 @@ +--- +name: e2e-diagnose-and-fix +description: Analyze a failing E2E test, determine root cause, and fix it using Playwright Test Agents and RHDH project conventions +--- +# Diagnose and Fix + +Analyze the root cause of a failing E2E test and implement a fix following RHDH project 
conventions. + +## When to Use + +Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. + +## MANDATORY: Always Use the Playwright Healer Agent + +**The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. + +### Healer Initialization + +Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. Use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. + +### Environment Setup for Healer + +The healer agent needs a `.env` file in `e2e-tests/` with all required environment variables (BASE_URL, K8S_CLUSTER_TOKEN, vault secrets, etc.). Generate it by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +The `.env` file is gitignored — never commit it. To regenerate (e.g. after token expiry), re-run the command above. + +### Invoking the Healer + +Invoke the healer agent via the Task tool with `subagent_type: general`: + +``` +Task: "You are the Playwright Test Healer agent. Run the failing test, debug it, inspect the UI, and fix the code. 
+Working directory: /e2e-tests +Test: --project=any-test -g '' +Run command: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g ''" +``` + +The healer will autonomously: +1. Run the test and identify the failure +2. Examine error screenshots and error-context.md +3. Debug the test step-by-step using Playwright Inspector +4. Inspect the live UI via page snapshots +5. Generate correct locators and fix assertions +6. Edit the test code +7. Re-run to verify the fix + +### When to Supplement with Manual Diagnosis + +After the healer has run, supplement with manual investigation only for: +- **Data dependency failures** (category 4): The healer may not know how to create missing test data +- **Platform-specific failures** (category 5): The healer doesn't have context about platform differences +- **Deployment configuration issues** (category 6): The healer cannot modify ConfigMaps or Helm values +- **Product bugs**: When the healer confirms the test is correct but the application behavior is wrong + +## Failure Pattern Recognition + +### 1. Locator Drift + +**Symptoms**: `Error: locator.click: Error: strict mode violation` or `Timeout waiting for selector` or element not found errors. + +**Cause**: The UI has changed and selectors no longer match. + +**Fix approach**: +- Invoke the Playwright healer agent (`@playwright-test-healer`) — it will replay the test, inspect the current UI via page snapshots, generate updated locators, and edit the code automatically +- If the healer cannot resolve it, manually update to semantic role-based locators (see project rules) +- Verify the updated locator works by re-running the test + +### 2. Timing / Race Condition + +**Symptoms**: Test passes sometimes, fails sometimes. Errors like `Timeout 10000ms exceeded` or assertions failing on stale data. + +**Cause**: Test acts before the UI is ready, or waits are insufficient. 
+ +**Fix approach**: +- Invoke the Playwright healer agent first — it can identify timing issues by stepping through the test and observing UI state transitions +- If manual fixes are needed: replace `page.waitForTimeout()` with proper waits: `expect(locator).toBeVisible()`, `page.waitForLoadState()` +- Use `expect().toPass()` with retry intervals for inherently async checks: + ```typescript + await expect(async () => { + const text = await page.locator('.count').textContent(); + expect(Number(text)).toBeGreaterThan(0); + }).toPass({ intervals: [1000, 2000, 5000], timeout: 30_000 }); + ``` +- Increase action/navigation timeouts if the operation is legitimately slow +- Use `Common.waitForLoad()` utility before interacting with the page after navigation + +### 3. Assertion Mismatch + +**Symptoms**: `expect(received).toBe(expected)` with clearly different values. + +**Cause**: The expected value has changed due to a product change, data change, or environment difference. + +**Fix approach**: +- Determine if the change is intentional (check recent commits to the release branch) +- If intentional: update the expected value in the test or test data +- If unintentional: this may be a product bug — but you must first exhaust all other possibilities using the Playwright healer agent. Only after the healer confirms the test is correct and the application behavior is wrong should you mark it with `test.fixme()` (see the "Decision: Product Bug vs Test Issue" section below) + +### 4. Data Dependency + +**Symptoms**: Test fails because expected entities, users, or resources don't exist. + +**Cause**: Test data assumptions no longer hold (GitHub repos deleted, Keycloak users changed, catalog entities removed). 
+ +**Fix approach**: +- Update test data in `e2e-tests/playwright/support/test-data/` or `e2e-tests/playwright/data/` +- Ensure test creates its own data in `beforeAll`/`beforeEach` and cleans up in `afterAll`/`afterEach` +- Use `APIHelper` for programmatic setup (GitHub API, Backstage catalog API) + +### 5. Platform-Specific Failure + +**Symptoms**: Test passes on OCP but fails on GKE/AKS/EKS, or vice versa. + +**Cause**: Platform differences (Routes vs Ingress, different auth, different network policies). + +**Fix approach**: +- Add conditional skip if the test is inherently platform-specific: + ```typescript + import { skipIfJobName, skipIfIsOpenShift } from '../utils/helper'; + // Skip on GKE + skipIfJobName(constants.GKE_JOBS); + // Skip on non-OpenShift + skipIfIsOpenShift('false'); + ``` +- Or add platform-specific logic within the test using `process.env.IS_OPENSHIFT`, `process.env.CONTAINER_PLATFORM` + +### 6. Deployment Configuration Issue + +**Symptoms**: RHDH itself is broken (500 errors, missing plugins, wrong behavior). + +**Cause**: ConfigMap or Helm values are incorrect for this test scenario. + +**Fix approach**: +- Check the ConfigMaps: `.ci/pipelines/resources/config_map/app-config-rhdh.yaml` and `app-config-rhdh-rbac.yaml` +- Check Helm values: `.ci/pipelines/value_files/` +- Check dynamic plugins config: `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +- Search `rhdh-operator` and `rhdh-chart` repos for configuration reference (use Sourcebot, Context7, `gh search code`, or a local clone — whichever is available) +- Fix the deployment configuration rather than the test code + +## Playwright Test Agents Reference + +The Playwright Test Agents are initialized via `npx playwright init-agents --loop=opencode` (see initialization section above). This creates an MCP server and agent definitions in `e2e-tests/opencode.json`. 
+ +### Healer Agent (MANDATORY for All Fixes) + +The healer agent is the **primary and mandatory** tool for fixing failing tests. It has access to: + +- **`test_run`**: Run tests and identify failures +- **`test_debug`**: Step through failing tests with the Playwright Inspector +- **`browser_snapshot`**: Capture accessibility snapshots of the live UI +- **`browser_console_messages`**: Read browser console logs +- **`browser_network_requests`**: Monitor network requests +- **`browser_generate_locator`**: Generate correct locators from the live UI +- **`edit`/`write`**: Edit test code directly + +The healer autonomously cycles through: run → debug → inspect → fix → re-run until the test passes. + +### Planner Agent (For Understanding Complex Scenarios) + +Use `@playwright-test-planner` when you need to understand a complex user flow before fixing a test. It explores the app and maps out the interaction patterns. + +### Generator Agent (For Creating New Test Steps) + +Use `@playwright-test-generator` when a test needs major rework and you need to generate new test steps from a plan. + +## Coding Conventions + +Every fix **must** follow Playwright best practices. Before writing or modifying test code, consult these resources in order: + +1. **Project rules** (always available locally): + - `playwright-locators` rule — locator priority, anti-patterns, assertions, Page Objects, DataGrid handling + - `ci-e2e-testing` rule — test structure, component annotations, project configuration, CI scripts + +2. **Official Playwright docs** (fetch via Context7 if available, otherwise use web): + - Best practices: https://playwright.dev/docs/best-practices + - Locators guide: https://playwright.dev/docs/locators + - Assertions: https://playwright.dev/docs/test-assertions + - Auto-waiting: https://playwright.dev/docs/actionability + +### Key requirements + +- **Locators**: always prefer `getByRole()`, `getByLabel()`, `getByPlaceholder()` over CSS/XPath selectors. 
Never use MUI class names (`.MuiButton-label`, `.MuiDataGrid-*`). +- **Assertions**: use Playwright's auto-waiting assertions (`expect(locator).toBeVisible()`) — never use manual `waitForSelector()` or `waitForTimeout()`. +- **Component annotations**: every `*.spec.ts` file must have a `component` annotation in `test.beforeAll`. +- **Page Object Model**: return `Locator` objects from page classes, not raw strings or elements. +- **No `force: true`**: if a click requires `force`, the locator or timing is wrong — fix the root cause. +- **No `waitForNetworkIdle()`**: use proper load-state waits or assertion-based waiting instead. + +## Cross-Repo Investigation + +When the issue is in RHDH deployment/config rather than test code, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search repos for specific error patterns or configuration keys +2. **Context7** (if available): query repos for docs and code snippets +3. **Fallback — `gh search code`**: e.g. `gh search code '' --repo redhat-developer/rhdh-operator` +4. **Fallback — local clone**: clone the repo into a temp directory and grep + +### rhdh-operator (`redhat-developer/rhdh-operator`) +- Backstage CR specification and defaults +- CatalogSource configuration +- Operator installation scripts (especially `install-rhdh-catalog-source.sh`) + +### rhdh-chart (`redhat-developer/rhdh-chart`) +- Helm values.yaml schema and defaults +- Chart templates for Deployments, Services, ConfigMaps +- Default dynamic plugin configurations + +### Other Repositories +- **backstage/backstage**: For upstream Backstage API changes +- **redhat-developer/red-hat-developers-documentation-rhdh**: For documentation on expected behavior + +## Decision: Product Bug vs Test Issue + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug before marking a test this way. Follow this checklist: + +1. 
**Run the Playwright healer agent** — it must confirm that the test logic is correct and the application behavior is wrong +2. **Verify manually** — inspect the live UI, check network responses, and confirm the product is genuinely broken (not a stale cache, missing data, or environment-specific issue) +3. **Check recent commits** — search the release branch for recent product changes that could explain the behavior change +4. **Ask the user for confirmation** before applying `test.fixme()` — do not decide unilaterally + +Only after all of the above confirm a product bug: + +1. **File a Jira bug** in the `RHDHBUGS` project (or update the existing ticket) documenting the product regression +2. **Mark the test with `test.fixme()`**, preceded by a `// TODO:` comment linking to the Jira ticket: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Button no longer visible after version upgrade'); + ``` +3. **Do not change the test assertions** to match broken behavior +4. **Proceed to `e2e-submit-and-review`** with the `test.fixme()` change diff --git a/.cursor/skills/e2e-parse-ci-failure/SKILL.md b/.cursor/skills/e2e-parse-ci-failure/SKILL.md new file mode 100644 index 0000000000..eb347293cb --- /dev/null +++ b/.cursor/skills/e2e-parse-ci-failure/SKILL.md @@ -0,0 +1,200 @@ +--- +name: e2e-parse-ci-failure +description: Parse a Prow CI job URL or Jira ticket to extract E2E test failure details including test name, spec file, release branch, platform, and error messages +--- +# Parse CI Failure + +Extract structured failure context from a Prow job URL or Jira ticket for an RHDH E2E CI failure. + +## When to Use + +Use this skill when you receive a failing Prow job URL (e.g., `https://prow.ci.openshift.org/view/gs/...`), a Jira ticket ID (e.g., `RHIDP-XXXX`), or a Jira URL (e.g., `https://redhat.atlassian.net/browse/RHIDP-XXXX`) for an E2E test failure and need to extract all relevant details before starting a fix. 
+ +## Input Detection + +- **Playwright report URL**: URL ending in `index.html` (with optional `#?testId=...` fragment) — use Playwright MCP if available (see "Playwright Report Parsing" below), otherwise fall back to build log parsing +- **Prow URL**: Starts with `https://prow.ci.openshift.org/` — parse the job page and build log +- **Jira ticket ID**: Matches pattern `RHIDP-\d+` or similar — use Jira MCP tools to read the ticket +- **Jira URL**: Starts with `https://redhat.atlassian.net/browse/` — extract the ticket ID from the URL path (e.g., `RHIDP-XXXX` from `https://redhat.atlassian.net/browse/RHIDP-XXXX`) and then use Jira MCP tools to read the ticket + +## Prow URL Parsing + +### URL Structure + +Prow job URLs follow two patterns: + +- **Periodic/postsubmit**: `https://prow.ci.openshift.org/view/gs/test-platform-results/logs/<JOB_NAME>/<BUILD_ID>` +- **Presubmit (PR)**: `https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/redhat-developer_rhdh/<PR_NUMBER>/<JOB_NAME>/<BUILD_ID>` + +Extract `<JOB_NAME>` and `<BUILD_ID>` from the URL path. These are the two key values needed for all derivations. + +### GCS URL Derivation + +Convert the Prow URL to a GCS artifacts URL by replacing the prefix: + +``` +Prow: https://prow.ci.openshift.org/view/gs/test-platform-results/logs/<JOB_NAME>/<BUILD_ID> +GCS: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs/<JOB_NAME>/<BUILD_ID>/artifacts/ +``` + +For presubmit jobs, use `pr-logs/pull/redhat-developer_rhdh/<PR_NUMBER>/` instead of `logs/`. + +Key artifacts within the GCS directory: +- **Build log**: `/build-log.txt` +- **JUnit XML**: `/artifacts/junit-results/results.xml` +- **Playwright report**: `/artifacts/playwright-report/` + +Fetch the Prow job page with WebFetch to find the job status and artifact links, then fetch the build log for test failure details. 
+ +### Extracting Test Failures from Build Log + +Search the build log for these Playwright output patterns: + +``` +# Failing test line (primary source for test name, spec file, and project): + ✘ [<project>] › <path>/<spec-file>.spec.ts:<line> <test name> + +# Error details (immediately after the failure line): + Error: <error message> + expect(received).toBeVisible() + Locator: <locator> + +# Summary (at the end of the log): + X failed + X passed + X skipped +``` + +Also check JUnit XML for `<testcase>` elements with `<failure>` children as a fallback. + +## Playwright Report Parsing + +When the URL points to a Playwright HTML report (`index.html`, optionally with `#?testId=...`), use Playwright MCP if available — navigate with `browser_navigate`, then `browser_snapshot` to extract test name, spec file, error, steps, retries, screenshots, and traces from the accessibility tree. Derive job metadata (`<JOB_NAME>`, `<BUILD_ID>`, `<PR_NUMBER>`) from the URL path segments. + +If Playwright MCP is not available, derive the `build-log.txt` URL from the report URL and fall back to build log parsing. + +## Jira Ticket Parsing + +Use Jira MCP tools to read the ticket. Extract: + +1. **Prow job URLs** from the description or comments — then parse them using the Prow URL Parsing steps above. +2. **Test names, spec file paths, error messages, or stack traces** from the description, comments, or attachments. +3. **`affects version`** field — map to release branch (e.g., `1.10` → `main`, `1.9` → `release-1.9`, `1.8` → `release-1.8`). +4. **`component`** field for additional context (e.g., "E2E Tests", "CI/CD"). + +## Job Name Mapping + +Refer to the **e2e-fix-workflow** rule for all mapping tables: job name to release branch, job name to platform and deployment method, job name to Playwright projects, release branch to image repo/tag, and job name to `local-run.sh` `-j` parameter. Those tables are the single source of truth and should not be duplicated here. 
+ +When parsing a job name, apply those mapping tables to derive: release branch, platform, deployment method, Playwright projects, and `local-run.sh` flags (`-j`, `-r`, `-t`). + +## Fields Requiring Build Log Access + +Not all output fields can be derived from the Prow URL alone. The following table clarifies what requires fetching the build log or artifacts: + +| Field | Source | Derivable from URL alone? | +|-------|--------|---------------------------| +| Job name | URL path segment | Yes | +| Build ID | URL path segment | Yes | +| Release branch | Job name pattern match | Yes | +| Platform | Job name pattern match | Yes | +| Deployment method | Job name pattern match | Yes | +| Playwright projects | Job name pattern match | Yes | +| `local-run.sh` flags (`-j`, `-r`, `-t`) | Job name + release branch | Yes | +| GCS artifacts URL | Constructed from URL | Yes | +| Test name | Build log Playwright output | No — requires build log | +| Spec file | Build log Playwright output | No — requires build log | +| Specific Playwright project (of failing test) | Build log `[project]` prefix | No — requires build log | +| Error type | Build log error details | No — requires build log | +| Error message | Build log error details | No — requires build log | +| Failure count / pass count | Build log summary line | No — requires build log | + +## Output + +Produce the following structured output with four sections. + +### 1. Structured Summary + +``` +- Test name: +- Spec file: +- Playwright project: +- Release branch:
+- Platform: +- Deployment method: +- Error type: +- Error message: +- Prow URL: +- Jira ticket: +``` + +### 2. Derivation Details + +Show how each field was derived with the matching pattern. This makes the reasoning transparent and auditable. + +``` +| Field | Value | Derivation | +|--------------------|------------------------------|-----------------------------------------------------------| +| Job name | | Extracted from URL path segment | +| Build ID | | Extracted from URL path segment | +| Release branch | | Pattern `*-rhdh--*` matched in job name | +| Platform | | Pattern `**` matched in job name | +| Deployment method | | Pattern `**` matched in job name | +| Playwright project | | `[]` prefix in failing test line | +| Image repo (-r) | | Release branch `` maps to `` | +| Image tag (-t) | | Release branch `` maps to `` | +| Test name | | Parsed from `✘` line in build log | +| Spec file | | Parsed from `✘` line in build log | +| Error type | | Classified from error message pattern | +``` + +### 3. GCS Artifacts Location + +Derive and present the GCS artifacts URLs constructed from the Prow URL: + +``` +GCS Artifacts Base: + https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ + +Build Log: + /artifacts//build-log.txt + +JUnit Results: + /artifacts//artifacts/junit-results/results.xml + +Playwright Report: + /artifacts//artifacts/playwright-report/ +``` + +For presubmit (PR) jobs, the base path uses `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +### 4. local-run.sh Command + +Provide the full command ready to copy-paste, with a flag breakdown. 
+ +**OCP jobs** — use `-s` for deploy-only mode: +``` +cd e2e-tests +./local-run.sh -j -r -t -s + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +| -s | (no value) | Deploy only, skip running tests | +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`; full execution is required: +``` +cd e2e-tests +./local-run.sh -j -r -t + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +``` diff --git a/.cursor/skills/e2e-reproduce-failure/SKILL.md b/.cursor/skills/e2e-reproduce-failure/SKILL.md new file mode 100644 index 0000000000..2dcb17210d --- /dev/null +++ b/.cursor/skills/e2e-reproduce-failure/SKILL.md @@ -0,0 +1,192 @@ +--- +name: e2e-reproduce-failure +description: Run a specific failing E2E test against a deployed RHDH instance to confirm the failure and determine if it is consistent or flaky +--- +# Reproduce Failure + +Run the failing test locally against a deployed RHDH instance to confirm the failure and classify it. + +## When to Use + +Use this skill after deploying RHDH (via `e2e-deploy-rhdh`) when you need to verify the test failure reproduces locally before attempting a fix. + +## Prerequisites + +- RHDH deployed and accessible (BASE_URL set) +- Environment configured via `source e2e-tests/local-test-setup.sh ` +- Node.js 22 and Yarn available +- Playwright browsers installed (`cd e2e-tests && yarn install && yarn playwright install chromium`) + +## Environment Setup + +### Source the Test Environment + +```bash +# For non-RBAC tests (showcase, showcase-k8s, showcase-operator, etc.) 
+source e2e-tests/local-test-setup.sh showcase + +# For RBAC tests (showcase-rbac, showcase-rbac-k8s, showcase-operator-rbac) +source e2e-tests/local-test-setup.sh rbac +``` + +This exports all required environment variables: `BASE_URL`, `K8S_CLUSTER_URL`, `K8S_CLUSTER_TOKEN`, and all Vault secrets. + +### Verify Environment + +```bash +echo "BASE_URL: $BASE_URL" +curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" +``` + +## MANDATORY: Use the Playwright Healer Agent for Reproduction + +Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. + +### Healer Initialization (First Time Only) + +Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +### Environment Setup + +Generate the `.env` file by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +To regenerate (e.g. after token expiry), re-run the command above. + +### Project Selection + +When running specific test files or test cases, use `--project=any-test` to avoid running the smoke test dependency. The `any-test` project matches any spec file without extra overhead: + +```bash +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +### Running via Healer Agent + +Invoke the healer agent via the Task tool: + +``` +Task: "You are the Playwright Test Healer agent. Run the following test to reproduce a CI failure. 
+Working directory: /e2e-tests +Test: --project=any-test -g '' +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If the test fails, examine the error output, screenshots in test-results/, and error-context.md. +Report: pass/fail, exact error message, what the UI shows at the point of failure." +``` + +### Fallback: Direct Execution + +If the healer agent is unavailable, run tests directly: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Examples:** +```bash +# A specific spec file +yarn playwright test playwright/e2e/plugins/topology/topology.spec.ts --project=any-test --retries=0 --workers=1 + +# A specific test by name +yarn playwright test -g "should display topology" --project=any-test --retries=0 --workers=1 +``` + +### Headed / Debug Mode + +For visual debugging when manual investigation is needed: + +```bash +# Headed mode (visible browser) +yarn playwright test --project=any-test --retries=0 --workers=1 --headed + +# Debug mode (Playwright Inspector, step-by-step) +yarn playwright test --project=any-test --retries=0 --workers=1 --debug +``` + +## Flakiness Detection + +If the first run **passes** (doesn't reproduce the failure), run multiple times to check for flakiness: + +```bash +cd e2e-tests + +# Run 10 times and track results +PASS=0; FAIL=0 +for i in $(seq 1 10); do + echo "=== Run $i ===" + if yarn playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Results: $PASS passed, $FAIL failed out of 10 runs" +``` + +## Result Classification + +### Consistent Failure +- **Definition**: Fails every time (10/10 runs fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill +- **Confidence**: High — the fix can be verified reliably + +### Flaky +- **Definition**: Fails some runs but not all (e.g., 3/10 fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill, 
focus on reliability improvements +- **Typical causes**: Race conditions, timing dependencies, state leaks between tests, external service variability + +### Cannot Reproduce +- **Definition**: Passes all runs locally (0/10 fail) +- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Investigation**: Check environment differences between local and CI: + - **Cluster version**: CI may use a different OCP version (check the cluster pool version) + - **Image version**: CI may use a different RHDH image + - **Resource constraints**: CI clusters may have fewer resources + - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Network**: CI clusters are in `us-east-2` AWS region + - **External services**: GitHub API rate limits, Keycloak availability + +## Artifact Collection + +### Playwright Traces + +After a test failure, traces are saved in `e2e-tests/test-results/`: + +```bash +# View a trace +yarn playwright show-trace test-results//trace.zip +``` + +### HTML Report + +```bash +# Open the HTML report from the most recent test run +yarn playwright show-report +``` + +### Screenshots and Videos + +On failure, screenshots and videos are saved in `test-results//`: +- `test-failed-1.png` — Screenshot at failure point +- `video.webm` — Full test recording (if video is enabled) + +## Test Project Reference + +Refer to the e2e-fix-workflow rule for the Playwright project → config map mapping.
diff --git a/.cursor/skills/e2e-submit-and-review/SKILL.md b/.cursor/skills/e2e-submit-and-review/SKILL.md new file mode 100644 index 0000000000..45d71ebe3d --- /dev/null +++ b/.cursor/skills/e2e-submit-and-review/SKILL.md @@ -0,0 +1,314 @@ +--- +name: e2e-submit-and-review +description: Create a PR for an E2E test fix, trigger Qodo agentic review, address review comments, and monitor CI results +--- +# Submit and Review + +Create a pull request for the E2E test fix, trigger automated review, address feedback, and verify CI passes. + +## When to Use + +Use this skill after verifying the fix (via `e2e-verify-fix`) when all tests pass and code quality checks are clean. + +## Step 0: Resolve Pre-Commit Hooks + +Before committing, ensure all related workspaces have their dependencies installed so pre-commit hooks (lint-staged, rulesync, etc.) pass: + +```bash +# Root workspace +yarn install + +# If e2e-tests files were changed +cd e2e-tests && yarn install && cd .. + +# If .ci files were changed +cd .ci && yarn install && cd .. +``` + +If a pre-commit hook fails during commit, fix the issue and create a **new** commit — do not amend. 
+ +## Step 1: Commit Changes + +### Stage and Commit + +```bash +# Stage only relevant files +git add e2e-tests/ +git add .ci/ # Only if deployment config was changed + +# Commit with a descriptive message +git commit -m "fix(e2e): + + +- What test was failing +- What the root cause was +- How it was fixed" +``` + +### Commit Message Convention + +Follow the conventional commit format: +- `fix(e2e): fix flaky topology test timeout` +- `fix(e2e): update RBAC page locators after UI redesign` +- `fix(e2e): add retry logic for catalog entity refresh` +- `fix(e2e): skip orchestrator test on GKE platform` + +If a Jira ticket exists, reference it: +- `fix(e2e): fix topology locator drift [RHIDP-1234]` + +## Step 2: Push to Fork + +Push the fix branch to the fork (origin): + +```bash +git push -u origin +``` + +Example: +```bash +git push -u origin fix/e2e-topology-locator +# or +git push -u origin fix/RHIDP-1234-e2e-topology-locator +``` + +## Step 3: Create Pull Request + +Create a PR against the upstream `redhat-developer/rhdh` repository. + +**Dynamic username extraction** -- Always derive the GitHub username from the fork remote at runtime rather than hardcoding it. This makes the workflow portable across any contributor's environment: + +```bash +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +``` + +Then create the PR as a **draft** (always use `--draft`): +```bash +gh pr create \ + --draft \ + --repo redhat-developer/rhdh \ + --head "${GITHUB_USER}:" \ + --base \ + --title "fix(e2e): " \ + --body "$(cat <<'EOF' +## Summary +- <1-2 bullet points explaining what was fixed and why> + +## Test Results +- Local verification: 5/5 passes +- Code quality: lint, tsc, prettier all pass + +## Related +- Prow job: +- Jira: +EOF +)" +``` + +**Important**: Always use `--repo redhat-developer/rhdh` and `--head :` for cross-fork PRs. 
Never hardcode the GitHub username -- always extract it dynamically from the origin remote URL so this workflow works for any contributor. + +### PR Description Guidelines + +Keep it concise: +- What test was failing +- What the root cause was +- How it was fixed +- Link to the original failing CI job or Jira ticket + +## Step 4: Trigger Qodo Agentic Review + +After the PR is created, trigger an agentic review from Qodo (PR-Agent): + +```bash +# Get the PR number from the create output, then comment +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The Qodo bot will: +1. Analyze the code changes +2. Post review comments with suggestions +3. Optionally approve or request changes + +Note: The `.pr_agent.toml` in the repo configures Qodo with: +- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos +- Auto-review, auto-describe, and auto-improve on PR creation +- Scoped to `e2e-tests` folder changes + +## Step 5: Wait for and Address Qodo Review + +### Poll for Review Comments + +Check for Qodo review completion (it typically takes 1-3 minutes): + +```bash +# Check for Qodo bot comments +gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' + +# Check for inline comments +gh api repos/redhat-developer/rhdh/pulls//comments \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' +``` + +### Address Review Comments + +For each review comment: + +1. **Code suggestions**: If the suggestion improves the code, apply it: + ```bash + # Make the change locally + # Then stage only the changed files and commit + git add + git commit -m "fix(e2e): address review feedback" + git push + ``` + **Never use `git add -A` or `git add .`** — always stage specific files to avoid committing `.env`, test artifacts, or other local-only files. + +2. 
**Style/convention issues**: Fix them per project conventions + +3. **False positives**: If a suggestion is incorrect, explain why in a reply: + ```bash + gh api repos/redhat-developer/rhdh/pulls//comments//replies \ + -f body="This is intentional because " + ``` + +4. **Questions**: Answer them with context from the codebase + +## Step 6: Trigger Affected CI Job + +After addressing Qodo review feedback (and pushing any follow-up commits), trigger the presubmit E2E job that corresponds to the originally failing CI job. Presubmit job names differ from periodic/nightly names but cover the same platform and deployment method. + +**CRITICAL**: Never guess or construct presubmit job names. Always discover them from the `openshift-ci` bot response as described below. + +### Step 6a: Request Available Jobs + +Comment `/test ?` on the PR to request the list of available presubmit jobs: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test ?" +``` + +### Step 6b: Wait for the Bot Response + +Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: + +```bash +# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) +for i in $(seq 1 10); do + BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ + --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') + if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then + echo "Bot response received:" + echo "$BOT_RESPONSE" + break + fi + echo "Waiting for openshift-ci bot response (attempt $i/10)..." + sleep 30 +done +``` + +If no response is received after 5 minutes, ask the user for guidance. + +### Step 6c: Select the Right Job from the Bot Response + +Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. 
Use these patterns to identify the right job: + +| Original failure pattern | Look for presubmit containing | +|--------------------------|-------------------------------| +| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | +| `*ocp*operator*nightly*` | `*ocp*operator*` | +| `*aks*helm*` | `*aks*helm*` | +| `*eks*helm*` | `*eks*helm*` | +| `*gke*helm*` | `*gke*helm*` | + +**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. + +If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. + +### Step 6d: Trigger the Job + +Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. + +## Step 7: Monitor CI Status + +### Watch CI Checks + +After pushing changes, monitor the CI pipeline: + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Or check manually: +```bash +gh pr checks --repo redhat-developer/rhdh +``` + +CI check types (Prow E2E jobs, lint checks, build checks, etc.) are documented in the project CI rules. Use `gh pr checks` output to identify which specific check failed. + +### If CI Fails + +1. **E2E test failure**: Check the Prow job logs, determine if it's the same test or a different one +2. **Lint failure**: Run `yarn lint:fix` locally, commit and push +3. **Build failure**: Check TypeScript errors with `yarn tsc` +4. 
**Unrelated failure**: Comment on the PR noting it's an unrelated failure, optionally `/retest` to re-trigger + +### Re-trigger CI + +If a CI check needs to be re-run: +```bash +# Re-run all failed Prow jobs by commenting on the PR +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# Re-run one specific job (use an exact job name from the `/test ?` response) +gh pr comment --repo redhat-developer/rhdh --body "/test <job-name>" +``` + +## Step 8: Final Status Report + +After CI passes (or all issues are addressed), produce a final report: + +``` +PR Status Report: +- PR: +- Branch: -> +- CI Status: PASS / PENDING / FAIL +- Qodo Review: Addressed / Pending +- Files changed: +- Action items: +``` + +## Quick Reference: PR Workflow Commands + +```bash +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') + +# Create draft PR (always use --draft) +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base + +# Trigger Qodo review +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" + +# List available presubmit jobs (Step 6a) +gh pr comment --repo redhat-developer/rhdh --body "/test ?"
+ +# Wait for openshift-ci bot response (Step 6b) -- poll until bot replies with job list + +# Trigger specific presubmit job (Step 6d) -- ONLY use a job name from the bot's response +gh pr comment --repo redhat-developer/rhdh --body "/test " + +# Check CI status +gh pr checks --repo redhat-developer/rhdh + +# Re-trigger tests +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# View PR +gh pr view --repo redhat-developer/rhdh --web +``` diff --git a/.cursor/skills/e2e-verify-fix/SKILL.md b/.cursor/skills/e2e-verify-fix/SKILL.md new file mode 100644 index 0000000000..a7ee9ed8d8 --- /dev/null +++ b/.cursor/skills/e2e-verify-fix/SKILL.md @@ -0,0 +1,145 @@ +--- +name: e2e-verify-fix +description: Verify an E2E test fix by running the test multiple times and checking code quality +--- +# Verify Fix + +Verify that the test fix works reliably and passes all code quality checks. + +## When to Use + +Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm the fix works before submitting a PR. + +## MANDATORY: Use the Playwright Healer Agent for Verification + +Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. + +### Healer Initialization + +If not already initialized in this session, use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. + +## Verification Steps + +### 1. Single Run Verification via Healer + +Invoke the healer agent to run the fixed test once: + +``` +Task: "You are the Playwright Test Healer agent. 
Verify a fix by running the test once. +Working directory: /e2e-tests +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If it passes, report success. If it fails, examine the error and report what went wrong." +``` + +If it fails, go back to `e2e-diagnose-and-fix` and iterate — use the healer agent there too for the fix. + +### 2. Multi-Run Stability Check + +Run the test 5 times consecutively to verify no flakiness was introduced: + +```bash +cd e2e-tests +set -a && source .env && set +a +PASS=0; FAIL=0 +for i in $(seq 1 5); do + echo "=== Stability run $i/5 ===" + if npx playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Stability results: $PASS/5 passed" +``` + +**Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. + +**IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. + +### 3. Code Quality Checks + +Run all code quality checks in the e2e-tests workspace: + +```bash +cd e2e-tests + +# TypeScript compilation +yarn tsc:check + +# ESLint +yarn lint:check + +# Prettier formatting +yarn prettier:check +``` + +Fix any issues found: + +```bash +# Auto-fix lint issues +yarn lint:fix + +# Auto-fix formatting +yarn prettier:fix +``` + +### 4. 
Optional: Full Project Regression Check + +If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 +``` + +This is optional for isolated spec file changes but recommended for changes to: +- `e2e-tests/playwright/utils/` (utility classes) +- `e2e-tests/playwright/support/` (page objects, selectors) +- `e2e-tests/playwright/data/` (shared test data) +- `playwright.config.ts` (configuration) + +### 5. Review the Diff + +Before submitting, review all changes: + +```bash +git diff +git diff --stat +``` + +Verify: +- Only intended files were changed +- No secrets or credentials were added +- No unrelated changes were included +- Component annotations are present in any new/modified spec files +- Semantic selectors are used (no deprecated CSS class selectors) + +## Result Summary + +After verification, produce a summary: + +``` +Fix Verification Results: +- Test: () +- Single run: PASS +- Stability (5 runs): 5/5 PASS +- TypeScript: PASS +- ESLint: PASS +- Prettier: PASS +- Files changed: +- Ready for PR: YES/NO +``` diff --git a/.gitignore b/.gitignore index 84ac1c7a9c..3dfdc8b43d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,9 @@ node_modules/ .env .env.test +# Playwright MCP snapshots +.playwright-mcp + # Build output dist dist-types diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md new file mode 100644 index 0000000000..f76b4f044f --- /dev/null +++ b/.opencode/command/fix-e2e.md @@ -0,0 +1,167 @@ +--- +description: >- + Autonomously investigate and fix a failing RHDH E2E CI test. Accepts a Prow + job URL or Jira ticket ID. Deploys RHDH, reproduces the failure, fixes the + test using Playwright agents, and submits a PR with Qodo review. +--- +# Fix E2E CI Failure + +Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failing RHDH E2E test. 
+ +## Input + +`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +- **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Jira ticket ID**: `RHIDP-XXXX` +- **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` + +## Workflow + +Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. + +### Phase 1: Parse CI Failure + +**Skill**: `e2e-parse-ci-failure` + +Parse the input to extract: +- Failing test name and spec file path +- Playwright project name +- Release branch (main, release-1.9, etc.) +- Platform (OCP, AKS, EKS, GKE) +- Deployment method (Helm, Operator) +- Error type and message +- local-run.sh job name parameter + +**Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. + +### Phase 2: Setup Fix Branch + +First, check the current branch: + +```bash +git branch --show-current +``` + +- **On `main` or `release-*`**: You're on a base branch — create a feature branch using the skill: + ```bash + git fetch upstream + git checkout -b fix/e2e- upstream/ + ``` + If a Jira ticket was provided, include the ticket ID in the branch name: + `fix/RHIDP-XXXX-e2e-` + +- **On any other branch** (e.g., `fix/e2e-*`): You're likely already on a feature branch. **Ask the user** whether to: + 1. Use the current branch as-is + 2. Create a new branch from the upstream release branch + +### Phase 3: Deploy RHDH + +**Skill**: `e2e-deploy-rhdh` + +Deploy RHDH to a cluster using `e2e-tests/local-run.sh`. CLI mode requires **all three** flags (`-j`, `-r`, `-t`): + +**OCP jobs** — use `-s` (deploy-only) to skip automated test execution so you can run the specific failing test manually: +```bash +cd e2e-tests +./local-run.sh -j -r -t -s +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`. 
These jobs require the full execution pipeline and do not support deploy-only mode: +```bash +cd e2e-tests +./local-run.sh -j -r -t +``` + +Use the **full Prow CI job name** for `-j` (not shortened names). + +Select the image repo and tag based on the release branch: +- `main` → `-r rhdh-community/rhdh -t next` +- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` +- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` + +After deployment completes, set up the local test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +**Decision gate**: Before attempting deployment, verify cluster connectivity (`oc whoami`). If no cluster is available, **ask the user for explicit approval** before skipping this phase — do not skip silently. If deployment fails, the `e2e-deploy-rhdh` skill has error recovery procedures. If deployment cannot be recovered after investigation, report the deployment issue and stop. + +### Phase 4: Reproduce Failure + +**Skill**: `e2e-reproduce-failure` + +Run the specific failing test to confirm it reproduces locally. Use `--project=any-test` to avoid running the smoke test dependency — it matches any spec file without extra overhead: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Decision gates**: +- **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. +- **Consistent failure**: Proceed to Phase 5 +- **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability +- **Cannot reproduce** (passes every time after 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. + +### Phase 5: Diagnose and Fix + +**Skill**: `e2e-diagnose-and-fix` + +Analyze the failure and implement a fix: + +1. 
**Classify the failure**: locator drift, timing, assertion mismatch, data dependency, platform-specific, deployment config +2. **Use Playwright Test Agents**: Invoke the healer agent (`@playwright-test-healer`) for automated test repair — it can debug the test, inspect the UI, generate locators, and edit the code +3. **Follow Playwright best practices**: Consult the `playwright-locators` and `ci-e2e-testing` project rules. Use semantic role-based locators (`getByRole`, `getByLabel`), auto-waiting assertions, Page Object Model, component annotations. Fetch official Playwright best practices via Context7 or https://playwright.dev/docs/best-practices if needed +4. **Cross-repo investigation**: If the issue is in deployment config, search `rhdh-operator` and `rhdh-chart` repos. Use Sourcebot or Context7 if available; otherwise fall back to `gh search code` or clone the repo locally and grep + +**Decision gate**: If the analysis reveals a product bug (not a test issue), you must be **absolutely certain** before marking a test with `test.fixme()`. The Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with `// TODO:` linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Proceed to Phase 6 with the `test.fixme()` change + +### Phase 6: Verify Fix + +**Skill**: `e2e-verify-fix` + +Verify the fix: +1. Run the fixed test once — must pass +2. Run 5 times — must pass 5/5 +3. Run code quality checks: `yarn tsc:check`, `yarn lint:check`, `yarn prettier:check` +4. Fix any lint/formatting issues + +**Decision gate**: If the test still fails or is flaky, return to Phase 5 and iterate. 
If verification cannot be run (no cluster, environment issues), **ask the user for explicit approval** before proceeding without it. + +### Phase 7: Submit PR and Handle Review + +**Skill**: `e2e-submit-and-review` + +1. **Resolve pre-commit hooks**: Run `yarn install` in all relevant workspaces (root, `e2e-tests/`, `.ci/`) before committing +2. **Commit**: Stage changes, commit with conventional format +3. **Push**: `git push -u origin ` +4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` +5. **Trigger Qodo review**: Comment `/agentic_review` on the PR +6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +7. **Address feedback**: Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +9. **Monitor CI**: Watch CI checks with `gh pr checks` + +### Final Report + +After all phases complete, produce a summary: + +``` +E2E Fix Summary: +- Input: +- Test: () +- Branch: +- Root cause: +- Fix: +- Verification: +- PR: +- CI Status: +- Qodo Review: +``` diff --git a/.opencode/memories/e2e-fix-workflow.md b/.opencode/memories/e2e-fix-workflow.md new file mode 100644 index 0000000000..885604d1e0 --- /dev/null +++ b/.opencode/memories/e2e-fix-workflow.md @@ -0,0 +1,349 @@ +# E2E Test Fix Workflow + +This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. 
+ +## Table of Contents + +- [Workflow Overview](#workflow-overview) +- [Parsing CI Failures](#parsing-ci-failures) +- [Branch and Deployment Setup](#branch-and-deployment-setup) +- [Reproducing Failures](#reproducing-failures) +- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) +- [Playwright Test Agents](#playwright-test-agents) +- [Verification and PR Submission](#verification-and-pr-submission) + +## Workflow Overview + +The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: + +1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +2. **Setup Fix Branch** — Create a branch from the correct upstream release branch +3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** — Confirm the failure reproduces locally +5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents +6. **Verify Fix** — Run the test multiple times and check code quality +7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI + +Each phase has a corresponding skill (in `.opencode/skill/` and `.claude/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. + +**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. + +## Parsing CI Failures + +### Prow URL Structure + +``` +https://prow.ci.openshift.org/view/gs/test-platform-results/logs// +``` + +Build logs and JUnit XML results are in the GCS artifacts directory.
Look for Playwright output patterns: +``` +✘ [] › /.spec.ts: +``` + +### Job Name Mapping Tables + +#### Job Name → Release Branch + +| Pattern in job name | Release branch | +|---------------------|---------------| +| `*-rhdh-main-*` | `main` | +| `*-rhdh-release-1.9-*` | `release-1.9` | +| `*-rhdh-release-1.8-*` | `release-1.8` | + +#### Job Name → Platform and Deployment Method + +| Pattern | Platform | Method | +|---------|----------|--------| +| `*ocp*helm*` | OCP | Helm | +| `*ocp*operator*` | OCP | Operator | +| `*aks*helm*` | AKS | Helm | +| `*aks*operator*` | AKS | Operator | +| `*eks*helm*` | EKS | Helm | +| `*eks*operator*` | EKS | Operator | +| `*gke*helm*` | GKE | Helm | +| `*gke*operator*` | GKE | Operator | +| `*osd-gcp*` | OSD-GCP | Helm/Operator | + +#### Job Name → Playwright Projects + +| Job pattern | Projects | +|-------------|----------| +| `*ocp*helm*nightly*` (not upgrade) | `showcase`, `showcase-rbac`, `showcase-runtime`, `showcase-sanity-plugins`, `showcase-localization-*` | +| `*ocp*helm*upgrade*` | `showcase-upgrade` | +| `*ocp*operator*nightly*` (not auth) | `showcase-operator`, `showcase-operator-rbac` | +| `*ocp*operator*auth-providers*` | `showcase-auth-providers` | +| `*ocp*helm*pull*` | `showcase`, `showcase-rbac` | +| `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | +| `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | + +#### Job Name → local-run.sh `-j` Parameter + +Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. 
+ +**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` +**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` + +Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. + +#### Release Branch → Image Repo and Tag + +| Branch | `-r` (image repo) | `-t` (image tag) | +|--------|-------------------|-------------------| +| `main` | `rhdh-community/rhdh` | `next` | +| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | +| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | + +## Branch and Deployment Setup + +### Branch Creation + +Always create branches from upstream, never from local copies: + +```bash +git fetch upstream +git checkout -b fix/e2e-<description> upstream/<release-branch> +``` + +### Deployment via local-run.sh + +CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. + +```bash +cd e2e-tests +# OCP jobs: use -s to deploy only, then run tests manually +./local-run.sh -j <full-job-name> -r <image-repo> -t <image-tag> -s +# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required +./local-run.sh -j <full-job-name> -r <image-repo> -t <image-tag> +``` + +Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`.
+ +After deployment, source the test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +### Deployment Error Recovery + +| Error | Investigation | Common Fix | +|-------|--------------|------------| +| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | +| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | +| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | +| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | + +For config issues, search these repos for reference: +- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts +- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults + +## Reproducing Failures + +### Test Execution + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 --workers=1 +``` + +### Flakiness Detection + +If the test passes on first run, repeat 10 times: +- **10/10 pass** → cannot reproduce (check environment differences) +- **Mixed results** → flaky (focus on reliability improvements) +- **0/10 pass** → consistent failure + +### Debugging Modes + +```bash +# Headed (visible browser) +yarn playwright test --project= --headed + +# Debug (Playwright Inspector) +yarn playwright test --project= --debug + +# View trace +yarn playwright show-trace test-results//trace.zip +``` + +## Diagnosing and Fixing Tests + +### Failure Classification + +1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors +2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals +3. **Assertion mismatch** — Expected values changed → update test data or report product bug +4. **Data dependency** — Test data missing → add proper setup/teardown +5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional +6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` + +### RHDH Coding Conventions (Required) + +**Locators** — Always use semantic role-based locators: +```typescript +// GOOD +page.getByRole('button', { name: 'Create' }) +page.getByRole('heading', { name: 'Catalog' }) +page.getByText('No results found') + +// BAD — deprecated CSS selectors +page.locator('.MuiButton-root') +page.locator('[data-testid="..."]') +``` + +**Component annotations** — Every spec file must have: +```typescript +test.beforeAll(async ({}, testInfo) => { + testInfo.annotations.push({ + type: 'component', + description: 'your_component_name', + }); +}); +``` + +**Retry patterns** for async assertions: +```typescript +await expect(async () => { + await page.reload(); + await expect(page.getByText('entity')).toBeVisible(); +}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); +``` + +**Conditional skips**: +```typescript +import { skipIfJobName } from '../utils/helper'; +import * as constants from '../utils/constants'; +skipIfJobName(constants.GKE_JOBS); +``` + +**Forbidden patterns**: +- `page.waitForNetworkIdle()` / `networkidle` +- Raw CSS class selectors (`.MuiButton-root`) +- `page.waitForTimeout()` for synchronization +- Hardcoded secrets or credentials + +### Key Utility Classes + +| Class | Path | Purpose | +|-------|------|---------| +| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | +| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | +| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | +| `KubeClient` | `utils/kube-client.ts` | K8s resource management | +| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | +| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | + +### Product Bug Decision + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product 
bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Do **not** change assertions to match broken behavior +4. Proceed with the `test.fixme()` change + +## Playwright Test Agents + +The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. + +### Available Agents + +| Agent | Mode | Purpose | +|-------|------|---------| +| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | +| `playwright-test-generator` | subagent | Create new test code from a test plan | +| `playwright-test-planner` | subagent | Explore app and create test plans | + +### Healer Agent Usage (Primary for Fixes) + +The healer agent is the primary tool for test repair: +1. Runs tests with `test_run` to identify failures +2. Debugs with `test_debug` to step through failing tests +3. Inspects UI state via `browser_snapshot`, `browser_console_messages` +4. Generates correct locators with `browser_generate_locator` +5. Edits test code with `edit`/`write` tools +6. Re-runs tests to verify the fix + +Invoke with: `@playwright-test-healer Fix the failing test in ` + +## Verification and PR Submission + +### Verification Checklist + +1. Single test run passes +2. 5 consecutive runs pass (stability) +3. `yarn tsc:check` passes +4. `yarn lint:check` passes +5. 
`yarn prettier:check` passes + +### Pre-Commit Hooks + +Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: + +```bash +yarn install # Root workspace +cd e2e-tests && yarn install && cd .. # If e2e-tests files changed +cd .ci && yarn install && cd .. # If .ci files changed +``` + +### PR Creation + +Always create PRs as **drafts**: + +```bash +git push -u origin <branch-name> +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:<branch-name>" --base <release-branch> +``` + +### Qodo Review + +```bash +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. + +### Trigger Affected CI Job + +After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: + +```bash +# List available presubmit jobs +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/test ?" + +# Trigger the matching presubmit job +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/test <presubmit-job-name>" +``` + +Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`.
+ +### CI Monitoring + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` + +## Reference Files + +| Category | Key files | +|----------|-----------| +| Playwright config | `e2e-tests/playwright.config.ts` | +| Project names (SOT) | `e2e-tests/playwright/projects.json` | +| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | +| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | +| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | +| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | +| ConfigMaps | `.ci/pipelines/resources/config_map/` | +| Helm values | `.ci/pipelines/value_files/` | +| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | +| Environment vars | `.ci/pipelines/env_variables.sh` | +| Local test runner | `e2e-tests/local-run.sh` | +| Local test env | `e2e-tests/local-test-setup.sh` | +| Qodo config | `.pr_agent.toml` | diff --git a/.opencode/skill/e2e-deploy-rhdh/SKILL.md b/.opencode/skill/e2e-deploy-rhdh/SKILL.md new file mode 100644 index 0000000000..eb5a92eede --- /dev/null +++ b/.opencode/skill/e2e-deploy-rhdh/SKILL.md @@ -0,0 +1,232 @@ +--- +name: e2e-deploy-rhdh +description: >- + Deploy RHDH to an OpenShift cluster using local-run.sh for E2E test execution, + with autonomous error recovery for deployment failures +--- +# Deploy RHDH + +Deploy Red Hat Developer Hub to a cluster for E2E test execution using the existing `local-run.sh` workflow. + +## When to Use + +Use this skill when you need a running RHDH instance to reproduce and fix a test failure. 
+ +## Prerequisites + +Before running the deployment, verify these tools are installed: + +```bash +# Required tools (local-run.sh checks these automatically) +podman --version # Container runtime +oc version # OpenShift CLI +kubectl version --client # Kubernetes CLI +vault --version # HashiCorp Vault (for secrets) +jq --version # JSON processor +curl --version # HTTP client +rsync --version # File sync +bc --version # Calculator (for resource checks) +``` + +### Podman Machine Requirements + +The podman machine must be running with adequate resources: + +```bash +podman machine inspect | jq '.Resources' +# Requires: >= 8GB RAM, >= 4 CPUs +``` + +If resources are insufficient: +```bash +podman machine stop +podman machine set --memory 8192 --cpus 4 +podman machine start +``` + +## Deployment Using local-run.sh + +The primary deployment method uses `e2e-tests/local-run.sh`, which handles everything: +Vault authentication, cluster service account setup, RHDH deployment, and test execution. + +### Execution Rules + +**CRITICAL — deployment is a long-running operation:** + +1. **Never run `local-run.sh` in the background.** Operator installations can take 20-30 minutes. Use the Bash tool with `timeout: 600000` (10 minutes) and if it times out, **check the container log** — do NOT assume failure. +2. **Before starting a deployment, check for existing containers:** + ```bash + podman ps --format "{{.Names}} {{.Status}}" | grep -i rhdh-e2e-runner + ``` + If a deployment container is already running, **wait for it to finish** instead of starting a new one. Monitor via the container log: + ```bash + tail -f e2e-tests/.local-test/container.log + ``` +3. **Never launch concurrent deployments.** Two deployments to the same cluster will race and both fail. If a deployment appears stuck, check the container log and cluster state before deciding it failed. +4. **How to detect actual failure vs slow progress:** The operator install script outputs detailed debug logs. 
If the container log shows active progress (timestamps advancing), the deployment is still running. Only consider it failed if: + - The podman container has exited (`podman ps` shows no running container) + - AND the container log shows an error message (e.g., "Failed install RHDH Operator") + +### CLI Mode (Preferred) + +**CRITICAL**: CLI mode requires **all three** flags (`-j`, `-r`, `-t`). If `-r` is omitted, the script falls into interactive mode and will hang in automated contexts. + +```bash +cd e2e-tests +./local-run.sh -j -r -t [-s] +``` + +**Example — OCP job** (deploy-only with `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s +``` + +**Example — K8s job (AKS/EKS/GKE)** (full execution, no `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next +``` + +**Parameters:** +- `-j / --job`: The **full Prow CI job name** extracted from the Prow URL. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match, so the full name works correctly. Example: `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly` +- `-r / --repo`: Image repository (**required** for CLI mode — without it the script enters interactive mode) +- `-t / --tag`: Image tag (e.g., `1.9`, `next`) +- `-s / --skip-tests`: Deploy only, skip test execution. **OCP jobs only** — K8s jobs (AKS, EKS, GKE) do not support this flag and require the full execution pipeline + +**WARNING**: Do NOT use shortened job names like `nightly-ocp-helm` for `-j` — these do not match the glob patterns in `openshift-ci-tests.sh`. + +### Image Selection + +Refer to the `e2e-fix-workflow` rule for the release branch to image repo/tag mapping table. 
+ +### Deploy-Only Mode (OCP Jobs Only) + +For OCP jobs, deploy without running tests so you can run specific tests manually: + +```bash +./local-run.sh -j -r -t -s +``` + +**Note**: K8s jobs (AKS, EKS, GKE) do not support deploy-only mode. They require the full execution pipeline — run without `-s`. + +### What local-run.sh Does + +1. **Validates prerequisites**: Checks all required tools and podman resources +2. **Verifies the image**: Checks the image exists on quay.io via the Quay API +3. **Pulls the runner image**: `quay.io/rhdh-community/rhdh-e2e-runner:main` +4. **Authenticates to Vault**: OIDC-based login for secrets +5. **Sets up cluster access**: Creates `rhdh-local-tester` service account with cluster-admin, generates 48h token +6. **Copies the repo**: Syncs the local repo to `.local-test/rhdh/` (excludes node_modules) +7. **Runs a Podman container**: Executes `container-init.sh` inside the runner image, which: + - Fetches all Vault secrets to `/tmp/secrets/` + - Logs into the cluster + - Sets platform-specific environment variables + - Runs `.ci/pipelines/openshift-ci-tests.sh` for deployment + +### Post-Deployment: Setting Up for Manual Testing + +After `local-run.sh` completes (with `-s` for OCP jobs, or after full execution for K8s jobs), set up the environment for headed Playwright testing: + +```bash +# Source the test setup (choose 'showcase' or 'rbac') +source e2e-tests/local-test-setup.sh showcase +# or +source e2e-tests/local-test-setup.sh rbac +``` + +This exports: +- `BASE_URL` — The RHDH instance URL +- `K8S_CLUSTER_URL` — Cluster API server URL +- `K8S_CLUSTER_TOKEN` — Fresh service account token +- All Vault secrets as environment variables + +Verify RHDH is accessible: +```bash +curl -sSk "$BASE_URL" -o /dev/null -w "%{http_code}" +# Should return 200 +``` + +## Deployment Error Recovery + +### Common Deployment Failures + +#### CrashLoopBackOff + +**Symptoms**: Pod repeatedly crashes and restarts. 
+ +**Investigation**: +```bash +# Check pod status +oc get pods -n +# Check pod logs +oc logs -n --previous +# Check events +oc get events -n --sort-by=.lastTimestamp +``` + +**Common causes and fixes**: +1. **Missing ConfigMap**: The app-config ConfigMap wasn't created → check `.ci/pipelines/resources/config_map/` for the correct template +2. **Bad plugin configuration**: A dynamic plugin is misconfigured → check `dynamic-plugins-config` ConfigMap against `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +3. **Missing secrets**: Required secrets not mounted → verify secrets exist in the namespace +4. **Node.js errors**: Check for JavaScript errors in logs that indicate code issues + +#### ImagePullBackOff + +**Investigation**: +```bash +oc describe pod -n | grep -A5 "Events" +``` + +**Common causes**: +1. **Image doesn't exist**: Verify on quay.io: `curl -s 'https://quay.io/api/v1/repository/rhdh/rhdh-hub-rhel9/tag/?filter_tag_name=like:'` +2. **Pull secret missing**: Check `namespace::setup_image_pull_secret` in `.ci/pipelines/lib/namespace.sh` +3. **Registry auth**: Ensure the pull secret has correct credentials + +#### Helm Install Failure + +**Investigation**: +```bash +helm list -n +helm status -n +``` + +**Common causes**: +1. **Values file error**: Check merged values against `.ci/pipelines/value_files/values_showcase.yaml` +2. **Chart version mismatch**: Verify chart version with `helm::get_chart_version` from `.ci/pipelines/lib/helm.sh` + +#### Operator Deployment Failure + +**Investigation**: +```bash +oc get backstage -n +oc describe backstage -n +oc get csv -n # Check operator subscription status +``` + +**Common causes**: +1. **Backstage CR misconfigured**: Compare against `.ci/pipelines/resources/rhdh-operator/rhdh-start.yaml` +2. **Operator not installed**: Check CatalogSource and Subscription +3. 
**CRD not ready**: Wait for CRD with `k8s_wait::crd` pattern from `.ci/pipelines/lib/k8s-wait.sh` + +### Cross-Repo Investigation + +When deployment issues stem from the operator or chart, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search `rhdh-operator` and `rhdh-chart` repos for specific error patterns or configuration keys +2. **Context7** (if available): query `redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` for docs and code snippets +3. **Fallback — `gh search code`**: `gh search code '' --repo redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` +4. **Fallback — local clone**: clone the repo into a temp directory and grep for the pattern + +Key areas to look for: +- **rhdh-operator**: Backstage CR configuration, CatalogSource setup, operator installation scripts +- **rhdh-chart**: Helm values schema, chart templates, default configurations + +## Reference Files + +- Main deployment scripts: `.ci/pipelines/openshift-ci-tests.sh`, `.ci/pipelines/utils.sh` +- Library scripts: `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh`, `.ci/pipelines/lib/k8s-wait.sh`, `.ci/pipelines/lib/testing.sh` +- Helm values: `.ci/pipelines/value_files/` +- ConfigMaps: `.ci/pipelines/resources/config_map/` +- Operator CRs: `.ci/pipelines/resources/rhdh-operator/` +- Environment variables: `.ci/pipelines/env_variables.sh` diff --git a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md new file mode 100644 index 0000000000..16b12ea2ee --- /dev/null +++ b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md @@ -0,0 +1,250 @@ +--- +name: e2e-diagnose-and-fix +description: >- + Analyze a failing E2E test, determine root cause, and fix it using Playwright + Test Agents and RHDH project conventions +--- +# Diagnose and Fix + +Analyze the root cause of a failing E2E test and implement a fix following RHDH 
project conventions. + +## When to Use + +Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. + +## MANDATORY: Always Use the Playwright Healer Agent + +**The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. + +### Healer Initialization + +Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. Use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. + +### Environment Setup for Healer + +The healer agent needs a `.env` file in `e2e-tests/` with all required environment variables (BASE_URL, K8S_CLUSTER_TOKEN, vault secrets, etc.). Generate it by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +The `.env` file is gitignored — never commit it. To regenerate (e.g. after token expiry), re-run the command above. + +### Invoking the Healer + +Invoke the healer agent via the Task tool with `subagent_type: general`: + +``` +Task: "You are the Playwright Test Healer agent. Run the failing test, debug it, inspect the UI, and fix the code. 
+Working directory: /e2e-tests +Test: --project=any-test -g '' +Run command: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g ''" +``` + +The healer will autonomously: +1. Run the test and identify the failure +2. Examine error screenshots and error-context.md +3. Debug the test step-by-step using Playwright Inspector +4. Inspect the live UI via page snapshots +5. Generate correct locators and fix assertions +6. Edit the test code +7. Re-run to verify the fix + +### When to Supplement with Manual Diagnosis + +After the healer has run, supplement with manual investigation only for: +- **Data dependency failures** (category 4): The healer may not know how to create missing test data +- **Platform-specific failures** (category 5): The healer doesn't have context about platform differences +- **Deployment configuration issues** (category 6): The healer cannot modify ConfigMaps or Helm values +- **Product bugs**: When the healer confirms the test is correct but the application behavior is wrong + +## Failure Pattern Recognition + +### 1. Locator Drift + +**Symptoms**: `Error: locator.click: Error: strict mode violation` or `Timeout waiting for selector` or element not found errors. + +**Cause**: The UI has changed and selectors no longer match. + +**Fix approach**: +- Invoke the Playwright healer agent (`@playwright-test-healer`) — it will replay the test, inspect the current UI via page snapshots, generate updated locators, and edit the code automatically +- If the healer cannot resolve it, manually update to semantic role-based locators (see project rules) +- Verify the updated locator works by re-running the test + +### 2. Timing / Race Condition + +**Symptoms**: Test passes sometimes, fails sometimes. Errors like `Timeout 10000ms exceeded` or assertions failing on stale data. + +**Cause**: Test acts before the UI is ready, or waits are insufficient. 
+ +**Fix approach**: +- Invoke the Playwright healer agent first — it can identify timing issues by stepping through the test and observing UI state transitions +- If manual fixes are needed: replace `page.waitForTimeout()` with proper waits: `expect(locator).toBeVisible()`, `page.waitForLoadState()` +- Use `expect().toPass()` with retry intervals for inherently async checks: + ```typescript + await expect(async () => { + const text = await page.locator('.count').textContent(); + expect(Number(text)).toBeGreaterThan(0); + }).toPass({ intervals: [1000, 2000, 5000], timeout: 30_000 }); + ``` +- Increase action/navigation timeouts if the operation is legitimately slow +- Use `Common.waitForLoad()` utility before interacting with the page after navigation + +### 3. Assertion Mismatch + +**Symptoms**: `expect(received).toBe(expected)` with clearly different values. + +**Cause**: The expected value has changed due to a product change, data change, or environment difference. + +**Fix approach**: +- Determine if the change is intentional (check recent commits to the release branch) +- If intentional: update the expected value in the test or test data +- If unintentional: this may be a product bug — but you must first exhaust all other possibilities using the Playwright healer agent. Only after the healer confirms the test is correct and the application behavior is wrong should you mark it with `test.fixme()` (see the "Decision: Product Bug vs Test Issue" section below) + +### 4. Data Dependency + +**Symptoms**: Test fails because expected entities, users, or resources don't exist. + +**Cause**: Test data assumptions no longer hold (GitHub repos deleted, Keycloak users changed, catalog entities removed). 
+ +**Fix approach**: +- Update test data in `e2e-tests/playwright/support/test-data/` or `e2e-tests/playwright/data/` +- Ensure test creates its own data in `beforeAll`/`beforeEach` and cleans up in `afterAll`/`afterEach` +- Use `APIHelper` for programmatic setup (GitHub API, Backstage catalog API) + +### 5. Platform-Specific Failure + +**Symptoms**: Test passes on OCP but fails on GKE/AKS/EKS, or vice versa. + +**Cause**: Platform differences (Routes vs Ingress, different auth, different network policies). + +**Fix approach**: +- Add conditional skip if the test is inherently platform-specific: + ```typescript + import { skipIfJobName, skipIfIsOpenShift } from '../utils/helper'; + // Skip on GKE + skipIfJobName(constants.GKE_JOBS); + // Skip on non-OpenShift + skipIfIsOpenShift('false'); + ``` +- Or add platform-specific logic within the test using `process.env.IS_OPENSHIFT`, `process.env.CONTAINER_PLATFORM` + +### 6. Deployment Configuration Issue + +**Symptoms**: RHDH itself is broken (500 errors, missing plugins, wrong behavior). + +**Cause**: ConfigMap or Helm values are incorrect for this test scenario. + +**Fix approach**: +- Check the ConfigMaps: `.ci/pipelines/resources/config_map/app-config-rhdh.yaml` and `app-config-rhdh-rbac.yaml` +- Check Helm values: `.ci/pipelines/value_files/` +- Check dynamic plugins config: `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +- Search `rhdh-operator` and `rhdh-chart` repos for configuration reference (use Sourcebot, Context7, `gh search code`, or a local clone — whichever is available) +- Fix the deployment configuration rather than the test code + +## Playwright Test Agents Reference + +The Playwright Test Agents are initialized via `npx playwright init-agents --loop=opencode` (see initialization section above). This creates an MCP server and agent definitions in `e2e-tests/opencode.json`. 
+ +### Healer Agent (MANDATORY for All Fixes) + +The healer agent is the **primary and mandatory** tool for fixing failing tests. It has access to: + +- **`test_run`**: Run tests and identify failures +- **`test_debug`**: Step through failing tests with the Playwright Inspector +- **`browser_snapshot`**: Capture accessibility snapshots of the live UI +- **`browser_console_messages`**: Read browser console logs +- **`browser_network_requests`**: Monitor network requests +- **`browser_generate_locator`**: Generate correct locators from the live UI +- **`edit`/`write`**: Edit test code directly + +The healer autonomously cycles through: run → debug → inspect → fix → re-run until the test passes. + +### Planner Agent (For Understanding Complex Scenarios) + +Use `@playwright-test-planner` when you need to understand a complex user flow before fixing a test. It explores the app and maps out the interaction patterns. + +### Generator Agent (For Creating New Test Steps) + +Use `@playwright-test-generator` when a test needs major rework and you need to generate new test steps from a plan. + +## Coding Conventions + +Every fix **must** follow Playwright best practices. Before writing or modifying test code, consult these resources in order: + +1. **Project rules** (always available locally): + - `playwright-locators` rule — locator priority, anti-patterns, assertions, Page Objects, DataGrid handling + - `ci-e2e-testing` rule — test structure, component annotations, project configuration, CI scripts + +2. **Official Playwright docs** (fetch via Context7 if available, otherwise use web): + - Best practices: https://playwright.dev/docs/best-practices + - Locators guide: https://playwright.dev/docs/locators + - Assertions: https://playwright.dev/docs/test-assertions + - Auto-waiting: https://playwright.dev/docs/actionability + +### Key requirements + +- **Locators**: always prefer `getByRole()`, `getByLabel()`, `getByPlaceholder()` over CSS/XPath selectors. 
Never use MUI class names (`.MuiButton-label`, `.MuiDataGrid-*`). +- **Assertions**: use Playwright's auto-waiting assertions (`expect(locator).toBeVisible()`) — never use manual `waitForSelector()` or `waitForTimeout()`. +- **Component annotations**: every `*.spec.ts` file must have a `component` annotation in `test.beforeAll`. +- **Page Object Model**: return `Locator` objects from page classes, not raw strings or elements. +- **No `force: true`**: if a click requires `force`, the locator or timing is wrong — fix the root cause. +- **No `waitForNetworkIdle()`**: use proper load-state waits or assertion-based waiting instead. + +## Cross-Repo Investigation + +When the issue is in RHDH deployment/config rather than test code, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search repos for specific error patterns or configuration keys +2. **Context7** (if available): query repos for docs and code snippets +3. **Fallback — `gh search code`**: e.g. `gh search code '' --repo redhat-developer/rhdh-operator` +4. **Fallback — local clone**: clone the repo into a temp directory and grep + +### rhdh-operator (`redhat-developer/rhdh-operator`) +- Backstage CR specification and defaults +- CatalogSource configuration +- Operator installation scripts (especially `install-rhdh-catalog-source.sh`) + +### rhdh-chart (`redhat-developer/rhdh-chart`) +- Helm values.yaml schema and defaults +- Chart templates for Deployments, Services, ConfigMaps +- Default dynamic plugin configurations + +### Other Repositories +- **backstage/backstage**: For upstream Backstage API changes +- **redhat-developer/red-hat-developers-documentation-rhdh**: For documentation on expected behavior + +## Decision: Product Bug vs Test Issue + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug before marking a test this way. Follow this checklist: + +1. 
**Run the Playwright healer agent** — it must confirm that the test logic is correct and the application behavior is wrong +2. **Verify manually** — inspect the live UI, check network responses, and confirm the product is genuinely broken (not a stale cache, missing data, or environment-specific issue) +3. **Check recent commits** — search the release branch for recent product changes that could explain the behavior change +4. **Ask the user for confirmation** before applying `test.fixme()` — do not decide unilaterally + +Only after all of the above confirm a product bug: + +1. **File a Jira bug** in the `RHDHBUGS` project (or update the existing ticket) documenting the product regression +2. **Mark the test with `test.fixme()`**, preceded by a `// TODO:` comment linking to the Jira ticket: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Button no longer visible after version upgrade'); + ``` +3. **Do not change the test assertions** to match broken behavior +4. **Proceed to `e2e-submit-and-review`** with the `test.fixme()` change diff --git a/.opencode/skill/e2e-parse-ci-failure/SKILL.md b/.opencode/skill/e2e-parse-ci-failure/SKILL.md new file mode 100644 index 0000000000..3c1f75659d --- /dev/null +++ b/.opencode/skill/e2e-parse-ci-failure/SKILL.md @@ -0,0 +1,202 @@ +--- +name: e2e-parse-ci-failure +description: >- + Parse a Prow CI job URL or Jira ticket to extract E2E test failure details + including test name, spec file, release branch, platform, and error messages +--- +# Parse CI Failure + +Extract structured failure context from a Prow job URL or Jira ticket for an RHDH E2E CI failure. + +## When to Use + +Use this skill when you receive a failing Prow job URL (e.g., `https://prow.ci.openshift.org/view/gs/...`), a Jira ticket ID (e.g., `RHIDP-XXXX`), or a Jira URL (e.g., `https://redhat.atlassian.net/browse/RHIDP-XXXX`) for an E2E test failure and need to extract all relevant details before starting a fix. 
+ +## Input Detection + +- **Playwright report URL**: URL ending in `index.html` (with optional `#?testId=...` fragment) — use Playwright MCP if available (see "Playwright Report Parsing" below), otherwise fall back to build log parsing +- **Prow URL**: Starts with `https://prow.ci.openshift.org/` — parse the job page and build log +- **Jira ticket ID**: Matches pattern `RHIDP-\d+` or similar — use Jira MCP tools to read the ticket +- **Jira URL**: Starts with `https://redhat.atlassian.net/browse/` — extract the ticket ID from the URL path (e.g., `RHIDP-XXXX` from `https://redhat.atlassian.net/browse/RHIDP-XXXX`) and then use Jira MCP tools to read the ticket + +## Prow URL Parsing + +### URL Structure + +Prow job URLs follow two patterns: + +- **Periodic/postsubmit**: `https://prow.ci.openshift.org/view/gs/test-platform-results/logs//` +- **Presubmit (PR)**: `https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/redhat-developer_rhdh///` + +Extract `` and `` from the URL path. These are the two key values needed for all derivations. + +### GCS URL Derivation + +Convert the Prow URL to a GCS artifacts URL by replacing the prefix: + +``` +Prow: https://prow.ci.openshift.org/view/gs/test-platform-results/logs// +GCS: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ +``` + +For presubmit jobs, use `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +Key artifacts within the GCS directory: +- **Build log**: `/build-log.txt` +- **JUnit XML**: `/artifacts/junit-results/results.xml` +- **Playwright report**: `/artifacts/playwright-report/` + +Fetch the Prow job page with WebFetch to find the job status and artifact links, then fetch the build log for test failure details. 
+ +### Extracting Test Failures from Build Log + +Search the build log for these Playwright output patterns: + +``` +# Failing test line (primary source for test name, spec file, and project): + ✘ [] › /.spec.ts: + +# Error details (immediately after the failure line): + Error: + expect(received).toBeVisible() + Locator: + +# Summary (at the end of the log): + X failed + X passed + X skipped +``` + +Also check JUnit XML for `` elements with `` children as a fallback. + +## Playwright Report Parsing + +When the URL points to a Playwright HTML report (`index.html`, optionally with `#?testId=...`), use Playwright MCP if available — navigate with `browser_navigate`, then `browser_snapshot` to extract test name, spec file, error, steps, retries, screenshots, and traces from the accessibility tree. Derive job metadata (``, ``, ``) from the URL path segments. + +If Playwright MCP is not available, derive the `build-log.txt` URL from the report URL and fall back to build log parsing. + +## Jira Ticket Parsing + +Use Jira MCP tools to read the ticket. Extract: + +1. **Prow job URLs** from the description or comments — then parse them using the Prow URL Parsing steps above. +2. **Test names, spec file paths, error messages, or stack traces** from the description, comments, or attachments. +3. **`affects version`** field — map to release branch (e.g., `1.10` → `main`, `1.9` → `release-1.9`, `1.8` → `release-1.8`). +4. **`component`** field for additional context (e.g., "E2E Tests", "CI/CD"). + +## Job Name Mapping + +Refer to the **e2e-fix-workflow** rule for all mapping tables: job name to release branch, job name to platform and deployment method, job name to Playwright projects, release branch to image repo/tag, and job name to `local-run.sh` `-j` parameter. Those tables are the single source of truth and should not be duplicated here. 
+ +When parsing a job name, apply those mapping tables to derive: release branch, platform, deployment method, Playwright projects, and `local-run.sh` flags (`-j`, `-r`, `-t`). + +## Fields Requiring Build Log Access + +Not all output fields can be derived from the Prow URL alone. The following table clarifies what requires fetching the build log or artifacts: + +| Field | Source | Derivable from URL alone? | +|-------|--------|---------------------------| +| Job name | URL path segment | Yes | +| Build ID | URL path segment | Yes | +| Release branch | Job name pattern match | Yes | +| Platform | Job name pattern match | Yes | +| Deployment method | Job name pattern match | Yes | +| Playwright projects | Job name pattern match | Yes | +| `local-run.sh` flags (`-j`, `-r`, `-t`) | Job name + release branch | Yes | +| GCS artifacts URL | Constructed from URL | Yes | +| Test name | Build log Playwright output | No — requires build log | +| Spec file | Build log Playwright output | No — requires build log | +| Specific Playwright project (of failing test) | Build log `[project]` prefix | No — requires build log | +| Error type | Build log error details | No — requires build log | +| Error message | Build log error details | No — requires build log | +| Failure count / pass count | Build log summary line | No — requires build log | + +## Output + +Produce the following structured output with four sections. + +### 1. Structured Summary + +``` +- Test name: +- Spec file: +- Playwright project: +- Release branch:
+- Platform: +- Deployment method: +- Error type: +- Error message: +- Prow URL: +- Jira ticket: +``` + +### 2. Derivation Details + +Show how each field was derived with the matching pattern. This makes the reasoning transparent and auditable. + +``` +| Field | Value | Derivation | +|--------------------|------------------------------|-----------------------------------------------------------| +| Job name | | Extracted from URL path segment | +| Build ID | | Extracted from URL path segment | +| Release branch | | Pattern `*-rhdh--*` matched in job name | +| Platform | | Pattern `**` matched in job name | +| Deployment method | | Pattern `**` matched in job name | +| Playwright project | | `[]` prefix in failing test line | +| Image repo (-r) | | Release branch `` maps to `` | +| Image tag (-t) | | Release branch `` maps to `` | +| Test name | | Parsed from `✘` line in build log | +| Spec file | | Parsed from `✘` line in build log | +| Error type | | Classified from error message pattern | +``` + +### 3. GCS Artifacts Location + +Derive and present the GCS artifacts URLs constructed from the Prow URL: + +``` +GCS Artifacts Base: + https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ + +Build Log: + /artifacts//build-log.txt + +JUnit Results: + /artifacts//artifacts/junit-results/results.xml + +Playwright Report: + /artifacts//artifacts/playwright-report/ +``` + +For presubmit (PR) jobs, the base path uses `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +### 4. local-run.sh Command + +Provide the full command ready to copy-paste, with a flag breakdown. 
+ +**OCP jobs** — use `-s` for deploy-only mode: +``` +cd e2e-tests +./local-run.sh -j -r -t -s + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +| -s | (no value) | Deploy only, skip running tests | +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`; full execution is required: +``` +cd e2e-tests +./local-run.sh -j -r -t + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +``` diff --git a/.opencode/skill/e2e-reproduce-failure/SKILL.md b/.opencode/skill/e2e-reproduce-failure/SKILL.md new file mode 100644 index 0000000000..17a8728bfc --- /dev/null +++ b/.opencode/skill/e2e-reproduce-failure/SKILL.md @@ -0,0 +1,194 @@ +--- +name: e2e-reproduce-failure +description: >- + Run a specific failing E2E test against a deployed RHDH instance to confirm + the failure and determine if it is consistent or flaky +--- +# Reproduce Failure + +Run the failing test locally against a deployed RHDH instance to confirm the failure and classify it. + +## When to Use + +Use this skill after deploying RHDH (via `e2e-deploy-rhdh`) when you need to verify the test failure reproduces locally before attempting a fix. 
+ +## Prerequisites + +- RHDH deployed and accessible (BASE_URL set) +- Environment configured via `source e2e-tests/local-test-setup.sh ` +- Node.js 22 and Yarn available +- Playwright browsers installed (`cd e2e-tests && yarn install && yarn playwright install chromium`) + +## Environment Setup + +### Source the Test Environment + +```bash +# For non-RBAC tests (showcase, showcase-k8s, showcase-operator, etc.) +source e2e-tests/local-test-setup.sh showcase + +# For RBAC tests (showcase-rbac, showcase-rbac-k8s, showcase-operator-rbac) +source e2e-tests/local-test-setup.sh rbac +``` + +This exports all required environment variables: `BASE_URL`, `K8S_CLUSTER_URL`, `K8S_CLUSTER_TOKEN`, and all Vault secrets. + +### Verify Environment + +```bash +echo "BASE_URL: $BASE_URL" +curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" +``` + +## MANDATORY: Use the Playwright Healer Agent for Reproduction + +Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. + +### Healer Initialization (First Time Only) + +Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +### Environment Setup + +Generate the `.env` file by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +To regenerate (e.g. after token expiry), re-run the command above. + +### Project Selection + +When running specific test files or test cases, use `--project=any-test` to avoid running the smoke test dependency. 
The `any-test` project matches any spec file without extra overhead: + +```bash +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +### Running via Healer Agent + +Invoke the healer agent via the Task tool: + +``` +Task: "You are the Playwright Test Healer agent. Run the following test to reproduce a CI failure. +Working directory: /e2e-tests +Test: --project=any-test -g '' +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If the test fails, examine the error output, screenshots in test-results/, and error-context.md. +Report: pass/fail, exact error message, what the UI shows at the point of failure." +``` + +### Fallback: Direct Execution + +If the healer agent is unavailable, run tests directly: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Examples:** +```bash +# A specific spec file +yarn playwright test playwright/e2e/plugins/topology/topology.spec.ts --project=any-test --retries=0 --workers=1 + +# A specific test by name +yarn playwright test -g "should display topology" --project=any-test --retries=0 --workers=1 +``` + +### Headed / Debug Mode + +For visual debugging when manual investigation is needed: + +```bash +# Headed mode (visible browser) +yarn playwright test --project=any-test --retries=0 --workers=1 --headed + +# Debug mode (Playwright Inspector, step-by-step) +yarn playwright test --project=any-test --retries=0 --workers=1 --debug +``` + +## Flakiness Detection + +If the first run **passes** (doesn't reproduce the failure), run multiple times to check for flakiness: + +```bash +cd e2e-tests + +# Run 10 times and track results +PASS=0; FAIL=0 +for i in $(seq 1 10); do + echo "=== Run $i ===" + if yarn playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Results: $PASS passed, $FAIL failed out of 10 runs" +``` + +## Result 
Classification + +### Consistent Failure +- **Definition**: Fails every time (10/10 runs fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill +- **Confidence**: High — the fix can be verified reliably + +### Flaky +- **Definition**: Fails some runs but not all (e.g., 3/10 fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill, focus on reliability improvements +- **Typical causes**: Race conditions, timing dependencies, state leaks between tests, external service variability + +### Cannot Reproduce +- **Definition**: Passes all runs locally (0/10 fail) +- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Investigation**: Check environment differences between local and CI: + - **Cluster version**: CI may use a different OCP version (check the cluster pool version) + - **Image version**: CI may use a different RHDH image + - **Resource constraints**: CI clusters may have fewer resources + - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Network**: CI clusters are in `us-east-2` AWS region + - **External services**: GitHub API rate limits, Keycloak availability + +## Artifact Collection + +### Playwright Traces + +After a test failure, traces are saved in `e2e-tests/test-results/`: + +```bash +# View a trace +yarn playwright show-trace test-results/<test-directory>/trace.zip +``` + +### HTML Report + +```bash +# Generate and open the HTML report +yarn playwright show-report +``` + +### Screenshots and Videos + +On failure, screenshots and videos are saved in `test-results/<test-directory>/`: +- `test-failed-1.png` — Screenshot at failure point +- `video.webm` — Full test recording (if video is enabled) + +## Test Project Reference + +Refer to the e2e-fix-workflow rule for the Playwright project → config map mapping.
diff --git a/.opencode/skill/e2e-submit-and-review/SKILL.md b/.opencode/skill/e2e-submit-and-review/SKILL.md new file mode 100644 index 0000000000..5aedb84414 --- /dev/null +++ b/.opencode/skill/e2e-submit-and-review/SKILL.md @@ -0,0 +1,316 @@ +--- +name: e2e-submit-and-review +description: >- + Create a PR for an E2E test fix, trigger Qodo agentic review, address review + comments, and monitor CI results +--- +# Submit and Review + +Create a pull request for the E2E test fix, trigger automated review, address feedback, and verify CI passes. + +## When to Use + +Use this skill after verifying the fix (via `e2e-verify-fix`) when all tests pass and code quality checks are clean. + +## Step 0: Resolve Pre-Commit Hooks + +Before committing, ensure all related workspaces have their dependencies installed so pre-commit hooks (lint-staged, rulesync, etc.) pass: + +```bash +# Root workspace +yarn install + +# If e2e-tests files were changed +cd e2e-tests && yarn install && cd .. + +# If .ci files were changed +cd .ci && yarn install && cd .. +``` + +If a pre-commit hook fails during commit, fix the issue and create a **new** commit — do not amend. 
+ +## Step 1: Commit Changes + +### Stage and Commit + +```bash +# Stage only relevant files +git add e2e-tests/ +git add .ci/ # Only if deployment config was changed + +# Commit with a descriptive message +git commit -m "fix(e2e): + + +- What test was failing +- What the root cause was +- How it was fixed" +``` + +### Commit Message Convention + +Follow the conventional commit format: +- `fix(e2e): fix flaky topology test timeout` +- `fix(e2e): update RBAC page locators after UI redesign` +- `fix(e2e): add retry logic for catalog entity refresh` +- `fix(e2e): skip orchestrator test on GKE platform` + +If a Jira ticket exists, reference it: +- `fix(e2e): fix topology locator drift [RHIDP-1234]` + +## Step 2: Push to Fork + +Push the fix branch to the fork (origin): + +```bash +git push -u origin +``` + +Example: +```bash +git push -u origin fix/e2e-topology-locator +# or +git push -u origin fix/RHIDP-1234-e2e-topology-locator +``` + +## Step 3: Create Pull Request + +Create a PR against the upstream `redhat-developer/rhdh` repository. + +**Dynamic username extraction** -- Always derive the GitHub username from the fork remote at runtime rather than hardcoding it. This makes the workflow portable across any contributor's environment: + +```bash +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +``` + +Then create the PR as a **draft** (always use `--draft`): +```bash +gh pr create \ + --draft \ + --repo redhat-developer/rhdh \ + --head "${GITHUB_USER}:" \ + --base \ + --title "fix(e2e): " \ + --body "$(cat <<'EOF' +## Summary +- <1-2 bullet points explaining what was fixed and why> + +## Test Results +- Local verification: 5/5 passes +- Code quality: lint, tsc, prettier all pass + +## Related +- Prow job: +- Jira: +EOF +)" +``` + +**Important**: Always use `--repo redhat-developer/rhdh` and `--head :` for cross-fork PRs. 
Never hardcode the GitHub username -- always extract it dynamically from the origin remote URL so this workflow works for any contributor. + +### PR Description Guidelines + +Keep it concise: +- What test was failing +- What the root cause was +- How it was fixed +- Link to the original failing CI job or Jira ticket + +## Step 4: Trigger Qodo Agentic Review + +After the PR is created, trigger an agentic review from Qodo (PR-Agent): + +```bash +# Get the PR number from the create output, then comment +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The Qodo bot will: +1. Analyze the code changes +2. Post review comments with suggestions +3. Optionally approve or request changes + +Note: The `.pr_agent.toml` in the repo configures Qodo with: +- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos +- Auto-review, auto-describe, and auto-improve on PR creation +- Scoped to `e2e-tests` folder changes + +## Step 5: Wait for and Address Qodo Review + +### Poll for Review Comments + +Check for Qodo review completion (it typically takes 1-3 minutes): + +```bash +# Check for Qodo bot comments +gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' + +# Check for inline comments +gh api repos/redhat-developer/rhdh/pulls//comments \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' +``` + +### Address Review Comments + +For each review comment: + +1. **Code suggestions**: If the suggestion improves the code, apply it: + ```bash + # Make the change locally + # Then stage only the changed files and commit + git add + git commit -m "fix(e2e): address review feedback" + git push + ``` + **Never use `git add -A` or `git add .`** — always stage specific files to avoid committing `.env`, test artifacts, or other local-only files. + +2. 
**Style/convention issues**: Fix them per project conventions + +3. **False positives**: If a suggestion is incorrect, explain why in a reply: + ```bash + gh api repos/redhat-developer/rhdh/pulls//comments//replies \ + -f body="This is intentional because " + ``` + +4. **Questions**: Answer them with context from the codebase + +## Step 6: Trigger Affected CI Job + +After addressing Qodo review feedback (and pushing any follow-up commits), trigger the presubmit E2E job that corresponds to the originally failing CI job. Presubmit job names differ from periodic/nightly names but cover the same platform and deployment method. + +**CRITICAL**: Never guess or construct presubmit job names. Always discover them from the `openshift-ci` bot response as described below. + +### Step 6a: Request Available Jobs + +Comment `/test ?` on the PR to request the list of available presubmit jobs: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test ?" +``` + +### Step 6b: Wait for the Bot Response + +Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: + +```bash +# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) +for i in $(seq 1 10); do + BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ + --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') + if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then + echo "Bot response received:" + echo "$BOT_RESPONSE" + break + fi + echo "Waiting for openshift-ci bot response (attempt $i/10)..." + sleep 30 +done +``` + +If no response is received after 5 minutes, ask the user for guidance. + +### Step 6c: Select the Right Job from the Bot Response + +Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. 
Use these patterns to identify the right job: + +| Original failure pattern | Look for presubmit containing | +|--------------------------|-------------------------------| +| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | +| `*ocp*operator*nightly*` | `*ocp*operator*` | +| `*aks*helm*` | `*aks*helm*` | +| `*eks*helm*` | `*eks*helm*` | +| `*gke*helm*` | `*gke*helm*` | + +**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. + +If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. + +### Step 6d: Trigger the Job + +Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. + +## Step 7: Monitor CI Status + +### Watch CI Checks + +After pushing changes, monitor the CI pipeline: + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Or check manually: +```bash +gh pr checks --repo redhat-developer/rhdh +``` + +CI check types (Prow E2E jobs, lint checks, build checks, etc.) are documented in the project CI rules. Use `gh pr checks` output to identify which specific check failed. + +### If CI Fails + +1. **E2E test failure**: Check the Prow job logs, determine if it's the same test or a different one +2. **Lint failure**: Run `yarn lint:fix` locally, commit and push +3. **Build failure**: Check TypeScript errors with `yarn tsc` +4. 
**Unrelated failure**: Comment on the PR noting it's an unrelated failure, optionally `/retest` to re-trigger + +### Re-trigger CI + +If a CI check needs to be re-run: +```bash +# For Prow jobs, comment on the PR +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# For a specific job, use /test with a job name from the "/test ?" list (/retest reruns all failed jobs and takes no job name) +gh pr comment --repo redhat-developer/rhdh --body "/test <job-name>" +``` + +## Step 8: Final Status Report + +After CI passes (or all issues are addressed), produce a final report: + +``` +PR Status Report: +- PR: +- Branch: -> +- CI Status: PASS / PENDING / FAIL +- Qodo Review: Addressed / Pending +- Files changed: +- Action items: +``` + +## Quick Reference: PR Workflow Commands + +```bash +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') + +# Create draft PR (always use --draft) +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base + +# Trigger Qodo review +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" + +# List available presubmit jobs (Step 6a) +gh pr comment --repo redhat-developer/rhdh --body "/test ?"
+ +# Wait for openshift-ci bot response (Step 6b) -- poll until bot replies with job list + +# Trigger specific presubmit job (Step 6d) -- ONLY use a job name from the bot's response +gh pr comment --repo redhat-developer/rhdh --body "/test " + +# Check CI status +gh pr checks --repo redhat-developer/rhdh + +# Re-trigger tests +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# View PR +gh pr view --repo redhat-developer/rhdh --web +``` diff --git a/.opencode/skill/e2e-verify-fix/SKILL.md b/.opencode/skill/e2e-verify-fix/SKILL.md new file mode 100644 index 0000000000..8a24f1f442 --- /dev/null +++ b/.opencode/skill/e2e-verify-fix/SKILL.md @@ -0,0 +1,147 @@ +--- +name: e2e-verify-fix +description: >- + Verify an E2E test fix by running the test multiple times and checking code + quality +--- +# Verify Fix + +Verify that the test fix works reliably and passes all code quality checks. + +## When to Use + +Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm the fix works before submitting a PR. + +## MANDATORY: Use the Playwright Healer Agent for Verification + +Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. + +### Healer Initialization + +If not already initialized in this session, use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. + +## Verification Steps + +### 1. Single Run Verification via Healer + +Invoke the healer agent to run the fixed test once: + +``` +Task: "You are the Playwright Test Healer agent. 
Verify a fix by running the test once. +Working directory: /e2e-tests +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If it passes, report success. If it fails, examine the error and report what went wrong." +``` + +If it fails, go back to `e2e-diagnose-and-fix` and iterate — use the healer agent there too for the fix. + +### 2. Multi-Run Stability Check + +Run the test 5 times consecutively to verify no flakiness was introduced: + +```bash +cd e2e-tests +set -a && source .env && set +a +PASS=0; FAIL=0 +for i in $(seq 1 5); do + echo "=== Stability run $i/5 ===" + if npx playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Stability results: $PASS/5 passed" +``` + +**Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. + +**IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. + +### 3. Code Quality Checks + +Run all code quality checks in the e2e-tests workspace: + +```bash +cd e2e-tests + +# TypeScript compilation +yarn tsc:check + +# ESLint +yarn lint:check + +# Prettier formatting +yarn prettier:check +``` + +Fix any issues found: + +```bash +# Auto-fix lint issues +yarn lint:fix + +# Auto-fix formatting +yarn prettier:fix +``` + +### 4. 
Optional: Full Project Regression Check + +If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 +``` + +This is optional for isolated spec file changes but recommended for changes to: +- `e2e-tests/playwright/utils/` (utility classes) +- `e2e-tests/playwright/support/` (page objects, selectors) +- `e2e-tests/playwright/data/` (shared test data) +- `playwright.config.ts` (configuration) + +### 5. Review the Diff + +Before submitting, review all changes: + +```bash +git diff +git diff --stat +``` + +Verify: +- Only intended files were changed +- No secrets or credentials were added +- No unrelated changes were included +- Component annotations are present in any new/modified spec files +- Semantic selectors are used (no deprecated CSS class selectors) + +## Result Summary + +After verification, produce a summary: + +``` +Fix Verification Results: +- Test: () +- Single run: PASS +- Stability (5 runs): 5/5 PASS +- TypeScript: PASS +- ESLint: PASS +- Prettier: PASS +- Files changed: +- Ready for PR: YES/NO +``` diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md new file mode 100644 index 0000000000..80dc6fe3c7 --- /dev/null +++ b/.rulesync/commands/fix-e2e.md @@ -0,0 +1,169 @@ +--- +targets: + - '*' +description: >- + Autonomously investigate and fix a failing RHDH E2E CI test. Accepts a Prow + job URL or Jira ticket ID. Deploys RHDH, reproduces the failure, fixes the + test using Playwright agents, and submits a PR with Qodo review. +--- +# Fix E2E CI Failure + +Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failing RHDH E2E test. 
+ +## Input + +`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +- **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Jira ticket ID**: `RHIDP-XXXX` +- **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` + +## Workflow + +Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. + +### Phase 1: Parse CI Failure + +**Skill**: `e2e-parse-ci-failure` + +Parse the input to extract: +- Failing test name and spec file path +- Playwright project name +- Release branch (main, release-1.9, etc.) +- Platform (OCP, AKS, EKS, GKE) +- Deployment method (Helm, Operator) +- Error type and message +- local-run.sh job name parameter + +**Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. + +### Phase 2: Setup Fix Branch + +First, check the current branch: + +```bash +git branch --show-current +``` + +- **On `main` or `release-*`**: You're on a base branch — create a feature branch from the matching upstream release branch: + ```bash + git fetch upstream + git checkout -b fix/e2e-<short-description> upstream/<release-branch> + ``` + If a Jira ticket was provided, include the ticket ID in the branch name: + `fix/RHIDP-XXXX-e2e-<short-description>` + +- **On any other branch** (e.g., `fix/e2e-*`): You're likely already on a feature branch. **Ask the user** whether to: + 1. Use the current branch as-is + 2. Create a new branch from the upstream release branch + +### Phase 3: Deploy RHDH + +**Skill**: `e2e-deploy-rhdh` + +Deploy RHDH to a cluster using `e2e-tests/local-run.sh`. CLI mode requires **all three** flags (`-j`, `-r`, `-t`): + +**OCP jobs** — use `-s` (deploy-only) to skip automated test execution so you can run the specific failing test manually: +```bash +cd e2e-tests +./local-run.sh -j -r -t -s +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`.
These jobs require the full execution pipeline and do not support deploy-only mode: +```bash +cd e2e-tests +./local-run.sh -j -r -t +``` + +Use the **full Prow CI job name** for `-j` (not shortened names). + +Select the image repo and tag based on the release branch: +- `main` → `-r rhdh-community/rhdh -t next` +- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` +- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` + +After deployment completes, set up the local test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +**Decision gate**: Before attempting deployment, verify cluster connectivity (`oc whoami`). If no cluster is available, **ask the user for explicit approval** before skipping this phase — do not skip silently. If deployment fails, the `e2e-deploy-rhdh` skill has error recovery procedures. If deployment cannot be recovered after investigation, report the deployment issue and stop. + +### Phase 4: Reproduce Failure + +**Skill**: `e2e-reproduce-failure` + +Run the specific failing test to confirm it reproduces locally. Use `--project=any-test` to avoid running the smoke test dependency — it matches any spec file without extra overhead: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Decision gates**: +- **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. +- **Consistent failure**: Proceed to Phase 5 +- **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability +- **Cannot reproduce** (passes every time after 10 runs): Before giving up, try running the entire CI project with `CI=true yarn playwright test --project= --retries=0` to simulate CI conditions (3 workers, full test suite). If that also passes, report the results and **ask the user for explicit approval** before proceeding. 
+ +### Phase 5: Diagnose and Fix + +**Skill**: `e2e-diagnose-and-fix` + +Analyze the failure and implement a fix: + +1. **Classify the failure**: locator drift, timing, assertion mismatch, data dependency, platform-specific, deployment config +2. **Use Playwright Test Agents**: Invoke the healer agent (`@playwright-test-healer`) for automated test repair — it can debug the test, inspect the UI, generate locators, and edit the code +3. **Follow Playwright best practices**: Consult the `playwright-locators` and `ci-e2e-testing` project rules. Use semantic role-based locators (`getByRole`, `getByLabel`), auto-waiting assertions, Page Object Model, component annotations. Fetch official Playwright best practices via Context7 or https://playwright.dev/docs/best-practices if needed +4. **Cross-repo investigation**: If the issue is in deployment config, search `rhdh-operator` and `rhdh-chart` repos. Use Sourcebot or Context7 if available; otherwise fall back to `gh search code` or clone the repo locally and grep + +**Decision gate**: If the analysis reveals a product bug (not a test issue), you must be **absolutely certain** before marking a test with `test.fixme()`. The Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with `// TODO:` linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Proceed to Phase 6 with the `test.fixme()` change + +### Phase 6: Verify Fix + +**Skill**: `e2e-verify-fix` + +Verify the fix: +1. Run the fixed test once — must pass +2. Run 5 times — must pass 5/5 +3. Run code quality checks: `yarn tsc:check`, `yarn lint:check`, `yarn prettier:check` +4. 
Fix any lint/formatting issues + +**Decision gate**: If the test still fails or is flaky, return to Phase 5 and iterate. If verification cannot be run (no cluster, environment issues), **ask the user for explicit approval** before proceeding without it. + +### Phase 7: Submit PR and Handle Review + +**Skill**: `e2e-submit-and-review` + +1. **Resolve pre-commit hooks**: Run `yarn install` in all relevant workspaces (root, `e2e-tests/`, `.ci/`) before committing +2. **Commit**: Stage changes, commit with conventional format +3. **Push**: `git push -u origin ` +4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` +5. **Trigger Qodo review**: Comment `/agentic_review` on the PR +6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +7. **Address feedback**: Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +9. **Monitor CI**: Watch CI checks with `gh pr checks` + +### Final Report + +After all phases complete, produce a summary: + +``` +E2E Fix Summary: +- Input: +- Test: () +- Branch: +- Root cause: +- Fix: +- Verification: +- PR: +- CI Status: +- Qodo Review: +``` diff --git a/.rulesync/rules/e2e-fix-workflow.md b/.rulesync/rules/e2e-fix-workflow.md new file mode 100644 index 0000000000..6718e915cc --- /dev/null +++ b/.rulesync/rules/e2e-fix-workflow.md @@ -0,0 +1,355 @@ +--- +root: false +targets: + - '*' +globs: [] +--- +# E2E Test Fix Workflow + +This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. 
It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. + +## Table of Contents + +- [Workflow Overview](#workflow-overview) +- [Parsing CI Failures](#parsing-ci-failures) +- [Branch and Deployment Setup](#branch-and-deployment-setup) +- [Reproducing Failures](#reproducing-failures) +- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) +- [Playwright Test Agents](#playwright-test-agents) +- [Verification and PR Submission](#verification-and-pr-submission) + +## Workflow Overview + +The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: + +1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +2. **Setup Fix Branch** — Create a branch from the correct upstream release branch +3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** — Confirm the failure reproduces locally +5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents +6. **Verify Fix** — Run the test multiple times and check code quality +7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI + +Every phase except Phase 2 (branch setup) has a corresponding skill (in `.opencode/skill/`, `.claude/skills/`, and `.cursor/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. + +**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. + +## Parsing CI Failures + +### Prow URL Structure + +``` +https://prow.ci.openshift.org/view/gs/test-platform-results/logs/<job-name>/<build-id> +``` + +Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: +``` +✘ [] › /.spec.ts: +``` + +### Job Name Mapping Tables + +#### Job Name → Release Branch + +| Pattern in job name | Release branch | +|---------------------|---------------| +| `*-rhdh-main-*` | `main` | +| `*-rhdh-release-1.9-*` | `release-1.9` | +| `*-rhdh-release-1.8-*` | `release-1.8` | + +#### Job Name → Platform and Deployment Method + +| Pattern | Platform | Method | +|---------|----------|--------| +| `*ocp*helm*` | OCP | Helm | +| `*ocp*operator*` | OCP | Operator | +| `*aks*helm*` | AKS | Helm | +| `*aks*operator*` | AKS | Operator | +| `*eks*helm*` | EKS | Helm | +| `*eks*operator*` | EKS | Operator | +| `*gke*helm*` | GKE | Helm | +| `*gke*operator*` | GKE | Operator | +| `*osd-gcp*` | OSD-GCP | Helm/Operator | + +#### Job Name → Playwright Projects + +| Job pattern | Projects | +|-------------|----------| +| `*ocp*helm*nightly*` (not upgrade) | `showcase`, `showcase-rbac`, `showcase-runtime`, `showcase-sanity-plugins`, `showcase-localization-*` | +| `*ocp*helm*upgrade*` | `showcase-upgrade` | +| `*ocp*operator*nightly*` (not auth) | `showcase-operator`, `showcase-operator-rbac` | +| `*ocp*operator*auth-providers*` | `showcase-auth-providers` | +| `*ocp*helm*pull*` | `showcase`, `showcase-rbac` | +| `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | +| `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | + +#### Job Name → local-run.sh `-j` Parameter + +Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. 
+ +**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` +**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` + +Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. + +#### Release Branch → Image Repo and Tag + +| Branch | `-r` (image repo) | `-t` (image tag) | +|--------|-------------------|-------------------| +| `main` | `rhdh-community/rhdh` | `next` | +| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | +| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | + +## Branch and Deployment Setup + +### Branch Creation + +Always create branches from upstream, never from local copies: + +```bash +git fetch upstream +git checkout -b fix/e2e-<short-description> upstream/<release-branch> +``` + +### Deployment via local-run.sh + +CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. + +```bash +cd e2e-tests +# OCP jobs: use -s to deploy only, then run tests manually +./local-run.sh -j <full-prow-job-name> -r <image-repo> -t <image-tag> -s +# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required +./local-run.sh -j <full-prow-job-name> -r <image-repo> -t <image-tag> +``` + +Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `kubectl`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
+ +After deployment, source the test environment: +```bash +source e2e-tests/local-test-setup.sh +``` + +### Deployment Error Recovery + +| Error | Investigation | Common Fix | +|-------|--------------|------------| +| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | +| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | +| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | +| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | + +For config issues, search these repos for reference: +- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts +- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults + +## Reproducing Failures + +### Test Execution + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 --workers=1 +``` + +### Flakiness Detection + +If the test passes on first run, repeat 10 times: +- **10/10 pass** → cannot reproduce (check environment differences) +- **Mixed results** → flaky (focus on reliability improvements) +- **0/10 pass** → consistent failure + +### Debugging Modes + +```bash +# Headed (visible browser) +yarn playwright test --project= --headed + +# Debug (Playwright Inspector) +yarn playwright test --project= --debug + +# View trace +yarn playwright show-trace test-results//trace.zip +``` + +## Diagnosing and Fixing Tests + +### Failure Classification + +1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors +2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals +3. **Assertion mismatch** — Expected values changed → update test data or report product bug +4. **Data dependency** — Test data missing → add proper setup/teardown +5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional +6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` + +### RHDH Coding Conventions (Required) + +**Locators** — Always use semantic role-based locators: +```typescript +// GOOD +page.getByRole('button', { name: 'Create' }) +page.getByRole('heading', { name: 'Catalog' }) +page.getByText('No results found') + +// BAD — deprecated CSS selectors +page.locator('.MuiButton-root') +page.locator('[data-testid="..."]') +``` + +**Component annotations** — Every spec file must have: +```typescript +test.beforeAll(async ({}, testInfo) => { + testInfo.annotations.push({ + type: 'component', + description: 'your_component_name', + }); +}); +``` + +**Retry patterns** for async assertions: +```typescript +await expect(async () => { + await page.reload(); + await expect(page.getByText('entity')).toBeVisible(); +}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); +``` + +**Conditional skips**: +```typescript +import { skipIfJobName } from '../utils/helper'; +import * as constants from '../utils/constants'; +skipIfJobName(constants.GKE_JOBS); +``` + +**Forbidden patterns**: +- `page.waitForNetworkIdle()` / `networkidle` +- Raw CSS class selectors (`.MuiButton-root`) +- `page.waitForTimeout()` for synchronization +- Hardcoded secrets or credentials + +### Key Utility Classes + +| Class | Path | Purpose | +|-------|------|---------| +| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | +| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | +| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | +| `KubeClient` | `utils/kube-client.ts` | K8s resource management | +| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | +| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | + +### Product Bug Decision + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product 
bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: +1. File or update a Jira bug in the `RHDHBUGS` project +2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme('Description of the product bug'); + ``` +3. Do **not** change assertions to match broken behavior +4. Proceed with the `test.fixme()` change + +## Playwright Test Agents + +The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. + +### Available Agents + +| Agent | Mode | Purpose | +|-------|------|---------| +| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | +| `playwright-test-generator` | subagent | Create new test code from a test plan | +| `playwright-test-planner` | subagent | Explore app and create test plans | + +### Healer Agent Usage (Primary for Fixes) + +The healer agent is the primary tool for test repair: +1. Runs tests with `test_run` to identify failures +2. Debugs with `test_debug` to step through failing tests +3. Inspects UI state via `browser_snapshot`, `browser_console_messages` +4. Generates correct locators with `browser_generate_locator` +5. Edits test code with `edit`/`write` tools +6. Re-runs tests to verify the fix + +Invoke with: `@playwright-test-healer Fix the failing test in ` + +## Verification and PR Submission + +### Verification Checklist + +1. Single test run passes +2. 5 consecutive runs pass (stability) +3. `yarn tsc:check` passes +4. `yarn lint:check` passes +5. 
`yarn prettier:check` passes + +### Pre-Commit Hooks + +Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: + +```bash +yarn install # Root workspace +cd e2e-tests && yarn install && cd .. # If e2e-tests files changed +cd .ci && yarn install && cd .. # If .ci files changed +``` + +### PR Creation + +Always create PRs as **drafts**: + +```bash +git push -u origin <branch-name> +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:<branch-name>" --base <release-branch> +``` + +### Qodo Review + +```bash +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. + +### Trigger Affected CI Job + +After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: + +```bash +# List available presubmit jobs +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/test ?" + +# Trigger the matching presubmit job +gh pr comment <pr-number> --repo redhat-developer/rhdh --body "/test <presubmit-job-name>" +``` + +Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
+ +### CI Monitoring + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` + +## Reference Files + +| Category | Key files | +|----------|-----------| +| Playwright config | `e2e-tests/playwright.config.ts` | +| Project names (SOT) | `e2e-tests/playwright/projects.json` | +| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | +| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | +| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | +| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | +| ConfigMaps | `.ci/pipelines/resources/config_map/` | +| Helm values | `.ci/pipelines/value_files/` | +| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | +| Environment vars | `.ci/pipelines/env_variables.sh` | +| Local test runner | `e2e-tests/local-run.sh` | +| Local test env | `e2e-tests/local-test-setup.sh` | +| Qodo config | `.pr_agent.toml` | diff --git a/.rulesync/skills/e2e-deploy-rhdh/SKILL.md b/.rulesync/skills/e2e-deploy-rhdh/SKILL.md new file mode 100644 index 0000000000..219a7c5cf5 --- /dev/null +++ b/.rulesync/skills/e2e-deploy-rhdh/SKILL.md @@ -0,0 +1,234 @@ +--- +name: e2e-deploy-rhdh +description: >- + Deploy RHDH to an OpenShift cluster using local-run.sh for E2E test execution, + with autonomous error recovery for deployment failures +targets: + - '*' +--- +# Deploy RHDH + +Deploy Red Hat Developer Hub to a cluster for E2E test execution using the existing `local-run.sh` workflow. + +## When to Use + +Use this skill when you need a running RHDH instance to reproduce and fix a test failure. 
+ +## Prerequisites + +Before running the deployment, verify these tools are installed: + +```bash +# Required tools (local-run.sh checks these automatically) +podman --version # Container runtime +oc version # OpenShift CLI +kubectl version --client # Kubernetes CLI +vault --version # HashiCorp Vault (for secrets) +jq --version # JSON processor +curl --version # HTTP client +rsync --version # File sync +bc --version # Calculator (for resource checks) +``` + +### Podman Machine Requirements + +The podman machine must be running with adequate resources: + +```bash +podman machine inspect | jq '.Resources' +# Requires: >= 8GB RAM, >= 4 CPUs +``` + +If resources are insufficient: +```bash +podman machine stop +podman machine set --memory 8192 --cpus 4 +podman machine start +``` + +## Deployment Using local-run.sh + +The primary deployment method uses `e2e-tests/local-run.sh`, which handles everything: +Vault authentication, cluster service account setup, RHDH deployment, and test execution. + +### Execution Rules + +**CRITICAL — deployment is a long-running operation:** + +1. **Never run `local-run.sh` in the background.** Operator installations can take 20-30 minutes. Use the Bash tool with `timeout: 600000` (10 minutes) and if it times out, **check the container log** — do NOT assume failure. +2. **Before starting a deployment, check for existing containers:** + ```bash + podman ps --format "{{.Names}} {{.Status}}" | grep -i rhdh-e2e-runner + ``` + If a deployment container is already running, **wait for it to finish** instead of starting a new one. Monitor via the container log: + ```bash + tail -f e2e-tests/.local-test/container.log + ``` +3. **Never launch concurrent deployments.** Two deployments to the same cluster will race and both fail. If a deployment appears stuck, check the container log and cluster state before deciding it failed. +4. **How to detect actual failure vs slow progress:** The operator install script outputs detailed debug logs. 
If the container log shows active progress (timestamps advancing), the deployment is still running. Only consider it failed if: + - The podman container has exited (`podman ps` shows no running container) + - AND the container log shows an error message (e.g., "Failed install RHDH Operator") + +### CLI Mode (Preferred) + +**CRITICAL**: CLI mode requires **all three** flags (`-j`, `-r`, `-t`). If `-r` is omitted, the script falls into interactive mode and will hang in automated contexts. + +```bash +cd e2e-tests +./local-run.sh -j -r -t [-s] +``` + +**Example — OCP job** (deploy-only with `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s +``` + +**Example — K8s job (AKS/EKS/GKE)** (full execution, no `-s`): +```bash +cd e2e-tests +./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next +``` + +**Parameters:** +- `-j / --job`: The **full Prow CI job name** extracted from the Prow URL. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match, so the full name works correctly. Example: `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly` +- `-r / --repo`: Image repository (**required** for CLI mode — without it the script enters interactive mode) +- `-t / --tag`: Image tag (e.g., `1.9`, `next`) +- `-s / --skip-tests`: Deploy only, skip test execution. **OCP jobs only** — K8s jobs (AKS, EKS, GKE) do not support this flag and require the full execution pipeline + +**WARNING**: Do NOT use shortened job names like `nightly-ocp-helm` for `-j` — these do not match the glob patterns in `openshift-ci-tests.sh`. + +### Image Selection + +Refer to the `e2e-fix-workflow` rule for the release branch to image repo/tag mapping table. 
+ +### Deploy-Only Mode (OCP Jobs Only) + +For OCP jobs, deploy without running tests so you can run specific tests manually: + +```bash +./local-run.sh -j -r -t -s +``` + +**Note**: K8s jobs (AKS, EKS, GKE) do not support deploy-only mode. They require the full execution pipeline — run without `-s`. + +### What local-run.sh Does + +1. **Validates prerequisites**: Checks all required tools and podman resources +2. **Verifies the image**: Checks the image exists on quay.io via the Quay API +3. **Pulls the runner image**: `quay.io/rhdh-community/rhdh-e2e-runner:main` +4. **Authenticates to Vault**: OIDC-based login for secrets +5. **Sets up cluster access**: Creates `rhdh-local-tester` service account with cluster-admin, generates 48h token +6. **Copies the repo**: Syncs the local repo to `.local-test/rhdh/` (excludes node_modules) +7. **Runs a Podman container**: Executes `container-init.sh` inside the runner image, which: + - Fetches all Vault secrets to `/tmp/secrets/` + - Logs into the cluster + - Sets platform-specific environment variables + - Runs `.ci/pipelines/openshift-ci-tests.sh` for deployment + +### Post-Deployment: Setting Up for Manual Testing + +After `local-run.sh` completes (with `-s` for OCP jobs, or after full execution for K8s jobs), set up the environment for headed Playwright testing: + +```bash +# Source the test setup (choose 'showcase' or 'rbac') +source e2e-tests/local-test-setup.sh showcase +# or +source e2e-tests/local-test-setup.sh rbac +``` + +This exports: +- `BASE_URL` — The RHDH instance URL +- `K8S_CLUSTER_URL` — Cluster API server URL +- `K8S_CLUSTER_TOKEN` — Fresh service account token +- All Vault secrets as environment variables + +Verify RHDH is accessible: +```bash +curl -sSk "$BASE_URL" -o /dev/null -w "%{http_code}" +# Should return 200 +``` + +## Deployment Error Recovery + +### Common Deployment Failures + +#### CrashLoopBackOff + +**Symptoms**: Pod repeatedly crashes and restarts. 
+ +**Investigation**: +```bash +# Check pod status +oc get pods -n +# Check pod logs +oc logs -n --previous +# Check events +oc get events -n --sort-by=.lastTimestamp +``` + +**Common causes and fixes**: +1. **Missing ConfigMap**: The app-config ConfigMap wasn't created → check `.ci/pipelines/resources/config_map/` for the correct template +2. **Bad plugin configuration**: A dynamic plugin is misconfigured → check `dynamic-plugins-config` ConfigMap against `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +3. **Missing secrets**: Required secrets not mounted → verify secrets exist in the namespace +4. **Node.js errors**: Check for JavaScript errors in logs that indicate code issues + +#### ImagePullBackOff + +**Investigation**: +```bash +oc describe pod -n | grep -A5 "Events" +``` + +**Common causes**: +1. **Image doesn't exist**: Verify on quay.io: `curl -s 'https://quay.io/api/v1/repository/rhdh/rhdh-hub-rhel9/tag/?filter_tag_name=like:'` +2. **Pull secret missing**: Check `namespace::setup_image_pull_secret` in `.ci/pipelines/lib/namespace.sh` +3. **Registry auth**: Ensure the pull secret has correct credentials + +#### Helm Install Failure + +**Investigation**: +```bash +helm list -n +helm status -n +``` + +**Common causes**: +1. **Values file error**: Check merged values against `.ci/pipelines/value_files/values_showcase.yaml` +2. **Chart version mismatch**: Verify chart version with `helm::get_chart_version` from `.ci/pipelines/lib/helm.sh` + +#### Operator Deployment Failure + +**Investigation**: +```bash +oc get backstage -n +oc describe backstage -n +oc get csv -n # Check operator subscription status +``` + +**Common causes**: +1. **Backstage CR misconfigured**: Compare against `.ci/pipelines/resources/rhdh-operator/rhdh-start.yaml` +2. **Operator not installed**: Check CatalogSource and Subscription +3. 
**CRD not ready**: Wait for CRD with `k8s_wait::crd` pattern from `.ci/pipelines/lib/k8s-wait.sh` + +### Cross-Repo Investigation + +When deployment issues stem from the operator or chart, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search `rhdh-operator` and `rhdh-chart` repos for specific error patterns or configuration keys +2. **Context7** (if available): query `redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` for docs and code snippets +3. **Fallback — `gh search code`**: `gh search code '' --repo redhat-developer/rhdh-operator` or `redhat-developer/rhdh-chart` +4. **Fallback — local clone**: clone the repo into a temp directory and grep for the pattern + +Key areas to look for: +- **rhdh-operator**: Backstage CR configuration, CatalogSource setup, operator installation scripts +- **rhdh-chart**: Helm values schema, chart templates, default configurations + +## Reference Files + +- Main deployment scripts: `.ci/pipelines/openshift-ci-tests.sh`, `.ci/pipelines/utils.sh` +- Library scripts: `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh`, `.ci/pipelines/lib/k8s-wait.sh`, `.ci/pipelines/lib/testing.sh` +- Helm values: `.ci/pipelines/value_files/` +- ConfigMaps: `.ci/pipelines/resources/config_map/` +- Operator CRs: `.ci/pipelines/resources/rhdh-operator/` +- Environment variables: `.ci/pipelines/env_variables.sh` diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md new file mode 100644 index 0000000000..46d5cb9c17 --- /dev/null +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -0,0 +1,252 @@ +--- +name: e2e-diagnose-and-fix +description: >- + Analyze a failing E2E test, determine root cause, and fix it using Playwright + Test Agents and RHDH project conventions +targets: + - '*' +--- +# Diagnose and Fix + +Analyze the root cause of a failing E2E test and implement 
a fix following RHDH project conventions. + +## When to Use + +Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. + +## MANDATORY: Always Use the Playwright Healer Agent + +**The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. + +### Healer Initialization + +Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. Use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. + +### Environment Setup for Healer + +The healer agent needs a `.env` file in `e2e-tests/` with all required environment variables (BASE_URL, K8S_CLUSTER_TOKEN, vault secrets, etc.). Generate it by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +The `.env` file is gitignored — never commit it. To regenerate (e.g. after token expiry), re-run the command above. + +### Invoking the Healer + +Invoke the healer agent via the Task tool with `subagent_type: general`: + +``` +Task: "You are the Playwright Test Healer agent. Run the failing test, debug it, inspect the UI, and fix the code. 
+Working directory: /e2e-tests +Test: --project=any-test -g '' +Run command: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g ''" +``` + +The healer will autonomously: +1. Run the test and identify the failure +2. Examine error screenshots and error-context.md +3. Debug the test step-by-step using Playwright Inspector +4. Inspect the live UI via page snapshots +5. Generate correct locators and fix assertions +6. Edit the test code +7. Re-run to verify the fix + +### When to Supplement with Manual Diagnosis + +After the healer has run, supplement with manual investigation only for: +- **Data dependency failures** (category 4): The healer may not know how to create missing test data +- **Platform-specific failures** (category 5): The healer doesn't have context about platform differences +- **Deployment configuration issues** (category 6): The healer cannot modify ConfigMaps or Helm values +- **Product bugs**: When the healer confirms the test is correct but the application behavior is wrong + +## Failure Pattern Recognition + +### 1. Locator Drift + +**Symptoms**: `Error: locator.click: Error: strict mode violation` or `Timeout waiting for selector` or element not found errors. + +**Cause**: The UI has changed and selectors no longer match. + +**Fix approach**: +- Invoke the Playwright healer agent (`@playwright-test-healer`) — it will replay the test, inspect the current UI via page snapshots, generate updated locators, and edit the code automatically +- If the healer cannot resolve it, manually update to semantic role-based locators (see project rules) +- Verify the updated locator works by re-running the test + +### 2. Timing / Race Condition + +**Symptoms**: Test passes sometimes, fails sometimes. Errors like `Timeout 10000ms exceeded` or assertions failing on stale data. + +**Cause**: Test acts before the UI is ready, or waits are insufficient. 
+ +**Fix approach**: +- Invoke the Playwright healer agent first — it can identify timing issues by stepping through the test and observing UI state transitions +- If manual fixes are needed: replace `page.waitForTimeout()` with proper waits: `expect(locator).toBeVisible()`, `page.waitForLoadState()` +- Use `expect().toPass()` with retry intervals for inherently async checks: + ```typescript + await expect(async () => { + const text = await page.locator('.count').textContent(); + expect(Number(text)).toBeGreaterThan(0); + }).toPass({ intervals: [1000, 2000, 5000], timeout: 30_000 }); + ``` +- Increase action/navigation timeouts if the operation is legitimately slow +- Use `Common.waitForLoad()` utility before interacting with the page after navigation + +### 3. Assertion Mismatch + +**Symptoms**: `expect(received).toBe(expected)` with clearly different values. + +**Cause**: The expected value has changed due to a product change, data change, or environment difference. + +**Fix approach**: +- Determine if the change is intentional (check recent commits to the release branch) +- If intentional: update the expected value in the test or test data +- If unintentional: this may be a product bug — but you must first exhaust all other possibilities using the Playwright healer agent. Only after the healer confirms the test is correct and the application behavior is wrong should you mark it with `test.fixme()` (see the "Decision: Product Bug vs Test Issue" section below) + +### 4. Data Dependency + +**Symptoms**: Test fails because expected entities, users, or resources don't exist. + +**Cause**: Test data assumptions no longer hold (GitHub repos deleted, Keycloak users changed, catalog entities removed). 
+ +**Fix approach**: +- Update test data in `e2e-tests/playwright/support/test-data/` or `e2e-tests/playwright/data/` +- Ensure test creates its own data in `beforeAll`/`beforeEach` and cleans up in `afterAll`/`afterEach` +- Use `APIHelper` for programmatic setup (GitHub API, Backstage catalog API) + +### 5. Platform-Specific Failure + +**Symptoms**: Test passes on OCP but fails on GKE/AKS/EKS, or vice versa. + +**Cause**: Platform differences (Routes vs Ingress, different auth, different network policies). + +**Fix approach**: +- Add conditional skip if the test is inherently platform-specific: + ```typescript + import { skipIfJobName, skipIfIsOpenShift } from '../utils/helper'; + // Skip on GKE + skipIfJobName(constants.GKE_JOBS); + // Skip on non-OpenShift + skipIfIsOpenShift('false'); + ``` +- Or add platform-specific logic within the test using `process.env.IS_OPENSHIFT`, `process.env.CONTAINER_PLATFORM` + +### 6. Deployment Configuration Issue + +**Symptoms**: RHDH itself is broken (500 errors, missing plugins, wrong behavior). + +**Cause**: ConfigMap or Helm values are incorrect for this test scenario. + +**Fix approach**: +- Check the ConfigMaps: `.ci/pipelines/resources/config_map/app-config-rhdh.yaml` and `app-config-rhdh-rbac.yaml` +- Check Helm values: `.ci/pipelines/value_files/` +- Check dynamic plugins config: `.ci/pipelines/resources/config_map/dynamic-plugins-config.yaml` +- Search `rhdh-operator` and `rhdh-chart` repos for configuration reference (use Sourcebot, Context7, `gh search code`, or a local clone — whichever is available) +- Fix the deployment configuration rather than the test code + +## Playwright Test Agents Reference + +The Playwright Test Agents are initialized via `npx playwright init-agents --loop=opencode` (see initialization section above). This creates an MCP server and agent definitions in `e2e-tests/opencode.json`. 
+ +### Healer Agent (MANDATORY for All Fixes) + +The healer agent is the **primary and mandatory** tool for fixing failing tests. It has access to: + +- **`test_run`**: Run tests and identify failures +- **`test_debug`**: Step through failing tests with the Playwright Inspector +- **`browser_snapshot`**: Capture accessibility snapshots of the live UI +- **`browser_console_messages`**: Read browser console logs +- **`browser_network_requests`**: Monitor network requests +- **`browser_generate_locator`**: Generate correct locators from the live UI +- **`edit`/`write`**: Edit test code directly + +The healer autonomously cycles through: run → debug → inspect → fix → re-run until the test passes. + +### Planner Agent (For Understanding Complex Scenarios) + +Use `@playwright-test-planner` when you need to understand a complex user flow before fixing a test. It explores the app and maps out the interaction patterns. + +### Generator Agent (For Creating New Test Steps) + +Use `@playwright-test-generator` when a test needs major rework and you need to generate new test steps from a plan. + +## Coding Conventions + +Every fix **must** follow Playwright best practices. Before writing or modifying test code, consult these resources in order: + +1. **Project rules** (always available locally): + - `playwright-locators` rule — locator priority, anti-patterns, assertions, Page Objects, DataGrid handling + - `ci-e2e-testing` rule — test structure, component annotations, project configuration, CI scripts + +2. **Official Playwright docs** (fetch via Context7 if available, otherwise use web): + - Best practices: https://playwright.dev/docs/best-practices + - Locators guide: https://playwright.dev/docs/locators + - Assertions: https://playwright.dev/docs/test-assertions + - Auto-waiting: https://playwright.dev/docs/actionability + +### Key requirements + +- **Locators**: always prefer `getByRole()`, `getByLabel()`, `getByPlaceholder()` over CSS/XPath selectors. 
Never use MUI class names (`.MuiButton-label`, `.MuiDataGrid-*`). +- **Assertions**: use Playwright's auto-waiting assertions (`expect(locator).toBeVisible()`) — never use manual `waitForSelector()` or `waitForTimeout()`. +- **Component annotations**: every `*.spec.ts` file must have a `component` annotation in `test.beforeAll`. +- **Page Object Model**: return `Locator` objects from page classes, not raw strings or elements. +- **No `force: true`**: if a click requires `force`, the locator or timing is wrong — fix the root cause. +- **No `waitForNetworkIdle()`**: use proper load-state waits or assertion-based waiting instead. + +## Cross-Repo Investigation + +When the issue is in RHDH deployment/config rather than test code, search the relevant repos using whichever tool is available. Try them in this order and use the first one that works: + +1. **Sourcebot** (if available): search repos for specific error patterns or configuration keys +2. **Context7** (if available): query repos for docs and code snippets +3. **Fallback — `gh search code`**: e.g. `gh search code '' --repo redhat-developer/rhdh-operator` +4. **Fallback — local clone**: clone the repo into a temp directory and grep + +### rhdh-operator (`redhat-developer/rhdh-operator`) +- Backstage CR specification and defaults +- CatalogSource configuration +- Operator installation scripts (especially `install-rhdh-catalog-source.sh`) + +### rhdh-chart (`redhat-developer/rhdh-chart`) +- Helm values.yaml schema and defaults +- Chart templates for Deployments, Services, ConfigMaps +- Default dynamic plugin configurations + +### Other Repositories +- **backstage/backstage**: For upstream Backstage API changes +- **redhat-developer/red-hat-developers-documentation-rhdh**: For documentation on expected behavior + +## Decision: Product Bug vs Test Issue + +**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug before marking a test this way. Follow this checklist: + +1. 
**Run the Playwright healer agent** — it must confirm that the test logic is correct and the application behavior is wrong +2. **Verify manually** — inspect the live UI, check network responses, and confirm the product is genuinely broken (not a stale cache, missing data, or environment-specific issue) +3. **Check recent commits** — search the release branch for recent product changes that could explain the behavior change +4. **Ask the user for confirmation** before applying `test.fixme()` — do not decide unilaterally + +Only after all of the above confirm a product bug: + +1. **File a Jira bug** in the `RHDHBUGS` project (or update the existing ticket) documenting the product regression +2. **Mark the test with `test.fixme()`**, preceded by a `// TODO:` comment linking to the Jira ticket: + ```typescript + // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX + test.fixme(true, 'Button no longer visible after version upgrade'); + ``` +3. **Do not change the test assertions** to match broken behavior +4. **Proceed to `e2e-submit-and-review`** with the `test.fixme()` change diff --git a/.rulesync/skills/e2e-parse-ci-failure/SKILL.md b/.rulesync/skills/e2e-parse-ci-failure/SKILL.md new file mode 100644 index 0000000000..dd211a74f1 --- /dev/null +++ b/.rulesync/skills/e2e-parse-ci-failure/SKILL.md @@ -0,0 +1,204 @@ +--- +name: e2e-parse-ci-failure +description: >- + Parse a Prow CI job URL or Jira ticket to extract E2E test failure details + including test name, spec file, release branch, platform, and error messages +targets: + - '*' +--- +# Parse CI Failure + +Extract structured failure context from a Prow job URL or Jira ticket for an RHDH E2E CI failure.
+ +## When to Use + +Use this skill when you receive a failing Prow job URL (e.g., `https://prow.ci.openshift.org/view/gs/...`), a Jira ticket ID (e.g., `RHIDP-XXXX`), or a Jira URL (e.g., `https://redhat.atlassian.net/browse/RHIDP-XXXX`) for an E2E test failure and need to extract all relevant details before starting a fix. + +## Input Detection + +- **Playwright report URL**: URL ending in `index.html` (with optional `#?testId=...` fragment) — use Playwright MCP if available (see "Playwright Report Parsing" below), otherwise fall back to build log parsing +- **Prow URL**: Starts with `https://prow.ci.openshift.org/` — parse the job page and build log +- **Jira ticket ID**: Matches pattern `RHIDP-\d+` or similar — use Jira MCP tools to read the ticket +- **Jira URL**: Starts with `https://redhat.atlassian.net/browse/` — extract the ticket ID from the URL path (e.g., `RHIDP-XXXX` from `https://redhat.atlassian.net/browse/RHIDP-XXXX`) and then use Jira MCP tools to read the ticket + +## Prow URL Parsing + +### URL Structure + +Prow job URLs follow two patterns: + +- **Periodic/postsubmit**: `https://prow.ci.openshift.org/view/gs/test-platform-results/logs//` +- **Presubmit (PR)**: `https://prow.ci.openshift.org/view/gs/test-platform-results/pr-logs/pull/redhat-developer_rhdh///` + +Extract `` and `` from the URL path. These are the two key values needed for all derivations. + +### GCS URL Derivation + +Convert the Prow URL to a GCS artifacts URL by replacing the prefix: + +``` +Prow: https://prow.ci.openshift.org/view/gs/test-platform-results/logs// +GCS: https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ +``` + +For presubmit jobs, use `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. 
+ +Key artifacts within the GCS directory: +- **Build log**: `/build-log.txt` +- **JUnit XML**: `/artifacts/junit-results/results.xml` +- **Playwright report**: `/artifacts/playwright-report/` + +Fetch the Prow job page with WebFetch to find the job status and artifact links, then fetch the build log for test failure details. + +### Extracting Test Failures from Build Log + +Search the build log for these Playwright output patterns: + +``` +# Failing test line (primary source for test name, spec file, and project): + ✘ [] › /.spec.ts: + +# Error details (immediately after the failure line): + Error: + expect(received).toBeVisible() + Locator: + +# Summary (at the end of the log): + X failed + X passed + X skipped +``` + +Also check JUnit XML for `<testcase>` elements with `<failure>` children as a fallback. + +## Playwright Report Parsing + +When the URL points to a Playwright HTML report (`index.html`, optionally with `#?testId=...`), use Playwright MCP if available — navigate with `browser_navigate`, then `browser_snapshot` to extract test name, spec file, error, steps, retries, screenshots, and traces from the accessibility tree. Derive job metadata (``, ``, ``) from the URL path segments. + +If Playwright MCP is not available, derive the `build-log.txt` URL from the report URL and fall back to build log parsing. + +## Jira Ticket Parsing + +Use Jira MCP tools to read the ticket. Extract: + +1. **Prow job URLs** from the description or comments — then parse them using the Prow URL Parsing steps above. +2. **Test names, spec file paths, error messages, or stack traces** from the description, comments, or attachments. +3. **`affects version`** field — map to release branch (e.g., `1.10` → `main`, `1.9` → `release-1.9`, `1.8` → `release-1.8`). +4. **`component`** field for additional context (e.g., "E2E Tests", "CI/CD").
+ +## Job Name Mapping + +Refer to the **e2e-fix-workflow** rule for all mapping tables: job name to release branch, job name to platform and deployment method, job name to Playwright projects, release branch to image repo/tag, and job name to `local-run.sh` `-j` parameter. Those tables are the single source of truth and should not be duplicated here. + +When parsing a job name, apply those mapping tables to derive: release branch, platform, deployment method, Playwright projects, and `local-run.sh` flags (`-j`, `-r`, `-t`). + +## Fields Requiring Build Log Access + +Not all output fields can be derived from the Prow URL alone. The following table clarifies what requires fetching the build log or artifacts: + +| Field | Source | Derivable from URL alone? | +|-------|--------|---------------------------| +| Job name | URL path segment | Yes | +| Build ID | URL path segment | Yes | +| Release branch | Job name pattern match | Yes | +| Platform | Job name pattern match | Yes | +| Deployment method | Job name pattern match | Yes | +| Playwright projects | Job name pattern match | Yes | +| `local-run.sh` flags (`-j`, `-r`, `-t`) | Job name + release branch | Yes | +| GCS artifacts URL | Constructed from URL | Yes | +| Test name | Build log Playwright output | No — requires build log | +| Spec file | Build log Playwright output | No — requires build log | +| Specific Playwright project (of failing test) | Build log `[project]` prefix | No — requires build log | +| Error type | Build log error details | No — requires build log | +| Error message | Build log error details | No — requires build log | +| Failure count / pass count | Build log summary line | No — requires build log | + +## Output + +Produce the following structured output with four sections. + +### 1. Structured Summary + +``` +- Test name: +- Spec file: +- Playwright project: +- Release branch:
+- Platform: +- Deployment method: +- Error type: +- Error message: +- Prow URL: +- Jira ticket: +``` + +### 2. Derivation Details + +Show how each field was derived with the matching pattern. This makes the reasoning transparent and auditable. + +``` +| Field | Value | Derivation | +|--------------------|------------------------------|-----------------------------------------------------------| +| Job name | | Extracted from URL path segment | +| Build ID | | Extracted from URL path segment | +| Release branch | | Pattern `*-rhdh--*` matched in job name | +| Platform | | Pattern `**` matched in job name | +| Deployment method | | Pattern `**` matched in job name | +| Playwright project | | `[]` prefix in failing test line | +| Image repo (-r) | | Release branch `` maps to `` | +| Image tag (-t) | | Release branch `` maps to `` | +| Test name | | Parsed from `✘` line in build log | +| Spec file | | Parsed from `✘` line in build log | +| Error type | | Classified from error message pattern | +``` + +### 3. GCS Artifacts Location + +Derive and present the GCS artifacts URLs constructed from the Prow URL: + +``` +GCS Artifacts Base: + https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/logs///artifacts/ + +Build Log: + /artifacts//build-log.txt + +JUnit Results: + /artifacts//artifacts/junit-results/results.xml + +Playwright Report: + /artifacts//artifacts/playwright-report/ +``` + +For presubmit (PR) jobs, the base path uses `pr-logs/pull/redhat-developer_rhdh//` instead of `logs/`. + +### 4. local-run.sh Command + +Provide the full command ready to copy-paste, with a flag breakdown. 
+ +**OCP jobs** — use `-s` for deploy-only mode: +``` +cd e2e-tests +./local-run.sh -j -r -t -s + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +| -s | (no value) | Deploy only, skip running tests | +``` + +**K8s jobs (AKS, EKS, GKE)** — do **not** use `-s`; full execution is required: +``` +cd e2e-tests +./local-run.sh -j -r -t + +Flag breakdown: +| Flag | Value | Reason | +|------|--------------------|--------------------------------------------------| +| -j | | Full Prow job name (matches glob in CI script) | +| -r | | Image repo derived from release branch | +| -t | | Image tag derived from release branch | +``` diff --git a/.rulesync/skills/e2e-reproduce-failure/SKILL.md b/.rulesync/skills/e2e-reproduce-failure/SKILL.md new file mode 100644 index 0000000000..2244acc474 --- /dev/null +++ b/.rulesync/skills/e2e-reproduce-failure/SKILL.md @@ -0,0 +1,202 @@ +--- +name: e2e-reproduce-failure +description: >- + Run a specific failing E2E test against a deployed RHDH instance to confirm + the failure and determine if it is consistent or flaky +targets: + - '*' +--- +# Reproduce Failure + +Run the failing test locally against a deployed RHDH instance to confirm the failure and classify it. + +## When to Use + +Use this skill after deploying RHDH (via `e2e-deploy-rhdh`) when you need to verify the test failure reproduces locally before attempting a fix. 
+ +## Prerequisites + +- RHDH deployed and accessible (BASE_URL set) +- Environment configured via `source e2e-tests/local-test-setup.sh ` +- Node.js 22 and Yarn available +- Playwright browsers installed (`cd e2e-tests && yarn install && yarn playwright install chromium`) + +## Environment Setup + +### Source the Test Environment + +```bash +# For non-RBAC tests (showcase, showcase-k8s, showcase-operator, etc.) +source e2e-tests/local-test-setup.sh showcase + +# For RBAC tests (showcase-rbac, showcase-rbac-k8s, showcase-operator-rbac) +source e2e-tests/local-test-setup.sh rbac +``` + +This exports all required environment variables: `BASE_URL`, `K8S_CLUSTER_URL`, `K8S_CLUSTER_TOKEN`, and all Vault secrets. + +### Verify Environment + +```bash +echo "BASE_URL: $BASE_URL" +curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" +``` + +## MANDATORY: Use the Playwright Healer Agent for Reproduction + +Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. + +### Healer Initialization (First Time Only) + +Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +### Environment Setup + +Generate the `.env` file by passing the `--env` flag to `local-test-setup.sh`: + +```bash +cd e2e-tests +source local-test-setup.sh --env +``` + +To regenerate (e.g. after token expiry), re-run the command above. + +### Project Selection + +When running specific test files or test cases, use `--project=any-test` to avoid running the smoke test dependency. 
The `any-test` project matches any spec file without extra overhead: + +```bash +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +### Running via Healer Agent + +Invoke the healer agent via the Task tool: + +``` +Task: "You are the Playwright Test Healer agent. Run the following test to reproduce a CI failure. +Working directory: /e2e-tests +Test: --project=any-test -g '' +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If the test fails, examine the error output, screenshots in test-results/, and error-context.md. +Report: pass/fail, exact error message, what the UI shows at the point of failure." +``` + +### Fallback: Direct Execution + +If the healer agent is unavailable, run tests directly: + +```bash +cd e2e-tests +yarn playwright test --project=any-test --retries=0 --workers=1 +``` + +**Examples:** +```bash +# A specific spec file +yarn playwright test playwright/e2e/plugins/topology/topology.spec.ts --project=any-test --retries=0 --workers=1 + +# A specific test by name +yarn playwright test -g "should display topology" --project=any-test --retries=0 --workers=1 +``` + +### Headed / Debug Mode + +For visual debugging when manual investigation is needed: + +```bash +# Headed mode (visible browser) +yarn playwright test --project=any-test --retries=0 --workers=1 --headed + +# Debug mode (Playwright Inspector, step-by-step) +yarn playwright test --project=any-test --retries=0 --workers=1 --debug +``` + +## Flakiness Detection + +If the first run **passes** (doesn't reproduce the failure), run multiple times to check for flakiness: + +```bash +cd e2e-tests + +# Run 10 times and track results +PASS=0; FAIL=0 +for i in $(seq 1 10); do + echo "=== Run $i ===" + if yarn playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Results: $PASS passed, $FAIL failed out of 10 runs" +``` + +## Result 
Classification + +### Consistent Failure +- **Definition**: Fails every time (10/10 runs fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill +- **Confidence**: High — the fix can be verified reliably + +### Flaky +- **Definition**: Fails some runs but not all (e.g., 3/10 fail) +- **Action**: Proceed to `e2e-diagnose-and-fix` skill, focus on reliability improvements +- **Typical causes**: Race conditions, timing dependencies, state leaks between tests, external service variability + +### Cannot Reproduce +- **Definition**: Passes all runs locally (0/10 fail) +- **Before giving up**, try running the **entire Playwright project** that failed in CI with `CI=true` to simulate CI conditions (this sets the worker count to 3, matching CI): + ```bash + cd e2e-tests + CI=true yarn playwright test --project= --retries=0 + ``` + Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This runs all tests in that project concurrently, which can expose race conditions and resource contention that single-test runs miss. +- If the full project run also passes, **stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. 
+- **Investigation**: Check environment differences between local and CI: + - **Cluster version**: CI may use a different OCP version (check the cluster pool version) + - **Image version**: CI may use a different RHDH image + - **Resource constraints**: CI clusters may have fewer resources + - **Parallel execution**: CI runs with 3 workers; the full project run above simulates this + - **Network**: CI clusters are in `us-east-2` AWS region + - **External services**: GitHub API rate limits, Keycloak availability + +## Artifact Collection + +### Playwright Traces + +After a test failure, traces are saved in `e2e-tests/test-results/`: + +```bash +# View a trace +yarn playwright show-trace test-results//trace.zip +``` + +### HTML Report + +```bash +# Generate and open the HTML report +yarn playwright show-report +``` + +### Screenshots and Videos + +On failure, screenshots and videos are saved in `test-results//`: +- `test-failed-1.png` — Screenshot at failure point +- `video.webm` — Full test recording (if video is enabled) + +## Test Project Reference + +Refer to the e2e-fix-workflow rule for the Playwright project → config map mapping. diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md new file mode 100644 index 0000000000..1e0831fba4 --- /dev/null +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -0,0 +1,318 @@ +--- +name: e2e-submit-and-review +description: >- + Create a PR for an E2E test fix, trigger Qodo agentic review, address review + comments, and monitor CI results +targets: + - '*' +--- +# Submit and Review + +Create a pull request for the E2E test fix, trigger automated review, address feedback, and verify CI passes. + +## When to Use + +Use this skill after verifying the fix (via `e2e-verify-fix`) when all tests pass and code quality checks are clean.
+ +## Step 0: Resolve Pre-Commit Hooks + +Before committing, ensure all related workspaces have their dependencies installed so pre-commit hooks (lint-staged, rulesync, etc.) pass: + +```bash +# Root workspace +yarn install + +# If e2e-tests files were changed +cd e2e-tests && yarn install && cd .. + +# If .ci files were changed +cd .ci && yarn install && cd .. +``` + +If a pre-commit hook fails during commit, fix the issue and create a **new** commit — do not amend. + +## Step 1: Commit Changes + +### Stage and Commit + +```bash +# Stage only relevant files +git add e2e-tests/ +git add .ci/ # Only if deployment config was changed + +# Commit with a descriptive message +git commit -m "fix(e2e): + + +- What test was failing +- What the root cause was +- How it was fixed" +``` + +### Commit Message Convention + +Follow the conventional commit format: +- `fix(e2e): fix flaky topology test timeout` +- `fix(e2e): update RBAC page locators after UI redesign` +- `fix(e2e): add retry logic for catalog entity refresh` +- `fix(e2e): skip orchestrator test on GKE platform` + +If a Jira ticket exists, reference it: +- `fix(e2e): fix topology locator drift [RHIDP-1234]` + +## Step 2: Push to Fork + +Push the fix branch to the fork (origin): + +```bash +git push -u origin +``` + +Example: +```bash +git push -u origin fix/e2e-topology-locator +# or +git push -u origin fix/RHIDP-1234-e2e-topology-locator +``` + +## Step 3: Create Pull Request + +Create a PR against the upstream `redhat-developer/rhdh` repository. + +**Dynamic username extraction** -- Always derive the GitHub username from the fork remote at runtime rather than hardcoding it. 
This makes the workflow portable across any contributor's environment: + +```bash +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') +``` + +Then create the PR as a **draft** (always use `--draft`): +```bash +gh pr create \ + --draft \ + --repo redhat-developer/rhdh \ + --head "${GITHUB_USER}:" \ + --base \ + --title "fix(e2e): " \ + --body "$(cat <<'EOF' +## Summary +- <1-2 bullet points explaining what was fixed and why> + +## Test Results +- Local verification: 5/5 passes +- Code quality: lint, tsc, prettier all pass + +## Related +- Prow job: +- Jira: +EOF +)" +``` + +**Important**: Always use `--repo redhat-developer/rhdh` and `--head :` for cross-fork PRs. Never hardcode the GitHub username -- always extract it dynamically from the origin remote URL so this workflow works for any contributor. + +### PR Description Guidelines + +Keep it concise: +- What test was failing +- What the root cause was +- How it was fixed +- Link to the original failing CI job or Jira ticket + +## Step 4: Trigger Qodo Agentic Review + +After the PR is created, trigger an agentic review from Qodo (PR-Agent): + +```bash +# Get the PR number from the create output, then comment +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" +``` + +The Qodo bot will: +1. Analyze the code changes +2. Post review comments with suggestions +3. 
Optionally approve or request changes + +Note: The `.pr_agent.toml` in the repo configures Qodo with: +- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos +- Auto-review, auto-describe, and auto-improve on PR creation +- Scoped to `e2e-tests` folder changes + +## Step 5: Wait for and Address Qodo Review + +### Poll for Review Comments + +Check for Qodo review completion (it typically takes 1-3 minutes): + +```bash +# Check for Qodo bot comments +gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' + +# Check for inline comments +gh api repos/redhat-developer/rhdh/pulls//comments \ + --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' +``` + +### Address Review Comments + +For each review comment: + +1. **Code suggestions**: If the suggestion improves the code, apply it: + ```bash + # Make the change locally + # Then stage only the changed files and commit + git add + git commit -m "fix(e2e): address review feedback" + git push + ``` + **Never use `git add -A` or `git add .`** — always stage specific files to avoid committing `.env`, test artifacts, or other local-only files. + +2. **Style/convention issues**: Fix them per project conventions + +3. **False positives**: If a suggestion is incorrect, explain why in a reply: + ```bash + gh api repos/redhat-developer/rhdh/pulls//comments//replies \ + -f body="This is intentional because " + ``` + +4. **Questions**: Answer them with context from the codebase + +## Step 6: Trigger Affected CI Job + +After addressing Qodo review feedback (and pushing any follow-up commits), trigger the presubmit E2E job that corresponds to the originally failing CI job. Presubmit job names differ from periodic/nightly names but cover the same platform and deployment method. + +**CRITICAL**: Never guess or construct presubmit job names. 
Always discover them from the `openshift-ci` bot response as described below. + +### Step 6a: Request Available Jobs + +Comment `/test ?` on the PR to request the list of available presubmit jobs: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test ?" +``` + +### Step 6b: Wait for the Bot Response + +Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: + +```bash +# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) +for i in $(seq 1 10); do + BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ + --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') + if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then + echo "Bot response received:" + echo "$BOT_RESPONSE" + break + fi + echo "Waiting for openshift-ci bot response (attempt $i/10)..." + sleep 30 +done +``` + +If no response is received after 5 minutes, ask the user for guidance. + +### Step 6c: Select the Right Job from the Bot Response + +Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. Use these patterns to identify the right job: + +| Original failure pattern | Look for presubmit containing | +|--------------------------|-------------------------------| +| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | +| `*ocp*operator*nightly*` | `*ocp*operator*` | +| `*aks*helm*` | `*aks*helm*` | +| `*eks*helm*` | `*eks*helm*` | +| `*gke*helm*` | `*gke*helm*` | + +**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. 
+ +If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. + +### Step 6d: Trigger the Job + +Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: + +```bash +gh pr comment --repo redhat-developer/rhdh --body "/test " +``` + +**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. + +## Step 7: Monitor CI Status + +### Watch CI Checks + +After pushing changes, monitor the CI pipeline: + +```bash +gh pr checks --repo redhat-developer/rhdh --watch +``` + +Or check manually: +```bash +gh pr checks --repo redhat-developer/rhdh +``` + +CI check types (Prow E2E jobs, lint checks, build checks, etc.) are documented in the project CI rules. Use `gh pr checks` output to identify which specific check failed. + +### If CI Fails + +1. **E2E test failure**: Check the Prow job logs, determine if it's the same test or a different one +2. **Lint failure**: Run `yarn lint:fix` locally, commit and push +3. **Build failure**: Check TypeScript errors with `yarn tsc` +4. 
**Unrelated failure**: Comment on the PR noting it's an unrelated failure, optionally `/retest` to re-trigger + +### Re-trigger CI + +If a CI check needs to be re-run: +```bash +# For Prow jobs, comment on the PR +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# For a specific job, use /test with a job name from the bot's `/test ?` list (/retest takes no job argument) +gh pr comment --repo redhat-developer/rhdh --body "/test <job-name>" +``` + +## Step 8: Final Status Report + +After CI passes (or all issues are addressed), produce a final report: + +``` +PR Status Report: +- PR: +- Branch: -> +- CI Status: PASS / PENDING / FAIL +- Qodo Review: Addressed / Pending +- Files changed: +- Action items: +``` + +## Quick Reference: PR Workflow Commands + +```bash +# Determine GitHub username from fork remote +GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') + +# Create draft PR (always use --draft) +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base + +# Trigger Qodo review +gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" + +# List available presubmit jobs (Step 6a) +gh pr comment --repo redhat-developer/rhdh --body "/test ?" 
+ +# Wait for openshift-ci bot response (Step 6b) -- poll until bot replies with job list + +# Trigger specific presubmit job (Step 6d) -- ONLY use a job name from the bot's response +gh pr comment --repo redhat-developer/rhdh --body "/test " + +# Check CI status +gh pr checks --repo redhat-developer/rhdh + +# Re-trigger tests +gh pr comment --repo redhat-developer/rhdh --body "/retest" + +# View PR +gh pr view --repo redhat-developer/rhdh --web +``` diff --git a/.rulesync/skills/e2e-verify-fix/SKILL.md b/.rulesync/skills/e2e-verify-fix/SKILL.md new file mode 100644 index 0000000000..19a02920e5 --- /dev/null +++ b/.rulesync/skills/e2e-verify-fix/SKILL.md @@ -0,0 +1,163 @@ +--- +name: e2e-verify-fix +description: >- + Verify an E2E test fix by running the test multiple times and checking code + quality +targets: + - '*' +--- +# Verify Fix + +Verify that the test fix works reliably and passes all code quality checks. + +## When to Use + +Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm the fix works before submitting a PR. + +## MANDATORY: Use the Playwright Healer Agent for Verification + +Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. + +### Healer Initialization + +If not already initialized in this session, use the `--loop` flag matching your AI coding tool: + +```bash +cd e2e-tests + +# For OpenCode +npx playwright init-agents --loop=opencode + +# For Claude Code +npx playwright init-agents --loop=claude +``` + +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. + +Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. + +## Verification Steps + +### 1. 
Single Run Verification via Healer + +Invoke the healer agent to run the fixed test once: + +``` +Task: "You are the Playwright Test Healer agent. Verify a fix by running the test once. +Working directory: /e2e-tests +Run: set -a && source .env && set +a && npx playwright test --project=any-test --retries=0 --workers=1 -g '' +If it passes, report success. If it fails, examine the error and report what went wrong." +``` + +If it fails, go back to `e2e-diagnose-and-fix` and iterate — use the healer agent there too for the fix. + +### 2. Multi-Run Stability Check + +Run the test 5 times consecutively to verify no flakiness was introduced: + +```bash +cd e2e-tests +set -a && source .env && set +a +PASS=0; FAIL=0 +for i in $(seq 1 5); do + echo "=== Stability run $i/5 ===" + if npx playwright test --project=any-test --retries=0 --workers=1 2>&1; then + PASS=$((PASS + 1)) + else + FAIL=$((FAIL + 1)) + fi +done +echo "Stability results: $PASS/5 passed" +``` + +**Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. + +### 3. Full Project Stability Check (if failure was only reproducible with full project) + +If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: + +```bash +cd e2e-tests +set -a && source .env && set +a +CI=true yarn playwright test --project=<project> --retries=0 +``` + +Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. + +**Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. 
+ +**IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. + +### 4. Code Quality Checks + +Run all code quality checks in the e2e-tests workspace: + +```bash +cd e2e-tests + +# TypeScript compilation +yarn tsc:check + +# ESLint +yarn lint:check + +# Prettier formatting +yarn prettier:check +``` + +Fix any issues found: + +```bash +# Auto-fix lint issues +yarn lint:fix + +# Auto-fix formatting +yarn prettier:fix +``` + +### 5. Optional: Full Project Regression Check + +If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: + +```bash +cd e2e-tests +yarn playwright test --project= --retries=0 +``` + +This is optional for isolated spec file changes but recommended for changes to: +- `e2e-tests/playwright/utils/` (utility classes) +- `e2e-tests/playwright/support/` (page objects, selectors) +- `e2e-tests/playwright/data/` (shared test data) +- `playwright.config.ts` (configuration) + +### 6. 
Review the Diff + +Before submitting, review all changes: + +```bash +git diff +git diff --stat +``` + +Verify: +- Only intended files were changed +- No secrets or credentials were added +- No unrelated changes were included +- Component annotations are present in any new/modified spec files +- Semantic selectors are used (no deprecated CSS class selectors) + +## Result Summary + +After verification, produce a summary: + +``` +Fix Verification Results: +- Test: () +- Single run: PASS +- Stability (5 runs): 5/5 PASS +- TypeScript: PASS +- ESLint: PASS +- Prettier: PASS +- Files changed: +- Ready for PR: YES/NO +``` diff --git a/e2e-tests/.gitignore b/e2e-tests/.gitignore index 67fecbdb72..fdd9119cc5 100644 --- a/e2e-tests/.gitignore +++ b/e2e-tests/.gitignore @@ -76,3 +76,12 @@ junit-results.xml **/app-config.test.yaml **/dynamic-plugins.test.yaml **/rbac.test.csv + +# Playwright Test Agents (generated by `npx playwright init-agents`) +.claude/ +.opencode/ +opencode.json +.mcp.json +specs/ +playwright/seed.spec.ts +.playwright-mcp diff --git a/e2e-tests/local-test-setup.sh b/e2e-tests/local-test-setup.sh index 42abfc3f1b..0d26c95957 100755 --- a/e2e-tests/local-test-setup.sh +++ b/e2e-tests/local-test-setup.sh @@ -4,13 +4,18 @@ # Supports both OpenShift (OCP, OSD-GCP) and non-OpenShift (AKS, EKS, GKE) platforms. 
# # Usage (run from e2e-tests directory): -# source local-test-setup.sh [showcase|rbac] +# source local-test-setup.sh [showcase|rbac] [--env] +# +# Options: +# showcase|rbac Select the test type (default: showcase) +# --env Generate a .env file in e2e-tests/ for Playwright Test Agents # # Examples: # cd e2e-tests # source local-test-setup.sh # Uses Showcase URL (default) # source local-test-setup.sh showcase # Uses Showcase URL # source local-test-setup.sh rbac # Uses Showcase RBAC URL +# source local-test-setup.sh rbac --env # RBAC + generate .env file # # After sourcing, you can run tests: # yarn install @@ -41,8 +46,18 @@ log::info "Loading config from: $CONFIG_FILE" # shellcheck source=/dev/null source "$CONFIG_FILE" +# Parse arguments +GENERATE_ENV=false +TEST_TYPE="showcase" +for arg in "$@"; do + case "$arg" in + --env) GENERATE_ENV=true ;; + showcase | rbac) TEST_TYPE="$arg" ;; + *) log::warn "Unknown argument: $arg (ignored)" ;; + esac +done + # Select URL based on argument -TEST_TYPE="${1:-showcase}" case "$TEST_TYPE" in showcase) export BASE_URL="$SHOWCASE_URL" @@ -128,6 +143,57 @@ while IFS= read -r key; do export "$safe_key"="$value" done < <(printf '%s' "$SECRETS_JSON" | jq -r 'keys[]') +# Generate .env file for Playwright Test Agents (healer, planner, generator) +# Only when --env flag is passed. The .env file is gitignored and must never be committed. +if [[ "$GENERATE_ENV" == "true" ]]; then + ENV_FILE="$SCRIPT_DIR/.env" + # Create temp file with restrictive permissions from the start + ENV_TMP="$(umask 077 && mktemp "${ENV_FILE}.XXXXXX")" + log::info "Generating .env file: $ENV_FILE" + + # Helper: single-quote a value for .env to handle multiline content (PEM certs, private keys) + env_quote() { + local val="$1" + # Escape existing single quotes: ' → '"'"' + val="${val//\'/\'\"\'\"\'}" + printf "'%s'" "$val" + return 0 + } + + # Write to a temp file first, then atomically move into place. 
+ { + echo "# Auto-generated by local-test-setup.sh --env — do not commit" + echo "# Regenerate by running: source local-test-setup.sh --env" + echo "" + echo "BASE_URL=$(env_quote "$BASE_URL")" + echo "K8S_CLUSTER_URL=$(env_quote "$K8S_CLUSTER_URL")" + echo "K8S_CLUSTER_TOKEN=$(env_quote "$K8S_CLUSTER_TOKEN")" + echo "JOB_NAME=$(env_quote "$JOB_NAME")" + echo "IMAGE_REGISTRY=$(env_quote "$IMAGE_REGISTRY")" + echo "IMAGE_REPO=$(env_quote "$IMAGE_REPO")" + echo "TAG_NAME=$(env_quote "$TAG_NAME")" + echo "SHOWCASE_URL=$(env_quote "$SHOWCASE_URL")" + echo "SHOWCASE_RBAC_URL=$(env_quote "$SHOWCASE_RBAC_URL")" + echo "CONTAINER_PLATFORM=$(env_quote "$CONTAINER_PLATFORM")" + echo "IS_OPENSHIFT=$(env_quote "$IS_OPENSHIFT")" + echo "" + echo "# Vault secrets" + # Write each vault secret as KEY='VALUE', using the same safe_key transform + # Single-quoting handles multiline values (PEM certs, private keys) + while IFS= read -r key; do + [[ -z "$key" ]] && continue + [[ "$key" == "secretsync/"* ]] && continue + value=$(printf '%s' "$SECRETS_JSON" | jq -r --arg k "$key" '.[$k]') + safe_key=$(echo "$key" | tr './-' '___') + echo "$safe_key=$(env_quote "$value")" + done < <(printf '%s' "$SECRETS_JSON" | jq -r 'keys[]') + } > "$ENV_TMP" + mv -f "$ENV_TMP" "$ENV_FILE" + chmod 600 "$ENV_FILE" + log::success ".env file written with $(wc -l < "$ENV_FILE" | tr -d ' ') lines (mode 600)" + echo "" +fi + log::section "Environment Ready" log::info "Available URLs:" log::info " Showcase: $SHOWCASE_URL" diff --git a/e2e-tests/playwright.config.ts b/e2e-tests/playwright.config.ts index 0842dec5ae..fb217cfc33 100644 --- a/e2e-tests/playwright.config.ts +++ b/e2e-tests/playwright.config.ts @@ -100,6 +100,7 @@ export default defineConfig({ name: PW_PROJECT.SHOWCASE, dependencies: [PW_PROJECT.SMOKE_TEST], testIgnore: [ + "**/playwright/seed.spec.ts", "**/playwright/e2e/plugins/rbac/**/*.spec.ts", "**/playwright/e2e/**/*-rbac.spec.ts", 
"**/playwright/e2e/external-database/verify-tls-config-with-external-crunchy.spec.ts", @@ -149,6 +150,7 @@ export default defineConfig({ ...k8sSpecificConfig, dependencies: [PW_PROJECT.SMOKE_TEST], testIgnore: [ + "**/playwright/seed.spec.ts", "**/playwright/e2e/smoke-test.spec.ts", "**/playwright/e2e/plugins/rbac/**/*.spec.ts", "**/playwright/e2e/**/*-rbac.spec.ts", @@ -185,6 +187,7 @@ export default defineConfig({ name: PW_PROJECT.SHOWCASE_OPERATOR, dependencies: [PW_PROJECT.SMOKE_TEST], testIgnore: [ + "**/playwright/seed.spec.ts", "**/playwright/e2e/plugins/rbac/**/*.spec.ts", "**/playwright/e2e/**/*-rbac.spec.ts", "**/playwright/e2e/external-database/verify-tls-config-with-external-crunchy.spec.ts", diff --git a/rulesync.jsonc b/rulesync.jsonc index a3da1ab14c..9d4a6bfa74 100644 --- a/rulesync.jsonc +++ b/rulesync.jsonc @@ -6,7 +6,8 @@ ], "features": [ "rules", - "commands" + "commands", + "skills" ], "baseDirs": [ "." From dc52e4b2bdc21e8baf622878536031fe00397c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:11:56 +0200 Subject: [PATCH 02/23] fix(skills): clarify /test ? response format and job selection in submit-and-review Document required vs optional job sections, shortened job names, and update the mapping table with actual /test commands from the bot. Assisted-by: OpenCode --- .../skills/e2e-submit-and-review/SKILL.md | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md index 1e0831fba4..d3a11c83f7 100644 --- a/.rulesync/skills/e2e-submit-and-review/SKILL.md +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -212,31 +212,45 @@ done If no response is received after 5 minutes, ask the user for guidance. 
-### Step 6c: Select the Right Job from the Bot Response +### Step 6c: Understand the Bot Response -Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. Use these patterns to identify the right job: +The bot's response has two sections: -| Original failure pattern | Look for presubmit containing | -|--------------------------|-------------------------------| -| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | -| `*ocp*operator*nightly*` | `*ocp*operator*` | -| `*aks*helm*` | `*aks*helm*` | -| `*eks*helm*` | `*eks*helm*` | -| `*gke*helm*` | `*gke*helm*` | +1. **Required jobs** — triggered automatically on PR creation/update. These run the basic presubmit checks: + ``` + /test e2e-ocp-helm + ``` +2. **Optional jobs** — must be triggered explicitly. These include nightly variants, other platforms, and operators: + ``` + /test e2e-ocp-helm-nightly + /test e2e-eks-helm-nightly + /test e2e-aks-operator-nightly + ... + ``` -**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. +Note: the job names in the bot's response are **shortened** (e.g., `e2e-ocp-helm`), not the full Prow `pull-ci-redhat-developer-rhdh-...` format. Use these short names directly with `/test`. -If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. 
+### Step 6d: Select and Trigger the Right Job -### Step 6d: Trigger the Job +Match the original failure to the right presubmit job from the bot's list: -Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: +| Original failure pattern | Trigger | +|--------------------------|---------| +| `*ocp*helm*nightly*` | `/test e2e-ocp-helm-nightly` | +| `*ocp*operator*nightly*` | `/test e2e-ocp-operator-nightly` | +| `*ocp*v4-19*helm*` | `/test e2e-ocp-v4-19-helm-nightly` | +| `*aks*helm*` | `/test e2e-aks-helm-nightly` | +| `*eks*helm*` | `/test e2e-eks-helm-nightly` | +| `*gke*operator*` | `/test e2e-gke-operator-nightly` | ```bash -gh pr comment --repo redhat-developer/rhdh --body "/test " +gh pr comment --repo redhat-developer/rhdh --body "/test " ``` -**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. +**Rules**: +- **Only use job names that appeared in the bot's response** — never construct or guess names +- The required job (`e2e-ocp-helm`) runs automatically — you usually only need to trigger the optional job matching the original failure +- If no matching job exists in the list, inform the user and ask how to proceed ## Step 7: Monitor CI Status From 51baea5e6b78d4940bb0f2c57dfe02124b523120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:13:23 +0200 Subject: [PATCH 03/23] fix(skills): reduce /test ? 
poll interval to 5s, bot responds in seconds Assisted-by: OpenCode --- .../skills/e2e-submit-and-review/SKILL.md | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md index d3a11c83f7..ca3c914768 100644 --- a/.rulesync/skills/e2e-submit-and-review/SKILL.md +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -193,11 +193,11 @@ gh pr comment --repo redhat-developer/rhdh --body "/test ?" ### Step 6b: Wait for the Bot Response -Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: +The bot usually responds within seconds. Poll PR comments for the `openshift-ci` bot's response: ```bash -# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) -for i in $(seq 1 10); do +# Poll for the openshift-ci bot response (check every 5s, up to 12 attempts = 1 min) +for i in $(seq 1 12); do BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then @@ -205,28 +205,28 @@ for i in $(seq 1 10); do echo "$BOT_RESPONSE" break fi - echo "Waiting for openshift-ci bot response (attempt $i/10)..." - sleep 30 + echo "Waiting for openshift-ci bot response (attempt $i/12)..." + sleep 5 done ``` -If no response is received after 5 minutes, ask the user for guidance. +If no response is received after 1 minute, ask the user for guidance. ### Step 6c: Understand the Bot Response The bot's response has two sections: 1. **Required jobs** — triggered automatically on PR creation/update. These run the basic presubmit checks: - ``` - /test e2e-ocp-helm - ``` + ``` + /test e2e-ocp-helm + ``` 2. **Optional jobs** — must be triggered explicitly. 
These include nightly variants, other platforms, and operators: - ``` - /test e2e-ocp-helm-nightly - /test e2e-eks-helm-nightly - /test e2e-aks-operator-nightly - ... - ``` + ``` + /test e2e-ocp-helm-nightly + /test e2e-eks-helm-nightly + /test e2e-aks-operator-nightly + ... + ``` Note: the job names in the bot's response are **shortened** (e.g., `e2e-ocp-helm`), not the full Prow `pull-ci-redhat-developer-rhdh-...` format. Use these short names directly with `/test`. From 173542ac456031ac3a01c42e3f98ed98ef8dd5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:30:43 +0200 Subject: [PATCH 04/23] fix(skills): clarify required jobs not triggered on draft PRs Assisted-by: OpenCode --- .rulesync/skills/e2e-submit-and-review/SKILL.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md index ca3c914768..bc28c31852 100644 --- a/.rulesync/skills/e2e-submit-and-review/SKILL.md +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -132,11 +132,6 @@ The Qodo bot will: 2. Post review comments with suggestions 3. Optionally approve or request changes -Note: The `.pr_agent.toml` in the repo configures Qodo with: -- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos -- Auto-review, auto-describe, and auto-improve on PR creation -- Scoped to `e2e-tests` folder changes - ## Step 5: Wait for and Address Qodo Review ### Poll for Review Comments @@ -216,7 +211,7 @@ If no response is received after 1 minute, ask the user for guidance. The bot's response has two sections: -1. **Required jobs** — triggered automatically on PR creation/update. These run the basic presubmit checks: +1. **Required jobs** — triggered automatically when the PR is marked as ready for review (not on draft PRs). 
These run the basic presubmit checks: ``` /test e2e-ocp-helm ``` From 51c95161016b76d6603507745a01ad3f4128efcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:31:42 +0200 Subject: [PATCH 05/23] chore: regenerate rulesync output files Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 2 +- .claude/skills/e2e-reproduce-failure/SKILL.md | 10 ++- .claude/skills/e2e-submit-and-review/SKILL.md | 61 +++++++++++-------- .claude/skills/e2e-verify-fix/SKILL.md | 18 +++++- .cursor/commands/fix-e2e.md | 2 +- .cursor/skills/e2e-reproduce-failure/SKILL.md | 10 ++- .cursor/skills/e2e-submit-and-review/SKILL.md | 61 +++++++++++-------- .cursor/skills/e2e-verify-fix/SKILL.md | 18 +++++- .opencode/command/fix-e2e.md | 2 +- .../skill/e2e-reproduce-failure/SKILL.md | 10 ++- .../skill/e2e-submit-and-review/SKILL.md | 61 +++++++++++-------- .opencode/skill/e2e-verify-fix/SKILL.md | 18 +++++- 12 files changed, 180 insertions(+), 93 deletions(-) diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md index f76b4f044f..44edfc94cd 100644 --- a/.claude/commands/fix-e2e.md +++ b/.claude/commands/fix-e2e.md @@ -101,7 +101,7 @@ yarn playwright test --project=any-test --retries=0 --workers=1 - **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. - **Consistent failure**: Proceed to Phase 5 - **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability -- **Cannot reproduce** (passes every time after 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. 
+- **Cannot reproduce** (passes every time after 10 runs): Before giving up, try running the entire CI project with `CI=true yarn playwright test --project= --retries=0` to simulate CI conditions (3 workers, full test suite). If that also passes, report the results and **ask the user for explicit approval** before proceeding. ### Phase 5: Diagnose and Fix diff --git a/.claude/skills/e2e-reproduce-failure/SKILL.md b/.claude/skills/e2e-reproduce-failure/SKILL.md index 17a8728bfc..58d675100d 100644 --- a/.claude/skills/e2e-reproduce-failure/SKILL.md +++ b/.claude/skills/e2e-reproduce-failure/SKILL.md @@ -156,12 +156,18 @@ echo "Results: $PASS passed, $FAIL failed out of 10 runs" ### Cannot Reproduce - **Definition**: Passes all runs locally (0/10 fail) -- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Before giving up**, try running the **entire Playwright project** that failed in CI with `CI=true` to simulate CI conditions (this sets the worker count to 3, matching CI): + ```bash + cd e2e-tests + CI=true yarn playwright test --project= --retries=0 + ``` + Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This runs all tests in that project concurrently, which can expose race conditions and resource contention that single-test runs miss. +- If the full project run also passes, **stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. 
- **Investigation**: Check environment differences between local and CI: - **Cluster version**: CI may use a different OCP version (check the cluster pool version) - **Image version**: CI may use a different RHDH image - **Resource constraints**: CI clusters may have less resources - - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Parallel execution**: CI runs with 3 workers; the full project run above simulates this - **Network**: CI clusters are in `us-east-2` AWS region - **External services**: GitHub API rate limits, Keycloak availability diff --git a/.claude/skills/e2e-submit-and-review/SKILL.md b/.claude/skills/e2e-submit-and-review/SKILL.md index 5aedb84414..ff09b9a3f4 100644 --- a/.claude/skills/e2e-submit-and-review/SKILL.md +++ b/.claude/skills/e2e-submit-and-review/SKILL.md @@ -130,11 +130,6 @@ The Qodo bot will: 2. Post review comments with suggestions 3. Optionally approve or request changes -Note: The `.pr_agent.toml` in the repo configures Qodo with: -- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos -- Auto-review, auto-describe, and auto-improve on PR creation -- Scoped to `e2e-tests` folder changes - ## Step 5: Wait for and Address Qodo Review ### Poll for Review Comments @@ -191,11 +186,11 @@ gh pr comment --repo redhat-developer/rhdh --body "/test ?" ### Step 6b: Wait for the Bot Response -Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: +The bot usually responds within seconds. 
Poll PR comments for the `openshift-ci` bot's response: ```bash -# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) -for i in $(seq 1 10); do +# Poll for the openshift-ci bot response (check every 5s, up to 12 attempts = 1 min) +for i in $(seq 1 12); do BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then @@ -203,38 +198,52 @@ for i in $(seq 1 10); do echo "$BOT_RESPONSE" break fi - echo "Waiting for openshift-ci bot response (attempt $i/10)..." - sleep 30 + echo "Waiting for openshift-ci bot response (attempt $i/12)..." + sleep 5 done ``` -If no response is received after 5 minutes, ask the user for guidance. +If no response is received after 1 minute, ask the user for guidance. -### Step 6c: Select the Right Job from the Bot Response +### Step 6c: Understand the Bot Response -Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. Use these patterns to identify the right job: +The bot's response has two sections: -| Original failure pattern | Look for presubmit containing | -|--------------------------|-------------------------------| -| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | -| `*ocp*operator*nightly*` | `*ocp*operator*` | -| `*aks*helm*` | `*aks*helm*` | -| `*eks*helm*` | `*eks*helm*` | -| `*gke*helm*` | `*gke*helm*` | +1. **Required jobs** — triggered automatically when the PR is marked as ready for review (not on draft PRs). These run the basic presubmit checks: + ``` + /test e2e-ocp-helm + ``` +2. **Optional jobs** — must be triggered explicitly. These include nightly variants, other platforms, and operators: + ``` + /test e2e-ocp-helm-nightly + /test e2e-eks-helm-nightly + /test e2e-aks-operator-nightly + ... 
+ ``` -**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. +Note: the job names in the bot's response are **shortened** (e.g., `e2e-ocp-helm`), not the full Prow `pull-ci-redhat-developer-rhdh-...` format. Use these short names directly with `/test`. -If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. +### Step 6d: Select and Trigger the Right Job -### Step 6d: Trigger the Job +Match the original failure to the right presubmit job from the bot's list: -Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: +| Original failure pattern | Trigger | +|--------------------------|---------| +| `*ocp*helm*nightly*` | `/test e2e-ocp-helm-nightly` | +| `*ocp*operator*nightly*` | `/test e2e-ocp-operator-nightly` | +| `*ocp*v4-19*helm*` | `/test e2e-ocp-v4-19-helm-nightly` | +| `*aks*helm*` | `/test e2e-aks-helm-nightly` | +| `*eks*helm*` | `/test e2e-eks-helm-nightly` | +| `*gke*operator*` | `/test e2e-gke-operator-nightly` | ```bash -gh pr comment --repo redhat-developer/rhdh --body "/test " +gh pr comment --repo redhat-developer/rhdh --body "/test " ``` -**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. 
+**Rules**: +- **Only use job names that appeared in the bot's response** — never construct or guess names +- The required job (`e2e-ocp-helm`) runs automatically — you usually only need to trigger the optional job matching the original failure +- If no matching job exists in the list, inform the user and ask how to proceed ## Step 7: Monitor CI Status diff --git a/.claude/skills/e2e-verify-fix/SKILL.md b/.claude/skills/e2e-verify-fix/SKILL.md index 8a24f1f442..2b78e07dbd 100644 --- a/.claude/skills/e2e-verify-fix/SKILL.md +++ b/.claude/skills/e2e-verify-fix/SKILL.md @@ -70,9 +70,23 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. +### 3. Full Project Stability Check (if failure was only reproducible with full project) + +If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: + +```bash +cd e2e-tests +set -a && source .env && set +a +CI=true yarn playwright test --project= --retries=0 +``` + +Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. + +**Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. + **IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. -### 3. Code Quality Checks +### 4. 
Code Quality Checks Run all code quality checks in the e2e-tests workspace: @@ -99,7 +113,7 @@ yarn lint:fix yarn prettier:fix ``` -### 4. Optional: Full Project Regression Check +### 5. Optional: Full Project Regression Check If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md index 4acb89c839..f1c1c84c7a 100644 --- a/.cursor/commands/fix-e2e.md +++ b/.cursor/commands/fix-e2e.md @@ -98,7 +98,7 @@ yarn playwright test --project=any-test --retries=0 --workers=1 - **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. - **Consistent failure**: Proceed to Phase 5 - **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability -- **Cannot reproduce** (passes every time after 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. +- **Cannot reproduce** (passes every time after 10 runs): Before giving up, try running the entire CI project with `CI=true yarn playwright test --project= --retries=0` to simulate CI conditions (3 workers, full test suite). If that also passes, report the results and **ask the user for explicit approval** before proceeding. 
### Phase 5: Diagnose and Fix diff --git a/.cursor/skills/e2e-reproduce-failure/SKILL.md b/.cursor/skills/e2e-reproduce-failure/SKILL.md index 2dcb17210d..fa31ef6f79 100644 --- a/.cursor/skills/e2e-reproduce-failure/SKILL.md +++ b/.cursor/skills/e2e-reproduce-failure/SKILL.md @@ -154,12 +154,18 @@ echo "Results: $PASS passed, $FAIL failed out of 10 runs" ### Cannot Reproduce - **Definition**: Passes all runs locally (0/10 fail) -- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Before giving up**, try running the **entire Playwright project** that failed in CI with `CI=true` to simulate CI conditions (this sets the worker count to 3, matching CI): + ```bash + cd e2e-tests + CI=true yarn playwright test --project=<project> --retries=0 + ``` + Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This runs all tests in that project concurrently, which can expose race conditions and resource contention that single-test runs miss. +- If the full project run also passes, **stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation.
- **Investigation**: Check environment differences between local and CI: - **Cluster version**: CI may use a different OCP version (check the cluster pool version) - **Image version**: CI may use a different RHDH image - **Resource constraints**: CI clusters may have less resources - - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Parallel execution**: CI runs with 3 workers; the full project run above simulates this - **Network**: CI clusters are in `us-east-2` AWS region - **External services**: GitHub API rate limits, Keycloak availability diff --git a/.cursor/skills/e2e-submit-and-review/SKILL.md b/.cursor/skills/e2e-submit-and-review/SKILL.md index 45d71ebe3d..b7d23aa85b 100644 --- a/.cursor/skills/e2e-submit-and-review/SKILL.md +++ b/.cursor/skills/e2e-submit-and-review/SKILL.md @@ -128,11 +128,6 @@ The Qodo bot will: 2. Post review comments with suggestions 3. Optionally approve or request changes -Note: The `.pr_agent.toml` in the repo configures Qodo with: -- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos -- Auto-review, auto-describe, and auto-improve on PR creation -- Scoped to `e2e-tests` folder changes - ## Step 5: Wait for and Address Qodo Review ### Poll for Review Comments @@ -189,11 +184,11 @@ gh pr comment --repo redhat-developer/rhdh --body "/test ?" ### Step 6b: Wait for the Bot Response -Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: +The bot usually responds within seconds. 
Poll PR comments for the `openshift-ci` bot's response: ```bash -# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) -for i in $(seq 1 10); do +# Poll for the openshift-ci bot response (check every 5s, up to 12 attempts = 1 min) +for i in $(seq 1 12); do BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then @@ -201,38 +196,52 @@ for i in $(seq 1 10); do echo "$BOT_RESPONSE" break fi - echo "Waiting for openshift-ci bot response (attempt $i/10)..." - sleep 30 + echo "Waiting for openshift-ci bot response (attempt $i/12)..." + sleep 5 done ``` -If no response is received after 5 minutes, ask the user for guidance. +If no response is received after 1 minute, ask the user for guidance. -### Step 6c: Select the Right Job from the Bot Response +### Step 6c: Understand the Bot Response -Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. Use these patterns to identify the right job: +The bot's response has two sections: -| Original failure pattern | Look for presubmit containing | -|--------------------------|-------------------------------| -| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | -| `*ocp*operator*nightly*` | `*ocp*operator*` | -| `*aks*helm*` | `*aks*helm*` | -| `*eks*helm*` | `*eks*helm*` | -| `*gke*helm*` | `*gke*helm*` | +1. **Required jobs** — triggered automatically when the PR is marked as ready for review (not on draft PRs). These run the basic presubmit checks: + ``` + /test e2e-ocp-helm + ``` +2. **Optional jobs** — must be triggered explicitly. These include nightly variants, other platforms, and operators: + ``` + /test e2e-ocp-helm-nightly + /test e2e-eks-helm-nightly + /test e2e-aks-operator-nightly + ... 
+ ``` -**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. +Note: the job names in the bot's response are **shortened** (e.g., `e2e-ocp-helm`), not the full Prow `pull-ci-redhat-developer-rhdh-...` format. Use these short names directly with `/test`. -If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. +### Step 6d: Select and Trigger the Right Job -### Step 6d: Trigger the Job +Match the original failure to the right presubmit job from the bot's list: -Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: +| Original failure pattern | Trigger | +|--------------------------|---------| +| `*ocp*helm*nightly*` | `/test e2e-ocp-helm-nightly` | +| `*ocp*operator*nightly*` | `/test e2e-ocp-operator-nightly` | +| `*ocp*v4-19*helm*` | `/test e2e-ocp-v4-19-helm-nightly` | +| `*aks*helm*` | `/test e2e-aks-helm-nightly` | +| `*eks*helm*` | `/test e2e-eks-helm-nightly` | +| `*gke*operator*` | `/test e2e-gke-operator-nightly` | ```bash -gh pr comment --repo redhat-developer/rhdh --body "/test " +gh pr comment --repo redhat-developer/rhdh --body "/test " ``` -**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. 
+**Rules**: +- **Only use job names that appeared in the bot's response** — never construct or guess names +- The required job (`e2e-ocp-helm`) runs automatically — you usually only need to trigger the optional job matching the original failure +- If no matching job exists in the list, inform the user and ask how to proceed ## Step 7: Monitor CI Status diff --git a/.cursor/skills/e2e-verify-fix/SKILL.md b/.cursor/skills/e2e-verify-fix/SKILL.md index a7ee9ed8d8..6016d4f731 100644 --- a/.cursor/skills/e2e-verify-fix/SKILL.md +++ b/.cursor/skills/e2e-verify-fix/SKILL.md @@ -68,9 +68,23 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. +### 3. Full Project Stability Check (if failure was only reproducible with full project) + +If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: + +```bash +cd e2e-tests +set -a && source .env && set +a +CI=true yarn playwright test --project= --retries=0 +``` + +Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. + +**Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. + **IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. -### 3. Code Quality Checks +### 4. 
Code Quality Checks Run all code quality checks in the e2e-tests workspace: @@ -97,7 +111,7 @@ yarn lint:fix yarn prettier:fix ``` -### 4. Optional: Full Project Regression Check +### 5. Optional: Full Project Regression Check If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md index f76b4f044f..44edfc94cd 100644 --- a/.opencode/command/fix-e2e.md +++ b/.opencode/command/fix-e2e.md @@ -101,7 +101,7 @@ yarn playwright test --project=any-test --retries=0 --workers=1 - **No cluster or deployment available**: If Phase 3 was skipped or no running RHDH instance exists, **ask the user for explicit approval** before skipping reproduction — do not skip silently. - **Consistent failure**: Proceed to Phase 5 - **Flaky** (fails sometimes): Proceed to Phase 5, focus on reliability -- **Cannot reproduce** (passes every time after 10 runs): Report the reproduction results and possible environment differences, then **ask the user for explicit approval** before proceeding. Do not skip this step silently. +- **Cannot reproduce** (passes every time after 10 runs): Before giving up, try running the entire CI project with `CI=true yarn playwright test --project= --retries=0` to simulate CI conditions (3 workers, full test suite). If that also passes, report the results and **ask the user for explicit approval** before proceeding. 
### Phase 5: Diagnose and Fix diff --git a/.opencode/skill/e2e-reproduce-failure/SKILL.md b/.opencode/skill/e2e-reproduce-failure/SKILL.md index 17a8728bfc..58d675100d 100644 --- a/.opencode/skill/e2e-reproduce-failure/SKILL.md +++ b/.opencode/skill/e2e-reproduce-failure/SKILL.md @@ -156,12 +156,18 @@ echo "Results: $PASS passed, $FAIL failed out of 10 runs" ### Cannot Reproduce - **Definition**: Passes all runs locally (0/10 fail) -- **Action**: **Stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation. +- **Before giving up**, try running the **entire Playwright project** that failed in CI with `CI=true` to simulate CI conditions (this sets the worker count to 3, matching CI): + ```bash + cd e2e-tests + CI=true yarn playwright test --project=<project> --retries=0 + ``` + Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This runs all tests in that project concurrently, which can expose race conditions and resource contention that single-test runs miss. +- If the full project run also passes, **stop and ask the user for approval before skipping this step.** Present the reproduction results and the list of possible environment differences. Do not proceed to diagnose-and-fix without explicit user confirmation.
- **Investigation**: Check environment differences between local and CI: - **Cluster version**: CI may use a different OCP version (check the cluster pool version) - **Image version**: CI may use a different RHDH image - **Resource constraints**: CI clusters may have less resources - - **Parallel execution**: CI runs with 3 workers; try `--workers=3` + - **Parallel execution**: CI runs with 3 workers; the full project run above simulates this - **Network**: CI clusters are in `us-east-2` AWS region - **External services**: GitHub API rate limits, Keycloak availability diff --git a/.opencode/skill/e2e-submit-and-review/SKILL.md b/.opencode/skill/e2e-submit-and-review/SKILL.md index 5aedb84414..ff09b9a3f4 100644 --- a/.opencode/skill/e2e-submit-and-review/SKILL.md +++ b/.opencode/skill/e2e-submit-and-review/SKILL.md @@ -130,11 +130,6 @@ The Qodo bot will: 2. Post review comments with suggestions 3. Optionally approve or request changes -Note: The `.pr_agent.toml` in the repo configures Qodo with: -- RAG enabled across `rhdh`, `rhdh-operator`, `rhdh-chart`, and documentation repos -- Auto-review, auto-describe, and auto-improve on PR creation -- Scoped to `e2e-tests` folder changes - ## Step 5: Wait for and Address Qodo Review ### Poll for Review Comments @@ -191,11 +186,11 @@ gh pr comment --repo redhat-developer/rhdh --body "/test ?" ### Step 6b: Wait for the Bot Response -Poll PR comments every 30 seconds (up to 5 minutes) for a response from the `openshift-ci` bot containing the available job list: +The bot usually responds within seconds. 
Poll PR comments for the `openshift-ci` bot's response: ```bash -# Poll for the openshift-ci bot response (check every 30s, up to 10 attempts = 5 min) -for i in $(seq 1 10); do +# Poll for the openshift-ci bot response (check every 5s, up to 12 attempts = 1 min) +for i in $(seq 1 12); do BOT_RESPONSE=$(gh api repos/redhat-developer/rhdh/issues//comments \ --jq '[.[] | select(.user.login == "openshift-ci[bot]" or .user.login == "openshift-ci-robot")] | last | .body // empty') if [[ -n "$BOT_RESPONSE" ]] && echo "$BOT_RESPONSE" | grep -q '/test'; then @@ -203,38 +198,52 @@ for i in $(seq 1 10); do echo "$BOT_RESPONSE" break fi - echo "Waiting for openshift-ci bot response (attempt $i/10)..." - sleep 30 + echo "Waiting for openshift-ci bot response (attempt $i/12)..." + sleep 5 done ``` -If no response is received after 5 minutes, ask the user for guidance. +If no response is received after 1 minute, ask the user for guidance. -### Step 6c: Select the Right Job from the Bot Response +### Step 6c: Understand the Bot Response -Parse the bot's response to find the presubmit job name matching the platform and deployment method from Phase 1. Use these patterns to identify the right job: +The bot's response has two sections: -| Original failure pattern | Look for presubmit containing | -|--------------------------|-------------------------------| -| `*ocp*helm*nightly*` | `*ocp*helm*` (not nightly) | -| `*ocp*operator*nightly*` | `*ocp*operator*` | -| `*aks*helm*` | `*aks*helm*` | -| `*eks*helm*` | `*eks*helm*` | -| `*gke*helm*` | `*gke*helm*` | +1. **Required jobs** — triggered automatically when the PR is marked as ready for review (not on draft PRs). These run the basic presubmit checks: + ``` + /test e2e-ocp-helm + ``` +2. **Optional jobs** — must be triggered explicitly. These include nightly variants, other platforms, and operators: + ``` + /test e2e-ocp-helm-nightly + /test e2e-eks-helm-nightly + /test e2e-aks-operator-nightly + ... 
+ ``` -**Example**: If the original failure was `periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly`, look for a presubmit job in the bot's response like `pull-ci-redhat-developer-rhdh-main-e2e-ocp-v4-17-helm`. +Note: the job names in the bot's response are **shortened** (e.g., `e2e-ocp-helm`), not the full Prow `pull-ci-redhat-developer-rhdh-...` format. Use these short names directly with `/test`. -If no matching job appears in the bot's response, pick the closest available job for the same platform and deployment method **from the list the bot returned**. If no suitable job exists in the list, inform the user and ask how to proceed. +### Step 6d: Select and Trigger the Right Job -### Step 6d: Trigger the Job +Match the original failure to the right presubmit job from the bot's list: -Comment `/test ` using **only** a job name that appeared in the bot's response from Step 6b: +| Original failure pattern | Trigger | +|--------------------------|---------| +| `*ocp*helm*nightly*` | `/test e2e-ocp-helm-nightly` | +| `*ocp*operator*nightly*` | `/test e2e-ocp-operator-nightly` | +| `*ocp*v4-19*helm*` | `/test e2e-ocp-v4-19-helm-nightly` | +| `*aks*helm*` | `/test e2e-aks-helm-nightly` | +| `*eks*helm*` | `/test e2e-eks-helm-nightly` | +| `*gke*operator*` | `/test e2e-gke-operator-nightly` | ```bash -gh pr comment --repo redhat-developer/rhdh --body "/test " +gh pr comment --repo redhat-developer/rhdh --body "/test " ``` -**Never** construct, guess, or infer job names from the original periodic/nightly job name. Only use exact job names from the `openshift-ci` bot's `/test ?` response. 
+**Rules**: +- **Only use job names that appeared in the bot's response** — never construct or guess names +- The required job (`e2e-ocp-helm`) runs automatically — you usually only need to trigger the optional job matching the original failure +- If no matching job exists in the list, inform the user and ask how to proceed ## Step 7: Monitor CI Status diff --git a/.opencode/skill/e2e-verify-fix/SKILL.md b/.opencode/skill/e2e-verify-fix/SKILL.md index 8a24f1f442..2b78e07dbd 100644 --- a/.opencode/skill/e2e-verify-fix/SKILL.md +++ b/.opencode/skill/e2e-verify-fix/SKILL.md @@ -70,9 +70,23 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. +### 3. Full Project Stability Check (if failure was only reproducible with full project) + +If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: + +```bash +cd e2e-tests +set -a && source .env && set +a +CI=true yarn playwright test --project= --retries=0 +``` + +Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. + +**Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. + **IMPORTANT**: Never skip verification steps. If you cannot run tests (e.g., no cluster available, environment issues), **stop and ask the user for explicit approval** before proceeding without verification. Do not assume it's OK to skip. -### 3. Code Quality Checks +### 4. 
Code Quality Checks Run all code quality checks in the e2e-tests workspace: @@ -99,7 +113,7 @@ yarn lint:fix yarn prettier:fix ``` -### 4. Optional: Full Project Regression Check +### 5. Optional: Full Project Regression Check If the fix touches shared utilities or page objects, run the entire Playwright project to check for regressions: From 7bdce1beea3781ab716f6bc10e0ccf0ab3c9fcb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:43:53 +0200 Subject: [PATCH 06/23] fix(skills): fix step numbering, add Playwright report input, script Qodo polling - Fix duplicate ### 5 numbering in e2e-verify-fix (Review the Diff is now ### 6) - Add Playwright report URL to fix-e2e.md input list - Add scripted polling loop for Qodo review (15s interval, 5 min timeout) - Update command to match new Qodo polling interval Assisted-by: OpenCode --- .rulesync/commands/fix-e2e.md | 5 ++-- .../skills/e2e-submit-and-review/SKILL.md | 24 +++++++++++++++---- .rulesync/skills/e2e-verify-fix/SKILL.md | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md index 80dc6fe3c7..aae98473d6 100644 --- a/.rulesync/commands/fix-e2e.md +++ b/.rulesync/commands/fix-e2e.md @@ -12,8 +12,9 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` @@ -146,7 +147,7 @@ Verify the fix: 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. 
Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head <username>:<branch-name> --base <base-branch>` 5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) 7. **Address feedback**: Apply valid suggestions, explain rejections 8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test <job-name>` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md index bc28c31852..33abd88d4c 100644 --- a/.rulesync/skills/e2e-submit-and-review/SKILL.md +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -136,18 +136,32 @@ The Qodo bot will: ### Poll for Review Comments -Check for Qodo review completion (it typically takes 1-3 minutes): +Poll for Qodo review completion (typically takes 1-3 minutes): ```bash -# Check for Qodo bot comments -gh api repos/redhat-developer/rhdh/pulls/<PR_NUMBER>/reviews \ - --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' +# Poll for Qodo bot review (check every 15s, up to 20 attempts = 5 min) +for i in $(seq 1 20); do + REVIEW_STATE=$(gh api repos/redhat-developer/rhdh/pulls/<PR_NUMBER>/reviews \ + --jq '[.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent"))] | last | .state // empty') + if [[ -n "$REVIEW_STATE" ]]; then + echo "Qodo review received (state: $REVIEW_STATE)" + break + fi + echo "Waiting for Qodo review (attempt $i/20)..."
+ sleep 15 +done +``` -# Check for inline comments +If a review is received, fetch the inline comments: + +```bash +# Get inline review comments gh api repos/redhat-developer/rhdh/pulls/<PR_NUMBER>/comments \ --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' ``` +If no review is received after 5 minutes, ask the user for guidance. + ### Address Review Comments For each review comment: diff --git a/.rulesync/skills/e2e-verify-fix/SKILL.md b/.rulesync/skills/e2e-verify-fix/SKILL.md index 19a02920e5..42aebb7866 100644 --- a/.rulesync/skills/e2e-verify-fix/SKILL.md +++ b/.rulesync/skills/e2e-verify-fix/SKILL.md @@ -130,7 +130,7 @@ This is optional for isolated spec file changes but recommended for changes to: - `e2e-tests/playwright/data/` (shared test data) - `playwright.config.ts` (configuration) -### 5. Review the Diff +### 6. Review the Diff Before submitting, review all changes: From 31fa6c7b64e0f222b1ac1e8dcb2219041c2932e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:45:11 +0200 Subject: [PATCH 07/23] fix(skills): standardize healer initialization phrasing across all 3 skills Assisted-by: OpenCode --- .rulesync/skills/e2e-diagnose-and-fix/SKILL.md | 6 ++---- .rulesync/skills/e2e-reproduce-failure/SKILL.md | 6 +++--- .rulesync/skills/e2e-verify-fix/SKILL.md | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md index 46d5cb9c17..aa9c00529c 100644 --- a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -20,7 +20,7 @@ Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when yo ### Healer Initialization -Before first use in a session, initialize the healer agent in the `e2e-tests/` directory.
Use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -32,9 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. - -This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. ### Environment Setup for Healer diff --git a/.rulesync/skills/e2e-reproduce-failure/SKILL.md b/.rulesync/skills/e2e-reproduce-failure/SKILL.md index 2244acc474..8af7893e06 100644 --- a/.rulesync/skills/e2e-reproduce-failure/SKILL.md +++ b/.rulesync/skills/e2e-reproduce-failure/SKILL.md @@ -46,9 +46,9 @@ curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. -### Healer Initialization (First Time Only) +### Healer Initialization -Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -60,7 +60,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. 
### Environment Setup diff --git a/.rulesync/skills/e2e-verify-fix/SKILL.md b/.rulesync/skills/e2e-verify-fix/SKILL.md index 42aebb7866..8911d149fa 100644 --- a/.rulesync/skills/e2e-verify-fix/SKILL.md +++ b/.rulesync/skills/e2e-verify-fix/SKILL.md @@ -20,7 +20,7 @@ Always use the Playwright healer agent for test verification. The healer provide ### Healer Initialization -If not already initialized in this session, use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -32,7 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. From 7dd22f5efa6b2bae006701242c3db549879a1fb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:46:51 +0200 Subject: [PATCH 08/23] fix(skills): document handling of multiple test failures from a single job Present classified failures, ask user which to fix, group by shared root cause when possible. Assisted-by: OpenCode --- .rulesync/commands/fix-e2e.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md index aae98473d6..ae0a4a993d 100644 --- a/.rulesync/commands/fix-e2e.md +++ b/.rulesync/commands/fix-e2e.md @@ -37,6 +37,12 @@ Parse the input to extract: **Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. +**Multiple failures**: If the job has more than one failing test: +1. 
Present all failures in a table with test name, spec file, error type, and consistency (e.g., "failed 3/3" vs "failed 1/3") +2. Group failures that likely share a root cause (same spec file, same error pattern, same page object) +3. **Ask the user** which failure(s) to focus on +4. If failures share a root cause, fix them together in one PR. If they're unrelated, fix them in separate branches/PRs — complete one before starting the next. + ### Phase 2: Setup Fix Branch First, check the current branch: From 7f6624010db63ca1a9e3d59e3c46bcd61a4a06de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:47:45 +0200 Subject: [PATCH 09/23] fix(skills): clarify when full project stability check is required vs optional Assisted-by: OpenCode --- .rulesync/skills/e2e-verify-fix/SKILL.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.rulesync/skills/e2e-verify-fix/SKILL.md b/.rulesync/skills/e2e-verify-fix/SKILL.md index 8911d149fa..1b9b4b8d62 100644 --- a/.rulesync/skills/e2e-verify-fix/SKILL.md +++ b/.rulesync/skills/e2e-verify-fix/SKILL.md @@ -72,9 +72,11 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. -### 3. Full Project Stability Check (if failure was only reproducible with full project) +### 3. Full Project Stability Check -If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: +> **When to run**: This step is **required** if the failure was only reproducible when running the full CI project (`CI=true yarn playwright test --project=<project>`) during `e2e-reproduce-failure`. If the failure reproduced in isolated single-test runs, this step is optional but still recommended.
+ Run the full project to confirm the fix holds under CI-like concurrency: ```bash cd e2e-tests @@ -82,7 +84,7 @@ set -a && source .env && set +a CI=true yarn playwright test --project=<project> --retries=0 ``` -Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. +Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This verifies the fix under the same worker count and test interaction conditions that triggered the original failure. **Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. From 5b2d2615f2148a081d2b6b500b18618fcc987540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:50:50 +0200 Subject: [PATCH 10/23] fix(skills): add Cursor fallback notes for Playwright healer agent The healer agent is only supported in OpenCode and Claude Code. Add notes to all 3 healer-using skills directing Cursor users to direct execution and manual diagnosis instead. Assisted-by: OpenCode --- .rulesync/skills/e2e-diagnose-and-fix/SKILL.md | 2 ++ .rulesync/skills/e2e-reproduce-failure/SKILL.md | 4 +++- .rulesync/skills/e2e-verify-fix/SKILL.md | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md index aa9c00529c..6cd5a266c9 100644 --- a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -18,6 +18,8 @@ Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when yo **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer.
The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and proceed directly to the "Failure Pattern Recognition" section below. Use manual diagnosis with direct test execution (`yarn playwright test ...`) and headed/debug mode (`--headed`, `--debug`) for live UI inspection. + ### Healer Initialization If not already initialized in this session, initialize the healer agent in `e2e-tests/`: diff --git a/.rulesync/skills/e2e-reproduce-failure/SKILL.md b/.rulesync/skills/e2e-reproduce-failure/SKILL.md index 8af7893e06..776bbe4b6b 100644 --- a/.rulesync/skills/e2e-reproduce-failure/SKILL.md +++ b/.rulesync/skills/e2e-reproduce-failure/SKILL.md @@ -46,6 +46,8 @@ curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use the "Fallback: Direct Execution" method below instead. 
+ ### Healer Initialization If not already initialized in this session, initialize the healer agent in `e2e-tests/`: @@ -96,7 +98,7 @@ Report: pass/fail, exact error message, what the UI shows at the point of failur ### Fallback: Direct Execution -If the healer agent is unavailable, run tests directly: +If the healer agent is unavailable (e.g., in Cursor), run tests directly: ```bash cd e2e-tests diff --git a/.rulesync/skills/e2e-verify-fix/SKILL.md b/.rulesync/skills/e2e-verify-fix/SKILL.md index 1b9b4b8d62..2ef8c8fa25 100644 --- a/.rulesync/skills/e2e-verify-fix/SKILL.md +++ b/.rulesync/skills/e2e-verify-fix/SKILL.md @@ -18,6 +18,8 @@ Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use direct test execution for all verification steps (`yarn playwright test ...`). 
+ ### Healer Initialization If not already initialized in this session, initialize the healer agent in `e2e-tests/`: From 7a2a1443d1882b9e8ef9db83288e6d52e87b7070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 11:52:06 +0200 Subject: [PATCH 11/23] chore: regenerate rulesync output files Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 11 +++++++-- .claude/skills/e2e-diagnose-and-fix/SKILL.md | 8 +++---- .claude/skills/e2e-reproduce-failure/SKILL.md | 10 ++++---- .claude/skills/e2e-submit-and-review/SKILL.md | 24 +++++++++++++++---- .claude/skills/e2e-verify-fix/SKILL.md | 16 ++++++++----- .cursor/commands/fix-e2e.md | 11 +++++++-- .cursor/skills/e2e-diagnose-and-fix/SKILL.md | 8 +++---- .cursor/skills/e2e-reproduce-failure/SKILL.md | 10 ++++---- .cursor/skills/e2e-submit-and-review/SKILL.md | 24 +++++++++++++++---- .cursor/skills/e2e-verify-fix/SKILL.md | 16 ++++++++----- .opencode/command/fix-e2e.md | 11 +++++++-- .opencode/skill/e2e-diagnose-and-fix/SKILL.md | 8 +++---- .../skill/e2e-reproduce-failure/SKILL.md | 10 ++++---- .../skill/e2e-submit-and-review/SKILL.md | 24 +++++++++++++++---- .opencode/skill/e2e-verify-fix/SKILL.md | 16 ++++++++----- 15 files changed, 144 insertions(+), 63 deletions(-) diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md index 44edfc94cd..b56220cab4 100644 --- a/.claude/commands/fix-e2e.md +++ b/.claude/commands/fix-e2e.md @@ -10,8 +10,9 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` @@ -34,6 +35,12 @@ Parse 
the input to extract: **Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. +**Multiple failures**: If the job has more than one failing test: +1. Present all failures in a table with test name, spec file, error type, and consistency (e.g., "failed 3/3" vs "failed 1/3") +2. Group failures that likely share a root cause (same spec file, same error pattern, same page object) +3. **Ask the user** which failure(s) to focus on +4. If failures share a root cause, fix them together in one PR. If they're unrelated, fix them in separate branches/PRs — complete one before starting the next. + ### Phase 2: Setup Fix Branch First, check the current branch: @@ -144,7 +151,7 @@ Verify the fix: 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` 5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) 7. **Address feedback**: Apply valid suggestions, explain rejections 8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. 
**Monitor CI**: Watch CI checks with `gh pr checks` diff --git a/.claude/skills/e2e-diagnose-and-fix/SKILL.md b/.claude/skills/e2e-diagnose-and-fix/SKILL.md index 16b12ea2ee..f7bfa5cb2b 100644 --- a/.claude/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.claude/skills/e2e-diagnose-and-fix/SKILL.md @@ -16,9 +16,11 @@ Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when yo **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and proceed directly to the "Failure Pattern Recognition" section below. Use manual diagnosis with direct test execution (`yarn playwright test ...`) and headed/debug mode (`--headed`, `--debug`) for live UI inspection. + ### Healer Initialization -Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. Use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -30,9 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. - -This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. 
### Environment Setup for Healer diff --git a/.claude/skills/e2e-reproduce-failure/SKILL.md b/.claude/skills/e2e-reproduce-failure/SKILL.md index 58d675100d..2cdf6a445a 100644 --- a/.claude/skills/e2e-reproduce-failure/SKILL.md +++ b/.claude/skills/e2e-reproduce-failure/SKILL.md @@ -44,9 +44,11 @@ curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. -### Healer Initialization (First Time Only) +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use the "Fallback: Direct Execution" method below instead. -Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: +### Healer Initialization + +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -58,7 +60,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. 
### Environment Setup @@ -94,7 +96,7 @@ Report: pass/fail, exact error message, what the UI shows at the point of failur ### Fallback: Direct Execution -If the healer agent is unavailable, run tests directly: +If the healer agent is unavailable (e.g., in Cursor), run tests directly: ```bash cd e2e-tests diff --git a/.claude/skills/e2e-submit-and-review/SKILL.md b/.claude/skills/e2e-submit-and-review/SKILL.md index ff09b9a3f4..f60a90bd06 100644 --- a/.claude/skills/e2e-submit-and-review/SKILL.md +++ b/.claude/skills/e2e-submit-and-review/SKILL.md @@ -134,18 +134,32 @@ The Qodo bot will: ### Poll for Review Comments -Check for Qodo review completion (it typically takes 1-3 minutes): +Poll for Qodo review completion (typically takes 1-3 minutes): ```bash -# Check for Qodo bot comments -gh api repos/redhat-developer/rhdh/pulls//reviews \ - --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' +# Poll for Qodo bot review (check every 15s, up to 20 attempts = 5 min) +for i in $(seq 1 20); do + REVIEW_STATE=$(gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '[.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent"))] | last | .state // empty') + if [[ -n "$REVIEW_STATE" ]]; then + echo "Qodo review received (state: $REVIEW_STATE)" + break + fi + echo "Waiting for Qodo review (attempt $i/20)..." + sleep 15 +done +``` -# Check for inline comments +If a review is received, fetch the inline comments: + +```bash +# Get inline review comments gh api repos/redhat-developer/rhdh/pulls//comments \ --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' ``` +If no review is received after 5 minutes, ask the user for guidance. 
+ ### Address Review Comments For each review comment: diff --git a/.claude/skills/e2e-verify-fix/SKILL.md b/.claude/skills/e2e-verify-fix/SKILL.md index 2b78e07dbd..f226994f7d 100644 --- a/.claude/skills/e2e-verify-fix/SKILL.md +++ b/.claude/skills/e2e-verify-fix/SKILL.md @@ -16,9 +16,11 @@ Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use direct test execution for all verification steps (`yarn playwright test ...`). + ### Healer Initialization -If not already initialized in this session, use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -30,7 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. @@ -70,9 +72,11 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. -### 3. Full Project Stability Check (if failure was only reproducible with full project) +### 3. 
Full Project Stability Check + +> **When to run**: This step is **required** if the failure was only reproducible when running the full CI project (`CI=true yarn playwright test --project=`) during `e2e-reproduce-failure`. If the failure reproduced in isolated single-test runs, this step is optional but still recommended. -If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: +Run the full project to confirm the fix holds under CI-like concurrency: ```bash cd e2e-tests @@ -80,7 +84,7 @@ set -a && source .env && set +a CI=true yarn playwright test --project= --retries=0 ``` -Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. +Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This verifies the fix under the same worker count and test interaction conditions that triggered the original failure. **Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. @@ -128,7 +132,7 @@ This is optional for isolated spec file changes but recommended for changes to: - `e2e-tests/playwright/data/` (shared test data) - `playwright.config.ts` (configuration) -### 5. Review the Diff +### 6. 
Review the Diff Before submitting, review all changes: diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md index f1c1c84c7a..010e932e46 100644 --- a/.cursor/commands/fix-e2e.md +++ b/.cursor/commands/fix-e2e.md @@ -7,8 +7,9 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` @@ -31,6 +32,12 @@ Parse the input to extract: **Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. +**Multiple failures**: If the job has more than one failing test: +1. Present all failures in a table with test name, spec file, error type, and consistency (e.g., "failed 3/3" vs "failed 1/3") +2. Group failures that likely share a root cause (same spec file, same error pattern, same page object) +3. **Ask the user** which failure(s) to focus on +4. If failures share a root cause, fix them together in one PR. If they're unrelated, fix them in separate branches/PRs — complete one before starting the next. + ### Phase 2: Setup Fix Branch First, check the current branch: @@ -141,7 +148,7 @@ Verify the fix: 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` 5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +6. 
**Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) 7. **Address feedback**: Apply valid suggestions, explain rejections 8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` diff --git a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md index f0a1726192..977f8c9b53 100644 --- a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md @@ -14,9 +14,11 @@ Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when yo **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and proceed directly to the "Failure Pattern Recognition" section below. Use manual diagnosis with direct test execution (`yarn playwright test ...`) and headed/debug mode (`--headed`, `--debug`) for live UI inspection. + ### Healer Initialization -Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. 
Use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -28,9 +30,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. - -This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. ### Environment Setup for Healer diff --git a/.cursor/skills/e2e-reproduce-failure/SKILL.md b/.cursor/skills/e2e-reproduce-failure/SKILL.md index fa31ef6f79..265d29d2b9 100644 --- a/.cursor/skills/e2e-reproduce-failure/SKILL.md +++ b/.cursor/skills/e2e-reproduce-failure/SKILL.md @@ -42,9 +42,11 @@ curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. -### Healer Initialization (First Time Only) +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use the "Fallback: Direct Execution" method below instead. 
-Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: +### Healer Initialization + +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -56,7 +58,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. ### Environment Setup @@ -92,7 +94,7 @@ Report: pass/fail, exact error message, what the UI shows at the point of failur ### Fallback: Direct Execution -If the healer agent is unavailable, run tests directly: +If the healer agent is unavailable (e.g., in Cursor), run tests directly: ```bash cd e2e-tests diff --git a/.cursor/skills/e2e-submit-and-review/SKILL.md b/.cursor/skills/e2e-submit-and-review/SKILL.md index b7d23aa85b..325bee9c7f 100644 --- a/.cursor/skills/e2e-submit-and-review/SKILL.md +++ b/.cursor/skills/e2e-submit-and-review/SKILL.md @@ -132,18 +132,32 @@ The Qodo bot will: ### Poll for Review Comments -Check for Qodo review completion (it typically takes 1-3 minutes): +Poll for Qodo review completion (typically takes 1-3 minutes): ```bash -# Check for Qodo bot comments -gh api repos/redhat-developer/rhdh/pulls//reviews \ - --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' +# Poll for Qodo bot review (check every 15s, up to 20 attempts = 5 min) +for i in $(seq 1 20); do + REVIEW_STATE=$(gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '[.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent"))] | last | .state // empty') + if [[ -n "$REVIEW_STATE" ]]; then + echo "Qodo review received (state: $REVIEW_STATE)" + break + fi + echo "Waiting for Qodo review (attempt $i/20)..." 
+ sleep 15 +done +``` -# Check for inline comments +If a review is received, fetch the inline comments: + +```bash +# Get inline review comments gh api repos/redhat-developer/rhdh/pulls//comments \ --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' ``` +If no review is received after 5 minutes, ask the user for guidance. + ### Address Review Comments For each review comment: diff --git a/.cursor/skills/e2e-verify-fix/SKILL.md b/.cursor/skills/e2e-verify-fix/SKILL.md index 6016d4f731..08c412fdb2 100644 --- a/.cursor/skills/e2e-verify-fix/SKILL.md +++ b/.cursor/skills/e2e-verify-fix/SKILL.md @@ -14,9 +14,11 @@ Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use direct test execution for all verification steps (`yarn playwright test ...`). + ### Healer Initialization -If not already initialized in this session, use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -28,7 +30,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. 
@@ -68,9 +70,11 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. -### 3. Full Project Stability Check (if failure was only reproducible with full project) +### 3. Full Project Stability Check + +> **When to run**: This step is **required** if the failure was only reproducible when running the full CI project (`CI=true yarn playwright test --project=`) during `e2e-reproduce-failure`. If the failure reproduced in isolated single-test runs, this step is optional but still recommended. -If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: +Run the full project to confirm the fix holds under CI-like concurrency: ```bash cd e2e-tests @@ -78,7 +82,7 @@ set -a && source .env && set +a CI=true yarn playwright test --project= --retries=0 ``` -Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. +Replace `` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This verifies the fix under the same worker count and test interaction conditions that triggered the original failure. **Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. @@ -126,7 +130,7 @@ This is optional for isolated spec file changes but recommended for changes to: - `e2e-tests/playwright/data/` (shared test data) - `playwright.config.ts` (configuration) -### 5. Review the Diff +### 6. 
Review the Diff Before submitting, review all changes: diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md index 44edfc94cd..b56220cab4 100644 --- a/.opencode/command/fix-e2e.md +++ b/.opencode/command/fix-e2e.md @@ -10,8 +10,9 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` +- **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` @@ -34,6 +35,12 @@ Parse the input to extract: **Decision gate**: If the input cannot be parsed (invalid URL, inaccessible Jira ticket), report the error and ask the user for clarification. +**Multiple failures**: If the job has more than one failing test: +1. Present all failures in a table with test name, spec file, error type, and consistency (e.g., "failed 3/3" vs "failed 1/3") +2. Group failures that likely share a root cause (same spec file, same error pattern, same page object) +3. **Ask the user** which failure(s) to focus on +4. If failures share a root cause, fix them together in one PR. If they're unrelated, fix them in separate branches/PRs — complete one before starting the next. + ### Phase 2: Setup Fix Branch First, check the current branch: @@ -144,7 +151,7 @@ Verify the fix: 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` 5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot comments (check every 60s, up to 10 minutes) +6. 
**Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) 7. **Address feedback**: Apply valid suggestions, explain rejections 8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` diff --git a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md index 16b12ea2ee..f7bfa5cb2b 100644 --- a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md +++ b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md @@ -16,9 +16,11 @@ Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when yo **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and proceed directly to the "Failure Pattern Recognition" section below. Use manual diagnosis with direct test execution (`yarn playwright test ...`) and headed/debug mode (`--headed`, `--debug`) for live UI inspection. + ### Healer Initialization -Before first use in a session, initialize the healer agent in the `e2e-tests/` directory. 
Use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -30,9 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. - -This creates configuration files with the Playwright MCP server and agent definitions. The generated files are local tooling — do NOT commit them. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. ### Environment Setup for Healer diff --git a/.opencode/skill/e2e-reproduce-failure/SKILL.md b/.opencode/skill/e2e-reproduce-failure/SKILL.md index 58d675100d..2cdf6a445a 100644 --- a/.opencode/skill/e2e-reproduce-failure/SKILL.md +++ b/.opencode/skill/e2e-reproduce-failure/SKILL.md @@ -44,9 +44,11 @@ curl -sSk "$BASE_URL" -o /dev/null -w "HTTP Status: %{http_code}\n" Always use the Playwright healer agent to run and reproduce failing tests. The healer provides richer diagnostics than plain `yarn playwright test` — it can debug step-by-step, inspect the live UI, and collect detailed failure context automatically. -### Healer Initialization (First Time Only) +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use the "Fallback: Direct Execution" method below instead. 
-Before first use in a session, initialize the healer agent with the `--loop` flag matching your AI coding tool: +### Healer Initialization + +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -58,7 +60,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. ### Environment Setup @@ -94,7 +96,7 @@ Report: pass/fail, exact error message, what the UI shows at the point of failur ### Fallback: Direct Execution -If the healer agent is unavailable, run tests directly: +If the healer agent is unavailable (e.g., in Cursor), run tests directly: ```bash cd e2e-tests diff --git a/.opencode/skill/e2e-submit-and-review/SKILL.md b/.opencode/skill/e2e-submit-and-review/SKILL.md index ff09b9a3f4..f60a90bd06 100644 --- a/.opencode/skill/e2e-submit-and-review/SKILL.md +++ b/.opencode/skill/e2e-submit-and-review/SKILL.md @@ -134,18 +134,32 @@ The Qodo bot will: ### Poll for Review Comments -Check for Qodo review completion (it typically takes 1-3 minutes): +Poll for Qodo review completion (typically takes 1-3 minutes): ```bash -# Check for Qodo bot comments -gh api repos/redhat-developer/rhdh/pulls//reviews \ - --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | .state' +# Poll for Qodo bot review (check every 15s, up to 20 attempts = 5 min) +for i in $(seq 1 20); do + REVIEW_STATE=$(gh api repos/redhat-developer/rhdh/pulls//reviews \ + --jq '[.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent"))] | last | .state // empty') + if [[ -n "$REVIEW_STATE" ]]; then + echo "Qodo review received (state: $REVIEW_STATE)" + break + fi + echo "Waiting for Qodo review (attempt $i/20)..." 
+ sleep 15 +done +``` -# Check for inline comments +If a review is received, fetch the inline comments: + +```bash +# Get inline review comments gh api repos/redhat-developer/rhdh/pulls//comments \ --jq '.[] | select(.user.login | test("github-actions|qodo|codium|pr-agent")) | {path: .path, line: .line, body: .body}' ``` +If no review is received after 5 minutes, ask the user for guidance. + ### Address Review Comments For each review comment: diff --git a/.opencode/skill/e2e-verify-fix/SKILL.md b/.opencode/skill/e2e-verify-fix/SKILL.md index 2b78e07dbd..f226994f7d 100644 --- a/.opencode/skill/e2e-verify-fix/SKILL.md +++ b/.opencode/skill/e2e-verify-fix/SKILL.md @@ -16,9 +16,11 @@ Use this skill after implementing a fix (via `e2e-diagnose-and-fix`) to confirm Always use the Playwright healer agent for test verification. The healer provides step-by-step debugging if a run fails, making it faster to iterate on fixes. +> **Note**: The Playwright healer agent is currently supported in **OpenCode** and **Claude Code** only. In **Cursor** or other tools without Playwright agent support, skip the healer initialization and use direct test execution for all verification steps (`yarn playwright test ...`). + ### Healer Initialization -If not already initialized in this session, use the `--loop` flag matching your AI coding tool: +If not already initialized in this session, initialize the healer agent in `e2e-tests/`: ```bash cd e2e-tests @@ -30,7 +32,7 @@ npx playwright init-agents --loop=opencode npx playwright init-agents --loop=claude ``` -See https://playwright.dev/docs/test-agents for the full list of supported tools and options. +See https://playwright.dev/docs/test-agents for the full list of supported tools and options. The generated files are local tooling — do NOT commit them. Ensure the `.env` file exists — generate it with `source local-test-setup.sh --env`. To regenerate (e.g. after token expiry), re-run the same command. 
@@ -70,9 +72,11 @@ echo "Stability results: $PASS/5 passed" **Acceptance criteria**: 5/5 passes. If any run fails, invoke the healer agent on the failing run to diagnose and fix the remaining issue — do not manually guess at fixes. -### 3. Full Project Stability Check (if failure was only reproducible with full project) +### 3. Full Project Stability Check + +> **When to run**: This step is **required** if the failure was only reproducible when running the full CI project (`CI=true yarn playwright test --project=<project>`) during `e2e-reproduce-failure`. If the failure reproduced in isolated single-test runs, this step is optional but still recommended. -If during reproduction (in `e2e-reproduce-failure`) the failure only appeared when running the full CI project (not in isolated test runs), the verification **must** also use the full project run to confirm the fix: +Run the full project to confirm the fix holds under CI-like concurrency: ```bash cd e2e-tests @@ -80,7 +84,7 @@ set -a && source .env && set +a CI=true yarn playwright test --project=<project> --retries=0 ``` -Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This ensures the fix holds under the same concurrency and test interaction conditions that triggered the original failure. +Replace `<project>` with the project from the CI failure (e.g., `showcase`, `showcase-rbac`). This verifies the fix under the same worker count and test interaction conditions that triggered the original failure. **Acceptance criteria**: The full project run must pass. If the fixed test still fails when run alongside other tests, the fix is incomplete — return to `e2e-diagnose-and-fix`. @@ -128,7 +132,7 @@ This is optional for isolated spec file changes but recommended for changes to: - `e2e-tests/playwright/data/` (shared test data) - `playwright.config.ts` (configuration) -### 5. Review the Diff +### 6.
Review the Diff Before submitting, review all changes: From 7dfcde4ebe2bd1e9d023a27a6d7820e43ccd3c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 14:49:04 +0200 Subject: [PATCH 12/23] fix: prevent Playwright HTML report from blocking the session Set PLAYWRIGHT_HTML_OPEN=never in generated .env and replace show-report instruction with direct file access. Assisted-by: OpenCode --- .claude/skills/e2e-reproduce-failure/SKILL.md | 5 +---- .cursor/skills/e2e-reproduce-failure/SKILL.md | 5 +---- .opencode/skill/e2e-reproduce-failure/SKILL.md | 5 +---- .rulesync/skills/e2e-reproduce-failure/SKILL.md | 5 +---- e2e-tests/local-test-setup.sh | 3 +++ 5 files changed, 7 insertions(+), 16 deletions(-) diff --git a/.claude/skills/e2e-reproduce-failure/SKILL.md b/.claude/skills/e2e-reproduce-failure/SKILL.md index 2cdf6a445a..61f5eb1f30 100644 --- a/.claude/skills/e2e-reproduce-failure/SKILL.md +++ b/.claude/skills/e2e-reproduce-failure/SKILL.md @@ -186,10 +186,7 @@ yarn playwright show-trace test-results//trace.zip ### HTML Report -```bash -# Generate and open the HTML report -yarn playwright show-report -``` +**Never use `yarn playwright show-report`** — it starts a blocking HTTP server that will hang the session. Instead, the HTML report is generated automatically in `playwright-report/` after test runs. To view it, open `playwright-report/index.html` directly in a browser, or use Playwright MCP to navigate to it. 
### Screenshots and Videos diff --git a/.cursor/skills/e2e-reproduce-failure/SKILL.md b/.cursor/skills/e2e-reproduce-failure/SKILL.md index 265d29d2b9..3ace276267 100644 --- a/.cursor/skills/e2e-reproduce-failure/SKILL.md +++ b/.cursor/skills/e2e-reproduce-failure/SKILL.md @@ -184,10 +184,7 @@ yarn playwright show-trace test-results//trace.zip ### HTML Report -```bash -# Generate and open the HTML report -yarn playwright show-report -``` +**Never use `yarn playwright show-report`** — it starts a blocking HTTP server that will hang the session. Instead, the HTML report is generated automatically in `playwright-report/` after test runs. To view it, open `playwright-report/index.html` directly in a browser, or use Playwright MCP to navigate to it. ### Screenshots and Videos diff --git a/.opencode/skill/e2e-reproduce-failure/SKILL.md b/.opencode/skill/e2e-reproduce-failure/SKILL.md index 2cdf6a445a..61f5eb1f30 100644 --- a/.opencode/skill/e2e-reproduce-failure/SKILL.md +++ b/.opencode/skill/e2e-reproduce-failure/SKILL.md @@ -186,10 +186,7 @@ yarn playwright show-trace test-results//trace.zip ### HTML Report -```bash -# Generate and open the HTML report -yarn playwright show-report -``` +**Never use `yarn playwright show-report`** — it starts a blocking HTTP server that will hang the session. Instead, the HTML report is generated automatically in `playwright-report/` after test runs. To view it, open `playwright-report/index.html` directly in a browser, or use Playwright MCP to navigate to it. 
### Screenshots and Videos diff --git a/.rulesync/skills/e2e-reproduce-failure/SKILL.md b/.rulesync/skills/e2e-reproduce-failure/SKILL.md index 776bbe4b6b..27dac408be 100644 --- a/.rulesync/skills/e2e-reproduce-failure/SKILL.md +++ b/.rulesync/skills/e2e-reproduce-failure/SKILL.md @@ -188,10 +188,7 @@ yarn playwright show-trace test-results//trace.zip ### HTML Report -```bash -# Generate and open the HTML report -yarn playwright show-report -``` +**Never use `yarn playwright show-report`** — it starts a blocking HTTP server that will hang the session. Instead, the HTML report is generated automatically in `playwright-report/` after test runs. To view it, open `playwright-report/index.html` directly in a browser, or use Playwright MCP to navigate to it. ### Screenshots and Videos diff --git a/e2e-tests/local-test-setup.sh b/e2e-tests/local-test-setup.sh index 0d26c95957..f6296ebd79 100755 --- a/e2e-tests/local-test-setup.sh +++ b/e2e-tests/local-test-setup.sh @@ -165,6 +165,9 @@ if [[ "$GENERATE_ENV" == "true" ]]; then echo "# Auto-generated by local-test-setup.sh --env — do not commit" echo "# Regenerate by running: source local-test-setup.sh --env" echo "" + echo "# Prevent Playwright from opening a blocking HTTP server for HTML reports" + echo "PLAYWRIGHT_HTML_OPEN='never'" + echo "" echo "BASE_URL=$(env_quote "$BASE_URL")" echo "K8S_CLUSTER_URL=$(env_quote "$K8S_CLUSTER_URL")" echo "K8S_CLUSTER_TOKEN=$(env_quote "$K8S_CLUSTER_TOKEN")" From 21d44e5d04f8438e9b0cab4497edb31c8119a698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:31:42 +0200 Subject: [PATCH 13/23] refactor: deduplicate e2e-fix-workflow, cross-reference existing rules Replace duplicated coding conventions, debugging modes, and reference files with cross-references to playwright-locators and ci-e2e-testing. 
Assisted-by: OpenCode --- .claude/rules/e2e-fix-workflow.md | 83 +++----------------------- .cursor/rules/e2e-fix-workflow.mdc | 83 +++----------------------- .opencode/memories/e2e-fix-workflow.md | 83 +++----------------------- .rulesync/rules/e2e-fix-workflow.md | 83 +++----------------------- 4 files changed, 28 insertions(+), 304 deletions(-) diff --git a/.claude/rules/e2e-fix-workflow.md b/.claude/rules/e2e-fix-workflow.md index 885604d1e0..d1c02a48e9 100644 --- a/.claude/rules/e2e-fix-workflow.md +++ b/.claude/rules/e2e-fix-workflow.md @@ -153,18 +153,7 @@ If the test passes on first run, repeat 10 times: - **Mixed results** → flaky (focus on reliability improvements) - **0/10 pass** → consistent failure -### Debugging Modes - -```bash -# Headed (visible browser) -yarn playwright test --project= --headed - -# Debug (Playwright Inspector) -yarn playwright test --project= --debug - -# View trace -yarn playwright show-trace test-results//trace.zip -``` +For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. ## Diagnosing and Fixing Tests @@ -177,61 +166,11 @@ yarn playwright show-trace test-results//trace.zip 5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional 6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` -### RHDH Coding Conventions (Required) - -**Locators** — Always use semantic role-based locators: -```typescript -// GOOD -page.getByRole('button', { name: 'Create' }) -page.getByRole('heading', { name: 'Catalog' }) -page.getByText('No results found') - -// BAD — deprecated CSS selectors -page.locator('.MuiButton-root') -page.locator('[data-testid="..."]') -``` - -**Component annotations** — Every spec file must have: -```typescript -test.beforeAll(async ({}, testInfo) => { - testInfo.annotations.push({ - type: 'component', - description: 'your_component_name', - }); -}); -``` - -**Retry patterns** for async assertions: -```typescript -await expect(async () => { - await page.reload(); - await expect(page.getByText('entity')).toBeVisible(); -}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); -``` - -**Conditional skips**: -```typescript -import { skipIfJobName } from '../utils/helper'; -import * as constants from '../utils/constants'; -skipIfJobName(constants.GKE_JOBS); -``` +### RHDH Coding Conventions -**Forbidden patterns**: -- `page.waitForNetworkIdle()` / `networkidle` -- Raw CSS class selectors (`.MuiButton-root`) -- `page.waitForTimeout()` for synchronization -- Hardcoded secrets or credentials - -### Key Utility Classes - -| Class | Path | Purpose | -|-------|------|---------| -| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | -| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | -| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | -| `KubeClient` | `utils/kube-client.ts` | K8s resource management | -| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | -| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | +All test code must follow the project's coding rules: +- **`playwright-locators`** — locator 
priority, anti-patterns, assertions, Page Objects +- **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts ### Product Bug Decision @@ -332,18 +271,10 @@ Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/rete ## Reference Files +See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: + | Category | Key files | |----------|-----------| -| Playwright config | `e2e-tests/playwright.config.ts` | -| Project names (SOT) | `e2e-tests/playwright/projects.json` | -| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | -| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | -| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | -| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | -| ConfigMaps | `.ci/pipelines/resources/config_map/` | -| Helm values | `.ci/pipelines/value_files/` | -| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | -| Environment vars | `.ci/pipelines/env_variables.sh` | | Local test runner | `e2e-tests/local-run.sh` | | Local test env | `e2e-tests/local-test-setup.sh` | | Qodo config | `.pr_agent.toml` | diff --git a/.cursor/rules/e2e-fix-workflow.mdc b/.cursor/rules/e2e-fix-workflow.mdc index 4b7993096d..9bd7f3ed3d 100644 --- a/.cursor/rules/e2e-fix-workflow.mdc +++ b/.cursor/rules/e2e-fix-workflow.mdc @@ -156,18 +156,7 @@ If the test passes on first run, repeat 10 times: - **Mixed results** → flaky (focus on reliability improvements) - **0/10 pass** → consistent failure -### Debugging Modes - -```bash -# Headed (visible browser) -yarn playwright test --project= --headed - -# Debug (Playwright Inspector) -yarn playwright test --project= --debug - -# View trace -yarn playwright show-trace test-results//trace.zip -``` +For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. 
## Diagnosing and Fixing Tests @@ -180,61 +169,11 @@ yarn playwright show-trace test-results//trace.zip 5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional 6. **Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` -### RHDH Coding Conventions (Required) - -**Locators** — Always use semantic role-based locators: -```typescript -// GOOD -page.getByRole('button', { name: 'Create' }) -page.getByRole('heading', { name: 'Catalog' }) -page.getByText('No results found') - -// BAD — deprecated CSS selectors -page.locator('.MuiButton-root') -page.locator('[data-testid="..."]') -``` - -**Component annotations** — Every spec file must have: -```typescript -test.beforeAll(async ({}, testInfo) => { - testInfo.annotations.push({ - type: 'component', - description: 'your_component_name', - }); -}); -``` - -**Retry patterns** for async assertions: -```typescript -await expect(async () => { - await page.reload(); - await expect(page.getByText('entity')).toBeVisible(); -}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); -``` - -**Conditional skips**: -```typescript -import { skipIfJobName } from '../utils/helper'; -import * as constants from '../utils/constants'; -skipIfJobName(constants.GKE_JOBS); -``` +### RHDH Coding Conventions -**Forbidden patterns**: -- `page.waitForNetworkIdle()` / `networkidle` -- Raw CSS class selectors (`.MuiButton-root`) -- `page.waitForTimeout()` for synchronization -- Hardcoded secrets or credentials - -### Key Utility Classes - -| Class | Path | Purpose | -|-------|------|---------| -| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | -| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | -| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | -| `KubeClient` | `utils/kube-client.ts` | K8s resource management | -| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector 
helpers | -| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | +All test code must follow the project's coding rules: +- **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects +- **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts ### Product Bug Decision @@ -335,18 +274,10 @@ Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/rete ## Reference Files +See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: + | Category | Key files | |----------|-----------| -| Playwright config | `e2e-tests/playwright.config.ts` | -| Project names (SOT) | `e2e-tests/playwright/projects.json` | -| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | -| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | -| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | -| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | -| ConfigMaps | `.ci/pipelines/resources/config_map/` | -| Helm values | `.ci/pipelines/value_files/` | -| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | -| Environment vars | `.ci/pipelines/env_variables.sh` | | Local test runner | `e2e-tests/local-run.sh` | | Local test env | `e2e-tests/local-test-setup.sh` | | Qodo config | `.pr_agent.toml` | diff --git a/.opencode/memories/e2e-fix-workflow.md b/.opencode/memories/e2e-fix-workflow.md index 885604d1e0..d1c02a48e9 100644 --- a/.opencode/memories/e2e-fix-workflow.md +++ b/.opencode/memories/e2e-fix-workflow.md @@ -153,18 +153,7 @@ If the test passes on first run, repeat 10 times: - **Mixed results** → flaky (focus on reliability improvements) - **0/10 pass** → consistent failure -### Debugging Modes - -```bash -# Headed (visible browser) -yarn playwright test --project= --headed - -# Debug (Playwright Inspector) -yarn playwright test 
--project= --debug - -# View trace -yarn playwright show-trace test-results//trace.zip -``` +For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. ## Diagnosing and Fixing Tests @@ -177,61 +166,11 @@ yarn playwright show-trace test-results//trace.zip 5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional 6. **Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` -### RHDH Coding Conventions (Required) - -**Locators** — Always use semantic role-based locators: -```typescript -// GOOD -page.getByRole('button', { name: 'Create' }) -page.getByRole('heading', { name: 'Catalog' }) -page.getByText('No results found') - -// BAD — deprecated CSS selectors -page.locator('.MuiButton-root') -page.locator('[data-testid="..."]') -``` - -**Component annotations** — Every spec file must have: -```typescript -test.beforeAll(async ({}, testInfo) => { - testInfo.annotations.push({ - type: 'component', - description: 'your_component_name', - }); -}); -``` - -**Retry patterns** for async assertions: -```typescript -await expect(async () => { - await page.reload(); - await expect(page.getByText('entity')).toBeVisible(); -}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); -``` - -**Conditional skips**: -```typescript -import { skipIfJobName } from '../utils/helper'; -import * as constants from '../utils/constants'; -skipIfJobName(constants.GKE_JOBS); -``` +### RHDH Coding Conventions -**Forbidden patterns**: -- `page.waitForNetworkIdle()` / `networkidle` -- Raw CSS class selectors (`.MuiButton-root`) -- `page.waitForTimeout()` for synchronization -- Hardcoded secrets or credentials - -### Key Utility Classes - -| Class | Path | Purpose | -|-------|------|---------| -| `Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | -| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | -| `APIHelper` | `utils/api-helper.ts` 
| GitHub API, Backstage catalog API | -| `KubeClient` | `utils/kube-client.ts` | K8s resource management | -| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | -| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | +All test code must follow the project's coding rules: +- **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects +- **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts ### Product Bug Decision @@ -332,18 +271,10 @@ Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/rete ## Reference Files +See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: + | Category | Key files | |----------|-----------| -| Playwright config | `e2e-tests/playwright.config.ts` | -| Project names (SOT) | `e2e-tests/playwright/projects.json` | -| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | -| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | -| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | -| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | -| ConfigMaps | `.ci/pipelines/resources/config_map/` | -| Helm values | `.ci/pipelines/value_files/` | -| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | -| Environment vars | `.ci/pipelines/env_variables.sh` | | Local test runner | `e2e-tests/local-run.sh` | | Local test env | `e2e-tests/local-test-setup.sh` | | Qodo config | `.pr_agent.toml` | diff --git a/.rulesync/rules/e2e-fix-workflow.md b/.rulesync/rules/e2e-fix-workflow.md index 6718e915cc..5d1732b0a2 100644 --- a/.rulesync/rules/e2e-fix-workflow.md +++ b/.rulesync/rules/e2e-fix-workflow.md @@ -159,18 +159,7 @@ If the test passes on first run, repeat 10 times: - **Mixed results** → flaky (focus on reliability improvements) - **0/10 pass** → 
consistent failure -### Debugging Modes - -```bash -# Headed (visible browser) -yarn playwright test --project= --headed - -# Debug (Playwright Inspector) -yarn playwright test --project= --debug - -# View trace -yarn playwright show-trace test-results//trace.zip -``` +For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. ## Diagnosing and Fixing Tests @@ -183,61 +172,11 @@ yarn playwright show-trace test-results//trace.zip 5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional 6. **Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` -### RHDH Coding Conventions (Required) - -**Locators** — Always use semantic role-based locators: -```typescript -// GOOD -page.getByRole('button', { name: 'Create' }) -page.getByRole('heading', { name: 'Catalog' }) -page.getByText('No results found') - -// BAD — deprecated CSS selectors -page.locator('.MuiButton-root') -page.locator('[data-testid="..."]') -``` - -**Component annotations** — Every spec file must have: -```typescript -test.beforeAll(async ({}, testInfo) => { - testInfo.annotations.push({ - type: 'component', - description: 'your_component_name', - }); -}); -``` - -**Retry patterns** for async assertions: -```typescript -await expect(async () => { - await page.reload(); - await expect(page.getByText('entity')).toBeVisible(); -}).toPass({ intervals: [2000, 5000, 10000], timeout: 60_000 }); -``` - -**Conditional skips**: -```typescript -import { skipIfJobName } from '../utils/helper'; -import * as constants from '../utils/constants'; -skipIfJobName(constants.GKE_JOBS); -``` +### RHDH Coding Conventions -**Forbidden patterns**: -- `page.waitForNetworkIdle()` / `networkidle` -- Raw CSS class selectors (`.MuiButton-root`) -- `page.waitForTimeout()` for synchronization -- Hardcoded secrets or credentials - -### Key Utility Classes - -| Class | Path | Purpose | -|-------|------|---------| -| 
`Common` | `utils/common.ts` | Login flows, `waitForLoad()`, `signOut()` | -| `UIhelper` | `utils/ui-helper.ts` | 90+ UI interaction methods | -| `APIHelper` | `utils/api-helper.ts` | GitHub API, Backstage catalog API | -| `KubeClient` | `utils/kube-client.ts` | K8s resource management | -| `SemanticSelectors` | `support/selectors/semantic-selectors.ts` | Role-based selector helpers | -| `RHDHDeployment` | `utils/authentication-providers/rhdh-deployment.ts` | RHDH deployment lifecycle | +All test code must follow the project's coding rules: +- **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects +- **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts ### Product Bug Decision @@ -338,18 +277,10 @@ Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/rete ## Reference Files +See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: + | Category | Key files | |----------|-----------| -| Playwright config | `e2e-tests/playwright.config.ts` | -| Project names (SOT) | `e2e-tests/playwright/projects.json` | -| Test specs | `e2e-tests/playwright/e2e/**/*.spec.ts` | -| Utilities | `e2e-tests/playwright/utils/`, `e2e-tests/playwright/support/` | -| CI entry point | `.ci/pipelines/openshift-ci-tests.sh` | -| Deployment lib | `.ci/pipelines/lib/helm.sh`, `.ci/pipelines/lib/operators.sh` | -| ConfigMaps | `.ci/pipelines/resources/config_map/` | -| Helm values | `.ci/pipelines/value_files/` | -| Operator CRs | `.ci/pipelines/resources/rhdh-operator/` | -| Environment vars | `.ci/pipelines/env_variables.sh` | | Local test runner | `e2e-tests/local-run.sh` | | Local test env | `e2e-tests/local-test-setup.sh` | | Qodo config | `.pr_agent.toml` | From 40d9ac2a49308cd97976f9d5304bb2a3430031a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:37:37 +0200 Subject: [PATCH 14/23] 
fix(skills): check main for existing fix before healer on release branches When working on a release branch, check if the failing test was already fixed on main and cherry-pick if possible. Assisted-by: OpenCode --- .claude/skills/e2e-diagnose-and-fix/SKILL.md | 23 +++++++++++++++++++ .cursor/skills/e2e-diagnose-and-fix/SKILL.md | 23 +++++++++++++++++++ .opencode/skill/e2e-diagnose-and-fix/SKILL.md | 23 +++++++++++++++++++ .../skills/e2e-diagnose-and-fix/SKILL.md | 23 +++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/.claude/skills/e2e-diagnose-and-fix/SKILL.md b/.claude/skills/e2e-diagnose-and-fix/SKILL.md index f7bfa5cb2b..cbaade48ac 100644 --- a/.claude/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.claude/skills/e2e-diagnose-and-fix/SKILL.md @@ -12,6 +12,29 @@ Analyze the root cause of a failing E2E test and implement a fix following RHDH Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. +## Check for Existing Fix on Main (Release Branches Only) + +If the fix branch is based on a **release branch** (e.g., `release-1.9`), check whether the failing test was already fixed on `main` before proceeding with the healer: + +```bash +git fetch upstream main +git log --oneline upstream/main -- <path-to-spec-file> | head -10 +``` + +If there are recent commits touching the failing spec file or its page objects, inspect them: + +```bash +git log --oneline upstream/main -p -- <path-to-spec-file> | head -100 +``` + +If a fix exists on `main`, cherry-pick it onto the release branch: + +```bash +git cherry-pick <commit-sha> +``` + +Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix.
+ ## MANDATORY: Always Use the Playwright Healer Agent **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. diff --git a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md index 977f8c9b53..82e83f27b9 100644 --- a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md @@ -10,6 +10,29 @@ Analyze the root cause of a failing E2E test and implement a fix following RHDH Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. +## Check for Existing Fix on Main (Release Branches Only) + +If the fix branch is based on a **release branch** (e.g., `release-1.9`), check whether the failing test was already fixed on `main` before proceeding with the healer: + +```bash +git fetch upstream main +git log --oneline upstream/main -- <path-to-spec-file> | head -10 +``` + +If there are recent commits touching the failing spec file or its page objects, inspect them: + +```bash +git log --oneline upstream/main -p -- <path-to-spec-file> | head -100 +``` + +If a fix exists on `main`, cherry-pick it onto the release branch: + +```bash +git cherry-pick <commit-sha> +``` + +Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. + ## MANDATORY: Always Use the Playwright Healer Agent **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer.
The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention. diff --git a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md index f7bfa5cb2b..cbaade48ac 100644 --- a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md +++ b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md @@ -12,6 +12,29 @@ Analyze the root cause of a failing E2E test and implement a fix following RHDH Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. +## Check for Existing Fix on Main (Release Branches Only) + +If the fix branch is based on a **release branch** (e.g., `release-1.9`), check whether the failing test was already fixed on `main` before proceeding with the healer: + +```bash +git fetch upstream main +git log --oneline upstream/main -- <path-to-spec-file> | head -10 +``` + +If there are recent commits touching the failing spec file or its page objects, inspect them: + +```bash +git log --oneline upstream/main -p -- <path-to-spec-file> | head -100 +``` + +If a fix exists on `main`, cherry-pick it onto the release branch: + +```bash +git cherry-pick <commit-sha> +``` + +Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. + ## MANDATORY: Always Use the Playwright Healer Agent **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention.
diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md index 6cd5a266c9..54f9a9fd82 100644 --- a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -14,6 +14,29 @@ Analyze the root cause of a failing E2E test and implement a fix following RHDH Use this skill after reproducing a failure (via `e2e-reproduce-failure`) when you have confirmed the test fails and need to determine the root cause and implement a fix. +## Check for Existing Fix on Main (Release Branches Only) + +If the fix branch is based on a **release branch** (e.g., `release-1.9`), check whether the failing test was already fixed on `main` before proceeding with the healer: + +```bash +git fetch upstream main +git log --oneline upstream/main -- <path-to-spec-file> | head -10 +``` + +If there are recent commits touching the failing spec file or its page objects, inspect them: + +```bash +git log --oneline upstream/main -p -- <path-to-spec-file> | head -100 +``` + +If a fix exists on `main`, cherry-pick it onto the release branch: + +```bash +git cherry-pick <commit-sha> +``` + +Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. + ## MANDATORY: Always Use the Playwright Healer Agent **The Playwright healer agent MUST be used for ALL test failures, regardless of failure category.** Do not attempt manual diagnosis without first running the healer. The healer can run the test, debug it step-by-step, inspect the live UI, generate correct locators, and edit the code — often resolving the issue end-to-end without manual intervention.
From a1952b0ee4442fba487b1a3e828a6ac9ae54fad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:39:31 +0200 Subject: [PATCH 15/23] fix(skills): cherry-pick from main always takes priority, resolve conflicts Assisted-by: OpenCode --- .claude/skills/e2e-diagnose-and-fix/SKILL.md | 6 ++++-- .cursor/skills/e2e-diagnose-and-fix/SKILL.md | 6 ++++-- .opencode/skill/e2e-diagnose-and-fix/SKILL.md | 6 ++++-- .rulesync/skills/e2e-diagnose-and-fix/SKILL.md | 6 ++++-- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.claude/skills/e2e-diagnose-and-fix/SKILL.md b/.claude/skills/e2e-diagnose-and-fix/SKILL.md index cbaade48ac..62075928ba 100644 --- a/.claude/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.claude/skills/e2e-diagnose-and-fix/SKILL.md @@ -27,13 +27,15 @@ If there are recent commits touching the failing spec file or its page objects, git log --oneline upstream/main -p -- <path-to-spec-file> | head -100 ``` -If a fix exists on `main`, cherry-pick it onto the release branch: +If a fix exists on `main`, **always cherry-pick it** — this takes priority over running the healer: ```bash git cherry-pick <commit-sha> ``` -Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. +If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. + +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**.
## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md index 82e83f27b9..fc828bc2ad 100644 --- a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md @@ -25,13 +25,15 @@ If there are recent commits touching the failing spec file or its page objects, git log --oneline upstream/main -p -- | head -100 ``` -If a fix exists on `main`, cherry-pick it onto the release branch: +If a fix exists on `main`, **always cherry-pick it** — this takes priority over running the healer: ```bash git cherry-pick ``` -Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. +If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. + +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. 
## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md index cbaade48ac..62075928ba 100644 --- a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md +++ b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md @@ -27,13 +27,15 @@ If there are recent commits touching the failing spec file or its page objects, git log --oneline upstream/main -p -- | head -100 ``` -If a fix exists on `main`, cherry-pick it onto the release branch: +If a fix exists on `main`, **always cherry-pick it** — this takes priority over running the healer: ```bash git cherry-pick ``` -Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. +If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. + +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. 
## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md index 54f9a9fd82..020ee409c8 100644 --- a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -29,13 +29,15 @@ If there are recent commits touching the failing spec file or its page objects, git log --oneline upstream/main -p -- | head -100 ``` -If a fix exists on `main`, cherry-pick it onto the release branch: +If a fix exists on `main`, **always cherry-pick it** — this takes priority over running the healer: ```bash git cherry-pick ``` -Then verify the cherry-picked fix works (proceed to `e2e-verify-fix`). If the cherry-pick has conflicts or doesn't apply cleanly, proceed with the healer below to create a release-branch-specific fix. +If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. + +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. 
## MANDATORY: Always Use the Playwright Healer Agent From 2c4d2b53c725714e0e8c1f38cf92343dca62918d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:41:04 +0200 Subject: [PATCH 16/23] fix(skills): fall through to healer if cherry-picked fix does not resolve issue Assisted-by: OpenCode --- .claude/skills/e2e-diagnose-and-fix/SKILL.md | 2 +- .cursor/skills/e2e-diagnose-and-fix/SKILL.md | 2 +- .opencode/skill/e2e-diagnose-and-fix/SKILL.md | 2 +- .rulesync/skills/e2e-diagnose-and-fix/SKILL.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude/skills/e2e-diagnose-and-fix/SKILL.md b/.claude/skills/e2e-diagnose-and-fix/SKILL.md index 62075928ba..2b28d7c2b0 100644 --- a/.claude/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.claude/skills/e2e-diagnose-and-fix/SKILL.md @@ -35,7 +35,7 @@ git cherry-pick If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. -After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**, or if the cherry-picked fix doesn't resolve the issue on the release branch. 
## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md index fc828bc2ad..3bd0c3d4c6 100644 --- a/.cursor/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.cursor/skills/e2e-diagnose-and-fix/SKILL.md @@ -33,7 +33,7 @@ git cherry-pick If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. -After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**, or if the cherry-picked fix doesn't resolve the issue on the release branch. ## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md index 62075928ba..2b28d7c2b0 100644 --- a/.opencode/skill/e2e-diagnose-and-fix/SKILL.md +++ b/.opencode/skill/e2e-diagnose-and-fix/SKILL.md @@ -35,7 +35,7 @@ git cherry-pick If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. -After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. 
Only proceed to the healer below if **no relevant fix exists on `main`**, or if the cherry-picked fix doesn't resolve the issue on the release branch. ## MANDATORY: Always Use the Playwright Healer Agent diff --git a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md index 020ee409c8..eca3fbc7fa 100644 --- a/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md +++ b/.rulesync/skills/e2e-diagnose-and-fix/SKILL.md @@ -37,7 +37,7 @@ git cherry-pick If the cherry-pick has conflicts, **resolve them manually** using the `main` commit as the source of truth and adapting to the release branch's code. Do not abandon the cherry-pick in favor of the healer — the fix on `main` is the authoritative solution. -After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**. +After a successful cherry-pick (with or without conflict resolution), proceed to `e2e-verify-fix`. Only proceed to the healer below if **no relevant fix exists on `main`**, or if the cherry-picked fix doesn't resolve the issue on the release branch. ## MANDATORY: Always Use the Playwright Healer Agent From a15a45771f98f2ce5f79e28677ed644be3a80f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:43:10 +0200 Subject: [PATCH 17/23] fix(rules): replace hardcoded branch/image tables with regex derivation Add fallback for determining base branch from feature branches. 
Assisted-by: OpenCode --- .claude/rules/e2e-fix-workflow.md | 31 +++++++++++++++++--------- .cursor/rules/e2e-fix-workflow.mdc | 31 +++++++++++++++++--------- .opencode/memories/e2e-fix-workflow.md | 31 +++++++++++++++++--------- .rulesync/rules/e2e-fix-workflow.md | 31 +++++++++++++++++--------- 4 files changed, 84 insertions(+), 40 deletions(-) diff --git a/.claude/rules/e2e-fix-workflow.md b/.claude/rules/e2e-fix-workflow.md index d1c02a48e9..ff5f2a17ff 100644 --- a/.claude/rules/e2e-fix-workflow.md +++ b/.claude/rules/e2e-fix-workflow.md @@ -45,11 +45,18 @@ Build logs and JUnit XML results are in the GCS artifacts directory. Look for Pl #### Job Name → Release Branch -| Pattern in job name | Release branch | -|---------------------|---------------| -| `*-rhdh-main-*` | `main` | -| `*-rhdh-release-1.9-*` | `release-1.9` | -| `*-rhdh-release-1.8-*` | `release-1.8` | +Extract the release branch from the Prow job name using the `-rhdh--` pattern: + +```bash +BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') +``` + +If you're already on a feature branch and need to determine the base release branch: + +```bash +# Find which upstream branch the current branch is based on +git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' +``` #### Job Name → Platform and Deployment Method @@ -88,11 +95,15 @@ Do NOT use shortened names like `nightly-ocp-helm` — these do not match the gl #### Release Branch → Image Repo and Tag -| Branch | `-r` (image repo) | `-t` (image tag) | -|--------|-------------------|-------------------| -| `main` | `rhdh-community/rhdh` | `next` | -| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | -| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | +Derive from the branch name — no hardcoded version list needed: + +```bash +if [[ "$BRANCH" == "main" ]]; then + REPO="rhdh-community/rhdh"; TAG="next" +else + REPO="rhdh/rhdh-hub-rhel9"; 
TAG="${BRANCH#release-}" +fi +``` ## Branch and Deployment Setup diff --git a/.cursor/rules/e2e-fix-workflow.mdc b/.cursor/rules/e2e-fix-workflow.mdc index 9bd7f3ed3d..493a577ca9 100644 --- a/.cursor/rules/e2e-fix-workflow.mdc +++ b/.cursor/rules/e2e-fix-workflow.mdc @@ -48,11 +48,18 @@ Build logs and JUnit XML results are in the GCS artifacts directory. Look for Pl #### Job Name → Release Branch -| Pattern in job name | Release branch | -|---------------------|---------------| -| `*-rhdh-main-*` | `main` | -| `*-rhdh-release-1.9-*` | `release-1.9` | -| `*-rhdh-release-1.8-*` | `release-1.8` | +Extract the release branch from the Prow job name using the `-rhdh--` pattern: + +```bash +BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') +``` + +If you're already on a feature branch and need to determine the base release branch: + +```bash +# Find which upstream branch the current branch is based on +git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' +``` #### Job Name → Platform and Deployment Method @@ -91,11 +98,15 @@ Do NOT use shortened names like `nightly-ocp-helm` — these do not match the gl #### Release Branch → Image Repo and Tag -| Branch | `-r` (image repo) | `-t` (image tag) | -|--------|-------------------|-------------------| -| `main` | `rhdh-community/rhdh` | `next` | -| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | -| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | +Derive from the branch name — no hardcoded version list needed: + +```bash +if [[ "$BRANCH" == "main" ]]; then + REPO="rhdh-community/rhdh"; TAG="next" +else + REPO="rhdh/rhdh-hub-rhel9"; TAG="${BRANCH#release-}" +fi +``` ## Branch and Deployment Setup diff --git a/.opencode/memories/e2e-fix-workflow.md b/.opencode/memories/e2e-fix-workflow.md index d1c02a48e9..ff5f2a17ff 100644 --- a/.opencode/memories/e2e-fix-workflow.md +++ b/.opencode/memories/e2e-fix-workflow.md @@ 
-45,11 +45,18 @@ Build logs and JUnit XML results are in the GCS artifacts directory. Look for Pl #### Job Name → Release Branch -| Pattern in job name | Release branch | -|---------------------|---------------| -| `*-rhdh-main-*` | `main` | -| `*-rhdh-release-1.9-*` | `release-1.9` | -| `*-rhdh-release-1.8-*` | `release-1.8` | +Extract the release branch from the Prow job name using the `-rhdh--` pattern: + +```bash +BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') +``` + +If you're already on a feature branch and need to determine the base release branch: + +```bash +# Find which upstream branch the current branch is based on +git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' +``` #### Job Name → Platform and Deployment Method @@ -88,11 +95,15 @@ Do NOT use shortened names like `nightly-ocp-helm` — these do not match the gl #### Release Branch → Image Repo and Tag -| Branch | `-r` (image repo) | `-t` (image tag) | -|--------|-------------------|-------------------| -| `main` | `rhdh-community/rhdh` | `next` | -| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | -| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | +Derive from the branch name — no hardcoded version list needed: + +```bash +if [[ "$BRANCH" == "main" ]]; then + REPO="rhdh-community/rhdh"; TAG="next" +else + REPO="rhdh/rhdh-hub-rhel9"; TAG="${BRANCH#release-}" +fi +``` ## Branch and Deployment Setup diff --git a/.rulesync/rules/e2e-fix-workflow.md b/.rulesync/rules/e2e-fix-workflow.md index 5d1732b0a2..9906e83f98 100644 --- a/.rulesync/rules/e2e-fix-workflow.md +++ b/.rulesync/rules/e2e-fix-workflow.md @@ -51,11 +51,18 @@ Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Pl #### Job Name → Release Branch -| Pattern in job name | Release branch | -|---------------------|---------------| -| `*-rhdh-main-*` | `main` | -| `*-rhdh-release-1.9-*` | `release-1.9` | -| `*-rhdh-release-1.8-*` | `release-1.8` | +Extract the release branch from the Prow job name using the `-rhdh--` pattern: + +```bash +BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') +``` + +If you're already on a feature branch and need to determine the base release branch: + +```bash +# Find which upstream branch the current branch is based on +git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' +``` #### Job Name → Platform and Deployment Method @@ -94,11 +101,15 @@ Do NOT use shortened names like `nightly-ocp-helm` — these do not match the gl #### Release Branch → Image Repo and Tag -| Branch | `-r` (image repo) | `-t` (image tag) | -|--------|-------------------|-------------------| -| `main` | `rhdh-community/rhdh` | `next` | -| `release-1.9` | `rhdh/rhdh-hub-rhel9` | `1.9` | -| `release-1.8` | `rhdh/rhdh-hub-rhel9` | `1.8` | +Derive from the branch name — no hardcoded version list needed: + +```bash +if [[ "$BRANCH" == "main" ]]; then + REPO="rhdh-community/rhdh"; TAG="next" +else + REPO="rhdh/rhdh-hub-rhel9"; TAG="${BRANCH#release-}" +fi +``` ## Branch and Deployment Setup From eff409c2ceb791444184854e32f453f79c6fe4fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:45:15 +0200 Subject: [PATCH 18/23] fix(rules): remove unnecessary feature branch base detection Assisted-by: OpenCode --- .claude/rules/e2e-fix-workflow.md | 7 ------- .cursor/rules/e2e-fix-workflow.mdc | 7 ------- .opencode/memories/e2e-fix-workflow.md | 7 ------- .rulesync/rules/e2e-fix-workflow.md | 7 ------- 4 files changed, 28 deletions(-) diff --git a/.claude/rules/e2e-fix-workflow.md b/.claude/rules/e2e-fix-workflow.md index 
ff5f2a17ff..165041c95e 100644 --- a/.claude/rules/e2e-fix-workflow.md +++ b/.claude/rules/e2e-fix-workflow.md @@ -51,13 +51,6 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -If you're already on a feature branch and need to determine the base release branch: - -```bash -# Find which upstream branch the current branch is based on -git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' -``` - #### Job Name → Platform and Deployment Method | Pattern | Platform | Method | diff --git a/.cursor/rules/e2e-fix-workflow.mdc b/.cursor/rules/e2e-fix-workflow.mdc index 493a577ca9..7bb3480233 100644 --- a/.cursor/rules/e2e-fix-workflow.mdc +++ b/.cursor/rules/e2e-fix-workflow.mdc @@ -54,13 +54,6 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -If you're already on a feature branch and need to determine the base release branch: - -```bash -# Find which upstream branch the current branch is based on -git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' -``` - #### Job Name → Platform and Deployment Method | Pattern | Platform | Method | diff --git a/.opencode/memories/e2e-fix-workflow.md b/.opencode/memories/e2e-fix-workflow.md index ff5f2a17ff..165041c95e 100644 --- a/.opencode/memories/e2e-fix-workflow.md +++ b/.opencode/memories/e2e-fix-workflow.md @@ -51,13 +51,6 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -If you're already on a feature branch and need to determine the base release branch: - -```bash -# Find which upstream branch the current 
branch is based on -git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' -``` - #### Job Name → Platform and Deployment Method | Pattern | Platform | Method | diff --git a/.rulesync/rules/e2e-fix-workflow.md b/.rulesync/rules/e2e-fix-workflow.md index 9906e83f98..c12d1d3dab 100644 --- a/.rulesync/rules/e2e-fix-workflow.md +++ b/.rulesync/rules/e2e-fix-workflow.md @@ -57,13 +57,6 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -If you're already on a feature branch and need to determine the base release branch: - -```bash -# Find which upstream branch the current branch is based on -git branch -r --contains "$(git merge-base HEAD upstream/main)" | grep 'upstream/' | head -1 | sed 's|.*upstream/||' -``` - #### Job Name → Platform and Deployment Method | Pattern | Platform | Method | From 9f1cc13948c0f52b138912964915ee003f5f5463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:48:03 +0200 Subject: [PATCH 19/23] feat(commands): add --no-qodo flag to skip Qodo review in /fix-e2e Prevents depleting limited Qodo quota on iterative fixes. 
Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 13 ++++++++----- .cursor/commands/fix-e2e.md | 13 ++++++++----- .opencode/command/fix-e2e.md | 13 ++++++++----- .rulesync/commands/fix-e2e.md | 13 ++++++++----- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md index b56220cab4..054661ca3c 100644 --- a/.claude/commands/fix-e2e.md +++ b/.claude/commands/fix-e2e.md @@ -10,12 +10,15 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A failure URL or ticket, optionally followed by `--no-qodo`: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` - **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` +**Options**: +- `--no-qodo` — Skip Qodo agentic review (steps 5-7 in Phase 7). Use this to avoid depleting a limited Qodo quota. + ## Workflow Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. @@ -150,10 +153,10 @@ Verify the fix: 2. **Commit**: Stage changes, commit with conventional format 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` -5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) -7. **Address feedback**: Apply valid suggestions, explain rejections -8. 
**Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +5. **Trigger Qodo review** (skip if `--no-qodo`): Comment `/agentic_review` on the PR +6. **Wait for review** (skip if `--no-qodo`): Poll for Qodo bot review (check every 15s, up to 5 minutes) +7. **Address feedback** (skip if `--no-qodo`): Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: Comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` ### Final Report diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md index 010e932e46..e11832e1f1 100644 --- a/.cursor/commands/fix-e2e.md +++ b/.cursor/commands/fix-e2e.md @@ -7,12 +7,15 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A failure URL or ticket, optionally followed by `--no-qodo`: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` - **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` +**Options**: +- `--no-qodo` — Skip Qodo agentic review (steps 5-7 in Phase 7). Use this to avoid depleting a limited Qodo quota. + ## Workflow Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. @@ -147,10 +150,10 @@ Verify the fix: 2. **Commit**: Stage changes, commit with conventional format 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. 
Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` -5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) -7. **Address feedback**: Apply valid suggestions, explain rejections -8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +5. **Trigger Qodo review** (skip if `--no-qodo`): Comment `/agentic_review` on the PR +6. **Wait for review** (skip if `--no-qodo`): Poll for Qodo bot review (check every 15s, up to 5 minutes) +7. **Address feedback** (skip if `--no-qodo`): Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: Comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. 
**Monitor CI**: Watch CI checks with `gh pr checks` ### Final Report diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md index b56220cab4..054661ca3c 100644 --- a/.opencode/command/fix-e2e.md +++ b/.opencode/command/fix-e2e.md @@ -10,12 +10,15 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A failure URL or ticket, optionally followed by `--no-qodo`: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` - **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` +**Options**: +- `--no-qodo` — Skip Qodo agentic review (steps 5-7 in Phase 7). Use this to avoid depleting a limited Qodo quota. + ## Workflow Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. @@ -150,10 +153,10 @@ Verify the fix: 2. **Commit**: Stage changes, commit with conventional format 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` -5. **Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) -7. **Address feedback**: Apply valid suggestions, explain rejections -8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +5. 
**Trigger Qodo review** (skip if `--no-qodo`): Comment `/agentic_review` on the PR +6. **Wait for review** (skip if `--no-qodo`): Poll for Qodo bot review (check every 15s, up to 5 minutes) +7. **Address feedback** (skip if `--no-qodo`): Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: Comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` ### Final Report diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md index ae0a4a993d..bb09e35bd1 100644 --- a/.rulesync/commands/fix-e2e.md +++ b/.rulesync/commands/fix-e2e.md @@ -12,12 +12,15 @@ Autonomous workflow to investigate, reproduce, fix, and submit a PR for a failin ## Input -`$ARGUMENTS` — A Prow job URL, Playwright report URL, Jira ticket ID, or Jira URL: +`$ARGUMENTS` — A failure URL or ticket, optionally followed by `--no-qodo`: - **Prow URL**: `https://prow.ci.openshift.org/view/gs/...` - **Playwright report URL**: `https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/.../index.html[#?testId=...]` - **Jira ticket ID**: `RHIDP-XXXX` - **Jira URL**: `https://redhat.atlassian.net/browse/RHIDP-XXXX` +**Options**: +- `--no-qodo` — Skip Qodo agentic review (steps 5-7 in Phase 7). Use this to avoid depleting a limited Qodo quota. + ## Workflow Execute the following phases in order. Load each skill as needed for detailed instructions. If a phase fails, report the error and stop — do not proceed blindly. @@ -152,10 +155,10 @@ Verify the fix: 2. **Commit**: Stage changes, commit with conventional format 3. **Push**: `git push -u origin ` 4. **Create draft PR**: Always use `--draft`. Determine the GitHub username from the fork remote: `git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||'`. Then use `gh pr create --draft --repo redhat-developer/rhdh --head : --base ` -5. 
**Trigger Qodo review**: Comment `/agentic_review` on the PR -6. **Wait for review**: Poll for Qodo bot review (check every 15s, up to 5 minutes) -7. **Address feedback**: Apply valid suggestions, explain rejections -8. **Trigger affected CI job**: After addressing review feedback, comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 +5. **Trigger Qodo review** (skip if `--no-qodo`): Comment `/agentic_review` on the PR +6. **Wait for review** (skip if `--no-qodo`): Poll for Qodo bot review (check every 15s, up to 5 minutes) +7. **Address feedback** (skip if `--no-qodo`): Apply valid suggestions, explain rejections +8. **Trigger affected CI job**: Comment `/test ?` on the PR to list available presubmit jobs, then comment `/test ` to trigger the presubmit job matching the platform and deployment method from Phase 1 9. **Monitor CI**: Watch CI checks with `gh pr checks` ### Final Report From 5d0bdd355381a5527083d923855fa2db748322f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:56:37 +0200 Subject: [PATCH 20/23] refactor: strip e2e-fix-workflow rule to mapping tables and overview MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove duplicated sections (deployment, reproduction, diagnosis, verification, PR submission) that are maintained in the skills. Keep only the workflow overview, job name mapping tables (single source of truth), and coding convention cross-references. 290 lines → 85 lines. 
Assisted-by: OpenCode --- .claude/rules/e2e-fix-workflow.md | 244 ++----------------------- .cursor/rules/e2e-fix-workflow.mdc | 244 ++----------------------- .opencode/memories/e2e-fix-workflow.md | 244 ++----------------------- .rulesync/rules/e2e-fix-workflow.md | 244 ++----------------------- 4 files changed, 76 insertions(+), 900 deletions(-) diff --git a/.claude/rules/e2e-fix-workflow.md b/.claude/rules/e2e-fix-workflow.md index 165041c95e..8d82a23c0c 100644 --- a/.claude/rules/e2e-fix-workflow.md +++ b/.claude/rules/e2e-fix-workflow.md @@ -1,49 +1,26 @@ # E2E Test Fix Workflow -This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. - -## Table of Contents - -- [Workflow Overview](#workflow-overview) -- [Parsing CI Failures](#parsing-ci-failures) -- [Branch and Deployment Setup](#branch-and-deployment-setup) -- [Reproducing Failures](#reproducing-failures) -- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) -- [Playwright Test Agents](#playwright-test-agents) -- [Verification and PR Submission](#verification-and-pr-submission) +Reference knowledge for the `/fix-e2e` command. For detailed instructions, load the corresponding skill for each phase. ## Workflow Overview The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: -1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +1. **Parse CI Failure** (`e2e-parse-ci-failure`) — Extract failure details from Prow URL, Playwright report, or Jira ticket 2. **Setup Fix Branch** — Create a branch from the correct upstream release branch -3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` -4. **Reproduce Failure** — Confirm the failure reproduces locally -5. 
**Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents -6. **Verify Fix** — Run the test multiple times and check code quality -7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI - -Each phase has a corresponding skill (in `.opencode/skills/` and `.claude/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. +3. **Deploy RHDH** (`e2e-deploy-rhdh`) — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** (`e2e-reproduce-failure`) — Confirm the failure reproduces locally +5. **Diagnose and Fix** (`e2e-diagnose-and-fix`) — Analyze root cause and implement a fix +6. **Verify Fix** (`e2e-verify-fix`) — Run the test multiple times and check code quality +7. **Submit and Review** (`e2e-submit-and-review`) — Create PR, trigger review, monitor CI -**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. +**Critical rule**: No phase may be skipped without **explicit user approval**. -## Parsing CI Failures +## Job Name Mapping Tables -### Prow URL Structure +These tables are the **single source of truth** — referenced by `e2e-parse-ci-failure` and other skills. -``` -https://prow.ci.openshift.org/view/gs/test-platform-results/logs// -``` - -Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: -``` -✘ [] › /.spec.ts: -``` - -### Job Name Mapping Tables - -#### Job Name → Release Branch +### Job Name → Release Branch Extract the release branch from the Prow job name using the `-rhdh--` pattern: @@ -51,7 +28,7 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -#### Job Name → Platform and Deployment Method +### Job Name → Platform and Deployment Method | Pattern | Platform | Method | |---------|----------|--------| @@ -65,7 +42,7 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*gke*operator*` | GKE | Operator | | `*osd-gcp*` | OSD-GCP | Helm/Operator | -#### Job Name → Playwright Projects +### Job Name → Playwright Projects | Job pattern | Projects | |-------------|----------| @@ -77,18 +54,14 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | | `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | -#### Job Name → local-run.sh `-j` Parameter - -Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. +### Job Name → local-run.sh `-j` Parameter -**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` -**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` +Use the **full Prow CI job name** directly as the `-j` parameter. Do NOT use shortened names. -Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. 
+**OCP** (deploy-only with `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s` +**K8s** (full execution, no `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag>` -#### Release Branch → Image Repo and Tag - -Derive from the branch name — no hardcoded version list needed: +### Release Branch → Image Repo and Tag ```bash if [[ "$BRANCH" == "main" ]]; then @@ -98,187 +71,8 @@ else fi ``` -## Branch and Deployment Setup - -### Branch Creation - -Always create branches from upstream, never from local copies: - -```bash -git fetch upstream -git checkout -b fix/e2e-<description> upstream/<release-branch> -``` - -### Deployment via local-run.sh - -CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. - -```bash -cd e2e-tests -# OCP jobs: use -s to deploy only, then run tests manually -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s -# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -``` - -Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
- -After deployment, source the test environment: -```bash -source e2e-tests/local-test-setup.sh -``` - -### Deployment Error Recovery - -| Error | Investigation | Common Fix | -|-------|--------------|------------| -| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | -| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | -| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | -| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | - -For config issues, search these repos for reference: -- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts -- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults - -## Reproducing Failures - -### Test Execution - -```bash -cd e2e-tests -yarn playwright test --project= --retries=0 --workers=1 -``` - -### Flakiness Detection - -If the test passes on first run, repeat 10 times: -- **10/10 pass** → cannot reproduce (check environment differences) -- **Mixed results** → flaky (focus on reliability improvements) -- **0/10 pass** → consistent failure - -For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. - -## Diagnosing and Fixing Tests - -### Failure Classification - -1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors -2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals -3. **Assertion mismatch** — Expected values changed → update test data or report product bug -4. **Data dependency** — Test data missing → add proper setup/teardown -5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional -6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` - -### RHDH Coding Conventions +## Coding Conventions All test code must follow the project's coding rules: - **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects - **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts - -### Product Bug Decision - -**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: -1. File or update a Jira bug in the `RHDHBUGS` project -2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: - ```typescript - // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX - test.fixme('Description of the product bug'); - ``` -3. Do **not** change assertions to match broken behavior -4. Proceed with the `test.fixme()` change - -## Playwright Test Agents - -The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. - -### Available Agents - -| Agent | Mode | Purpose | -|-------|------|---------| -| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | -| `playwright-test-generator` | subagent | Create new test code from a test plan | -| `playwright-test-planner` | subagent | Explore app and create test plans | - -### Healer Agent Usage (Primary for Fixes) - -The healer agent is the primary tool for test repair: -1. Runs tests with `test_run` to identify failures -2. Debugs with `test_debug` to step through failing tests -3. Inspects UI state via `browser_snapshot`, `browser_console_messages` -4. Generates correct locators with `browser_generate_locator` -5. Edits test code with `edit`/`write` tools -6. 
Re-runs tests to verify the fix - -Invoke with: `@playwright-test-healer Fix the failing test in ` - -## Verification and PR Submission - -### Verification Checklist - -1. Single test run passes -2. 5 consecutive runs pass (stability) -3. `yarn tsc:check` passes -4. `yarn lint:check` passes -5. `yarn prettier:check` passes - -### Pre-Commit Hooks - -Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: - -```bash -yarn install # Root workspace -cd e2e-tests && yarn install && cd .. # If e2e-tests files changed -cd .ci && yarn install && cd .. # If .ci files changed -``` - -### PR Creation - -Always create PRs as **drafts**: - -```bash -git push -u origin -# Determine GitHub username from fork remote -GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base -``` - -### Qodo Review - -```bash -gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" -``` - -The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. - -### Trigger Affected CI Job - -After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: - -```bash -# List available presubmit jobs -gh pr comment --repo redhat-developer/rhdh --body "/test ?" - -# Trigger the matching presubmit job -gh pr comment --repo redhat-developer/rhdh --body "/test " -``` - -Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
- -### CI Monitoring - -```bash -gh pr checks --repo redhat-developer/rhdh --watch -``` - -Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` - -## Reference Files - -See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: - -| Category | Key files | -|----------|-----------| -| Local test runner | `e2e-tests/local-run.sh` | -| Local test env | `e2e-tests/local-test-setup.sh` | -| Qodo config | `.pr_agent.toml` | diff --git a/.cursor/rules/e2e-fix-workflow.mdc b/.cursor/rules/e2e-fix-workflow.mdc index 7bb3480233..6f078d4d82 100644 --- a/.cursor/rules/e2e-fix-workflow.mdc +++ b/.cursor/rules/e2e-fix-workflow.mdc @@ -3,50 +3,27 @@ # E2E Test Fix Workflow -This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. - -## Table of Contents - -- [Workflow Overview](#workflow-overview) -- [Parsing CI Failures](#parsing-ci-failures) -- [Branch and Deployment Setup](#branch-and-deployment-setup) -- [Reproducing Failures](#reproducing-failures) -- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) -- [Playwright Test Agents](#playwright-test-agents) -- [Verification and PR Submission](#verification-and-pr-submission) +Reference knowledge for the `/fix-e2e` command. For detailed instructions, load the corresponding skill for each phase. ## Workflow Overview The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: -1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +1. **Parse CI Failure** (`e2e-parse-ci-failure`) — Extract failure details from Prow URL, Playwright report, or Jira ticket 2. **Setup Fix Branch** — Create a branch from the correct upstream release branch -3. 
**Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` -4. **Reproduce Failure** — Confirm the failure reproduces locally -5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents -6. **Verify Fix** — Run the test multiple times and check code quality -7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI - -Each phase has a corresponding skill (in `.opencode/skills/` and `.claude/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. +3. **Deploy RHDH** (`e2e-deploy-rhdh`) — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** (`e2e-reproduce-failure`) — Confirm the failure reproduces locally +5. **Diagnose and Fix** (`e2e-diagnose-and-fix`) — Analyze root cause and implement a fix +6. **Verify Fix** (`e2e-verify-fix`) — Run the test multiple times and check code quality +7. **Submit and Review** (`e2e-submit-and-review`) — Create PR, trigger review, monitor CI -**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. +**Critical rule**: No phase may be skipped without **explicit user approval**. -## Parsing CI Failures +## Job Name Mapping Tables -### Prow URL Structure +These tables are the **single source of truth** — referenced by `e2e-parse-ci-failure` and other skills. -``` -https://prow.ci.openshift.org/view/gs/test-platform-results/logs// -``` - -Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: -``` -✘ [] › /.spec.ts: -``` - -### Job Name Mapping Tables - -#### Job Name → Release Branch +### Job Name → Release Branch Extract the release branch from the Prow job name using the `-rhdh--` pattern: @@ -54,7 +31,7 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -#### Job Name → Platform and Deployment Method +### Job Name → Platform and Deployment Method | Pattern | Platform | Method | |---------|----------|--------| @@ -68,7 +45,7 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*gke*operator*` | GKE | Operator | | `*osd-gcp*` | OSD-GCP | Helm/Operator | -#### Job Name → Playwright Projects +### Job Name → Playwright Projects | Job pattern | Projects | |-------------|----------| @@ -80,18 +57,14 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | | `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | -#### Job Name → local-run.sh `-j` Parameter - -Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. +### Job Name → local-run.sh `-j` Parameter -**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` -**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` +Use the **full Prow CI job name** directly as the `-j` parameter. Do NOT use shortened names. -Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. 
+**OCP** (deploy-only with `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s` +**K8s** (full execution, no `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag>` -#### Release Branch → Image Repo and Tag - -Derive from the branch name — no hardcoded version list needed: +### Release Branch → Image Repo and Tag ```bash if [[ "$BRANCH" == "main" ]]; then @@ -101,187 +74,8 @@ else fi ``` -## Branch and Deployment Setup - -### Branch Creation - -Always create branches from upstream, never from local copies: - -```bash -git fetch upstream -git checkout -b fix/e2e-<description> upstream/<release-branch> -``` - -### Deployment via local-run.sh - -CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. - -```bash -cd e2e-tests -# OCP jobs: use -s to deploy only, then run tests manually -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s -# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -``` - -Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
- -After deployment, source the test environment: -```bash -source e2e-tests/local-test-setup.sh -``` - -### Deployment Error Recovery - -| Error | Investigation | Common Fix | -|-------|--------------|------------| -| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | -| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | -| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | -| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | - -For config issues, search these repos for reference: -- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts -- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults - -## Reproducing Failures - -### Test Execution - -```bash -cd e2e-tests -yarn playwright test --project= --retries=0 --workers=1 -``` - -### Flakiness Detection - -If the test passes on first run, repeat 10 times: -- **10/10 pass** → cannot reproduce (check environment differences) -- **Mixed results** → flaky (focus on reliability improvements) -- **0/10 pass** → consistent failure - -For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. - -## Diagnosing and Fixing Tests - -### Failure Classification - -1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors -2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals -3. **Assertion mismatch** — Expected values changed → update test data or report product bug -4. **Data dependency** — Test data missing → add proper setup/teardown -5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional -6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` - -### RHDH Coding Conventions +## Coding Conventions All test code must follow the project's coding rules: - **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects - **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts - -### Product Bug Decision - -**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: -1. File or update a Jira bug in the `RHDHBUGS` project -2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: - ```typescript - // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX - test.fixme('Description of the product bug'); - ``` -3. Do **not** change assertions to match broken behavior -4. Proceed with the `test.fixme()` change - -## Playwright Test Agents - -The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. - -### Available Agents - -| Agent | Mode | Purpose | -|-------|------|---------| -| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | -| `playwright-test-generator` | subagent | Create new test code from a test plan | -| `playwright-test-planner` | subagent | Explore app and create test plans | - -### Healer Agent Usage (Primary for Fixes) - -The healer agent is the primary tool for test repair: -1. Runs tests with `test_run` to identify failures -2. Debugs with `test_debug` to step through failing tests -3. Inspects UI state via `browser_snapshot`, `browser_console_messages` -4. Generates correct locators with `browser_generate_locator` -5. Edits test code with `edit`/`write` tools -6. 
Re-runs tests to verify the fix - -Invoke with: `@playwright-test-healer Fix the failing test in ` - -## Verification and PR Submission - -### Verification Checklist - -1. Single test run passes -2. 5 consecutive runs pass (stability) -3. `yarn tsc:check` passes -4. `yarn lint:check` passes -5. `yarn prettier:check` passes - -### Pre-Commit Hooks - -Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: - -```bash -yarn install # Root workspace -cd e2e-tests && yarn install && cd .. # If e2e-tests files changed -cd .ci && yarn install && cd .. # If .ci files changed -``` - -### PR Creation - -Always create PRs as **drafts**: - -```bash -git push -u origin -# Determine GitHub username from fork remote -GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base -``` - -### Qodo Review - -```bash -gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" -``` - -The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. - -### Trigger Affected CI Job - -After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: - -```bash -# List available presubmit jobs -gh pr comment --repo redhat-developer/rhdh --body "/test ?" - -# Trigger the matching presubmit job -gh pr comment --repo redhat-developer/rhdh --body "/test " -``` - -Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
- -### CI Monitoring - -```bash -gh pr checks --repo redhat-developer/rhdh --watch -``` - -Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` - -## Reference Files - -See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: - -| Category | Key files | -|----------|-----------| -| Local test runner | `e2e-tests/local-run.sh` | -| Local test env | `e2e-tests/local-test-setup.sh` | -| Qodo config | `.pr_agent.toml` | diff --git a/.opencode/memories/e2e-fix-workflow.md b/.opencode/memories/e2e-fix-workflow.md index 165041c95e..8d82a23c0c 100644 --- a/.opencode/memories/e2e-fix-workflow.md +++ b/.opencode/memories/e2e-fix-workflow.md @@ -1,49 +1,26 @@ # E2E Test Fix Workflow -This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. - -## Table of Contents - -- [Workflow Overview](#workflow-overview) -- [Parsing CI Failures](#parsing-ci-failures) -- [Branch and Deployment Setup](#branch-and-deployment-setup) -- [Reproducing Failures](#reproducing-failures) -- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) -- [Playwright Test Agents](#playwright-test-agents) -- [Verification and PR Submission](#verification-and-pr-submission) +Reference knowledge for the `/fix-e2e` command. For detailed instructions, load the corresponding skill for each phase. ## Workflow Overview The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: -1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +1. **Parse CI Failure** (`e2e-parse-ci-failure`) — Extract failure details from Prow URL, Playwright report, or Jira ticket 2. 
**Setup Fix Branch** — Create a branch from the correct upstream release branch -3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` -4. **Reproduce Failure** — Confirm the failure reproduces locally -5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents -6. **Verify Fix** — Run the test multiple times and check code quality -7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI - -Each phase has a corresponding skill (in `.opencode/skills/` and `.claude/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. +3. **Deploy RHDH** (`e2e-deploy-rhdh`) — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** (`e2e-reproduce-failure`) — Confirm the failure reproduces locally +5. **Diagnose and Fix** (`e2e-diagnose-and-fix`) — Analyze root cause and implement a fix +6. **Verify Fix** (`e2e-verify-fix`) — Run the test multiple times and check code quality +7. **Submit and Review** (`e2e-submit-and-review`) — Create PR, trigger review, monitor CI -**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. +**Critical rule**: No phase may be skipped without **explicit user approval**. -## Parsing CI Failures +## Job Name Mapping Tables -### Prow URL Structure +These tables are the **single source of truth** — referenced by `e2e-parse-ci-failure` and other skills. -``` -https://prow.ci.openshift.org/view/gs/test-platform-results/logs// -``` - -Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: -``` -✘ [] › /.spec.ts: -``` - -### Job Name Mapping Tables - -#### Job Name → Release Branch +### Job Name → Release Branch Extract the release branch from the Prow job name using the `-rhdh--` pattern: @@ -51,7 +28,7 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -#### Job Name → Platform and Deployment Method +### Job Name → Platform and Deployment Method | Pattern | Platform | Method | |---------|----------|--------| @@ -65,7 +42,7 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*gke*operator*` | GKE | Operator | | `*osd-gcp*` | OSD-GCP | Helm/Operator | -#### Job Name → Playwright Projects +### Job Name → Playwright Projects | Job pattern | Projects | |-------------|----------| @@ -77,18 +54,14 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | | `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | -#### Job Name → local-run.sh `-j` Parameter - -Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. +### Job Name → local-run.sh `-j` Parameter -**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` -**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` +Use the **full Prow CI job name** directly as the `-j` parameter. Do NOT use shortened names. -Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. 
+**OCP** (deploy-only with `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s` +**K8s** (full execution, no `-s`): `./local-run.sh -j <full-job-name> -r <image-repo> -t <tag>` -#### Release Branch → Image Repo and Tag - -Derive from the branch name — no hardcoded version list needed: +### Release Branch → Image Repo and Tag ```bash if [[ "$BRANCH" == "main" ]]; then @@ -98,187 +71,8 @@ else fi ``` -## Branch and Deployment Setup - -### Branch Creation - -Always create branches from upstream, never from local copies: - -```bash -git fetch upstream -git checkout -b fix/e2e-<description> upstream/<release-branch> -``` - -### Deployment via local-run.sh - -CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. - -```bash -cd e2e-tests -# OCP jobs: use -s to deploy only, then run tests manually -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -s -# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required -./local-run.sh -j <full-job-name> -r <image-repo> -t <tag> -``` - -Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
- -After deployment, source the test environment: -```bash -source e2e-tests/local-test-setup.sh -``` - -### Deployment Error Recovery - -| Error | Investigation | Common Fix | -|-------|--------------|------------| -| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | -| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | -| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | -| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | - -For config issues, search these repos for reference: -- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts -- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults - -## Reproducing Failures - -### Test Execution - -```bash -cd e2e-tests -yarn playwright test --project= --retries=0 --workers=1 -``` - -### Flakiness Detection - -If the test passes on first run, repeat 10 times: -- **10/10 pass** → cannot reproduce (check environment differences) -- **Mixed results** → flaky (focus on reliability improvements) -- **0/10 pass** → consistent failure - -For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. - -## Diagnosing and Fixing Tests - -### Failure Classification - -1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors -2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals -3. **Assertion mismatch** — Expected values changed → update test data or report product bug -4. **Data dependency** — Test data missing → add proper setup/teardown -5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional -6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` - -### RHDH Coding Conventions +## Coding Conventions All test code must follow the project's coding rules: - **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects - **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts - -### Product Bug Decision - -**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: -1. File or update a Jira bug in the `RHDHBUGS` project -2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: - ```typescript - // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX - test.fixme('Description of the product bug'); - ``` -3. Do **not** change assertions to match broken behavior -4. Proceed with the `test.fixme()` change - -## Playwright Test Agents - -The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. - -### Available Agents - -| Agent | Mode | Purpose | -|-------|------|---------| -| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | -| `playwright-test-generator` | subagent | Create new test code from a test plan | -| `playwright-test-planner` | subagent | Explore app and create test plans | - -### Healer Agent Usage (Primary for Fixes) - -The healer agent is the primary tool for test repair: -1. Runs tests with `test_run` to identify failures -2. Debugs with `test_debug` to step through failing tests -3. Inspects UI state via `browser_snapshot`, `browser_console_messages` -4. Generates correct locators with `browser_generate_locator` -5. Edits test code with `edit`/`write` tools -6. 
Re-runs tests to verify the fix - -Invoke with: `@playwright-test-healer Fix the failing test in ` - -## Verification and PR Submission - -### Verification Checklist - -1. Single test run passes -2. 5 consecutive runs pass (stability) -3. `yarn tsc:check` passes -4. `yarn lint:check` passes -5. `yarn prettier:check` passes - -### Pre-Commit Hooks - -Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: - -```bash -yarn install # Root workspace -cd e2e-tests && yarn install && cd .. # If e2e-tests files changed -cd .ci && yarn install && cd .. # If .ci files changed -``` - -### PR Creation - -Always create PRs as **drafts**: - -```bash -git push -u origin -# Determine GitHub username from fork remote -GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base -``` - -### Qodo Review - -```bash -gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" -``` - -The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. - -### Trigger Affected CI Job - -After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: - -```bash -# List available presubmit jobs -gh pr comment --repo redhat-developer/rhdh --body "/test ?" - -# Trigger the matching presubmit job -gh pr comment --repo redhat-developer/rhdh --body "/test " -``` - -Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
- -### CI Monitoring - -```bash -gh pr checks --repo redhat-developer/rhdh --watch -``` - -Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` - -## Reference Files - -See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: - -| Category | Key files | -|----------|-----------| -| Local test runner | `e2e-tests/local-run.sh` | -| Local test env | `e2e-tests/local-test-setup.sh` | -| Qodo config | `.pr_agent.toml` | diff --git a/.rulesync/rules/e2e-fix-workflow.md b/.rulesync/rules/e2e-fix-workflow.md index c12d1d3dab..f30b60b018 100644 --- a/.rulesync/rules/e2e-fix-workflow.md +++ b/.rulesync/rules/e2e-fix-workflow.md @@ -6,50 +6,27 @@ globs: [] --- # E2E Test Fix Workflow -This rule provides the complete knowledge base for the autonomous E2E CI failure investigation and fix workflow, triggered by the `/fix-e2e` command. It covers the full lifecycle: parsing CI failures, deploying RHDH, reproducing failures, diagnosing and fixing tests, and submitting PRs. - -## Table of Contents - -- [Workflow Overview](#workflow-overview) -- [Parsing CI Failures](#parsing-ci-failures) -- [Branch and Deployment Setup](#branch-and-deployment-setup) -- [Reproducing Failures](#reproducing-failures) -- [Diagnosing and Fixing Tests](#diagnosing-and-fixing-tests) -- [Playwright Test Agents](#playwright-test-agents) -- [Verification and PR Submission](#verification-and-pr-submission) +Reference knowledge for the `/fix-e2e` command. For detailed instructions, load the corresponding skill for each phase. ## Workflow Overview The `/fix-e2e` command orchestrates a 7-phase workflow to autonomously fix E2E CI failures: -1. **Parse CI Failure** — Extract failure details from Prow URL or Jira ticket +1. **Parse CI Failure** (`e2e-parse-ci-failure`) — Extract failure details from Prow URL, Playwright report, or Jira ticket 2. 
**Setup Fix Branch** — Create a branch from the correct upstream release branch -3. **Deploy RHDH** — Deploy RHDH to a cluster using `local-run.sh` -4. **Reproduce Failure** — Confirm the failure reproduces locally -5. **Diagnose and Fix** — Analyze root cause and implement a fix using Playwright agents -6. **Verify Fix** — Run the test multiple times and check code quality -7. **Submit and Review** — Create PR, trigger Qodo review, address feedback, monitor CI - -Each phase has a corresponding skill (in `.opencode/skills/` and `.claude/skills/`) with detailed instructions. This rule provides consolidated reference knowledge for all tools. +3. **Deploy RHDH** (`e2e-deploy-rhdh`) — Deploy RHDH to a cluster using `local-run.sh` +4. **Reproduce Failure** (`e2e-reproduce-failure`) — Confirm the failure reproduces locally +5. **Diagnose and Fix** (`e2e-diagnose-and-fix`) — Analyze root cause and implement a fix +6. **Verify Fix** (`e2e-verify-fix`) — Run the test multiple times and check code quality +7. **Submit and Review** (`e2e-submit-and-review`) — Create PR, trigger review, monitor CI -**Critical rule**: No phase may be skipped without **explicit user approval**. If a phase cannot be executed (e.g., no cluster connection for deployment/reproduction), ask the user before proceeding — never skip silently. +**Critical rule**: No phase may be skipped without **explicit user approval**. -## Parsing CI Failures +## Job Name Mapping Tables -### Prow URL Structure +These tables are the **single source of truth** — referenced by `e2e-parse-ci-failure` and other skills. -``` -https://prow.ci.openshift.org/view/gs/test-platform-results/logs// -``` - -Build logs and JUnit XML results are in the GCS artifacts directory. 
Look for Playwright output patterns: -``` -✘ [] › /.spec.ts: -``` - -### Job Name Mapping Tables - -#### Job Name → Release Branch +### Job Name → Release Branch Extract the release branch from the Prow job name using the `-rhdh--` pattern: @@ -57,7 +34,7 @@ Extract the release branch from the Prow job name using the `-rhdh--` pa BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | sed 's/^-rhdh-//;s/-$//') ``` -#### Job Name → Platform and Deployment Method +### Job Name → Platform and Deployment Method | Pattern | Platform | Method | |---------|----------|--------| @@ -71,7 +48,7 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*gke*operator*` | GKE | Operator | | `*osd-gcp*` | OSD-GCP | Helm/Operator | -#### Job Name → Playwright Projects +### Job Name → Playwright Projects | Job pattern | Projects | |-------------|----------| @@ -83,18 +60,14 @@ BRANCH=$(echo "$JOB_NAME" | grep -oE '\-rhdh-(main|release-[0-9]+\.[0-9]+)-' | s | `*aks*`/`*eks*`/`*gke*` helm | `showcase-k8s`, `showcase-rbac-k8s` | | `*aks*`/`*eks*`/`*gke*` operator | `showcase-k8s`, `showcase-rbac-k8s` | -#### Job Name → local-run.sh `-j` Parameter - -Use the **full Prow CI job name** directly as the `-j` parameter. The `openshift-ci-tests.sh` handler uses bash glob patterns (like `*ocp*helm*nightly*`) to match the job name, so the full name works correctly. +### Job Name → local-run.sh `-j` Parameter -**Example (OCP)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-ocp-v4-20-helm-nightly -r rhdh-community/rhdh -t next -s` -**Example (K8s)**: `./local-run.sh -j periodic-ci-redhat-developer-rhdh-main-e2e-eks-helm-nightly -r rhdh-community/rhdh -t next` +Use the **full Prow CI job name** directly as the `-j` parameter. Do NOT use shortened names. -Do NOT use shortened names like `nightly-ocp-helm` — these do not match the glob patterns. 
+**OCP** (deploy-only with `-s`): `./local-run.sh -j -r -t -s` +**K8s** (full execution, no `-s`): `./local-run.sh -j -r -t ` -#### Release Branch → Image Repo and Tag - -Derive from the branch name — no hardcoded version list needed: +### Release Branch → Image Repo and Tag ```bash if [[ "$BRANCH" == "main" ]]; then @@ -104,187 +77,8 @@ else fi ``` -## Branch and Deployment Setup - -### Branch Creation - -Always create branches from upstream, never from local copies: - -```bash -git fetch upstream -git checkout -b fix/e2e- upstream/ -``` - -### Deployment via local-run.sh - -CLI mode requires **all three** flags (`-j`, `-r`, `-t`). Without `-r`, the script enters interactive mode. - -```bash -cd e2e-tests -# OCP jobs: use -s to deploy only, then run tests manually -./local-run.sh -j -r -t -s -# K8s jobs (AKS, EKS, GKE): do NOT use -s — full execution required -./local-run.sh -j -r -t -``` - -Prerequisites: `podman` (machine with 8GB RAM, 4 CPUs), `oc`, `vault`, `jq`, `curl`, `rsync`, `bc`. 
- -After deployment, source the test environment: -```bash -source e2e-tests/local-test-setup.sh -``` - -### Deployment Error Recovery - -| Error | Investigation | Common Fix | -|-------|--------------|------------| -| CrashLoopBackOff | `oc logs -n --previous` | Fix ConfigMap, plugin config, or secrets | -| ImagePullBackOff | `oc describe pod -n ` | Verify image exists, check pull secrets | -| Helm failure | `helm status -n ` | Check values against `.ci/pipelines/value_files/` | -| Operator failure | `oc get backstage -n ` | Check CR against `.ci/pipelines/resources/rhdh-operator/` | - -For config issues, search these repos for reference: -- **rhdh-operator**: `redhat-developer/rhdh-operator` — Backstage CR, CatalogSource, operator scripts -- **rhdh-chart**: `redhat-developer/rhdh-chart` — Helm values, chart templates, defaults - -## Reproducing Failures - -### Test Execution - -```bash -cd e2e-tests -yarn playwright test --project= --retries=0 --workers=1 -``` - -### Flakiness Detection - -If the test passes on first run, repeat 10 times: -- **10/10 pass** → cannot reproduce (check environment differences) -- **Mixed results** → flaky (focus on reliability improvements) -- **0/10 pass** → consistent failure - -For headed mode, debug mode, and trace viewing, see the `playwright-locators` and `ci-e2e-testing` rules. - -## Diagnosing and Fixing Tests - -### Failure Classification - -1. **Locator drift** — UI changed, selectors don't match → update to semantic selectors -2. **Timing/race** — Test acts before UI ready → add `expect().toPass()` with intervals -3. **Assertion mismatch** — Expected values changed → update test data or report product bug -4. **Data dependency** — Test data missing → add proper setup/teardown -5. **Platform-specific** — Works on one platform, fails on another → add `skipIfJobName()` conditional -6. 
**Deployment config** — RHDH itself broken → fix ConfigMap/values in `.ci/pipelines/` - -### RHDH Coding Conventions +## Coding Conventions All test code must follow the project's coding rules: - **`playwright-locators`** — locator priority, anti-patterns, assertions, Page Objects - **`ci-e2e-testing`** — test structure, component annotations, utility classes, CI scripts - -### Product Bug Decision - -**`test.fixme()` is a last resort.** You must be absolutely certain the failure is a product bug — the Playwright healer agent must have confirmed the test is correct and the application behavior is wrong. Ask the user for confirmation before proceeding. Then: -1. File or update a Jira bug in the `RHDHBUGS` project -2. Mark the test with a `// TODO:` comment linking to the Jira ticket, followed by `test.fixme()`: - ```typescript - // TODO: https://redhat.atlassian.net/browse/RHDHBUGS-XXXX - test.fixme('Description of the product bug'); - ``` -3. Do **not** change assertions to match broken behavior -4. Proceed with the `test.fixme()` change - -## Playwright Test Agents - -The project uses Playwright Test Agents (configured in `e2e-tests/opencode.json`) with an MCP server for live browser interaction. - -### Available Agents - -| Agent | Mode | Purpose | -|-------|------|---------| -| `playwright-test-healer` | subagent | Debug and fix failing tests — runs tests, inspects UI, generates locators, edits code | -| `playwright-test-generator` | subagent | Create new test code from a test plan | -| `playwright-test-planner` | subagent | Explore app and create test plans | - -### Healer Agent Usage (Primary for Fixes) - -The healer agent is the primary tool for test repair: -1. Runs tests with `test_run` to identify failures -2. Debugs with `test_debug` to step through failing tests -3. Inspects UI state via `browser_snapshot`, `browser_console_messages` -4. Generates correct locators with `browser_generate_locator` -5. Edits test code with `edit`/`write` tools -6. 
Re-runs tests to verify the fix - -Invoke with: `@playwright-test-healer Fix the failing test in ` - -## Verification and PR Submission - -### Verification Checklist - -1. Single test run passes -2. 5 consecutive runs pass (stability) -3. `yarn tsc:check` passes -4. `yarn lint:check` passes -5. `yarn prettier:check` passes - -### Pre-Commit Hooks - -Before committing, run `yarn install` in all relevant workspaces to ensure pre-commit hooks pass: - -```bash -yarn install # Root workspace -cd e2e-tests && yarn install && cd .. # If e2e-tests files changed -cd .ci && yarn install && cd .. # If .ci files changed -``` - -### PR Creation - -Always create PRs as **drafts**: - -```bash -git push -u origin -# Determine GitHub username from fork remote -GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base -``` - -### Qodo Review - -```bash -gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" -``` - -The `.pr_agent.toml` config enables RAG across `rhdh`, `rhdh-operator`, `rhdh-chart`, and docs repos. Qodo will auto-run `/review`, `/describe`, and `/improve` on PR creation. - -### Trigger Affected CI Job - -After addressing Qodo review feedback, trigger the presubmit E2E job that matches the platform and deployment method of the original failure: - -```bash -# List available presubmit jobs -gh pr comment --repo redhat-developer/rhdh --body "/test ?" - -# Trigger the matching presubmit job -gh pr comment --repo redhat-developer/rhdh --body "/test " -``` - -Match the presubmit job by platform and deployment method — e.g., if the original failure was `*ocp*helm*nightly*`, look for a presubmit job containing `*ocp*helm*`. 
- -### CI Monitoring - -```bash -gh pr checks --repo redhat-developer/rhdh --watch -``` - -Re-trigger with: `gh pr comment --repo redhat-developer/rhdh --body "/retest"` - -## Reference Files - -See the `ci-e2e-testing` rule for the full list of CI scripts, test utilities, and config files. Files specific to the fix workflow: - -| Category | Key files | -|----------|-----------| -| Local test runner | `e2e-tests/local-run.sh` | -| Local test env | `e2e-tests/local-test-setup.sh` | -| Qodo config | `.pr_agent.toml` | From fcb9057ae04161e7d9f3ea16a389759f19eb3070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 15:57:59 +0200 Subject: [PATCH 21/23] fix(commands): replace hardcoded image mapping with generic pattern Reference e2e-fix-workflow rule for derivation logic. Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 5 ++--- .cursor/commands/fix-e2e.md | 5 ++--- .opencode/command/fix-e2e.md | 5 ++--- .rulesync/commands/fix-e2e.md | 5 ++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md index 054661ca3c..3ee521b136 100644 --- a/.claude/commands/fix-e2e.md +++ b/.claude/commands/fix-e2e.md @@ -84,10 +84,9 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). 
-Select the image repo and tag based on the release branch: +Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): - `main` → `-r rhdh-community/rhdh -t next` -- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` -- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` +- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` After deployment completes, set up the local test environment: ```bash diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md index e11832e1f1..901d9744fa 100644 --- a/.cursor/commands/fix-e2e.md +++ b/.cursor/commands/fix-e2e.md @@ -81,10 +81,9 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). -Select the image repo and tag based on the release branch: +Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): - `main` → `-r rhdh-community/rhdh -t next` -- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` -- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` +- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` After deployment completes, set up the local test environment: ```bash diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md index 054661ca3c..3ee521b136 100644 --- a/.opencode/command/fix-e2e.md +++ b/.opencode/command/fix-e2e.md @@ -84,10 +84,9 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). 
-Select the image repo and tag based on the release branch: +Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): - `main` → `-r rhdh-community/rhdh -t next` -- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` -- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` +- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` After deployment completes, set up the local test environment: ```bash diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md index bb09e35bd1..04c7f70953 100644 --- a/.rulesync/commands/fix-e2e.md +++ b/.rulesync/commands/fix-e2e.md @@ -86,10 +86,9 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). -Select the image repo and tag based on the release branch: +Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): - `main` → `-r rhdh-community/rhdh -t next` -- `release-1.9` → `-r rhdh/rhdh-hub-rhel9 -t 1.9` -- `release-1.8` → `-r rhdh/rhdh-hub-rhel9 -t 1.8` +- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` After deployment completes, set up the local test environment: ```bash From 2425782eebe46c8c5b7e74fdaf34dc81b557d5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 16:02:07 +0200 Subject: [PATCH 22/23] fix(commands): remove inline image mapping, defer to e2e-fix-workflow rule Single source of truth for branch-to-image derivation. Assisted-by: OpenCode --- .claude/commands/fix-e2e.md | 4 +--- .cursor/commands/fix-e2e.md | 4 +--- .opencode/command/fix-e2e.md | 4 +--- .rulesync/commands/fix-e2e.md | 4 +--- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.claude/commands/fix-e2e.md b/.claude/commands/fix-e2e.md index 3ee521b136..8b8fc9c7ad 100644 --- a/.claude/commands/fix-e2e.md +++ b/.claude/commands/fix-e2e.md @@ -84,9 +84,7 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). 
-Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): -- `main` → `-r rhdh-community/rhdh -t next` -- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` +Derive the image repo (`-r`) and tag (`-t`) from the release branch — see the `e2e-fix-workflow` rule for the derivation logic. After deployment completes, set up the local test environment: ```bash diff --git a/.cursor/commands/fix-e2e.md b/.cursor/commands/fix-e2e.md index 901d9744fa..03782dad30 100644 --- a/.cursor/commands/fix-e2e.md +++ b/.cursor/commands/fix-e2e.md @@ -81,9 +81,7 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). -Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): -- `main` → `-r rhdh-community/rhdh -t next` -- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` +Derive the image repo (`-r`) and tag (`-t`) from the release branch — see the `e2e-fix-workflow` rule for the derivation logic. After deployment completes, set up the local test environment: ```bash diff --git a/.opencode/command/fix-e2e.md b/.opencode/command/fix-e2e.md index 3ee521b136..8b8fc9c7ad 100644 --- a/.opencode/command/fix-e2e.md +++ b/.opencode/command/fix-e2e.md @@ -84,9 +84,7 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). -Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): -- `main` → `-r rhdh-community/rhdh -t next` -- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` +Derive the image repo (`-r`) and tag (`-t`) from the release branch — see the `e2e-fix-workflow` rule for the derivation logic. 
After deployment completes, set up the local test environment: ```bash diff --git a/.rulesync/commands/fix-e2e.md b/.rulesync/commands/fix-e2e.md index 04c7f70953..eebfae36f5 100644 --- a/.rulesync/commands/fix-e2e.md +++ b/.rulesync/commands/fix-e2e.md @@ -86,9 +86,7 @@ cd e2e-tests Use the **full Prow CI job name** for `-j` (not shortened names). -Derive the image repo and tag from the release branch (see the `e2e-fix-workflow` rule for the derivation logic): -- `main` → `-r rhdh-community/rhdh -t next` -- `release-X.Y` → `-r rhdh/rhdh-hub-rhel9 -t X.Y` +Derive the image repo (`-r`) and tag (`-t`) from the release branch — see the `e2e-fix-workflow` rule for the derivation logic. After deployment completes, set up the local test environment: ```bash From d1991b325ffc5cfc5ebb25db953ace39b2294aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbyn=C4=9Bk=20Dr=C3=A1pela?= Date: Tue, 14 Apr 2026 17:00:06 +0200 Subject: [PATCH 23/23] feat(skills): add [AI /e2e-fix] tag to PR titles Assisted-by: OpenCode --- .claude/skills/e2e-submit-and-review/SKILL.md | 4 ++-- .cursor/skills/e2e-submit-and-review/SKILL.md | 4 ++-- .opencode/skill/e2e-submit-and-review/SKILL.md | 4 ++-- .rulesync/skills/e2e-submit-and-review/SKILL.md | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.claude/skills/e2e-submit-and-review/SKILL.md b/.claude/skills/e2e-submit-and-review/SKILL.md index f60a90bd06..2399bdb1c8 100644 --- a/.claude/skills/e2e-submit-and-review/SKILL.md +++ b/.claude/skills/e2e-submit-and-review/SKILL.md @@ -90,7 +90,7 @@ gh pr create \ --repo redhat-developer/rhdh \ --head "${GITHUB_USER}:" \ --base \ - --title "fix(e2e): " \ + --title "fix(e2e): [AI /e2e-fix]" \ --body "$(cat <<'EOF' ## Summary - <1-2 bullet points explaining what was fixed and why> @@ -315,7 +315,7 @@ PR Status Report: GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') # Create draft PR (always use --draft) -gh pr create --draft --repo redhat-developer/rhdh --head 
"${GITHUB_USER}:" --base +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base --title "fix(e2e): [AI /e2e-fix]" # Trigger Qodo review gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" diff --git a/.cursor/skills/e2e-submit-and-review/SKILL.md b/.cursor/skills/e2e-submit-and-review/SKILL.md index 325bee9c7f..ffd7637c4c 100644 --- a/.cursor/skills/e2e-submit-and-review/SKILL.md +++ b/.cursor/skills/e2e-submit-and-review/SKILL.md @@ -88,7 +88,7 @@ gh pr create \ --repo redhat-developer/rhdh \ --head "${GITHUB_USER}:" \ --base \ - --title "fix(e2e): " \ + --title "fix(e2e): [AI /e2e-fix]" \ --body "$(cat <<'EOF' ## Summary - <1-2 bullet points explaining what was fixed and why> @@ -313,7 +313,7 @@ PR Status Report: GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') # Create draft PR (always use --draft) -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base --title "fix(e2e): [AI /e2e-fix]" # Trigger Qodo review gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" diff --git a/.opencode/skill/e2e-submit-and-review/SKILL.md b/.opencode/skill/e2e-submit-and-review/SKILL.md index f60a90bd06..2399bdb1c8 100644 --- a/.opencode/skill/e2e-submit-and-review/SKILL.md +++ b/.opencode/skill/e2e-submit-and-review/SKILL.md @@ -90,7 +90,7 @@ gh pr create \ --repo redhat-developer/rhdh \ --head "${GITHUB_USER}:" \ --base \ - --title "fix(e2e): " \ + --title "fix(e2e): [AI /e2e-fix]" \ --body "$(cat <<'EOF' ## Summary - <1-2 bullet points explaining what was fixed and why> @@ -315,7 +315,7 @@ PR Status Report: GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') # Create draft PR (always use --draft) -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:" --base 
--title "fix(e2e): <description> [AI /e2e-fix]" # Trigger Qodo review gh pr comment --repo redhat-developer/rhdh --body "/agentic_review" diff --git a/.rulesync/skills/e2e-submit-and-review/SKILL.md b/.rulesync/skills/e2e-submit-and-review/SKILL.md index 33abd88d4c..67b8c41674 100644 --- a/.rulesync/skills/e2e-submit-and-review/SKILL.md +++ b/.rulesync/skills/e2e-submit-and-review/SKILL.md @@ -92,7 +92,7 @@ gh pr create \ --repo redhat-developer/rhdh \ --head "${GITHUB_USER}:<branch-name>" \ --base <release-branch> \ - --title "fix(e2e): <description>" \ + --title "fix(e2e): <description> [AI /e2e-fix]" \ --body "$(cat <<'EOF' ## Summary - <1-2 bullet points explaining what was fixed and why> @@ -317,7 +317,7 @@ PR Status Report: GITHUB_USER=$(git remote get-url origin | sed 's|.*github.com[:/]||;s|/.*||') # Create draft PR (always use --draft) -gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:<branch-name>" --base <release-branch> +gh pr create --draft --repo redhat-developer/rhdh --head "${GITHUB_USER}:<branch-name>" --base <release-branch> --title "fix(e2e): <description> [AI /e2e-fix]" # Trigger Qodo review gh pr comment --repo redhat-developer/rhdh --body "/agentic_review"