From 6b0b78bad7f8ca11e70a977a5fae3194c0acfbc8 Mon Sep 17 00:00:00 2001 From: David Rajnoha Date: Fri, 31 Oct 2025 13:03:11 +0100 Subject: [PATCH] test(cypress): Regression for Incidents - firing alerts Tests that verifies that alerts are not marked as resolved based on not refreshing values or requests with stale timestamp. Updates the incidents commands for creation of an alert with fixed name per test. --- .../03-04.reg_e2e_firing_alerts.cy.ts | 307 ++++++++++++++++++ .../support/commands/incident-commands.ts | 87 +++-- 2 files changed, 368 insertions(+), 26 deletions(-) create mode 100644 web/cypress/e2e/incidents/regression/03-04.reg_e2e_firing_alerts.cy.ts diff --git a/web/cypress/e2e/incidents/regression/03-04.reg_e2e_firing_alerts.cy.ts b/web/cypress/e2e/incidents/regression/03-04.reg_e2e_firing_alerts.cy.ts new file mode 100644 index 00000000..272473c5 --- /dev/null +++ b/web/cypress/e2e/incidents/regression/03-04.reg_e2e_firing_alerts.cy.ts @@ -0,0 +1,307 @@ +/* +Regression tests for time-based alert resolution issues with real firing alerts. + +Section 3.3: Alerts Marked as Resolved After Time +Tests that alerts maintain their firing state correctly when time passes without +incident refresh. Previously, alerts were incorrectly marked as resolved when +deselecting and reselecting an incident after waiting. + +Section 4.7: Cached End Time for Prometheus Query +Tests that the end time parameter in Prometheus queries uses current time instead +of cached initial load time. Previously, the Redux state would cache the initial +page load time, causing firing alerts to be incorrectly marked as resolved. + +Both tests require continuously firing alerts and cannot be tested with mocked data. + +Verifies: OU-XXX (time-based resolution bugs) +*/ + +import { incidentsPage } from '../../../views/incidents-page'; + +const MCP = { + namespace: Cypress.env('COO_NAMESPACE'), + packageName: 'cluster-observability-operator', + operatorName: 'Cluster Observability Operator', + config: { + kind: 'UIPlugin', + name: 'monitoring', + }, +}; + +const MP = { + namespace: 'openshift-monitoring', + operatorName: 'Cluster Monitoring Operator', +}; + +describe('Regression: Time-Based Alert Resolution (E2E with Firing Alerts)', () => { + let currentAlertName: string; + + before(() => { + cy.beforeBlockCOO(MCP, MP); + + cy.log('Create or reuse firing alert for testing'); + cy.createKubePodCrashLoopingAlert('TimeBasedResolution2').then((alertName) => { + currentAlertName = alertName; + cy.log(`Test will monitor alert: ${currentAlertName}`); + }); + }); + + beforeEach(() => { + cy.transformMetrics(); + }); + + it('1. Section 3.3 - Alert not incorrectly marked as resolved after time passes', () => { + cy.log('1.1 Navigate to Incidents page and clear filters'); + incidentsPage.goTo(); + incidentsPage.clearAllFilters(); + + const intervalMs = 60_000; + const maxMinutes = 30; + + cy.log('1.2 Wait for incident with custom alert to appear and get selected'); + cy.waitUntil( + () => incidentsPage.findIncidentWithAlert(currentAlertName), + { + interval: intervalMs, + timeout: maxMinutes * intervalMs, + errorMsg: `Incident with alert ${currentAlertName} should appear within ${maxMinutes} minutes` + } + ); + + incidentsPage.elements.incidentsDetailsTable().should('exist'); + + cy.log('1.3 Verify alert is firing by checking end time shows "---"'); + cy.wrap(0).as('initialFiringCount'); + + incidentsPage.getSelectedIncidentAlerts().then((alerts) => { + expect(alerts.length).to.be.greaterThan(0); + + alerts.forEach((alert, index) => { + alert.getAlertRuleCell().invoke('text').then((alertRuleText) => { + const cleanAlertName = alertRuleText.trim().replace("AlertRuleAR", ""); + + if (cleanAlertName != currentAlertName) { + cy.log(`Alert ${index + 1}: ${cleanAlertName} does not match ${currentAlertName}, skipping`); + return; + } + + cy.log(`Alert ${index + 1}: Found matching alert ${cleanAlertName}`); + + alert.getEndCell().invoke('text').then((endText) => { + const cleanEndText = endText.trim(); + cy.log(`Alert ${index + 1} end time: "${cleanEndText}"`); + const isFiring = cleanEndText === '---'; + if (isFiring) { + cy.get('@initialFiringCount').then((count: any) => { + cy.wrap(count + 1).as('initialFiringCount'); + }); + cy.log(`Alert ${index + 1} is FIRING`); + } else { + cy.log(`Alert ${index + 1} is resolved`); + } + }); + }); + }); + }).then(() => { + cy.get('@initialFiringCount').then((count: any) => { + cy.log(`Total firing alerts found: ${count}`); + expect(count).to.be.greaterThan(0, `Expected at least 1 firing alert for ${currentAlertName}, but found ${count}`); + }); + }); + + cy.log('Verified: Alert initially shows firing state (end time = "---")'); + + + const waitMinutes = 0.1 + cy.log(`1.6 Wait ${waitMinutes} minutes without refreshing the incidents page`); + cy.wait(waitMinutes * 60_000); + + cy.log('1.10 Verify alert is STILL firing (end time still shows "---", not resolved)'); + cy.wrap(0).as('currentFiringCount'); + + incidentsPage.getSelectedIncidentAlerts().then((alerts) => { + expect(alerts.length).to.be.greaterThan(0); + + alerts.forEach((alert, index) => { + alert.getAlertRuleCell().invoke('text').then((alertRuleText) => { + const cleanAlertName = alertRuleText.trim().replace("AlertRuleAR", ""); + + if (cleanAlertName != currentAlertName) { + cy.log(`Alert ${index + 1}: ${cleanAlertName} does not match ${currentAlertName}, skipping`); + return; + } + + cy.log(`Alert ${index + 1}: Found matching alert ${cleanAlertName}`); + + alert.getEndCell().invoke('text').then((endText) => { + const cleanEndText = endText.trim(); + cy.log(`Alert ${index + 1} end time: "${cleanEndText}"`); + const isFiring = cleanEndText === '---'; + if (isFiring) { + cy.get('@currentFiringCount').then((count: any) => { + cy.wrap(count + 1).as('currentFiringCount'); + }); + cy.log(`Alert ${index + 1} is STILL FIRING`); + } else { + cy.log(`Alert ${index + 1} is now resolved (BUG!)`); + } + }); + }); + }); + }).then(() => { + cy.get('@initialFiringCount').then((initialCount: any) => { + cy.get('@currentFiringCount').then((currentCount: any) => { + cy.log(`Initial firing alerts: ${initialCount}, Current firing alerts: ${currentCount}`); + expect(currentCount).to.equal(initialCount, `Expected same number of firing alerts after wait (${initialCount}), but got ${currentCount}`); + expect(currentCount).to.be.greaterThan(0, `Expected at least 1 firing alert, but found ${currentCount}`); + }); + }); + }); + + cy.log('Verified: Alert maintains firing state after time passes and reselection (end time = "---")'); + }); + + it('2. Section 4.7 - Prometheus query end time updates to current time on filter refresh', () => { + cy.log('2.1 Navigate to Incidents page and clear filters'); + incidentsPage.goTo(); + incidentsPage.clearAllFilters(); + + cy.log('2.2 Capture initial page load time'); + const initialLoadTime = Date.now(); + cy.wrap(initialLoadTime).as('initialLoadTime'); + + cy.log('2.3 Search for and select incident with custom alert'); + incidentsPage.findIncidentWithAlert(currentAlertName).should('eq', true); + + cy.log('2.4 Verify alert is firing (end time = "---")'); + cy.wrap(0).as('firingCountTest2'); + + incidentsPage.getSelectedIncidentAlerts().then((alerts) => { + expect(alerts.length).to.be.greaterThan(0); + + alerts.forEach((alert, index) => { + alert.getAlertRuleCell().invoke('text').then((alertRuleText) => { + const cleanAlertName = alertRuleText.trim().replace("AlertRuleAR", ""); + + if (cleanAlertName != currentAlertName) { + return; + } + + alert.getEndCell().invoke('text').then((endText) => { + const cleanEndText = endText.trim(); + if (cleanEndText === '---') { + cy.get('@firingCountTest2').then((count: any) => { + cy.wrap(count + 1).as('firingCountTest2'); + }); + } + }); + }); + }); + }).then(() => { + cy.get('@firingCountTest2').then((count: any) => { + expect(count).to.be.greaterThan(0, `Expected at least 1 firing alert for ${currentAlertName}`); + }); + }); + + cy.log('Verified: Alert initially shows firing state'); + + const waitMinutes = 11; + const REFRESH_FREQUENCY = 300; + + cy.log(`2.5 Wait ${waitMinutes} minutes without refreshing incidents`); + cy.wait(waitMinutes * 60_000); + + cy.log('2.6 Set up intercept to capture Prometheus query parameters'); + const queryEndTimes: number[] = []; + cy.intercept('GET', '**/api/prometheus/api/v1/query_range*', (req) => { + req.continue((res) => { + const queryParams = new URLSearchParams(req.url.split('?')[1]); + const endTimeParam = queryParams.get('end'); + if (endTimeParam) { + queryEndTimes.push(parseFloat(endTimeParam)); + } + }); + }).as('prometheusQuery'); + + cy.log('2.7 Refresh the days filter to trigger new Prometheus queries'); + incidentsPage.setDays('7 days'); + + cy.log('2.8 Wait for all Prometheus queries to complete'); + cy.wait(2000); + + cy.wrap(null).then(() => { + cy.log(`Captured ${queryEndTimes.length} Prometheus queries`); + + + if (queryEndTimes.length > 0) { + const mostRecentEndTime = Math.max(...queryEndTimes); + const oldestEndTime = Math.min(...queryEndTimes); + const currentTime = Date.now() / 1000; + const timeDifference = Math.abs(currentTime - mostRecentEndTime); + + cy.log(`Query end times range: ${oldestEndTime} to ${mostRecentEndTime}`); + cy.log(`Current time: ${currentTime}, Most recent query end time: ${mostRecentEndTime}, Difference: ${timeDifference}s`); + + cy.get('@initialLoadTime').then((initialTime: any) => { + const initialTimeSeconds = initialTime / 1000; + const timePassedSinceLoad = currentTime - initialTimeSeconds; + + cy.log(`Time passed since initial load: ${timePassedSinceLoad}s`); + + expect(timeDifference).to.be.lessThan(REFRESH_FREQUENCY, + `Most recent end time should be close to current time (within ${REFRESH_FREQUENCY} seconds)`); + + expect(mostRecentEndTime).to.be.greaterThan(initialTimeSeconds + (waitMinutes * 60) - REFRESH_FREQUENCY, + `End time should be updated to current time, not cached from initial load (${waitMinutes} minutes ago)`); + }); + + cy.log('Verified: Most recent end time parameter uses current time, not cached initial load time'); + } else { + throw new Error('No Prometheus queries were captured'); + } + }); + }); + + it('3. Verify alert lifecycle - alert continues firing throughout test', () => { + cy.log('3.1 Navigate to Incidents page'); + incidentsPage.goTo(); + incidentsPage.clearAllFilters(); + + cy.log('3.2 Search for and select incident with custom alert'); + incidentsPage.findIncidentWithAlert(currentAlertName).should('eq', true); + + cy.log('3.3 Verify end time shows "---" for firing alert'); + cy.wrap(0).as('firingCountTest3'); + + incidentsPage.getSelectedIncidentAlerts().then((alerts) => { + expect(alerts.length).to.be.greaterThan(0); + + alerts.forEach((alert, index) => { + alert.getAlertRuleCell().invoke('text').then((alertRuleText) => { + const cleanAlertName = alertRuleText.trim().replace("AlertRuleAR", ""); + + if (cleanAlertName != currentAlertName) { + return; + } + + alert.getEndCell().invoke('text').then((endText) => { + const cleanEndText = endText.trim(); + if (cleanEndText === '---') { + cy.get('@firingCountTest3').then((count: any) => { + cy.wrap(count + 1).as('firingCountTest3'); + }); + } + }); + }); + }); + }).then(() => { + cy.get('@firingCountTest3').then((count: any) => { + expect(count).to.be.greaterThan(0, `Expected at least 1 firing alert for ${currentAlertName}`); + }); + }); + + cy.log('Verified: Alert lifecycle maintained correctly throughout test suite (end time = "---")'); + }); +}); + + diff --git a/web/cypress/support/commands/incident-commands.ts b/web/cypress/support/commands/incident-commands.ts index aea8084c..8a88f4b3 100644 --- a/web/cypress/support/commands/incident-commands.ts +++ b/web/cypress/support/commands/incident-commands.ts @@ -3,47 +3,82 @@ export {}; declare global { namespace Cypress { interface Chainable { - createKubePodCrashLoopingAlert(): Chainable; + createKubePodCrashLoopingAlert(testName?: string): Chainable; cleanupIncidentPrometheusRules(): Chainable; } } } -// Apply incident fixture manifests to the cluster -Cypress.Commands.add('createKubePodCrashLoopingAlert', () => { +Cypress.Commands.add('createKubePodCrashLoopingAlert', (testName?: string) => { const kubeconfigPath = Cypress.env('KUBECONFIG_PATH'); - // Generate a random alert name for this test run - const randomAlertName = `CustomPodCrashLooping_${Math.random().toString(36).substring(2, 15)}`; + const alertName = testName + ? `CustomPodCrashLooping_${testName}` + : `CustomPodCrashLooping_${Math.random().toString(36).substring(2, 15)}`; - // Store the alert name globally so tests can access it - Cypress.env('CURRENT_ALERT_NAME', randomAlertName); + const shouldReuseResources = !!testName; - cy.log(`Generated random alert name: ${randomAlertName}`); + cy.log(`Using alert name: ${alertName}${shouldReuseResources ? ' (reuse mode)' : ' (create new)'}`); - // Read the template and replace the placeholder - cy.readFile('./cypress/fixtures/incidents/prometheus_rule_pod_crash_loop.yaml').then((template) => { - const yamlContent = template.replace(/\{\{ALERT_NAME\}\}/g, randomAlertName); - - // Write the modified YAML to a temporary file - cy.writeFile('./cypress/fixtures/incidents/temp_prometheus_rule.yaml', yamlContent).then(() => { - // Apply the modified YAML - cy.exec( - `oc apply -f ./cypress/fixtures/incidents/temp_prometheus_rule.yaml --kubeconfig ${kubeconfigPath}`, - ); + if (!testName) { + Cypress.env('CURRENT_ALERT_NAME', alertName); + } + + const createOrUpdatePrometheusRule = () => { + cy.readFile('./cypress/fixtures/incidents/prometheus_rule_pod_crash_loop.yaml').then((template) => { + const yamlContent = template.replace(/\{\{ALERT_NAME\}\}/g, alertName); - // Clean up temporary file - cy.exec('rm ./cypress/fixtures/incidents/temp_prometheus_rule.yaml'); + cy.writeFile('./cypress/fixtures/incidents/temp_prometheus_rule.yaml', yamlContent).then(() => { + cy.exec( + `oc apply -f ./cypress/fixtures/incidents/temp_prometheus_rule.yaml --kubeconfig ${kubeconfigPath}`, + ); + + cy.exec('rm ./cypress/fixtures/incidents/temp_prometheus_rule.yaml'); + }); }); - }); + }; + + const createPod = () => { + cy.exec( + `oc apply -f ./cypress/fixtures/incidents/pod_crash_loop.yaml --kubeconfig ${kubeconfigPath}`, + ); + }; - cy.exec( - `oc apply -f ./cypress/fixtures/incidents/pod_crash_loop.yaml --kubeconfig ${kubeconfigPath}`, - ); + if (shouldReuseResources) { + cy.exec( + `oc get prometheusrule kubernetes-monitoring-podcrash-rules -n openshift-monitoring -o yaml --kubeconfig ${kubeconfigPath}`, + { failOnNonZeroExit: false } + ).then((result) => { + if (result.code === 0 && result.stdout.includes(`alert: ${alertName}`)) { + cy.log(`PrometheusRule with alert '${alertName}' already exists, reusing it`); + } else { + if (result.code === 0) { + cy.log(`PrometheusRule exists but does not contain alert '${alertName}', updating it`); + } else { + cy.log('PrometheusRule does not exist, creating it'); + } + createOrUpdatePrometheusRule(); + } + }); + + cy.exec( + `oc get -f ./cypress/fixtures/incidents/pod_crash_loop.yaml --kubeconfig ${kubeconfigPath}`, + { failOnNonZeroExit: false } + ).then((result) => { + if (result.code === 0) { + cy.log('Crash looping pod already exists, reusing it'); + } else { + cy.log('Crash looping pod does not exist, creating it'); + createPod(); + } + }); + } else { + createOrUpdatePrometheusRule(); + createPod(); + } - // Return the alert name for the test to use - return cy.wrap(randomAlertName); + return cy.wrap(alertName); }); // Clean up incident fixture manifests from the cluster