diff --git a/client/src/Hooks/useMonitorForm.ts b/client/src/Hooks/useMonitorForm.ts index 963409fc8a..53219581ed 100644 --- a/client/src/Hooks/useMonitorForm.ts +++ b/client/src/Hooks/useMonitorForm.ts @@ -12,6 +12,8 @@ const getBaseDefaults = (data?: Monitor | null) => ({ description: data?.description || "", interval: data?.interval || 60000, notifications: data?.notifications || [], + escalationNotifications: data?.escalationNotifications || [], + escalationWaitMinutes: data?.escalationWaitMinutes ?? 0, statusWindowSize: data?.statusWindowSize || 5, statusWindowThreshold: data?.statusWindowThreshold || 60, geoCheckEnabled: data?.geoCheckEnabled ?? false, diff --git a/client/src/Pages/CreateMonitor/index.tsx b/client/src/Pages/CreateMonitor/index.tsx index 15b76eab36..7a7cf6533f 100644 --- a/client/src/Pages/CreateMonitor/index.tsx +++ b/client/src/Pages/CreateMonitor/index.tsx @@ -765,6 +765,89 @@ const CreateMonitorPage = () => { } /> + + ( + field.onChange(Number(e.target.value))} + error={fieldState.error?.message} + inputProps={{ min: 0 }} + /> + )} + /> + { + const notificationOptions = (notifications ?? []).map((n) => ({ + ...n, + name: n.notificationName, + })); + const selectedEscalations = notificationOptions.filter((n) => + (field.value ?? []).includes(n.id) + ); + return ( + + option.name} + onChange={(_: unknown, newValue: typeof notificationOptions) => { + field.onChange(newValue.map((n) => n.id)); + }} + isOptionEqualToValue={(option, value) => option.id === value.id} + /> + {selectedEscalations.length > 0 && ( + + {selectedEscalations.map((notification, index) => ( + + + {notification.notificationName} + + { + field.onChange( + (field.value ?? 
[]).filter( + (id: string) => id !== notification.id + ) + ); + }} + aria-label="Remove escalation notification" + > + + + {index < selectedEscalations.length - 1 && } + + ))} + + )} + + ); + }} + /> + + } + /> + {(watchedType === "http" || watchedType === "grpc" || watchedType === "websocket") && ( diff --git a/client/src/Types/Monitor.ts b/client/src/Types/Monitor.ts index 053b517d1d..0f2a9627a4 100644 --- a/client/src/Types/Monitor.ts +++ b/client/src/Types/Monitor.ts @@ -76,6 +76,9 @@ export interface Monitor { geoCheckEnabled?: boolean; geoCheckLocations?: GeoContinent[]; geoCheckInterval?: number; + escalationNotifications: string[]; + escalationWaitMinutes: number; + escalationSent: boolean; recentChecks: CheckSnapshot[]; createdAt: string; updatedAt: string; diff --git a/client/src/Validation/monitor.ts b/client/src/Validation/monitor.ts index 9acffe6fed..b026ad24b9 100644 --- a/client/src/Validation/monitor.ts +++ b/client/src/Validation/monitor.ts @@ -13,6 +13,8 @@ const baseSchema = z.object({ description: z.string().optional(), interval: z.number().min(15000, "Interval must be at least 15 seconds"), notifications: z.array(z.string()), + escalationNotifications: z.array(z.string()), + escalationWaitMinutes: z.number().min(0, "Wait time must be at least 0 minutes"), statusWindowSize: z .number({ message: "Status window size is required" }) .min(1, "Status window size must be at least 1") diff --git a/client/src/locales/en.json b/client/src/locales/en.json index 92a21939f3..df151d0b03 100644 --- a/client/src/locales/en.json +++ b/client/src/locales/en.json @@ -543,6 +543,12 @@ "description": "Select the notification channels you want to use", "title": "Notifications" }, + "escalation": { + "title": "Escalation notifications", + "description": "If a monitor stays down, send additional alerts to other channels after a specified wait time", + "waitTimeLabel": "Escalation wait time (minutes)", + "channelsLabel": "Escalation channels" + }, "type": { 
"description": "Select the type of check to perform", "optionDockerDescription": "Use Docker to monitor if a container is running.", diff --git a/server/src/db/models/Monitor.ts b/server/src/db/models/Monitor.ts index 036aeadad6..58a9ab8b59 100644 --- a/server/src/db/models/Monitor.ts +++ b/server/src/db/models/Monitor.ts @@ -18,11 +18,12 @@ type CheckSnapshotDocument = Omit & { createdAt: Dat type MonitorDocumentBase = Omit< Monitor, - "id" | "userId" | "teamId" | "notifications" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" + "id" | "userId" | "teamId" | "notifications" | "escalationNotifications" | "selectedDisks" | "statusWindow" | "recentChecks" | "createdAt" | "updatedAt" > & { statusWindow: boolean[]; recentChecks: CheckSnapshotDocument[]; notifications: Types.ObjectId[]; + escalationNotifications: Types.ObjectId[]; selectedDisks: string[]; matchMethod?: MonitorMatchMethod; }; @@ -351,6 +352,20 @@ const MonitorSchema = new Schema( type: Number, default: 300000, }, + escalationNotifications: [ + { + type: Schema.Types.ObjectId, + ref: "Notification", + }, + ], + escalationWaitMinutes: { + type: Number, + default: 0, + }, + escalationSent: { + type: Boolean, + default: false, + }, recentChecks: { type: [checkSnapshotSchema], default: [], diff --git a/server/src/repositories/monitors/MongoMonitorsRepository.ts b/server/src/repositories/monitors/MongoMonitorsRepository.ts index b2d7594483..659ced59db 100644 --- a/server/src/repositories/monitors/MongoMonitorsRepository.ts +++ b/server/src/repositories/monitors/MongoMonitorsRepository.ts @@ -351,6 +351,7 @@ class MongoMonitorsRepository implements IMonitorsRepository { }; const notificationIds = (doc.notifications ?? []).map((notification) => toStringId(notification)); + const escalationNotificationIds = (doc.escalationNotifications ?? 
[]).map((notification) => toStringId(notification)); return { id: toStringId(doc._id), @@ -374,6 +375,9 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? undefined, notifications: notificationIds, + escalationNotifications: escalationNotificationIds, + escalationWaitMinutes: doc.escalationWaitMinutes ?? 0, + escalationSent: doc.escalationSent ?? false, secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, @@ -410,6 +414,7 @@ class MongoMonitorsRepository implements IMonitorsRepository { }; const notificationIds = (doc.notifications ?? []).map((notification: unknown) => toStringId(notification)); + const escalationNotificationIds = ((doc as Record).escalationNotifications as unknown[] ?? []).map((notification: unknown) => toStringId(notification)); return { id: toStringId(doc._id), @@ -433,6 +438,9 @@ class MongoMonitorsRepository implements IMonitorsRepository { interval: doc.interval, uptimePercentage: doc.uptimePercentage ?? undefined, notifications: notificationIds, + escalationNotifications: escalationNotificationIds, + escalationWaitMinutes: (doc as Record).escalationWaitMinutes as number ?? 0, + escalationSent: (doc as Record).escalationSent as boolean ?? false, secret: doc.secret ?? undefined, cpuAlertThreshold: doc.cpuAlertThreshold, cpuAlertCounter: doc.cpuAlertCounter, diff --git a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts index b6908127b2..f1a03404b2 100644 --- a/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts +++ b/server/src/service/infrastructure/SuperSimpleQueue/SuperSimpleQueueHelper.ts @@ -177,6 +177,52 @@ export class SuperSimpleQueueHelper implements ISuperSimpleQueueHelper { stack: error instanceof Error ? error.stack : undefined, }); }); + + // Step 8. 
Handle escalation notifications + const currentMonitor = statusChangeResult.monitor; + + // Reset escalation flag when monitor recovers + if (decision.shouldResolveIncident && currentMonitor.escalationSent) { + this.monitorsRepository.updateById(monitorId, teamId, { escalationSent: false }).catch((error: unknown) => { + this.logger.warn({ + message: `Error resetting escalation flag for monitor ${monitorId}: ${error instanceof Error ? error.message : "Unknown error"}`, + service: SERVICE_NAME, + method: "getMonitorJob", + }); + }); + } + + // Check if escalation should fire (monitor still down, not a new transition) + if ( + !statusChangeResult.statusChanged && + currentMonitor.status === "down" && + !currentMonitor.escalationSent && + currentMonitor.escalationWaitMinutes > 0 && + (currentMonitor.escalationNotifications?.length ?? 0) > 0 + ) { + const activeIncident = await this.incidentsRepository.findActiveByMonitorId(monitorId, teamId); + if (activeIncident) { + const incidentStart = new Date(activeIncident.startTime); + const downtimeMinutes = (Date.now() - incidentStart.getTime()) / 60000; + if (downtimeMinutes >= currentMonitor.escalationWaitMinutes) { + this.notificationsService.handleEscalationNotifications(currentMonitor, status).catch((error: unknown) => { + this.logger.error({ + message: `Error sending escalation notifications for monitor ${monitorId}: ${error instanceof Error ? error.message : "Unknown error"}`, + service: SERVICE_NAME, + method: "getMonitorJob", + stack: error instanceof Error ? error.stack : undefined, + }); + }); + this.monitorsRepository.updateById(monitorId, teamId, { escalationSent: true }).catch((error: unknown) => { + this.logger.warn({ + message: `Error setting escalation flag for monitor ${monitorId}: ${error instanceof Error ? error.message : "Unknown error"}`, + service: SERVICE_NAME, + method: "getMonitorJob", + }); + }); + } + } + } } catch (error: unknown) { this.logger.warn({ message: error instanceof Error ? 
error.message : "Unknown error", diff --git a/server/src/service/infrastructure/notificationMessageBuilder.ts b/server/src/service/infrastructure/notificationMessageBuilder.ts index 934163b2a9..3682f044cd 100644 --- a/server/src/service/infrastructure/notificationMessageBuilder.ts +++ b/server/src/service/infrastructure/notificationMessageBuilder.ts @@ -15,6 +15,11 @@ export interface INotificationMessageBuilder { decision: MonitorActionDecision, clientHost: string ): NotificationMessage; + buildEscalationMessage( + monitor: Monitor, + monitorStatusResponse: MonitorStatusResponse, + clientHost: string + ): NotificationMessage; extractThresholdBreaches(monitor: Monitor, monitorStatusResponse: MonitorStatusResponse): ThresholdBreach[]; } @@ -52,6 +57,34 @@ export class NotificationMessageBuilder implements INotificationMessageBuilder { }; } + buildEscalationMessage( + monitor: Monitor, + monitorStatusResponse: MonitorStatusResponse, + clientHost: string + ): NotificationMessage { + const type: NotificationType = "escalation"; + const severity = this.determineSeverity(type); + const content = this.buildContent(type, monitor, monitorStatusResponse); + + return { + type, + severity, + monitor: { + id: monitor.id, + name: monitor.name, + url: monitor.url, + type: monitor.type, + status: monitor.status, + }, + content, + clientHost, + metadata: { + teamId: monitor.teamId, + notificationReason: "escalation", + }, + }; + } + private determineNotificationType(decision: MonitorActionDecision, monitor: Monitor): NotificationType { // Down status has highest priority (critical) if (monitor.status === "down") { @@ -80,6 +113,7 @@ export class NotificationMessageBuilder implements INotificationMessageBuilder { private determineSeverity(type: NotificationType): NotificationSeverity { switch (type) { case "monitor_down": + case "escalation": return "critical"; case "threshold_breach": return "warning"; @@ -103,6 +137,8 @@ export class NotificationMessageBuilder implements 
INotificationMessageBuilder { return this.buildThresholdBreachContent(monitor, monitorStatusResponse as MonitorStatusResponse); case "threshold_resolved": return this.buildThresholdResolvedContent(monitor); + case "escalation": + return this.buildEscalationContent(monitor, monitorStatusResponse); default: return this.buildDefaultContent(monitor); } @@ -182,6 +218,27 @@ export class NotificationMessageBuilder implements INotificationMessageBuilder { }; } + private buildEscalationContent(monitor: Monitor, monitorStatusResponse: MonitorStatusResponse): NotificationContent { + const title = `Escalation: Monitor ${monitor.name} still down`; + const summary = `Monitor "${monitor.name}" has been down for an extended period and requires attention.`; + const details = [`URL: ${monitor.url}`, `Status: Down`, `Type: ${monitor.type}`]; + + if (monitorStatusResponse.code) { + details.push(`Response Code: ${monitorStatusResponse.code}`); + } + + if (monitorStatusResponse.message) { + details.push(`Error: ${monitorStatusResponse.message}`); + } + + return { + title, + summary, + details, + timestamp: new Date(), + }; + } + public extractThresholdBreaches(monitor: Monitor, monitorStatusResponse: MonitorStatusResponse): ThresholdBreach[] { const breaches: ThresholdBreach[] = []; diff --git a/server/src/service/infrastructure/notificationProviders/email.ts b/server/src/service/infrastructure/notificationProviders/email.ts index b3686651cc..d4c8f2e384 100644 --- a/server/src/service/infrastructure/notificationProviders/email.ts +++ b/server/src/service/infrastructure/notificationProviders/email.ts @@ -87,6 +87,8 @@ export class EmailProvider implements INotificationProvider { return `Monitor ${message.monitor.name} threshold exceeded`; case "threshold_resolved": return `Monitor ${message.monitor.name} thresholds resolved`; + case "escalation": + return `Escalation: Monitor ${message.monitor.name} still down`; default: return `Alert: ${message.monitor.name}`; } diff --git 
a/server/src/service/infrastructure/notificationsService.ts b/server/src/service/infrastructure/notificationsService.ts index c75477c88c..1e48bf4f0c 100644 --- a/server/src/service/infrastructure/notificationsService.ts +++ b/server/src/service/infrastructure/notificationsService.ts @@ -14,6 +14,7 @@ export interface INotificationsService { updateById(id: string, teamId: string, updateData: Partial): Promise; deleteById: (id: string, teamId: string) => Promise; handleNotifications: (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse, decision: MonitorActionDecision) => Promise; + handleEscalationNotifications: (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse) => Promise; sendTestNotification: (notification: Partial) => Promise; testAllNotifications: (notificationIds: string[]) => Promise; @@ -141,6 +142,36 @@ export class NotificationsService implements INotificationsService { return await this.sendNotifications(monitor, monitorStatusResponse, decision); }; + handleEscalationNotifications = async (monitor: Monitor, monitorStatusResponse: MonitorStatusResponse) => { + const escalationIds = monitor.escalationNotifications ?? 
[]; + if (escalationIds.length === 0) { + return false; + } + + const notifications = await this.notificationsRepository.findNotificationsByIds(escalationIds); + if (notifications.length === 0) { + return false; + } + + const settings = this.settingsService.getSettings(); + const clientHost = settings.clientHost || "Host not defined"; + const notificationMessage = this.notificationMessageBuilder.buildEscalationMessage(monitor, monitorStatusResponse, clientHost); + + const tasks = notifications.map((notification) => this.send(notification, monitor, monitorStatusResponse, { shouldCreateIncident: false, shouldResolveIncident: false, shouldSendNotification: true, incidentReason: null, notificationReason: "status_change" }, notificationMessage)); + + const outcomes = await Promise.all(tasks); + const succeeded = outcomes.filter(Boolean).length; + const failed = outcomes.length - succeeded; + if (failed > 0) { + this.logger.warn({ + message: `Escalation notification send completed with ${succeeded} success, ${failed} failure(s)`, + service: SERVICE_NAME, + method: "handleEscalationNotifications", + }); + } + return succeeded === notifications.length; + }; + sendTestNotification = async (notification: Partial) => { switch (notification.type) { case "email": diff --git a/server/src/types/monitor.ts b/server/src/types/monitor.ts index f29ce75d78..bf90033b04 100644 --- a/server/src/types/monitor.ts +++ b/server/src/types/monitor.ts @@ -53,6 +53,9 @@ export interface Monitor { geoCheckEnabled?: boolean; geoCheckLocations?: GeoContinent[]; geoCheckInterval?: number; + escalationNotifications: string[]; + escalationWaitMinutes: number; + escalationSent: boolean; recentChecks: CheckSnapshot[]; createdAt: string; updatedAt: string; diff --git a/server/src/types/notificationMessage.ts b/server/src/types/notificationMessage.ts index f06ff1bd9a..7408e01c4b 100644 --- a/server/src/types/notificationMessage.ts +++ b/server/src/types/notificationMessage.ts @@ -3,7 +3,7 @@ * Part of 
notification system unification effort */ -export type NotificationType = "monitor_down" | "monitor_up" | "threshold_breach" | "threshold_resolved" | "test"; +export type NotificationType = "monitor_down" | "monitor_up" | "threshold_breach" | "threshold_resolved" | "escalation" | "test"; export type NotificationSeverity = "critical" | "warning" | "info" | "success"; diff --git a/server/src/validation/monitorValidation.ts b/server/src/validation/monitorValidation.ts index df000ecef2..79496de61a 100644 --- a/server/src/validation/monitorValidation.ts +++ b/server/src/validation/monitorValidation.ts @@ -78,6 +78,8 @@ export const createMonitorBodyValidation = z.object({ geoCheckEnabled: z.boolean().optional(), geoCheckLocations: z.array(z.enum(GeoContinents)).optional(), geoCheckInterval: z.number().min(300000).optional(), + escalationNotifications: z.array(z.string()).optional(), + escalationWaitMinutes: z.number().min(0).optional(), }); export const editMonitorBodyValidation = z.object({ @@ -90,7 +92,9 @@ export const editMonitorBodyValidation = z.object({ interval: z.number().optional(), notifications: z.array(z.string()).optional(), secret: z.string().optional(), + escalationNotifications: z.array(z.string()).optional(), + escalationWaitMinutes: z.number().min(0).optional(), ignoreTlsErrors: z.boolean().optional(), useAdvancedMatching: z.boolean().optional(), jsonPath: z.union([z.string(), z.literal("")]).optional(), expectedValue: z.union([z.string(), z.literal("")]).optional(), @@ -160,6 +164,9 @@ const importedMonitorSchema = z.object({ geoCheckEnabled: z.boolean().default(false), geoCheckLocations: z.array(z.enum(GeoContinents)).default([]), geoCheckInterval: z.number().min(300000).default(300000), + escalationNotifications: z.array(z.string()).default([]), + escalationWaitMinutes: z.number().min(0).default(0), + escalationSent: z.boolean().default(false), createdAt: z.string().optional(), updatedAt: 
z.string().optional(), });