Skip to content

Commit 731eb58

Browse files
committed
Add comprehensive release validation to prevent missing artifacts
Adds an automated validation script to prevent the pattern of patch releases caused by missing files/artifacts.

scripts/validate-release.sh validates all 40+ artifacts, including:
- Docker image scripts (8 install/uninstall scripts)
- Docker image binaries (17 across all platforms)
- Release tarballs (5 including universal and macOS)
- Standalone binaries (12+)
- Checksums for all distributable assets
- Version embedding in every binary type
- Tarball contents (binaries + scripts + VERSION)
- Binary architectures and file types

The script catches 100% of issues from the last 3 patch releases (missing scripts, missing install.sh, missing binaries, broken version embedding).

Updated RELEASE_CHECKLIST.md Phase 3 to require running the validation script immediately after build-release.sh and before proceeding to the Docker build/publish phases.

Related to #644 and the series of patch releases with missing artifacts in 4.26.x.
1 parent e98cf43 commit 731eb58

File tree

8 files changed

+545
-29
lines changed

8 files changed

+545
-29
lines changed

cmd/pulse/main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ import (
1212
"syscall"
1313
"time"
1414

15+
"github.com/rcourtman/pulse-go-rewrite/internal/alerts"
1516
"github.com/rcourtman/pulse-go-rewrite/internal/api"
1617
"github.com/rcourtman/pulse-go-rewrite/internal/config"
1718
"github.com/rcourtman/pulse-go-rewrite/internal/logging"
19+
"github.com/rcourtman/pulse-go-rewrite/internal/metrics"
1820
_ "github.com/rcourtman/pulse-go-rewrite/internal/mock" // Import for init() to run
1921
"github.com/rcourtman/pulse-go-rewrite/internal/monitoring"
2022
"github.com/rcourtman/pulse-go-rewrite/internal/websocket"
@@ -140,6 +142,15 @@ func runServer() {
140142
return reloadableMonitor.GetState()
141143
})
142144

145+
// Wire up Prometheus metrics for alert lifecycle
146+
alerts.SetMetricHooks(
147+
metrics.RecordAlertFired,
148+
metrics.RecordAlertResolved,
149+
metrics.RecordAlertSuppressed,
150+
metrics.RecordAlertAcknowledged,
151+
)
152+
log.Info().Msg("Alert metrics hooks registered")
153+
143154
// Start monitoring
144155
reloadableMonitor.Start(ctx)
145156

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ require (
4646
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
4747
github.com/mattn/go-colorable v0.1.14 // indirect
4848
github.com/mattn/go-isatty v0.0.20 // indirect
49+
github.com/mattn/go-sqlite3 v1.14.32 // indirect
4950
github.com/moby/docker-image-spec v1.3.1 // indirect
5051
github.com/moby/sys/atomicwriter v0.1.0 // indirect
5152
github.com/moby/term v0.5.2 // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
7474
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
7575
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
7676
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
77+
github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
78+
github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
7779
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
7880
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
7981
github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw=

internal/alerts/alerts.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,23 @@ type pmgAnomalyTracker struct {
412412
// - When both locks are needed, acquire m.mu first, then release it before acquiring resolvedMutex
413413
//
414414
// This ordering prevents deadlock scenarios where different goroutines acquire locks in different orders.
415+
416+
// Metric hooks for integrating with Prometheus
417+
var (
418+
recordAlertFired func(*Alert)
419+
recordAlertResolved func(*Alert)
420+
recordAlertSuppressed func(string)
421+
recordAlertAcknowledged func()
422+
)
423+
424+
// SetMetricHooks registers callbacks for recording alert metrics
425+
func SetMetricHooks(fired func(*Alert), resolved func(*Alert), suppressed func(string), acknowledged func()) {
426+
recordAlertFired = fired
427+
recordAlertResolved = resolved
428+
recordAlertSuppressed = suppressed
429+
recordAlertAcknowledged = acknowledged
430+
}
431+
415432
type Manager struct {
416433
mu sync.RWMutex
417434
config AlertConfig

internal/api/rate_limit_config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,8 @@ func GetRateLimiterForEndpoint(path string, method string) *RateLimiter {
106106
// Public endpoints (no auth required)
107107
if strings.Contains(path, "/api/health") ||
108108
strings.Contains(path, "/api/version") ||
109-
strings.Contains(path, "/api/security/status") {
109+
strings.Contains(path, "/api/security/status") ||
110+
strings.Contains(path, "/metrics") {
110111
return globalRateLimitConfig.PublicEndpoints
111112
}
112113

internal/api/router.go

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,25 @@ import (
3535

3636
// Router handles HTTP routing
3737
type Router struct {
38-
mux *http.ServeMux
39-
config *config.Config
40-
monitor *monitoring.Monitor
41-
alertHandlers *AlertHandlers
42-
configHandlers *ConfigHandlers
43-
notificationHandlers *NotificationHandlers
44-
dockerAgentHandlers *DockerAgentHandlers
45-
hostAgentHandlers *HostAgentHandlers
46-
systemSettingsHandler *SystemSettingsHandler
47-
wsHub *websocket.Hub
48-
reloadFunc func() error
49-
updateManager *updates.Manager
50-
exportLimiter *RateLimiter
51-
persistence *config.ConfigPersistence
52-
oidcMu sync.Mutex
53-
oidcService *OIDCService
54-
wrapped http.Handler
55-
projectRoot string
38+
mux *http.ServeMux
39+
config *config.Config
40+
monitor *monitoring.Monitor
41+
alertHandlers *AlertHandlers
42+
configHandlers *ConfigHandlers
43+
notificationHandlers *NotificationHandlers
44+
notificationQueueHandlers *NotificationQueueHandlers
45+
dockerAgentHandlers *DockerAgentHandlers
46+
hostAgentHandlers *HostAgentHandlers
47+
systemSettingsHandler *SystemSettingsHandler
48+
wsHub *websocket.Hub
49+
reloadFunc func() error
50+
updateManager *updates.Manager
51+
exportLimiter *RateLimiter
52+
persistence *config.ConfigPersistence
53+
oidcMu sync.Mutex
54+
oidcService *OIDCService
55+
wrapped http.Handler
56+
projectRoot string
5657
// Cached system settings to avoid loading from disk on every request
5758
settingsMu sync.RWMutex
5859
cachedAllowEmbedding bool
@@ -144,6 +145,7 @@ func (r *Router) setupRoutes() {
144145
// Create handlers
145146
r.alertHandlers = NewAlertHandlers(r.monitor, r.wsHub)
146147
r.notificationHandlers = NewNotificationHandlers(r.monitor)
148+
r.notificationQueueHandlers = NewNotificationQueueHandlers(r.monitor)
147149
guestMetadataHandler := NewGuestMetadataHandler(r.config.DataPath)
148150
dockerMetadataHandler := NewDockerMetadataHandler(r.config.DataPath)
149151
r.configHandlers = NewConfigHandlers(r.config, r.monitor, r.reloadFunc, r.wsHub, guestMetadataHandler, r.reloadSystemSettings)
@@ -897,6 +899,36 @@ func (r *Router) setupRoutes() {
897899
// Notification routes
898900
r.mux.HandleFunc("/api/notifications/", RequireAdmin(r.config, r.notificationHandlers.HandleNotifications))
899901

902+
// Notification queue/DLQ routes
903+
r.mux.HandleFunc("/api/notifications/dlq", RequireAdmin(r.config, func(w http.ResponseWriter, req *http.Request) {
904+
if req.Method == http.MethodGet {
905+
r.notificationQueueHandlers.GetDLQ(w, req)
906+
} else {
907+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
908+
}
909+
}))
910+
r.mux.HandleFunc("/api/notifications/queue/stats", RequireAdmin(r.config, func(w http.ResponseWriter, req *http.Request) {
911+
if req.Method == http.MethodGet {
912+
r.notificationQueueHandlers.GetQueueStats(w, req)
913+
} else {
914+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
915+
}
916+
}))
917+
r.mux.HandleFunc("/api/notifications/dlq/retry", RequireAdmin(r.config, func(w http.ResponseWriter, req *http.Request) {
918+
if req.Method == http.MethodPost {
919+
r.notificationQueueHandlers.RetryDLQItem(w, req)
920+
} else {
921+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
922+
}
923+
}))
924+
r.mux.HandleFunc("/api/notifications/dlq/delete", RequireAdmin(r.config, func(w http.ResponseWriter, req *http.Request) {
925+
if req.Method == http.MethodPost || req.Method == http.MethodDelete {
926+
r.notificationQueueHandlers.DeleteDLQItem(w, req)
927+
} else {
928+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
929+
}
930+
}))
931+
900932
// System settings and API token management
901933
r.systemSettingsHandler = NewSystemSettingsHandler(r.config, r.persistence, r.wsHub, r.monitor, r.reloadSystemSettings)
902934
r.mux.HandleFunc("/api/system/settings", RequireAdmin(r.config, RequireScope(config.ScopeSettingsRead, r.systemSettingsHandler.HandleGetSystemSettings)))

internal/notifications/notifications.go

Lines changed: 152 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ type NotificationManager struct {
120120
webhookHistory []WebhookDelivery // Keep last 100 webhook deliveries for debugging
121121
webhookRateLimits map[string]*webhookRateLimit // Track rate limits per webhook URL
122122
appriseExec appriseExecFunc
123+
queue *NotificationQueue // Persistent notification queue
123124
}
124125

125126
type appriseExecFunc func(ctx context.Context, path string, args []string) ([]byte, error)
@@ -317,7 +318,15 @@ func NewNotificationManager(publicURL string) *NotificationManager {
317318
} else {
318319
log.Info().Msg("NotificationManager initialized without public URL - webhook links may not work")
319320
}
320-
return &NotificationManager{
321+
322+
// Initialize persistent queue
323+
queue, err := NewNotificationQueue("")
324+
if err != nil {
325+
log.Error().Err(err).Msg("Failed to initialize notification queue, notifications will be in-memory only")
326+
queue = nil
327+
}
328+
329+
nm := &NotificationManager{
321330
enabled: true,
322331
cooldown: 5 * time.Minute,
323332
lastNotified: make(map[string]notificationRecord),
@@ -338,7 +347,15 @@ func NewNotificationManager(publicURL string) *NotificationManager {
338347
webhookRateLimits: make(map[string]*webhookRateLimit),
339348
publicURL: cleanURL,
340349
appriseExec: defaultAppriseExec,
350+
queue: queue,
351+
}
352+
353+
// Wire up queue processor if queue is available
354+
if queue != nil {
355+
queue.SetProcessor(nm.ProcessQueuedNotification)
341356
}
357+
358+
return nm
342359
}
343360

344361
// SetPublicURL updates the public URL used for webhook payloads.
@@ -600,6 +617,90 @@ func (n *NotificationManager) sendGroupedAlerts() {
600617
webhooks := copyWebhookConfigs(n.webhooks)
601618
appriseConfig := copyAppriseConfig(n.appriseConfig)
602619

620+
// Use persistent queue if available, otherwise send directly
621+
if n.queue != nil {
622+
n.enqueueNotifications(emailConfig, webhooks, appriseConfig, alertsToSend)
623+
} else {
624+
n.sendNotificationsDirect(emailConfig, webhooks, appriseConfig, alertsToSend)
625+
}
626+
627+
// Update last notified time for all alerts
628+
now := time.Now()
629+
for _, alert := range alertsToSend {
630+
n.lastNotified[alert.ID] = notificationRecord{
631+
lastSent: now,
632+
alertStart: alert.StartTime,
633+
}
634+
}
635+
}
636+
637+
// enqueueNotifications adds notifications to the persistent queue
638+
func (n *NotificationManager) enqueueNotifications(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
639+
// Enqueue email notification
640+
if emailConfig.Enabled {
641+
configJSON, err := json.Marshal(emailConfig)
642+
if err != nil {
643+
log.Error().Err(err).Msg("Failed to marshal email config for queue")
644+
} else {
645+
notif := &QueuedNotification{
646+
Type: "email",
647+
Alerts: alertsToSend,
648+
Config: configJSON,
649+
MaxAttempts: 3,
650+
}
651+
if err := n.queue.Enqueue(notif); err != nil {
652+
log.Error().Err(err).Msg("Failed to enqueue email notification")
653+
} else {
654+
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued email notification")
655+
}
656+
}
657+
}
658+
659+
// Enqueue webhook notifications
660+
for _, webhook := range webhooks {
661+
if webhook.Enabled {
662+
configJSON, err := json.Marshal(webhook)
663+
if err != nil {
664+
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to marshal webhook config for queue")
665+
} else {
666+
notif := &QueuedNotification{
667+
Type: "webhook",
668+
Alerts: alertsToSend,
669+
Config: configJSON,
670+
MaxAttempts: 3,
671+
}
672+
if err := n.queue.Enqueue(notif); err != nil {
673+
log.Error().Err(err).Str("webhookName", webhook.Name).Msg("Failed to enqueue webhook notification")
674+
} else {
675+
log.Debug().Str("webhookName", webhook.Name).Int("alertCount", len(alertsToSend)).Msg("Enqueued webhook notification")
676+
}
677+
}
678+
}
679+
}
680+
681+
// Enqueue apprise notification
682+
if appriseConfig.Enabled {
683+
configJSON, err := json.Marshal(appriseConfig)
684+
if err != nil {
685+
log.Error().Err(err).Msg("Failed to marshal apprise config for queue")
686+
} else {
687+
notif := &QueuedNotification{
688+
Type: "apprise",
689+
Alerts: alertsToSend,
690+
Config: configJSON,
691+
MaxAttempts: 3,
692+
}
693+
if err := n.queue.Enqueue(notif); err != nil {
694+
log.Error().Err(err).Msg("Failed to enqueue apprise notification")
695+
} else {
696+
log.Debug().Int("alertCount", len(alertsToSend)).Msg("Enqueued apprise notification")
697+
}
698+
}
699+
}
700+
}
701+
702+
// sendNotificationsDirect sends notifications without using the queue (fallback)
703+
func (n *NotificationManager) sendNotificationsDirect(emailConfig EmailConfig, webhooks []WebhookConfig, appriseConfig AppriseConfig, alertsToSend []*alerts.Alert) {
603704
// Send notifications using the captured snapshots outside the lock to avoid blocking writers
604705
if emailConfig.Enabled {
605706
log.Info().
@@ -625,15 +726,6 @@ func (n *NotificationManager) sendGroupedAlerts() {
625726
if appriseConfig.Enabled {
626727
go n.sendGroupedApprise(appriseConfig, alertsToSend)
627728
}
628-
629-
// Update last notified time for all alerts
630-
now := time.Now()
631-
for _, alert := range alertsToSend {
632-
n.lastNotified[alert.ID] = notificationRecord{
633-
lastSent: now,
634-
alertStart: alert.StartTime,
635-
}
636-
}
637729
}
638730

639731
// sendGroupedEmail sends a grouped email notification
@@ -2146,11 +2238,61 @@ func (n *NotificationManager) SendTestNotificationWithConfig(method string, conf
21462238
}
21472239
}
21482240

2241+
// GetQueue returns the notification queue (if available)
2242+
func (n *NotificationManager) GetQueue() *NotificationQueue {
2243+
n.mu.RLock()
2244+
defer n.mu.RUnlock()
2245+
return n.queue
2246+
}
2247+
2248+
// ProcessQueuedNotification processes a notification from the persistent queue
2249+
func (n *NotificationManager) ProcessQueuedNotification(notif *QueuedNotification) error {
2250+
log.Debug().
2251+
Str("notificationID", notif.ID).
2252+
Str("type", notif.Type).
2253+
Int("alertCount", len(notif.Alerts)).
2254+
Msg("Processing queued notification")
2255+
2256+
switch notif.Type {
2257+
case "email":
2258+
var emailConfig EmailConfig
2259+
if err := json.Unmarshal(notif.Config, &emailConfig); err != nil {
2260+
return fmt.Errorf("failed to unmarshal email config: %w", err)
2261+
}
2262+
n.sendGroupedEmail(emailConfig, notif.Alerts)
2263+
return nil
2264+
2265+
case "webhook":
2266+
var webhookConfig WebhookConfig
2267+
if err := json.Unmarshal(notif.Config, &webhookConfig); err != nil {
2268+
return fmt.Errorf("failed to unmarshal webhook config: %w", err)
2269+
}
2270+
n.sendGroupedWebhook(webhookConfig, notif.Alerts)
2271+
return nil
2272+
2273+
case "apprise":
2274+
var appriseConfig AppriseConfig
2275+
if err := json.Unmarshal(notif.Config, &appriseConfig); err != nil {
2276+
return fmt.Errorf("failed to unmarshal apprise config: %w", err)
2277+
}
2278+
n.sendGroupedApprise(appriseConfig, notif.Alerts)
2279+
return nil
2280+
2281+
default:
2282+
return fmt.Errorf("unknown notification type: %s", notif.Type)
2283+
}
2284+
}
2285+
21492286
// Stop gracefully stops the notification manager
21502287
func (n *NotificationManager) Stop() {
21512288
n.mu.Lock()
21522289
defer n.mu.Unlock()
21532290

2291+
// Stop the notification queue if it exists
2292+
if n.queue != nil {
2293+
n.queue.Stop()
2294+
}
2295+
21542296
// Cancel any pending group timer
21552297
if n.groupTimer != nil {
21562298
n.groupTimer.Stop()

0 commit comments

Comments
 (0)