-
Notifications
You must be signed in to change notification settings - Fork 108
/
alerting.go
298 lines (258 loc) · 10.4 KB
/
alerting.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
package alerting
import (
"fmt"
"log"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/go-openapi/strfmt"
apiclient "github.com/rocket-pool/smartnode/shared/services/alerting/alertmanager/client"
apialert "github.com/rocket-pool/smartnode/shared/services/alerting/alertmanager/client/alert"
"github.com/rocket-pool/smartnode/shared/services/alerting/alertmanager/models"
"github.com/rocket-pool/smartnode/shared/services/config"
)
// Default lifetimes used by getAlertSettingsForEvent when computing an alert's EndsAt time.
const (
// Added to time.Now() for alerts about events that succeeded (sent with SeverityInfo).
DefaultEndsAtDurationForSeverityInfo = time.Minute * 5
// Added to time.Now() for alerts about events that failed (sent with SeverityCritical).
DefaultEndsAtDurationForSeverityCritical = time.Minute * 60
)
// FetchAlerts retrieves the current alerts directly from the alertmanager
// container/application's API, using the client built by createClient.
// When alerting is disabled in cfg it returns (nil, nil) without making a request.
func FetchAlerts(cfg *config.RocketPoolConfig) ([]*models.GettableAlert, error) {
	// NOTE: don't log to stdout here; this method is on the "api" path and all
	// stdout is parsed as a json "api" response.
	if enabled := isAlertingEnabled(cfg); !enabled {
		// Alerting is off, so there is nothing to fetch.
		return nil, nil
	}

	// Ask alertmanager for its alerts (nil request parameters, as before).
	response, fetchErr := createClient(cfg).Alert.GetAlerts(nil)
	if fetchErr != nil {
		return nil, fmt.Errorf("error fetching alerts from alertmanager: %w", fetchErr)
	}
	return response.Payload, nil
}
// AlertFeeRecipientChanged posts an alert reporting that the node changed, or
// attempted to change, the fee recipient to newFeeRecipient. succeeded selects
// the "succeeded" or "failed" variant; severity and expiry come from
// getAlertSettingsForEvent.
// Nothing is sent (and nil is returned) when alerting as a whole, or this
// specific alert, is disabled in cfg.
func AlertFeeRecipientChanged(cfg *config.RocketPoolConfig, newFeeRecipient common.Address, succeeded bool) error {
	switch {
	case !isAlertingEnabled(cfg):
		logMessage("alerting is disabled, not sending AlertFeeRecipientChanged.")
		return nil
	case cfg.Alertmanager.AlertEnabled_FeeRecipientChanged.Value != true:
		logMessage("alert for FeeRecipientChanged is disabled, not sending.")
		return nil
	}

	// Build the pieces of the alert up front, then assemble and post it.
	expiry, level, outcome := getAlertSettingsForEvent(succeeded)
	recipient := newFeeRecipient.Hex()
	name := fmt.Sprintf("FeeRecipientChanged-%s-%s", outcome, recipient)
	summary := fmt.Sprintf("Fee Recipient Change %s", outcome)
	description := fmt.Sprintf("The fee recipient was changed to %s with status %s.", recipient, outcome)

	return sendAlert(createAlert(name, summary, description, level, expiry, map[string]string{}), cfg)
}
// AlertMinipoolBondReduced posts an alert reporting that the node reduced, or
// attempted to reduce, the bond of the minipool at minipoolAddress. succeeded
// selects the "succeeded" or "failed" variant; severity and expiry come from
// getAlertSettingsForEvent. The alert carries an extra "minipool" label.
// Nothing is sent (and nil is returned) when alerting as a whole, or this
// specific alert, is disabled in cfg.
func AlertMinipoolBondReduced(cfg *config.RocketPoolConfig, minipoolAddress common.Address, succeeded bool) error {
	switch {
	case !isAlertingEnabled(cfg):
		logMessage("alerting is disabled, not sending AlertMinipoolBondReduced.")
		return nil
	case cfg.Alertmanager.AlertEnabled_MinipoolBondReduced.Value != true:
		logMessage("alert for MinipoolBondReduced is disabled, not sending.")
		return nil
	}

	// Build the pieces of the alert up front, then assemble and post it.
	expiry, level, outcome := getAlertSettingsForEvent(succeeded)
	address := minipoolAddress.Hex()
	name := fmt.Sprintf("MinipoolBondReduced-%s-%s", outcome, address)
	summary := fmt.Sprintf("Minipool %s reduce bond %s", address, outcome)
	description := fmt.Sprintf("The minipool with address %s reduced bond with status %s.", address, outcome)
	labels := map[string]string{"minipool": address}

	return sendAlert(createAlert(name, summary, description, level, expiry, labels), cfg)
}
// AlertMinipoolBalanceDistributed posts an alert reporting that the node
// distributed, or attempted to distribute, the balance of the minipool at
// minipoolAddress. succeeded selects the "succeeded" or "failed" variant;
// severity and expiry come from getAlertSettingsForEvent. The alert carries an
// extra "minipool" label.
// Nothing is sent (and nil is returned) when alerting as a whole, or this
// specific alert, is disabled in cfg.
func AlertMinipoolBalanceDistributed(cfg *config.RocketPoolConfig, minipoolAddress common.Address, succeeded bool) error {
	switch {
	case !isAlertingEnabled(cfg):
		logMessage("alerting is disabled, not sending AlertMinipoolBalanceDistributed.")
		return nil
	case cfg.Alertmanager.AlertEnabled_MinipoolBalanceDistributed.Value != true:
		logMessage("alert for MinipoolBalanceDistributed is disabled, not sending.")
		return nil
	}

	// Build the pieces of the alert up front, then assemble and post it.
	expiry, level, outcome := getAlertSettingsForEvent(succeeded)
	address := minipoolAddress.Hex()
	name := fmt.Sprintf("MinipoolBalanceDistributed-%s-%s", outcome, address)
	summary := fmt.Sprintf("Minipool %s balance distributed %s", address, outcome)
	description := fmt.Sprintf("The minipool with address %s had its balance distributed with status %s.", address, outcome)
	labels := map[string]string{"minipool": address}

	return sendAlert(createAlert(name, summary, description, level, expiry, labels), cfg)
}
// AlertMinipoolPromoted posts an alert reporting that the node promoted, or
// attempted to promote, the minipool at minipoolAddress. succeeded selects the
// "succeeded" or "failed" variant; severity and expiry come from
// getAlertSettingsForEvent. The alert carries an extra "minipool" label.
// Nothing is sent (and nil is returned) when alerting as a whole, or this
// specific alert, is disabled in cfg.
func AlertMinipoolPromoted(cfg *config.RocketPoolConfig, minipoolAddress common.Address, succeeded bool) error {
	switch {
	case !isAlertingEnabled(cfg):
		logMessage("alerting is disabled, not sending AlertMinipoolPromoted.")
		return nil
	case cfg.Alertmanager.AlertEnabled_MinipoolPromoted.Value != true:
		logMessage("alert for MinipoolPromoted is disabled, not sending.")
		return nil
	}

	// Build the pieces of the alert up front, then assemble and post it.
	expiry, level, outcome := getAlertSettingsForEvent(succeeded)
	address := minipoolAddress.Hex()
	name := fmt.Sprintf("MinipoolPromoted-%s-%s", outcome, address)
	summary := fmt.Sprintf("Minipool %s promote %s", address, outcome)
	description := fmt.Sprintf("The vacant minipool with address %s promoted with status %s.", address, outcome)
	labels := map[string]string{"minipool": address}

	return sendAlert(createAlert(name, summary, description, level, expiry, labels), cfg)
}
// AlertMinipoolStaked posts an alert reporting that the node staked, or
// attempted to stake, the minipool at minipoolAddress. succeeded selects the
// "succeeded" or "failed" variant; severity and expiry come from
// getAlertSettingsForEvent. The alert carries an extra "minipool" label.
// Nothing is sent (and nil is returned) when alerting as a whole, or this
// specific alert, is disabled in cfg.
func AlertMinipoolStaked(cfg *config.RocketPoolConfig, minipoolAddress common.Address, succeeded bool) error {
	switch {
	case !isAlertingEnabled(cfg):
		logMessage("alerting is disabled, not sending AlertMinipoolStaked.")
		return nil
	case cfg.Alertmanager.AlertEnabled_MinipoolStaked.Value != true:
		logMessage("alert for MinipoolStaked is disabled, not sending.")
		return nil
	}

	// Build the pieces of the alert up front, then assemble and post it.
	expiry, level, outcome := getAlertSettingsForEvent(succeeded)
	address := minipoolAddress.Hex()
	name := fmt.Sprintf("MinipoolStaked-%s-%s", outcome, address)
	summary := fmt.Sprintf("Minipool %s stake %s", address, outcome)
	description := fmt.Sprintf("The minipool with address %s staked with status %s.", address, outcome)
	labels := map[string]string{"minipool": address}

	return sendAlert(createAlert(name, summary, description, level, expiry, labels), cfg)
}
// getAlertSettingsForEvent maps the outcome of a process to the alert settings
// used to report it. It returns, in order: the alert's end time, its severity,
// and the text "succeeded" or "failed".
//
// A success yields SeverityInfo ending DefaultEndsAtDurationForSeverityInfo from
// now; a failure yields SeverityCritical ending
// DefaultEndsAtDurationForSeverityCritical from now.
func getAlertSettingsForEvent(succeeded bool) (strfmt.DateTime, Severity, string) {
	if succeeded {
		expiry := time.Now().Add(DefaultEndsAtDurationForSeverityInfo)
		return strfmt.DateTime(expiry), SeverityInfo, "succeeded"
	}

	expiry := time.Now().Add(DefaultEndsAtDurationForSeverityCritical)
	return strfmt.DateTime(expiry), SeverityCritical, "failed"
}
// AlertExecutionClientSyncComplete sends the "sync complete" alert for the
// execution client via alertClientSyncComplete.
// If that specific alert is disabled in cfg it logs the fact and returns nil.
func AlertExecutionClientSyncComplete(cfg *config.RocketPoolConfig) error {
	if cfg.Alertmanager.AlertEnabled_ExecutionClientSyncComplete.Value == true {
		return alertClientSyncComplete(cfg, ClientKindExecution)
	}

	logMessage("alert for ExecutionClientSyncComplete is disabled, not sending.")
	return nil
}
// AlertBeaconClientSyncComplete sends the "sync complete" alert for the beacon
// client via alertClientSyncComplete.
// If that specific alert is disabled in cfg it logs the fact and returns nil.
func AlertBeaconClientSyncComplete(cfg *config.RocketPoolConfig) error {
	if cfg.Alertmanager.AlertEnabled_BeaconClientSyncComplete.Value == true {
		return alertClientSyncComplete(cfg, ClientKindBeacon)
	}

	logMessage("alert for BeaconClientSyncComplete is disabled, not sending.")
	return nil
}
// ClientKind names the client a sync-complete alert refers to. Its string value
// is interpolated into the alert name, summary and description by alertClientSyncComplete.
type ClientKind string
const (
// ClientKindExecution is the kind passed by AlertExecutionClientSyncComplete.
ClientKindExecution ClientKind = "Execution"
// ClientKindBeacon is the kind passed by AlertBeaconClientSyncComplete.
ClientKindBeacon ClientKind = "Beacon"
)
// alertClientSyncComplete posts an info-severity alert stating that the given
// kind of client has completed syncing. The alert is named
// "<client>ClientSyncComplete" and is set to end one minute from now.
// If alerting is disabled in cfg it logs the fact and returns nil without sending.
// Otherwise it returns whatever sendAlert returns.
func alertClientSyncComplete(cfg *config.RocketPoolConfig, client ClientKind) error {
	alertName := fmt.Sprintf("%sClientSyncComplete", client)
	if !isAlertingEnabled(cfg) {
		// Pass the format and its argument straight to logMessage rather than a
		// pre-formatted string, so the name is never re-interpreted as a format.
		logMessage("alerting is disabled, not sending %s.", alertName)
		return nil
	}

	alert := createAlert(
		alertName,
		fmt.Sprintf("%s Client Sync Complete", client),
		fmt.Sprintf("The %s client has completed syncing.", client),
		SeverityInfo,
		strfmt.DateTime(time.Now().Add(time.Minute)),
		nil, // no extra labels
	)
	return sendAlert(alert, cfg)
}
// sendAlert posts a single alert to alertmanager using a client built from cfg.
// It logs the alert's "alertname" label and "summary" annotation before sending.
// On failure the returned error wraps the client's error, so callers can
// inspect it with errors.Is / errors.As.
func sendAlert(alert *models.PostableAlert, cfg *config.RocketPoolConfig) error {
	logMessage("sending alert for %s: %s", alert.Labels["alertname"], alert.Annotations["summary"])

	params := apialert.NewPostAlertsParams().WithDefaults().WithAlerts(models.PostableAlerts{alert})
	client := createClient(cfg)
	if _, err := client.Alert.PostAlerts(params); err != nil {
		// %w (not %s) keeps the underlying error in the chain.
		return fmt.Errorf("error posting alert: %w", err)
	}
	return nil
}
// Severity is the value createAlert writes into an alert's "severity" label.
type Severity string
const (
// SeverityInfo is used for events that succeeded and for client sync-complete alerts.
SeverityInfo Severity = "info"
// SeverityWarning is declared here but not referenced in the visible portion of this file.
SeverityWarning Severity = "warning"
// SeverityCritical is used by getAlertSettingsForEvent for events that failed.
SeverityCritical Severity = "critical"
)
// isAlertingEnabled reports whether the EnableAlerting setting in cfg's
// Alertmanager section is exactly true.
func isAlertingEnabled(cfg *config.RocketPoolConfig) bool {
	setting := cfg.Alertmanager.EnableAlerting.Value
	return setting == true
}
// createAlert builds a postable alert with the uniform set of labels and
// annotations this package expects:
//   - labels "alertname" (uniqueName) and "severity" (severity), plus every
//     entry of extraLabels; an extra label with the same key replaces the basic one
//   - annotations "summary" and "description"
//   - EndsAt set to endsAt
//
// extraLabels may be nil.
func createAlert(uniqueName string, summary string, description string, severity Severity, endsAt strfmt.DateTime, extraLabels map[string]string) *models.PostableAlert {
	// Start from the basic labels, then layer the caller's extras on top.
	labels := map[string]string{
		"alertname": uniqueName,
		"severity":  string(severity),
	}
	for key, value := range extraLabels {
		labels[key] = value
	}

	annotations := map[string]string{
		"description": description,
		"summary":     summary,
	}

	return &models.PostableAlert{
		Annotations: annotations,
		Alert:       models.Alert{Labels: labels},
		EndsAt:      endsAt,
	}
}
// createClient builds an alertmanager API client whose host is chosen from cfg:
// in native mode the configured native-mode host and port are used; otherwise
// the alertmanager container name is the hostname, with the configured port.
func createClient(cfg *config.RocketPoolConfig) *apiclient.Alertmanager {
	var address string
	if cfg.IsNativeMode {
		address = fmt.Sprintf("%s:%d", cfg.Alertmanager.NativeModeHost.Value, cfg.Alertmanager.NativeModePort.Value)
	} else {
		// use the alertmanager container name for the hostname
		address = fmt.Sprintf("%s:%d", config.AlertmanagerContainerName, cfg.Alertmanager.Port.Value)
	}

	transportCfg := apiclient.DefaultTransportConfig().WithHost(address)
	return apiclient.NewHTTPClientWithConfig(strfmt.Default, transportCfg)
}
// logMessage formats a message printf-style and writes it to the standard
// logger as a single line prefixed with "[alerting] ".
func logMessage(format string, args ...interface{}) {
	log.Println("[alerting] " + fmt.Sprintf(format, args...))
}