Skip to content

Commit

Permalink
fix: use lock before iterating on a map
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-karan committed Feb 18, 2022
1 parent eabfd62 commit 7640674
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 35 deletions.
4 changes: 0 additions & 4 deletions cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"os"
"strings"
"time"

"github.com/knadh/koanf"
"github.com/knadh/koanf/parsers/toml"
Expand Down Expand Up @@ -104,9 +103,6 @@ func initProviders(ko *koanf.Koanf, lo *logrus.Logger) []prvs.Provider {
lo.WithError(err).Fatal("error initialising google chat provider")
}

// Start a background worker to cleanup alerts based on TTL mechanism.
go gchat.InitPruner(1 * time.Hour)

lo.WithField("room", gchat.GetRoom()).Info("initialised provider")
provs = append(provs, gchat)
}
Expand Down
2 changes: 1 addition & 1 deletion config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ max_idle_conns = 50
timeout = "7s"
# proxy_url = "http://internal-squid-proxy.com:3128"
template = "static/message.tmpl"
active_alerts_ttl = "5s"
active_alerts_ttl = "12h"

[providers.dev_alerts]
endpoint = "https://chat.googleapis.com/v1/spaces/xxx/messages?key=key&token=token%3D"
Expand Down
4 changes: 2 additions & 2 deletions dev/prometheus/alert.rules
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ groups:
expr: vector(1)==1
for: 10s
labels:
severity: deadman
severity: warning
room: dev_alerts
annotations:
title: "This is a dummy alert"
Expand All @@ -18,7 +18,7 @@ groups:
expr: vector(2)==2
for: 10s
labels:
severity: deadman
severity: warning
room: prod_alerts
annotations:
title: "This is a dummy alert"
Expand Down
51 changes: 27 additions & 24 deletions internal/providers/google_chat/alerts.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import (

"github.com/gofrs/uuid"
alertmgrtmpl "github.com/prometheus/alertmanager/template"
"github.com/sirupsen/logrus"
)

// ActiveAlerts holds the currently tracked alerts, keyed by each alert's
// unique fingerprint hash, together with their details.
type ActiveAlerts struct {
// lo is the logger used to emit debug messages while pruning alerts.
lo *logrus.Logger
// RWMutex is embedded; the methods on ActiveAlerts take it before
// touching the alerts map.
sync.RWMutex
// alerts maps an alert fingerprint to the details stored for it.
alerts map[string]AlertDetails
}
Expand Down Expand Up @@ -51,14 +53,6 @@ func (d *ActiveAlerts) add(a alertmgrtmpl.Alert) error {
return nil
}

// remove drops the entry stored under fingerprint from the active alerts
// map. The write lock is held only for the duration of the deletion;
// removing a fingerprint that is not present is a no-op.
func (d *ActiveAlerts) remove(fingerprint string) {
	d.Lock()
	delete(d.alerts, fingerprint)
	d.Unlock()
}

// loookup retrieves the UUID for the alert based on the fingerprint.
func (d *ActiveAlerts) loookup(fingerprint string) string {
d.RLock()
Expand All @@ -71,6 +65,28 @@ func (d *ActiveAlerts) loookup(fingerprint string) string {
return d.alerts[fingerprint].UUID.String()
}

// Prune walks the map of active alerts and deletes every entry whose
// StartsAt timestamp is older than ttl, measured from the moment of the call.
//
// The write lock is held for the whole sweep so the map is never iterated
// while another goroutine mutates it. Deleting entries from a map while
// ranging over it is permitted in Go.
func (d *ActiveAlerts) Prune(ttl time.Duration) {
	d.Lock()
	defer d.Unlock()

	// Alerts that started before this instant have outlived their TTL.
	expired := time.Now().Add(-ttl)

	for k, a := range d.alerts {
		if !a.StartsAt.Before(expired) {
			continue
		}

		// The logger is optional: an ActiveAlerts value constructed without
		// one has a nil lo, and calling a logging method on a nil
		// *logrus.Logger panics. Pruning must still proceed in that case.
		if d.lo != nil {
			d.lo.WithField("fingerprint", k).WithField("created", a.StartsAt).WithField("expired", expired).Debug("removing alert from active alerts")
		}
		delete(d.alerts, k)
	}
}

// startPruneWorker is used to remove active alerts in the
// map once their TTL is reached. The cleanup activity happens at periodic intervals.
// This is a blocking function, so the caller must invoke it as a goroutine.
Expand All @@ -87,26 +103,13 @@ func (d *ActiveAlerts) loookup(fingerprint string) string {
// function as a GoRoutine and check if the alert creation timestamp has crossed our specified TTL. If it has, it'll delete the alert
// entry from the map.
// This check happens at a periodic interval specified by `pruneInterval` by the caller.
func (m *GoogleChatManager) InitPruner(pruneInterval time.Duration) {
func (d *ActiveAlerts) startPruneWorker(pruneInterval time.Duration, ttl time.Duration) {
var (
evalTicker = time.NewTicker(pruneInterval).C
)

for range evalTicker {
m.lo.Debug("pruning active alerts based on ttl")

var (
now = time.Now()
expired = now.Add(-m.ttl)
)

// Iterate on map of active alerts.
for k, a := range m.activeAlerts.alerts {
// If the alert creation field is past our specified TTL, remove it from the map.
if a.StartsAt.Before(expired) {
m.lo.WithField("fingerprint", k).WithField("created", a.StartsAt).WithField("expired", expired).Debug("removing alert from active alerts")
m.activeAlerts.remove(k)
}
}
d.lo.Debug("pruning active alerts based on ttl")
d.Prune(ttl)
}
}
10 changes: 6 additions & 4 deletions internal/providers/google_chat/google_chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ type GoogleChatManager struct {
room string
client *http.Client
msgTmpl *template.Template
ttl time.Duration
}

type GoogleChatOpts struct {
Expand Down Expand Up @@ -70,7 +69,7 @@ func NewGoogleChat(opts GoogleChatOpts) (*GoogleChatManager, error) {
return nil, err
}

return &GoogleChatManager{
mgr := &GoogleChatManager{
lo: opts.Log,
client: client,
endpoint: opts.Endpoint,
Expand All @@ -79,8 +78,11 @@ func NewGoogleChat(opts GoogleChatOpts) (*GoogleChatManager, error) {
alerts: alerts,
},
msgTmpl: tmpl,
ttl: opts.ActiveAlertsTTL,
}, nil
}
// Start a background worker to cleanup alerts based on TTL mechanism.
go mgr.activeAlerts.startPruneWorker(1*time.Hour, opts.ActiveAlertsTTL)

return mgr, nil
}

// Push accepts the list of alerts and dispatches them to Webhook API endpoint.
Expand Down

0 comments on commit 7640674

Please sign in to comment.