Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix config reloads not respecting group interval #3074

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dispatch/dispatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -446,10 +446,11 @@ func (ag *aggrGroup) run(nf notifyFunc) {
ctx = notify.WithRepeatInterval(ctx, ag.opts.RepeatInterval)
ctx = notify.WithMuteTimeIntervals(ctx, ag.opts.MuteTimeIntervals)
ctx = notify.WithActiveTimeIntervals(ctx, ag.opts.ActiveTimeIntervals)
ctx = notify.WithGroupInterval(ctx, ag.opts.GroupInterval)

// Wait the configured interval before calling flush again.
ag.mtx.Lock()
ag.next.Reset(ag.opts.GroupInterval)
ag.next.Reset(ag.opts.GroupWait)
ag.hasFlushed = true
ag.mtx.Unlock()

Expand Down
28 changes: 24 additions & 4 deletions notify/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ const (
keyNow
keyMuteTimeIntervals
keyActiveTimeIntervals
keyGroupInterval
)

// WithReceiverName populates a context with a receiver name.
Expand Down Expand Up @@ -162,6 +163,11 @@ func WithActiveTimeIntervals(ctx context.Context, at []string) context.Context {
return context.WithValue(ctx, keyActiveTimeIntervals, at)
}

// WithGroupInterval populates a context with a group interval.
func WithGroupInterval(ctx context.Context, t time.Duration) context.Context {
return context.WithValue(ctx, keyGroupInterval, t)
}

// RepeatInterval extracts a repeat interval from the context. Iff none exists, the
// second argument is false.
func RepeatInterval(ctx context.Context) (time.Duration, bool) {
Expand Down Expand Up @@ -190,6 +196,13 @@ func GroupLabels(ctx context.Context) (model.LabelSet, bool) {
return v, ok
}

// GroupInterval extracts group interval from the context. Iff none exists, the
// second argument is false.
func GroupInterval(ctx context.Context) (time.Duration, bool) {
v, ok := ctx.Value(keyGroupInterval).(time.Duration)
return v, ok
}

// Now extracts a now timestamp from the context. Iff none exists, the
// second argument is false.
func Now(ctx context.Context) (time.Time, bool) {
Expand Down Expand Up @@ -560,14 +573,16 @@ func hashAlert(a *types.Alert) uint64 {
return hash
}

func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint64]struct{}, repeat time.Duration) bool {
func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint64]struct{}, repeat, groupInterval time.Duration) bool {
// If we haven't notified about the alert group before, notify right away
// unless we only have resolved alerts.
if entry == nil {
return len(firing) > 0
}

if !entry.IsFiringSubset(firing) {
groupIntervalMuted := len(entry.FiringAlerts) > 0 && entry.Timestamp.After(time.Now().Add(-groupInterval))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance we want to use keyNow from the context here?


if !entry.IsFiringSubset(firing) && !groupIntervalMuted {
return true
}

Expand All @@ -582,7 +597,7 @@ func (n *DedupStage) needsUpdate(entry *nflogpb.Entry, firing, resolved map[uint
return len(entry.FiringAlerts) > 0
}

if n.rs.SendResolved() && !entry.IsResolvedSubset(resolved) {
if n.rs.SendResolved() && !groupIntervalMuted {
return true
}

Expand All @@ -597,6 +612,11 @@ func (n *DedupStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Al
return ctx, nil, errors.New("group key missing")
}

groupInterval, ok := GroupInterval(ctx)
if !ok {
return ctx, nil, errors.New("group interval missing")
}

repeatInterval, ok := RepeatInterval(ctx)
if !ok {
return ctx, nil, errors.New("repeat interval missing")
Expand Down Expand Up @@ -636,7 +656,7 @@ func (n *DedupStage) Exec(ctx context.Context, _ log.Logger, alerts ...*types.Al
return ctx, nil, errors.Errorf("unexpected entry result size %d", len(entries))
}

if n.needsUpdate(entry, firingSet, resolvedSet, repeatInterval) {
if n.needsUpdate(entry, firingSet, resolvedSet, repeatInterval, groupInterval) {
return ctx, alerts, nil
}
return ctx, nil, nil
Expand Down
2 changes: 1 addition & 1 deletion notify/notify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func TestDedupStageNeedsUpdate(t *testing.T) {
now: func() time.Time { return now },
rs: sendResolved(c.resolve),
}
res := s.needsUpdate(c.entry, c.firingAlerts, c.resolvedAlerts, c.repeat)
res := s.needsUpdate(c.entry, c.firingAlerts, c.resolvedAlerts, c.repeat, 2*time.Hour)
require.Equal(t, c.res, res)
}
}
Expand Down