From 632eadcc55425474ddd90efc697217bdd7724ba7 Mon Sep 17 00:00:00 2001 From: Ali Date: Wed, 15 Apr 2026 17:52:55 +0500 Subject: [PATCH] inhibit: fix gcCallback dropping live index entry for regex source matchers When multiple source alerts match a regex source_matcher and share the same equal-label fingerprint, the sindex holds only one of them. Previously gcCallback unconditionally removed the sindex entry for an alert's equal fingerprint on GC, even when a different alert was indexed there or another active source alert could take the slot. This caused inhibition to silently stop working after one of the matching source alerts expired from the cache, because findEqualSourceAlert could no longer locate any source alert for the target's equal labels. Fix: in gcCallback only remove the index entry if the GC'd alert is the one currently indexed, then scan the source cache for another active alert with the same equal-label fingerprint to promote as a replacement. Adds a regression test that fires two regex-matching source alerts with the same 'env' equal label, GC's the first, and asserts the target is still inhibited by the second. Fixes #5162 Signed-off-by: Ali --- inhibit/inhibit.go | 23 +++++++++++++++-- inhibit/inhibit_test.go | 56 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 42e54cad69..afb1c0d04d 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -390,8 +390,27 @@ func (r *InhibitRule) findEqualSourceAlert(lset model.LabelSet, now time.Time) ( func (r *InhibitRule) gcCallback(alerts []*types.Alert) { for _, a := range alerts { - fp := r.fingerprintEquals(a.Labels) - r.sindex.Delete(fp) + eq := r.fingerprintEquals(a.Labels) + + // Only act on the index entry if the GC'd alert is the one currently indexed. + // When multiple source alerts share the same equal-label fingerprint (e.g. via + // a regex source_matchers like alertname =~ "(a|b|c)"), the index holds only one + // of them. If a different alert is indexed, removing the entry would break + // inhibition for that other alert. + indexed, ok := r.sindex.Get(eq) + if !ok || indexed != a.Fingerprint() { + continue + } + + // The GC'd alert was the indexed one. Remove it and promote another still-active + // source alert with the same equal labels, if any. + r.sindex.Delete(eq) + for _, candidate := range r.scache.List() { + if r.fingerprintEquals(candidate.Labels) == eq { + r.sindex.Set(eq, candidate.Fingerprint()) + break + } + } } } diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 0da2e1c980..7342622698 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -620,3 +620,59 @@ func BenchmarkFingerprintEquals(b *testing.B) { }) } } + +// TestInhibitRule_gcCallback_preserves_regex_source_match is a regression test for +// the case where multiple source alerts match via a regex matcher and share the same +// equal-label fingerprint. When one of them is GC'd, inhibition for the remaining +// active source alert must continue to work. +func TestInhibitRule_gcCallback_preserves_regex_source_match(t *testing.T) { + // Build an InhibitRule with a regex source matcher (alertname =~ "src.*") and + // equal: [env] so that two source alerts with the same env share an index slot. + sourceMatcher, err := labels.NewMatcher(labels.MatchRegexp, "alertname", "src.*") + require.NoError(t, err) + targetMatcher, err := labels.NewMatcher(labels.MatchEqual, "alertname", "target") + require.NoError(t, err) + + rule := &InhibitRule{ + SourceMatchers: labels.Matchers{sourceMatcher}, + TargetMatchers: labels.Matchers{targetMatcher}, + Equal: map[model.LabelName]struct{}{"env": {}}, + scache: store.NewAlerts(), + sindex: newIndex(), + } + + now := time.Now() + makeAlert := func(name, env string) *types.Alert { + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "alertname": model.LabelValue(name), + "env": model.LabelValue(env), + }, + StartsAt: now.Add(-1 * time.Minute), + EndsAt: now.Add(10 * time.Minute), + }, + } + } + + src1 := makeAlert("src1", "prod") + src2 := makeAlert("src2", "prod") + target := makeAlert("target", "prod") + + // Both source alerts arrive and are indexed. + require.NoError(t, rule.scache.Set(src1)) + rule.updateIndex(src1) + require.NoError(t, rule.scache.Set(src2)) + rule.updateIndex(src2) + + // Target alert should be inhibited while both sources are active. + _, inhibited := rule.hasEqual(target.Labels, false, now) + require.True(t, inhibited, "target should be inhibited while both source alerts are active") + + // src1 expires from the cache (GC). + rule.gcCallback([]*types.Alert{src1}) + + // src2 is still active; target must still be inhibited. + _, inhibited = rule.hasEqual(target.Labels, false, now) + require.True(t, inhibited, "target should still be inhibited after src1 is GC'd — src2 is still active") +}