Skip to content

Commit

Permalink
Rule: Support ruleGroup limit (#4868)
Browse files Browse the repository at this point in the history
* Rule: Support ruleGroup limit (#4837)

Signed-off-by: Jimmie Han <hanjinming@outlook.com>

* Rules: format rpc.proto

Signed-off-by: Jimmie Han <hanjinming@outlook.com>

* Rules: fix unit test

Signed-off-by: Jimmie Han <hanjinming@outlook.com>

* Rules: add e2e test

Signed-off-by: Jimmie Han <hanjinming@outlook.com>

* Rules: add e2e rule test case hit limit

Signed-off-by: Jimmie Han <hanjinming@outlook.com>

* fix unit test ci, use runutil.Retry instead of time.Sleep.

Signed-off-by: Jimmie Han <hanjinming@outlook.com>
  • Loading branch information
hanjm committed Nov 19, 2021
1 parent d2d74da commit 243526d
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 68 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -27,6 +27,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#4847](https://github.com/thanos-io/thanos/pull/4847) Query: add `--alert.query-url` which is used in the user interface for rules/alerts pages. By default the HTTP listen address is used for this URL.
- [#4856](https://github.com/thanos-io/thanos/pull/4856) Mixin: Add Query Frontend Grafana dashboard.
- [#4874](https://github.com/thanos-io/thanos/pull/4874) Query: Add `--endpoint-strict` flag to statically configure Thanos API server endpoints. It is similar to `--store-strict` but supports passing any Thanos gRPC APIs: StoreAPI, MetadataAPI, RulesAPI, TargetsAPI and ExemplarsAPI.
- [#4868](https://github.com/thanos-io/thanos/pull/4868) Rule: Support ruleGroup limit introduced by Prometheus v2.31.0.

### Fixed

Expand Down
1 change: 1 addition & 0 deletions pkg/rules/manager.go
Expand Up @@ -44,6 +44,7 @@ func (g Group) toProto() *rulespb.RuleGroup {
Name: g.Name(),
File: g.OriginalFile,
Interval: g.Interval().Seconds(),
Limit: int64(g.Limit()),
PartialResponseStrategy: g.PartialResponseStrategy,
// UTC needed due to https://github.com/gogo/protobuf/issues/519.
LastEvaluation: g.GetLastEvaluation().UTC(),
Expand Down
67 changes: 63 additions & 4 deletions pkg/rules/manager_test.go
Expand Up @@ -16,13 +16,15 @@ import (
"time"

"github.com/go-kit/kit/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/pkg/exemplar"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/thanos-io/thanos/pkg/runutil"
"gopkg.in/yaml.v3"

"github.com/thanos-io/thanos/pkg/store/storepb"
Expand Down Expand Up @@ -293,6 +295,7 @@ func TestConfigRuleAdapterUnmarshalMarshalYAML(t *testing.T) {
- alert: some
expr: up
partial_response_strategy: ABORT
limit: 10
- name: something2
rules:
- alert: some
Expand All @@ -302,7 +305,8 @@ func TestConfigRuleAdapterUnmarshalMarshalYAML(t *testing.T) {
b, err := yaml.Marshal(c)
testutil.Ok(t, err)
testutil.Equals(t, `groups:
- name: something1
- limit: 10
name: something1
rules:
- alert: some
expr: up
Expand Down Expand Up @@ -383,9 +387,7 @@ groups:
// We need to run the underlying rule managers to update them more than
// once (otherwise there's a deadlock).
thanosRuleMgr.Run()
defer func() {
thanosRuleMgr.Stop()
}()
t.Cleanup(thanosRuleMgr.Stop)

err = thanosRuleMgr.Update(1*time.Second, []string{
filepath.Join(dir, "no_strategy.yaml"),
Expand All @@ -397,3 +399,60 @@ groups:
testutil.Ok(t, err)
testutil.Equals(t, 0, len(thanosRuleMgr.RuleGroups()))
}

// TestManagerRunRulesWithRuleGroupLimit checks that a rule group's `limit`
// setting is enforced when the manager evaluates rules: the group below is
// configured with `limit: 1`, the stubbed query function always returns two
// samples, and the test expects the alerting rule to end up with bad health
// and a "exceeded limit" last error.
func TestManagerRunRulesWithRuleGroupLimit(t *testing.T) {
// Scratch directory holding the rule file; removed again when the test ends.
dir, err := ioutil.TempDir("", "test_rule_rule_groups")
testutil.Ok(t, err)
t.Cleanup(func() { testutil.Ok(t, os.RemoveAll(dir)) })
filename := filepath.Join(dir, "with_limit.yaml")
// Rule file with a single group ("something1", interval 1ms, limit 1)
// containing one alerting rule "some" with expression `up>0`.
testutil.Ok(t, ioutil.WriteFile(filename, []byte(`
groups:
- name: "something1"
interval: 1ms
limit: 1
rules:
- alert: "some"
expr: "up>0"
for: 0s
`), os.ModePerm))

thanosRuleMgr := NewManager(
context.Background(),
nil,
dir,
rules.ManagerOptions{
Logger: log.NewLogfmtLogger(os.Stderr),
Queryable: nopQueryable{},
},
// Query function factory: whatever strategy, query string or timestamp is
// passed in, the returned function yields the same two samples with
// distinct label sets and never an error. Two results against a limit of
// one is what this test relies on to trigger the limit error.
func(partialResponseStrategy storepb.PartialResponseStrategy) rules.QueryFunc {
return func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) {
return []promql.Sample{
{
Point: promql.Point{T: 0, V: 1},
Metric: labels.FromStrings("foo", "bar"),
},
{
Point: promql.Point{T: 0, V: 1},
Metric: labels.FromStrings("foo1", "bar1"),
},
}, nil
}
},
nil,
"http://localhost",
)
// Start the manager and make sure it is stopped when the test finishes.
thanosRuleMgr.Run()
t.Cleanup(thanosRuleMgr.Stop)
// Load the rule file. NOTE(review): the first argument is presumably the
// evaluation interval — confirm against Manager.Update.
testutil.Ok(t, thanosRuleMgr.Update(time.Millisecond, []string{filename}))
// Exactly one group with exactly one rule must be exposed; the indexing
// below depends on both assertions holding.
testutil.Equals(t, 1, len(thanosRuleMgr.protoRuleGroups()))
testutil.Equals(t, 1, len(thanosRuleMgr.protoRuleGroups()[0].Rules))
// Poll every millisecond, for at most one second, until the alerting rule
// reports bad health. Retrying (rather than sleeping a fixed time) avoids
// depending on how quickly the first evaluation happens.
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
testutil.Ok(t, runutil.Retry(time.Millisecond, ctx.Done(), func() error {
if thanosRuleMgr.protoRuleGroups()[0].Rules[0].GetAlert().Health != string(rules.HealthBad) {
return errors.New("expect HealthBad")
}
return nil
}))
// The recorded error must name both the configured limit (1) and the number
// of alerts produced (2).
testutil.Equals(t, "exceeded limit of 1 with 2 alerts", thanosRuleMgr.protoRuleGroups()[0].Rules[0].GetAlert().LastError)
}
5 changes: 3 additions & 2 deletions pkg/rules/rulespb/custom_test.go
Expand Up @@ -57,6 +57,7 @@ func TestJSONUnmarshalMarshal(t *testing.T) {
Interval: 2442,
LastEvaluation: now,
EvaluationDurationSeconds: 2.1,
Limit: 0,
PartialResponseStrategy: storepb.PartialResponseStrategy_ABORT,
Rules: []*Rule{},
},
Expand All @@ -78,7 +79,7 @@ func TestJSONUnmarshalMarshal(t *testing.T) {
},
},
// Different than input due to default enum fields.
expectedJSONOutput: `{"groups":[{"name":"","file":"","rules":[],"interval":0,"evaluationTime":0,"lastEvaluation":"0001-01-01T00:00:00Z","partialResponseStrategy":"ABORT"}]}`,
expectedJSONOutput: `{"groups":[{"name":"","file":"","rules":[],"interval":0,"evaluationTime":0,"lastEvaluation":"0001-01-01T00:00:00Z","limit":0,"partialResponseStrategy":"ABORT"}]}`,
},
{
name: "one valid group, with 1 with no rule type",
Expand Down Expand Up @@ -230,7 +231,7 @@ func TestJSONUnmarshalMarshal(t *testing.T) {
},
},
// Different than input due to the alerts slice being initialized to a zero-length slice instead of nil.
expectedJSONOutput: `{"groups":[{"name":"group1","file":"file1.yml","rules":[{"state":"pending","name":"alert1","query":"up == 0","duration":60,"labels":{"a2":"b2","c2":"d2"},"annotations":{"ann1":"ann44","ann2":"ann33"},"alerts":[],"health":"health2","lastError":"1","evaluationTime":1.1,"lastEvaluation":"0001-01-01T00:00:00Z","type":"alerting"}],"interval":2442,"evaluationTime":2.1,"lastEvaluation":"0001-01-01T00:00:00Z","partialResponseStrategy":"ABORT"}]}`,
expectedJSONOutput: `{"groups":[{"name":"group1","file":"file1.yml","rules":[{"state":"pending","name":"alert1","query":"up == 0","duration":60,"labels":{"a2":"b2","c2":"d2"},"annotations":{"ann1":"ann44","ann2":"ann33"},"alerts":[],"health":"health2","lastError":"1","evaluationTime":1.1,"lastEvaluation":"0001-01-01T00:00:00Z","type":"alerting"}],"interval":2442,"evaluationTime":2.1,"lastEvaluation":"0001-01-01T00:00:00Z","limit":0,"partialResponseStrategy":"ABORT"}]}`,
},
{
name: "one valid group, with 1 rule and alert each and second empty group.",
Expand Down

0 comments on commit 243526d

Please sign in to comment.