diff --git a/CHANGELOG.md b/CHANGELOG.md index ef4bd40f61..9af5342c39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ * [BUGFIX] Experimental Alertmanager API: Do not allow empty Alertmanager configurations or bad template filenames to be submitted through the configuration API. #3185 * [BUGFIX] Reduce failures to update heartbeat when using Consul. #3259 * [BUGFIX] When using ruler sharding, moving all user rule groups from ruler to a different one and then back could end up with some user groups not being evaluated at all. #3235 +* [BUGFIX] Fixed the metric `cortex_prometheus_rule_group_duration_seconds` in the Ruler, which previously did not report any values. #3310 ## 1.4.0 / 2020-10-02 diff --git a/pkg/ruler/manager_metrics.go b/pkg/ruler/manager_metrics.go index b888a06571..f4c3942229 100644 --- a/pkg/ruler/manager_metrics.go +++ b/pkg/ruler/manager_metrics.go @@ -148,7 +148,7 @@ func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) { // If same user is later re-added, all metrics will start from 0, which is fine. 
data.SendSumOfSummariesPerUser(out, m.EvalDuration, "prometheus_rule_evaluation_duration_seconds") - data.SendSumOfSummariesPerUser(out, m.IterationDuration, "cortex_prometheus_rule_group_duration_seconds") + data.SendSumOfSummariesPerUser(out, m.IterationDuration, "prometheus_rule_group_duration_seconds") data.SendSumOfCountersPerUser(out, m.IterationsMissed, "prometheus_rule_group_iterations_missed_total") data.SendSumOfCountersPerUser(out, m.IterationsScheduled, "prometheus_rule_group_iterations_total") diff --git a/pkg/ruler/manager_metrics_test.go b/pkg/ruler/manager_metrics_test.go index 2686e24235..91149f59da 100644 --- a/pkg/ruler/manager_metrics_test.go +++ b/pkg/ruler/manager_metrics_test.go @@ -61,12 +61,27 @@ cortex_prometheus_rule_evaluations_total{rule_group="group_two",user="user2"} 10 cortex_prometheus_rule_evaluations_total{rule_group="group_two",user="user3"} 100 # HELP cortex_prometheus_rule_group_duration_seconds The duration of rule group evaluations. # TYPE cortex_prometheus_rule_group_duration_seconds summary -cortex_prometheus_rule_group_duration_seconds_sum{user="user1"} 0 -cortex_prometheus_rule_group_duration_seconds_count{user="user1"} 0 -cortex_prometheus_rule_group_duration_seconds_sum{user="user2"} 0 -cortex_prometheus_rule_group_duration_seconds_count{user="user2"} 0 -cortex_prometheus_rule_group_duration_seconds_sum{user="user3"} 0 -cortex_prometheus_rule_group_duration_seconds_count{user="user3"} 0 +cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.01"} 1 +cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.05"} 1 +cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.5"} 1 +cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.9"} 1 +cortex_prometheus_rule_group_duration_seconds{user="user1",quantile="0.99"} 1 +cortex_prometheus_rule_group_duration_seconds_sum{user="user1"} 1 +cortex_prometheus_rule_group_duration_seconds_count{user="user1"} 1 
+cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.01"} 10 +cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.05"} 10 +cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.5"} 10 +cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.9"} 10 +cortex_prometheus_rule_group_duration_seconds{user="user2",quantile="0.99"} 10 +cortex_prometheus_rule_group_duration_seconds_sum{user="user2"} 10 +cortex_prometheus_rule_group_duration_seconds_count{user="user2"} 1 +cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.01"} 100 +cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.05"} 100 +cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.5"} 100 +cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.9"} 100 +cortex_prometheus_rule_group_duration_seconds{user="user3",quantile="0.99"} 100 +cortex_prometheus_rule_group_duration_seconds_sum{user="user3"} 100 +cortex_prometheus_rule_group_duration_seconds_count{user="user3"} 1 # HELP cortex_prometheus_rule_group_iterations_missed_total The total number of rule group evaluations missed due to slow rule group evaluation. # TYPE cortex_prometheus_rule_group_iterations_missed_total counter cortex_prometheus_rule_group_iterations_missed_total{user="user1"} 1