From d4b728814e851e8ea95083ed2c2d63a9878b5aea Mon Sep 17 00:00:00 2001 From: Muhammad Talal Anwar Date: Tue, 18 Aug 2020 14:56:20 +0200 Subject: [PATCH 1/4] controller: improve data structures - Added absentPrometheusRule wrapper struct to distinguish from regular PromtheusRule and improve code logic. - Added keep-labels flag so that third-party users of the operator can disable tier/service labels if they want to. - Use informer's lister for getting rule groups across all PrometheusRules in a namespace instead of clientset. - Changed alert rule level disable label to `no_alert_on_absence`. --- README.md | 36 ++-- internal/controller/alert_rule.go | 42 ++-- internal/controller/controller.go | 95 ++++---- internal/controller/prometheusrule.go | 203 +++++++++++++----- internal/controller/utils.go | 11 +- main.go | 10 +- test/controller_test.go | 17 +- test/fixtures/doc.go | 10 +- .../swift_openstack_swift.json | 2 +- ...t_kubernetes_absent_metrics_alert_rules.go | 2 + ...mt_openstack_absent_metrics_alert_rules.go | 2 + ...ft_openstack_absent_metrics_alert_rules.go | 2 + test/suite_test.go | 7 +- 13 files changed, 280 insertions(+), 159 deletions(-) diff --git a/README.md b/README.md index 4ad268c4..2b1e736b 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,9 @@ - [Absent metric alert definition](#absent-metric-alert-definition) - [Template](#template) - [Labels](#labels) - - [Default tier and service](#default-tier-and-service) + - [Defaults](#defaults) + - [Carry over from original alert rule](#carry-over-from-original-alert-rule) + - [Tier and service](#tier-and-service) The absent metrics operator is a companion operator for the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). @@ -76,7 +78,7 @@ labels: severity: info annotations: summary: missing foo_bar - description: The metric 'foo_bar' is missing. Alerts using it may not fire as intended. + description: The metric 'foo_bar' is missing. 'ImportantAlert' alert using it may not fire as intended. ``` ## Installation @@ -129,15 +131,15 @@ absent-metrics-operator/disable: true ``` If you want to disable the operator for only a specific alert rule instead of -all the alerts in a `PrometheusRule`, you can use the same label at the -rule-level: +all the alerts in a `PrometheusRule`, you can add the following label to the +alert rule: ```yaml alert: ImportantAlert expr: foo_bar > 0 for: 5m labels: - absent-metrics-operator/disable: true + no_alert_on_absence: true ... ``` @@ -186,7 +188,7 @@ labels: severity: info annotations: summary: missing $metric - description: The metric '$metric' is missing. Alerts using it may not fire as intended. + description: The metric '$metric' is missing. '$alert-name' alert using it may not fire as intended. ``` Consider the metric `limes_successful_scrapes:rate5m` with tier `os` and @@ -196,13 +198,23 @@ Then the alert name would be `AbsentOsLimesSuccessfulScrapesRate5m`. ### Labels -- `tier` and `service` labels are carried over from the original alert rule - unless those labels use templating (i.e. use `$labels`), in which case the - default `tier` and `service` values for that Prometheus server in that - namespace will be used. -- `severity` is always `info`. +#### Defaults -#### Default tier and service +The following labels are always present on every absent metric alert rule: + +- `severity` is alway `info`. 
+ +#### Carry over from original alert rule + +You can specify which labels to carry over from the original alert rule by +specifying a comma-separated list of labels to the `--keep-labels` flag. The +default value for this flag is `service,tier`. + +##### Tier and service + +`tier` and `service` labels are carried over from the original alert rule +unless those labels use templating (i.e. use `$labels`), in which case the +default `tier` and `service` values will be used. The operator determines a default `tier` and `service` for a specific Prometheus server in a namespace by traversing through all the alert rule diff --git a/internal/controller/alert_rule.go b/internal/controller/alert_rule.go index 36863ab7..2c7f142e 100644 --- a/internal/controller/alert_rule.go +++ b/internal/controller/alert_rule.go @@ -66,7 +66,10 @@ func (mex *metricNameExtractor) Visit(node parser.Node, path []parser.Node) (par // // The rule group names for the absent metric alerts have the format: // promRuleName/originalGroupName. -func parseRuleGroups(promRuleName, defaultTier, defaultService string, in []monitoringv1.RuleGroup) ([]monitoringv1.RuleGroup, error) { +func (c *Controller) parseRuleGroups( + promRuleName, defaultTier, defaultService string, + in []monitoringv1.RuleGroup) ([]monitoringv1.RuleGroup, error) { + out := make([]monitoringv1.RuleGroup, 0, len(in)) for _, g := range in { var absentRules []monitoringv1.Rule @@ -75,12 +78,12 @@ func parseRuleGroups(promRuleName, defaultTier, defaultService string, in []moni if r.Record != "" { continue } - // Do not parse alert rule if it has disable label. - if r.Labels != nil && mustParseBool(r.Labels[labelDisable]) { + // Do not parse alert rule if it has the no alert on absence label. + if r.Labels != nil && mustParseBool(r.Labels[labelNoAlertOnAbsence]) { continue } - rules, err := ParseAlertRule(defaultTier, defaultService, r) + rules, err := c.ParseAlertRule(defaultTier, defaultService, r) if err != nil { return nil, err } @@ -103,7 +106,7 @@ func parseRuleGroups(promRuleName, defaultTier, defaultService string, in []moni // Since an original alert expression can reference multiple time series therefore // a slice of []monitoringv1.Rule is returned as the result would be multiple // absent metric alert rules (one for each time series). -func ParseAlertRule(tier, service string, in monitoringv1.Rule) ([]monitoringv1.Rule, error) { +func (c *Controller) ParseAlertRule(tier, service string, in monitoringv1.Rule) ([]monitoringv1.Rule, error) { exprStr := in.Expr.String() mex := &metricNameExtractor{expr: exprStr, found: map[string]struct{}{}} exprNode, err := parser.ParseExpr(exprStr) @@ -119,20 +122,27 @@ func ParseAlertRule(tier, service string, in monitoringv1.Rule) ([]monitoringv1. return nil, nil } + // Default labels + lab := map[string]string{ + "severity": "info", + } + // Carry over labels from the original alert if origLab := in.Labels; origLab != nil { - if v, ok := origLab["tier"]; ok && !strings.Contains(v, "$labels") { - tier = v - } - if v, ok := origLab["service"]; ok && !strings.Contains(v, "$labels") { - service = v + for k := range c.keepLabel { + v := origLab[k] + emptyOrTmplVal := v == "" || strings.Contains(v, "$labels") + if k == labelTier && emptyOrTmplVal { + v = tier + } + if k == labelService && emptyOrTmplVal { + v = service + } + if v != "" { + lab[k] = v + } } } - lab := map[string]string{ - "tier": tier, - "service": service, - "severity": "info", - } // Sort metric names alphabetically for consistent test results. 
metrics := make([]string, 0, len(mex.found)) @@ -145,7 +155,7 @@ func ParseAlertRule(tier, service string, in monitoringv1.Rule) ([]monitoringv1. for _, m := range metrics { // Generate an alert name from metric name: // network:tis_a_metric:rate5m -> AbsentTierServiceNetworkTisAMetricRate5m - words := []string{"absent", tier, service} + words := []string{"absent", lab[labelTier], lab[labelService]} sL1 := strings.Split(m, "_") for _, v := range sL1 { sL2 := strings.Split(v, ":") diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 6107dadb..3690b308 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -42,8 +42,13 @@ import ( ) const ( - labelManagedBy = "absent-metrics-operator/managed-by" - labelDisable = "absent-metrics-operator/disable" + labelOperatorManagedBy = "absent-metrics-operator/managed-by" + labelOperatorDisable = "absent-metrics-operator/disable" + + labelNoAlertOnAbsence = "no_alert_on_absence" + + labelTier = "tier" + labelService = "service" ) const ( @@ -64,8 +69,9 @@ const ( // Controller is the controller implementation for acting on PrometheusRule // resources. type Controller struct { - logger *log.Logger - metrics *Metrics + logger *log.Logger + metrics *Metrics + keepLabel map[string]bool kubeClientset kubernetes.Interface promClientset monitoringclient.Interface @@ -76,7 +82,13 @@ type Controller struct { } // New creates a new Controller. -func New(cfg *rest.Config, resyncPeriod time.Duration, r prometheus.Registerer, logger *log.Logger) (*Controller, error) { +func New( + cfg *rest.Config, + resyncPeriod time.Duration, + r prometheus.Registerer, + keepLabel map[string]bool, + logger *log.Logger) (*Controller, error) { + kClient, err := kubernetes.NewForConfig(cfg) if err != nil { return nil, errors.Wrap(err, "instantiating kubernetes client failed") @@ -90,6 +102,7 @@ func New(cfg *rest.Config, resyncPeriod time.Duration, r prometheus.Registerer, c := &Controller{ logger: logger, metrics: NewMetrics(r), + keepLabel: keepLabel, kubeClientset: kClient, promClientset: pClient, workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "prometheusrules"), @@ -171,10 +184,10 @@ func (c *Controller) enqueuePromRule(obj interface{}) { // the operator itself or if the annotation for disabling the operator is // present. l := obj.(*monitoringv1.PrometheusRule).GetLabels() - if mustParseBool(l[labelManagedBy]) { + if mustParseBool(l[labelOperatorManagedBy]) { return } - if mustParseBool(l[labelDisable]) { + if mustParseBool(l[labelOperatorDisable]) { c.logger.Info("msg", "operator disabled, skipping", "key", key) return } @@ -274,65 +287,40 @@ func (c *Controller) syncHandler(key string) error { // The resource may no longer exist, in which case we clean up any // orphaned absent alert rules. c.logger.Info("msg", "PrometheusRule no longer exists", "key", key) - return c.deleteAbsentAlertRulesNamespace(namespace, name) + return c.cleanUpOrphanedAbsentAlertsNamespace(namespace, name) default: // Requeue object for later processing. return err } // Find the Prometheus server for this resource. - promServerName, ok := promRule.Labels["prometheus"] + prometheusServer, ok := promRule.Labels["prometheus"] if !ok { // This shouldn't happen but just in case it does. c.logger.ErrorWithBackoff("msg", "no 'prometheus' label found on the PrometheusRule", "key", key) return nil } - // Default tier and service label values to use for absent metric alerts. 
- // See parseRuleGroups() for info on why we need this. - var tier, service string - - // Get the PrometheusRule resource that defines the absent metric alert - // rules for this namespace. - ctx := context.Background() - absentPromRuleExists := false - absentPromRuleName := fmt.Sprintf("%s-absent-metric-alert-rules", promServerName) - absentPromRule, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).Get(ctx, absentPromRuleName, metav1.GetOptions{}) + // Get the corresponding AbsentPrometheusRule. + existingAbsentPromRule := false + absentPromRule, err := c.getAbsentPrometheusRule(namespace, prometheusServer) switch { case err == nil: - absentPromRuleExists = true - tier, service = getTierAndService(absentPromRule.Spec.Groups) + existingAbsentPromRule = true case apierrors.IsNotFound(err): - // Try to get a value for tier and service by traversing through all the - // PrometheusRules for the specific Prometheus server in this namespace. - prList, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).List(ctx, metav1.ListOptions{}) + absentPromRule, err = c.newAbsentPrometheusRule(namespace, prometheusServer) if err != nil { - // Requeue object for later processing. - return errors.Wrap(err, "could not list PrometheusRules") + return errors.Wrap(err, "could not create new AbsentPrometheusRule") } - var rg []monitoringv1.RuleGroup - for _, pr := range prList.Items { - if v := pr.Labels["prometheus"]; v == promServerName { - rg = append(rg, pr.Spec.Groups...) - } - } - tier, service = getTierAndService(rg) default: // This could have been caused by a temporary network failure, or any - // other transient reason. Requeue object for later processing. - return errors.Wrap(err, "could not get absent PrometheusRule "+absentPromRuleName) - } - if tier == "" || service == "" { - // Ideally, we shouldn't arrive at this point because this would mean - // that there was not a single alert rule for the prometheus server in - // this namespace that did not use templating for its tier and service - // labels. - c.logger.Info("msg", fmt.Sprintf("could not find default tier and service for Prometheus server '%s'", promServerName), - "key", key) + // other transient reason therefore we requeue object for later + // processing. + return errors.Wrap(err, "could not get AbsentPrometheusRule") } // Parse alert rules into absent metric alert rules. - rg, err := parseRuleGroups(name, tier, service, promRule.Spec.Groups) + rg, err := c.parseRuleGroups(name, absentPromRule.Tier, absentPromRule.Service, promRule.Spec.Groups) if err != nil { // We choose to absorb the error here as the worker would requeue the // resource otherwise and we'll be stuck parsing broken alert rules. @@ -343,15 +331,18 @@ func (c *Controller) syncHandler(key string) error { } switch lenRg := len(rg); { - case lenRg == 0 && absentPromRuleExists: - // This can happen when changes have been made to a PrometheusRule that - // result in no absent alert rules. E.g. absent() operator was used. In - // this case we clean up orphaned absent alert rules. - err = c.deleteAbsentAlertRules(namespace, name, absentPromRule) - case lenRg > 0 && absentPromRuleExists: - err = c.updateAbsentPrometheusRule(namespace, absentPromRule, rg) + case lenRg == 0 && existingAbsentPromRule: + // This can happen when changes have been made to a PrometheusRule + // that result in no absent alert rules. E.g. absent() + // operator was used. + // In this case we clean up orphaned absent alert rules. 
+ err = c.cleanUpOrphanedAbsentAlerts(name, absentPromRule) + case lenRg > 0 && existingAbsentPromRule: + err = c.updateAbsentPrometheusRule(absentPromRule, rg) case lenRg > 0: - err = c.createAbsentPrometheusRule(namespace, absentPromRuleName, promServerName, rg) + absentPromRule.Spec.Groups = rg + _, err = c.promClientset.MonitoringV1().PrometheusRules(namespace). + Create(context.Background(), absentPromRule.PrometheusRule, metav1.CreateOptions{}) } if err != nil { return err diff --git a/internal/controller/prometheusrule.go b/internal/controller/prometheusrule.go index c3996cc2..c2a799de 100644 --- a/internal/controller/prometheusrule.go +++ b/internal/controller/prometheusrule.go @@ -23,54 +23,135 @@ import ( monitoringv1 "github.com/coreos/prometheus-operator/pkg/apis/monitoring/v1" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" ) -// createAbsentPrometheusRule creates a new PrometheusRule with the given -// RuleGroup and name for the given namespace and prometheus server. -func (c *Controller) createAbsentPrometheusRule(namespace, name, promServerName string, rg []monitoringv1.RuleGroup) error { - // Add a label that identifies that this PrometheusRule is created - // and managed by this operator. - labels := map[string]string{ - "prometheus": promServerName, - "type": "alerting-rules", - labelManagedBy: "true", +// AbsentPrometheusRuleName returns the name of an AbsentPrometheusRule. +func AbsentPrometheusRuleName(prometheusServer string) string { + return fmt.Sprintf("%s-absent-metric-alert-rules", prometheusServer) +} + +// absentPrometheusRule is a wrapper around *monitoringv1.PrometheusRule with +// some additional info that we use for working with AbsentPrometheusRules. +// +// An absentPrometheusRule is the corresponding resource that is generated for +// a PrometheusRule resource for defining the absent metric alerts. +type absentPrometheusRule struct { + *monitoringv1.PrometheusRule + + // Default values to use for absent metric alerts. + // See parseRuleGroups() on why we need this. + Tier string + Service string +} + +// TODO: add tier and service as labels. +func (c *Controller) getAbsentPrometheusRule(namespace, prometheusServer string) (*absentPrometheusRule, error) { + n := AbsentPrometheusRuleName(prometheusServer) + pr, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).Get(context.Background(), n, metav1.GetOptions{}) + if err != nil { + return nil, err } - pr := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: labels, - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: rg, + + aPR := absentPrometheusRule{ + PrometheusRule: pr, + } + + // Find default tier and service values for this Prometheus server in this + // namespace. + if c.keepLabel[labelTier] || c.keepLabel[labelService] { + // Fast path: get values from resource labels + t, s := aPR.Labels[labelTier], aPR.Labels[labelService] + if t == "" || s == "" { + // If we can't get the values from resource then we fall back to + // the slower method of getting them by checking alert rules. 
+ t, s = getTierAndService(aPR.Spec.Groups) + } + if t == "" || s == "" { + c.logger.Info("msg", fmt.Sprintf("could not find default tier and service for Prometheus server '%s' in namespace '%s'", + prometheusServer, namespace)) + } + if c.keepLabel[labelTier] { + aPR.Tier = t + aPR.Labels[labelTier] = t + } + if c.keepLabel[labelService] { + aPR.Service = s + aPR.Labels[labelService] = s + } + } + + return &aPR, nil +} + +func (c *Controller) newAbsentPrometheusRule(namespace, prometheusServer string) (*absentPrometheusRule, error) { + n := AbsentPrometheusRuleName(prometheusServer) + aPR := absentPrometheusRule{ + PrometheusRule: &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: n, + Namespace: namespace, + Labels: map[string]string{ + // Add a label that identifies that this PrometheusRule is + // created and managed by this operator. + labelOperatorManagedBy: "true", + "prometheus": prometheusServer, + "type": "alerting-rules", + }, + }, }, } - _, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).Create(context.Background(), pr, metav1.CreateOptions{}) - if err != nil { - return errors.Wrap(err, "could not create new absent PrometheusRule") + // Find default tier and service values for this Prometheus server in this + // namespace. + if c.keepLabel[labelTier] || c.keepLabel[labelService] { + prList, err := c.promRuleLister.List(labels.Everything()) + if err != nil { + return nil, errors.Wrap(err, "could not list PrometheusRules") + } + var rg []monitoringv1.RuleGroup + for _, pr := range prList { + s := pr.Labels["prometheus"] + if pr.Namespace == namespace && s == prometheusServer { + rg = append(rg, pr.Spec.Groups...) + } + } + t, s := getTierAndService(rg) + if t == "" || s == "" { + // Ideally, we shouldn't arrive at this point because this would + // mean that there was not a single alert rule for the prometheus + // server in this namespace that did not use templating for its + // tier and service labels. + c.logger.Info("msg", fmt.Sprintf("could not find default tier and service for Prometheus server '%s' in namespace '%s'", + prometheusServer, namespace)) + } + if c.keepLabel[labelTier] { + aPR.Tier = t + aPR.Labels[labelTier] = t + } + if c.keepLabel[labelService] { + aPR.Service = s + aPR.Labels[labelService] = s + } } - c.logger.Info("msg", "successfully created new absent PrometheusRule", - "key", fmt.Sprintf("%s/%s", namespace, name)) - return nil + return &aPR, nil } -// updateAbsentPrometheusRule takes a PrometheusRule and updates it with the -// provided slice of RuleGroup. +// updateAbsentPrometheusRule updates an AbsentPrometheusRule with the provided +// slice of RuleGroup. func (c *Controller) updateAbsentPrometheusRule( - namespace string, - absentPR *monitoringv1.PrometheusRule, - rg []monitoringv1.RuleGroup) error { + absentPromRule *absentPrometheusRule, + absentAlertRuleGroups []monitoringv1.RuleGroup) error { - // Check if the absent PrometheusRule already has these rule groups. + // Check if the AbsentPrometheusRule already has these rule groups. // Update if it does, otherwise append. - old := absentPR.Spec.Groups + old := absentPromRule.Spec.Groups var new []monitoringv1.RuleGroup updated := make(map[string]bool) OuterLoop: for _, oldG := range old { - for _, g := range rg { + for _, g := range absentAlertRuleGroups { if oldG.Name == g.Name { // Add the new updated RuleGroup. new = append(new, g) @@ -82,7 +163,7 @@ OuterLoop: new = append(new, oldG) } // Add the pending RuleGroups. 
- for _, g := range rg { + for _, g := range absentAlertRuleGroups { if !updated[g.Name] { new = append(new, g) } @@ -93,29 +174,31 @@ OuterLoop: return nil } - absentPR.Spec.Groups = new - _, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).Update(context.Background(), absentPR, metav1.UpdateOptions{}) + absentPromRule.Spec.Groups = new + _, err := c.promClientset.MonitoringV1().PrometheusRules(absentPromRule.Namespace). + Update(context.Background(), absentPromRule.PrometheusRule, metav1.UpdateOptions{}) if err != nil { - return errors.Wrap(err, "could not update absent PrometheusRule") + return errors.Wrap(err, "could not update AbsentPrometheusRule") } c.logger.Info("msg", "successfully updated absent metric alert rules", - "key", fmt.Sprintf("%s/%s", namespace, absentPR.Name)) + "key", fmt.Sprintf("%s/%s", absentPromRule.Namespace, absentPromRule.Name)) return nil } -// deleteAbsentAlertRulesNamespace deletes absent alert rules concerning -// a specific PrometheusRule from all absent alert PrometheusRule resources -// across a namespace. -func (c *Controller) deleteAbsentAlertRulesNamespace(namespace, promRuleName string) error { +// cleanUpOrphanedAbsentAlertsNamespace deletes orphaned absent alert rules +// concerning a specific PrometheusRule from a namespace. +func (c *Controller) cleanUpOrphanedAbsentAlertsNamespace(namespace, promRuleName string) error { prList, err := c.promClientset.MonitoringV1().PrometheusRules(namespace). - List(context.Background(), metav1.ListOptions{LabelSelector: labelManagedBy}) + List(context.Background(), metav1.ListOptions{LabelSelector: labelOperatorManagedBy}) if err != nil { - return errors.Wrap(err, "could not list absent PrometheusRules") + return errors.Wrap(err, "could not list AbsentPrometheusRules") } for _, pr := range prList.Items { - if err := c.deleteAbsentAlertRules(namespace, promRuleName, pr); err != nil { + aPR := &absentPrometheusRule{PrometheusRule: pr} + err := c.cleanUpOrphanedAbsentAlerts(promRuleName, aPR) + if err != nil { return err } } @@ -123,35 +206,39 @@ func (c *Controller) deleteAbsentAlertRulesNamespace(namespace, promRuleName str return nil } -// deleteAbsentAlertRules deletes absent alert rules concerning a specific -// PrometheusRule from a specific absent PrometheusRule. -func (c *Controller) deleteAbsentAlertRules(namespace, promRuleName string, absentPR *monitoringv1.PrometheusRule) error { - old := absentPR.Spec.Groups - var new []monitoringv1.RuleGroup +// cleanUpOrphanedAbsentAlerts deletes orphaned absent alert rules concerning a +// specific PrometheusRule from a specific AbsentPrometheusRule. +func (c *Controller) cleanUpOrphanedAbsentAlerts(promRuleName string, absentPromRule *absentPrometheusRule) error { + old := absentPromRule.Spec.Groups + new := make([]monitoringv1.RuleGroup, 0, len(old)) for _, g := range old { - // The rule group names for absent PrometheusRule have the format: + // The rule group names for AbsentPrometheusRule have the format: // originPromRuleName/ruleGroupName. 
- if !strings.Contains(g.Name, promRuleName) { - new = append(new, g) + sL := strings.Split(g.Name, "/") + if len(sL) > 0 && sL[0] == promRuleName { + continue } + new = append(new, g) } if reflect.DeepEqual(old, new) { return nil } var err error - absentPR.Spec.Groups = new - if len(absentPR.Spec.Groups) == 0 { - err = c.promClientset.MonitoringV1().PrometheusRules(namespace).Delete(context.Background(), absentPR.Name, metav1.DeleteOptions{}) + absentPromRule.Spec.Groups = new + if len(absentPromRule.Spec.Groups) == 0 { + err = c.promClientset.MonitoringV1().PrometheusRules(absentPromRule.Namespace). + Delete(context.Background(), absentPromRule.Name, metav1.DeleteOptions{}) if err == nil { - c.logger.Info("msg", "successfully deleted orphaned absent PrometheusRule", - "key", fmt.Sprintf("%s/%s", namespace, absentPR.Name)) + c.logger.Info("msg", "successfully deleted orphaned AbsentPrometheusRule", + "key", fmt.Sprintf("%s/%s", absentPromRule.Namespace, absentPromRule.Name)) } } else { - _, err = c.promClientset.MonitoringV1().PrometheusRules(namespace).Update(context.Background(), absentPR, metav1.UpdateOptions{}) + _, err = c.promClientset.MonitoringV1().PrometheusRules(absentPromRule.Namespace). + Update(context.Background(), absentPromRule.PrometheusRule, metav1.UpdateOptions{}) if err == nil { c.logger.Info("msg", "successfully cleaned up orphaned absent metric alert rules", - "key", fmt.Sprintf("%s/%s", namespace, absentPR.Name)) + "key", fmt.Sprintf("%s/%s", absentPromRule.Namespace, absentPromRule.Name)) } } if err != nil { diff --git a/internal/controller/utils.go b/internal/controller/utils.go index 786df63f..6004d2f9 100644 --- a/internal/controller/utils.go +++ b/internal/controller/utils.go @@ -21,8 +21,9 @@ import ( monitoringv1 "github.com/coreos/prometheus-operator/pkg/apis/monitoring/v1" ) -// getTierAndService returns the most common tier and service combination -// for a namespace. +// getTierAndService returns the most common tier and service combination used +// in the alert definitions. +// // See parseRuleGroups() for info on why we need this. func getTierAndService(rg []monitoringv1.RuleGroup) (tier, service string) { // Map of tier to service to number of occurrences. @@ -32,11 +33,11 @@ func getTierAndService(rg []monitoringv1.RuleGroup) (tier, service string) { if r.Record != "" { continue } - t, ok := r.Labels["tier"] + t, ok := r.Labels[labelTier] if !ok || strings.Contains(t, "$labels") { continue } - s, ok := r.Labels["service"] + s, ok := r.Labels[labelService] if !ok || strings.Contains(s, "$labels") { continue } @@ -60,6 +61,8 @@ func getTierAndService(rg []monitoringv1.RuleGroup) (tier, service string) { return tier, service } +// mustParseBool is a wrapper for strconv.ParseBool() that returns false in +// case of an error. func mustParseBool(str string) bool { v, err := strconv.ParseBool(str) if err != nil { diff --git a/main.go b/main.go index 48e67ca1..7155fa63 100644 --- a/main.go +++ b/main.go @@ -55,13 +55,14 @@ var ( ) func main() { - var logLevel, logFormat, kubeconfig string + var logLevel, logFormat, kubeconfig, keepLabels string flagset := flag.CommandLine flagset.StringVar(&logLevel, "log-level", log.LevelInfo, fmt.Sprintf("Log level to use. Possible values: %s", strings.Join(availableLogLevels, ", "))) flagset.StringVar(&logFormat, "log-format", log.FormatLogfmt, fmt.Sprintf("Log format to use. Possible values: %s", strings.Join(availableLogFormats, ", "))) flagset.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. 
Only required if out-of-cluster") + flagset.StringVar(&keepLabels, "keep-labels", "service,tier", "A comma separated list of labels to keep from the original alert rule") if err := flagset.Parse(os.Args[1:]); err != nil { logFatalf("could not parse flagset: %s", err.Error()) } @@ -77,11 +78,16 @@ func main() { r := prometheus.NewRegistry() // Create controller + keepLabelMap := make(map[string]bool) + kL := strings.Split(keepLabels, ",") + for _, v := range kL { + keepLabelMap[strings.TrimSpace(v)] = true + } cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { logger.Fatal("msg", "instantiating cluster config failed", "err", err) } - c, err := controller.New(cfg, controller.DefaultResyncPeriod, r, log.With(*logger, "component", "controller")) + c, err := controller.New(cfg, controller.DefaultResyncPeriod, r, keepLabelMap, log.With(*logger, "component", "controller")) if err != nil { logger.Fatal("msg", "could not instantiate controller", "err", err) } diff --git a/test/controller_test.go b/test/controller_test.go index 446ca855..6b9febcf 100644 --- a/test/controller_test.go +++ b/test/controller_test.go @@ -27,7 +27,6 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/sapcc/absent-metrics-operator/internal/controller" "github.com/sapcc/absent-metrics-operator/test/fixtures" ) @@ -37,7 +36,7 @@ var waitForControllerToProcess = func() { time.Sleep(500 * time.Millisecond) } var _ = Describe("Controller", func() { ctx := context.Background() - Describe("Absent PrometheusRule creation", func() { + Describe("AbsentPrometheusRule creation", func() { It("should create "+fixtures.K8sAbsentPromRuleName+" in resmgmt namespace", func() { expected := fixtures.ResMgmtK8sAbsentPromRule var actual monitoringv1.PrometheusRule @@ -90,7 +89,7 @@ var _ = Describe("Controller", func() { }) }) - Describe("Absent PrometheusRule update", func() { + Describe("AbsentPrometheusRule update", func() { objKey := client.ObjectKey{Namespace: "swift", Name: "openstack-swift.alerts"} prObjKey := client.ObjectKey{Namespace: "swift", Name: fixtures.OSAbsentPromRuleName} tier, service := "os", "swift" @@ -118,9 +117,9 @@ var _ = Describe("Controller", func() { err = k8sClient.Update(ctx, &pr) Expect(err).ToNot(HaveOccurred()) - // Check if the corresponding Absent PrometheusRule was updated. + // Check if the corresponding AbsentPrometheusRule was updated. expected := fixtures.SwiftOSAbsentPromRule - rL, err := controller.ParseAlertRule(tier, service, rule) + rL, err := c.ParseAlertRule(tier, service, rule) Expect(err).ToNot(HaveOccurred()) i = len(expected.Spec.Groups) - 1 expected.Spec.Groups[i].Rules = append(expected.Spec.Groups[i].Rules, rL...) @@ -144,9 +143,9 @@ var _ = Describe("Controller", func() { err = k8sClient.Update(ctx, &pr) Expect(err).ToNot(HaveOccurred()) - // Check if the corresponding Absent PrometheusRule was updated. + // Check if the corresponding AbsentPrometheusRule was updated. 
expected := fixtures.SwiftOSAbsentPromRule - rL, err := controller.ParseAlertRule(tier, service, rule) + rL, err := c.ParseAlertRule(tier, service, rule) Expect(err).ToNot(HaveOccurred()) expected.Spec.Groups[0].Rules[0] = rL[0] @@ -159,7 +158,7 @@ var _ = Describe("Controller", func() { }) }) - Describe("Absent PrometheusRule cleanup", func() { + Describe("AbsentPrometheusRule cleanup", func() { Context("with PrometheusRule deletion", func() { It("should delete the "+fixtures.K8sAbsentPromRuleName+" from resmgmt namespace", func() { var pr monitoringv1.PrometheusRule @@ -180,7 +179,7 @@ var _ = Describe("Controller", func() { // the "openstack-limes-api.alerts" and // "openstack-limes-roleassign.alerts", deleting one resource // should only result in cleanup of its corresponding alerts, - // not the deletion of the entire absent PrometheusRule + // not the deletion of the entire AbsentPrometheusRule // resource. var pr monitoringv1.PrometheusRule pr.Name = "openstack-limes-roleassign.alerts" diff --git a/test/fixtures/doc.go b/test/fixtures/doc.go index ebbce204..55ca9fb4 100644 --- a/test/fixtures/doc.go +++ b/test/fixtures/doc.go @@ -15,8 +15,10 @@ // Package fixtures contains Go struct fixtures that are used by the tests. package fixtures -// Common constants for reusability. -const ( - K8sAbsentPromRuleName = "kubernetes-absent-metric-alert-rules" - OSAbsentPromRuleName = "openstack-absent-metric-alert-rules" +import "github.com/sapcc/absent-metrics-operator/internal/controller" + +// Common variables for reusability. +var ( + K8sAbsentPromRuleName = controller.AbsentPrometheusRuleName("kubernetes") + OSAbsentPromRuleName = controller.AbsentPrometheusRuleName("openstack") ) diff --git a/test/fixtures/prometheusrules/swift_openstack_swift.json b/test/fixtures/prometheusrules/swift_openstack_swift.json index 0a41089f..be468cb7 100644 --- a/test/fixtures/prometheusrules/swift_openstack_swift.json +++ b/test/fixtures/prometheusrules/swift_openstack_swift.json @@ -72,7 +72,7 @@ "tier": "os", "playbook": "docs/support/playbook/swift/rings.html", "meta": "Rings are not equal on all Swift nodes", - "absent-metrics-operator/disable": "true" + "no_alert_on_absence": "true" }, "annotations": { "description": "Rings are not equal on all nodes", diff --git a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go index 3ef2e7b6..607cf8d5 100644 --- a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go @@ -36,6 +36,8 @@ var ResMgmtK8sAbsentPromRule = monitoringv1.PrometheusRule{ "prometheus": "kubernetes", "type": "alerting-rules", "absent-metrics-operator/managed-by": "true", + "tier": "os", + "service": "keppel", }, }, Spec: monitoringv1.PrometheusRuleSpec{ diff --git a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go index a1311cf4..c4402de6 100644 --- a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go @@ -36,6 +36,8 @@ var ResMgmtOSAbsentPromRule = monitoringv1.PrometheusRule{ "prometheus": "openstack", "type": "alerting-rules", "absent-metrics-operator/managed-by": "true", + "tier": "os", + "service": "limes", }, }, Spec: monitoringv1.PrometheusRuleSpec{ diff --git a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go 
index 0e412856..41fd6ddc 100644 --- a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go @@ -36,6 +36,8 @@ var SwiftOSAbsentPromRule = monitoringv1.PrometheusRule{ "prometheus": "openstack", "type": "alerting-rules", "absent-metrics-operator/managed-by": "true", + "tier": "os", + "service": "swift", }, }, Spec: monitoringv1.PrometheusRuleSpec{ diff --git a/test/suite_test.go b/test/suite_test.go index c238b884..7af5c136 100644 --- a/test/suite_test.go +++ b/test/suite_test.go @@ -45,6 +45,7 @@ var ( testEnv *envtest.Environment k8sClient client.Client + c *controller.Controller wg *errgroup.Group cancel context.CancelFunc ) @@ -86,7 +87,11 @@ var _ = BeforeSuite(func() { By("starting controller") l, err := log.New(GinkgoWriter, log.FormatLogfmt, log.LevelAll) Expect(err).ToNot(HaveOccurred()) - c, err := controller.New(cfg, 1*time.Second, prometheus.NewRegistry(), l) + kL := map[string]bool{ + "tier": true, + "service": true, + } + c, err = controller.New(cfg, 1*time.Second, prometheus.NewRegistry(), kL, l) Expect(err).ToNot(HaveOccurred()) ctx := context.Background() From 58cc93883bc72815f1e161ff5b31b388d756eb42 Mon Sep 17 00:00:00 2001 From: Muhammad Talal Anwar Date: Tue, 18 Aug 2020 15:47:15 +0200 Subject: [PATCH 2/4] Add playbook --- CHANGELOG.md | 3 + README.md | 94 +++++-------------- doc/playbook.md | 35 +++++++ internal/controller/alert_rule.go | 1 + ...t_kubernetes_absent_metrics_alert_rules.go | 1 + ...mt_openstack_absent_metrics_alert_rules.go | 1 + ...ft_openstack_absent_metrics_alert_rules.go | 1 + 7 files changed, 67 insertions(+), 69 deletions(-) create mode 100644 doc/playbook.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d485474..2dc1d666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Operator can be disabled for a specific alert rule. +- `playbook` label to absent metric alerts. +- `keep-labels` flag for specifying which labels to carry over from alert + rules. ## [0.1.0] - 2020-08-13 diff --git a/README.md b/README.md index 2b1e736b..2e6a7f8e 100644 --- a/README.md +++ b/README.md @@ -8,20 +8,24 @@ > Project status: **alpha**. The API and user facing objects may change. +In this document: + +- [Overview](#overview) - [Motivation](#motivation) -- [Installation](#installation) - - [Pre\-compiled binaries and Docker images](#pre-compiled-binaries-and-docker-images) - - [Building from source](#building-from-source) - [Usage](#usage) - - [Disable for specific alerts](#disable-for-specific-alerts) - - [Caveat](#caveat) - [Metrics](#metrics) - [Absent metric alert definition](#absent-metric-alert-definition) - [Template](#template) - [Labels](#labels) - [Defaults](#defaults) - [Carry over from original alert rule](#carry-over-from-original-alert-rule) - - [Tier and service](#tier-and-service) + - [Tier and service](#tier-and-service) + +In other documents: + +- [Operator's Playbook](./doc/playbook.md) + +## Overview The absent metrics operator is a companion operator for the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). @@ -81,74 +85,21 @@ annotations: description: The metric 'foo_bar' is missing. 'ImportantAlert' alert using it may not fire as intended. ``` -## Installation - -### Pre-compiled binaries and Docker images - -See the latest [release](https://github.com/sapcc/absent-metrics-operator/releases/latest). 
- -### Building from source - -The only required build dependency is [Go](https://golang.org/). - -``` -$ git clone https://github.com/sapcc/absent-metrics-operator.git -$ cd absent-metrics-operator -$ make install -``` - -This will put the binary in `/usr/bin/`. - -Alternatively, you can also build directly with the `go get` command: - -``` -$ go get -u github.com/sapcc/absent-metrics-operator -``` - -This will put the binary in `$GOPATH/bin/`. - ## Usage -``` -$ absent-metrics-operator --kubeconfig="$KUBECONFIG" -``` +We provide pre-compiled binaries and container images. See the latest +[release](https://github.com/sapcc/absent-metrics-operator/releases/latest). -`kubeconfig` flag is only required if running outside a cluster. +Alternatively, you can build with `make`, install with `make install`, `go get`, or +`docker build`. -For detailed usage instructions: +For usage instructions: ``` $ absent-metrics-operator --help ``` -### Disable for specific alerts - -You can disable the operator for a specific `PrometheusRule` resource by adding -the following label to it: - -```yaml -absent-metrics-operator/disable: true -``` - -If you want to disable the operator for only a specific alert rule instead of -all the alerts in a `PrometheusRule`, you can add the following label to the -alert rule: - -```yaml -alert: ImportantAlert -expr: foo_bar > 0 -for: 5m -labels: - no_alert_on_absence: true - ... -``` - -#### Caveat - -If you disable the operator for a specific alert or a specific -`PrometheusRule`, however there are other alerts or `PrometheusRules` which -have alert definitions that use the same metric(s) then the absent metric -alerts for those metric(s) will be created regardless. +You can disable the the operator for a specific `PrometheusRule` or a specific alert definition, refer to the [operator's Playbook](./doc/playbook.md) for more info. ### Metrics @@ -203,6 +154,8 @@ Then the alert name would be `AbsentOsLimesSuccessfulScrapesRate5m`. The following labels are always present on every absent metric alert rule: - `severity` is alway `info`. +- `playbook` provides a [link](./doc/playbook.md) to documentation that can be + referenced on how to deal with an absent metric alert. #### Carry over from original alert rule @@ -210,14 +163,17 @@ You can specify which labels to carry over from the original alert rule by specifying a comma-separated list of labels to the `--keep-labels` flag. The default value for this flag is `service,tier`. -##### Tier and service +#### Tier and service -`tier` and `service` labels are carried over from the original alert rule -unless those labels use templating (i.e. use `$labels`), in which case the -default `tier` and `service` values will be used. +`tier` and `service` labels are a special case they are carried over from the +original alert rule unless those labels use templating (i.e. use `$labels`), in +which case the default `tier` and `service` values will be used. The operator determines a default `tier` and `service` for a specific Prometheus server in a namespace by traversing through all the alert rule definitions for that Prometheus server in that namespace. It chooses the most common `tier` and `service` label combination that is used across those alerts as the default values. + +The value of these labels are also for used (if enabled with `keep-labels`) in +the name for the absent metric alert. See [template](#Template). 
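As an aside to the README text above on tier/service defaults: the "most common tier and service combination" selection is implemented by `getTierAndService()` in `internal/controller/utils.go` (shown in the first patch). The following self-contained Go sketch restates that idea under simplified assumptions; the `rule` type and the `mostCommonTierService` helper are hypothetical stand-ins for `monitoringv1.Rule` and the real function, not part of the patches.

```go
package main

import (
	"fmt"
	"strings"
)

// rule is a minimal stand-in for monitoringv1.Rule; only the fields needed to
// illustrate the default tier/service selection are included.
type rule struct {
	Record string
	Labels map[string]string
}

// mostCommonTierService restates the idea behind getTierAndService(): count
// every non-templated tier/service label combination across the alert rules
// and return the most frequent one as the default.
func mostCommonTierService(rules []rule) (tier, service string) {
	count := map[string]map[string]int{}
	for _, r := range rules {
		if r.Record != "" {
			continue // recording rules are skipped, only alert rules count
		}
		t, s := r.Labels["tier"], r.Labels["service"]
		if t == "" || s == "" || strings.Contains(t, "$labels") || strings.Contains(s, "$labels") {
			continue // missing or templated labels cannot serve as defaults
		}
		if count[t] == nil {
			count[t] = map[string]int{}
		}
		count[t][s]++
	}
	best := 0
	for t, services := range count {
		for s, n := range services {
			if n > best {
				best, tier, service = n, t, s
			}
		}
	}
	return tier, service
}

func main() {
	rules := []rule{
		{Labels: map[string]string{"tier": "os", "service": "limes"}},
		{Labels: map[string]string{"tier": "os", "service": "limes"}},
		{Labels: map[string]string{"tier": "os", "service": "keppel"}},
	}
	t, s := mostCommonTierService(rules)
	fmt.Println(t, s) // prints: os limes
}
```

Falling back to the combination that most existing alert rules already use keeps the generated absent metric alerts routed consistently with the rest of the alerts for that Prometheus server, which is the intent described in the README text above.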
diff --git a/doc/playbook.md b/doc/playbook.md new file mode 100644 index 00000000..05247240 --- /dev/null +++ b/doc/playbook.md @@ -0,0 +1,35 @@ +# Operator's Playbook + +This document assumes that you have already read and understood the [general +README](../README.md). If not, start reading there. + +### Disable for specific alerts + +You can disable the operator for a specific `PrometheusRule` resource by adding +the following label to it: + +```yaml +absent-metrics-operator/disable: "true" +``` + +If you want to disable the operator for only a specific alert rule instead of +all the alerts in a `PrometheusRule`, you can add the `no_alert_on_absence` +label to the alert rule. For example: + +```yaml +alert: ImportantAlert +expr: foo_bar > 0 +for: 5m +labels: + no_alert_on_absence: "true" + ... +``` + +**Note**: make sure that you use `"true"` and not `true`. + +#### Caveat + +If you disable the operator for a specific alert or a specific +`PrometheusRule`, however there are other alerts or `PrometheusRules` which +have alert definitions that use the same metric(s) then the absent metric +alerts for those metric(s) will be created regardless. diff --git a/internal/controller/alert_rule.go b/internal/controller/alert_rule.go index 2c7f142e..6eae0ff9 100644 --- a/internal/controller/alert_rule.go +++ b/internal/controller/alert_rule.go @@ -125,6 +125,7 @@ func (c *Controller) ParseAlertRule(tier, service string, in monitoringv1.Rule) // Default labels lab := map[string]string{ "severity": "info", + "playbook": "https://git.io/absent-metrics-operator-playbook", } // Carry over labels from the original alert diff --git a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go index 607cf8d5..6b61532e 100644 --- a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go @@ -24,6 +24,7 @@ var kepLab = map[string]string{ "tier": "os", "service": "keppel", "severity": "info", + "playbook": "https://git.io/absent-metrics-operator-playbook", } // ResMgmtK8sAbsentPromRule represents the PrometheusRule that should be diff --git a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go index c4402de6..759f9b86 100644 --- a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go @@ -24,6 +24,7 @@ var limesLab = map[string]string{ "tier": "os", "service": "limes", "severity": "info", + "playbook": "https://git.io/absent-metrics-operator-playbook", } // ResMgmtOSAbsentPromRule represents the PrometheusRule that should be diff --git a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go index 41fd6ddc..cee070ac 100644 --- a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go @@ -24,6 +24,7 @@ var swiftLab = map[string]string{ "tier": "os", "service": "swift", "severity": "info", + "playbook": "https://git.io/absent-metrics-operator-playbook", } // SwiftOSAbsentPromRule represents the PrometheusRule that should be generated From 5528419dbb1f3d73741b25a433a4775fad41cb2b Mon Sep 17 00:00:00 2001 From: Muhammad Talal Anwar Date: Tue, 18 Aug 2020 16:22:37 +0200 Subject: [PATCH 3/4] github/workflows: add doc/ to paths-ignore --- .github/workflows/build-test.yml | 8 +++++--- 
.github/workflows/golangci-lint.yml | 8 +++++--- .github/workflows/license.yml | 8 +++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index b4d9a972..9ded50d6 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -5,12 +5,14 @@ on: branches: - master paths-ignore: - - '**.md' + - "**.md" + - "doc/**" pull_request: branches: - - '*' + - "*" paths-ignore: - - '**.md' + - "**.md" + - "doc/**" jobs: test: diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 851c2f71..1c6179e1 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -5,12 +5,14 @@ on: branches: - master paths-ignore: - - '**.md' + - "**.md" + - "doc/**" pull_request: branches: - - '*' + - "*" paths-ignore: - - '**.md' + - "**.md" + - "doc/**" jobs: lint: diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 6bee4858..bc70344d 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -5,12 +5,14 @@ on: branches: - master paths-ignore: - - '**.md' + - "**.md" + - "doc/**" pull_request: branches: - - '*' + - "*" paths-ignore: - - '**.md' + - "**.md" + - "doc/**" jobs: check: From ca7abac8121b096e1deb38e3836073437551a547 Mon Sep 17 00:00:00 2001 From: Muhammad Talal Anwar Date: Tue, 18 Aug 2020 16:35:47 +0200 Subject: [PATCH 4/4] controller: use full link for playbook --- README.md | 2 +- internal/controller/alert_rule.go | 2 +- test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go | 2 +- test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go | 2 +- test/fixtures/swift_openstack_absent_metrics_alert_rules.go | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2e6a7f8e..b9dd4514 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ For usage instructions: $ absent-metrics-operator --help ``` -You can disable the the operator for a specific `PrometheusRule` or a specific alert definition, refer to the [operator's Playbook](./doc/playbook.md) for more info. +You can disable the the operator for a specific `PrometheusRule` or a specific alert definition, refer to the [operator's playbook](./doc/playbook.md) for more info. 
### Metrics diff --git a/internal/controller/alert_rule.go b/internal/controller/alert_rule.go index 6eae0ff9..5f02ce49 100644 --- a/internal/controller/alert_rule.go +++ b/internal/controller/alert_rule.go @@ -125,7 +125,7 @@ func (c *Controller) ParseAlertRule(tier, service string, in monitoringv1.Rule) // Default labels lab := map[string]string{ "severity": "info", - "playbook": "https://git.io/absent-metrics-operator-playbook", + "playbook": "https://github.com/sapcc/absent-metrics-operator/blob/master/doc/playbook.md", } // Carry over labels from the original alert diff --git a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go index 6b61532e..a4f6a72b 100644 --- a/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_kubernetes_absent_metrics_alert_rules.go @@ -24,7 +24,7 @@ var kepLab = map[string]string{ "tier": "os", "service": "keppel", "severity": "info", - "playbook": "https://git.io/absent-metrics-operator-playbook", + "playbook": "https://github.com/sapcc/absent-metrics-operator/blob/master/doc/playbook.md", } // ResMgmtK8sAbsentPromRule represents the PrometheusRule that should be diff --git a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go index 759f9b86..0b6b7067 100644 --- a/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/resmgmt_openstack_absent_metrics_alert_rules.go @@ -24,7 +24,7 @@ var limesLab = map[string]string{ "tier": "os", "service": "limes", "severity": "info", - "playbook": "https://git.io/absent-metrics-operator-playbook", + "playbook": "https://github.com/sapcc/absent-metrics-operator/blob/master/doc/playbook.md", } // ResMgmtOSAbsentPromRule represents the PrometheusRule that should be diff --git a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go index cee070ac..cf6262d0 100644 --- a/test/fixtures/swift_openstack_absent_metrics_alert_rules.go +++ b/test/fixtures/swift_openstack_absent_metrics_alert_rules.go @@ -24,7 +24,7 @@ var swiftLab = map[string]string{ "tier": "os", "service": "swift", "severity": "info", - "playbook": "https://git.io/absent-metrics-operator-playbook", + "playbook": "https://github.com/sapcc/absent-metrics-operator/blob/master/doc/playbook.md", } // SwiftOSAbsentPromRule represents the PrometheusRule that should be generated
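For readers following the `keep-labels` changes across these patches, here is a compact, self-contained Go sketch of how the flag value becomes the operator's label whitelist and how labels are then carried over onto an absent metric alert. The function names and simplified signatures are illustrative stand-ins for the real code in `main.go` and `ParseAlertRule()`; they are not part of the patches themselves.

```go
package main

import (
	"fmt"
	"strings"
)

// buildKeepLabelMap mirrors the flag handling added to main.go: the value of
// --keep-labels (default "service,tier") is split on commas into the set of
// label names that may be carried over to absent metric alerts.
func buildKeepLabelMap(keepLabels string) map[string]bool {
	m := make(map[string]bool)
	for _, v := range strings.Split(keepLabels, ",") {
		m[strings.TrimSpace(v)] = true
	}
	return m
}

// carryOverLabels sketches the label handling in ParseAlertRule(): start from
// the default labels, then copy whitelisted labels from the original alert,
// falling back to the given defaults when tier/service are templated or empty.
func carryOverLabels(keep map[string]bool, orig map[string]string, defaultTier, defaultService string) map[string]string {
	lab := map[string]string{
		"severity": "info",
		"playbook": "https://github.com/sapcc/absent-metrics-operator/blob/master/doc/playbook.md",
	}
	for k := range keep {
		v := orig[k]
		if k == "tier" && (v == "" || strings.Contains(v, "$labels")) {
			v = defaultTier
		}
		if k == "service" && (v == "" || strings.Contains(v, "$labels")) {
			v = defaultService
		}
		if v != "" {
			lab[k] = v
		}
	}
	return lab
}

func main() {
	keep := buildKeepLabelMap("service,tier")
	orig := map[string]string{
		"tier":     "os",
		"service":  "{{ $labels.service }}", // templated, so the default is used instead
		"severity": "critical",              // not in the keep set, so it is not carried over
	}
	fmt.Println(carryOverLabels(keep, orig, "os", "swift"))
}
```

Note that `severity` from the original rule is intentionally not carried over: the generated absent metric alerts always get `severity: info`, plus the `playbook` label introduced in the second patch.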