Skip to content

Commit

Permalink
controller: add fast path for alert rule parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
talal committed Aug 21, 2020
1 parent f7b9443 commit 1ffe5be
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 28 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.5.1] - 2020-08-21

### Changed

- Prevent superfluous processing if the resource doesn't have any alert rules.

## [0.5.0] - 2020-08-21

### Added
Expand Down Expand Up @@ -41,7 +47,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release.

[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.0...HEAD
[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.1...HEAD
[0.5.1]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.0...v0.5.1
[0.5.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.4.0...v0.5.0
[0.4.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.2.0...v0.3.0
Expand Down
15 changes: 8 additions & 7 deletions internal/controller/alert_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ func (mex *metricNameExtractor) Visit(node parser.Node, path []parser.Node) (par
case promlabels.MatchEqual, promlabels.MatchNotEqual:
name = v.Value
case promlabels.MatchRegexp, promlabels.MatchNotRegexp:
// Currently, we don't create absent metric alerts for regex
// name label matching.
// Currently, we don't create absent alerts for regex name
// label matching.
// However, there are cases where some alert expressions use
// the regexp matching even though an equality would suffice.
// E.g.:
Expand Down Expand Up @@ -101,13 +101,14 @@ func (mex *metricNameExtractor) Visit(node parser.Node, path []parser.Node) (par
}

// parseRuleGroups takes a slice of RuleGroup that has alert rules and returns
// a new slice of RuleGroup that has the corresponding absent metric alert rules.
// a new slice of RuleGroup that has the corresponding absent alert rules.
//
// The original tier and service labels of alert rules will be carried over to
// the corresponding absent alert rule unless templating was used (i.e. $labels)
// for these labels in which case the provided default tier and service will be used.
// the corresponding absent alerts unless templating was used (i.e. $labels)
// for these labels in which case the provided default tier and service will be
// used.
//
// The rule group names for the absent metric alerts have the format:
// The rule group names for the absent alerts have the format:
// promRuleName/originalGroupName.
func (c *Controller) parseRuleGroups(
promRuleName, defaultTier, defaultService string,
Expand Down Expand Up @@ -145,7 +146,7 @@ func (c *Controller) parseRuleGroups(
return out, nil
}

// ParseAlertRule converts an alert rule to absent metric alert rules.
// ParseAlertRule converts an alert rule to absent metric alert rule.
// Since an original alert expression can reference multiple time series therefore
// a slice of []monitoringv1.Rule is returned as the result would be multiple
// absent metric alert rules (one for each time series).
Expand Down
48 changes: 32 additions & 16 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (c *Controller) processNextWorkItem() bool {
}

// syncHandler gets a PrometheusRule from the queue and updates the
// corresponding absent metric alert PrometheusRule for it.
// corresponding absentPrometheusRule for it.
func (c *Controller) syncHandler(key string) error {
// Convert the namespace/name string into a distinct namespace and name.
namespace, name, err := cache.SplitMetaNamespaceKey(key)
Expand All @@ -293,7 +293,7 @@ func (c *Controller) syncHandler(key string) error {
// continue processing down below
case apierrors.IsNotFound(err):
// The resource may no longer exist, in which case we clean up any
// orphaned absent alert rules.
// orphaned absent alerts.
c.logger.Info("msg", "PrometheusRule no longer exists", "key", key)
return c.cleanUpOrphanedAbsentAlertsNamespace(namespace, name)
default:
Expand All @@ -318,13 +318,29 @@ func (c *Controller) syncHandler(key string) error {
case apierrors.IsNotFound(err):
absentPromRule, err = c.newAbsentPrometheusRule(namespace, prometheusServer)
if err != nil {
return errors.Wrap(err, "could not create new AbsentPrometheusRule")
return errors.Wrap(err, "could not initialize new AbsentPrometheusRule")
}
default:
// This could have been caused by a temporary network failure, or any
// other transient reason therefore we requeue object for later
// processing.
return errors.Wrap(err, "could not get AbsentPrometheusRule")
return errors.Wrap(err, "could not get existing AbsentPrometheusRule")
}

rg := promRule.Spec.Groups
// Fast path: check that the PrometheusRule contains at least one alert
// rule; no need to continue further otherwise.
hasAlerts := false
for _, g := range rg {
for _, r := range g.Rules {
if r.Alert != "" {
hasAlerts = true
}
}
}
if !hasAlerts {
// We still need to clean up any orphaned absent alerts that may exist.
return c.cleanUpOrphanedAbsentAlerts(name, absentPromRule)
}

defaultTier := absentPromRule.Tier
Expand All @@ -345,8 +361,8 @@ func (c *Controller) syncHandler(key string) error {
c.logger.ErrorWithBackoff("msg", "could not find a value for 'service' label", "key", key)
}
}
// Parse alert rules into absent metric alert rules.
rg, err := c.parseRuleGroups(name, defaultTier, defaultService, promRule.Spec.Groups)
// Parse alert rules into absent alert rules.
absentRg, err := c.parseRuleGroups(name, defaultTier, defaultService, rg)
if err != nil {
// We choose to absorb the error here as the worker would requeue the
// resource otherwise and we'll be stuck parsing broken alert rules.
Expand All @@ -356,17 +372,17 @@ func (c *Controller) syncHandler(key string) error {
return nil
}

switch lenRg := len(rg); {
case lenRg == 0 && existingAbsentPromRule:
// This can happen when changes have been made to a PrometheusRule
// that result in no absent alert rules. E.g. absent()
// operator was used.
// In this case we clean up orphaned absent alert rules.
switch l := len(absentRg); {
case l == 0 && existingAbsentPromRule:
// This can happen when changes have been made to alert rules that
// result in no absent alerts.
// E.g. absent() or the 'no_alert_on_absence' label was used.
// In this case we clean up orphaned absent alerts.
err = c.cleanUpOrphanedAbsentAlerts(name, absentPromRule)
case lenRg > 0 && existingAbsentPromRule:
err = c.updateAbsentPrometheusRule(absentPromRule, rg)
case lenRg > 0:
absentPromRule.Spec.Groups = rg
case l > 0 && existingAbsentPromRule:
err = c.updateAbsentPrometheusRule(absentPromRule, absentRg)
case l > 0:
absentPromRule.Spec.Groups = absentRg
_, err = c.promClientset.MonitoringV1().PrometheusRules(namespace).
Create(context.Background(), absentPromRule.PrometheusRule, metav1.CreateOptions{})
}
Expand Down
8 changes: 4 additions & 4 deletions internal/controller/prometheusrule.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ func AbsentPrometheusRuleName(prometheusServer string) string {
// some additional info that we use for working with absentPrometheusRules.
//
// An absentPrometheusRule is the corresponding resource that is generated for
// a PrometheusRule resource for defining the absent metric alerts.
// a PrometheusRule resource for defining the absent alerts.
type absentPrometheusRule struct {
*monitoringv1.PrometheusRule

// Default values to use for absent metric alerts.
// Default values to use for absent alerts.
// See parseRuleGroups() on why we need this.
Tier string
Service string
Expand Down Expand Up @@ -174,7 +174,7 @@ OuterLoop:
return nil
}

// cleanUpOrphanedAbsentAlertsNamespace deletes orphaned absent alert rules
// cleanUpOrphanedAbsentAlertsNamespace deletes orphaned absent alerts
// concerning a specific PrometheusRule from a namespace.
func (c *Controller) cleanUpOrphanedAbsentAlertsNamespace(namespace, promRuleName string) error {
prList, err := c.promClientset.MonitoringV1().PrometheusRules(namespace).
Expand All @@ -194,7 +194,7 @@ func (c *Controller) cleanUpOrphanedAbsentAlertsNamespace(namespace, promRuleNam
return nil
}

// cleanUpOrphanedAbsentAlerts deletes orphaned absent alert rules concerning a
// cleanUpOrphanedAbsentAlerts deletes orphaned absent alerts concerning a
// specific PrometheusRule from a specific absentPrometheusRule.
func (c *Controller) cleanUpOrphanedAbsentAlerts(promRuleName string, absentPromRule *absentPrometheusRule) error {
old := absentPromRule.Spec.Groups
Expand Down

0 comments on commit 1ffe5be

Please sign in to comment.