Skip to content

Commit

Permalink
Add alert relabelling for Thanos Rule based on Prometheus' logic (tha…
Browse files Browse the repository at this point in the history
…nos-io#4125)

* Add alert relabelling for Thanos Rule based on Prometheus' logic

Signed-off-by: Christian Schulz <trutty3@gmail.com>

* Update documentation

Signed-off-by: Christian Schulz <trutty3@gmail.com>

* Group imports as per review

Signed-off-by: Christian Schulz <trutty3@gmail.com>

Co-authored-by: Christian Schulz <christian.schulz2@f-i-ts.de>
  • Loading branch information
trutty and Christian Schulz committed May 14, 2021
1 parent 27427e8 commit 745f0f3
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 27 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Expand Up @@ -18,7 +18,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re
- [#4171](https://github.com/thanos-io/thanos/pull/4171) Docker: Busybox image updated to latest (1.33.1)
- [#4175](https://github.com/thanos-io/thanos/pull/4175) Added Tag Configuration Support Lightstep Tracing
- [#4176](https://github.com/thanos-io/thanos/pull/4176) Query API: Adds optional `Stats param` to return stats for query APIs

- [#4125](https://github.com/thanos-io/thanos/pull/4125) Rule: Add `--alert.relabel-config` / `--alert.relabel-config-file` flags, which allow specifying alert relabel configurations like in [Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config)
### Fixed
-
### Changed
Expand Down
2 changes: 2 additions & 0 deletions cmd/thanos/config.go
Expand Up @@ -202,6 +202,7 @@ type alertMgrConfig struct {
alertmgrsDNSSDInterval time.Duration
alertExcludeLabels []string
alertQueryURL *string
alertRelabelConfigPath *extflag.PathOrContent
}

func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig {
Expand All @@ -215,5 +216,6 @@ func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig {
ac.alertQueryURL = cmd.Flag("alert.query-url", "The external Thanos Query URL that would be set in all alerts 'Source' field").String()
cmd.Flag("alert.label-drop", "Labels by name to drop before sending to alertmanager. This allows alert to be deduplicated on replica label (repeated). Similar Prometheus alert relabelling").
StringsVar(&ac.alertExcludeLabels)
ac.alertRelabelConfigPath = extflag.RegisterPathOrContent(cmd, "alert.relabel-config", "YAML file that contains alert relabelling configuration.", false)
return ac
}
23 changes: 19 additions & 4 deletions cmd/thanos/rule.go
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/tsdb"
Expand Down Expand Up @@ -69,9 +70,10 @@ type ruleConfig struct {
query queryConfig
queryConfigYAML []byte

alertmgr alertMgrConfig
alertmgrsConfigYAML []byte
alertQueryURL *url.URL
alertmgr alertMgrConfig
alertmgrsConfigYAML []byte
alertQueryURL *url.URL
alertRelabelConfigYAML []byte

resendDelay time.Duration
evalInterval time.Duration
Expand Down Expand Up @@ -172,6 +174,11 @@ func registerRule(app *extkingpin.App) {
return errors.New("--alertmanagers.url and --alertmanagers.config* parameters cannot be defined at the same time")
}

conf.alertRelabelConfigYAML, err = conf.alertmgr.alertRelabelConfigPath.Content()
if err != nil {
return err
}

httpLogOpts, err := logging.ParseHTTPOptions(*reqLogDecision, reqLogConfig)
if err != nil {
return errors.Wrap(err, "error while parsing config for request logging")
Expand Down Expand Up @@ -352,6 +359,14 @@ func runRule(
level.Warn(logger).Log("msg", "no alertmanager configured")
}

var alertRelabelConfigs []*relabel.Config
if len(conf.alertRelabelConfigYAML) > 0 {
alertRelabelConfigs, err = alert.LoadRelabelConfigs(conf.alertRelabelConfigYAML)
if err != nil {
return err
}
}

amProvider := dns.NewProvider(
logger,
extprom.WrapRegistererWithPrefix("thanos_rule_alertmanagers_", reg),
Expand All @@ -377,7 +392,7 @@ func runRule(

var (
ruleMgr *thanosrules.Manager
alertQ = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels)
alertQ = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels, alertRelabelConfigs)
)
{
// Run rule evaluation and alert notifications.
Expand Down
7 changes: 7 additions & 0 deletions docs/components/rule.md
Expand Up @@ -244,6 +244,13 @@ Flags:
--alert.query-url=ALERT.QUERY-URL
The external Thanos Query URL that would be set
in all alerts 'Source' field
--alert.relabel-config=<content>
Alternative to 'alert.relabel-config-file' flag
(mutually exclusive). Content of YAML file that
contains alert relabelling configuration.
--alert.relabel-config-file=<file-path>
Path to YAML file that contains alert
relabelling configuration.
--alertmanagers.config=<content>
Alternative to 'alertmanagers.config-file' flag
(mutually exclusive). Content of YAML file that
Expand Down
30 changes: 16 additions & 14 deletions pkg/alert/alert.go
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"
"go.uber.org/atomic"

"github.com/thanos-io/thanos/pkg/runutil"
Expand Down Expand Up @@ -85,11 +86,12 @@ func (a *Alert) ResolvedAt(ts time.Time) bool {
// Queue is a queue of alert notifications waiting to be sent. The queue is consumed in batches
// and entries are dropped at the front if it runs full.
type Queue struct {
logger log.Logger
maxBatchSize int
capacity int
toAddLset labels.Labels
toExcludeLabels labels.Labels
logger log.Logger
maxBatchSize int
capacity int
toAddLset labels.Labels
toExcludeLabels labels.Labels
alertRelabelConfigs []*relabel.Config

mtx sync.Mutex
queue []*Alert
Expand Down Expand Up @@ -120,19 +122,20 @@ func relabelLabels(lset labels.Labels, excludeLset []string) (toAdd labels.Label

// NewQueue returns a new queue. The given label set is attached to all alerts pushed to the queue.
// The given exclude label set tells what label names to drop including external labels.
func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string) *Queue {
func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string, alertRelabelConfigs []*relabel.Config) *Queue {
toAdd, toExclude := relabelLabels(externalLset, excludeLabels)

if logger == nil {
logger = log.NewNopLogger()
}
q := &Queue{
logger: logger,
capacity: capacity,
morec: make(chan struct{}, 1),
maxBatchSize: maxBatchSize,
toAddLset: toAdd,
toExcludeLabels: toExclude,
logger: logger,
capacity: capacity,
morec: make(chan struct{}, 1),
maxBatchSize: maxBatchSize,
toAddLset: toAdd,
toExcludeLabels: toExclude,
alertRelabelConfigs: alertRelabelConfigs,

dropped: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_alert_queue_alerts_dropped_total",
Expand Down Expand Up @@ -214,7 +217,6 @@ func (q *Queue) Push(alerts []*Alert) {
q.pushed.Add(float64(len(alerts)))

// Attach external labels and drop excluded labels before sending.
// TODO(bwplotka): User proper relabelling with https://github.com/thanos-io/thanos/issues/660.
for _, a := range alerts {
lb := labels.NewBuilder(labels.Labels{})
for _, l := range a.Labels {
Expand All @@ -226,7 +228,7 @@ func (q *Queue) Push(alerts []*Alert) {
for _, l := range q.toAddLset {
lb.Set(l.Name, l.Value)
}
a.Labels = lb.Labels()
a.Labels = relabel.Process(lb.Labels(), q.alertRelabelConfigs...)
}

// Queue capacity should be significantly larger than a single alert
Expand Down
43 changes: 35 additions & 8 deletions pkg/alert/alert_test.go
Expand Up @@ -13,7 +13,9 @@ import (
"time"

"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"

promtestutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/thanos-io/thanos/pkg/testutil"
Expand All @@ -24,9 +26,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) {
batchsize := 1
pushes := 3

q := NewQueue(
nil, nil, qcapacity, batchsize, nil, nil,
)
q := NewQueue(nil, nil, qcapacity, batchsize, nil, nil, nil)
for i := 0; i < pushes; i++ {
q.Push([]*Alert{
{},
Expand All @@ -45,11 +45,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) {
}

func TestQueue_Push_Relabelled(t *testing.T) {
q := NewQueue(
nil, nil, 10, 10,
labels.FromStrings("a", "1", "replica", "A"), // Labels to be added.
[]string{"b", "replica"}, // Labels to be dropped (excluding those added).
)
q := NewQueue(nil, nil, 10, 10, labels.FromStrings("a", "1", "replica", "A"), []string{"b", "replica"}, nil)

q.Push([]*Alert{
{Labels: labels.FromStrings("b", "2", "c", "3")},
Expand All @@ -63,6 +59,37 @@ func TestQueue_Push_Relabelled(t *testing.T) {
testutil.Equals(t, labels.FromStrings("a", "1"), q.queue[2].Labels)
}

// TestQueue_Push_Relabelled_Alerts checks that relabel configs handed to
// NewQueue are applied to the labels of alerts pushed onto the queue.
func TestQueue_Push_Relabelled_Alerts(t *testing.T) {
	// Replace rule: capture "b" out of the value of label "a" and store it in label "d".
	captureIntoD := &relabel.Config{
		SourceLabels: model.LabelNames{"a"},
		Separator:    ";",
		Regex:        relabel.MustNewRegexp(".*(b).*"),
		TargetLabel:  "d",
		Action:       relabel.Replace,
		Replacement:  "$1",
	}
	q := NewQueue(nil, nil, 10, 10, labels.New(), []string{}, []*relabel.Config{captureIntoD})

	input := &Alert{Labels: labels.FromMap(map[string]string{"a": "abc"})}
	q.Push([]*Alert{input})

	// The original label must survive and the captured group must appear as "d".
	want := labels.FromMap(map[string]string{
		"a": "abc",
		"d": "b",
	})
	testutil.Equals(t, 1, len(q.queue))
	testutil.Equals(t, want, q.queue[0].Labels)
}

func assertSameHosts(t *testing.T, expected []*url.URL, found []*url.URL) {
testutil.Equals(t, len(expected), len(found))

Expand Down
10 changes: 10 additions & 0 deletions pkg/alert/config.go
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/relabel"
"gopkg.in/yaml.v2"

"github.com/thanos-io/thanos/pkg/discovery/dns"
Expand Down Expand Up @@ -130,3 +131,12 @@ func BuildAlertmanagerConfig(address string, timeout time.Duration) (Alertmanage
APIVersion: APIv1,
}, nil
}

// LoadRelabelConfigs loads a list of relabel.Config from YAML data.
//
// The data is decoded with yaml.UnmarshalStrict and any decoding error is
// returned unchanged. Empty input yields a nil slice and no error.
//
// A null list item (for example a bare "- " line) decodes to a nil
// *relabel.Config. Such an entry cannot be applied as a relabelling rule, so
// it is rejected here with an error instead of being returned to the caller.
func LoadRelabelConfigs(confYaml []byte) ([]*relabel.Config, error) {
	var cfg []*relabel.Config
	if err := yaml.UnmarshalStrict(confYaml, &cfg); err != nil {
		return nil, err
	}
	// Fail at load time rather than on a nil pointer when the rules are used.
	for _, rc := range cfg {
		if rc == nil {
			return nil, errors.New("empty or null alert relabeling rule")
		}
	}
	return cfg, nil
}

0 comments on commit 745f0f3

Please sign in to comment.