Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add alert relabelling for Thanos Rule based on Prometheus' logic #4125

Merged
merged 3 commits into from
May 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re
- [#4171](https://github.com/thanos-io/thanos/pull/4171) Docker: Busybox image updated to latest (1.33.1)
- [#4175](https://github.com/thanos-io/thanos/pull/4175) Added Tag Configuration Support Lightstep Tracing
- [#4176](https://github.com/thanos-io/thanos/pull/4176) Query API: Adds optional `Stats param` to return stats for query APIs

- [#4125](https://github.com/thanos-io/thanos/pull/4125) Rule: Add `--alert.relabel-config` / `--alert.relabel-config-file` flags, which allow specifying alert relabel configurations in the same format as [Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config)
### Fixed
-
### Changed
Expand Down
2 changes: 2 additions & 0 deletions cmd/thanos/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ type alertMgrConfig struct {
alertmgrsDNSSDInterval time.Duration
alertExcludeLabels []string
alertQueryURL *string
alertRelabelConfigPath *extflag.PathOrContent
}

func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig {
Expand All @@ -215,5 +216,6 @@ func (ac *alertMgrConfig) registerFlag(cmd extflag.FlagClause) *alertMgrConfig {
ac.alertQueryURL = cmd.Flag("alert.query-url", "The external Thanos Query URL that would be set in all alerts 'Source' field").String()
cmd.Flag("alert.label-drop", "Labels by name to drop before sending to alertmanager. This allows alert to be deduplicated on replica label (repeated). Similar Prometheus alert relabelling").
StringsVar(&ac.alertExcludeLabels)
ac.alertRelabelConfigPath = extflag.RegisterPathOrContent(cmd, "alert.relabel-config", "YAML file that contains alert relabelling configuration.", false)
return ac
}
23 changes: 19 additions & 4 deletions cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/tsdb"
Expand Down Expand Up @@ -69,9 +70,10 @@ type ruleConfig struct {
query queryConfig
queryConfigYAML []byte

alertmgr alertMgrConfig
alertmgrsConfigYAML []byte
alertQueryURL *url.URL
alertmgr alertMgrConfig
alertmgrsConfigYAML []byte
alertQueryURL *url.URL
alertRelabelConfigYAML []byte

resendDelay time.Duration
evalInterval time.Duration
Expand Down Expand Up @@ -172,6 +174,11 @@ func registerRule(app *extkingpin.App) {
return errors.New("--alertmanagers.url and --alertmanagers.config* parameters cannot be defined at the same time")
}

conf.alertRelabelConfigYAML, err = conf.alertmgr.alertRelabelConfigPath.Content()
if err != nil {
return err
}

httpLogOpts, err := logging.ParseHTTPOptions(*reqLogDecision, reqLogConfig)
if err != nil {
return errors.Wrap(err, "error while parsing config for request logging")
Expand Down Expand Up @@ -352,6 +359,14 @@ func runRule(
level.Warn(logger).Log("msg", "no alertmanager configured")
}

var alertRelabelConfigs []*relabel.Config
if len(conf.alertRelabelConfigYAML) > 0 {
alertRelabelConfigs, err = alert.LoadRelabelConfigs(conf.alertRelabelConfigYAML)
if err != nil {
return err
}
}

amProvider := dns.NewProvider(
logger,
extprom.WrapRegistererWithPrefix("thanos_rule_alertmanagers_", reg),
Expand All @@ -377,7 +392,7 @@ func runRule(

var (
ruleMgr *thanosrules.Manager
alertQ = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels)
alertQ = alert.NewQueue(logger, reg, 10000, 100, labelsTSDBToProm(conf.lset), conf.alertmgr.alertExcludeLabels, alertRelabelConfigs)
)
{
// Run rule evaluation and alert notifications.
Expand Down
7 changes: 7 additions & 0 deletions docs/components/rule.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,13 @@ Flags:
--alert.query-url=ALERT.QUERY-URL
The external Thanos Query URL that would be set
in all alerts 'Source' field
--alert.relabel-config=<content>
Alternative to 'alert.relabel-config-file' flag
(mutually exclusive). Content of YAML file that
contains alert relabelling configuration.
--alert.relabel-config-file=<file-path>
Path to YAML file that contains alert
relabelling configuration.
--alertmanagers.config=<content>
Alternative to 'alertmanagers.config-file' flag
(mutually exclusive). Content of YAML file that
Expand Down
30 changes: 16 additions & 14 deletions pkg/alert/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"
"go.uber.org/atomic"

"github.com/thanos-io/thanos/pkg/runutil"
Expand Down Expand Up @@ -85,11 +86,12 @@ func (a *Alert) ResolvedAt(ts time.Time) bool {
// Queue is a queue of alert notifications waiting to be sent. The queue is consumed in batches
// and entries are dropped at the front if it runs full.
type Queue struct {
logger log.Logger
maxBatchSize int
capacity int
toAddLset labels.Labels
toExcludeLabels labels.Labels
logger log.Logger
maxBatchSize int
capacity int
toAddLset labels.Labels
toExcludeLabels labels.Labels
alertRelabelConfigs []*relabel.Config

mtx sync.Mutex
queue []*Alert
Expand Down Expand Up @@ -120,19 +122,20 @@ func relabelLabels(lset labels.Labels, excludeLset []string) (toAdd labels.Label

// NewQueue returns a new queue. The given label set is attached to all alerts pushed to the queue.
// The given exclude label set tells what label names to drop including external labels.
func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string) *Queue {
func NewQueue(logger log.Logger, reg prometheus.Registerer, capacity, maxBatchSize int, externalLset labels.Labels, excludeLabels []string, alertRelabelConfigs []*relabel.Config) *Queue {
toAdd, toExclude := relabelLabels(externalLset, excludeLabels)

if logger == nil {
logger = log.NewNopLogger()
}
q := &Queue{
logger: logger,
capacity: capacity,
morec: make(chan struct{}, 1),
maxBatchSize: maxBatchSize,
toAddLset: toAdd,
toExcludeLabels: toExclude,
logger: logger,
capacity: capacity,
morec: make(chan struct{}, 1),
maxBatchSize: maxBatchSize,
toAddLset: toAdd,
toExcludeLabels: toExclude,
alertRelabelConfigs: alertRelabelConfigs,

dropped: promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_alert_queue_alerts_dropped_total",
Expand Down Expand Up @@ -214,7 +217,6 @@ func (q *Queue) Push(alerts []*Alert) {
q.pushed.Add(float64(len(alerts)))

// Attach external labels and drop excluded labels before sending.
// TODO(bwplotka): User proper relabelling with https://github.com/thanos-io/thanos/issues/660.
for _, a := range alerts {
lb := labels.NewBuilder(labels.Labels{})
for _, l := range a.Labels {
Expand All @@ -226,7 +228,7 @@ func (q *Queue) Push(alerts []*Alert) {
for _, l := range q.toAddLset {
lb.Set(l.Name, l.Value)
}
a.Labels = lb.Labels()
a.Labels = relabel.Process(lb.Labels(), q.alertRelabelConfigs...)
}

// Queue capacity should be significantly larger than a single alert
Expand Down
43 changes: 35 additions & 8 deletions pkg/alert/alert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import (
"time"

"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
trutty marked this conversation as resolved.
Show resolved Hide resolved
"github.com/prometheus/prometheus/pkg/relabel"

promtestutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/thanos-io/thanos/pkg/testutil"
Expand All @@ -24,9 +26,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) {
batchsize := 1
pushes := 3

q := NewQueue(
nil, nil, qcapacity, batchsize, nil, nil,
)
q := NewQueue(nil, nil, qcapacity, batchsize, nil, nil, nil)
for i := 0; i < pushes; i++ {
q.Push([]*Alert{
{},
Expand All @@ -45,11 +45,7 @@ func TestQueue_Pop_all_Pushed(t *testing.T) {
}

func TestQueue_Push_Relabelled(t *testing.T) {
q := NewQueue(
nil, nil, 10, 10,
labels.FromStrings("a", "1", "replica", "A"), // Labels to be added.
[]string{"b", "replica"}, // Labels to be dropped (excluding those added).
)
q := NewQueue(nil, nil, 10, 10, labels.FromStrings("a", "1", "replica", "A"), []string{"b", "replica"}, nil)

q.Push([]*Alert{
{Labels: labels.FromStrings("b", "2", "c", "3")},
Expand All @@ -63,6 +59,37 @@ func TestQueue_Push_Relabelled(t *testing.T) {
testutil.Equals(t, labels.FromStrings("a", "1"), q.queue[2].Labels)
}

func TestQueue_Push_Relabelled_Alerts(t *testing.T) {
q := NewQueue(
nil, nil, 10, 10, labels.New(), []string{},
[]*relabel.Config{
{
SourceLabels: model.LabelNames{"a"},
Separator: ";",
Regex: relabel.MustNewRegexp(".*(b).*"),
TargetLabel: "d",
Action: relabel.Replace,
Replacement: "$1",
},
},
)

q.Push([]*Alert{
{Labels: labels.FromMap(map[string]string{
"a": "abc",
})},
})

testutil.Equals(t, 1, len(q.queue))
testutil.Equals(
t, labels.FromMap(map[string]string{
"a": "abc",
"d": "b",
}),
q.queue[0].Labels,
)
}

func assertSameHosts(t *testing.T, expected []*url.URL, found []*url.URL) {
testutil.Equals(t, len(expected), len(found))

Expand Down
10 changes: 10 additions & 0 deletions pkg/alert/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/relabel"
"gopkg.in/yaml.v2"

"github.com/thanos-io/thanos/pkg/discovery/dns"
Expand Down Expand Up @@ -130,3 +131,12 @@ func BuildAlertmanagerConfig(address string, timeout time.Duration) (Alertmanage
APIVersion: APIv1,
}, nil
}

// LoadRelabelConfigs loads a list of relabel.Config from YAML data.
//
// confYaml is decoded with yaml.UnmarshalStrict into a []*relabel.Config.
// An error is returned when decoding fails or when the list contains an
// empty or null entry; in both cases the returned slice is nil.
func LoadRelabelConfigs(confYaml []byte) ([]*relabel.Config, error) {
	var cfg []*relabel.Config
	if err := yaml.UnmarshalStrict(confYaml, &cfg); err != nil {
		return nil, err
	}

	// A null list item (for example a bare "-") decodes to a nil pointer
	// without any decoding error. Reject it here so callers never receive a
	// nil rule that would be dereferenced when the rules are applied.
	for _, c := range cfg {
		if c == nil {
			return nil, errors.New("empty or null alert relabeling rule")
		}
	}
	return cfg, nil
}