From 7aeee63936ff0820ffeffd28c5d2427791e92619 Mon Sep 17 00:00:00 2001 From: Cody Kaczynski Date: Mon, 28 Oct 2024 21:32:48 -0400 Subject: [PATCH] feat: add custom labels to `sloth_slo_info` metric --- CHANGELOG.md | 7 ++++++- README.md | 3 +++ cmd/sloth/commands/generate.go | 3 ++- cmd/sloth/commands/k8scontroller.go | 3 ++- .../tests/testdata/output/deployment_custom.yaml | 2 +- .../output/deployment_custom_no_extras.yaml | 2 +- .../output/deployment_custom_slo_config.yaml | 2 +- .../testdata/output/deployment_default.yaml | 2 +- .../kubernetes/helm/sloth/tests/values_test.go | 2 +- deploy/kubernetes/helm/sloth/values.yaml | 4 ++-- .../raw/sloth-with-common-plugins.yaml | 2 +- deploy/kubernetes/raw/sloth.yaml | 2 +- internal/k8sprometheus/model.go | 1 + internal/k8sprometheus/spec.go | 1 + internal/prometheus/model.go | 1 + internal/prometheus/recording_rules.go | 16 ++++++++++------ internal/prometheus/spec.go | 2 ++ internal/prometheus/spec_test.go | 12 ++++++++++++ pkg/kubernetes/api/sloth/v1/types.go | 3 +++ pkg/prometheus/api/v1/v1.go | 2 ++ 20 files changed, 54 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca1c93f1..930860ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +## [v0.13.0] - 2024-10-30 + +- Custom labels for `sloth_slo_info{}` metric [#4](https://github.com/linode-obs/sloth/pull/4) + ## [v0.12.0] - 2023-07-03 - Custom rule_group intervals for all recording rule types or a global default. @@ -166,7 +170,8 @@ - Support raw query based SLI. - Kubernetes (prometheus-operator) CRD generation support. 
-[unreleased]: https://github.com/slok/sloth/compare/v0.12.0...HEAD +[unreleased]: https://github.com/linode-obs/sloth/compare/v0.13.0...HEAD +[v0.13.0]: https://github.com/linode-obs/sloth/compare/v0.12.0...v0.13.0 [v0.12.0]: https://github.com/slok/sloth/compare/v0.11.0...v0.12.0 [v0.11.0]: https://github.com/slok/sloth/compare/v0.10.0...v0.11.0 [v0.10.0]: https://github.com/slok/sloth/compare/v0.9.0...v0.10.0 diff --git a/README.md b/README.md index bbcafe10..189962ec 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,9 @@ slos: description: "Common SLO based on availability for HTTP request responses." labels: category: availability + # These labels only apply to the `sloth_slo_info{}` metric - they are `string: string` typed. + infoLabels: + foo: "bar" sli: events: error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}])) diff --git a/cmd/sloth/commands/generate.go b/cmd/sloth/commands/generate.go index 2d5c24ae..50d28aa5 100644 --- a/cmd/sloth/commands/generate.go +++ b/cmd/sloth/commands/generate.go @@ -39,11 +39,12 @@ type generateCommand struct { sliPluginsPaths []string sloPeriodWindowsPath string sloPeriod string + infoLabels map[string]string } // NewGenerateCommand returns the generate command. func NewGenerateCommand(app *kingpin.Application) Command { - c := &generateCommand{extraLabels: map[string]string{}} + c := &generateCommand{extraLabels: map[string]string{}, infoLabels: map[string]string{}} cmd := app.Command("generate", "Generates Prometheus SLOs.") cmd.Flag("input", "SLO spec input file path or directory (if directory is used, slos will be discovered recursively and out must be a directory).").Short('i').StringVar(&c.slosInput) cmd.Flag("out", "Generated rules output file path or directory. 
If `-` it will use stdout (if input is a directory this must be a directory).").Default("-").Short('o').StringVar(&c.slosOut) diff --git a/cmd/sloth/commands/k8scontroller.go b/cmd/sloth/commands/k8scontroller.go index 5b0ffbff..b132b332 100644 --- a/cmd/sloth/commands/k8scontroller.go +++ b/cmd/sloth/commands/k8scontroller.go @@ -53,6 +53,7 @@ const ( type kubeControllerCommand struct { extraLabels map[string]string + infoLabels map[string]string workers int kubeConfig string kubeContext string @@ -73,7 +74,7 @@ type kubeControllerCommand struct { // NewKubeControllerCommand returns the Kubernetes controller command. func NewKubeControllerCommand(app *kingpin.Application) Command { - c := &kubeControllerCommand{extraLabels: map[string]string{}} + c := &kubeControllerCommand{extraLabels: map[string]string{}, infoLabels: map[string]string{}} cmd := app.Command("kubernetes-controller", "Runs Sloth in Kubernetes controller/operator mode.") cmd.Alias("controller") cmd.Alias("k8s-controller") diff --git a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom.yaml b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom.yaml index 1b7b0ff1..d741d3bd 100644 --- a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom.yaml +++ b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom.yaml @@ -39,7 +39,7 @@ spec: runAsUser: 100 containers: - name: sloth - image: slok/sloth-test:v1.42.42 + image: linode-obs/sloth-test:v1.42.42 args: - kubernetes-controller - --resync-interval=17m diff --git a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_no_extras.yaml b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_no_extras.yaml index c2d4ab0d..3601fe22 100644 --- a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_no_extras.yaml +++ b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_no_extras.yaml @@ -39,7 +39,7 @@ spec: runAsUser: 100 containers: - 
name: sloth - image: slok/sloth-test:v1.42.42 + image: linode-obs/sloth-test:v1.42.42 args: - kubernetes-controller - --resync-interval=17m diff --git a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_slo_config.yaml b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_slo_config.yaml index 6dbd8b11..8cb9444b 100644 --- a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_slo_config.yaml +++ b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_custom_slo_config.yaml @@ -40,7 +40,7 @@ spec: runAsUser: 100 containers: - name: sloth - image: slok/sloth-test:v1.42.42 + image: linode-obs/sloth-test:v1.42.42 args: - kubernetes-controller - --resync-interval=17m diff --git a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml index 2d19c3a5..1602755a 100644 --- a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml +++ b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml @@ -32,7 +32,7 @@ spec: serviceAccountName: sloth containers: - name: sloth - image: ghcr.io/slok/sloth:v0.11.0 + image: ghcr.io/linode-obs/sloth:v0.13.0 args: - kubernetes-controller - --sli-plugins-path=/plugins diff --git a/deploy/kubernetes/helm/sloth/tests/values_test.go b/deploy/kubernetes/helm/sloth/tests/values_test.go index bbfcc51e..9345af41 100644 --- a/deploy/kubernetes/helm/sloth/tests/values_test.go +++ b/deploy/kubernetes/helm/sloth/tests/values_test.go @@ -13,7 +13,7 @@ func customValues() msi { }, "image": msi{ - "repository": "slok/sloth-test", + "repository": "linode-obs/sloth-test", "tag": "v1.42.42", }, diff --git a/deploy/kubernetes/helm/sloth/values.yaml b/deploy/kubernetes/helm/sloth/values.yaml index 8c8b309d..26b38422 100644 --- a/deploy/kubernetes/helm/sloth/values.yaml +++ b/deploy/kubernetes/helm/sloth/values.yaml @@ -1,8 +1,8 @@ labels: {} image: - repository: 
ghcr.io/slok/sloth - tag: v0.11.0 + repository: ghcr.io/linode-obs/sloth + tag: v0.13.0 # -- Container resources: requests and limits for CPU, Memory resources: diff --git a/deploy/kubernetes/raw/sloth-with-common-plugins.yaml b/deploy/kubernetes/raw/sloth-with-common-plugins.yaml index 21de1723..93ec8c37 100644 --- a/deploy/kubernetes/raw/sloth-with-common-plugins.yaml +++ b/deploy/kubernetes/raw/sloth-with-common-plugins.yaml @@ -85,7 +85,7 @@ spec: serviceAccountName: sloth containers: - name: sloth - image: ghcr.io/slok/sloth:v0.11.0 + image: ghcr.io/linode-obs/sloth:v0.13.0 args: - kubernetes-controller - --sli-plugins-path=/plugins diff --git a/deploy/kubernetes/raw/sloth.yaml b/deploy/kubernetes/raw/sloth.yaml index 458928f3..9a8f9a09 100644 --- a/deploy/kubernetes/raw/sloth.yaml +++ b/deploy/kubernetes/raw/sloth.yaml @@ -85,7 +85,7 @@ spec: serviceAccountName: sloth containers: - name: sloth - image: ghcr.io/slok/sloth:v0.11.0 + image: ghcr.io/linode-obs/sloth:v0.13.0 args: - kubernetes-controller ports: diff --git a/internal/k8sprometheus/model.go b/internal/k8sprometheus/model.go index d338fabc..49fef4ca 100644 --- a/internal/k8sprometheus/model.go +++ b/internal/k8sprometheus/model.go @@ -15,6 +15,7 @@ type K8sMeta struct { Namespace string Annotations map[string]string Labels map[string]string + InfoLabels map[string]string } // SLOGroup is a Kubernetes SLO group. 
Is created based on a regular Prometheus diff --git a/internal/k8sprometheus/spec.go b/internal/k8sprometheus/spec.go index d2226435..375f854a 100644 --- a/internal/k8sprometheus/spec.go +++ b/internal/k8sprometheus/spec.go @@ -101,6 +101,7 @@ func mapSpecToModel(ctx context.Context, defaultWindowPeriod time.Duration, plug TimeWindow: defaultWindowPeriod, Objective: specSLO.Objective, Labels: mergeLabels(spec.Labels, specSLO.Labels), + InfoLabels: specSLO.InfoLabels, PageAlertMeta: prometheus.AlertMeta{Disable: true}, TicketAlertMeta: prometheus.AlertMeta{Disable: true}, } diff --git a/internal/prometheus/model.go b/internal/prometheus/model.go index 4b7f2c42..77e711ed 100644 --- a/internal/prometheus/model.go +++ b/internal/prometheus/model.go @@ -53,6 +53,7 @@ type SLO struct { Labels map[string]string `validate:"dive,keys,prom_label_key,endkeys,required,prom_label_value"` PageAlertMeta AlertMeta TicketAlertMeta AlertMeta + InfoLabels map[string]string `validate:"dive,keys,prom_label_key,endkeys,required,prom_label_value"` } type SLOGroup struct { diff --git a/internal/prometheus/recording_rules.go b/internal/prometheus/recording_rules.go index b9f4a6c4..d8447076 100644 --- a/internal/prometheus/recording_rules.go +++ b/internal/prometheus/recording_rules.go @@ -205,6 +205,15 @@ const MetadataRecordingRulesGenerator = metadataRecordingRulesGenerator(false) func (m metadataRecordingRulesGenerator) GenerateMetadataRecordingRules(ctx context.Context, info info.Info, slo SLO, alerts alert.MWMBAlertGroup) ([]rulefmt.Rule, error) { labels := mergeLabels(slo.GetSLOIDPromLabels(), slo.Labels) + infoLabels := mergeLabels(labels, map[string]string{ + sloVersionLabelName: info.Version, + sloModeLabelName: string(info.Mode), + sloSpecLabelName: info.Spec, + sloObjectiveLabelName: strconv.FormatFloat(slo.Objective, 'f', -1, 64), + }) + + infoLabels = mergeLabels(infoLabels, slo.InfoLabels) + // Metatada Recordings. 
const ( metricSLOObjectiveRatio = "slo:objective:ratio" @@ -293,12 +302,7 @@ func (m metadataRecordingRulesGenerator) GenerateMetadataRecordingRules(ctx cont { Record: metricSLOInfo, Expr: `vector(1)`, - Labels: mergeLabels(labels, map[string]string{ - sloVersionLabelName: info.Version, - sloModeLabelName: string(info.Mode), - sloSpecLabelName: info.Spec, - sloObjectiveLabelName: strconv.FormatFloat(slo.Objective, 'f', -1, 64), - }), + Labels: infoLabels, }, } diff --git a/internal/prometheus/spec.go b/internal/prometheus/spec.go index eb499b36..807c23b8 100644 --- a/internal/prometheus/spec.go +++ b/internal/prometheus/spec.go @@ -68,6 +68,7 @@ func (y YAMLSpecLoader) LoadSpec(ctx context.Context, data []byte) (*SLOGroup, e func (y YAMLSpecLoader) mapSpecToModel(ctx context.Context, spec prometheusv1.Spec) (*SLOGroup, error) { models := make([]SLO, 0, len(spec.SLOs)) for _, specSLO := range spec.SLOs { + slo := SLO{ ID: fmt.Sprintf("%s-%s", spec.Service, specSLO.Name), RuleGroupInterval: specSLO.Interval.RuleGroupInterval, @@ -82,6 +83,7 @@ func (y YAMLSpecLoader) mapSpecToModel(ctx context.Context, spec prometheusv1.Sp Labels: mergeLabels(spec.Labels, specSLO.Labels), PageAlertMeta: AlertMeta{Disable: true}, TicketAlertMeta: AlertMeta{Disable: true}, + InfoLabels: specSLO.InfoLabels, } // Set SLIs. 
diff --git a/internal/prometheus/spec_test.go b/internal/prometheus/spec_test.go index 8c33a4fd..74f31778 100644 --- a/internal/prometheus/spec_test.go +++ b/internal/prometheus/spec_test.go @@ -137,6 +137,8 @@ labels: slos: - name: "slo-test" objective: 99 + infoLabels: + foo: bar sli: plugin: id: test_plugin @@ -156,6 +158,7 @@ slos: Service: "test-svc", TimeWindow: 30 * 24 * time.Hour, Labels: map[string]string{"gk1": "gv1"}, + InfoLabels: map[string]string{"foo": "bar"}, SLI: prometheus.SLI{ Raw: &prometheus.SLIRaw{ ErrorRatioQuery: `plugin_raw_expr{service="test-svc",slo="slo-test",objective="99.000000",gk1="gv1",k1="v1",k2="true"}`, @@ -178,6 +181,8 @@ labels: slos: - name: "slo-test" objective: 99 + infoLabels: + foo: bar sli: raw: error_ratio_query: test_expr_ratio_2 @@ -194,6 +199,7 @@ slos: Service: "test-svc", TimeWindow: 28 * 24 * time.Hour, Labels: map[string]string{"gk1": "gv1"}, + InfoLabels: map[string]string{"foo": "bar"}, SLI: prometheus.SLI{ Raw: &prometheus.SLIRaw{ ErrorRatioQuery: `test_expr_ratio_2`, @@ -217,6 +223,8 @@ slos: - name: "slo1" labels: category: test + infoLabels: + foo: bar objective: 99.99 description: "This is a test." 
sli: @@ -244,6 +252,8 @@ slos: - name: "slo2" labels: category: test2 + infoLabels: + foo: bar objective: 99.9 sli: raw: @@ -272,6 +282,7 @@ slos: "owner": "myteam", "category": "test", }, + InfoLabels: map[string]string{"foo": "bar"}, PageAlertMeta: prometheus.AlertMeta{ Disable: false, Name: "testAlert", @@ -314,6 +325,7 @@ slos: "owner": "myteam", "category": "test2", }, + InfoLabels: map[string]string{"foo": "bar"}, PageAlertMeta: prometheus.AlertMeta{Disable: true}, TicketAlertMeta: prometheus.AlertMeta{Disable: true}, }, diff --git a/pkg/kubernetes/api/sloth/v1/types.go b/pkg/kubernetes/api/sloth/v1/types.go index 00e27c73..67983d8a 100644 --- a/pkg/kubernetes/api/sloth/v1/types.go +++ b/pkg/kubernetes/api/sloth/v1/types.go @@ -68,6 +68,9 @@ type SLO struct { // +optional Labels map[string]string `json:"labels,omitempty"` + // Info labels added to the `sloth_slo_info` metric + InfoLabels map[string]string `json:"infoLabels,omitempty"` + // +kubebuilder:validation:Required // // SLI is the indicator (service level indicator) for this specific SLO. diff --git a/pkg/prometheus/api/v1/v1.go b/pkg/prometheus/api/v1/v1.go index 44f71a3c..8445ad4e 100644 --- a/pkg/prometheus/api/v1/v1.go +++ b/pkg/prometheus/api/v1/v1.go @@ -86,6 +86,8 @@ type SLO struct { // alerting rules for this specific SLO. These labels are merged with the // previous level labels. Labels map[string]string `yaml:"labels,omitempty"` + // Labels appended to `sloth_slo_info` + InfoLabels map[string]string `yaml:"infoLabels,omitempty"` // SLI is the indicator (service level indicator) for this specific SLO. SLI SLI `yaml:"sli"` // Alerting is the configuration with all the things related with the SLO