linode-obs · dennisme · Oct 31, 2024 · Oct 31, 2024 · Oct 31, 2024 · Oct 31, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,15 @@
 
 ## [Unreleased]
 
+## [v0.13.1] - 2024-10-31
+
+- Update Mockery from 2.14.0 to 2.46.3
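+- Regenerate Go mocks with `make go-gen`
+- Regenerate Kubernetes CRDs with `make kube-gen` (adds the `infoLabels` field to the PrometheusServiceLevel CRD)
+- Regenerate examples with `make examples-gen` (adds `examples/_gen/custom_rule_group_interval.yml`)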
+- Bump Helm Chart version from 0.8.1 to 0.8.2
+- Bump image version from v0.13.0 to v0.13.1
+
 ## [v0.13.0] - 2024-10-31
 
 - Fix buildvcs issues with test scripts

diff --git a/deploy/kubernetes/helm/sloth/Chart.yaml b/deploy/kubernetes/helm/sloth/Chart.yaml
@@ -4,4 +4,4 @@ description: Base chart for Sloth.
 type: application
 home: https://github.com/linode-obs/sloth
 kubeVersion: ">= 1.19.0-0"
-version: 0.8.1
+version: 0.8.2
diff --git a/deploy/kubernetes/helm/sloth/crds/sloth.slok.dev_prometheusservicelevels.yaml b/deploy/kubernetes/helm/sloth/crds/sloth.slok.dev_prometheusservicelevels.yaml
@@ -148,6 +148,11 @@ spec:
                     description:
                       description: Description is the description of the SLO.
                       type: string
+                    infoLabels:
+                      additionalProperties:
+                        type: string
+                      description: Info labels added to the `sloth_slo_info` metric
+                      type: object
                     labels:
                       additionalProperties:
                         type: string

diff --git a/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml b/deploy/kubernetes/helm/sloth/tests/testdata/output/deployment_default.yaml
@@ -32,7 +32,7 @@ spec:
       serviceAccountName: sloth
       containers:
         - name: sloth
-          image: ghcr.io/linode-obs/sloth:v0.13.0
+          image: ghcr.io/linode-obs/sloth:v0.13.1
           args:
             - kubernetes-controller
             - --sli-plugins-path=/plugins

diff --git a/deploy/kubernetes/helm/sloth/values.yaml b/deploy/kubernetes/helm/sloth/values.yaml
@@ -2,7 +2,7 @@ labels: {}
 
 image:
   repository: ghcr.io/linode-obs/sloth
-  tag: v0.13.0
+  tag: v0.13.1
 
 # -- Container resources: requests and limits for CPU, Memory
 resources:

diff --git a/deploy/kubernetes/raw/sloth-with-common-plugins.yaml b/deploy/kubernetes/raw/sloth-with-common-plugins.yaml
@@ -85,7 +85,7 @@ spec:
       serviceAccountName: sloth
       containers:
         - name: sloth
-          image: ghcr.io/linode-obs/sloth:v0.13.0
+          image: ghcr.io/linode-obs/sloth:v0.13.1
           args:
             - kubernetes-controller
             - --sli-plugins-path=/plugins

diff --git a/deploy/kubernetes/raw/sloth.yaml b/deploy/kubernetes/raw/sloth.yaml
@@ -85,7 +85,7 @@ spec:
       serviceAccountName: sloth
       containers:
         - name: sloth
-          image: ghcr.io/linode-obs/sloth:v0.13.0
+          image: ghcr.io/linode-obs/sloth:v0.13.1
           args:
             - kubernetes-controller
           ports:

diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile
@@ -3,7 +3,7 @@ FROM golang:1.23
 LABEL org.opencontainers.image.source https://github.com/slok/sloth
 
 ARG GOLANGCI_LINT_VERSION="1.61.0"
-ARG MOCKERY_VERSION="2.14.0"
+ARG MOCKERY_VERSION="2.46.3"
 ARG GOMARKDOC_VERSION="0.4.1"
 ARG HELM_VERSION="3.10.0"
 ARG ostype=Linux

diff --git a/examples/_gen/custom_rule_group_interval.yml b/examples/_gen/custom_rule_group_interval.yml
@@ -0,0 +1,242 @@
+
+---
+# Code generated by Sloth (dev): https://github.com/slok/sloth.
+# DO NOT EDIT.
+
+groups:
+- name: sloth-slo-sli-recordings-myapp-cpu-availability
+  interval: 4m
+  rules:
+  - record: slo:sli_error:ratio_rate5m
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[5m])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[5m])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 5m
+  - record: slo:sli_error:ratio_rate30m
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[30m])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[30m])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 30m
+  - record: slo:sli_error:ratio_rate1h
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[1h])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[1h])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 1h
+  - record: slo:sli_error:ratio_rate2h
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[2h])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[2h])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 2h
+  - record: slo:sli_error:ratio_rate6h
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[6h])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[6h])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 6h
+  - record: slo:sli_error:ratio_rate1d
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[1d])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[1d])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 1d
+  - record: slo:sli_error:ratio_rate3d
+    expr: |
+      (sum(
+        rate(node_cpu_seconds_total{mode="softirq"}[3d])
+      )
+      )
+      /
+      (sum(
+        rate(node_cpu_seconds_total[3d])
+      )
+      )
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 3d
+  - record: slo:sli_error:ratio_rate30d
+    expr: |
+      sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}[30d])
+      / ignoring (sloth_window)
+      count_over_time(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}[30d])
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_window: 30d
+- name: sloth-slo-meta-recordings-myapp-cpu-availability
+  interval: 2m
+  rules:
+  - record: slo:objective:ratio
+    expr: vector(0.9998999999999999)
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: slo:error_budget:ratio
+    expr: vector(1-0.9998999999999999)
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: slo:time_period:days
+    expr: vector(30)
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: slo:current_burn_rate:ratio
+    expr: |
+      slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
+      / on(sloth_id, sloth_slo, sloth_service) group_left
+      slo:error_budget:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: slo:period_burn_rate:ratio
+    expr: |
+      slo:sli_error:ratio_rate30d{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
+      / on(sloth_id, sloth_slo, sloth_service) group_left
+      slo:error_budget:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: slo:period_error_budget_remaining:ratio
+    expr: 1 - slo:period_burn_rate:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp",
+      sloth_slo="cpu-availability"}
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+  - record: sloth_slo_info
+    expr: vector(1)
+    labels:
+      cmd: examplesgen.sh
+      owner: myteam
+      sloth_id: myapp-cpu-availability
+      sloth_mode: cli-gen-prom
+      sloth_objective: "99.99"
+      sloth_service: myapp
+      sloth_slo: cpu-availability
+      sloth_spec: prometheus/v1
+      sloth_version: dev
+- name: sloth-slo-alerts-myapp-cpu-availability
+  interval: 2m
+  rules:
+  - alert: MyServiceHighErrorRate
+    expr: |
+      (
+          max(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (14.4 * 0.00010000000000005117)) without (sloth_window)
+          and
+          max(slo:sli_error:ratio_rate1h{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (14.4 * 0.00010000000000005117)) without (sloth_window)
+      )
+      or
+      (
+          max(slo:sli_error:ratio_rate30m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (6 * 0.00010000000000005117)) without (sloth_window)
+          and
+          max(slo:sli_error:ratio_rate6h{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (6 * 0.00010000000000005117)) without (sloth_window)
+      )
+    labels:
+      category: availability
+      routing_key: myteam
+      severity: pageteam
+      sloth_severity: page
+    annotations:
+      summary: High error rate on 'myservice' requests responses
+      title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
+        burn rate is too fast.
diff --git a/internal/k8sprometheus/k8sprometheusmock/prometheus_rules_ensurer.go b/internal/k8sprometheus/k8sprometheusmock/prometheus_rules_ensurer.go