Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

## [Unreleased]

## [v0.13.1] - 2024-10-31

- Update Mockery from 2.14.0 to 2.46.3
- Run `make go-gen`
- Run `make kube-gen`
- Run `make examples-gen`
- Bump Helm Chart version from 0.8.1 to 0.8.2
- Bump image version from v0.13.0 to v0.13.1

## [v0.13.0] - 2024-10-31

- Fix buildvcs issues with test scripts
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/helm/sloth/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ description: Base chart for Sloth.
type: application
home: https://github.com/linode-obs/sloth
kubeVersion: ">= 1.19.0-0"
version: 0.8.1
version: 0.8.2
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ spec:
description:
description: Description is the description of the SLO.
type: string
infoLabels:
additionalProperties:
type: string
description: Info labels added to the `sloth_slo_info` metric
type: object
labels:
additionalProperties:
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ spec:
serviceAccountName: sloth
containers:
- name: sloth
image: ghcr.io/linode-obs/sloth:v0.13.0
image: ghcr.io/linode-obs/sloth:v0.13.1
args:
- kubernetes-controller
- --sli-plugins-path=/plugins
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/helm/sloth/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ labels: {}

image:
repository: ghcr.io/linode-obs/sloth
tag: v0.13.0
tag: v0.13.1

# -- Container resources: requests and limits for CPU, Memory
resources:
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/raw/sloth-with-common-plugins.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ spec:
serviceAccountName: sloth
containers:
- name: sloth
image: ghcr.io/linode-obs/sloth:v0.13.0
image: ghcr.io/linode-obs/sloth:v0.13.1
args:
- kubernetes-controller
- --sli-plugins-path=/plugins
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/raw/sloth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ spec:
serviceAccountName: sloth
containers:
- name: sloth
image: ghcr.io/linode-obs/sloth:v0.13.0
image: ghcr.io/linode-obs/sloth:v0.13.1
args:
- kubernetes-controller
ports:
Expand Down
2 changes: 1 addition & 1 deletion docker/dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM golang:1.23
LABEL org.opencontainers.image.source https://github.com/slok/sloth

ARG GOLANGCI_LINT_VERSION="1.61.0"
ARG MOCKERY_VERSION="2.14.0"
ARG MOCKERY_VERSION="2.46.3"
ARG GOMARKDOC_VERSION="0.4.1"
ARG HELM_VERSION="3.10.0"
ARG ostype=Linux
Expand Down
242 changes: 242 additions & 0 deletions examples/_gen/custom_rule_group_interval.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@

---
# Code generated by Sloth (dev): https://github.com/slok/sloth.
# DO NOT EDIT.

groups:
- name: sloth-slo-sli-recordings-myapp-cpu-availability
interval: 4m
rules:
- record: slo:sli_error:ratio_rate5m
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[5m])
)
)
/
(sum(
rate(node_cpu_seconds_total[5m])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 5m
- record: slo:sli_error:ratio_rate30m
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[30m])
)
)
/
(sum(
rate(node_cpu_seconds_total[30m])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 30m
- record: slo:sli_error:ratio_rate1h
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[1h])
)
)
/
(sum(
rate(node_cpu_seconds_total[1h])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 1h
- record: slo:sli_error:ratio_rate2h
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[2h])
)
)
/
(sum(
rate(node_cpu_seconds_total[2h])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 2h
- record: slo:sli_error:ratio_rate6h
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[6h])
)
)
/
(sum(
rate(node_cpu_seconds_total[6h])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 6h
- record: slo:sli_error:ratio_rate1d
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[1d])
)
)
/
(sum(
rate(node_cpu_seconds_total[1d])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 1d
- record: slo:sli_error:ratio_rate3d
expr: |
(sum(
rate(node_cpu_seconds_total{mode="softirq"}[3d])
)
)
/
(sum(
rate(node_cpu_seconds_total[3d])
)
)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 3d
- record: slo:sli_error:ratio_rate30d
expr: |
sum_over_time(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}[30d])
/ ignoring (sloth_window)
count_over_time(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}[30d])
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
sloth_window: 30d
- name: sloth-slo-meta-recordings-myapp-cpu-availability
interval: 2m
rules:
- record: slo:objective:ratio
expr: vector(0.9998999999999999)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: slo:error_budget:ratio
expr: vector(1-0.9998999999999999)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: slo:time_period:days
expr: vector(30)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: slo:current_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: slo:period_burn_rate:ratio
expr: |
slo:sli_error:ratio_rate30d{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
/ on(sloth_id, sloth_slo, sloth_service) group_left
slo:error_budget:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"}
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: slo:period_error_budget_remaining:ratio
expr: 1 - slo:period_burn_rate:ratio{sloth_id="myapp-cpu-availability", sloth_service="myapp",
sloth_slo="cpu-availability"}
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_service: myapp
sloth_slo: cpu-availability
- record: sloth_slo_info
expr: vector(1)
labels:
cmd: examplesgen.sh
owner: myteam
sloth_id: myapp-cpu-availability
sloth_mode: cli-gen-prom
sloth_objective: "99.99"
sloth_service: myapp
sloth_slo: cpu-availability
sloth_spec: prometheus/v1
sloth_version: dev
- name: sloth-slo-alerts-myapp-cpu-availability
interval: 2m
rules:
- alert: MyServiceHighErrorRate
expr: |
(
max(slo:sli_error:ratio_rate5m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (14.4 * 0.00010000000000005117)) without (sloth_window)
and
max(slo:sli_error:ratio_rate1h{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (14.4 * 0.00010000000000005117)) without (sloth_window)
)
or
(
max(slo:sli_error:ratio_rate30m{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (6 * 0.00010000000000005117)) without (sloth_window)
and
max(slo:sli_error:ratio_rate6h{sloth_id="myapp-cpu-availability", sloth_service="myapp", sloth_slo="cpu-availability"} > (6 * 0.00010000000000005117)) without (sloth_window)
)
labels:
category: availability
routing_key: myteam
severity: pageteam
sloth_severity: page
annotations:
summary: High error rate on 'myservice' requests responses
title: (page) {{$labels.sloth_service}} {{$labels.sloth_slo}} SLO error budget
burn rate is too fast.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading