From 4bf3c05bc57980a8cf2cd57a3cb14d2a67c89169 Mon Sep 17 00:00:00 2001 From: Douglas Camata <159076+douglascamata@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:14:10 +0100 Subject: [PATCH 1/5] mixins: Add code/grpc-code dimension to error widgets Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> --- examples/dashboards/overview.json | 15 +++++---------- examples/dashboards/query-frontend.json | 3 +-- examples/dashboards/query.json | 12 ++++-------- examples/dashboards/receive.json | 12 ++++-------- examples/dashboards/rule.json | 6 ++---- examples/dashboards/sidecar.json | 6 ++---- examples/dashboards/store.json | 6 ++---- .../thanos-grafana-builder/builder.libsonnet | 18 ++++++++++++++++++ .../lib/thanos-grafana-builder/grpc.libsonnet | 5 +++-- .../lib/thanos-grafana-builder/http.libsonnet | 5 +++-- 10 files changed, 44 insertions(+), 44 deletions(-) diff --git a/examples/dashboards/overview.json b/examples/dashboards/overview.json index 2d274bac3c..29bd665535 100644 --- a/examples/dashboards/overview.json +++ b/examples/dashboards/overview.json @@ -161,10 +161,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{handler=\"query\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"query\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{handler=\"query\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{handler=\"query\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -466,10 +465,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{handler=\"query_range\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"query_range\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{handler=\"query_range\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -823,10 +821,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1180,10 +1177,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1485,10 +1481,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{handler=\"receive\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{handler=\"receive\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{handler=\"receive\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{handler=\"receive\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/query-frontend.json b/examples/dashboards/query-frontend.json index ee46320359..0502d378b9 100644 --- a/examples/dashboards/query-frontend.json +++ b/examples/dashboards/query-frontend.json @@ -242,10 +242,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query-frontend\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/query.json b/examples/dashboards/query.json index f25474aed0..8a5b27de51 100644 --- a/examples/dashboards/query.json +++ b/examples/dashboards/query.json @@ -145,10 +145,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{job=~\"$job\", handler=\"query\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -450,10 +449,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query_range\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query_range\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{job=~\"$job\", handler=\"query_range\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"query_range\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -807,10 +805,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_client_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_client_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1164,10 +1161,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_client_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_client_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_client_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/receive.json b/examples/dashboards/receive.json index be45ee84fb..dee7683335 100644 --- a/examples/dashboards/receive.json +++ b/examples/dashboards/receive.json @@ -145,10 +145,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"receive\",code=~\"5..\"}[$interval])) / sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"receive\"}[$interval]))", + "expr": "sum by (job, code) (rate(http_requests_total{job=~\"$job\", handler=\"receive\",code=~\"5..\"}[$interval])) / ignoring (code) group_left() sum by (job) (rate(http_requests_total{job=~\"$job\", handler=\"receive\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1632,10 +1631,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\", grpc_method=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1989,10 +1987,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\", grpc_method!=\"RemoteWrite\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -2346,10 +2343,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/rule.json b/examples/dashboards/rule.json index 569a5aaeec..cb1250f7cc 100644 --- a/examples/dashboards/rule.json +++ b/examples/dashboards/rule.json @@ -966,10 +966,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -1323,10 +1322,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/sidecar.json b/examples/dashboards/sidecar.json index 26f3f5db05..0ab12060a1 100644 --- a/examples/dashboards/sidecar.json +++ b/examples/dashboards/sidecar.json @@ -197,10 +197,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -553,10 +552,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/examples/dashboards/store.json b/examples/dashboards/store.json index 7f3b614cfb..031c6b58c1 100644 --- a/examples/dashboards/store.json +++ b/examples/dashboards/store.json @@ -197,10 +197,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"unary\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"unary\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], @@ -554,10 +553,9 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (job) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", + "expr": "sum by (job, grpc_code) (rate(grpc_server_handled_total{grpc_code=~\"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss\",job=~\"$job\", grpc_type=\"server_stream\"}[$interval])) / ignoring (grpc_code) group_left() sum by (job) (rate(grpc_server_handled_total{job=~\"$job\", grpc_type=\"server_stream\"}[$interval]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "error", "step": 10 } ], diff --git a/mixin/lib/thanos-grafana-builder/builder.libsonnet b/mixin/lib/thanos-grafana-builder/builder.libsonnet index ecbc61e69c..39cedb404d 100644 --- a/mixin/lib/thanos-grafana-builder/builder.libsonnet +++ b/mixin/lib/thanos-grafana-builder/builder.libsonnet @@ -78,6 +78,24 @@ local utils = import '../utils.libsonnet'; ], }, + qpsErrTotalPerLabelPanel(selectorErr, selectorTotal, dimensions, perLabel):: { + local errExpr = 'sum by (%s, %s) (rate(%s[$interval]))' % [dimensions, perLabel, selectorErr], + local totalExpr = 'sum by (%s) (rate(%s[$interval]))' % [dimensions, selectorTotal], + + aliasColors: { + 'error': '#E24D42', + }, + targets: [ + { + expr: '%s / ignoring (%s) group_left() %s' % [errExpr, perLabel, totalExpr], + format: 'time_series', + intervalFactor: 2, + step: 10, + }, + ], + yaxes: $.yaxes({ format: 'percentunit' }), + } + $.stack, + qpsErrTotalPanel(selectorErr, selectorTotal, dimensions):: { local expr(selector) = 'sum by (%s) (rate(%s[$interval]))' % [dimensions, selector], diff --git a/mixin/lib/thanos-grafana-builder/grpc.libsonnet b/mixin/lib/thanos-grafana-builder/grpc.libsonnet index 6c58ad262a..62d1ae0d41 100644 --- a/mixin/lib/thanos-grafana-builder/grpc.libsonnet +++ b/mixin/lib/thanos-grafana-builder/grpc.libsonnet @@ -37,9 +37,10 @@ local utils = import '../utils.libsonnet'; } + $.stack, grpcErrorsPanel(metric, selector, dimensions):: - $.qpsErrTotalPanel( + $.qpsErrTotalPerLabelPanel( '%s{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss",%s}' % [metric, selector], '%s{%s}' % [metric, selector], - dimensions + dimensions, + 'grpc_code', ), } diff --git a/mixin/lib/thanos-grafana-builder/http.libsonnet b/mixin/lib/thanos-grafana-builder/http.libsonnet index e21caf941d..f3554411f8 100644 --- a/mixin/lib/thanos-grafana-builder/http.libsonnet +++ b/mixin/lib/thanos-grafana-builder/http.libsonnet @@ -25,9 +25,10 @@ local utils = import '../utils.libsonnet'; } + $.stack, httpErrPanel(metric, selector, dimensions):: - $.qpsErrTotalPanel( + $.qpsErrTotalPerLabelPanel( '%s{%s,code=~"5.."}' % [metric, selector], '%s{%s}' % [metric, selector], - dimensions + dimensions, + 'code', ), } From 9d8122aa338ec4cef02812a9c8dca59ae1769d6c Mon Sep 17 00:00:00 2001 From: Douglas Camata <159076+douglascamata@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:22:54 +0100 Subject: [PATCH 2/5] Update changelog Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dca8190444..75d8fb74ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6168](https://github.com/thanos-io/thanos/pull/6168) Receiver: Make ketama hashring fail early when configured with number of nodes lower than the replication factor. - [#6201](https://github.com/thanos-io/thanos/pull/6201) Query-Frontend: Disable absent and absent_over_time for vertical sharding. - [#6212](https://github.com/thanos-io/thanos/pull/6212) Query-Frontend: Disable scalar for vertical sharding. -- [#6107](https://github.com/thanos-io/thanos/pull/6082) Change default user id in container image from 0(root) to 1001 +- [#6107](https://github.com/thanos-io/thanos/pull/6082) Change default user id in container image from 0(root) to 1001. +- [#6231](https://github.com/thanos-io/thanos/pull/6231) mixins: Add code/grpc-code dimension to error widgets. ### Removed From 877f40f0452cb71f3031ec594c6ba3946c298a7f Mon Sep 17 00:00:00 2001 From: Douglas Camata <159076+douglascamata@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:19:24 +0100 Subject: [PATCH 3/5] Fix messed up merge conflict resolution Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> --- CHANGELOG.md | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ffecf17c9..6719248175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6183](https://github.com/thanos-io/thanos/pull/6183) Receiver: fix off by one in multitsdb flush that will result in empty blocks if the head only contains one sample - [#6197](https://github.com/thanos-io/thanos/pull/6197) Exemplar OTel: Fix exemplar for otel to use traceId instead of spanId and sample only if trace is sampled - [#6207](https://github.com/thanos-io/thanos/pull/6207) Receive: Remove the shipper once a tenant has been pruned. -- [#6216](https://github.com/thanos-io/thanos/pull/6216) Receiver: removed hard-coded value of EnableExemplarStorage flag and set it according to max-exemplar value +- [#6216](https://github.com/thanos-io/thanos/pull/6216) Receiver: removed hard-coded value of EnableExemplarStorage flag and set it according to max-exemplar value. ### Changed - [#6168](https://github.com/thanos-io/thanos/pull/6168) Receiver: Make ketama hashring fail early when configured with number of nodes lower than the replication factor. @@ -30,7 +30,6 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#6212](https://github.com/thanos-io/thanos/pull/6212) Query-Frontend: Disable scalar for vertical sharding. - [#6107](https://github.com/thanos-io/thanos/pull/6082) Change default user id in container image from 0(root) to 1001 - [#6228](https://github.com/thanos-io/thanos/pull/6228) Conditionally generate debug messages in ProxyStore to avoid memory bloat. -- [#6107](https://github.com/thanos-io/thanos/pull/6082) Change default user id in container image from 0(root) to 1001. - [#6231](https://github.com/thanos-io/thanos/pull/6231) mixins: Add code/grpc-code dimension to error widgets. ### Removed @@ -53,26 +52,49 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed -- [#5995](https://github.com/thanos-io/thanos/pull/5995) Sidecar: Loads the TLS certificate during startup. -- [#6044](https://github.com/thanos-io/thanos/pull/6044) Receive: mark ouf of window errors as conflict, if out-of-window samples ingestion is activated +### Changed + +### Removed + +## [v0.31.0](https://github.com/thanos-io/thanos/tree/release-0.31) - 22.03.2023 + +### Added + +- [#5990](https://github.com/thanos-io/thanos/pull/5990) Cache/Redis: add support for Redis Sentinel via new option `master_name`. +- [#6008](https://github.com/thanos-io/thanos/pull/6008) *: Add counter metric `gate_queries_total` to gate. +- [#5926](https://github.com/thanos-io/thanos/pull/5926) Receiver: Add experimental string interning in writer. Can be enabled with a hidden flag `--writer.intern`. +- [#5773](https://github.com/thanos-io/thanos/pull/5773) Store: Support disabling cache index header file by setting `--disable-caching-index-header-file`. When toggled, Stores can run without needing persistent disks. +- [#5653](https://github.com/thanos-io/thanos/pull/5653) Receive: Allow setting hashing algorithm per tenant in hashrings config. +- [#6074](https://github.com/thanos-io/thanos/pull/6074) *: Add histogram metrics `thanos_store_server_series_requested` and `thanos_store_server_chunks_requested` to all Stores. +- [#6074](https://github.com/thanos-io/thanos/pull/6074) *: Allow configuring series and sample limits per `Series` request for all Stores. +- [#6104](https://github.com/thanos-io/thanos/pull/6104) Store: Support S3 session token. +- [#5548](https://github.com/thanos-io/thanos/pull/5548) Query: Add experimental support for load balancing across multiple Store endpoints. +- [#6148](https://github.com/thanos-io/thanos/pull/6148) Query-frontend: Add `traceID` to slow query detected log line. +- [#6153](https://github.com/thanos-io/thanos/pull/6153) Query-frontend: Add `remote_user` (from http basic auth) and `remote_addr` to slow query detected log line. + +### Fixed + +- [#5995](https://github.com/thanos-io/thanos/pull/5995) Sidecar: Loads TLS certificate during startup. +- [#6044](https://github.com/thanos-io/thanos/pull/6044) Receive: Mark out-of-window errors as conflict when out-of-window samples ingestion is used. - [#6050](https://github.com/thanos-io/thanos/pull/6050) Store: Re-try bucket store initial sync upon failure. -- [#6067](https://github.com/thanos-io/thanos/pull/6067) Receive: fixed panic when querying uninitialized TSDBs. -- [#6082](https://github.com/thanos-io/thanos/pull/6082) Store: Don't error when no stores are matched. -- [#6098](https://github.com/thanos-io/thanos/pull/6098) Cache/Redis: upgrade `rueidis` to v0.0.93 to fix potential panic when the client-side caching is disabled. -- [#6103](https://github.com/thanos-io/thanos/pull/6103) Mixins(Rule): Fix query for long rule evaluations. -- [#6121](https://github.com/thanos-io/thanos/pull/6121) Receive: Deduplicate metamonitoring queries. +- [#6067](https://github.com/thanos-io/thanos/pull/6067) Receive: Fix panic when querying uninitialized TSDBs. +- [#6082](https://github.com/thanos-io/thanos/pull/6082) Query: Don't error when no stores are matched. +- [#6098](https://github.com/thanos-io/thanos/pull/6098) Cache/Redis: Upgrade `rueidis` to v0.0.93 to fix potential panic when the client-side caching is disabled. +- [#6103](https://github.com/thanos-io/thanos/pull/6103) Mixins(Rule): Fix expression for long rule evaluations. +- [#6121](https://github.com/thanos-io/thanos/pull/6121) Receive: Deduplicate meta-monitoring queries for [Active Series Limiting](https://thanos.io/tip/components/receive.md/#active-series-limiting-experimental). - [#6137](https://github.com/thanos-io/thanos/pull/6137) Downsample: Repair of non-empty XOR chunks during 1h downsampling. - [#6125](https://github.com/thanos-io/thanos/pull/6125) Query Frontend: Fix vertical shardable instant queries do not produce sorted results for `sort`, `sort_desc`, `topk` and `bottomk` functions. +- [#6203](https://github.com/thanos-io/thanos/pull/6203) Receive: Fix panic in head compaction under high query load. ### Changed -- [#6010](https://github.com/thanos-io/thanos/pull/6010) *: Upgrade Prometheus to v0.41.0. +- [#6010](https://github.com/thanos-io/thanos/pull/6010) *: Upgrade Prometheus to v0.42.0. - [#5999](https://github.com/thanos-io/thanos/pull/5999) *: Upgrade Alertmanager dependency to v0.25.0. - [#5887](https://github.com/thanos-io/thanos/pull/5887) Tracing: Make sure rate limiting sampler is the default, as was the case in version pre-0.29.0. - [#5997](https://github.com/thanos-io/thanos/pull/5997) Rule: switch to miekgdns DNS resolver as the default one. -- [#6035](https://github.com/thanos-io/thanos/pull/6035) Replicate: Support all types of matchers to match blocks for replication. Change matcher parameter from string slice to a single string. - [#6126](https://github.com/thanos-io/thanos/pull/6126) Build with Go 1.20 -- [#6131](https://github.com/thanos-io/thanos/pull/6131) Store: *breaking :warning:* Use Histograms for bucket metrics. +- [#6035](https://github.com/thanos-io/thanos/pull/6035) Tools (replicate): Support all types of matchers to match blocks for replication. Change matcher parameter from string slice to a single string. +- [#6131](https://github.com/thanos-io/thanos/pull/6131) Store: *breaking :warning:* Use Histograms instead of Summaries for bucket metrics. ## [v0.30.2](https://github.com/thanos-io/thanos/tree/release-0.30) - 28.01.2023 @@ -1576,4 +1598,4 @@ Initial version to have a stable reference before [gossip protocol removal](docs - Compact / Downsample offline commands. - Bucket commands. - Downsampling support for UI. -- Grafana dashboards for Thanos components. +- Grafana dashboards for Thanos components. \ No newline at end of file From 037096d6f4f6826dca773e0d19bd197aa95ffee8 Mon Sep 17 00:00:00 2001 From: Douglas Camata <159076+douglascamata@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:25:12 +0100 Subject: [PATCH 4/5] Readd empty line at the end of changelog Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6719248175..94ae5d8dce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1598,4 +1598,4 @@ Initial version to have a stable reference before [gossip protocol removal](docs - Compact / Downsample offline commands. - Bucket commands. - Downsampling support for UI. -- Grafana dashboards for Thanos components. \ No newline at end of file +- Grafana dashboards for Thanos components. From 90d2c40ce78c2d1358d489267615b679f92c4e70 Mon Sep 17 00:00:00 2001 From: Douglas Camata <159076+douglascamata@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:52:08 +0100 Subject: [PATCH 5/5] Rerun CI Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com>