diff --git a/.github/workflows/grafana-lint.yml b/.github/workflows/grafana-lint.yml index 53d5d4b..c25e03e 100644 --- a/.github/workflows/grafana-lint.yml +++ b/.github/workflows/grafana-lint.yml @@ -35,6 +35,7 @@ jobs: - name: Validate alert rules with promtool run: | - docker run --rm -v "$PWD/deploy/grafana:/rules:ro" \ + docker run --rm --entrypoint=promtool \ + -v "$PWD/deploy/grafana:/rules:ro" \ prom/prometheus:v2.55.1 \ - promtool check rules /rules/alerts.yaml + check rules /rules/alerts.yaml diff --git a/deploy/grafana/charon.json b/deploy/grafana/charon.json index 0d0c05e..3afaa03 100644 --- a/deploy/grafana/charon.json +++ b/deploy/grafana/charon.json @@ -13,7 +13,7 @@ ] }, "description": "Charon liquidation bot — scanner, executor, and profit telemetry. Scrapes the charon-metrics Prometheus exporter (default :9091). Chain/Instance variables default to All (.*) so panels render before the first scrape populates label_values; they auto-refine once metrics flow. Mempool / gas / RPC-latency panels deferred pending backing series: mempool #300, gas #301, rpc-latency #302. Alerting rules live in deploy/grafana/alerts.yaml.", - "editable": true, + "editable": false, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, @@ -53,7 +53,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (chain) (rate(charon_scanner_blocks_total{instance=~\"$instance\",chain=~\"$chain\"}[1m]))", + "expr": "sum by (chain) (rate(charon_scanner_blocks_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval]))", "legendFormat": "{{chain}}", "range": true, "refId": "A" @@ -64,7 +64,7 @@ }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "description": "Per-block pipeline wall-clock latency. p50 and p95 from the histogram. 
BSC produces a block every ~3s; quantiles use a [5m] range (~100 observations) so they stay stable across scrapes.", + "description": "Per-block pipeline wall-clock latency. p50 and p95 from the histogram. BSC produces a block every ~3s; quantiles use $__rate_interval (at least 4× the scrape interval, widening as the panel step grows) so they stay stable across scrapes while respecting the resolution Grafana selects for the current panel.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, @@ -90,7 +90,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m])))", + "expr": "histogram_quantile(0.5, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])))", "legendFormat": "p50 {{chain}}", "range": true, "refId": "A" @@ -98,7 +98,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m])))", + "expr": "histogram_quantile(0.95, sum by (le, chain) (rate(charon_pipeline_block_duration_seconds_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])))", "legendFormat": "p95 {{chain}}", "range": true, "refId": "B" @@ -139,7 +139,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (bucket) (charon_scanner_positions{instance=~\"$instance\",chain=~\"$chain\"})", + "expr": "sum by (bucket) (charon_scanner_positions{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"})", "legendFormat": "{{bucket}}", "range": true, "refId": "A" @@ -182,7 +182,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": 
"charon_executor_queue_depth{instance=~\"$instance\"}", + "expr": "charon_executor_queue_depth{instance=~\"$instance\",job=~\"$job\"}", "legendFormat": "queue depth", "range": true, "refId": "A" @@ -223,7 +223,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(increase(charon_executor_profit_usd_cents_sum{instance=~\"$instance\",chain=~\"$chain\"}[$__range])) / 100", + "expr": "sum(increase(charon_executor_profit_usd_cents_sum{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__range])) / 100", "legendFormat": "profit (selected range)", "range": true, "refId": "A" @@ -264,7 +264,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (result) (rate(charon_executor_simulations_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60", + "expr": "sum by (result) (rate(charon_executor_simulations_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) * 60", "legendFormat": "{{result}}", "range": true, "refId": "A" @@ -301,7 +301,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "label_replace(sum(rate(charon_executor_opportunities_queued_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60, \"stage\", \"queued\", \"\", \"\")", + "expr": "label_replace(sum(rate(charon_executor_opportunities_queued_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) * 60, \"stage\", \"queued\", \"\", \"\")", "legendFormat": "{{stage}}", "range": true, "refId": "A" @@ -309,7 +309,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (stage) (rate(charon_executor_opportunities_dropped_total{instance=~\"$instance\",chain=~\"$chain\"}[1m])) * 60", + "expr": "sum by (stage) (rate(charon_executor_opportunities_dropped_total{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval])) 
* 60", "legendFormat": "{{stage}}", "range": true, "refId": "B" @@ -353,7 +353,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (le) (rate(charon_executor_profit_usd_cents_bucket{instance=~\"$instance\",chain=~\"$chain\"}[5m]))", + "expr": "sum by (le) (rate(charon_executor_profit_usd_cents_bucket{instance=~\"$instance\",chain=~\"$chain\",job=~\"$job\"}[$__rate_interval]))", "format": "heatmap", "legendFormat": "{{le}}", "range": true, @@ -367,7 +367,7 @@ "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Running build metadata. Only `version` is surfaced; `git_sha` is intentionally hidden via transform exclusion until /metrics has auth (open #214) and LAN exposure is addressed (#213). Surfacing the exact SHA of a running binary to anyone with Grafana read access is an intelligence leak while those are unresolved.", "fieldConfig": { - "defaults": { "custom": { "align": "auto" } }, + "defaults": { "custom": { "align": "auto" }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, @@ -382,7 +382,7 @@ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "editorMode": "code", - "expr": "charon_build_info{instance=~\"$instance\"}", + "expr": "charon_build_info{instance=~\"$instance\",job=~\"$job\"}", "format": "table", "instant": true, "legendFormat": "__auto", @@ -424,6 +424,25 @@ "skipUrlSync": false, "type": "datasource" }, + { + "allValue": ".+", + "current": { "selected": false, "text": "charon", "value": "charon" }, + "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "definition": "label_values(charon_build_info, job)", + "description": "Prometheus scrape job. 
Defaults to `charon`.", + "hide": 0, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "options": [], + "query": { "query": "label_values(charon_build_info, job)", "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, { "allValue": ".*", "current": { "selected": true, "text": "All", "value": "$__all" }, @@ -444,11 +463,11 @@ "type": "query" }, { - "allValue": ".*", + "allValue": ".+", "current": { "selected": true, "text": "All", "value": "$__all" }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, "definition": "label_values(charon_build_info, instance)", - "description": "Instance label (Prometheus scrape target). Defaults to All (.*) so panels render before the first scrape populates the dropdown.", + "description": "Instance label (Prometheus scrape target). Defaults to All (.+) so panels render before the first scrape populates the dropdown.", "hide": 0, "includeAll": true, "label": "Instance",