diff --git a/.github/workflows/mixin.yml b/.github/workflows/mixin.yml
new file mode 100644
index 00000000..2c282069
--- /dev/null
+++ b/.github/workflows/mixin.yml
@@ -0,0 +1,41 @@
+---
+# Checks the Elasticsearch mixin on pull requests: jsonnet formatting,
+# compilation, and that the committed compiled output is up to date.
+name: mixin
+on:
+  pull_request:
+    paths:
+      - "elasticsearch-mixin/**"
+      # Re-run when the tooling that lints/compiles the mixin changes too.
+      - "scripts/compile-mixin.sh"
+      - "scripts/lint-jsonnet.sh"
+      - ".github/workflows/mixin.yml"
+
+jobs:
+  check-mixin:
+    name: check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          # Quoted so the version is always read as a string, never a float.
+          go-version: "1.22.5"
+      - name: Install dependencies
+        run: |
+          go install github.com/google/go-jsonnet/cmd/jsonnet@v0.20.0
+          go install github.com/google/go-jsonnet/cmd/jsonnetfmt@v0.20.0
+          go install github.com/google/go-jsonnet/cmd/jsonnet-lint@v0.20.0
+          go install github.com/monitoring-mixins/mixtool/cmd/mixtool@16dc166166d91e93475b86b9355a4faed2400c18
+          go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.5.1
+      - name: Lint
+        run: bash ./scripts/lint-jsonnet.sh
+      - name: Compile mixin
+        run: bash ./scripts/compile-mixin.sh
+      - name: Verify compiled mixin matches repo
+        run: |
+          git diff --exit-code -- ./elasticsearch-mixin || (echo "Compiled mixin does not match repo" && exit 1)
+          # Check if there are any new untracked files
+          test -z "$(git status --porcelain)" || (echo "Untracked files found, please run ./scripts/compile-mixin.sh" && exit 1)
diff --git a/.gitignore b/.gitignore
index e470da31..9687f990 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ elasticsearch_exporter
 *-stamp
 .tarballs
 /vendor
+vendor/
diff --git a/elasticsearch-mixin/README.md b/elasticsearch-mixin/README.md
new file mode 100644
index 00000000..ef290faa
--- /dev/null
+++ b/elasticsearch-mixin/README.md
@@ -0,0 +1,37 @@
+# Elasticsearch Exporter Mixin
+
+This is a mixin for the elasticsearch_exporter to define dashboards, alerts, and monitoring queries for use with this exporter.
+ +Good example of upstream mixin for reference: https://github.com/kubernetes-monitoring/kubernetes-mixin + + +docker-compose +- docker-compose exec elasticsearch bash + - bin/elasticsearch-reset-password -u elastic -f +- login to grafana +- add prometheus datasource (http://prometheus:9090) +- http://127.0.0.1:3000 +- http://127.0.0.1:9090/targets?search= +- http://127.0.0.1:9114/metrics + +## Development + +### JSONNET +https://jsonnet.org/ + +```go install github.com/google/go-jsonnet/cmd/jsonnet@latest``` + +### JSONNET BUNDLER +jsonnet bundler is a package manager for jsonnet + +https://github.com/jsonnet-bundler/jsonnet-bundler + +```go install -a github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest``` + +### Grafonnet +Grafana libraries for jsonnet: https://grafana.github.io/grafonnet/ + +```jb install github.com/grafana/grafonnet/gen/grafonnet-latest@main``` + +validate +go install github.com/grafana/dashboard-linter@latest diff --git a/elasticsearch-mixin/compiled/alerts.yaml b/elasticsearch-mixin/compiled/alerts.yaml new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/elasticsearch-mixin/compiled/alerts.yaml @@ -0,0 +1 @@ +{} diff --git a/elasticsearch-mixin/compiled/dashboards/cluster.json b/elasticsearch-mixin/compiled/dashboards/cluster.json new file mode 100644 index 00000000..bc9399a3 --- /dev/null +++ b/elasticsearch-mixin/compiled/dashboards/cluster.json @@ -0,0 +1,687 @@ +{ + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "panels": [ ], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_number_of_nodes{cluster=~\"$cluster\"}\n)\n" + } + ], + 
"title": "Nodes", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 3, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_number_of_data_nodes{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Data Nodes", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 4, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_number_of_pending_tasks{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Pending Tasks", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 5, + "panels": [ ], + "title": "Shards", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 6, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_active_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Active", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 7, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_active_primary_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Active Primary", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 5 + }, + 
"id": 8, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_initializing_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Initializing", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 5 + }, + "id": 9, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_relocating_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Relocating", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 5 + }, + "id": 10, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_unassigned_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "Unassigned", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 5 + }, + "id": 11, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~\"$cluster\"}\n)\n" + } + ], + "title": "DelayedUnassigned", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 12, + "panels": [ ], + "title": "Documents", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 13, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": 
"elasticsearch_indices_docs{cluster=~\"$cluster\"}\n" + } + ], + "title": "Indexed Documents", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 14, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "elasticsearch_indices_store_size_bytes{cluster=~\"$cluster\"}\n" + } + ], + "title": "Index Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 15, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(elasticsearch_indices_indexing_index_total{cluster=~\"$cluster\"}[$__rate_interval])\n", + "legendFormat": "{{name}}" + } + ], + "title": "Index Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 9 + }, + "id": 16, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(elasticsearch_indices_search_query_total{cluster=~\"$cluster\"}[$__rate_interval])\n", + "legendFormat": "{{name}}" + } + ], + "title": "Query Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 9 + }, + "id": 17, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(elasticsearch_thread_pool_queue_count{cluster=~\"$cluster\",type!=\"management\"}) by (type)\n", + "legendFormat": "{{type}}" + } + ], + "title": "Queue Count", + "type": "timeseries" + }, + { + "collapsed": 
false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 18, + "panels": [ ], + "title": "Memory", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 13 + }, + "id": 19, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}\n", + "legendFormat": "{{name}} {{area}}" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 13 + }, + "id": 20, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "avg_over_time(\n elasticsearch_jvm_memory_used_bytes{cluster=~\"$cluster\"}[15m]\n) /\nelasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n", + "legendFormat": "{{name}} {{area}}" + } + ], + "title": "Memory 15m Avg", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 13 + }, + "id": 21, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "elasticsearch_jvm_memory_max_bytes{cluster=~\"$cluster\"}\n", + "legendFormat": "{{name}} {{area}}" + } + ], + "title": "Memory Max", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 13 + }, + "id": 22, + "pluginVersion": "v10.4.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(\n elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\"}[$__rate_interval]\n)\n", + "legendFormat": "{{name}} {{gc}}" + } + ], + "title": "GC Rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 23, + "panels": [ ], + "title": "Threads", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 17 + }, + "id": 24, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "elasticsearch_thread_pool_active_count{cluster=~\"$cluster\"}\n", + "legendFormat": "{{type}}" + } + ], + "title": "Thread Pools", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 17 + }, + "id": 25, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\"}\n", + "legendFormat": "{{name}} {{type}}" + } + ], + "title": "Thread Pool Rejections", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 26, + "panels": [ ], + "title": "Network", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 21 + }, + "id": 27, + "pluginVersion": "v10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(\n elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n", + "legendFormat": "{{name}} TX" + }, + { + "datasource": { 
+ "type": "prometheus", + "uid": "$datasource" + }, + "expr": "rate(\n elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]\n)\n", + "legendFormat": "{{name}} RX" + } + ], + "title": "Transport Rate", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 36, + "tags": [ + "elasticsearch-exporter-mixin" + ], + "templating": { + "list": [ + { + "name": "datasource", + "query": "prometheus", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "name": "cluster", + "query": "label_values(elasticsearch_cluster_health_status, cluster)", + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "utc", + "title": "Elasticsearch Exporter / Cluster" + } \ No newline at end of file diff --git a/elasticsearch-mixin/compiled/rules.yaml b/elasticsearch-mixin/compiled/rules.yaml new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/elasticsearch-mixin/compiled/rules.yaml @@ -0,0 +1 @@ +{} diff --git a/elasticsearch-mixin/config.libsonnet b/elasticsearch-mixin/config.libsonnet new file mode 100644 index 00000000..3cf4a4f9 --- /dev/null +++ b/elasticsearch-mixin/config.libsonnet @@ -0,0 +1,6 @@ +{ + _config+:: { + dashboardNamePrefix: 'Elasticsearch Exporter / ', + dashboardTags: ['elasticsearch-exporter-mixin'], + }, +} diff --git a/elasticsearch-mixin/dashboards.jsonnet b/elasticsearch-mixin/dashboards.jsonnet new file mode 100644 index 00000000..9a03074e --- /dev/null +++ b/elasticsearch-mixin/dashboards.jsonnet @@ -0,0 +1,3 @@ +local dashboards = (import 'mixin.libsonnet').grafanaDashboards; + +{ [name]: dashboards[name] for name in std.objectFields(dashboards) } diff --git a/elasticsearch-mixin/dashboards/cluster.libsonnet b/elasticsearch-mixin/dashboards/cluster.libsonnet new file mode 100644 index 00000000..45744279 --- /dev/null +++ b/elasticsearch-mixin/dashboards/cluster.libsonnet @@ -0,0 +1,66 @@ +local g = import 
'g.libsonnet';
+
+local dashboard = g.dashboard;
+local row = g.panel.row;
+
+local panels = import './panels.libsonnet';
+local queries = import './queries.libsonnet';
+local variables = import './variables.libsonnet';
+
+{
+  grafanaDashboards+:: {
+    'cluster.json':
+      dashboard.new('%sCluster' % $._config.dashboardNamePrefix)  // default prefix already ends with '/ '
+      + dashboard.withTags($._config.dashboardTags)
+      + dashboard.withRefresh('1m')
+      + dashboard.time.withFrom(value='now-1h')
+      + dashboard.graphTooltip.withSharedCrosshair()
+      + dashboard.withVariables([
+        variables.datasource,
+        variables.cluster,
+      ])
+      + dashboard.withPanels(
+        g.util.grid.makeGrid([
+          row.new('Overview')
+          + row.withPanels([
+            panels.stat.nodes('Nodes', queries.runningNodes),
+            panels.stat.nodes('Data Nodes', queries.dataNodes),
+            panels.stat.nodes('Pending Tasks', queries.pendingTasks),
+          ]),
+          row.new('Shards')
+          + row.withPanels([
+            panels.stat.nodes('Active', queries.activeShards),
+            panels.stat.nodes('Active Primary', queries.activePrimaryShards),
+            panels.stat.nodes('Initializing', queries.initializingShards),
+            panels.stat.nodes('Relocating', queries.reloactingShards),
+            panels.stat.nodes('Unassigned', queries.unassignedShards),
+            panels.stat.nodes('DelayedUnassigned', queries.delayedUnassignedShards),
+          ]),
+          row.new('Documents')
+          + row.withPanels([
+            panels.timeSeries.base('Indexed Documents', queries.indexedDocuments),
+            panels.timeSeries.bytes('Index Size', queries.indexSize),
+            panels.timeSeries.base('Index Rate', queries.indexRate),
+            panels.timeSeries.base('Query Rate', queries.queryRate),
+            panels.timeSeries.base('Queue Count', queries.queueCount),
+          ]),
+          row.new('Memory')
+          + row.withPanels([
+            panels.timeSeries.bytes('Memory Usage', queries.memoryUsage),
+            panels.timeSeries.ratioMax1('Memory 15m Avg', queries.memoryUsageAverage15),
+            panels.timeSeries.bytes('Memory Max', queries.memoryMax),
+            panels.timeSeries.seconds('GC Rate', queries.gcSeconds),
+          ]),
+          row.new('Threads')
+          + row.withPanels([
+            panels.timeSeries.base('Thread Pools', queries.threadPoolActive),
+            panels.timeSeries.base('Thread Pool Rejections', queries.threadPoolRejections),
+          ]),
+          row.new('Network')
+          + row.withPanels([
+            panels.timeSeries.bytes('Transport Rate', [queries.transportTXRate, queries.transportRXRate]),
+          ]),
+        ], panelWidth=4, panelHeight=3),
+      ),
+  },
+}
diff --git a/elasticsearch-mixin/dashboards/dashboards.libsonnet b/elasticsearch-mixin/dashboards/dashboards.libsonnet
new file mode 100644
index 00000000..16802c25
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/dashboards.libsonnet
@@ -0,0 +1 @@
+(import 'cluster.libsonnet')
diff --git a/elasticsearch-mixin/dashboards/g.libsonnet b/elasticsearch-mixin/dashboards/g.libsonnet
new file mode 100644
index 00000000..69aac830
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/g.libsonnet
@@ -0,0 +1 @@
+import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'
diff --git a/elasticsearch-mixin/dashboards/panels.libsonnet b/elasticsearch-mixin/dashboards/panels.libsonnet
new file mode 100644
index 00000000..f5f24d45
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/panels.libsonnet
@@ -0,0 +1,47 @@
+local g = import 'g.libsonnet';
+
+// Panel constructors shared by the dashboards. Each helper takes a panel title
+// and the query target(s) to display.
+{
+  stat: {
+    local stat = g.panel.stat,
+
+    // Stat panel for the given target(s).
+    base(title, targets):
+      stat.new(title)
+      + stat.queryOptions.withTargets(targets),
+
+    // Alias of base.
+    nodes: self.base,
+  },
+
+  timeSeries: {
+    local timeSeries = g.panel.timeSeries,
+
+    // Time series panel with no unit configured.
+    base(title, targets):
+      timeSeries.new(title)
+      + timeSeries.queryOptions.withTargets(targets),
+
+    // Time series panel using the 'percentunit' unit.
+    ratio(title, targets):
+      self.base(title, targets)
+      + timeSeries.standardOptions.withUnit('percentunit'),
+
+    // Ratio panel with the value axis fixed to the range 0..1.
+    ratioMax1(title, targets):
+      self.ratio(title, targets)
+      + timeSeries.standardOptions.withMax(1)
+      + timeSeries.standardOptions.withMin(0),
+
+    // Time series panel using the 'bytes' unit.
+    bytes(title, targets):
+      self.base(title, targets)
+      + timeSeries.standardOptions.withUnit('bytes'),
+
+    // Time series panel using the 's' (seconds) unit.
+    seconds(title, targets):
+      self.base(title, targets)
+      + timeSeries.standardOptions.withUnit('s'),
+  },
+}
diff
--git a/elasticsearch-mixin/dashboards/queries.libsonnet b/elasticsearch-mixin/dashboards/queries.libsonnet
new file mode 100644
index 00000000..d7a8e661
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries.libsonnet
@@ -0,0 +1,8 @@
+// Merges every query group into a single object. Field names must be unique
+// across the imported files: with `+`, a later file overrides an earlier one.
+(import './queries/general.libsonnet') +
+(import './queries/shard.libsonnet') +
+(import './queries/document.libsonnet') +
+(import './queries/memory.libsonnet') +
+(import './queries/threads.libsonnet') +
+(import './queries/network.libsonnet')
diff --git a/elasticsearch-mixin/dashboards/queries/document.libsonnet b/elasticsearch-mixin/dashboards/queries/document.libsonnet
new file mode 100644
index 00000000..369ce885
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/document.libsonnet
@@ -0,0 +1,51 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+// Queries plotted in the 'Documents' dashboard row.
+{
+  indexedDocuments:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        elasticsearch_indices_docs{cluster=~"$cluster"}
+      |||
+    ),
+
+  indexSize:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        elasticsearch_indices_store_size_bytes{cluster=~"$cluster"}
+      |||
+    ),
+
+  indexRate:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        rate(elasticsearch_indices_indexing_index_total{cluster=~"$cluster"}[$__rate_interval])
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}}'),
+
+  queryRate:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        rate(elasticsearch_indices_search_query_total{cluster=~"$cluster"}[$__rate_interval])
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}}'),
+
+  queueCount:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(elasticsearch_thread_pool_queue_count{cluster=~"$cluster",type!="management"}) by (type)
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{type}}'),
+
+}
diff --git a/elasticsearch-mixin/dashboards/queries/general.libsonnet b/elasticsearch-mixin/dashboards/queries/general.libsonnet
new file mode 100644
index 00000000..053207ac
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/general.libsonnet
@@ -0,0 +1,36 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+// Queries plotted in the 'Overview' dashboard row.
+{
+  runningNodes:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_number_of_nodes{cluster=~"$cluster"}
+        )
+      |||
+    ),
+  dataNodes:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_number_of_data_nodes{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  pendingTasks:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_number_of_pending_tasks{cluster=~"$cluster"}
+        )
+      |||
+    ),
+}
diff --git a/elasticsearch-mixin/dashboards/queries/memory.libsonnet b/elasticsearch-mixin/dashboards/queries/memory.libsonnet
new file mode 100644
index 00000000..59e8d274
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/memory.libsonnet
@@ -0,0 +1,47 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+{
+  memoryUsage:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),
+
+  memoryUsageAverage15:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        avg_over_time(
+          elasticsearch_jvm_memory_used_bytes{cluster=~"$cluster"}[15m]
+        ) /
+        elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),
+
+  memoryMax:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+
elasticsearch_jvm_memory_max_bytes{cluster=~"$cluster"}
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} {{area}}'),
+
+  gcSeconds:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        rate(
+          elasticsearch_jvm_gc_collection_seconds_sum{cluster=~"$cluster"}[$__rate_interval]
+        )
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} {{gc}}'),
+}
diff --git a/elasticsearch-mixin/dashboards/queries/network.libsonnet b/elasticsearch-mixin/dashboards/queries/network.libsonnet
new file mode 100644
index 00000000..76aab2b5
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/network.libsonnet
@@ -0,0 +1,30 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+{
+  // Transmit side: per-second rate of the transport tx byte counter.
+  transportTXRate:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        rate(
+          elasticsearch_transport_tx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
+        )
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} TX'),
+
+  // Receive side: per-second rate of the transport rx byte counter.
+  transportRXRate:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        rate(
+          elasticsearch_transport_rx_size_bytes_total{cluster=~"$cluster"}[$__rate_interval]
+        )
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} RX'),
+}
diff --git a/elasticsearch-mixin/dashboards/queries/shard.libsonnet b/elasticsearch-mixin/dashboards/queries/shard.libsonnet
new file mode 100644
index 00000000..6bebb4e6
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/shard.libsonnet
@@ -0,0 +1,66 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+{
+  activeShards:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_active_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  activePrimaryShards:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+
elasticsearch_cluster_health_active_primary_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  initializingShards:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_initializing_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  reloactingShards:  // (sic) field name kept: cluster.libsonnet refers to it
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_relocating_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  unassignedShards:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_unassigned_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+
+  delayedUnassignedShards:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        sum(
+          elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~"$cluster"}
+        )
+      |||
+    ),
+}
diff --git a/elasticsearch-mixin/dashboards/queries/threads.libsonnet b/elasticsearch-mixin/dashboards/queries/threads.libsonnet
new file mode 100644
index 00000000..1fdb3e63
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/queries/threads.libsonnet
@@ -0,0 +1,24 @@
+local g = import '../g.libsonnet';
+local prometheusQuery = g.query.prometheus;
+
+local variables = import '../variables.libsonnet';
+
+{
+  threadPoolActive:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        elasticsearch_thread_pool_active_count{cluster=~"$cluster"}
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{type}}'),
+
+  threadPoolRejections:
+    prometheusQuery.new(
+      '$' + variables.datasource.name,
+      |||
+        elasticsearch_thread_pool_rejected_count{cluster=~"$cluster"}
+      |||
+    )
+    + prometheusQuery.withLegendFormat('{{name}} {{type}}'),
+}
diff --git a/elasticsearch-mixin/dashboards/variables.libsonnet b/elasticsearch-mixin/dashboards/variables.libsonnet
new file mode 100644
index 00000000..53233a23
--- /dev/null
+++ b/elasticsearch-mixin/dashboards/variables.libsonnet
@@ -0,0 +1,15 @@
+local g = import './g.libsonnet';
+local var = g.dashboard.variable;
+
+{
+
datasource: + var.datasource.new('datasource', 'prometheus'), + + cluster: + var.query.new('cluster') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'cluster', + 'elasticsearch_cluster_health_status', + ), +} diff --git a/elasticsearch-mixin/jsonnetfile.json b/elasticsearch-mixin/jsonnetfile.json new file mode 100644 index 00000000..2414c867 --- /dev/null +++ b/elasticsearch-mixin/jsonnetfile.json @@ -0,0 +1,15 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" + } + }, + "version": "main" + } + ], + "legacyImports": true +} diff --git a/elasticsearch-mixin/jsonnetfile.lock.json b/elasticsearch-mixin/jsonnetfile.lock.json new file mode 100644 index 00000000..31b59c22 --- /dev/null +++ b/elasticsearch-mixin/jsonnetfile.lock.json @@ -0,0 +1,46 @@ +{ + "version": 1, + "dependencies": [ + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" + } + }, + "version": "1c56af39815c4903e47c27194444456f005f65df", + "sum": "GxEO83uxgsDclLp/fmlUJZDbSGpeUZY6Ap3G2cgdL1g=" + }, + { + "source": { + "git": { + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-v10.4.0" + } + }, + "version": "1c56af39815c4903e47c27194444456f005f65df", + "sum": "DKj+Sn+rlI48g/aoJpzkfPge46ya0jLk5kcZoiZ2X/I=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/docsonnet.git", + "subdir": "doc-util" + } + }, + "version": "6ac6c69685b8c29c54515448eaca583da2d88150", + "sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U=" + }, + { + "source": { + "git": { + "remote": "https://github.com/jsonnet-libs/xtd.git", + "subdir": "" + } + }, + "version": "63d430b69a95741061c2f7fc9d84b1a778511d9c", + "sum": "qiZi3axUSXCVzKUF83zSAxklwrnitMmrDK4XAfjPMdE=" + } + ], + "legacyImports": false +} diff --git 
a/elasticsearch-mixin/mixin.libsonnet b/elasticsearch-mixin/mixin.libsonnet
new file mode 100644
index 00000000..7083cb16
--- /dev/null
+++ b/elasticsearch-mixin/mixin.libsonnet
@@ -0,0 +1,3 @@
+// (import 'alerts/alerts.libsonnet') +
+(import 'dashboards/dashboards.libsonnet') +
+(import 'config.libsonnet')
diff --git a/scripts/compile-mixin.sh b/scripts/compile-mixin.sh
new file mode 100755
index 00000000..1cd1f849
--- /dev/null
+++ b/scripts/compile-mixin.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Regenerates the compiled mixin (alerts, rules, dashboards) from the jsonnet
+# sources. Paths are relative, so run it from the repository root.
+
+# Abort on the first failing command so a broken step cannot be masked.
+set -euo pipefail
+
+MIXIN_PATH=./elasticsearch-mixin
+MIXIN_OUT_PATH="${MIXIN_PATH}/compiled"
+
+# Start from an empty output directory so stale files do not survive.
+rm -rf "${MIXIN_OUT_PATH}"
+mkdir -p "${MIXIN_OUT_PATH}"
+
+# Install jsonnet dependencies in a subshell so the working directory of the
+# rest of the script is unaffected, even when jb fails.
+(cd "${MIXIN_PATH}" && jb install)
+
+mixtool generate all \
+  --output-alerts "${MIXIN_OUT_PATH}/alerts.yaml" \
+  --output-rules "${MIXIN_OUT_PATH}/rules.yaml" \
+  --directory "${MIXIN_OUT_PATH}/dashboards" \
+  "${MIXIN_PATH}/mixin.libsonnet"
diff --git a/scripts/lint-jsonnet.sh b/scripts/lint-jsonnet.sh
new file mode 100755
index 00000000..74348bf5
--- /dev/null
+++ b/scripts/lint-jsonnet.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Checks that every jsonnet file in the repository is formatted the way
+# jsonnetfmt would format it. Prints a unified diff for each offending file
+# and exits non-zero if any file needs reformatting.
+
+RESULT=0
+
+# Read NUL-delimited paths so file names containing whitespace are handled;
+# anything under a 'vendor' directory is skipped.
+while IFS= read -r -d '' f; do
+  echo "Linting ${f}"
+  # To fix a file in place instead: jsonnetfmt -i "${f}"
+  if ! jsonnetfmt -- "${f}" | diff -u "${f}" -; then
+    # Record a plain failure flag: an accumulated count could wrap past 255
+    # and turn into a misleading exit status.
+    RESULT=1
+  fi
+done < <(find . -name 'vendor' -prune -o \( -name '*.libsonnet' -o -name '*.jsonnet' \) -print0)
+
+echo "Linting complete"
+exit "${RESULT}"