diff --git a/monitor-snapshot/master/ansible-monitor.tar.gz b/monitor-snapshot/master/ansible-monitor.tar.gz index 2ba4bb1f..cf72ff02 100644 Binary files a/monitor-snapshot/master/ansible-monitor.tar.gz and b/monitor-snapshot/master/ansible-monitor.tar.gz differ diff --git a/monitor-snapshot/master/operator/dashboards/pd.json b/monitor-snapshot/master/operator/dashboards/pd.json index cac005df..02d418c2 100644 --- a/monitor-snapshot/master/operator/dashboards/pd.json +++ b/monitor-snapshot/master/operator/dashboards/pd.json @@ -10763,7 +10763,7 @@ "dashLength": 10, "dashes": false, "datasource": "tidb-cluster", - "description": "The time consumed of handling TiDB requests", + "description": "The server-side time consumed of handling the TSO requests", "editable": true, "error": false, "fill": 1, @@ -10908,7 +10908,7 @@ "dashLength": 10, "dashes": false, "datasource": "tidb-cluster", - "description": "The time consumed of handling TiDB requests", + "description": "The client-side time consumed of handling the TSO requests", "editable": true, "error": false, "fill": 1, @@ -10951,14 +10951,14 @@ "steppedLine": false, "targets": [ { - "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type)", + "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"tso.*\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"tso.*\"}[30s])) by (type)", "intervalFactor": 2, "legendFormat": "avg {{type}}", "refId": "A", "step": 2 }, { - "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"tso.*\"}[30s])) by (type, le))", "hide": false, "intervalFactor": 2, "legendFormat": "90% {{type}}", @@ -10966,7 +10966,7 @@ "step": 2 }, { - "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"tso.*\"}[30s])) by (type, le))", "hide": false, "intervalFactor": 2, "legendFormat": "99% {{type}}", @@ -10974,7 +10974,7 @@ "step": 2 }, { - "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (type, le))", + "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"tso.*\"}[30s])) by (type, le))", "hide": false, "intervalFactor": 2, "legendFormat": "99.9% {{type}}", @@ -10986,7 +10986,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD client requests handle duration", + "title": "PD client TSO handle duration", "tooltip": { "msResolution": false, "shared": true, @@ -11030,7 +11030,7 @@ "dashLength": 10, "dashes": false, "datasource": "tidb-cluster", - "description": "The count of TSO grpc requests", + "description": "The count of TSO gRPC requests", "editable": true, "error": false, "fill": 1, @@ -11101,7 +11101,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Handle requests count", + "title": "Handle TSO requests count", "tooltip": { "msResolution": false, "shared": true, @@ -11145,7 +11145,7 @@ "dashLength": 10, "dashes": false, "datasource": "tidb-cluster", - "description": "The batch size of TiDB tso requests", + "description": "The batch size of the TSO requests", "editable": true, "error": false, "fill": 1, @@ -11215,7 +11215,556 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Tso Request batch size", + "title": "TSO request batch size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "description": "The server-side time consumed by handling the QueryRegion requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 132 + }, + "hiddenSeries": false, + "id": 1626, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(pd_server_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90% query", + "refId": "A", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(pd_server_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99% query", + "refId": "B", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(pd_server_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99.9% query", + "refId": "C", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(pd_core_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le, type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90% {{type}}", + "refId": "D", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(pd_core_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le, type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99% {{type}}", + "refId": "E", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(pd_core_query_region_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s])) by (le, type))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99.9% {{type}}", + "refId": "F", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PD server QueryRegion handle duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "description": "The client-side time consumed by handling the QueryRegion requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 132 + }, + "hiddenSeries": false, + "id": 1627, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "avg(rate(pd_client_request_handle_requests_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"query_region.*\"}[30s])) by (type) / avg(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"query_region.*\"}[30s])) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg {{type}}", + "refId": "A", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"query_region.*\"}[30s])) by (type, le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90% {{type}}", + "refId": "B", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"query_region.*\"}[30s])) by (type, le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99% {{type}}", + "refId": "C", + "step": 2 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=~\"query_region.*\"}[30s])) by (type, le))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99.9% {{type}}", + "refId": "D", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PD client QueryRegion handle duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "description": "The count of the QueryRegion gRPC requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 141 + }, + "hiddenSeries": false, + "id": 1628, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(pd_server_query_region_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "QueryRegion request/secs", + "refId": "A", + "step": 2 + }, + { + "exemplar": true, + "expr": "sum(rate(pd_core_query_region_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Handle QueryRegion requests count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "description": "The batch size of the QueryRegion requests", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 141 + }, + "hiddenSeries": false, + "id": 1629, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_query_region_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "0.99 {{type}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_query_region_batch_size_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (le, type))", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "0.90 {{type}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "sum(rate(pd_client_request_handle_query_region_batch_size_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type) / sum(rate(pd_client_request_handle_query_region_batch_size_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "avg {{type}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "QueryRegion request batch size", "tooltip": { "msResolution": false, "shared": true, diff --git a/monitor-snapshot/master/operator/dashboards/tidb.json b/monitor-snapshot/master/operator/dashboards/tidb.json index b1a0357b..bc5befa3 100644 --- a/monitor-snapshot/master/operator/dashboards/tidb.json +++ b/monitor-snapshot/master/operator/dashboards/tidb.json @@ -13431,6 +13431,108 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "tidb-cluster", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 37 + }, + "hiddenSeries": false, + "id": 23763572019, + "legend": { + "alignAsTable": true, + "hideEmpty": true, + "hideZero": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(pd_client_request_circuit_breaker_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (name, event)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{name}}-{{event}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Circuit Breaker Event", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false,