diff --git a/README.md b/README.md index 7d81d9c..9f69124 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,5 @@ - [Lotus日常运维CheckList](./documents/lotus-ops-checklist.md) - [常用环境变量说明](./documents/environment-usage.md) - [自动Pledge脚本使用说明](./documents/auto-pledge.md) -- 时空证明运行流程、原理详解 -- 爆块逻辑及优化方法 -- 消息池清理操作 -- 源码编译及编译常见问题 - 常见运维问题定位与解决 + - [顽固扇区如何删除?](./documents/questions.md#1-顽固扇区如何删除) \ No newline at end of file diff --git a/ansible/2-start-window-post-miner.yaml b/ansible/1-start-window-post-miner.yaml similarity index 94% rename from ansible/2-start-window-post-miner.yaml rename to ansible/1-start-window-post-miner.yaml index 9914eb7..1c34301 100644 --- a/ansible/2-start-window-post-miner.yaml +++ b/ansible/1-start-window-post-miner.yaml @@ -1,14 +1,13 @@ - hosts: window-post-miner remote_user: '{{ remote_user }}' environment: - RUST_LOG: '{{ rust_log }}' - RUST_BACKTRACE: '{{ rust_backtrace }}' FIL_PROOFS_PARAMETER_CACHE: '{{ fil_proofs_parameter_cache }}' FIL_PROOFS_MAXIMIZE_CACHING: '{{ fil_proofs_maximize_caching }}' FIL_PROOFS_USE_GPU_COLUMN_BUILDER: '{{ fil_proofs_use_gpu_column_builder }}' FIL_PROOFS_USE_GPU_TREE_BUILDER: '{{ fil_proofs_use_gpu_tree_builder }}' FULLNODE_API_INFO: '{{ fullnode_api_info }}' LOTUS_PATH: '{{ lotus_path }}' + TMPDIR: '{{ tmp_dir }}' LOTUS_MINER_PATH: '{{ lotus_miner_path }}' TMPDIR: '{{ tmp_dir }}' tasks: diff --git a/ansible/1-start-winning-post-miner.yaml b/ansible/2-start-winning-post-miner.yaml similarity index 97% rename from ansible/1-start-winning-post-miner.yaml rename to ansible/2-start-winning-post-miner.yaml index b261a32..889ae10 100644 --- a/ansible/1-start-winning-post-miner.yaml +++ b/ansible/2-start-winning-post-miner.yaml @@ -9,6 +9,7 @@ FIL_PROOFS_USE_GPU_TREE_BUILDER: '{{ fil_proofs_use_gpu_tree_builder }}' FULLNODE_API_INFO: '{{ fullnode_api_info }}' LOTUS_PATH: '{{ lotus_path }}' + TMPDIR: '{{ tmp_dir }}' LOTUS_MINER_PATH: '{{ lotus_miner_path }}' TMPDIR: '{{ tmp_dir }}' tasks: diff --git 
a/ansible/5-start-seal-miner.yaml b/ansible/5-start-seal-miner.yaml index 0fe685f..91135b7 100644 --- a/ansible/5-start-seal-miner.yaml +++ b/ansible/5-start-seal-miner.yaml @@ -9,6 +9,7 @@ FIL_PROOFS_USE_GPU_TREE_BUILDER: '{{ fil_proofs_use_gpu_tree_builder }}' FULLNODE_API_INFO: '{{ fullnode_api_info }}' LOTUS_PATH: '{{ lotus_path }}' + TMPDIR: '{{ tmp_dir }}' LOTUS_MINER_PATH: '{{ lotus_miner_path }}' TMPDIR: '{{ tmp_dir }}' tasks: diff --git a/ansible/6-start-precommit-worker.yaml b/ansible/6-start-precommit-worker.yaml index 1636d3c..41fda84 100644 --- a/ansible/6-start-precommit-worker.yaml +++ b/ansible/6-start-precommit-worker.yaml @@ -1,9 +1,10 @@ -- hosts: precommit-worker +- hosts: 10.0.1.16 remote_user: '{{ remote_user }}' environment: RUST_LOG: '{{ rust_log }}' RUST_BACKTRACE: '{{ rust_backtrace }}' FIL_PROOFS_PARAMETER_CACHE: '{{ fil_proofs_parameter_cache }}' + FIL_PROOFS_PARENT_CACHE: '{{ fil_proofs_parent_cache }}' FIL_PROOFS_MAXIMIZE_CACHING: '{{ fil_proofs_maximize_caching }}' FIL_PROOFS_USE_GPU_COLUMN_BUILDER: '{{ fil_proofs_use_gpu_column_builder }}' FIL_PROOFS_USE_GPU_TREE_BUILDER: '{{ fil_proofs_use_gpu_tree_builder }}' @@ -14,7 +15,7 @@ - debug: var=ansible_all_ipv4_addresses[0] - debug: var=ansible_hostname - debug: var=lotus_miner_path - + - name: Create cache path, lotus worker path, tmp path, log path file: path: '{{ item }}' @@ -24,8 +25,9 @@ mode: u=rwxr,g=xr,o=x loop: - '{{ fil_proofs_parent_cache }}' - - '{{ lotus_worker_path }}' - - '{{ tmp_dir }}/tmp' + - '{{ lotus_worker_path_0 }}' + - '{{ lotus_worker_path_1 }}' + - '{{ tmp_dir }}' - '{{ log_path }}' - name: Copy myscheduler config to precommit worker @@ -33,16 +35,33 @@ src: '{{ workspace }}/lotus-ops/config/myscheduler-precommit.json' dest: '{{ item }}/myscheduler.json' loop: - - '{{ lotus_worker_path }}' + - '{{ lotus_worker_path_0 }}' + - '{{ lotus_worker_path_1 }}' - name: Start precommit worker 0 shell: | - tmux new -s lotus -d -n worker - tmux send-keys -t lotus:worker 
"export LOTUS_WORKER_PATH={{ lotus_worker_path }}" C-m - tmux send-keys -t lotus:worker "export CUDA_VISIBLE_DEVICES=0" C-m - tmux send-keys -t lotus:worker "export TMPDIR={{ tmp_dir }}/tmp" C-m - tmux send-keys -t lotus:worker "export FIL_PROOFS_MULTICORE_SDR_PRODUCERS=1" C-m - tmux send-keys -t lotus:worker "export ENV_CPU_CORE_BEGIN_NUM=0" C-m - tmux send-keys -t lotus:worker "export ENV_CPU_CORE_END_NUM=16" C-m - tmux send-keys -t lotus:worker "lotus-worker run --listen={{ ansible_all_ipv4_addresses[0] }}:{{ worker_port }} --commit=false" C-m - tmux pipe-pane -o "cat >>{{ log_path }}/lotus-worker-`date +%Y-%m-%d-%H-%M`.log" \ No newline at end of file + tmux new -s lotus -d -n worker0 + tmux send-keys -t lotus:worker0 "export LOTUS_WORKER_PATH={{ lotus_worker_path_0 }}" C-m + tmux send-keys -t lotus:worker0 "export CUDA_VISIBLE_DEVICES=0" C-m + tmux send-keys -t lotus:worker0 "export TMPDIR={{ tmp_dir }}" C-m + tmux send-keys -t lotus:worker0 "export FIL_PROOFS_MULTICORE_SDR_PRODUCERS=1" C-m + tmux send-keys -t lotus:worker0 "export ENV_CPU_CORE_BEGIN_NUM=0" C-m + tmux send-keys -t lotus:worker0 "export ENV_CPU_CORE_END_NUM=8" C-m + tmux send-keys -t lotus:worker0 "echo $ENV_CPU_CORE_BEGIN_NUM" C-m + tmux send-keys -t lotus:worker0 "echo $ENV_CPU_CORE_END_NUM" C-m + tmux send-keys -t lotus:worker0 "lotus-worker run --listen={{ ansible_all_ipv4_addresses[0] }}:{{ lotus_worker_port_0 }} --commit=false" C-m + tmux pipe-pane -o "cat >>{{ log_path }}/lotus-worker0-`date +%Y-%m-%d-%H-%M`.log" + + - name: Start commit worker 1 + shell: | + tmux new-window -t lotus -n worker1 + tmux send-keys -t lotus:worker1 "export LOTUS_WORKER_PATH={{ lotus_worker_path_1 }}" C-m + tmux send-keys -t lotus:worker1 "export CUDA_VISIBLE_DEVICES=0" C-m + tmux send-keys -t lotus:worker1 "export TMPDIR={{ tmp_dir }}" C-m + tmux send-keys -t lotus:worker1 "export FIL_PROOFS_MULTICORE_SDR_PRODUCERS=1" C-m + tmux send-keys -t lotus:worker1 "export ENV_CPU_CORE_BEGIN_NUM=8" C-m + tmux 
send-keys -t lotus:worker1 "export ENV_CPU_CORE_END_NUM=16" C-m + tmux send-keys -t lotus:worker1 "echo $ENV_CPU_CORE_BEGIN_NUM" C-m + tmux send-keys -t lotus:worker1 "echo $ENV_CPU_CORE_END_NUM" C-m + tmux send-keys -t lotus:worker1 "lotus-worker run --listen={{ ansible_default_ipv4.address }}:{{ lotus_worker_port_1 }} --commit=false" C-m + tmux pipe-pane -o "cat >>{{ log_path }}/lotus-worker1-`date +%Y-%m-%d-%H-%M`.log" diff --git a/ansible/7-start-commit-worker.yaml b/ansible/7-start-commit-worker.yaml index a30c9a7..904be23 100644 --- a/ansible/7-start-commit-worker.yaml +++ b/ansible/7-start-commit-worker.yaml @@ -1,4 +1,4 @@ -- hosts: commit-worker +- hosts: 10.0.2.13 remote_user: '{{ remote_user }}' environment: RUST_LOG: '{{ rust_log }}' @@ -54,4 +54,4 @@ tmux send-keys -t lotus:worker1 "export CUDA_VISIBLE_DEVICES=1" C-m tmux send-keys -t lotus:worker1 "export TMPDIR={{ tmp_dir }}/tmp1" C-m tmux send-keys -t lotus:worker1 "lotus-worker run --listen={{ ansible_all_ipv4_addresses[0] }}:{{ lotus_worker_port_1 }} --addpiece=false --precommit1=false --precommit2=false" C-m - tmux pipe-pane -o "cat >>{{ log_path }}/lotus-worker1-`date +%Y-%m-%d-%H-%M`.log" \ No newline at end of file + tmux pipe-pane -o "cat >>{{ log_path }}/lotus-worker1-`date +%Y-%m-%d-%H-%M`.log" diff --git a/ansible/copy-bin-to-amd.yaml b/ansible/copy-bin-to-amd.yaml index 3444dbb..1f8b36d 100644 --- a/ansible/copy-bin-to-amd.yaml +++ b/ansible/copy-bin-to-amd.yaml @@ -5,7 +5,7 @@ become_method: sudo vars: - lotus_bin_path: 'lotus-bin' - - version: '1.7.0' + - version: 'v1.10.0' - platform: 'amd' tasks: - name: Copy lotus bin to all amd @@ -14,8 +14,8 @@ dest: '/usr/local/bin/' mode: +x loop: - - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/{{ platform }}/lotus' - - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/{{ platform }}/lotus-miner' - - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/{{ platform }}/lotus-worker' - - '{{ workspace }}/{{ lotus_bin_path }}/{{ version 
}}/{{ platform }}/lotus-shed' - - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/{{ platform }}/lotus-bench' \ No newline at end of file + - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/lotus' + - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/lotus-miner' + - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/lotus-worker' + - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/lotus-shed' + - '{{ workspace }}/{{ lotus_bin_path }}/{{ version }}/lotus-bench' diff --git a/ansible/install-node-exporter.yaml b/ansible/install-node-exporter.yaml index 6611b77..6ae3f36 100644 --- a/ansible/install-node-exporter.yaml +++ b/ansible/install-node-exporter.yaml @@ -1,4 +1,5 @@ -- hosts: public-daemon,winning-post-miner,window-post-miner,seal-miner,precommit-worker,commit-worker +# - hosts: public-daemon,private-daemon,winning-post-miner,window-post-miner,seal-miner,precommit-worker,commit-worker +- hosts: 10.0.1.17 remote_user: '{{ remote_user }}' gather_facts: no become: yes @@ -7,7 +8,7 @@ tasks: - name: Create prometheus run direction file: - path: '/home/fil/disk_md0/prometheus/run' + path: '/home/fil/prometheus/run' state: directory - name: Copy node exporter script to remote server diff --git a/ansible/lotus-version-check.yaml b/ansible/lotus-version-check.yaml new file mode 100644 index 0000000..17241ee --- /dev/null +++ b/ansible/lotus-version-check.yaml @@ -0,0 +1,28 @@ +- hosts: amd + tasks: + - debug: var=ansible_default_ipv4.address + - debug: var=ansible_hostname + + - name: Create workspace directory + file: + path: '{{ workspace }}' + state: directory + owner: '{{ remote_user }}' + group: '{{ remote_user }}' + mode: +x + + - name: Copy check script to remote server + copy: + src: '{{ workspace }}/lotus-ops/scripts/lotus-version-check.sh' + dest: '{{ workspace }}' + owner: '{{ remote_user }}' + group: '{{ remote_user }}' + mode: +x + + - name: Lotus version check + shell: '{{ workspace }}/lotus-version-check.sh' + register: check_result + + - 
name: Echo check result + debug: + msg: '{{ check_result.stdout }}' diff --git a/ansible/worker-pre-check.yaml b/ansible/worker-pre-check.yaml index 4d8e522..91b891f 100644 --- a/ansible/worker-pre-check.yaml +++ b/ansible/worker-pre-check.yaml @@ -25,4 +25,4 @@ - name: Echo check result debug: - msg: '{{ check_result.stdout }}' \ No newline at end of file + msg: '{{ check_result.stdout }}' diff --git a/config/Miner-monitoring.json b/config/Miner-monitoring.json index ac647e0..8a689b1 100644 --- a/config/Miner-monitoring.json +++ b/config/Miner-monitoring.json @@ -17,7 +17,7 @@ "editable": true, "gnetId": 8919, "graphTooltip": 0, - "id": 1, + "id": null, "iteration": 1615014350433, "links": [ { diff --git a/config/Server-monitoring.json b/config/Server-monitoring.json new file mode 100644 index 0000000..8a689b1 --- /dev/null +++ b/config/Server-monitoring.json @@ -0,0 +1,3688 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:5598", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Filecoin miner dashboard.", + "editable": true, + "gnetId": 8919, + "graphTooltip": 0, + "id": null, + "iteration": 1615014350433, + "links": [ + { + "$$hashKey": "object:454", + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "更新node_exporter", + "tooltip": "", + "type": "link", + "url": "https://github.com/prometheus/node_exporter/releases" + }, + { + "$$hashKey": "object:455", + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "更新当前仪表板", + "tooltip": "", + "type": "link", + "url": "https://grafana.com/dashboards/8919" + }, + { + "$$hashKey": "object:456", + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "StarsL.cn", + "tooltip": "", + "type": "link", + "url": "https://starsl.cn" + }, + { + "$$hashKey": "object:457", + "asDropdown": true, + 
"icon": "external link", + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 187, + "panels": [], + "title": "资源总览", + "type": "row" + }, + { + "columns": [], + "datasource": "Prometheus", + "description": "分区使用率、磁盘读取、磁盘写入、下载带宽、上传带宽,如果有多个网卡或者多个分区,是采集的使用率最高的网卡或者分区的数值。", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 20, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 185, + "pageSize": 50, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "$$hashKey": "object:5955", + "alias": "主机名", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "link": false, + "linkTooltip": "", + "linkUrl": "", + "mappingType": 1, + "pattern": "nodename", + "thresholds": [], + "type": "string", + "unit": "bytes" + }, + { + "$$hashKey": "object:5956", + "alias": "IP(链接到明细)", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": true, + "linkTargetBlank": false, + "linkTooltip": "浏览主机明细", + "linkUrl": "/d/9CWBz0bik/node-exporter?orgId=1&var-job=${job}&var-hostname=All&var-node=${__cell}&var-device=All", + "mappingType": 1, + "pattern": "instance", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "$$hashKey": "object:5957", + "alias": "内存", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "Value 
#B", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "$$hashKey": "object:5958", + "alias": "CPU核", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": null, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "$$hashKey": "object:5959", + "alias": " 运行时间", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [], + "type": "number", + "unit": "s" + }, + { + "$$hashKey": "object:5960", + "alias": "分区使用率*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "$$hashKey": "object:5961", + "alias": "CPU使用率", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "$$hashKey": "object:5962", + "alias": "内存使用率", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "$$hashKey": "object:5963", + "alias": 
"磁盘读取*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [ + "10485760", + "20485760" + ], + "type": "number", + "unit": "Bps" + }, + { + "$$hashKey": "object:5964", + "alias": "磁盘写入*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [ + "10485760", + "20485760" + ], + "type": "number", + "unit": "Bps" + }, + { + "$$hashKey": "object:5965", + "alias": "下载带宽*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [ + "30485760", + "104857600" + ], + "type": "number", + "unit": "bps" + }, + { + "$$hashKey": "object:5966", + "alias": "上传带宽*", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #K", + "thresholds": [ + "30485760", + "104857600" + ], + "type": "number", + "unit": "bps" + }, + { + "$$hashKey": "object:5967", + "alias": "5m负载", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #L", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "$$hashKey": "object:5968", + "alias": "", + "align": "right", + "colorMode": null, + "colors": [ + 
"rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "node_uname_info - 0", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "主机名", + "refId": "A" + }, + { + "expr": "sum(time() - node_boot_time_seconds)by(instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "运行时间", + "refId": "D" + }, + { + "expr": "node_memory_MemTotal_bytes - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总内存", + "refId": "B" + }, + { + "expr": "count(node_cpu_seconds_total{mode='system'}) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总核数", + "refId": "C" + }, + { + "expr": "node_load5", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "5分钟负载", + "refId": "L" + }, + { + "expr": "(1 - avg(irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) by (instance)) * 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用率", + "refId": "F" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes)))* 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "G" + }, + { + "expr": "max((node_filesystem_size_bytes{fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{fstype=~\"ext.?|xfs\"}) *100/(node_filesystem_avail_bytes {fstype=~\"ext.?|xfs\"}+(node_filesystem_size_bytes{fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{fstype=~\"ext.?|xfs\"})))by(instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "分区使用率", + "refId": "E" + }, + { + "expr": "max(irate(node_disk_read_bytes_total[5m])) by (instance)", + "format": "table", + "hide": false, + 
"instant": true, + "interval": "", + "legendFormat": "最大读取", + "refId": "H" + }, + { + "expr": "max(irate(node_disk_written_bytes_total[5m])) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大写入", + "refId": "I" + }, + { + "expr": "max(irate(node_network_receive_bytes_total[5m])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "下载带宽", + "refId": "J" + }, + { + "expr": "max(irate(node_network_transmit_bytes_total[5m])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "上传带宽", + "refId": "K" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "服务器资源总览表", + "transform": "table", + "type": "table-old" + }, + { + "aliasColors": { + "192.168.200.241:9100_Total": "dark-red", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 191, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "总平均使用率", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "总核数", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(node_cpu_seconds_total{job=~\"$job\", mode='system'})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总核数", + "refId": "B", + "step": 240 + }, + { + "expr": "sum(node_load5{job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总5分钟负载", + "refId": "A", + "step": 240 + }, + { + "expr": "avg(1 - avg(irate(node_cpu_seconds_total{job=~\"$job\",mode=\"idle\"}[5m])) by (instance)) * 100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "总平均使用率", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:整体总负载与整体平均CPU使用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": "总负载", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "percent", + "label": "平均使用率", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_总内存": "dark-red", + "内存_Avaliable": "#6ED0E0", + "内存_Cached": "#EF843C", + "内存_Free": "#629E51", + "内存_Total": "#6d1f62", + "内存_Used": "#eab839", + "可用": "#9ac48a", + "总内存": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "Prometheus", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 21 + }, + "height": "300", + "hiddenSeries": false, + "id": 195, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "总内存", + "color": "#C4162A", + "fill": 0 + }, + { + "alias": "总平均使用率", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes{job=~\"$job\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总内存", + "refId": "A", + "step": 4 + }, + { + "expr": "sum(node_memory_MemTotal_bytes{job=~\"$job\"} - node_memory_MemAvailable_bytes{job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总已用", + "refId": "B", + "step": 4 + }, + { + "expr": "(sum(node_memory_MemTotal_bytes{job=~\"$job\"} - node_memory_MemAvailable_bytes{job=~\"$job\"}) / sum(node_memory_MemTotal_bytes{job=~\"$job\"}))*100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "总平均使用率", + "refId": "H" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:整体总内存与整体平均内存使用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, 
+ "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "bytes", + "label": "总内存量", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "percent", + "label": "平均使用率", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 21 + }, + "hiddenSeries": false, + "id": 197, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "总平均使用率", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "总磁盘量", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(avg(node_filesystem_size_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总磁盘量", + "refId": "E" + }, + { + "expr": "sum(avg(node_filesystem_size_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - 
sum(avg(node_filesystem_free_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总使用量", + "refId": "C" + }, + { + "expr": "(sum(avg(node_filesystem_size_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))))", + "format": "time_series", + "instant": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "总平均使用率", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "$job:整体总磁盘与整体平均磁盘使用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "bytes", + "label": "总磁盘量", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "decimals": null, + "format": "percent", + "label": "平均使用率", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 189, + "panels": [], + "title": "资源明细:【$show_hostname】", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorPrefix": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + 
"decimals": 0, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "threshcisLabels": false, + "threshcisMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 0, + "y": 30 + }, + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "null", + "nullText": null, + "pluginVersion": "6.4.2", + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(time() - node_boot_time_seconds{instance=~\"$node\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 40 + } + ], + "threshciss": "1,2", + "thresholds": "1,3", + "title": "运行时间", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "decimals": 1, + "displayName": "", + "mappings": [ + { + "from": "", + "id": 1, + "operator": "", + "text": "N/A", + "to": "", + "type": 1, + "value": "0" + } + ], + "max": 100, + "min": 0.1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 6, + "w": 3, + "x": 2, + "y": 30 + }, + "id": 177, + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.4.0", + "targets": [ + { + "expr": "100 - (avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[5m])) * 100)", + "instant": true, + "interval": "", + "legendFormat": "总CPU使用率", + "refId": "A" + }, + { + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[5m])) * 100", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "IOwait使用率", + "refId": "C" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$node\"} / (node_memory_MemTotal_bytes{instance=~\"$node\"})))* 100", + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "B" + }, + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"})*100 /(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大分区({{mountpoint}})使用率", + "refId": "D" + }, + { + "expr": "(1 - ((node_memory_SwapFree_bytes{instance=~\"$node\"} + 1)/ (node_memory_SwapTotal_bytes{instance=~\"$node\"} + 1))) * 100", + "instant": true, + "legendFormat": "交换分区使用率", + "refId": "F" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "type": "bargauge" + }, + { + "columns": [], + "datasource": "Prometheus", + "description": "本看板中的:磁盘总量、使用量、可用量、使用率保持和df命令的Size、Used、Avail、Use% 
列的值一致,并且Use%的值会四舍五入保留一位小数,会更加准确。\n\n注:df中Use%算法为:(size - free) * 100 / (avail + (size - free)),结果是整除则为该值,非整除则为该值+1,结果的单位是%。\n参考df命令源码:", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 6, + "w": 11, + "x": 5, + "y": 30 + }, + "id": 181, + "links": [ + { + "targetBlank": true, + "title": "https://github.com/coreutils/coreutils/blob/master/src/df.c", + "url": "https://github.com/coreutils/coreutils/blob/master/src/df.c" + } + ], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": false + }, + "styles": [ + { + "$$hashKey": "object:818", + "alias": "分区", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "mountpoint", + "thresholds": [ + "" + ], + "type": "string", + "unit": "bytes" + }, + { + "$$hashKey": "object:819", + "alias": "可用空间", + "align": "auto", + "colorMode": "value", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [ + "10000000000", + "20000000000" + ], + "type": "number", + "unit": "bytes" + }, + { + "$$hashKey": "object:820", + "alias": "使用率", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percent" + }, + { + "$$hashKey": "object:821", + "alias": "总空间", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD 
HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "$$hashKey": "object:822", + "alias": "文件系统", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "fstype", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "$$hashKey": "object:823", + "alias": "设备名", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "mappingType": 1, + "pattern": "device", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "$$hashKey": "object:824", + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "preserveFormat": true, + "sanitize": false, + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总量", + "refId": "C" + }, + { + "expr": "node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint 
!~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "title": "【$show_hostname】:各分区可用空间(EXT.*/XFS)", + "transform": "table", + "type": "table-old" + }, + { + "aliasColors": { + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in": "light-red", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in下载": "green", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_out上传": "yellow", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_in下载": "purple", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out": "purple", + "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out上传": "blue" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 30 + }, + "hiddenSeries": false, + "id": 183, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "show": false, + "sort": "current", + "sortDesc": true, + "total": true, + "values": true + }, + "lines": false, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 1, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*_out上传$/", + "transform": "negative-Y" + } + ], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(node_network_receive_bytes_total{instance=~\"$node\",device=~\"$device\"}[60m])", + "interval": "60m", + "intervalFactor": 1, + "legendFormat": "{{device}}_in下载", + "metric": "", + "refId": "A", + "step": 600, + "target": "" + }, + { + "expr": "increase(node_network_transmit_bytes_total{instance=~\"$node\",device=~\"$device\"}[60m])", + "hide": false, + "interval": "60m", + "intervalFactor": 1, + "legendFormat": "{{device}}_out上传", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "每小时流量$device", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "上传(-)/下载(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 32 + }, + "id": 75, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + 
"nullText": null, + "postfix": "", + "postfixFontSize": "70%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes{instance=~\"$node\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 20 + } + ], + "thresholds": "2,3", + "title": "总内存", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorPostfix": false, + "colorValue": true, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 36 + }, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 6, + "nullPointMode": "null", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"count(node_cpu_seconds_total{instance=~\"$node\", mode='system'})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "1,2", + "title": "CPU 核数", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 22, + "x": 2, + "y": 36 + }, + "id": 199, + "maxPerRow": 2, + "options": { + "displayMode": "lcd", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.4.0", + "repeat": null, + "repeatDirection": "v", + "targets": [ + { + "expr": "100 - (irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[5m])) * 100", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "{{cpu}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU核心使用率", + "transformations": [], + "type": "bargauge" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": {}, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 39 + }, + "id": 201, + "options": { + "displayMode": "lcd", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "mean" + ], + 
"fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.4.0", + "targets": [ + { + "expr": "100 - (avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[5m])) * 100)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU平均使用率", + "type": "bargauge" + }, + { + "aliasColors": { + "192.168.200.241:9100_Total": "dark-red", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "$$hashKey": "object:263", + "alias": "/.*总使用率/", + "color": "#C4162A", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"system\"}[5m])) by (instance) *100", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "系统使用率", + "refId": "A", + "step": 20 + }, + { + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"user\"}[5m])) by (instance) *100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "用户使用率", + "refId": "B", + "step": 240 + }, + { + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"iowait\"}[5m])) by (instance) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "磁盘IO使用率", + "refId": "D", + "step": 240 + }, + { + "expr": "(1 - avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[5m])) by (instance))*100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总使用率", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU使用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:278", + "decimals": 0, + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:279", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_总内存": "dark-red", + "使用率": "yellow", + "内存_Avaliable": "#6ED0E0", + "内存_Cached": "#EF843C", + "内存_Free": "#629E51", + "内存_Total": "#6d1f62", + "内存_Used": "#eab839", + "可用": "#9ac48a", + "总内存": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + 
"fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 43 + }, + "height": "300", + "hiddenSeries": false, + "id": 156, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "总内存", + "color": "#C4162A", + "fill": 0 + }, + { + "alias": "使用率", + "color": "rgb(0, 209, 255)", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总内存", + "refId": "A", + "step": 4 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"} - node_memory_MemAvailable_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "已用", + "refId": "B", + "step": 4 + }, + { + "expr": "node_memory_MemAvailable_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "可用", + "refId": "F", + "step": 4 + }, + { + "expr": "node_memory_Buffers_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "内存_Buffers", + "refId": "D", + "step": 4 + }, + { + "expr": "node_memory_MemFree_bytes{instance=~\"$node\"}", + 
"format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "内存_Free", + "refId": "C", + "step": 4 + }, + { + "expr": "node_memory_Cached_bytes{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "内存_Cached", + "refId": "E", + "step": 4 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"} - (node_memory_Cached_bytes{instance=~\"$node\"} + node_memory_Buffers_bytes{instance=~\"$node\"} + node_memory_MemFree_bytes{instance=~\"$node\"})", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "G" + }, + { + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$node\"} / (node_memory_MemTotal_bytes{instance=~\"$node\"})))* 100", + "format": "time_series", + "hide": false, + "interval": "30m", + "intervalFactor": 10, + "legendFormat": "使用率", + "refId": "H" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "内存信息", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "percent", + "label": "内存使用率", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.10.227:9100_em1_in下载": "super-light-green", + "192.168.10.227:9100_em1_out上传": "dark-blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 43 + }, + "height": "300", + "hiddenSeries": false, + "id": 157, + "legend": { + "alignAsTable": 
true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_out上传$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{instance=~'$node',device=~\"$device\"}[5m])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_in下载", + "refId": "A", + "step": 4 + }, + { + "expr": "irate(node_network_transmit_bytes_total{instance=~'$node',device=~\"$device\"}[5m])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_out上传", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "每秒网络带宽使用$device", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": "上传(-)/下载(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "15分钟": "#6ED0E0", + "1分钟": "#BF1B00", + "5分钟": "#CCA300" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "grid": {}, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 52 + }, + "height": "300", + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*总核数/", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "1分钟负载", + "metric": "", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "node_load5{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5分钟负载", + "refId": "B", + "step": 20 + }, + { + "expr": "node_load15{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "15分钟负载", + "refId": "C", + "step": 20 + }, + { + "expr": " sum(count(node_cpu_seconds_total{instance=~\"$node\", mode='system'}) by (cpu,instance)) by(instance)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU总核数", + "refId": "D", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "系统平均负载", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + 
"value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda_write": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Read bytes 每个磁盘分区每秒读取的比特数\nWritten bytes 每个磁盘分区每秒写入的比特数", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 52 + }, + "height": "300", + "hiddenSeries": false, + "id": 168, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_读取$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_bytes_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "A", + "step": 10 + }, + { + "expr": "irate(node_disk_written_bytes_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "每秒磁盘读写容量", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "Bps", + "label": "读取(-)/写入(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 1, + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 52 + }, + "hiddenSeries": false, + "id": 174, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Inodes.*/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint 
!~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~'$node',fstype=~\"ext.*|xfs\",mountpoint !~\".*pod.*\"}))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A" + }, + { + "expr": "node_filesystem_files_free{instance=~'$node',fstype=~\"ext.?|xfs\"} / node_filesystem_files{instance=~'$node',fstype=~\"ext.?|xfs\"}", + "hide": true, + "interval": "", + "legendFormat": "Inodes:{{instance}}:{{mountpoint}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "磁盘使用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "decimals": 2, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda_write": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Reads completed: 每个磁盘分区每秒读完成次数\n\nWrites completed: 每个磁盘分区每秒写完成次数\n\nIO now 每个磁盘分区每秒正在处理的输入/输出请求数", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 60 + }, + "height": "300", + "hiddenSeries": false, + "id": 161, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*_读取$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "A", + "step": 10 + }, + { + "expr": "irate(node_disk_writes_completed_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "B", + "step": 10 + }, + { + "expr": "node_disk_io_now{instance=~\"$node\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "磁盘读写速率(IOPS)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "iops", + "label": "读取(-)/写入(+)I/O ops/sec", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "sdb_每秒I/O操作%": "#d683ce", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": 
"#508642", + "user": "#5195CE", + "磁盘花费在I/O操作占比": "#ba43a9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": null, + "description": "每一秒钟的自然时间内,花费在I/O上的耗时。(wall-clock time)\n\nnode_disk_io_time_seconds_total:\n磁盘花费在输入/输出操作上的秒数。该值为累加值。(Milliseconds Spent Doing I/Os)\n\nirate(node_disk_io_time_seconds_total[1m]):\n计算每秒的速率:(last值-last前一个值)/时间戳差值,即:1秒钟内磁盘花费在I/O操作的时间占比。", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 60 + }, + "hiddenSeries": false, + "id": 175, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "sort": null, + "sortDesc": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_每秒I/O操作%", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "每1秒内I/O操作耗时占比", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "vda": "#6ED0E0" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Read time seconds 每个磁盘分区读操作花费的秒数\n\nWrite time seconds 每个磁盘分区写操作花费的秒数\n\nIO time seconds 每个磁盘分区输入/输出操作花费的秒数\n\nIO time weighted seconds每个磁盘分区输入/输出操作花费的加权秒数", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 1, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 60 + }, + "height": "300", + "hiddenSeries": false, + "id": 160, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/,*_读取$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_time_seconds_total{instance=~\"$node\"}[5m]) / irate(node_disk_reads_completed_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "B" + }, + { + "expr": "irate(node_disk_write_time_seconds_total{instance=~\"$node\"}[5m]) / irate(node_disk_writes_completed_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "C" + }, + { + "expr": 
"irate(node_disk_io_time_seconds_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10 + }, + { + "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=~\"$node\"}[5m])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_加权", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "每次IO读写的耗时(参考:小于100ms)(beta)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "读取(-)/写入(+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "192.168.200.241:9100_TCP_alloc": "semi-dark-blue", + "TCP": "#6ED0E0", + "TCP_alloc": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Sockets_used - 已使用的所有协议套接字总量\n\nCurrEstab - 当前状态为 ESTABLISHED 或 CLOSE-WAIT 的 TCP 连接数\n\nTCP_alloc - 已分配(已建立、已申请到sk_buff)的TCP套接字数量\n\nTCP_tw - 等待关闭的TCP连接数\n\nUDP_inuse - 正在使用的 UDP 套接字数量\n\nRetransSegs - TCP 重传报文数\n\nOutSegs - TCP 发送的报文数\n\nInSegs - TCP 接收的报文数", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 69 + }, + "height": "300", + "hiddenSeries": false, + "id": 158, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": false, + 
"rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sockets_used/", + "color": "#E02F44", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CurrEstab", + "refId": "A", + "step": 20 + }, + { + "expr": "node_sockstat_TCP_tw{instance=~'$node'}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_tw", + "refId": "D" + }, + { + "expr": "node_sockstat_sockets_used{instance=~'$node'}", + "hide": false, + "interval": "30m", + "intervalFactor": 1, + "legendFormat": "Sockets_used", + "refId": "B" + }, + { + "expr": "node_sockstat_UDP_inuse{instance=~'$node'}", + "interval": "", + "legendFormat": "UDP_inuse", + "refId": "C" + }, + { + "expr": "node_sockstat_TCP_alloc{instance=~'$node'}", + "interval": "", + "legendFormat": "TCP_alloc", + "refId": "E" + }, + { + "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=~'$node'}[5m])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_PassiveOpens", + "refId": "G" + }, + { + "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=~'$node'}[5m])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_ActiveOpens", + "refId": "F" + }, + { + "expr": "irate(node_netstat_Tcp_InSegs{instance=~'$node'}[5m])", + "interval": "", + "legendFormat": "Tcp_InSegs", + "refId": "H" + }, + { + "expr": 
"irate(node_netstat_Tcp_OutSegs{instance=~'$node'}[5m])", + "interval": "", + "legendFormat": "Tcp_OutSegs", + "refId": "I" + }, + { + "expr": "irate(node_netstat_Tcp_RetransSegs{instance=~'$node'}[5m])", + "hide": false, + "interval": "", + "legendFormat": "Tcp_RetransSegs", + "refId": "J" + }, + { + "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=~'$node'}[5m])", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "K" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "网络Socket连接信息", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "transformations": [], + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "已使用的所有协议套接字总量", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "filefd_192.168.200.241:9100": "super-light-green", + "switches_192.168.200.241:9100": "semi-dark-red", + "使用的文件描述符_10.118.72.128:9100": "red", + "每秒上下文切换次数_10.118.71.245:9100": "yellow", + "每秒上下文切换次数_10.118.72.128:9100": "yellow" + }, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {}, + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 69 + }, + "hiddenSeries": false, + "hideTimeOverride": false, + "id": 16, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": 
"null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0", + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/每秒上下文切换次数.*/", + "color": "#FADE2A", + "lines": false, + "pointradius": 1, + "points": true, + "yaxis": 2 + }, + { + "alias": "/使用的文件描述符.*/", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filefd_allocated{instance=~\"$node\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 5, + "legendFormat": "使用的文件描述符", + "refId": "B" + }, + { + "expr": "irate(node_context_switches_total{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 5, + "legendFormat": "每秒上下文切换次数", + "refId": "A" + }, + { + "expr": " (node_filefd_allocated{instance=~\"$node\"}/node_filefd_maximum{instance=~\"$node\"}) *100", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 5, + "legendFormat": "使用的文件描述符占比_{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "打开的文件描述符(左 )/每秒上下文切换次数(右)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "使用的文件描述符", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "context_switches", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "Prometheus", + "node_exporter", + "StarsL.cn" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "Miner-10-0-99-102", + 
"value": "Miner-10-0-99-102" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info, job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "JOB", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(node_uname_info, job)", + "refId": "Prometheus-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info{job=~\"$job\"}, nodename)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "主机名", + "multi": false, + "name": "hostname", + "options": [], + "query": { + "query": "label_values(node_uname_info{job=~\"$job\"}, nodename)", + "refId": "Prometheus-hostname-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": { + "selected": false, + "text": "10.0.99.102:9100", + "value": "10.0.99.102:9100" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info{job=~\"$job\",nodename=~\"$hostname\"},instance)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": true, + "multiFormat": "regex values", + "name": "node", + "options": [], + "query": { + "query": "label_values(node_uname_info{job=~\"$job\",nodename=~\"$hostname\"},instance)", + "refId": "Prometheus-node-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + 
"allFormat": "glob", + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(node_network_info{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'},device)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "网卡", + "multi": true, + "multiFormat": "regex values", + "name": "device", + "options": [], + "query": { + "query": "label_values(node_network_info{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'},device)", + "refId": "Prometheus-device-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "/home/caslx/disk_md0", + "value": "/home/caslx/disk_md0" + }, + "datasource": "Prometheus", + "definition": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": "最大挂载目录", + "multi": false, + "name": "maxmount", + "options": [], + "query": { + "query": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{instance=~'$node',fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "refId": "Prometheus-maxmount-Variable-Query" + }, + "refresh": 2, + "regex": "/.*\\\"(.*)\\\".*/", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "Miner-10-0-99-102", + "value": "Miner-10-0-99-102" + }, + "datasource": "Prometheus", + "definition": "label_values(node_uname_info{job=~\"$job\",instance=~\"$node\"}, nodename)", + "description": null, + "error": null, + "hide": 2, + 
"includeAll": false, + "label": "展示使用的主机名", + "multi": false, + "name": "show_hostname", + "options": [], + "query": { + "query": "label_values(node_uname_info{job=~\"$job\",instance=~\"$node\"}, nodename)", + "refId": "Prometheus-show_hostname-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "now": true, + "refresh_intervals": [ + "15s", + "30s", + "1m", + "5m", + "15m", + "30m" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Miner", + "uid": "9CWBz0bik", + "version": 5 +} \ No newline at end of file diff --git a/config/myscheduler-commit.json b/config/myscheduler-commit.json index 1ad6995..cf26a38 100644 --- a/config/myscheduler-commit.json +++ b/config/myscheduler-commit.json @@ -13,4 +13,4 @@ "AllowP2C2Parallel": false, "IgnoreOutOfSpace": true, "AutoPledgeDiff": 0 -} \ No newline at end of file +} diff --git a/config/myscheduler-precommit.json b/config/myscheduler-precommit.json index 1d50e54..a000ab2 100644 --- a/config/myscheduler-precommit.json +++ b/config/myscheduler-precommit.json @@ -1,6 +1,6 @@ { "WorkerName": "", - "AddPieceMax": 2, + "AddPieceMax": 1, "PreCommit1Max": 7, "PreCommit2Max": 1, "Commit2Max": 0, @@ -12,5 +12,5 @@ "IsPlanOffline": false, "AllowP2C2Parallel": false, "IgnoreOutOfSpace": true, - "AutoPledgeDiff": 0 -} \ No newline at end of file + "AutoPledgeDiff": 1 +} diff --git a/documents/distributed-miner-configuration.md b/documents/distributed-miner-configuration.md index 02f1d58..572fd7e 100644 --- a/documents/distributed-miner-configuration.md +++ b/documents/distributed-miner-configuration.md @@ -122,18 +122,18 @@ Seal-miner启动后,需要分配一部分worker给这个新的Seal-miner。 
![connection](../images/dirtributed-miner-architecture.png) -- **Daemon-public**:需要配置公网IP,给Daemon-public配公网IP主要是为了提升节点的稳定性和评分,只有节点健康稳定,爆块才会稳定。但是如果直接把导入了钱包的Daemon暴露在公网中也是非常不安全的,毕竟Lotus的钱包管理还是非常原始的。Daemon-public的作用就是连接更多的外部节点。然后让Daemon-private也连接到Daemon-public,因为是内网,连接会非常快。Daemon-public是内网的Miner和外部区块链通信的一个桥梁。 +- **Public-daemon**:需要配置公网IP,给Public-daemon配公网IP主要是为了提升节点的稳定性和评分,只有节点健康稳定,爆块才会稳定。但是如果直接把导入了钱包的Daemon暴露在公网中也是非常不安全的,毕竟Lotus的钱包管理还是非常原始的。Public-daemon的作用就是连接更多的外部节点。然后让Private-daemon也连接到Public-daemon,因为是内网,连接会非常快。Public-daemon是内网的Miner和外部区块链通信的一个桥梁。 [如何给Daemon配置公网IP?](https://github.com/filguard/lotus-ops/blob/master/documents/daemon-operation.md#4-%E7%BB%99deamon%E9%85%8D%E7%BD%AE%E5%85%AC%E7%BD%91ip) -- **Daemon-private**:内网的Miner全部连接Daemon-private,同时这个Daemon也启动在Winning-post-miner上,保证出块的Miner连接是最快的,考虑冗余的话,可以在Window-post-miner上再同步一个轻节点Daemon作为备份,关于轻节点Daemon,请[参照快照导出导入和快照剪裁](https://github.com/filguard/lotus-ops/blob/master/documents/daemon-operation.md#3-%E5%AF%BC%E5%85%A5%E5%AF%BC%E5%87%BA%E5%90%8C%E6%AD%A5%E6%95%B0%E6%8D%AE%E8%A3%81%E5%89%AA%E5%BF%AB%E7%85%A7)。 +- **Private-daemon**:内网的Miner全部连接Private-daemon,同时这个Daemon也启动在Winning-post-miner上,保证出块的Miner连接是最快的,考虑冗余的话,可以在Window-post-miner上再同步一个轻节点Daemon作为备份,关于轻节点Daemon,请[参照快照导出导入和快照剪裁](https://github.com/filguard/lotus-ops/blob/master/documents/daemon-operation.md#3-%E5%AF%BC%E5%85%A5%E5%AF%BC%E5%87%BA%E5%90%8C%E6%AD%A5%E6%95%B0%E6%8D%AE%E8%A3%81%E5%89%AA%E5%BF%AB%E7%85%A7)。 -- **Winning-post-miner**:只负责出块,因为这台机器负载比较低,所以在上面同时启动Daemon-private。另外,Winning-post-miner也是sector-counter的服务端,负责统一分配扇区ID,其他Miner(主要是Seal-miner和Deal-miner)都从这台机器上申请扇区ID。 +- **Winning-post-miner**:只负责出块,因为这台机器负载比较低,所以在上面同时启动Private-daemon。另外,Winning-post-miner也是sector-counter的服务端,负责统一分配扇区ID,其他Miner(主要是Seal-miner和Deal-miner)都从这台机器上申请扇区ID。 - **Window-post-miner**:只负责时空证明,上面可以同时同步一个备份的轻节点Daemon。 -- **Seal-miner**:负责分配任务,管理所有的Seal-worker,连接Daemon-private。 +- **Seal-miner**:负责分配任务,管理所有的Seal-worker,连接Private-daemon。 -- 
**Deal-miner**:负责接单,连接Daemon-private,需要配置`multiaddress`,需要连接几台Seal-worker,和上面Seal-miner连接的Worker不同,相当于是把所有的Seal-worker分成了2组,Seal-miner连接一组,Deal-miner连接一组。要根据订单的数量,来分配对应数量的Seal-worker,按照我们的经验,1 ~ 2 台Seal-worker就能满足订单密封的需要。 +- **Deal-miner**:负责接单,连接Private-daemon,需要配置`multiaddress`,需要连接几台Seal-worker,和上面Seal-miner连接的Worker不同,相当于是把所有的Seal-worker分成了2组,Seal-miner连接一组,Deal-miner连接一组。要根据订单的数量,来分配对应数量的Seal-worker,按照我们的经验,1 ~ 2 台Seal-worker就能满足订单密封的需要。 ### 7.3 分布式Miner如何切换回单Miner? 初始化一个不含任何元数据(扇区数据)的Winning-post-miner和Window-post-miner,专门用来做时空证明和爆块。切换回单Miner的时候,只需要停掉Winning-post-miner和Window-post-miner,然后在Seal-miner上开启`window-post`和`winning-post`功能即可,也就是以Seal-miner作为回退后的单Miner(因为Seal-miner上的数据是完整的,包含所有扇区数据,Winning-post-miner和Window-post-miner上没有扇区数据)。 diff --git a/documents/incorrect-sector.md b/documents/incorrect-sector.md deleted file mode 100644 index 738030a..0000000 --- a/documents/incorrect-sector.md +++ /dev/null @@ -1 +0,0 @@ -# 错误扇区处理 \ No newline at end of file diff --git a/documents/questions.md b/documents/questions.md new file mode 100644 index 0000000..81492d5 --- /dev/null +++ b/documents/questions.md @@ -0,0 +1,65 @@ +# 常见运维问题定位与解决 + +## 1 顽固扇区如何删除? +#### 1.1 什么是顽固扇区? +所谓顽固扇区,是指因Worker掉线、扇区超时等原因导致的,无法被成功调度并完成封装的扇区。 + +顽固扇区并**不是**指`SealPreCommit1Failed`,`PreCommitFailed`,`CommitFailed`这几种状态的扇区,因为这几种状态的扇区,均可直接执行`lotus-miner sectors remove --really-do-it `来直接删除。 + +顽固扇区一般是指`PreCommit1`,`PreCommit2`,`Committing`,`FinalizeSector`这几种正常状态的扇区,但是无法成功被调度并封装,执行以下两个命令也无法删除。 +```sh +lotus-miner sectors update-state --really-do-it Removing +lotus-miner sectors remove --really-do-it +``` + +#### 1.2 顽固扇区有什么影响?如何发现它? 
+顽固扇区会停留在扇区列表中,一直等待不断调度(但又无法调度成功),不仅浪费了调度系统的队列资源,还会影响扇区封装任务下发的数量。你会发现,扇区明明在列表中,就是无法封装成功,想删还删不掉,非常的痛苦。 + +因为Lotus的扇区号都是自增的,只要执行`lotus-miner sectors list --fast`查看一下扇区列表,那么列表的最后面,都是正在封装的扇区,而且这些扇区ID都是连续的。正常情况下,前面封装完的扇区都是`Proving`状态,如果看到前面`Proving`状态的扇区中夹杂了`PreCommit1`,`PreCommit2`,`Committing`这些状态的扇区,那这些就是顽固扇区了。 + + + +另外,也可以通过`lotus-miner sealing jobs | sort -k2`查看正在封装的任务列表,如果发现列表头部的扇区ID和后面的差距很大,如果差了几十个甚至更多,那这些扇区也就是顽固扇区了,或者是扇区封装已经进入死循环了,要尽快处理掉。 + +#### 1.3 顽固扇区如何删除? +顽固扇区已经无法被成功调度,只能采取“非常手段”来处理。 +顽固扇区无法被调度,是因为扇区本身的文件已经丢失了,那么可以创建一个空的扇区文件,让扇区“假装”在封装,只要能被调度上,出现在`lotus-miner sealing jobs`中,就好处理了。 + +##### `PreCommit1`,`PreCommit2`状态的顽固扇区处理 +针对`PreCommit1`,`PreCommit2`这两种状态的扇区,按如下的方法来处理: + +① 找一个空闲的Worker(没有任何封装任务),在`LOTUS_WORKER_PATH`的`unsealed`和`sealed`目录下,分别创建对应扇区ID的空扇区文件,假设顽固扇区的ID为100,那分别在`unsealed`和`sealed`目录下执行: +```sh +touch s-t0xxxxxx-100 +# 其中t0xxxxxx是矿工ID +``` + +② 重启该Worker,注意观察Worker的日志中,该扇区会不会开始封装,如果没有开始封装,可以再重启一下Miner。 + +③ 待该扇区开始封装,在`lotus-miner sealing jobs`列表中能看到以后,就可以执行命令先终止掉任务。 +```sh +lotus-miner sealing abort +``` +然后再执行删除该扇区。 +```sh +lotus-miner sectors remove +``` +执行了该命令以后,将会在Miner的日志中看到以下错误,不要惊慌,这种日志属于正常提示。 + + + +##### `Committing`状态的顽固扇区处理 +针对`Committing`状态的顽固扇区,同样是按照上面的方法,在`unsealed`和`sealed`目录下创建一个空的扇区文件。 + +但是**不同**的是,创建并重启Worker以后,该扇区并不会出现在`lotus-miner sealing jobs`列表中,而是直接变为`CommitFailed`,这个时候,执行以下命令删除扇区即可: +```sh +lotus-miner sectors remove +``` + +#### 1.4 删除以后还要做什么? +顽固扇区删除以后,最好重启一下Seal-Miner,这样扇区状态就会重新更新,顽固扇区也就不会被再次调度了。 + +## Worker掉线如何处理? + +## 任务积压如何处理? 
+ diff --git a/images/dirtributed-miner-architecture.png b/images/dirtributed-miner-architecture.png index 459b5f4..1539331 100644 Binary files a/images/dirtributed-miner-architecture.png and b/images/dirtributed-miner-architecture.png differ diff --git a/images/remove-error.png b/images/remove-error.png new file mode 100644 index 0000000..c7ae2d4 Binary files /dev/null and b/images/remove-error.png differ diff --git a/images/sectors-list.png b/images/sectors-list.png new file mode 100644 index 0000000..d0e4807 Binary files /dev/null and b/images/sectors-list.png differ diff --git a/scripts/build-amd.sh b/scripts/build-amd.sh index e2a53d9..b30544d 100755 --- a/scripts/build-amd.sh +++ b/scripts/build-amd.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -env RUSTFLAGS="-C target-cpu=native -g" FFI_BUILD_FROM_SOURCE=1 FFI_USE_GPU2=1 make clean all lotus-bench lotus-shed +env RUSTFLAGS="-C target-cpu=native -g" FFI_BUILD_FROM_SOURCE=1 FFI_USE_GPU2=0 make clean all lotus-bench lotus-shed sudo make install diff --git a/scripts/build-intel.sh b/scripts/build-intel.sh index 4bff7d1..446ada4 100755 --- a/scripts/build-intel.sh +++ b/scripts/build-intel.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -env CGO_CFLAGS_ALLOW="-D__BLST_PORTABLE__" RUSTFLAGS="-C target-cpu=native -g" FFI_BUILD_FROM_SOURCE=1 CGO_CFLAGS="-D__BLST_PORTABLE__" FFI_USE_GPU2=1 make clean all lotus-bench lotus-shed +env CGO_CFLAGS_ALLOW="-D__BLST_PORTABLE__" RUSTFLAGS="-C target-cpu=native -g" FFI_BUILD_FROM_SOURCE=1 CGO_CFLAGS="-D__BLST_PORTABLE__" FFI_USE_GPU2=0 make clean all lotus-bench lotus-shed sudo make install diff --git a/scripts/copy-nvidia-driver.sh b/scripts/copy-nvidia-driver.sh new file mode 100755 index 0000000..3958c97 --- /dev/null +++ b/scripts/copy-nvidia-driver.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +driverFile=$1 + +for i in h1{11..20}; +do + scp -r $driverFile fil@$i:~/ +done + +# commit worker +for i in h2{11..15}; +do + scp -r $driverFile fil@$i:~/ +done + diff --git 
a/scripts/install-node-exporter.sh b/scripts/install-node-exporter.sh index a083c11..1f12c5a 100755 --- a/scripts/install-node-exporter.sh +++ b/scripts/install-node-exporter.sh @@ -10,7 +10,7 @@ sudo mv node_exporter-0.18.1.linux-amd64/node_exporter /usr/local/bin/ sudo chmod a+x /usr/local/bin/node_exporter rm -rf $workspace/node_exporter-0.18.1.linux-amd64* -mkdir -p /home/$currentUser/disk_md0/prometheus/run +mkdir -p /home/$currentUser/prometheus/run sudo systemctl enable node-exporter sudo systemctl start node-exporter diff --git a/scripts/install-prometheus.sh b/scripts/install-prometheus.sh index 2fcb2a2..639999b 100755 --- a/scripts/install-prometheus.sh +++ b/scripts/install-prometheus.sh @@ -4,20 +4,29 @@ sudo mkdir -p /home/$currentUser/disk_md0/prometheus sudo tee /home/$currentUser/disk_md0/prometheus/prometheus.yaml <<-'EOF' scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'Worker' + - job_name: 'Lotus' scrape_interval: 5s static_configs: - - targets: ['10.0.1.11:9100', '10.0.1.12:9100', '10.0.1.13:9100', '10.0.1.14:9100', '10.0.1.15:9100', '10.0.2.11:9100'] - - - job_name: 'Daemon' - scrape_interval: 5s - static_configs: - - targets: ['10.0.99.10:9100', '10.0.99.11:9100'] - - - job_name: 'Miner' - scrape_interval: 5s - static_configs: - - targets: ['10.0.99.12:9100', '10.0.99.13:9100', '10.0.99.14:9100'] + - targets: + - 10.0.99.10:9100 + - 10.0.99.11:9100 + - 10.0.99.12:9100 + - 10.0.99.13:9100 + - 10.0.99.14:9100 + - 10.0.1.11:9100 + - 10.0.1.12:9100 + - 10.0.1.13:9100 + - 10.0.1.14:9100 + - 10.0.1.15:9100 + - 10.0.1.16:9100 + - 10.0.1.17:9100 + - 10.0.1.18:9100 + - 10.0.1.19:9100 + - 10.0.1.20:9100 + - 10.0.2.12:9100 + - 10.0.2.13:9100 + - 10.0.2.14:9100 + - 10.0.2.15:9100 EOF sudo docker pull prom/prometheus:latest @@ -31,4 +40,4 @@ sudo docker run -d \ --config.file=/prometheus/prometheus.yaml \ --storage.tsdb.path=/prometheus/database \ --storage.tsdb.retention.time=90d \ - 
--web.enable-admin-api \ No newline at end of file + --web.enable-admin-api diff --git a/scripts/lotus-version-check.sh b/scripts/lotus-version-check.sh new file mode 100755 index 0000000..f1cb2dc --- /dev/null +++ b/scripts/lotus-version-check.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +echo $(lotus -v) diff --git a/scripts/setup-alias.sh b/scripts/setup-alias.sh old mode 100644 new mode 100755 diff --git a/scripts/setup-ssd-raid-0.sh b/scripts/setup-ssd-raid-0.sh index 1893b37..1317543 100755 --- a/scripts/setup-ssd-raid-0.sh +++ b/scripts/setup-ssd-raid-0.sh @@ -2,7 +2,7 @@ currentUser=fil mountPoint=/home/$currentUser/disk_md0 -for i in {0..1}; +for i in {0..4}; do ssd=/dev/nvme${i}n1 echo $ssd @@ -20,7 +20,7 @@ echo "${ssd} was fdisked" sleep 1s done -mdadm --verbose --create /dev/md0 --chunk=128 --level=raid0 --raid-devices=2 /dev/nvme[0,1]n1p1 < /etc/mdadm/mdadm.conf echo "Update initramfs" -update-initramfs -u +# update-initramfs -u echo "Format" mkfs.xfs -f -d agcount=128,su=128k,sw=2 -r extsize=256k /dev/md0 @@ -47,4 +47,4 @@ chown $currentUser:$currentUser $mountPoint echo "Setup fstab" uuid=$(blkid -o export /dev/md0 | awk 'NR==2 {print}') -echo "${uid} ${mountPoint} xfs defaults 0 0" >> /etc/fstab \ No newline at end of file +echo "${uuid} ${mountPoint} xfs defaults 0 0" >> /etc/fstab diff --git a/scripts/ssh-copy-id.sh b/scripts/ssh-copy-id.sh index 9d4b2f2..655b9ee 100755 --- a/scripts/ssh-copy-id.sh +++ b/scripts/ssh-copy-id.sh @@ -1,19 +1,21 @@ #!/usr/bin/env bash -for i in m50 m51 + +for i in h1{16..20}; do ssh-keyscan $i >> ~/.ssh/known_hosts - ssh-copy-id $i + ssh-copy-id -f fil@$i done -for i in w{60..78}; +# commit worker +for i in h2{11..15}; do ssh-keyscan $i >> ~/.ssh/known_hosts - ssh-copy-id $i + ssh-copy-id fil@$i done -# commit worker -for i in w{80..84}; +# miner +for i in m{10..14}; do ssh-keyscan $i >> ~/.ssh/known_hosts - ssh-copy-id $i + ssh-copy-id fil@$i done diff --git a/scripts/start-bench.sh b/scripts/start-bench.sh index 
2661faa..6b60f05 100755 --- a/scripts/start-bench.sh +++ b/scripts/start-bench.sh @@ -1,5 +1,5 @@ export RUST_LOG=debug -export FIL_PROOFS_PARAMETER_CACHE=/home/fil/disk_md0/proof_params/v28 +export FIL_PROOFS_PARAMETER_CACHE=/home/fil/proof_params/v28 export FIL_PROOFS_MAXIMIZE_CACHING=1 export FIL_PROOFS_USE_GPU_COLUMN_BUILDER=1 export FIL_PROOFS_USE_GPU_TREE_BUILDER=1 @@ -8,4 +8,4 @@ export FIL_PROOFS_MULTICORE_SDR_PRODUCERS=1 # export ENV_CPU_CORE_BEGIN_NUM=0 # export ENV_CPU_CORE_END_NUM=8 -lotus-bench sealing --sector-size 32GiB --skip-unseal --num-sectors=1 --parallel=1 --storage-dir /home/fil/disk_md0/bench --skip-commit2 true --skip-unseal true 2>&1 | tee /home/fil/logs/bench.log +lotus-bench sealing --sector-size 32GiB --skip-unseal --num-sectors=8 --parallel=1 --storage-dir /home/fil/disk_md0/bench --skip-unseal true 2>&1 | tee /home/fil/logs/bench.log diff --git a/scripts/worker-pre-check.sh b/scripts/worker-pre-check.sh old mode 100644 new mode 100755 index 2330458..3dc1214 --- a/scripts/worker-pre-check.sh +++ b/scripts/worker-pre-check.sh @@ -2,4 +2,4 @@ echo $(nvidia-smi | grep "GeForce") echo $(df -hl | grep "disk_md0") -echo "内存:"$(free -g | grep Mem | awk '{print $2}')G \ No newline at end of file +echo "内存:"$(free -gh | grep Mem | awk '{print $2}')