diff --git a/how-to/deploy/orchestrated/tiup.md b/how-to/deploy/orchestrated/tiup.md index 1b8044067a3c..789721bdad50 100644 --- a/how-to/deploy/orchestrated/tiup.md +++ b/how-to/deploy/orchestrated/tiup.md @@ -46,8 +46,8 @@ category: how-to - 在 AMD64 架构下,建议使用 CentOS 7.3 及以上版本 Linux 操作系统 - 在 ARM 架构下,建议使用 CentOS 7.6 1810 版本 Linux 操作系统 - TiKV 数据文件的文件系统推荐使用 EXT4 格式,也可以使用 CentOS 默认的 XFS 格式(参考[第 3 步](#第-3-步在-tikv-部署目标机器上添加数据盘-ext4-文件系统挂载参数)) -- 机器之间内网互通(建议关闭防火墙 `firewalld`,或者开放 TiDB 集群的节点间所需端口) -- 如果需要绑核操作,需要安装 `numactl` 工具 +- 机器之间内网互通(建议[关闭防火墙 `firewalld`](#如何关闭部署机器的防火墙),或者开放 TiDB 集群的节点间所需端口) +- 如果需要绑核操作,需要[安装 `numactl` 工具](#如何安装-numactl-工具) 其他软硬件环境配置可参考官方文档 [TiDB 软件和硬件环境建议配置](/how-to/deploy/hardware-recommendations.md)。 @@ -147,8 +147,8 @@ category: how-to # start 启动 TiDB 集群 # stop 关闭 TiDB 集群 # restart 重启 TiDB 集群 - # scale-in 扩容 TiDB 集群 - # scale-out 缩容 TiDB 集群 + # scale-in 缩容 TiDB 集群 + # scale-out 扩容 TiDB 集群 # destroy 销毁指定 TiDB 集群 # upgrade 升级指定 TiDB 集群 # exec 在 TiDB 集群的目标主机执行命令 @@ -168,7 +168,7 @@ category: how-to ```shell tiup update cluster ``` - + 预期输出如下 `“Update successfully!”` 字样: ```log @@ -334,26 +334,45 @@ cat topology.yaml ``` ```yaml -# Global variables are applied to all deployments and as the default value of -# them if the specific deployment value missing. +# # Global variables are applied to all deployments and as the default value of +# # them if the specific deployment value missing. + global: user: "tidb" ssh_port: 22 deploy_dir: "/tidb-deploy" data_dir: "/tidb-data" +# # Monitored variables are used to all the machine monitored: - deploy_dir: "/tidb-deploy/monitored-9100" - data_dir: "/tidb-data/monitored-9100" - log_dir: "/tidb-deploy/monitored-9100/log" + node_exporter_port: 9100 + blackbox_exporter_port: 9115 + # deploy_dir: "/tidb-deploy/monitored-9100" + # data_dir: "/tidb-data/monitored-9100" + # log_dir: "/tidb-deploy/monitored-9100/log" + +# # Server configs are used to specify the runtime configuration of TiDB components +# # All configuration items can be found in TiDB docs: +# # - TiDB: https://pingcap.com/docs/stable/reference/configuration/tidb-server/configuration-file/ +# # - TiKV: https://pingcap.com/docs/stable/reference/configuration/tikv-server/configuration-file/ +# # - PD: https://pingcap.com/docs/stable/reference/configuration/pd-server/configuration-file/ +# # All configuration items use points to represent the hierarchy, e.g: +# # readpool.storage.use-unified-pool +# # ^ ^ +# # You can overwrite this configuration via instance-level `config` field server_configs: tidb: log.slow-threshold: 300 - log.level: warn binlog.enable: false binlog.ignore-error: false tikv: + # server.grpc-concurrency: 4 + # raftstore.apply-pool-size: 2 + # raftstore.store-pool-size: 2 + # rocksdb.max-sub-compactions: 1 + # storage.block-cache.capacity: "16GB" + # readpool.unified.max-thread-count: 12 readpool.storage.use-unified-pool: true readpool.coprocessor.use-unified-pool: true pd: @@ -361,7 +380,10 @@ server_configs: schedule.region-schedule-limit: 2048 schedule.replica-schedule-limit: 64 replication.enable-placement-rules: true - + tiflash: + logger.level: "info" + # pump: + # gc: 7 pd_servers: - host: 10.0.1.4 @@ -369,9 +391,9 @@ pd_servers: # name: "pd-1" # client_port: 2379 # peer_port: 2380 - # deploy_dir: "deploy/pd-2379" - # data_dir: "data/pd-2379" - # log_dir: "deploy/pd-2379/log" + # deploy_dir: "/tidb-deploy/pd-2379" + # data_dir: "/tidb-data/pd-2379" + # log_dir: "/tidb-deploy/pd-2379/log" # numa_node: "0,1" # # Config is used to overwrite the 
`server_configs.pd` values # config: @@ -379,63 +401,114 @@ pd_servers: # schedule.max-merge-region-keys: 200000 - host: 10.0.1.5 - host: 10.0.1.6 + tidb_servers: - host: 10.0.1.7 # ssh_port: 22 # port: 4000 # status_port: 10080 - # deploy_dir: "deploy/tidb-4000" - # log_dir: "deploy/tidb-4000/log" + # deploy_dir: "/tidb-deploy/tidb-4000" + # log_dir: "/tidb-deploy/tidb-4000/log" # numa_node: "0,1" # # Config is used to overwrite the `server_configs.tidb` values # config: - # log.level: warn - # log.slow-query-file: tidb-slow-overwritten.log + # log.slow-query-file: tidb-slow-overwrited.log - host: 10.0.1.8 - host: 10.0.1.9 + tikv_servers: - host: 10.0.1.1 # ssh_port: 22 # port: 20160 # status_port: 20180 - # deploy_dir: "deploy/tikv-20160" - # data_dir: "data/tikv-20160" - # log_dir: "deploy/tikv-20160/log" + # deploy_dir: "/tidb-deploy/tikv-20160" + # data_dir: "/tidb-data/tikv-20160" + # log_dir: "/tidb-deploy/tikv-20160/log" # numa_node: "0,1" # # Config is used to overwrite the `server_configs.tikv` values - # config: - # server.labels: - # zone: sh - # dc: sha - # rack: rack1 - # host: host1 + # config: + # server.grpc-concurrency: 4 + # server.labels: { zone: "zone1", dc: "dc1", host: "host1" } - host: 10.0.1.2 - host: 10.0.1.3 + tiflash_servers: - host: 10.0.1.10 - # ssh_port: 22 - # tcp_port: 9000 - # http_port: 8123 - # flash_service_port: 3930 - # flash_proxy_port: 20170 - # flash_proxy_status_port: 20292 - # metrics_port: 8234 - # deploy_dir: deploy/tiflash-9000 - # data_dir: deploy/tiflash-9000/data - # log_dir: deploy/tiflash-9000/log - # numa_node: "0,1" - # # Config is used to overwrite the `server_configs.tiflash` values - # config: - # logger: - # level: "info" - # learner_config: - # log-level: "info" + # ssh_port: 22 + # tcp_port: 9000 + # http_port: 8123 + # flash_service_port: 3930 + # flash_proxy_port: 20170 + # flash_proxy_status_port: 20292 + # metrics_port: 8234 + # deploy_dir: /tidb-deploy/tiflash-9000 + # data_dir: /tidb-data/tiflash-9000 + # log_dir: /tidb-deploy/tiflash-9000/log + # numa_node: "0,1" + # # Config is used to overwrite the `server_configs.tiflash` values + # config: + # logger.level: "info" + # learner_config: + # log-level: "info" + # - host: 10.0.1.15 + # - host: 10.0.1.16 + +# pump_servers: +# - host: 10.0.1.17 +# ssh_port: 22 +# port: 8250 +# deploy_dir: "/tidb-deploy/pump-8249" +# data_dir: "/tidb-data/pump-8249" +# log_dir: "/tidb-deploy/pump-8249/log" +# numa_node: "0,1" +# # Config is used to overwrite the `server_configs.drainer` values +# config: +# gc: 7 +# - host: 10.0.1.18 +# - host: 10.0.1.19 + +# drainer_servers: +# - host: 10.0.1.17 +# port: 8249 +# data_dir: "/tidb-data/drainer-8249" +# # if drainer doesn't have checkpoint, use initial commitTS to initial checkpoint +# # will get a latest timestamp from pd if setting to be -1 (default -1) +# commit_ts: -1 +# deploy_dir: "/tidb-deploy/drainer-8249" +# log_dir: "/tidb-deploy/drainer-8249/log" +# numa_node: "0,1" +# # Config is used to overwrite the `server_configs.drainer` values +# config: +# syncer.db-type: "mysql" +# syncer.to.host: "127.0.0.1" +# syncer.to.user: "root" +# syncer.to.password: "" +# syncer.to.port: 3306 +# - host: 10.0.1.19 + + monitoring_servers: - host: 10.0.1.4 + # ssh_port: 22 + # port: 9090 + # deploy_dir: "/tidb-deploy/prometheus-8249" + # data_dir: "/tidb-data/prometheus-8249" + # log_dir: "/tidb-deploy/prometheus-8249/log" + grafana_servers: - host: 10.0.1.4 + # port: 3000 + # deploy_dir: /tidb-deploy/grafana-3000 + alertmanager_servers: - host: 10.0.1.4 
+ # ssh_port: 22 + # web_port: 9093 + # cluster_port: 9094 + # deploy_dir: "/tidb-deploy/alertmanager-9093" + # data_dir: "/tidb-data/alertmanager-9093" + # log_dir: "/tidb-deploy/alertmanager-9093/log" + ``` ### 场景 2:单机多实例 @@ -450,19 +523,36 @@ alertmanager_servers: - TiKV 进行配置优化 - - readpool 线程池自适应,配置 `readpool.unified.max-thread-count` 参数可以使 `readpool.storage` 和 `readpool.coprocessor` 共用统一线程池,同时要分别开启自适应开关。计算公式如下: - - ``` - readpool.unified.max-thread-count = cores * 0.8 / TiKV 数量 - ``` - - - storage CF (all RocksDB column families) 内存自适应,配置 `storage.block-cache.capacity` 参数即可实现 CF 之间自动平衡内存使用。计算公式如下: - - ``` - storage.block-cache.capacity = (MEM_TOTAL * 0.5 / TiKV 实例数量) - ``` - - - 如果多个 TiKV 实例部署在同一块物理磁盘上,需要修改 `conf/tikv.yml` 中的 capacity 参数: + - readpool 线程池自适应,配置 `readpool.unified.max-thread-count` 参数可以使 `readpool.storage` 和 `readpool.coprocessor` 共用统一线程池,同时要分别开启自适应开关。 + + - 开启 `readpool.storage` 和 `readpool.coprocessor`: + + ```yaml + readpool.storage.use-unified-pool: true + readpool.coprocessor.use-unified-pool: true + ``` + + - 计算公式如下: + + ``` + readpool.unified.max-thread-count = cores * 0.8 / TiKV 数量 + ``` + + - storage CF (all RocksDB column families) 内存自适应,配置 `storage.block-cache.capacity` 参数即可实现 CF 之间自动平衡内存使用。 + + - `storage.block-cache` 默认开启 CF 自适应,无需修改。 + + ```yaml + storage.block-cache.shared: true + ``` + + - 计算公式如下: + + ``` + storage.block-cache.capacity = (MEM_TOTAL * 0.5 / TiKV 实例数量) + ``` + + - 如果多个 TiKV 实例部署在同一块物理磁盘上,需要在 tikv 配置中添加 capacity 参数: ``` raftstore.capactiy = 磁盘总容量 / TiKV 实例数量 @@ -514,6 +604,8 @@ alertmanager_servers: > > - 配置文件模版时,注意修改必要参数、IP、端口及目录。 > +> - 各个组件的 deploy_dir,默认会使用 global 中的 /-。例如 tidb 端口指定 4001,则 deploy_dir 默认为 /tidb-deploy/tidb-4001。因此,在多实例场景下指定非默认端口时,无需再次指定目录。 +> > - [部署 TiFlash](/reference/tiflash/deploy.md) 需要在 topology.yaml 配置文件中将 `replication.enable-placement-rules` 设置为 `true`,以开启 PD 的 [Placement Rules](/how-to/configure/placement-rules.md) 功能。 > > - tiflash_servers 实例级别配置 `"-host"` 目前只支持 IP,不支持域名。 @@ -606,8 +698,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20160/log" numa_node: "0" config: - server.labels: - host: tikv1 + server.labels: { host: "tikv1" } - host: 10.0.1.1 port: 20161 status_port: 20181 @@ -616,8 +707,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20161/log" numa_node: "1" config: - server.labels: - host: tikv1 + server.labels: { host: "tikv1" } - host: 10.0.1.2 port: 20160 status_port: 20180 @@ -626,8 +716,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20160/log" numa_node: "0" config: - server.labels: - host: tikv2 + server.labels: { host: "tikv2" } - host: 10.0.1.2 port: 20161 status_port: 20181 @@ -636,8 +725,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20161/log" numa_node: "1" config: - server.labels: - host: tikv2 + server.labels: { host: "tikv2" } - host: 10.0.1.3 port: 20160 status_port: 20180 @@ -646,8 +734,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20160/log" numa_node: "0" config: - server.labels: - host: tikv3 + server.labels: { host: "tikv3" } - host: 10.0.1.3 port: 20161 status_port: 20181 @@ -656,8 +743,7 @@ tikv_servers: log_dir: "/tidb-deploy/tikv-20161/log" numa_node: "1" config: - server.labels: - host: tikv3 + server.labels: { host: "tikv3" } tiflash_servers: - host: 10.0.1.10 data_dir: /data1/tiflash/data @@ -832,7 +918,7 @@ Flags: # Flags 可选参数,有以下的作用: # 通过 -h 可以查看帮助; # 通过 -i 执行权限认证; -# --user 通过指定用户来完成 ssh 登录,默认为 root 用户; +# --user 通过指定用户来完成 ssh 登录,该用户必须拥有 sudo 权限,默认为 root 用户; # -y 提过拓扑信息确认直接执行部署任务 ``` @@ -841,6 +927,7 @@ Flags: > 通过 TiUP 进行集群部署可以使用密钥或者交互密码方式来进行安全认证: > > - 如果是密钥方式,可以通过 `-i` 
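以前文场景 2 中的计算公式做一个换算示例(假设单台机器为 64 核 CPU、256 GB 内存、多实例共用一块 4 TB 磁盘、部署 2 个 TiKV 实例,数值仅作示意,请按实际硬件换算):

```
readpool.unified.max-thread-count = 64 * 0.8 / 2 ≈ 25
storage.block-cache.capacity = 256 GB * 0.5 / 2 = 64 GB
raftstore.capacity = 4 TB / 2 = 2 TB
```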
或者 `--identity_file` 来指定密钥的路径; +> > - 如果是密码方式,无需添加其他参数,`Enter` 即可进入密码交互窗口。 ### 第 5 步:执行部署命令 @@ -848,78 +935,17 @@ Flags: {{< copyable "shell-regular" >}} ```shell -tiup cluster deploy tidb-test v4.0.0-beta.2 ./topology.yaml --user root -i /home/root/.ssh/gcp_rsa +tiup cluster deploy tidb-test v4.0.0-rc ./topology.yaml --user root -i /home/root/.ssh/gcp_rsa ``` 以上部署命令中: - 通过 TiUP cluster 部署的集群名称为 `tidb-test` -- 部署版本为 `v4.0.0-beta.2` +- 部署版本为 `v4.0.0-rc`,其他版本可以参考[如何查看 TiUP 支持管理的 TiDB 版本](#如何查看-tiup-支持管理的-tidb-版本)的介绍 - 初始化配置文件为 `topology.yaml` -- 通过 root 的密钥登录到目标主机完成集群部署,也可以用其他有 ssh 和 sudo 权限的用户完成部署 +- 通过 root 的密钥登录到目标主机完成集群部署,也可以用其他有 ssh 和 sudo 权限的用户完成部署。 -预期日志输出样例,部署成功会有 `Started cluster tidb-test successfully` 关键词: - -```log -Starting /home/tidb/.tiup/components/cluster/v0.0.9/cluster deploy tidb-test v4.0.0-beta.2 ./topology.yaml --user root --identity_file /home/root/.ssh/gcp_rsa -Please confirm your topology: -TiDB Cluster: tidb-test -TiDB Version: v4.0.0-beta.2 -Type Host Ports Directories ----- ---- ----- ----------- -pd 10.0.1.4 2379/2380 /tidb-deploy/pd-2379,/tidb-data/pd-2379 -pd 10.0.1.5 2379/2380 /tidb-deploy/pd-2379,/tidb-data/pd-2379 -pd 10.0.1.6 2379/2380 /tidb-deploy/pd-2379,/tidb-data/pd-2379 -tikv 10.0.1.1 2060/20080 /tidb-deploy/tikv-2060,/tidb-data/tikv-2060 -tikv 10.0.1.2 2060/20080 /tidb-deploy/tikv-2060,/tidb-data/tikv-2060 -tikv 10.0.1.3 2060/20080 /tidb-deploy/tikv-2060,/tidb-data/tikv-2060 -tidb 10.0.1.7 4000/10080 /tidb-deploy/tidb-4000 -tidb 10.0.1.8 4000/10080 /tidb-deploy/tidb-4000 -tidb 10.0.1.9 4000/10080 /tidb-deploy/tidb-4000 -prometheus 10.0.1.4 9090 /tidb-deploy/prometheus-9090,/tidb-data/prometheus-9090 -grafana 10.0.1.4 3000 /tidb-deploy/grafana-3000 -alertmanager 10.0.1.4 9104/9105 /tidb-deploy/alertmanager-9104,/tidb-data/alertmanager-9104 -Attention: - 1. If the topology is not what you expected, check your yaml file. - 2. Please confirm there is no port/directory conflicts in same host. -Do you want to continue? [y/N]: y -Input SSH password: -+ [ Serial ] - SSHKeyGen: path=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa -+ [Parallel] - Download: component=blackbox_exporter, version=v0.12.0 -+ [Parallel] - Download: component=pd, version=v4.0.0-beta.2 -+ [Parallel] - Download: component=node_exporter, version=v0.17.0 -+ [Parallel] - Download: component=tikv, version=v4.0.0-beta.2 -+ [Parallel] - Download: component=tidb, version=v4.0.0-beta.2 -+ [Parallel] - Download: component=alertmanager, version=v0.17.0 -+ [Parallel] - Download: component=prometheus, version=v2.8.1 -+ [Parallel] - Download: component=grafana, version=v6.1.6 - -...... 部分日志忽略...... 
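若采用交互式密码认证而非密钥,可去掉 `-i` 参数,执行后按提示输入目标机器 root 用户的 SSH 密码即可(示例命令,集群名、版本号与拓扑文件请按实际情况替换):

{{< copyable "shell-regular" >}}

```shell
tiup cluster deploy tidb-test v4.0.0-rc ./topology.yaml --user root
```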
- -Checking service state of pd - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 09:54:22 CST; 50s ago -Checking service state of tikv - 10.0.1.1 - Active: active (running) since Saturday 2020-04-04 09:54:35 CST; 38s ago - 10.0.1.2 - Active: active (running) since Saturday 2020-04-04 09:54:38 CST; 37s ago - 10.0.1.3 - Active: active (running) since Saturday 2020-04-04 09:54:41 CST; 35s ago -Checking service state of tidb - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 09:54:56 CST; 22s ago -Checking service state of prometheus - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 09:55:03 CST; 16s ago -Checking service state of grafana - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 09:55:05 CST; 16s ago -Checking service state of alertmanager - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 09:55:08 CST; 14s ago -Started cluster `tidb-test` successfully -``` +预期日志结尾输出会有 Deployed cluster `tidb-test` successfully 关键词,表示部署成功。 ## 4. 验证集群部署状态 @@ -957,7 +983,7 @@ tiup cluster list Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster list Name User Version Path PrivateKey ---- ---- ------- ---- ---------- -tidb-test tidb v4.0.0-beta.2 /home/tidb/.tiup/storage/cluster/clusters/tidb-test /home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa +tidb-test tidb v4.0.0-rc /home/tidb/.tiup/storage/cluster/clusters/tidb-test /home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa ``` ### 第 7 步:检查 `tidb-test` 集群情况 @@ -968,26 +994,27 @@ tidb-test tidb v4.0.0-beta.2 /home/tidb/.tiup/storage/cluster/clusters tiup cluster display tidb-test ``` -预期输出包括实例 ID、角色、主机、监听端口和状态(为启动,所以状态为 Down/inactive)、目录信息: +预期输出包括实例 ID、角色、主机、监听端口和状态(由于还未启动,所以状态为 Down/inactive)、目录信息: ```log Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster display tidb-test TiDB Cluster: tidb-test -TiDB Version: v4.0.0-beta.2 -ID Role Host Ports Status Data Dir Deploy Dir --- ---- ---- ----- ------ -------- ---------- -10.0.1.4:9104 alertmanager 10.0.1.4 9104/9105 inactive /tidb-data/alertmanager-9104 /tidb-deploy/alertmanager-9104 -10.0.1.4:3000 grafana 10.0.1.4 3000 inactive - /tidb-deploy/grafana-3000 -10.0.1.4:2379 pd 10.0.1.4 2379/2380 Down /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.5:2379 pd 10.0.1.5 2379/2380 Down /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.6:2379 pd 10.0.1.6 2379/2380 Down /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.4:9090 prometheus 10.0.1.4 9090 inactive /tidb-data/prometheus-9090 /tidb-deploy/prometheus-9090 -10.0.1.7:4000 tidb 10.0.1.7 4000/10080 Down - /tidb-deploy/tidb-4000 -10.0.1.8:4000 tidb 10.0.1.8 4000/10080 Down - /tidb-deploy/tidb-4000 -10.0.1.9:4000 tidb 10.0.1.9 4000/10080 Down - /tidb-deploy/tidb-4000 -10.0.1.1:20160 tikv 10.0.1.1 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 -10.0.1.2:20160 tikv 10.0.1.2 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 -10.0.1.3:20160 tikv 10.0.1.4 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 +TiDB Version: v4.0.0-rc +ID Role Host Ports Status Data Dir Deploy Dir +-- ---- ---- ----- ------ -------- ---------- +10.0.1.4:9104 alertmanager 10.0.1.4 9104/9105 inactive /tidb-data/alertmanager-9104 /tidb-deploy/alertmanager-9104 +10.0.1.4:3000 grafana 10.0.1.4 3000 inactive - /tidb-deploy/grafana-3000 +10.0.1.4:2379 pd 10.0.1.4 2379/2380 Down /tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.5:2379 pd 10.0.1.5 2379/2380 Down /tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.6:2379 pd 10.0.1.6 2379/2380 Down 
/tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.4:9090 prometheus 10.0.1.4 9090 inactive /tidb-data/prometheus-9090 /tidb-deploy/prometheus-9090 +10.0.1.7:4000 tidb 10.0.1.7 4000/10080 Down - /tidb-deploy/tidb-4000 +10.0.1.8:4000 tidb 10.0.1.8 4000/10080 Down - /tidb-deploy/tidb-4000 +10.0.1.9:4000 tidb 10.0.1.9 4000/10080 Down - /tidb-deploy/tidb-4000 +10.0.1.10:9000 tiflash 10.0.1.4 9000/8123/3930/20170/20292/8234 Down /tidb-data-lzs/tiflash-10000 /tidb-deploy-lzs/tiflash-10000 +10.0.1.1:20160 tikv 10.0.1.1 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 +10.0.1.2:20160 tikv 10.0.1.2 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 +10.0.1.3:20160 tikv 10.0.1.4 20160/20180 Down /tidb-data/tikv-20160 /tidb-deploy/tikv-2060 ``` ## 5. 启动集群 @@ -1000,53 +1027,7 @@ ID Role Host Ports Status Data Dir tiup cluster start tidb-test ``` -预期日志输出样例,结果输出返回 `Started cluster tidb-test successfully` ,说明启动成功: - -```log -Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster start tidb-test -+ [ Serial ] - SSHKeySet: privateKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa, publicKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa.pub -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.1 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.2 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [ Serial ] - ClusterOperate: operation=StartOperation, options={Roles:[] Nodes:[] Force:false} -Starting component pd - Starting instance pd 10.0.1.4:2379 - Start pd 10.0.1.4:2379 success -Starting component node_exporter - Starting instance 10.0.1.4 - Start 10.0.1.4 success - -...... 部分日志忽略...... - - Checking service state of pd - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:04 CST; 43s ago -Checking service state of tikv - 10.0.1.1 - Active: active (running) since Saturday 2020-04-04 01:08:15 CST; 33s ago - 10.0.1.2 - Active: active (running) since Saturday 2020-04-04 01:08:18 CST; 31s ago - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:21 CST; 29s ago -Checking service state of tidb - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:36 CST; 16s ago -Checking service state of prometheus - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:39 CST; 15s ago -Checking service state of grafana - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:41 CST; 14s ago -Checking service state of alertmanager - 10.0.1.4 - Active: active (running) since Saturday 2020-04-04 01:08:44 CST; 12s ago -Started cluster `tidb-test` successfully -``` +预期结果输出 `Started cluster tidb-test successfully` 标志启动成功。 ## 6. 
验证集群运行状态 @@ -1063,28 +1044,29 @@ tiup cluster display tidb-test ```log Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster display tidb-test TiDB Cluster: tidb-test -TiDB Version: v4.0.0-beta.2 -ID Role Host Ports Status Data Dir Deploy Dir --- ---- ---- ----- ------ -------- ---------- -10.0.1.4:9104 alertmanager 10.0.1.4 9104/9105 Up /tidb-data/alertmanager-9104 /tidb-deploy/alertmanager-9104 -10.0.1.4:3000 grafana 10.0.1.4 3000 Up - /tidb-deploy/grafana-3000 -10.0.1.4:2379 pd 10.0.1.4 2379/2380 Healthy|L /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.5:2379 pd 10.0.1.5 2379/2380 Healthy /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.6:2379 pd 10.0.1.6 2379/2380 Healthy /tidb-data/pd-2379 /tidb-deploy/pd-2379 -10.0.1.4:9090 prometheus 10.0.1.4 9090 Up /tidb-data/prometheus-9090 /tidb-deploy/prometheus-9090 -10.0.1.7:4000 tidb 10.0.1.7 4000/10080 Up - /tidb-deploy/tidb-4000 -10.0.1.8:4000 tidb 10.0.1.8 4000/10080 Up - /tidb-deploy/tidb-4000 -10.0.1.9:4000 tidb 10.0.1.9 4000/10080 Up - /tidb-deploy/tidb-4000 -10.0.1.1:2060 tikv 10.0.1.1 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 -10.0.1.2:2060 tikv 10.0.1.2 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 -10.0.1.3:2060 tikv 10.0.1.4 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 +TiDB Version: v4.0.0-rc +ID Role Host Ports Status Data Dir Deploy Dir +-- ---- ---- ----- ------ -------- ---------- +10.0.1.4:9104 alertmanager 10.0.1.4 9104/9105 Up /tidb-data/alertmanager-9104 /tidb-deploy/alertmanager-9104 +10.0.1.4:3000 grafana 10.0.1.4 3000 Up - /tidb-deploy/grafana-3000 +10.0.1.4:2379 pd 10.0.1.4 2379/2380 Healthy|L /tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.5:2379 pd 10.0.1.5 2379/2380 Healthy /tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.6:2379 pd 10.0.1.6 2379/2380 Healthy /tidb-data/pd-2379 /tidb-deploy/pd-2379 +10.0.1.4:9090 prometheus 10.0.1.4 9090 Up /tidb-data/prometheus-9090 /tidb-deploy/prometheus-9090 +10.0.1.7:4000 tidb 10.0.1.7 4000/10080 Up - /tidb-deploy/tidb-4000 +10.0.1.8:4000 tidb 10.0.1.8 4000/10080 Up - /tidb-deploy/tidb-4000 +10.0.1.9:4000 tidb 10.0.1.9 4000/10080 Up - /tidb-deploy/tidb-4000 +10.0.1.10:9000 tiflash 10.0.1.4 9000/8123/3930/20170/20292/8234 Up /tidb-data-lzs/tiflash-9000 /tidb-deploy-lzs/tiflash-9000 +10.0.1.1:2060 tikv 10.0.1.1 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 +10.0.1.2:2060 tikv 10.0.1.2 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 +10.0.1.3:2060 tikv 10.0.1.4 2060/20080 Up /tidb-data/tikv-2060 /tidb-deploy/tikv-2060 ``` ### 第 10 步:通过 TiDB Dashboard 和 Grafana 检查集群状态 #### 查看 TiDB Dashboard 检查 TiDB Cluster 状态 -- 通过 `{pd-leader-ip}:2379/dashboard` 登录 TiDB Dashboard +- 通过 `{pd-leader-ip}:2379/dashboard` 登录 TiDB Dashboard,登陆用户和口令为 TiDB 数据库 root 用户和口令,如果你修改过数据库的 root 密码,则以修改后的密码为准,默认密码为空。 ![TiDB-Dashboard](/media/tiup/tidb-dashboard.png) @@ -1102,7 +1084,7 @@ ID Role Host Ports Status Data Dir ![Grafana-overview](/media/tiup/grafana-overview.png) -### 登录数据库执行简单 DML、DDL 操作和查询 SQL 语句 +### 第 11 步:登录数据库执行简单 DML、DDL 操作和查询 SQL 语句 > **注意:** > @@ -1208,50 +1190,7 @@ Bye cluster stop tidb-test ``` -预期结果输出,注意 `Stopped cluster tidb-test successfully` 标志关闭成功: - -```log -Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster stop tidb-test -+ [ Serial ] - SSHKeySet: privateKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa, publicKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa.pub -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] 
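前文第 10 步提到 TiDB Dashboard 使用数据库 root 用户和口令登录、默认密码为空;如需修改 root 密码,可以使用 MySQL 客户端连接任一 TiDB 节点后执行修改(示例命令,IP 取前文拓扑中的 10.0.1.7,新密码请自行替换):

{{< copyable "shell-regular" >}}

```shell
mysql -u root -h 10.0.1.7 -P 4000 -e "SET PASSWORD FOR 'root'@'%' = 'MyNewPass123';"
```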
- UserSSH: user=tidb, host=10.0.1.5 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.5 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.2 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.1 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [ Serial ] - ClusterOperate: operation=StopOperation, options={Roles:[] Nodes:[] Force:false} -Stopping component alertmanager - Stopping instance 10.0.1.4 - Stop alertmanager 10.0.1.4:9104 success - -...... 部分日志忽略...... - -Checking service state of pd - 10.0.1.5 - Active: inactive (dead) since Saturday 2020-04-04 15:35:42 CST; 15s ago -Checking service state of tikv - 10.0.1.1 - Active: inactive (dead) since Saturday 2020-04-04 15:35:21 CST; 38s ago - 10.0.1.2 - Active: inactive (dead) since Saturday 2020-04-04 15:35:23 CST; 37s ago - 10.0.1.3 - Active: inactive (dead) since Saturday 2020-04-04 15:35:24 CST; 37s ago -Checking service state of tidb - 10.0.1.5 - Active: inactive (dead) since Saturday 2020-04-04 15:35:15 CST; 49s ago -Checking service state of prometheus - 10.0.1.4 - Active: inactive (dead) since Saturday 2020-04-04 15:35:12 CST; 53s ago -Checking service state of grafana - 10.0.1.4 - Active: inactive (dead) since Saturday 2020-04-04 15:35:10 CST; 56s ago -Checking service state of alertmanager - 10.0.1.4 - Active: inactive (dead) since Saturday 2020-04-04 15:35:09 CST; 59s ago -Stopped cluster `tidb-test` successfully -``` +预期结果输出 `Stopped cluster tidb-test successfully` 标志关闭成功。 ## 8. 销毁集群 @@ -1267,48 +1206,7 @@ Stopped cluster `tidb-test` successfully tiup cluster destroy tidb-test ``` -预期结果输出,注意 `Destroy cluster tidb-test successfully` 标志销毁成功: - -```log -Starting /home/tidb/.tiup/components/cluster/v0.4.3/cluster destroy tidb-test -This operation will destroy TiDB v4.0.0-beta.2 cluster tidb-test and its data. -Do you want to continue? [y/N]: y -Destroying cluster... -+ [ Serial ] - SSHKeySet: privateKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa, publicKey=/home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa.pub -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.2 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.1 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [Parallel] - UserSSH: user=tidb, host=10.0.1.4 -+ [ Serial ] - ClusterOperate: operation=StopOperation, options={Roles:[] Nodes:[] Force:false} -Stopping component alertmanager - Stopping instance 10.0.1.4 - Stop alertmanager 10.0.1.4:9104 success - -...... 部分日志忽略...... - - Destroy monitored on 10.0.1.1 success -Destroying monitored 10.0.1.2 -Destroying monitored - Destroying instance 10.0.1.2 -Destroy monitored on 10.0.1.2 success -Destroying monitored 10.0.1.4 -Destroying monitored - Destroying instance 10.0.1.4 -Destroy monitored on 10.0.1.4 success -Destroying component pd -Destroying instance 10.0.1.4 -Deleting paths on 10.0.1.4: /tidb-data/pd-2379 /tidb-deploy/pd-2379 /tidb-deploy/pd-2379/log /etc/systemd/system/pd-2379.service -Destroy 10.0.1.4 success -Destroying monitored 10.0.1.4 -Destroying monitored - Destroying instance 10.0.1.4 -Destroy monitored on 10.0.1.4 success -Destroyed cluster `tidb-test` successfully -``` +预期结果输出 `Destroy cluster tidb-test successfully` 标志销毁成功。 ## 9. 
常见部署问题 @@ -1382,7 +1280,6 @@ tidb_servers: numa_node: "0,1" # Config is used to overwrite the `server_configs.tidb` values config: - log.level: warn log.slow-query-file: tidb-slow-overwritten.log ``` @@ -1403,21 +1300,23 @@ tidb_servers: ```yaml server_configs: tidb: - binlog.enable: false - binlog.ignore-error: false + log.slow-threshold: 300 + binlog.enable: false + binlog.ignore-error: false tikv: - readpool.storage.low-concurrency: 8 - server.labels: - zone: sh - dc: sha - rack: rack1 - host: host1 + # server.grpc-concurrency: 4 + # raftstore.apply-pool-size: 2 + # raftstore.store-pool-size: 2 + # rocksdb.max-sub-compactions: 1 + # storage.block-cache.capacity: "16GB" + # readpool.unified.max-thread-count: 12 + readpool.storage.use-unified-pool: true + readpool.coprocessor.use-unified-pool: true pd: - replication.enable-placement-rules: true - label-property: - reject-leader: - - key: "dc" - value: "bja" + schedule.leader-schedule-limit: 4 + schedule.region-schedule-limit: 2048 + schedule.replica-schedule-limit: 64 + replication.enable-placement-rules: true pump: gc: 7 ``` @@ -1433,7 +1332,76 @@ tidb_servers: log_dir: "deploy/monitored-9100/log" ``` -### 检测 NTP 服务是否正常 +### 如何查看 TiUP 支持管理的 TiDB 版本 + +执行如下命令查看 TiUP 支持管理的 TiDB 版本: + +```shell +tiup list tidb --refresh +``` + +以下输出结果中,`Version` 为支持的 TiDB 版本,`Installed` 为当前安装的版本,`Release` 为发版时间,`Platforms` 为支持的平台。 + +```log +Available versions for tidb (Last Modified: 2020-02-26T15:20:35+08:00): +Version Installed Release: Platforms +------- --------- -------- --------- +master 2020-03-18T08:39:11.753360611+08:00 linux/amd64,darwin/amd64 +v3.0.1 2020-04-07T17:53:00+08:00 linux/amd64,darwin/amd64 +v3.0.2 2020-04-08T23:38:37+08:00 linux/amd64,darwin/amd64 +v3.0.3 2020-03-27T22:41:16.279411145+08:00 linux/amd64,darwin/amd64 +v3.0.4 2020-03-27T22:43:35.362550386+08:00 linux/amd64,darwin/amd64 +v3.0.5 2020-03-27T22:46:01.016467032+08:00 linux/amd64,darwin/amd64 +v3.0.6 2020-03-13T11:55:17.941641963+08:00 linux/amd64,darwin/amd64 +v3.0.7 2020-03-13T12:02:22.538128662+08:00 linux/amd64,darwin/amd64 +v3.0.8 2020-03-17T14:03:29.575448277+08:00 linux/amd64,darwin/amd64 +v3.0.9 2020-03-13T13:02:15.947260351+08:00 linux/amd64,darwin/amd64 +v3.0.10 2020-03-13T14:11:53.774527401+08:00 linux/amd64,darwin/amd64 +v3.0.11 2020-03-13T15:31:06.94547891+08:00 linux/amd64,darwin/amd64 +v3.0.12 YES 2020-03-20T11:36:28.18950808+08:00 linux/amd64,darwin/amd64 +v3.1.0-beta.2 2020-03-19T00:48:48.266468238+08:00 linux/amd64,darwin/amd64 +v3.1.0-rc 2020-04-02T23:43:17.456327834+08:00 linux/amd64,darwin/amd64 +v4.0.0-beta 2020-03-13T12:43:55.508190493+08:00 linux/amd64,darwin/amd64 +v4.0.0-beta.1 2020-03-13T12:30:08.913759828+08:00 linux/amd64,darwin/amd64 +v4.0.0-beta.2 YES 2020-03-18T22:52:00.830626492+08:00 linux/amd64,darwin/amd64 +v4.0.0-rc 2020-04-09T00:10:32+08:00 linux/amd64,darwin/amd64 +nightly 2020-04-10T08:42:23+08:00 darwin/amd64,linux/amd64 +``` + +### 如何查看 TiUP 支持管理的组件和版本 + +执行如下命令查看 TiUP 支持管理的组件和版本: + +```shell +tiup list +``` + +以下输出结果中,`Name` 为支持组件名称,`Installed` 为当前是否安装,`Platforms` 为支持的系统平台,`Description` 为组件描述。 + +```log +Available components (Last Modified: 2020-02-27T15:20:35+08:00): +Name Installed Platforms Description +---- --------- --------- ----------- +tidb YES(v3.0.12,v4.0.0-rc) darwin/amd64,linux/amd64 TiDB is an open source distributed HTAP database compatible with the MySQL protocol +tikv YES(v3.0.12,v4.0.0-rc) darwin/amd64,linux/amd64 Distributed transactional key-value database, originally created to complement TiDB +pd 
YES(v3.0.12,v4.0.0-rc) darwin/amd64,linux/amd64 PD is the abbreviation for Placement Driver. It is used to manage and schedule the TiKV cluster +playground YES(v0.0.5) darwin/amd64,linux/amd64 Bootstrap a local TiDB cluster +client darwin/amd64,linux/amd64 A simple mysql client to connect TiDB +prometheus darwin/amd64,linux/amd64 The Prometheus monitoring system and time series database. +tpc darwin/amd64,linux/amd64 A toolbox to benchmark workloads in TPC +package darwin/amd64,linux/amd64 A toolbox to package tiup component +grafana linux/amd64,darwin/amd64 Grafana is the open source analytics & monitoring solution for every database +alertmanager darwin/amd64,linux/amd64 Prometheus alertmanager +blackbox_exporter darwin/amd64,linux/amd64 Blackbox prober exporter +node_exporter darwin/amd64,linux/amd64 Exporter for machine metrics +pushgateway darwin/amd64,linux/amd64 Push acceptor for ephemeral and batch jobs +tiflash linux/amd64 The TiFlash Columnar Storage Engine +drainer linux/amd64 The drainer componet of TiDB binlog service +pump linux/amd64 The pump componet of TiDB binlog service +cluster YES(v0.4.6) linux/amd64,darwin/amd64 Deploy a TiDB cluster for production +``` + +### 如何检测 NTP 服务是否正常 1. 执行以下命令,如果输出 `running` 表示 NTP 服务正在运行: @@ -1566,7 +1534,7 @@ tidb_servers: [root@10.0.1.1 tidb]# ``` -### 关闭部署机器的防火墙 +### 如何关闭部署机器的防火墙 1. 检查防火墙状态(以 CentOS Linux release 7.7.1908 (Core) 为例) @@ -1601,7 +1569,7 @@ tidb_servers: sudo systemctl status firewalld.service ``` -### 安装 numactl 工具 +### 如何安装 numactl 工具 > **注意:** >
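以 CentOS 7 为例,可以在需要绑核的目标机器上通过 yum 安装 numactl(示例命令,需由具有 sudo 权限的用户执行):

{{< copyable "shell-regular" >}}

```shell
sudo yum -y install numactl
```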