diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 38e352a0987b..000000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,126 +0,0 @@ -version: 2 - -jobs: - lint: - docker: - - image: circleci/node:lts - working_directory: ~/pingcap/docs - steps: - - checkout - - - run: - name: Setup - command: | - mkdir ~/.npm-global - npm config set prefix '~/.npm-global' - echo 'export PATH=~/.npm-global/bin:$PATH' >> $BASH_ENV - echo 'export NODE_PATH=~/.npm-global/lib/node_modules:$NODE_PATH' >> $BASH_ENV - - - run: - name: "Check file encoding" - command: | - git remote add upstream https://github.com/pingcap/docs.git - git fetch upstream - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-file-encoding.py - python3 check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' ':(exclude).github/*') - - - run: - name: "Check git conflicts" - command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-conflicts.py - python3 check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' '*.yml' '*.yaml') - - - run: - name: "Install markdownlint" - command: | - npm install -g markdownlint-cli@0.17.0 - - - run: - name: "Lint edited files" - command: | - markdownlint $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' ':(exclude).github/*') - - - run: - name: "Check internal links" - command: | - scripts/verify-links.sh - - - run: - name: "Check internal link anchors" - command: | - scripts/verify-link-anchors.sh - - - run: - name: "Check control characters" - command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-control-char.py - python3 check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' ':(exclude).github/*') - - - run: - 
name: "Check unclosed tags" - command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-tags.py - python3 check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' ':(exclude).github/*') - - - run: - name: "Check manual line breaks" - command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-manual-line-breaks.py - python3 check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/refactor-migration-docs..HEAD -- '*.md' ':(exclude).github/*') - - build: - docker: - - image: andelf/doc-build:0.1.9 - working_directory: ~/pingcap/docs - - steps: - - checkout - - - run: - name: install sdk and awscli - command: | - sudo pip3 install boto3 - sudo pip3 install awscli - printf "%s\n" ${AWS_ACCESS_KEY} ${AWS_SECRET_KEY} ${AWS_REGION} "json" | aws configure - - - - run: - name: "Special Check for Golang User - YOUR TAB SUCK" - command: grep -RP '\t' * | tee | grep '.md' && exit 1; echo ok - - - run: - name: "Merge Makedown Files" - command: python3 scripts/merge_by_toc.py - - - run: - name: "Generate PDF" - command: scripts/generate_pdf.sh - - # echo "222.222.95.49 uc.qbox.me": adds a new host - - deploy: - name: "Publish PDF to Qiniu and S3" - command: | - sudo bash -c 'echo "222.222.95.49 uc.qbox.me" >> /etc/hosts'; - if [ "${CIRCLE_BRANCH}" == "refactor-migration-docs" ]; then - python3 scripts/upload.py output.pdf tidb-dev-en-manual.pdf; - fi - - - run: - name: "Copy Generated PDF" - command: mkdir /tmp/artifacts && cp output.pdf doc.md /tmp/artifacts - - - store_artifacts: - path: /tmp/artifacts - -workflows: - version: 2 - lint: - jobs: - - lint - build: - jobs: - - build: - filters: - branches: - only: - - refactor-migration-docs diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index dae71379e682..b43061138a84 100644 --- a/.github/pull_request_template.md +++ 
b/.github/pull_request_template.md @@ -12,7 +12,14 @@ +**Tips for choosing the affected version(s):** + +By default, **CHOOSE MASTER ONLY** so your changes will be applied to the next TiDB major or minor releases. If your PR involves a product feature behavior change or a compatibility change, **CHOOSE THE AFFECTED RELEASE BRANCH(ES) AND MASTER**. + +For details, see [tips for choosing the affected versions](https://github.com/pingcap/docs/blob/master/CONTRIBUTING.md#guideline-for-choosing-the-affected-versions). + - [ ] master (the latest development version) +- [ ] v5.2 (TiDB 5.2 versions) - [ ] v5.1 (TiDB 5.1 versions) - [ ] v5.0 (TiDB 5.0 versions) - [ ] v4.0 (TiDB 4.0 versions) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 476b6d6d559e..df71d03af55d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,12 +29,13 @@ Before you contribute, please take a quick look at some general information abou ### Learn about docs versions -Currently, we maintain six versions of TiDB documentation, each with a separate branch: +Currently, we maintain the following versions of TiDB documentation, each with a separate branch: | Docs branch name | Version description | | :--- | :--- | | `master` branch | the latest development version | -| `release-5.1` branch | the 5.1 version | +| `release-5.2` branch | the 5.2 stable version | +| `release-5.1` branch | the 5.1 stable version | | `release-5.0` branch | the 5.0 stable version | | `release-4.0` branch | the 4.0 stable version | | `release-3.1` branch | the 3.1 stable version | @@ -45,22 +46,25 @@ Currently, we maintain six versions of TiDB documentation, each with a separate > > Previously, we maintain all versions in the `master` branch, with directories like `dev` (the latest development version), `v3.0` and so on. Each docs version is updated very frequently and changes to one version often apply to another version or other versions as well. 
> -> Since February 21, 2020, to reduce manual editing and updating work among versions, we have started to maintain each version in a separate branch and introduce sre-bot to automatically file PRs to other versions as long as you add corresponding cherry-pick labels to your PR. +> Since February 21, 2020, to reduce manual editing and updating work among versions, we have started to maintain each version in a separate branch and introduced sre-bot (now ti-chi-bot) to automatically file PRs to other versions as long as you add corresponding cherry-pick labels to your PR. ### Use cherry-pick labels - If your changes apply to only one docs version, just submit a PR to the corresponding version branch. -- If your changes apply to multiple docs versions, you don't have to submit a PR to each branch. Instead, after you submit your PR, trigger the sre-bot to submit a PR to other version branches by adding one or several of the following labels as needed. Once the current PR is merged, sre-bot will start to work. - - `needs-cherry-pick-5.1` label: sre-bot will submit a PR to the `release-5.1` branch. - - `needs-cherry-pick-5.0` label: sre-bot will submit a PR to the `release-5.0` branch. - - `needs-cherry-pick-4.0` label: sre-bot will submit a PR to the `release-4.0` branch. - - `needs-cherry-pick-3.1` label: sre-bot will submit a PR to the `release-3.1` branch. - - `needs-cherry-pick-3.0` label: sre-bot will submit a PR to the `release-3.0` branch. - - `needs-cherry-pick-2.1` label: sre-bot will submit a PR to the `release-2.1` branch. - - `needs-cherry-pick-master` label: sre-bot will submit a PR to the `master` branch. +- If your changes apply to multiple docs versions, you don't have to submit a PR to each branch. Instead, after you submit your PR, trigger the ti-chi-bot to submit a PR to other version branches by adding one or several of the following labels as needed. Once the current PR is merged, ti-chi-bot will start to work. 
+ - `needs-cherry-pick-5.2` label: ti-chi-bot will submit a PR to the `release-5.2` branch. + - `needs-cherry-pick-5.1` label: ti-chi-bot will submit a PR to the `release-5.1` branch. + - `needs-cherry-pick-5.0` label: ti-chi-bot will submit a PR to the `release-5.0` branch. + - `needs-cherry-pick-4.0` label: ti-chi-bot will submit a PR to the `release-4.0` branch. + - `needs-cherry-pick-3.1` label: ti-chi-bot will submit a PR to the `release-3.1` branch. + - `needs-cherry-pick-3.0` label: ti-chi-bot will submit a PR to the `release-3.0` branch. + - `needs-cherry-pick-2.1` label: ti-chi-bot will submit a PR to the `release-2.1` branch. + - `needs-cherry-pick-master` label: ti-chi-bot will submit a PR to the `master` branch. -- If most of your changes apply to multiple docs versions but some differences exist among versions, you still can use cherry-pick labels to let sre-bot create PRs to other versions. After the PR to another version is successfully submitted by sre-bot, you can make changes to that PR. + For how to choose the docs versions, refer to [Guideline for choosing the affected version(s)](#guideline-for-choosing-the-affected-versions). + +- If most of your changes apply to multiple docs versions but some differences exist among versions, you still can use cherry-pick labels to let ti-chi-bot create PRs to other versions. After the PR to another version is successfully submitted by ti-chi-bot, you can make changes to that PR. ## How to contribute @@ -142,6 +146,23 @@ git push -u origin new-branch-name # "-u" is used to track the remote branch fro Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to TiDB documentation. +## Guideline for choosing the affected version(s) + +When you create a Pull Request, you need to choose the release version to which your document change applies in the description template on your Pull Request page. 
+ +If your change fits one of the following situations, it is recommended to **CHOOSE THE MASTER BRANCH ONLY**. After the PR is merged, the change will be soon displayed on the [Dev page of the PingCAP documentation website](https://docs.pingcap.com/tidb/dev/). After the next major or minor version of TiDB is released, the change will also be displayed on the website page for the new version. + +- Relates to a documentation enhancement, such as supplementing missing or incomplete document contents. +- Fixes inaccurate or incorrect document contents, including values, descriptions, examples, or typos. +- Involves a documentation refactor in a specific topic module. + +If your change fits one of the following situations, **CHOOSE THE AFFECTED RELEASE BRANCH(ES) AND MASTER**: + +- Involves a feature behavior change that relates to a specific version. +- Involves a compatibility change, including changing the default value of a configuration item or a system variable. +- Fixes format to resolve a display error +- Fixes broken links + ## Contact Join the Slack channel: [#sig-docs](https://slack.tidb.io/invite?team=tidb-community&channel=sig-docs&ref=pingcap-docs) diff --git a/README.md b/README.md index 3049740008c1..ee9284332a9b 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ Currently, we maintain the following versions of TiDB documentation in different | Branch name | TiDB docs version | | :---------|:----------| | [`master`](https://github.com/pingcap/docs/tree/master) | The latest development version | -| [`release-5.1`](https://github.com/pingcap/docs/tree/release-5.1) | 5.1 version | +| [`release-5.2`](https://github.com/pingcap/docs/tree/release-5.2) | 5.2 stable version | +| [`release-5.1`](https://github.com/pingcap/docs/tree/release-5.1) | 5.1 stable version | | [`release-5.0`](https://github.com/pingcap/docs/tree/release-5.0) | 5.0 stable version | | [`release-4.0`](https://github.com/pingcap/docs/tree/release-4.0) | 4.0 stable version | | 
[`release-3.1`](https://github.com/pingcap/docs/tree/release-3.1) | 3.1 stable version | diff --git a/TOC.md b/TOC.md index 99de0ea9c3aa..c6455dd6404d 100644 --- a/TOC.md +++ b/TOC.md @@ -7,20 +7,22 @@ + About TiDB + [TiDB Introduction](/overview.md) - + [TiDB 5.1 Release Notes](/releases/release-5.1.0.md) + + [TiDB 5.2 Release Notes](/releases/release-5.2.0.md) + [Basic Features](/basic-features.md) + [Experimental Features](/experimental-features.md) + Benchmarks - + [v5.1 Sysbench Performance Test Report](/benchmark/benchmark-sysbench-v5.1.0-vs-v5.0.2.md) - + [v5.1 TPC-C Performance Test Report](/benchmark/v5.1-performance-benchmarking-with-tpcc.md) - + [v5.1 MPP mode TPC-H 100GB Performance Test](/benchmark/v5.1-performance-benchmarking-with-tpch.md) + + [v5.2 Sysbench Performance Test Report](/benchmark/benchmark-sysbench-v5.2.0-vs-v5.1.1.md) + + [v5.2 TPC-C Performance Test Report](/benchmark/v5.2-performance-benchmarking-with-tpcc.md) + + [v5.2 MPP mode TPC-H 100GB Performance Test](/benchmark/v5.2-performance-benchmarking-with-tpch.md) + [MySQL Compatibility](/mysql-compatibility.md) + [TiDB Limitations](/tidb-limitations.md) + [TiDB Adopters](/adopters.md) + [Credits](/credits.md) + Quick Start + [Try Out TiDB](/quick-start-with-tidb.md) + + [Try Out HTAP](/quick-start-with-htap.md) + [Learn TiDB SQL](/basic-sql-operations.md) + + [Learn HTAP](/explore-htap.md) + [Import Example Database](/import-example-data.md) + Deploy + [Software and Hardware Requirements](/hardware-and-software-requirements.md) @@ -158,6 +160,7 @@ + Tutorials + [Multiple Data Centers in One City Deployment](/multi-data-centers-in-one-city-deployment.md) + [Three Data Centers in Two Cities Deployment](/three-data-centers-in-two-cities-deployment.md) + + [Two Data Centers in One City Deployment](/two-data-centers-in-one-city-deployment.md) + Read Historical Data + Use Stale Read (Recommended) + [Usage Scenarios of Stale Read](/stale-read.md) @@ -226,6 +229,7 @@ + 
[Maintain](/ticdc/manage-ticdc.md) + [Troubleshoot](/ticdc/troubleshoot-ticdc.md) + [Monitor](/ticdc/monitor-ticdc.md) + + [TiCDC OpenAPI](/ticdc/ticdc-open-api.md) + [TiCDC Open Protocol](/ticdc/ticdc-open-protocol.md) + [Integrate TiDB with Confluent Platform](/ticdc/integrate-confluent-using-ticdc.md) + [Glossary](/ticdc/ticdc-glossary.md) @@ -284,6 +288,7 @@ + [`ADMIN CHECKSUM TABLE`](/sql-statements/sql-statement-admin-checksum-table.md) + [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) + [`ADMIN SHOW DDL [JOBS|QUERIES]`](/sql-statements/sql-statement-admin-show-ddl.md) + + [`ADMIN SHOW TELEMETRY`](/sql-statements/sql-statement-admin-show-telemetry.md) + [`ALTER DATABASE`](/sql-statements/sql-statement-alter-database.md) + [`ALTER INDEX`](/sql-statements/sql-statement-alter-index.md) + [`ALTER INSTANCE`](/sql-statements/sql-statement-alter-instance.md) @@ -468,6 +473,7 @@ + [`METRICS_TABLES`](/information-schema/information-schema-metrics-tables.md) + [`PARTITIONS`](/information-schema/information-schema-partitions.md) + [`PROCESSLIST`](/information-schema/information-schema-processlist.md) + + [`REFERENTIAL_CONSTRAINTS`](/information-schema/information-schema-referential-constraints.md) + [`SCHEMATA`](/information-schema/information-schema-schemata.md) + [`SEQUENCES`](/information-schema/information-schema-sequences.md) + [`SESSION_VARIABLES`](/information-schema/information-schema-session-variables.md) @@ -509,6 +515,9 @@ + [Use Diagnostics](/dashboard/dashboard-diagnostics-usage.md) + [Search Logs Page](/dashboard/dashboard-log-search.md) + [Profile Instances Page](/dashboard/dashboard-profiling.md) + + Session Management and Configuration + + [Share Session](/dashboard/dashboard-session-share.md) + + [Configure SSO](/dashboard/dashboard-session-sso.md) + [FAQ](/dashboard/dashboard-faq.md) + CLI + [tikv-ctl](/tikv-control.md) @@ -565,16 +574,23 @@ + [Glossary](/glossary.md) + Release Notes + [All 
Releases](/releases/release-notes.md) - + [TiDB Roadmap](/roadmap.md) + + v5.2 + + [5.2.1](/releases/release-5.2.1.md) + + [5.2.0](/releases/release-5.2.0.md) + v5.1 + + [5.1.2](/releases/release-5.1.2.md) + + [5.1.1](/releases/release-5.1.1.md) + [5.1.0](/releases/release-5.1.0.md) + v5.0 + + [5.0.4](/releases/release-5.0.4.md) + [5.0.3](/releases/release-5.0.3.md) + [5.0.2](/releases/release-5.0.2.md) + [5.0.1](/releases/release-5.0.1.md) + [5.0 GA](/releases/release-5.0.0.md) + [5.0.0-rc](/releases/release-5.0.0-rc.md) + v4.0 + + [4.0.15](/releases/release-4.0.15.md) + + [4.0.14](/releases/release-4.0.14.md) + [4.0.13](/releases/release-4.0.13.md) + [4.0.12](/releases/release-4.0.12.md) + [4.0.11](/releases/release-4.0.11.md) diff --git a/_index.md b/_index.md index 6ab422c50da5..15a43923d102 100644 --- a/_index.md +++ b/_index.md @@ -8,7 +8,7 @@ aliases: ['/docs/dev/'] [TiDB](https://github.com/pingcap/tidb) (/’taɪdiːbi:/, "Ti" stands for Titanium) is an open-source, distributed, NewSQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. It is MySQL compatible and features horizontal scalability, strong consistency, and high availability. TiDB can be deployed on-premise or in-cloud. -Designed for the cloud, TiDB provides flexible scalability, reliability and security on the cloud platform. Users can elastically scale TiDB to meet the requirements of their changing workloads. [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/v1.1/tidb-operator-overview) helps manage TiDB on Kubernetes and automates operating tasks, which makes TiDB easier to deploy on any cloud that provides managed Kubernetes. [TiDB Cloud](https://pingcap.com/products/tidbcloud) (Beta), the fully-managed TiDB service, is the easiest, most economical, and most resilient way to unlock the full power of [TiDB in the cloud](https://docs.pingcap.com/tidbcloud/beta), allowing you to deploy and run TiDB clusters with just a few clicks. 
+Designed for the cloud, TiDB provides flexible scalability, reliability and security on the cloud platform. Users can elastically scale TiDB to meet the requirements of their changing workloads. [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/v1.1/tidb-operator-overview) helps manage TiDB on Kubernetes and automates operating tasks, which makes TiDB easier to deploy on any cloud that provides managed Kubernetes. [TiDB Cloud](https://pingcap.com/products/tidbcloud) (Public Preview), the fully-managed TiDB service, is the easiest, most economical, and most resilient way to unlock the full power of [TiDB in the cloud](https://docs.pingcap.com/tidbcloud/beta), allowing you to deploy and run TiDB clusters with just a few clicks. @@ -16,7 +16,7 @@ Designed for the cloud, TiDB provides flexible scalability, reliability and secu - [TiDB Introduction](/overview.md) - [Basic Features](/basic-features.md) -- [TiDB 5.1 Release Notes](/releases/release-5.1.0.md) +- [TiDB 5.2 Release Notes](/releases/release-5.2.0.md) - [Compatibility with MySQL](/mysql-compatibility.md) - [Usage Limitations](/tidb-limitations.md) - [TiDB Adopters](/adopters.md) @@ -26,8 +26,10 @@ Designed for the cloud, TiDB provides flexible scalability, reliability and secu Quick Start -- [Quick Start Guide](/quick-start-with-tidb.md) +- [Quick Start with TiDB](/quick-start-with-tidb.md) +- [Quick Start with HTAP](/quick-start-with-htap.md) - [Explore SQL with TiDB](/basic-sql-operations.md) +- [Explore HTAP](/explore-htap.md) diff --git a/adopters.md b/adopters.md index aafbe2586ac8..94fc942c9ccb 100644 --- a/adopters.md +++ b/adopters.md @@ -12,61 +12,62 @@ This is a list of TiDB adopters in various industries. 
| :--- | :--- | :--- | |[China Telecom Bestpay](https://www.crunchbase.com/organization/orange-finance)|Mobile Payment|[English](https://pingcap.com/case-studies/how-we-process-data-five-times-more-efficiently-using-a-scale-out-mysql-alternative/); [Chinese](https://pingcap.com/cases-cn/user-case-bestpay/)| |[VNG](https://en.wikipedia.org/wiki/VNG_Corporation)|Mobile Payment|English [#1](https://pingcap.com/case-studies/tidb-at-zalopay-infrastructure-lesson-learned/), [#2](https://pingcap.com/case-studies/zalopay-using-a-scale-out-mysql-alternative-to-serve-millions-of-users)| -|[Ping++](https://www.crunchbase.com/organization/ping-5)|Mobile Payment|[Chinese](https://pingcap.com/cases-cn/user-case-ping++/)| +|[Ping++](https://www.crunchbase.com/organization/ping-5)|Mobile Payment|| |[LianLian Tech](https://www.crunchbase.com/organization/lianlian-pay)|Mobile Payment|| -|[U-Next](https://www.crunchbase.com/organization/u-next)|Media and Entertainment|[English](https://pingcap.com/case-studies/running-a-scale-out-database-on-arm-as-mysql-alternative/); [Chinese](https://pingcap.com/cases-cn/user-case-unext/)| +|[U-Next](https://www.crunchbase.com/organization/u-next)|Media and Entertainment|[English](https://pingcap.com/case-studies/running-a-scale-out-database-on-arm-as-mysql-alternative/)| |[Dailymotion](https://en.wikipedia.org/wiki/Dailymotion)|Media and Entertainment|| -|[iQiyi](https://en.wikipedia.org/wiki/IQiyi)|Media and Entertainment|[English](https://pingcap.com/case-studies/tidb-in-iqiyi/); [Chinese](https://pingcap.com/cases-cn/user-case-iqiyi/)| +|[iQiyi](https://en.wikipedia.org/wiki/IQiyi)|Media and Entertainment|[English](https://pingcap.com/case-studies/tidb-in-iqiyi/)| |[BookMyShow](https://www.crunchbase.com/organization/bookmyshow)|Media and Entertainment|[English](https://pingcap.com/case-studies/tidb-in-bookmyshow/)| 
-|[Yiguo.com](https://www.crunchbase.com/organization/shanghai-yiguo-electron-business)|E-commerce|[English](https://www.datanami.com/2018/02/22/hybrid-database-capturing-perishable-insights-yiguo/); [Chinese](https://pingcap.com/cases-cn/user-case-yiguo)| -|[Shopee](https://en.wikipedia.org/wiki/Shopee)|E-commerce|English [#1](https://pingcap.com/case-studies/tidb-in-shopee/), [#2](https://pingcap.com/case-studies/choosing-right-database-for-your-applications); [Chinese](https://pingcap.com/cases-cn/user-case-shopee/)| -|[Zhuan Zhuan](https://www.crunchbase.com/organization/zhuan-zhuan)|E-commerce|English [#1](https://pingcap.com/case-studies/tidb-in-zhuanzhuan/), [#2](https://pingcap.com/case-studies/scale-out-database-powers-china-letgo-with-reduced-maintenance-costs); Chinese [#1](https://pingcap.com/cases-cn/user-case-zhuanzhuan/), [#2](https://pingcap.com/cases-cn/user-case-zhuanzhuan-2/), [#3](https://pingcap.com/cases-cn/user-case-zhuanzhuan-3/)| -|[Xiaohongshu](https://en.wikipedia.org/wiki/Xiaohongshu)|E-commerce|[English](https://pingcap.com/case-studies/how-we-use-a-scale-out-htap-database-for-real-time-analytics-and-complex-queries); Chinese [#1](https://pingcap.com/cases-cn/user-case-xiaohongshu/), [#2](https://pingcap.com/cases-cn/user-case-xiaohongshu-2/)| -|[Meituan](https://www.crunchbase.com/organization/meituan)|E-commerce|English [#1](https://pingcap.com/case-studies/migrating-from-mysql-to-a-scale-out-database-to-serve-our-290-million-monthly-users), [#2](https://pingcap.com/case-studies/how-we-use-a-mysql-alternative-to-avoid-sharding-and-provide-strong-consistency); [Chinese](https://pingcap.com/cases-cn/user-case-meituan/)| +|[Yiguo.com](https://www.crunchbase.com/organization/shanghai-yiguo-electron-business)|E-commerce|[English](https://www.datanami.com/2018/02/22/hybrid-database-capturing-perishable-insights-yiguo/)| +|[Shopee](https://en.wikipedia.org/wiki/Shopee)|E-commerce|English 
[#1](https://pingcap.com/case-studies/tidb-in-shopee/), [#2](https://pingcap.com/case-studies/choosing-right-database-for-your-applications)| +|[Zhuan Zhuan](https://www.crunchbase.com/organization/zhuan-zhuan)|E-commerce|English [#1](https://pingcap.com/case-studies/tidb-in-zhuanzhuan/), [#2](https://pingcap.com/case-studies/scale-out-database-powers-china-letgo-with-reduced-maintenance-costs)| +|[Xiaohongshu](https://en.wikipedia.org/wiki/Xiaohongshu)|E-commerce|[English](https://pingcap.com/case-studies/how-we-use-a-scale-out-htap-database-for-real-time-analytics-and-complex-queries); [Chinese](https://pingcap.com/zh/case/user-case-xiaohongshu/)| +|[Meituan](https://www.crunchbase.com/organization/meituan)|E-commerce|English [#1](https://pingcap.com/case-studies/migrating-from-mysql-to-a-scale-out-database-to-serve-our-290-million-monthly-users), [#2](https://pingcap.com/case-studies/how-we-use-a-mysql-alternative-to-avoid-sharding-and-provide-strong-consistency); [Chinese](https://pingcap.com/zh/case/user-case-meituandianping)| |[Happigo.com](https://www.crunchbase.com/organization/happigo-com)|E-commerce|| |[Yimutian](https://www.crunchbase.com/organization/yimutian)|E-commerce|| |[Youju Tech](https://nn.yjyz.com/)|E-commerce|| |[Maizuo](https://www.crunchbase.com/organization/maizhuo)|E-commerce|| |[Mogujie](https://www.crunchbase.com/organization/mogujie)|E-commerce|| -|[Zhihu](https://en.wikipedia.org/wiki/Zhihu)|Knowledge Sharing|English [#1](https://pingcap.com/case-studies/lesson-learned-from-queries-over-1.3-trillion-rows-of-data-within-milliseconds-of-response-time-at-zhihu/), [#2](https://pingcap.com/case-studies/horizontally-scaling-hive-metastore-database-by-migrating-from-mysql-to-tidb); [Chinese](https://pingcap.com/cases-cn/user-case-zhihu/)| -|[PatSnap](https://www.crunchbase.com/organization/patsnap)|Artificial Intelligence| [English](https://pingcap.com/case-studies/why-we-chose-a-scale-out-data-warehouse-for-real-time-analytics); 
[Chinese](https://pingcap.com/cases-cn/user-case-zhihuiya/)| -|[JD Cloud](https://www.crunchbase.com/organization/jd-cloud)|Cloud Computing|[English](https://pingcap.com/case-studies/lesson-learned-from-40-k-qps-and-20-billion-rows-of-data-in-a-single-scale-out-cluster/); [Chinese](https://pingcap.com/cases-cn/user-case-jingdongyun/)| -|[Mobike](https://en.wikipedia.org/wiki/Mobike)|Ridesharing|[English](https://pingcap.com/case-studies/tidb-in-mobike); Chinese [#1](https://pingcap.com/cases-cn/user-case-mobike/), [#2](https://pingcap.com/cases-cn/user-case-mobike-2/)| +|[Zhihu](https://en.wikipedia.org/wiki/Zhihu)|Knowledge Sharing|English [#1](https://pingcap.com/case-studies/lesson-learned-from-queries-over-1.3-trillion-rows-of-data-within-milliseconds-of-response-time-at-zhihu/), [#2](https://pingcap.com/case-studies/horizontally-scaling-hive-metastore-database-by-migrating-from-mysql-to-tidb), [#3](https://pingcap.com/case-studies/boosting-big-data-performance-by-combining-tidb-with-hive-and-hdfs)| +|[PatSnap](https://www.crunchbase.com/organization/patsnap)|Artificial Intelligence| [English](https://pingcap.com/case-studies/why-we-chose-a-scale-out-data-warehouse-for-real-time-analytics); [Chinese](https://pingcap.com/zh/case/user-case-patsnap)| +|[JD Cloud](https://www.crunchbase.com/organization/jd-cloud)|Cloud Computing|[English](https://pingcap.com/case-studies/lesson-learned-from-40-k-qps-and-20-billion-rows-of-data-in-a-single-scale-out-cluster/)| +|[Mobike](https://en.wikipedia.org/wiki/Mobike)|Ridesharing|[English](https://pingcap.com/case-studies/tidb-in-mobike)| |[Autohome](https://www.crunchbase.com/organization/autohome)|Automobile|[English](https://pingcap.com/case-studies/reduce-real-time-query-latency-from-0.5s-to-0.01s-with-scale-out-htap-database); [Chinese](https://pingcap.com/cases-cn/user-case-qichezhijia/)| 
|[Chehaoduo](https://www.crunchbase.com/organization/guazi-com)|Automobile|[English](https://pingcap.com/case-studies/top-car-trading-platform-chooses-scale-out-database-as-mysql-alternative)| -|[Xiaomi](https://en.wikipedia.org/wiki/Xiaomi)|Consumer Electronics|[English](https://pingcap.com/case-studies/tidb-in-xiaomi); [Chinese](https://pingcap.com/cases-cn/user-case-xiaomi/)| -|[LY.com](https://www.crunchbase.com/organization/ly-com)|Travel|[Chinese](https://pingcap.com/cases-cn/user-case-tongcheng/)| -|[Qunar.com](https://www.crunchbase.com/organization/qunar-com)|Travel|[Chinese](https://pingcap.com/cases-cn/user-case-qunar/)| +|[Xiaomi](https://en.wikipedia.org/wiki/Xiaomi)|Consumer Electronics|English [#1](https://pingcap.com/case-studies/tidb-in-xiaomi), [#2](https://pingcap.com/case-studies/a-mysql-alternative-scale-out-database-helps-xiaomi-hyper-growth)| +|[LY.com](https://www.crunchbase.com/organization/ly-com)|Travel|| +|[Qunar.com](https://www.crunchbase.com/organization/qunar-com)|Travel|| |[Hulu](https://www.hulu.com)|Entertainment|| -|[PalFish](https://www.crunchbase.com/organization/palfish)|EdTech|[English](https://pingcap.com/case-studies/embracing-newsql-why-we-chose-tidb-over-mongodb-and-mysql); Chinese [#1](https://pingcap.com/cases-cn/user-case-banyu-2/), [#2](https://pingcap.com/cases-cn/user-case-banyu/)| +|[PalFish](https://www.crunchbase.com/organization/palfish)|EdTech|[English](https://pingcap.com/case-studies/embracing-newsql-why-we-chose-tidb-over-mongodb-and-mysql); [Chinese](https://pingcap.com/zh/case/user-case-banyu)| |[VIPKid](https://www.crunchbase.com/organization/vipkid)|EdTech|[English](https://pingcap.com/case-studies/why-we-chose-a-distributed-sql-database-to-complement-mysql); [Chinese](https://pingcap.com/cases-cn/user-case-vipkid/)| -|[Yuanfudao.com](https://www.crunchbase.com/organization/yuanfudao)|EdTech|[English](https://pingcap.com/blog/2017-08-08-tidbforyuanfudao/); 
[Chinese](https://pingcap.com/cases-cn/user-case-yuanfudao/)| -|[Bank of Beijing](https://en.wikipedia.org/wiki/Bank_of_Beijing)|Banking|[English](https://pingcap.com/case-studies/how-we-use-a-distributed-database-to-achieve-horizontal-scaling-without-downtime); Chinese [#1](https://pingcap.com/cases-cn/user-case-beijing-bank/), [#2](https://pingcap.com/cases-cn/user-case-beijing-bank-2/)| -|[WeBank](https://en.wikipedia.org/wiki/WeBank_(China))|Banking|[English](https://pingcap.com/case-studies/how-we-reduced-batch-processing-time-by-58-percent-with-a-scale-out-mysql-alternative/); Chinese [#1](https://pingcap.com/cases-cn/user-case-webank/), [#2](https://pingcap.com/cases-cn/user-case-webank-2/)| +|[Yuanfudao.com](https://www.crunchbase.com/organization/yuanfudao)|EdTech|[English](https://pingcap.com/blog/2017-08-08-tidbforyuanfudao/)| +|[Bank of Beijing](https://en.wikipedia.org/wiki/Bank_of_Beijing)|Banking|[English](https://pingcap.com/case-studies/how-we-use-a-distributed-database-to-achieve-horizontal-scaling-without-downtime); [Chinese](https://pingcap.com/zh/case/user-case-beijing-bank)| +|[WeBank](https://en.wikipedia.org/wiki/WeBank_(China))|Banking|[English](https://pingcap.com/case-studies/how-we-reduced-batch-processing-time-by-58-percent-with-a-scale-out-mysql-alternative/); [Chinese](https://pingcap.com/zh/case/user-case-webank)| |[Bank of China](https://en.wikipedia.org/wiki/Bank_of_China) | Banking | [English](https://pingcap.com/case-studies/how-bank-of-china-uses-a-scale-out-database-to-support-zabbix-monitoring-at-scale); [Chinese](https://pingcap.com/cases-cn/user-case-bank-of-china/) | |[China Zheshang Bank](https://en.wikipedia.org/wiki/China_Zheshang_Bank)|Banking|[English](https://pingcap.com/case-studies/reduce-query-latency-from-seconds-to-milliseconds-with-a-scale-out-database); [Chinese](https://pingcap.com/cases-cn/user-case-zheshang-bank/)| |[Industrial and Commercial Bank of 
China](https://en.wikipedia.org/wiki/Industrial_and_Commercial_Bank_of_China)|Banking|| |[Ping An Life Insurance](https://www.bloomberg.com/profile/company/OPAHWZ:CH)|Insurance|[English](https://pingcap.com/case-studies/how-chinas-insurance-giant-improved-agile-application-performance-with-a-newsql-database); [Chinese](https://pingcap.com/cases-cn/user-case-pingan/)| -|[Yimian Data](https://www.crunchbase.com/organization/yimian-data)|Big Data|[Chinese](https://pingcap.com/cases-cn/user-case-yimian)| -|[CAASDATA](https://www.caasdata.com/)|Big Data|[Chinese](https://pingcap.com/cases-cn/user-case-kasi/)| +|[Yimian Data](https://www.crunchbase.com/organization/yimian-data)|Big Data|| +|[CAASDATA](https://www.caasdata.com/)|Big Data|| |[58.com](https://www.crunchbase.com/organization/58-com)|Advertising|[English](https://pingcap.com/case-studies/no-sharding-no-etl-use-scale-out-mysql-alternative-to-store-160-tb-of-data); [Chinese](https://pingcap.com/cases-cn/user-case-58/)| -|[Mobikok](https://www.linkedin.com/company/shenzhen-keke-network-technology-co.-ltd./)|AdTech|[Chinese](https://pingcap.com/cases-cn/user-case-mobikok/)| +|[Mobikok](https://www.linkedin.com/company/shenzhen-keke-network-technology-co.-ltd./)|AdTech|| |[Ninja Van](https://www.crunchbase.com/organization/ninja-van-2)| Logistics|[English](https://pingcap.com/case-studies/choose-a-mysql-alternative-over-vitess-and-crdb-to-scale-out-our-databases-on-k8s)| |[ZTO Express](https://www.crunchbase.com/organization/zto-express)| Logistics|[English](https://pingcap.com/case-studies/why-we-migrated-from-exadata-to-a-scale-out-htap-database-for-near-real-time-analytics); [Chinese](https://pingcap.com/cases-cn/user-case-zto-express/)| -|[G7 Networks](https://www.crunchbase.com/organization/g7)| Logistics|[Chinese](https://pingcap.com/cases-cn/user-case-g7/)| -|[Hive-Box](http://www.fcbox.com/en/pc/index.html#/)|Logistics|[Chinese](https://pingcap.com/cases-cn/user-case-fengchao/)| +|[G7 
Networks](https://www.crunchbase.com/organization/g7)| Logistics|| +|[Hive-Box](http://www.fcbox.com/en/pc/index.html#/)|Logistics|| |[NetEase Games](https://www.linkedin.com/company/netease-games)|Gaming|[English](https://pingcap.com/case-studies/why-we-chose-tidb-over-other-mysql-based-and-newsql-storage-solutions); [Chinese](https://pingcap.com/cases-cn/user-case-wangyihuyu/)| -|[GAEA](http://www.gaea.com/en/)|Gaming|[English](https://pingcap.com/case-studies/2017-05-22-Comparison-between-MySQL-and-TiDB-with-tens-of-millions-of-data-per-day); [Chinese](https://pingcap.com/cases-cn/user-case-gaea-ad/)| +|[GAEA](http://www.gaea.com/en/)|Gaming|[English](https://pingcap.com/case-studies/2017-05-22-Comparison-between-MySQL-and-TiDB-with-tens-of-millions-of-data-per-day)| |[Kunlun](https://www.crunchbase.com/organization/kunlun)|Gaming|[English](https://pingcap.com/case-studies/empowering-your-gaming-application-with-a-scale-out-newsql-database)| -|[YOOZOO Games](https://www.crunchbase.com/organization/yoozoo-games)|Gaming|[Chinese](https://pingcap.com/cases-cn/user-case-youzu/)| -|[Seasun Games](https://www.crunchbase.com/organization/seasun)|Gaming|[Chinese](https://pingcap.com/cases-cn/user-case-xishanju/)| -|[FUNYOURS JAPAN](http://company.funyours.co.jp/)|Gaming|[Chinese](https://pingcap.com/cases-cn/user-case-funyours-japan/)| +|[YOOZOO Games](https://www.crunchbase.com/organization/yoozoo-games)|Gaming|| +|[Seasun Games](https://www.crunchbase.com/organization/seasun)|Gaming|| +|[FUNYOURS JAPAN](http://company.funyours.co.jp/)|Gaming|| |[Hoodinn](https://www.crunchbase.com/organization/hoodinn)|Gaming|| |[SEA group](https://sea-group.org/?lang=en)|Gaming|| |[Zhaopin.com](https://www.crunchbase.com/organization/zhaopin)|Recruiting|| |[BIGO](https://www.crunchbase.com/organization/bigo-technology)|Live Streaming|[English](https://pingcap.com/case-studies/why-we-chose-an-htap-database-over-mysql-for-horizontal-scaling-and-complex-queries/); 
[Chinese](https://pingcap.com/cases-cn/user-case-bigo/)| +|[Huya Live](https://en.wikipedia.org/wiki/Huya_Live)|Live Streaming|[English](https://pingcap.com/case-studies/how-we-scale-out-databases-and-get-big-data-queries-6x-faster-with-a-mysql-alternative); [Chinese](https://pingcap.com/zh/case/user-case-huya)| |[Panda.tv](https://www.crunchbase.com/organization/panda-tv)|Live Streaming|| -|[Phoenix New Media](https://www.crunchbase.com/organization/phoenix-new-media)|Media|[Chinese](https://pingcap.com/cases-cn/user-case-ifeng/)| +|[Phoenix New Media](https://www.crunchbase.com/organization/phoenix-new-media)|Media|| |[Tencent OMG](https://en.wikipedia.org/wiki/Tencent)|Media|| |[Terren](https://www.crunchbase.com/organization/terren)|Media|| |[LeCloud](https://www.crunchbase.com/organization/letv-2)|Media|| @@ -74,11 +75,11 @@ This is a list of TiDB adopters in various industries. |[Meizu](https://en.wikipedia.org/wiki/Meizu)|Media|| |[Sogou](https://en.wikipedia.org/wiki/Sogou)|MediaTech|| |[Gengmei](https://www.crunchbase.com/organization/gengmei)|Plastic Surgery|| -|[Keruyun](https://www.crunchbase.com/organization/keruyun-technology-beijing-co-ltd)|SaaS|[Chinese](https://pingcap.com/cases-cn/user-case-keruyun/)| -|[LinkDoc Technology](https://www.crunchbase.com/organization/linkdoc-technology)|HealthTech|[Chinese](https://pingcap.com/cases-cn/user-case-linkdoc/)| +|[Keruyun](https://www.crunchbase.com/organization/keruyun-technology-beijing-co-ltd)|SaaS|| +|[LinkDoc Technology](https://www.crunchbase.com/organization/linkdoc-technology)|HealthTech|| |[Chunyu Yisheng](https://www.crunchbase.com/organization/chunyu)|HealthTech|| |[Qutoutiao](https://www.crunchbase.com/organization/qutoutiao)|Social Network|| -|[Jinri Toutiao](https://en.wikipedia.org/wiki/Toutiao)|Mobile News Platform|[Chinese](https://pingcap.com/cases-cn/user-case-toutiao/)| +|[Jinri Toutiao](https://en.wikipedia.org/wiki/Toutiao)|Mobile News Platform|| |[360 
Finance](https://www.crunchbase.com/organization/360-finance)|FinTech|[Chinese](https://pingcap.com/cases-cn/user-case-360/)| |[Tongdun Technology](https://www.crunchbase.com/organization/tongdun-technology)|FinTech|| |[Wacai](https://www.crunchbase.com/organization/wacai)|FinTech|| @@ -92,10 +93,10 @@ This is a list of TiDB adopters in various industries. |[Founder Securities](https://www.crunchbase.com/organization/keruyun-technology-beijing-co-ltd)|Financial Services|| |[China Telecom Shanghai](http://www.189.cn/sh/)|Telecom|| |[State Administration of Taxation](https://en.wikipedia.org/wiki/State_Administration_of_Taxation)|Finance|| -|[Hainan eKing Technology](https://www.crunchbase.com/organization/hainan-eking-technology)|Enterprise Technology|[Chinese](https://pingcap.com/cases-cn/user-case-ekingtech/)| +|[Hainan eKing Technology](https://www.crunchbase.com/organization/hainan-eking-technology)|Enterprise Technology|| |[Wuhan Antian Information Technology](https://www.avlsec.com/)|Enterprise Technology|| |[Lenovo](https://en.wikipedia.org/wiki/Lenovo)|Enterprise Technology|| -|[2Dfire.com](http://www.2dfire.com/)|FoodTech|[Chinese](https://pingcap.com/cases-cn/user-case-erweihuo/)| +|[2Dfire.com](http://www.2dfire.com/)|FoodTech|| |[Acewill](https://www.crunchbase.com/organization/acewill)|FoodTech|| |[Ausnutria Dairy](https://www.crunchbase.com/organization/ausnutria-dairy)|FoodTech|| -|[Qingdao Telaidian](https://www.teld.cn/)|Electric Car Charger|[Chinese](https://pingcap.com/cases-cn/user-case-telaidian/)| +|[Qingdao Telaidian](https://www.teld.cn/)|Electric Car Charger|| diff --git a/alert-rules.md b/alert-rules.md index 783ffa15f0f0..1ed724b2cf2d 100644 --- a/alert-rules.md +++ b/alert-rules.md @@ -218,7 +218,7 @@ This section gives the alert rules for the PD component. * Description: - etcd writes data to disk at a lower speed than normal. 
It might lead to PD leader timeout or failure to store TSO on disk in time, which will shut down the service of the entire cluster. + If the latency of the fsync operation exceeds 1 second, it indicates that etcd writes data to disk at a lower speed than normal. It might lead to PD leader timeout or failure to store TSO on disk in time, which will shut down the service of the entire cluster. * Solution: @@ -234,7 +234,7 @@ This section gives the alert rules for the PD component. * Description: - The number of Region replicas is smaller than the value of `max-replicas`. When a TiKV machine is down and its downtime exceeds `max-down-time`, it usually leads to missing replicas for some Regions during a period of time. When a TiKV node is made offline, it might result in a small number of Regions with missing replicas. + The number of Region replicas is smaller than the value of `max-replicas`. When a TiKV machine is down and its downtime exceeds `max-down-time`, it usually leads to missing replicas for some Regions during a period of time. * Solution: @@ -690,7 +690,7 @@ This section gives the alert rules for the TiKV component. * Alert rule: - `increase(tikv_coprocessor_request_error{reason!="lock"}[10m]) > 100` + `increase(tikv_coprocessor_request_error{reason!="meet_lock"}[10m]) > 100` * Description: @@ -704,7 +704,7 @@ This section gives the alert rules for the TiKV component. * Alert rule: - `increase(tikv_coprocessor_request_error{reason="lock"}[10m]) > 10000` + `increase(tikv_coprocessor_request_error{reason="meet_lock"}[10m]) > 10000` * Description: @@ -752,11 +752,20 @@ This section gives the alert rules for the TiKV component. Check which kind of tasks has a higher value. You can normally find a solution to the Coprocessor and apply worker tasks from other metrics.
-#### `TiKV_low_space_and_add_region` +#### `TiKV_low_space` * Alert rule: - `count((sum(tikv_store_size_bytes{type="available"}) by (instance) / sum(tikv_store_size_bytes{type="capacity"}) by (instance) < 0.2) and (sum(tikv_raftstore_snapshot_traffic_total{type="applying"}) by (instance) > 0)) > 0` + `sum(tikv_store_size_bytes{type="available"}) by (instance) / sum(tikv_store_size_bytes{type="capacity"}) by (instance) < 0.2` + +* Description: + + The data volume of TiKV exceeds 80% of the configured node capacity or the disk capacity of the machine. + +* Solution: + + * Check the balance condition of node space. + * Make a plan to increase the disk capacity or delete some data or increase cluster node depending on different situations. #### `TiKV_approximate_region_size` diff --git a/as-of-timestamp.md b/as-of-timestamp.md index baa31b39a4a4..ce63be0ff055 100644 --- a/as-of-timestamp.md +++ b/as-of-timestamp.md @@ -9,12 +9,10 @@ This document describes how to perform the [Stale Read](/stale-read.md) feature > **Warning:** > -> Currently, Stale Read is an experimental feature. It is not recommended to use it in the production environment. -> > Currently, you cannot use Stale Read together with TiFlash. If your SQL query contains the `AS OF TIMESTAMP` clause and TiDB might read data from TiFlash replicas, you might encounter an error with a message like `ERROR 1105 (HY000): stale requests require tikv backend`. > > To fix the problem, disable TiFlash replicas for your Stale Read query. To do that, perform one of the following operations: -> +> > - Use the `set session tidb_isolation_read_engines='tidb,tikv'` variable. > - Use the [hint](/optimizer-hints.md#read_from_storagetiflasht1_name--tl_name--tikvt2_name--tl_name-) to enforce TiDB to read data from TiKV. 
@@ -47,9 +45,7 @@ Here are some examples of the `AS OF TIMESTAMP` clause: > > In addition to specifying a timestamp, the most common use of the `AS OF TIMESTAMP` clause is to read data that is several seconds old. If this approach is used, it is recommended to read historical data older than 5 seconds. > -> You need to deploy the NTP service for your TiDB and PD nodes when you use Stale Read. This avoids the situation where the specified timestamp used by TiDB goes ahead of the latest TSO allocating progress (such as a timestamp several seconds ahead), or is later than the GC safe point timestamp. When the specified timestamp goes beyond the service scope, TiDB returns an error or waits for the transaction to commit. -> -> The `Prepare` statement and the `AS OF TIMESTAMP` syntax are not perfectly compatible. It is not recommended to use them together. +> You need to deploy the NTP service for your TiDB and PD nodes when you use Stale Read. This avoids the situation where the specified timestamp used by TiDB goes ahead of the latest TSO allocating progress (such as a timestamp several seconds ahead), or is later than the GC safe point timestamp. When the specified timestamp goes beyond the service scope, TiDB returns an error. ## Usage examples diff --git a/basic-features.md b/basic-features.md index 74260e3ff2b4..fc1a7b2738f8 100644 --- a/basic-features.md +++ b/basic-features.md @@ -1,80 +1,134 @@ --- -title: TiDB Basic Features +title: TiDB Features summary: Learn about the basic features of TiDB. aliases: ['/docs/dev/basic-features/'] --- -# TiDB Basic Features - -This document introduces the basic features of TiDB. - -## Data types - -- Numeric types: `BIT`, `BOOL|BOOLEAN`, `SMALLINT`, `MEDIUMINT`, `INT|INTEGER`, `BIGINT`, `FLOAT`, `DOUBLE`, `DECIMAL`. - -- Date and time types: `DATE`, `TIME`, `DATETIME`, `TIMESTAMP`, `YEAR`. 
- -- String types: `CHAR`, `VARCHAR`, `TEXT`, `TINYTEXT`, `MEDIUMTEXT`, `LONGTEXT`, `BINARY`, `VARBINARY`, `BLOB`, `TINYBLOB`, `MEDIUMBLOB`, `LONGBLOB`, `ENUM`, `SET`. - -- The `JSON` type. - -## Operators - -- Arithmetic operators, bit operators, comparison operators, logical operators, date and time operators, and so on. - -## Character sets and collations - -- Character sets: `UTF8`, `UTF8MB4`, `BINARY`, `ASCII`, `LATIN1`. - -- Collations: `UTF8MB4_GENERAL_CI`, `UTF8MB4_UNICODE_CI`, `UTF8MB4_GENERAL_BIN`, `UTF8_GENERAL_CI`, `UTF8_UNICODE_CI`, `UTF8_GENERAL_BIN`, `BINARY`. - -## Functions - -- Control flow functions, string functions, date and time functions, bit functions, data type conversion functions, data encryption and decryption functions, compression and decompression functions, information functions, JSON functions, aggregation functions, window functions, and so on. - -## SQL statements - -- Fully supports standard Data Definition Language (DDL) statements, such as `CREATE`, `DROP`, `ALTER`, `RENAME`, `TRUNCATE`, and so on. - -- Fully supports standard Data Manipulation Language (DML) statements, such as `INSERT`, `REPLACE`, `SELECT`, subqueries, `UPDATE`, `LOAD DATA`, and so on. - -- Fully supports standard transactional and locking statements, such as `START TRANSACTION`, `COMMIT`, `ROLLBACK`, `SET TRANSACTION`, and so on. - -- Fully supports standard database administration statements, such as `SHOW`, `SET`, and so on. - -- Fully supports standard utility statements, such as `DESCRIBE`, `EXPLAIN`, `USE`, and so on. - -- Fully supports the `GROUP BY` and `ORDER BY` clauses. - -- Fully supports the standard `LEFT OUTER JOIN` and `RIGHT OUTER JOIN` SQL statements. - -- Fully supports the standard SQL table and column aliases. 
- -## Partitioning - -- Supports Range partitioning -- Supports Hash partitioning - -## Views - -- Supports general views - -## Constraints - -- Supports non-empty constraints -- Supports primary key constraints -- Supports unique constraints - -## Security - -- Supports privilege management based on RBAC (role-based access control) -- Supports password management -- Supports communication and data encryption -- Supports IP allowlist -- Supports audit - -## Tools - -- Supports fast backup -- Supports data migration from MySQL to TiDB using tools -- Supports deploying and maintaining TiDB using tools +# TiDB Features + +The following table provides an overview of the feature development history of TiDB. Note that features under active development might change before the final release. + +| Data types, functions, and operators | 5.2 | 5.1 | 5.0 | 4.0 | +|----------------------------------------------------------------------------------------------------------|:------------:|:------------:|:------------:|:------------:| +| [Numeric types](/data-type-numeric.md) | Y | Y | Y | Y | +| [Date and time types](/data-type-date-and-time.md) | Y | Y | Y | Y | +| [String types](/data-type-string.md) | Y | Y | Y | Y | +| [JSON type](/data-type-json.md) | Experimental | Experimental | Experimental | Experimental | +| [Control flow functions](/functions-and-operators/control-flow-functions.md) | Y | Y | Y | Y | +| [String functions](/functions-and-operators/string-functions.md) | Y | Y | Y | Y | +| [Numeric functions and operators](/functions-and-operators/numeric-functions-and-operators.md) | Y | Y | Y | Y | +| [Date and time functions](/functions-and-operators/date-and-time-functions.md) | Y | Y | Y | Y | +| [Bit functions and operators](/functions-and-operators/bit-functions-and-operators.md) | Y | Y | Y | Y | +| [Cast functions and operators](/functions-and-operators/cast-functions-and-operators.md) | Y | Y | Y | Y | +| [Encryption and compression 
functions](/functions-and-operators/encryption-and-compression-functions.md) | Y | Y | Y | Y | +| [Information functions](/functions-and-operators/information-functions.md) | Y | Y | Y | Y | +| [JSON functions](/functions-and-operators/json-functions.md) | Experimental | Experimental | Experimental | Experimental | +| [Aggregation functions](/functions-and-operators/aggregate-group-by-functions.md) | Y | Y | Y | Y | +| [Window functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | +| [Miscellaneous functions](/functions-and-operators/miscellaneous-functions.md) | Y | Y | Y | Y | +| [Operators](/functions-and-operators/operators.md) | Y | Y | Y | Y | +| [Character sets and collations](/character-set-and-collation.md) [^1] | Y | Y | Y | Y | +| **Indexing and constraints** | **5.2** | **5.1** | **5.0** | **4.0** | +| [Expression indexes](/sql-statements/sql-statement-create-index.md#expression-index) | Experimental | Experimental | Experimental | Experimental | +| [Columnar storage (TiFlash)](/tiflash/tiflash-overview.md) | Y | Y | Y | Y | +| [RocksDB engine](/storage-engine/rocksdb-overview.md) | Y | Y | Y | Y | +| [Titan plugin](/storage-engine/titan-overview.md) | Y | Y | Y | Y | +| [Invisible indexes](/sql-statements/sql-statement-add-index.md) | Y | Y | Y | N | +| [Composite `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | +| [Unique indexes](/constraints.md) | Y | Y | Y | Y | +| [Clustered index on integer `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | +| [Clustered index on composite or non-integer key](/constraints.md) | Y | Y | Y | N | +| **SQL statements** [^2] | **5.2** | **5.1** | **5.0** | **4.0** | +| Basic `SELECT`, `INSERT`, `UPDATE`, `DELETE`, `REPLACE` | Y | Y | Y | Y | +| `INSERT ON DUPLICATE KEY UPDATE` | Y | Y | Y | Y | +| `LOAD DATA INFILE` | Y | Y | Y | Y | +| `SELECT INTO OUTFILE` | Y | Y | Y | Y | +| `INNER JOIN`, `LEFT\|RIGHT [OUTER] JOIN` | Y | Y | Y | Y | +| `UNION`, `UNION ALL` | Y | Y | Y | Y | +| [`EXCEPT` and 
`INTERSECT` operators](/functions-and-operators/set-operators.md) | Y | Y | Y | N | +| `GROUP BY`, `ORDER BY` | Y | Y | Y | Y | +| [Window Functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | +| [Common Table Expressions (CTE)](/sql-statements/sql-statement-with.md) | Y | Y | N | N | +| `START TRANSACTION`, `COMMIT`, `ROLLBACK` | Y | Y | Y | Y | +| [`EXPLAIN`](/sql-statements/sql-statement-explain.md) | Y | Y | Y | Y | +| [`EXPLAIN ANALYZE`](/sql-statements/sql-statement-explain-analyze.md) | Y | Y | Y | Y | +| [User-defined variables](/user-defined-variables.md) | Experimental | Experimental | Experimental | Experimental | +| **Advanced SQL Features** | **5.2** | **5.1** | **5.0** | **4.0** | +| [Prepared statement cache](/sql-prepare-plan-cache.md) | Experimental | Experimental | Experimental | Experimental | +| [SQL plan management (SPM)](/sql-plan-management.md) | Y | Y | Y | Y | +| [Coprocessor cache](/coprocessor-cache.md) | Y | Y | Y | Experimental | +| [Stale Read](/stale-read.md) | Y | Y | N | N | +| [Follower reads](/follower-read.md) | Y | Y | Y | Y | +| [Read historical data (tidb_snapshot)](/read-historical-data.md) | Y | Y | Y | Y | +| [Optimizer hints](/optimizer-hints.md) | Y | Y | Y | Y | +| [MPP Execution Engine](/explain-mpp.md) | Y | Y | Y | N | +| [Index Merge Join](/explain-index-merge.md) | Experimental | Experimental | Experimental | Experimental | +| **Data definition language (DDL)** | **5.2** | **5.1** | **5.0** | **4.0** | +| Basic `CREATE`, `DROP`, `ALTER`, `RENAME`, `TRUNCATE` | Y | Y | Y | Y | +| [Generated columns](/generated-columns.md) | Experimental | Experimental | Experimental | Experimental | +| [Views](/views.md) | Y | Y | Y | Y | +| [Sequences](/sql-statements/sql-statement-create-sequence.md) | Y | Y | Y | Y | +| [Auto increment](/auto-increment.md) | Y | Y | Y | Y | +| [Auto random](/auto-random.md) | Y | Y | Y | Y | +| [DDL algorithm assertions](/sql-statements/sql-statement-alter-table.md) | Y | Y | Y
| Y | +| Multi schema change: add column(s) | Y | Y | Y | N | +| [Change column type](/sql-statements/sql-statement-modify-column.md) | Y | Y | N | N | +| **Transactions** | **5.2** | **5.1** | **5.0** | **4.0** | +| [Async commit](/system-variables.md#tidb_enable_async_commit-new-in-v50) | Y | Y | Y | N | +| [1PC](/system-variables.md#tidb_enable_1pc-new-in-v50) | Y | Y | Y | N | +| [Large transactions (10GB)](/transaction-overview.md#transaction-size-limit) | Y | Y | Y | Y | +| [Pessimistic transactions](/pessimistic-transaction.md) | Y | Y | Y | Y | +| [Optimistic transactions](/optimistic-transaction.md) | Y | Y | Y | Y | +| [Repeatable-read isolation (snapshot isolation)](/transaction-isolation-levels.md) | Y | Y | Y | Y | +| [Read-committed isolation](/transaction-isolation-levels.md) | Y | Y | Y | Y | +| **Partitioning** | **5.2** | **5.1** | **5.0** | **4.0** | +| [Range partitioning](/partitioned-table.md) | Y | Y | Y | Y | +| [Hash partitioning](/partitioned-table.md) | Y | Y | Y | Y | +| [List partitioning](/partitioned-table.md) | Experimental | Experimental | Experimental | N | +| [List COLUMNS partitioning](/partitioned-table.md) | Experimental | Experimental | Experimental | N | +| [`EXCHANGE PARTITION`](/partitioned-table.md) | Experimental | Experimental | Experimental | N | +| [Dynamic Pruning](/partitioned-table.md#dynamic-pruning-mode) | Experimental | Experimental | N | N | +| **Statistics** | **5.2** | **5.1** | **5.0** | **4.0** | +| [CMSketch](/statistics.md) | Deprecated | Deprecated | Deprecated | Y | +| [Histograms](/statistics.md) | Y | Y | Y | Y | +| [Extended statistics (multiple columns)](/statistics.md) | Experimental | Experimental | Experimental | N | +| [Statistics Feedback](/statistics.md#automatic-update) | Experimental | Experimental | Experimental | Experimental | +| [Fast Analyze](/system-variables.md#tidb_enable_fast_analyze) | Experimental | Experimental | Experimental | Experimental | +| **Security** | **5.2** | **5.1** | 
**5.0** | **4.0** | +| [Transport layer security (TLS)](/enable-tls-between-clients-and-servers.md) | Y | Y | Y | Y | +| [Encryption at rest (TDE)](/encryption-at-rest.md) | Y | Y | Y | Y | +| [Role-based authentication (RBAC)](/role-based-access-control.md) | Y | Y | Y | Y | +| [Certificate-based authentication](/certificate-authentication.md) | Y | Y | Y | Y | +| `caching_sha2_password` authentication | Y | N | N | N | +| [MySQL compatible `GRANT` system](/privilege-management.md) | Y | Y | Y | Y | +| [Dynamic Privileges](/privilege-management.md#dynamic-privileges) | Y | Y | N | N | +| [Security Enhanced Mode](/system-variables.md#tidb_enable_enhanced_security) | Y | Y | N | N | +| [Redacted Log Files](/log-redaction.md) | Y | Y | Y | N | +| **Data import and export** | **5.2** | **5.1** | **5.0** | **4.0** | +| [Fast Importer (TiDB Lightning)](/tidb-lightning/tidb-lightning-overview.md) | Y | Y | Y | Y | +| mydumper logical dumper | Deprecated | Deprecated | Deprecated | Deprecated | +| [Dumpling logical dumper](/dumpling-overview.md) | Y | Y | Y | Y | +| [Transactional `LOAD DATA`](/sql-statements/sql-statement-load-data.md) | Y | Y | Y | N | +| [Database migration toolkit (DM)](/migration-overview.md) | Y | Y | Y | Y | +| [TiDB Binlog](/tidb-binlog/tidb-binlog-overview.md) | Deprecated | Deprecated | Deprecated | Deprecated | +| [Change data capture (CDC)](/ticdc/ticdc-overview.md) | Y | Y | Y | Y | +| **Management, observability and tools** | **5.2** | **5.1** | **5.0** | **4.0** | +| [TiDB Dashboard](/dashboard/dashboard-intro.md) | Y | Y | Y | Y | +| [SQL diagnostics](/information-schema/information-schema-sql-diagnostics.md) | Experimental | Experimental | Experimental | Experimental | +| [Information schema](/information-schema/information-schema.md) | Y | Y | Y | Y | +| [Metrics schema](/metrics-schema.md) | Y | Y | Y | Y | +| [Statements summary tables](/statement-summary-tables.md) | Y | Y | Y | Y | +| [Slow query log](/identify-slow-queries.md) | Y
| Y | Y | Y | +| [TiUP deployment](/tiup/tiup-overview.md) | Y | Y | Y | Y | +| Ansible deployment | N | N | N | Deprecated | +| [Kubernetes operator](https://docs.pingcap.com/tidb-in-kubernetes/) | Y | Y | Y | Y | +| [Built-in physical backup](/br/backup-and-restore-use-cases.md) | Y | Y | Y | Y | +| Top SQL | Y | N | N | N | +| [Global Kill](/sql-statements/sql-statement-kill.md) | Experimental | Experimental | Experimental | Experimental | +| [Lock View](/information-schema/information-schema-data-lock-waits.md) | Y | Experimental | Experimental | Experimental | +| [`SHOW CONFIG`](/sql-statements/sql-statement-show-config.md) | Experimental | Experimental | Experimental | Experimental | +| [`SET CONFIG`](/dynamic-config.md) | Experimental | Experimental | Experimental | Experimental | + +[^1]: TiDB incorrectly treats latin1 as a subset of utf8. See [TiDB #18955](https://github.com/pingcap/tidb/issues/18955) for more details. + +[^2]: See [Statement Reference](/sql-statements/sql-statement-select.md) for a full list of SQL statements supported. diff --git a/basic-sql-operations.md b/basic-sql-operations.md index 8cdf0169ca39..4ee0bf60b8a4 100644 --- a/basic-sql-operations.md +++ b/basic-sql-operations.md @@ -236,11 +236,15 @@ To query a specific column, add the column name after the `SELECT` keyword: ```sql SELECT name FROM person; +``` + +```sql +------+ | name | +------+ | tom | +------+ +1 rows in set (0.00 sec) ``` Use the `WHERE` clause to filter all records that match the conditions and then return the result: diff --git a/benchmark/benchmark-sysbench-v5.2.0-vs-v5.1.1.md b/benchmark/benchmark-sysbench-v5.2.0-vs-v5.1.1.md new file mode 100644 index 000000000000..87064ac5f323 --- /dev/null +++ b/benchmark/benchmark-sysbench-v5.2.0-vs-v5.1.1.md @@ -0,0 +1,185 @@ +--- +title: TiDB Sysbench Performance Test Report -- v5.2.0 vs. v5.1.1 +--- + +# TiDB Sysbench Performance Test Report -- v5.2.0 vs. 
v5.1.1 + +## Test overview + +This test aims at comparing the Sysbench performance of TiDB v5.2.0 and TiDB v5.1.1 in the Online Transactional Processing (OLTP) scenario. The results show that compared with v5.1.1, the Point Select performance of v5.2.0 is improved by 11.03%, and the performance of other scenarios is slightly reduced. + +## Test environment (AWS EC2) + +### Hardware configuration + +| Service type | EC2 type | Instance count | +|:----------|:----------|:----------| +| PD | m5.xlarge | 3 | +| TiKV | i3.4xlarge| 3 | +| TiDB | c5.4xlarge| 3 | +| Sysbench | c5.9xlarge| 1 | + +### Software version + +| Service type | Software version | +|:----------|:-----------| +| PD | v5.1.1 and v5.2.0 | +| TiDB | v5.1.1 and v5.2.0 | +| TiKV | v5.1.1 and v5.2.0 | +| Sysbench | 1.1.0-ead2689 | + +### Parameter configuration + +TiDB v5.2.0 and TiDB v5.1.1 use the same configuration. + +#### TiDB parameter configuration + +{{< copyable "" >}} + +```yaml +log.level: "error" +performance.max-procs: 20 +prepared-plan-cache.enabled: true +tikv-client.max-batch-wait-time: 2000000 +``` + +#### TiKV parameter configuration + +{{< copyable "" >}} + +```yaml +storage.scheduler-worker-pool-size: 5 +raftstore.store-pool-size: 3 +raftstore.apply-pool-size: 3 +rocksdb.max-background-jobs: 8 +raftdb.max-background-jobs: 4 +raftdb.allow-concurrent-memtable-write: true +server.grpc-concurrency: 6 +readpool.unified.min-thread-count: 5 +readpool.unified.max-thread-count: 20 +readpool.storage.normal-concurrency: 10 +pessimistic-txn.pipelined: true +server.enable-request-batch: false +``` + +#### TiDB global variable configuration + +{{< copyable "sql" >}} + +```sql +set global tidb_hashagg_final_concurrency=1; +set global tidb_hashagg_partial_concurrency=1; +set global tidb_enable_async_commit = 1; +set global tidb_enable_1pc = 1; +set global tidb_guarantee_linearizability = 0; +set global tidb_enable_clustered_index = 1; +``` + +## Test plan + +1. Deploy TiDB v5.2.0 and v5.1.1 using TiUP. 
+2. Use Sysbench to import 16 tables, each table with 10 million rows of data. +3. Execute the `analyze table` statement on each table. +4. Back up the data used for restore before different concurrency tests, which ensures data consistency for each test. +5. Start the Sysbench client to perform the `point_select`, `read_write`, `update_index`, and `update_non_index` tests. Perform stress tests on TiDB via HAProxy. The test takes 5 minutes. +6. After each type of test is completed, stop the cluster, overwrite the cluster with the backup data in step 4, and restart the cluster. + +### Prepare test data + +Execute the following command to prepare the test data: + +{{< copyable "shell-regular" >}} + +```bash +sysbench oltp_common \ + --threads=16 \ + --rand-type=uniform \ + --db-driver=mysql \ + --mysql-db=sbtest \ + --mysql-host=$aws_nlb_host \ + --mysql-port=$aws_nlb_port \ + --mysql-user=root \ + --mysql-password=password \ + prepare --tables=16 --table-size=10000000 +``` + +### Perform the test + +Execute the following command to perform the test: + +{{< copyable "shell-regular" >}} + +```bash +sysbench $testname \ + --threads=$threads \ + --time=300 \ + --report-interval=1 \ + --rand-type=uniform \ + --db-driver=mysql \ + --mysql-db=sbtest \ + --mysql-host=$aws_nlb_host \ + --mysql-port=$aws_nlb_port \ + run --tables=16 --table-size=10000000 +``` + +## Test results + +### Point Select performance + +| Threads | v5.1.1 QPS | v5.1.1 95% latency (ms) | v5.2.0 QPS | v5.2.0 95% latency (ms) | QPS improvement | +|:----------|:----------|:----------|:----------|:----------|:----------| +|150|143014.13|2.35|174402.5|1.23|21.95%| +|300|199133.06|3.68|272018|1.64|36.60%| +|600|389391.65|2.18|393536.4|2.11|1.06%| +|900|468338.82|2.97|447981.98|3.3|-4.35%| +|1200|448348.52|5.18|468241.29|4.65|4.44%| +|1500|454376.79|7.04|483888.42|6.09|6.49%| + +Compared with v5.1.1, the Point Select performance of v5.2.0 is improved by 11.03%. 
+ +![Point Select](/media/sysbench_v511vsv520_point_select.png) + +### Update Non-index performance + +| Threads | v5.1.1 QPS | v5.1.1 95% latency (ms) | v5.2.0 QPS | v5.2.0 95% latency (ms) | QPS improvement | +|:----------|:----------|:----------|:----------|:----------|:----------| +|150|31198.68|6.43|30714.73|6.09|-1.55%| +|300|43577.15|10.46|42997.92|9.73|-1.33%| +|600|57230.18|17.32|56168.81|16.71|-1.85%| +|900|65325.11|23.1|64098.04|22.69|-1.88%| +|1200|71528.26|28.67|69908.15|28.67|-2.26%| +|1500|76652.5|33.12|74371.79|33.72|-2.98%| + +Compared with v5.1.1, the Update Non-index performance of v5.2.0 is reduced by 1.98%. + +![Update Non-index](/media/sysbench_v511vsv520_update_non_index.png) + +### Update Index performance + +| Threads | v5.1.1 QPS | v5.1.1 95% latency (ms) | v5.2.0 QPS | v5.2.0 95% latency (ms) | QPS improvement | +|:----------|:----------|:----------|:----------|:----------|:----------| +|150|15641.04|13.22|15320|13.46|-2.05%| +|300|19787.73|21.89|19161.35|22.69|-3.17%| +|600|24566.74|36.89|23616.07|38.94|-3.87%| +|900|27516.57|50.11|26270.04|54.83|-4.53%| +|1200|29421.10|63.32|28002.65|69.29|-4.82%| +|1500|30957.84|77.19|28624.44|95.81|-7.54%| + +Compared with v5.1.1, the Update Index performance of v5.2.0 is reduced by 4.33%. + +![Update Index](/media/sysbench_v511vsv520_update_index.png) + +### Read Write performance + +| Threads | v5.1.1 QPS | v5.1.1 95% latency (ms) | v5.2.0 QPS | v5.2.0 95% latency (ms) | QPS improvement | +|:----------|:----------|:----------|:----------|:----------|:----------| +|150|68471.02|57.87|69246|54.83|1.13%| +|300|86573.09|97.55|85340.42|94.10|-1.42%| +|600|101760.75|176.73|102221.31|173.58|0.45%| +|900|111877.55|248.83|109276.45|257.95|-2.32%| +|1200|117479.4|337.94|114231.33|344.08|-2.76%| +|1500|119662.91|419.45|116663.28|434.83|-2.51%| + +Compared with v5.1.1, the Read Write performance of v5.2.0 is reduced by 1.24%.
+ +![Read Write](/media/sysbench_v511vsv520_read_write.png) diff --git a/benchmark/v5.2-performance-benchmarking-with-tpcc.md b/benchmark/v5.2-performance-benchmarking-with-tpcc.md new file mode 100644 index 000000000000..495b709cdcf8 --- /dev/null +++ b/benchmark/v5.2-performance-benchmarking-with-tpcc.md @@ -0,0 +1,92 @@ +--- +title: TiDB TPC-C Performance Test Report -- v5.2.0 vs. v5.1.1 +--- + +# TiDB TPC-C Performance Test Report -- v5.2.0 vs. v5.1.1 + +## Test overview + +This test aims to compare the TPC-C performance of TiDB v5.2.0 and TiDB v5.1.1 in the online transactional processing (OLTP) scenario. The results show that compared with v5.1.1, the TPC-C performance of v5.2.0 is reduced by 4.22%. + +## Test environment (AWS EC2) + +### Hardware configuration + +| Service type | EC2 type | Instance count | +|:----------|:----------|:----------| +| PD | m5.xlarge | 3 | +| TiKV | i3.4xlarge| 3 | +| TiDB | c5.4xlarge| 3 | +| TPC-C | c5.9xlarge| 1 | + +### Software version + +| Service type | Software version | +|:----------|:-----------| +| PD | v5.1.1 and v5.2.0 | +| TiDB | v5.1.1 and v5.2.0 | +| TiKV | v5.1.1 and v5.2.0 | +| TiUP | 1.5.1 | + +### Parameter configuration + +TiDB v5.2.0 and TiDB v5.1.1 use the same configuration.
+ +#### TiDB parameter configuration + +{{< copyable "" >}} + +```yaml +log.level: "error" +performance.max-procs: 20 +prepared-plan-cache.enabled: true +tikv-client.max-batch-wait-time: 2000000 +``` + +#### TiKV parameter configuration + +{{< copyable "" >}} + +```yaml +pessimistic-txn.pipelined: true +raftdb.allow-concurrent-memtable-write: true +raftdb.max-background-jobs: 4 +raftstore.apply-max-batch-size: 2048 +raftstore.apply-pool-size: 3 +raftstore.store-max-batch-size: 2048 +raftstore.store-pool-size: 3 +readpool.storage.normal-concurrency: 10 +readpool.unified.max-thread-count: 20 +readpool.unified.min-thread-count: 5 +rocksdb.max-background-jobs: 8 +server.grpc-concurrency: 6 +storage.scheduler-worker-pool-size: 20 +server.enable-request-batch: false +``` + +#### TiDB global variable configuration + +{{< copyable "sql" >}} + +```sql +set global tidb_hashagg_final_concurrency=1; +set global tidb_hashagg_partial_concurrency=1; +set global tidb_enable_async_commit = 1; +set global tidb_enable_1pc = 1; +set global tidb_guarantee_linearizability = 0; +set global tidb_enable_clustered_index = 1; +``` + +## Test plan + +1. Deploy TiDB v5.2.0 and v5.1.1 using TiUP. +2. Create a database named `tpcc`: `create database tpcc;`. +3. Use BenchmarkSQL to import the TPC-C 5000 Warehouse data: `tiup bench tpcc prepare --warehouse 5000 --db tpcc -H 127.0.0.1 -p 4000`. +4. Execute the `tiup bench tpcc run -U root --db tpcc --host 127.0.0.1 --port 4000 --time 300s --warehouses 5000 --threads {{thread}}` command to perform stress tests on TiDB via HAProxy. +5. Extract the tpmC data of New Order from the result. + +## Test result + +Compared with v5.1.1, the TPC-C performance of v5.2.0 is **reduced by 4.22%**. 
+ +![TPC-C](/media/tpcc_v511_vs_v520.png) diff --git a/benchmark/v5.1-performance-benchmarking-with-tpch.md b/benchmark/v5.2-performance-benchmarking-with-tpch.md similarity index 82% rename from benchmark/v5.1-performance-benchmarking-with-tpch.md rename to benchmark/v5.2-performance-benchmarking-with-tpch.md index 52003973bc51..3cefbfe0e55a 100644 --- a/benchmark/v5.1-performance-benchmarking-with-tpch.md +++ b/benchmark/v5.2-performance-benchmarking-with-tpch.md @@ -1,17 +1,17 @@ --- -title: TiDB TPC-H 100GB Performance Test Report -- v5.1 MPP mode vs. Greenplum 6.15.0 and Apache Spark 3.1.1 -summary: Compare the TPC-H 100GB performance of TiDB 5.1 MPP mode, Greenplum 6.15.0, and Apache Spark 3.1.1. +title: TiDB TPC-H 100GB Performance Test Report -- v5.2 MPP mode vs. Greenplum 6.15.0 and Apache Spark 3.1.1 +summary: Compare the TPC-H 100GB performance of TiDB 5.2 MPP mode, Greenplum 6.15.0, and Apache Spark 3.1.1. --- -# TiDB TPC-H 100GB Performance Test Report -- TiDB v5.1 MPP mode vs. Greenplum 6.15.0 and Apache Spark 3.1.1 +# TiDB TPC-H 100GB Performance Test Report -- TiDB v5.2 MPP mode vs. Greenplum 6.15.0 and Apache Spark 3.1.1 ## Test overview -This test aims at comparing the TPC-H 100GB performance of TiDB v5.1 in the MPP mode with that of Greenplum and Apache Spark, two mainstream analytics engines, in their latest versions. The test result shows that the performance of TiDB v5.1 in the MPP mode is two to three times faster than that of the other two solutions under TPC-H workload. +This test aims at comparing the TPC-H 100GB performance of TiDB v5.2 in the MPP mode with that of Greenplum and Apache Spark, two mainstream analytics engines, in their latest versions. The test result shows that the performance of TiDB v5.2 in the MPP mode is two to three times faster than that of the other two solutions under TPC-H workload. 
-In v5.0, TiDB introduces the MPP mode for [TiFlash](/tiflash/tiflash-overview.md), which significantly enhances TiDB's Hybrid Transactional and Analytical Processing (HTAP) capabilities. Test objects in this report are as follows: +In v5.0, TiDB introduces the MPP mode for [TiFlash](/tiflash/tiflash-overview.md). In v5.1 and v5.2, the MPP mode has been further improved, which significantly enhances TiDB's Hybrid Transactional and Analytical Processing (HTAP) capabilities. Test objects in this report are as follows: -+ TiDB v5.1 columnar storage in the MPP mode ++ TiDB v5.2 columnar storage in the MPP mode + Greenplum 6.15.0 + Apache Spark 3.1.1 + Parquet @@ -28,15 +28,15 @@ In v5.0, TiDB introduces the MPP mode for [TiFlash](/tiflash/tiflash-overview.md | Service type | Software version | |:----------|:-----------| -| TiDB | 5.1 | +| TiDB | 5.2 | | Greenplum | 6.15.0 | | Apache Spark | 3.1.1 | ### Parameter configuration -#### TiDB v5.1 +#### TiDB v5.2 -For the v5.1 cluster, TiDB uses the default parameter configuration except for the following configuration items. +For the v5.2 cluster, TiDB uses the default parameter configuration except for the following configuration items. In the configuration file `users.toml` of TiFlash, configure `max_memory_usage` as follows: @@ -50,7 +50,6 @@ Set session variables with the following SQL statements: ```sql set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; -set @@tidb_opt_broadcast_join=0; set @@tidb_mem_quota_query = 10 << 30; ``` @@ -88,7 +87,7 @@ The test of Apache Spark uses Apache Parquet as the storage format and stores th > > The following test results are the average data of three tests. All numbers are in seconds. 
-| Query ID | TiDB v5.1 | Greenplum 6.15.0 | Apache Spark 3.1.1 + Parquet | +| Query ID | TiDB v5.2 | Greenplum 6.15.0 | Apache Spark 3.1.1 + Parquet | | :-------- | :----------- | :------------ | :-------------- | | 1 | 8.08 | 64.1307 | 52.64 | | 2 | 2.53 | 4.76612 | 11.83 | @@ -113,11 +112,11 @@ The test of Apache Spark uses Apache Parquet as the storage format and stores th | 21 | 24.44 | 39.08594 | 34.83 | | 22 | 1.23 | 7.67476 | 4.59 | -![TPC-H](/media/tidb-v5-tpch-100-vs-gp-spark.png) +![TPC-H](/media/tidb-v5.2-tpch-100-vs-gp-spark.png) In the performance diagram above: -- Blue lines represent TiDB v5.1; +- Blue lines represent TiDB v5.2; - Red lines represent Greenplum 6.15.0; - Yellow lines represent Apache Spark 3.1.1. - The y-axis represents the execution time of the query. The less the time is, the better the performance is. diff --git a/best-practices/high-concurrency-best-practices.md b/best-practices/high-concurrency-best-practices.md index bbca43b1a38d..4cd6b74b6c33 100644 --- a/best-practices/high-concurrency-best-practices.md +++ b/best-practices/high-concurrency-best-practices.md @@ -35,7 +35,7 @@ For a distributed database, it is important to make full use of the capacity of To address the above challenges, it is necessary to start with the data segmentation and scheduling principle of TiDB. Refer to [Scheduling](https://pingcap.com/blog/2017-07-20-tidbinternal3/) for more details. -TiDB splits data into Regions, each representing a range of data with a size limit of 96M by default. Each Region has multiple replicas, and each group of replicas is called a Raft Group. In a Raft Group, the Region Leader executes the read and write tasks within the data range. The Region Leader is automatically scheduled by the Placement Driver (PD) component to different physical nodes evenly to distribute the read and write pressure. +TiDB splits data into Regions, each representing a range of data with a size limit of 96M by default. 
Each Region has multiple replicas, and each group of replicas is called a Raft Group. In a Raft Group, the Region Leader executes the read and write tasks (TiDB supports [Follower-Read](/follower-read.md)) within the data range. The Region Leader is automatically scheduled by the Placement Driver (PD) component to different physical nodes evenly to distribute the read and write pressure. ![TiDB Data Overview](/media/best-practices/tidb-data-overview.png) diff --git a/best-practices/java-app-best-practices.md b/best-practices/java-app-best-practices.md index fc30f91aaa33..627e3f8bc22a 100644 --- a/best-practices/java-app-best-practices.md +++ b/best-practices/java-app-best-practices.md @@ -189,7 +189,7 @@ update t set a = 10 where id = 1; update t set a = 11 where id = 2; update t set In addition, because of a [client bug](https://bugs.mysql.com/bug.php?id=96623), if you want to configure `rewriteBatchedStatements=true` and `useServerPrepStmts=true` during batch update, it is recommended that you also configure the `allowMultiQueries=true` parameter to avoid this bug. -#### Check parameters before execution +#### Integrate parameters Through monitoring, you might notice that although the application only performs `INSERT` operations to the TiDB cluster, there are a lot of redundant `SELECT` statements. Usually this happens because JDBC sends some SQL statements to query the settings, for example, `select @@session.transaction_read_only`. These SQL statements are useless for TiDB, so it is recommended that you configure `useConfigs=maxPerformance` to avoid extra overhead. @@ -205,6 +205,12 @@ enableQueryTimeouts=false After it is configured, you can check the monitoring to see a decreased number of `SELECT` statements. +#### Timeout-related parameters + +TiDB provides two MySQL-compatible parameters that controls the timeout: `wait_timeout` and `max_execution_time`. 
These two parameters respectively control the connection idle timeout with the Java application and the timeout of the SQL execution in the connection; that is to say, these parameters control the longest idle time and the longest busy time for the connection between TiDB and the Java application. The default value of both parameters is `0`, which by default allows the connection to be infinitely idle and infinitely busy (an infinite duration for one SQL statement to execute). + +However, in an actual production environment, idle connections and SQL statements with excessively long execution time negatively affect databases and applications. To avoid idle connections and SQL statements that are executed for too long, you can configure these two parameters in your application's connection string. For example, set `sessionVariables=wait_timeout=3600` (1 hour) and `sessionVariables=max_execution_time=300000` (5 minutes). + ## Connection pool Building TiDB (MySQL) connections is relatively expensive (for OLTP scenarios at least), because in addition to building a TCP connection, connection authentication is also required. Therefore, the client usually saves the TiDB (MySQL) connections to the connection pool for reuse. diff --git a/best-practices/massive-regions-best-practices.md b/best-practices/massive-regions-best-practices.md index 907300b844e7..4f9a8dbf3e89 100644 --- a/best-practices/massive-regions-best-practices.md +++ b/best-practices/massive-regions-best-practices.md @@ -20,7 +20,7 @@ A TiKV instance has multiple Regions on it. The Raftstore module drives the Raft > > This diagram only illustrates the workflow of Raftstore and does not represent the actual code structure. -From the above diagram, you can see that requests from the TiDB servers, after passing through the gRPC and storage modules, become read and write messages of KV (key-value), and are sent to the corresponding Regions. These messages are not immediately processed but are temporarily stored. 
Raftstore polls to check whether each Region has messages to process. If a Region has messages to process, Raftstore drives the Raft state machine of this Region to process these messages and perform subsequent operations according to the state changes of these messages. For example, when write requests come in, the Raft state machine stores logs into disk and sends logs to other Region replicas; when the heartbeat interval is reached, the Raft state machine sends heartbeat information to other Region replicas. +From the above diagram, you can see that requests from the TiDB servers, after passing through the gRPC and storage modules, become read and write messages of KV (key-value), and are sent to the corresponding Regions. These messages are not immediately processed but are temporarily stored. Raftstore polls to check whether each Region has messages to process. If a Region has messages to process, Raftstore drives the Raft state machine of this Region to process these messages and perform subsequent operations according to the state changes of these messages. For example, when write requests come in, the Raft state machine stores logs into disk and sends logs to other Region replicas; when the heartbeat interval is reached, the Raft state machine sends heartbeat information to other Region replicas. ## Performance problem diff --git a/best-practices/pd-scheduling-best-practices.md b/best-practices/pd-scheduling-best-practices.md index 91e0b0586b97..2c786c5df5f5 100644 --- a/best-practices/pd-scheduling-best-practices.md +++ b/best-practices/pd-scheduling-best-practices.md @@ -280,3 +280,5 @@ For v3.0.4 and v2.1.16 or earlier, the `approximate_keys` of regions are inaccur If a TiKV node fails, PD defaults to setting the corresponding node to the **down** state after 30 minutes (customizable by configuration item `max-store-down-time`), and rebalancing replicas for regions involved. 
Practically, if a node failure is considered unrecoverable, you can immediately take it offline. This makes PD replenish replicas soon in another node and reduces the risk of data loss. In contrast, if a node is considered recoverable, but the recovery cannot be done in 30 minutes, you can temporarily adjust `max-store-down-time` to a larger value to avoid unnecessary replenishment of the replicas and resources waste after the timeout. + +In TiDB v5.2.0, TiKV introduces the mechanism of slow TiKV node detection. By sampling the requests in TiKV, it calculates a score ranging from 1 to 100. A TiKV node with a score greater than or equal to 80 is marked as slow. You can add [`evict-slow-store-scheduler`](/pd-control.md#scheduler-show--add--remove--pause--resume--config) to detect and schedule slow nodes. When one and only one slow node appears, and the slow score reaches the upper limit (100 by default), all leaders in the node will be evicted. \ No newline at end of file diff --git a/br/backup-and-restore-faq.md b/br/backup-and-restore-faq.md index 7be170dc4930..c69435fad3bc 100644 --- a/br/backup-and-restore-faq.md +++ b/br/backup-and-restore-faq.md @@ -22,9 +22,7 @@ To reduce the impact on the cluster, you can use the `--ratelimit` parameter to ## Does BR back up system tables? During data restoration, do they raise conflict? -The system schemas (`information_schema`, `performance_schema`, `mysql`) are filtered out during full backup. For more details, refer to the [Backup Principle](/br/backup-and-restore-tool.md#implementation-principles). - -Because these system schemas do not exist in the backup files, no conflict occurs among system tables during data restoration. +Before v5.1.0, BR filtered out data from the system schema `mysql` during the backup. Since v5.1.0, BR **backs up** all data by default, including the system schemas `mysql.*`. 
But the technical implementation of restoring the system tables in `mysql.*` is not complete yet, so the tables in the system schema `mysql` are **not** restored by default. For more details, refer to the [Back up and restore table data in the `mysql` system schema (experimental feature)](/br/backup-and-restore-tool.md#back-up-and-restore-table-data-in-the-mysql-system-schema-experimental-feature). ## What should I do to handle the `Permission denied` or `No such file or directory` error, even if I have tried to run BR using root in vain? @@ -38,7 +36,74 @@ Running BR with the root access might fail due to the disk permission, because t > > You might encounter the same problem during data restoration. When the SST files are read for the first time, the read permission is verified. The execution duration of DDL suggests that there might be a long interval between checking the permission and running BR. You might receive the error message `Permission denied` after waiting for a long time. > -> Therefore, It is recommended to check the permission before data restoration. +> Therefore, it is recommended to check the permission before data restore according to the following steps: + +1. Run the Linux-native command for process query: + + {{< copyable "shell-regular" >}} + + ```bash + ps aux | grep tikv-server + ``` + + The output of the above command: + + ```shell + tidb_ouo 9235 10.9 3.8 2019248 622776 ? Ssl 08:28 1:12 bin/tikv-server --addr 0.0.0.0:20162 --advertise-addr 172.16.6.118:20162 --status-addr 0.0.0.0:20188 --advertise-status-addr 172.16.6.118:20188 --pd 172.16.6.118:2379 --data-dir /home/user1/tidb-data/tikv-20162 --config conf/tikv.toml --log-file /home/user1/tidb-deploy/tikv-20162/log/tikv.log + tidb_ouo 9236 9.8 3.8 2048940 631136 ? 
Ssl 08:28 1:05 bin/tikv-server --addr 0.0.0.0:20161 --advertise-addr 172.16.6.118:20161 --status-addr 0.0.0.0:20189 --advertise-status-addr 172.16.6.118:20189 --pd 172.16.6.118:2379 --data-dir /home/user1/tidb-data/tikv-20161 --config conf/tikv.toml --log-file /home/user1/tidb-deploy/tikv-20161/log/tikv.log + ``` + + Or you can run the following command: + + {{< copyable "shell-regular" >}} + + ```bash + ps aux | grep tikv-server | awk '{print $1}' + ``` + + The output of the above command: + + ```shell + tidb_ouo + tidb_ouo + ``` + +2. Query the startup information of the cluster using the TiUP command: + + {{< copyable "shell-regular" >}} + + ```bash + tiup cluster list + ``` + + The output of the above command: + + ```shell + [root@Copy-of-VM-EE-CentOS76-v1 br]# tiup cluster list + Starting component `cluster`: /root/.tiup/components/cluster/v1.5.2/tiup-cluster list + Name User Version Path PrivateKey + ---- ---- ------- ---- ---------- + tidb_cluster tidb_ouo v5.0.2 /root/.tiup/storage/cluster/clusters/tidb_cluster /root/.tiup/storage/cluster/clusters/tidb_cluster/ssh/id_rsa + ``` + +3. Check the permission for the backup directory. For example, `backup` is for backup data storage: + + {{< copyable "shell-regular" >}} + + ```bash + ls -al backup + ``` + + The output of the above command: + + ```shell + [root@Copy-of-VM-EE-CentOS76-v1 user1]# ls -al backup + total 0 + drwxr-xr-x 2 root root 6 Jun 28 17:48 . + drwxr-xr-x 11 root root 310 Jul 4 10:35 .. + ``` + + From the above output, you can find that the `tikv-server` instance is started by the user `tidb_ouo`. But the user `tidb_ouo` does not have the write permission for `backup`, the backup fails. ## What should I do to handle the `Io(Os...)` error? @@ -97,3 +162,11 @@ BR does not back up statistics (except in v4.0.9). Therefore, after restoring th In v4.0.9, BR backs up statistics by default, which consumes too much memory. 
To ensure that the backup process goes well, the backup for statistics is disabled by default starting from v4.0.10. If you do not execute `ANALYZE` on the table, TiDB will fail to select the optimized execution plan due to inaccurate statistics. If query performance is not a key concern, you can ignore `ANALYZE`. + +## Can I use multiple BR processes at the same time to restore the data of a single cluster? + +**It is strongly not recommended** to use multiple BR processes at the same time to restore the data of a single cluster for the following reasons: + ++ When BR restores data, it modifies some global configurations of PD. Therefore, if you use multiple BR processes for data restore at the same time, these configurations might be mistakenly overwritten and cause abnormal cluster status. ++ BR consumes a lot of cluster resources to restore data, so in fact, running BR processes in parallel improves the restore speed only to a limited extent. ++ There has been no test for running multiple BR processes in parallel for data restore, so it is not guaranteed to succeed. \ No newline at end of file diff --git a/br/backup-and-restore-tool.md b/br/backup-and-restore-tool.md index aea8cad94d93..cd71e8a52739 100644 --- a/br/backup-and-restore-tool.md +++ b/br/backup-and-restore-tool.md @@ -152,17 +152,20 @@ Note that skipping the version check might introduce incompatibility. The versio | Use BR v4.0 to back up TiDB v4.0 | ✅ | ✅ | ✅ (If TiKV >= v4.0.0-rc.1, and if BR contains the [#233](https://github.com/pingcap/br/pull/233) bug fix and TiKV does not contain the [#7241](https://github.com/tikv/tikv/pull/7241) bug fix, BR will cause the TiKV node to restart.) | | Use BR nightly or v5.0 to back up TiDB v4.0 | ❌ (If the TiDB version is earlier than v4.0.9, the [#609](https://github.com/pingcap/br/issues/609) issue might occur.) | ❌ (If the TiDB version is earlier than v4.0.9, the [#609](https://github.com/pingcap/br/issues/609) issue might occur.) 
| ❌ (If the TiDB version is earlier than v4.0.9, the [#609](https://github.com/pingcap/br/issues/609) issue might occur.) | -### Backup and restore system schemas +### Back up and restore table data in the `mysql` system schema (experimental feature) -Before v5.1.0, BR filtered out data from the system schemas during the backup. +> **Warning:** +> +> This feature is experimental and not thoroughly tested. It is highly **not recommended** to use this feature in the production environment. + +Before v5.1.0, BR filtered out data from the system schema `mysql` during the backup. Since v5.1.0, BR **backs up** all data by default, including the system schemas `mysql.*`. But the technical implementation of restoring the system tables in `mysql.*` is not complete yet, so the tables in the system schema `mysql` are **not** restored by default. -Since v5.1.0, BR **backups** all data by default, including the system schema (`mysql.*`). But to be compatible with the earlier versions of BR, the tables in system schema are **not** restored by default during the **restore**. If you want the tables to be restored to the system schemas, you need to set the [`filter` parameter](/br/use-br-command-line-tool.md#back-up-with-table-filter). Then, the system tables are first restored to the temporary schemas and then to the system schemas (by renaming the temporary schemas). +If you want the data of a system table (for example, `mysql.usertable1`) to be restored to the system schema `mysql`, you can set the [`filter` parameter](/br/use-br-command-line-tool.md#back-up-with-table-filter) to filter the table name (`-f "mysql.usertable1"`). After the setting, the system table is first restored to the temporary schema, and then to the system schema through renaming. -In addition, TiDB performs special operations on the following system tables: +It should be noted that the following system tables cannot be restored correctly due to technical reasons. 
Even if `-f "mysql.*"` is specified, these tables will not be restored: -- Tables related to statistical information are not restored, because the table ID of the statistical information has changed. -- `tidb` and `global_variables` tables in the `mysql` schema are not restored, because these tables cannot be overwritten. For example, overwriting these tables by the GC safepoint will affect the cluster. -- The restore of the `user` table in the `mysql` schema does not take effect until you manually execute the `FLUSH PRIVILEGE` command. +- Tables related to statistics: "stats_buckets", "stats_extended", "stats_feedback", "stats_fm_sketch", "stats_histograms", "stats_meta", "stats_top_n" +- Tables related to privileges or the system: "tidb", "global_variables", "columns_priv", "db", "default_roles", "global_grants", "global_priv", "role_edges", "tables_priv", "user", "gc_delete_range", "Gc_delete_range_done", "schema_index_usage" ### Minimum machine configuration required for running BR diff --git a/br/use-br-command-line-tool.md b/br/use-br-command-line-tool.md index 25fc603e680d..27a559572d1a 100644 --- a/br/use-br-command-line-tool.md +++ b/br/use-br-command-line-tool.md @@ -307,6 +307,8 @@ To restore the cluster data, use the `br restore` command. You can add the `full > - Where each peer is scattered to during restore is random. We don't know in advance which node will read which file. > > These can be avoided using shared storage, for example mounting an NFS on the local path, or using S3. With network storage, every node can automatically read every SST file, so these caveats no longer apply. +> +> Also, note that you can only run one restore operation for a single cluster at the same time. Otherwise, unexpected behaviors might occur. For details, see [FAQ](/br/backup-and-restore-faq.md#can-i-use-multiple-br-processes-at-the-same-time-to-restore-the-data-of-a-single-cluster). 
### Restore all the backup data @@ -361,6 +363,10 @@ br restore db \ In the above command, `--db` specifies the name of the database to be restored. For descriptions of other options, see [Restore all backup data](#restore-all-the-backup-data)). +> **Note:** +> +> When you restore the backup data, the name of the database specified by `--db` must be the same as the one specified by `--db` in the backup command. Otherwise, the restore fails. This is because the metafile of the backup data (`backupmeta` file) records the database name, so you can only restore data to the database with the same name. The recommended method is to restore the backup data to the database with the same name in another cluster. + ### Restore a table To restore a single table to the cluster, execute the `br restore table` command. To get help on this command, execute `br restore table -h` or `br restore table --help`. @@ -438,6 +444,39 @@ In the above command, `--table` specifies the name of the table to be restored. Restoring incremental data is similar to [restoring full data using BR](#restore-all-the-backup-data). Note that when restoring incremental data, make sure that all the data backed up before `last backup ts` has been restored to the target cluster. +### Restore tables created in the `mysql` schema (experimental feature) + +BR backs up tables created in the `mysql` schema by default. + +When you restore data using BR, the tables created in the `mysql` schema are not restored by default. If you need to restore these tables, you can explicitly include them using the [table filter](/table-filter.md#syntax). The following example restores `mysql.usertable` created in `mysql` schema. The command restores `mysql.usertable` along with other data. 
+ +{{< copyable "shell-regular" >}} + +```shell +br restore full -f '*.*' -f '!mysql.*' -f 'mysql.usertable' -s $external_storage_url +``` + +In the above command, `-f '*.*'` is used to override the default rules and `-f '!mysql.*'` instructs BR not to restore tables in `mysql` unless otherwise stated. `-f 'mysql.usertable'` indicates that `mysql.usertable` is required for restore. For detailed implementation, refer to the [table filter document](/table-filter.md#syntax). + +If you only need to restore `mysql.usertable`, use the following command: + +{{< copyable "shell-regular" >}} + +```shell +br restore full -f 'mysql.usertable' -s $external_storage_url +``` + +> **Warning:** +> +> Although you can back up and restore system tables (such as `mysql.tidb`) using the BR tool, some unexpected situations might occur after the restore, including: +> +> - the statistical information tables (`mysql.stat_*`) cannot be restored. +> - the system variable tables (`mysql.tidb`,`mysql.global_variables`) cannot be restored. +> - the user information tables (such as `mysql.user` and `mysql.columns_priv`) cannot be restored. +> - GC data cannot be restored. +> +> Restoring system tables might cause more compatibility issues. To avoid unexpected issues, **DO NOT** restore system tables in the production environment. 
+ ### Restore Raw KV (experimental feature) > **Warning:** diff --git a/check-before-deployment.md b/check-before-deployment.md index b231971fbd26..3fa071c58a27 100644 --- a/check-before-deployment.md +++ b/check-before-deployment.md @@ -598,7 +598,6 @@ Take the following steps to check the current operating system configuration and echo "net.ipv4.tcp_tw_recycle = 0">> /etc/sysctl.conf echo "net.ipv4.tcp_syncookies = 0">> /etc/sysctl.conf echo "vm.overcommit_memory = 1">> /etc/sysctl.conf - echo "vm.swappiness = 0">> /etc/sysctl.conf sysctl -p ``` diff --git a/choose-index.md b/choose-index.md index b00824139bba..f7672d1afd4d 100644 --- a/choose-index.md +++ b/choose-index.md @@ -29,21 +29,85 @@ Before introducing index selection, it is important to understand the ways TiDB ## Index selection rules -TiDB provides a heuristic rule named skyline-pruning based on the cost estimation of each operator for accessing tables. It can reduce the probability of wrong index selection caused by wrong estimation. +TiDB selects indexes based on rules or cost. The based rules include pre-rules and skyline-pruning. When selecting an index, TiDB tries the pre-rule first. If an index satisfies a pre-rule, TiDB directly selects this index. Otherwise, TiDB uses skyline-pruning to exclude unsuitable indexes, and then selects the index with the lowest cost based on the cost estimation of each operator that accesses tables. -### Skyline-pruning +### Rule-based selection -Skyline-pruning is a heuristic filtering rule for indexes. To judge an index, the following three dimensions are needed: +#### Pre-rules -- Whether it needs to retrieve rows from a table when you select the index to access the table (that is, the plan generated by the index is IndexReader operator or IndexLookupReader operator). Indexes that do not retrieve rows from a table are better on this dimension than indexes that do. 
+TiDB uses the following heuristic pre-rules to select indexes: -- Select whether the index satisfies a certain order. Because index reading can guarantee the order of certain column sets, indexes that satisfy the query order are superior to indexes that do not satisfy on this dimension. ++ Rule 1: If an index satisfies "unique index with full match + no need to retrieve rows from a table (which means that the plan generated by the index is the IndexReader operator)", TiDB directly selects this index. + ++ Rule 2: If an index satisfies "unique index with full match + the need to retrieve rows from a table (which means that the plan generated by the index is the IndexReader operator)", TiDB selects the index with the smallest number of rows to be retrieved from a table as a candidate index. + ++ Rule 3: If an index satisfies "ordinary index + no need to retrieve rows from a table + the number of rows to be read is less than the value of a certain threshold", TiDB selects the index with the smallest number of rows to be read as a candidate index. + ++ Rule 4: If only one candidate index is selected based on rule 2 and 3, select this candidate index. If two candidate indexes are respectively selected based on rule 2 and 3, select the index with the smaller number of rows to be read (the number of rows with index + the number of rows to be retrieved from a table). + +The "index with full match" in the above rules means each indexed column has the equal condition. When executing the `EXPLAIN FORMAT = 'verbose' ...` statement, if the pre-rules match an index, TiDB outputs a NOTE-level warning indicating that the index matches the pre-rule. + +In the following example, because the index `idx_b` meets the condition "unique index with full match + the need to retrieve rows from a table" in rule 2, TiDB selects the index `idx_b` as the access path, and `SHOW WARNING` returns a note indicating that the index `idx_b` matches the pre-rule. 
+ +```sql +mysql> CREATE TABLE t(a INT PRIMARY KEY, b INT, c INT, UNIQUE INDEX idx_b(b)); +Query OK, 0 rows affected (0.01 sec) + +mysql> EXPLAIN FORMAT = 'verbose' SELECT b, c FROM t WHERE b = 3 OR b = 6; ++-------------------+---------+---------+------+-------------------------+------------------------------+ +| id | estRows | estCost | task | access object | operator info | ++-------------------+---------+---------+------+-------------------------+------------------------------+ +| Batch_Point_Get_5 | 2.00 | 8.80 | root | table:t, index:idx_b(b) | keep order:false, desc:false | ++-------------------+---------+---------+------+-------------------------+------------------------------+ +1 row in set, 1 warning (0.00 sec) + +mysql> SHOW WARNINGS; ++-------+------+-------------------------------------------------------------------------------------------+ +| Level | Code | Message | ++-------+------+-------------------------------------------------------------------------------------------+ +| Note | 1105 | unique index idx_b of t is selected since the path only has point ranges with double scan | ++-------+------+-------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +### Skyline-pruning + +Skyline-pruning is a heuristic filtering rule for indexes, which can reduce the probability of wrong index selection caused by wrong estimation. To judge an index, the following three dimensions are needed: - How many access conditions are covered by the indexed columns. An “access condition” is a where condition that can be converted to a column range. And the more access conditions an indexed column set covers, the better it is in this dimension. -For these three dimensions, if an index named idx_a is not worse than the index named idx_b in all three dimensions and one of the dimensions is better than idx_b, then idx_a is preferred. 
+- Whether it needs to retrieve rows from a table when you select the index to access the table (that is, the plan generated by the index is IndexReader operator or IndexLookupReader operator). Indexes that do not retrieve rows from a table are better on this dimension than indexes that do. If both indexes need TiDB to retrieve rows from the table, compare how many filtering conditions are covered by the indexed columns. Filtering conditions mean the `where` condition that can be judged based on the index. If the column set of an index covers more filtering conditions, the number of rows to be retrieved from the table is smaller, and the index is better in this dimension. + +- Whether the index satisfies a certain order. Because index reading can guarantee the order of certain column sets, indexes that satisfy the query order are superior on this dimension to indexes that do not. -### Selection based on cost estimation +For these three dimensions above, if the index `idx_a` performs no worse than the index `idx_b` in all three dimensions and performs better than `idx_b` in one dimension, then `idx_a` is preferred. When executing the `EXPLAIN FORMAT = 'verbose' ...` statement, if skyline-pruning excludes some indexes, TiDB outputs a NOTE-level warning listing the remaining indexes after the skyline-pruning exclusion. + +In the following example, the indexes `idx_b` and `idx_e` are both inferior to `idx_b_c`, so they are excluded by skyline-pruning. The returned result of `SHOW WARNINGS` displays the remaining indexes after skyline-pruning. 
+ +```sql +mysql> CREATE TABLE t(a INT PRIMARY KEY, b INT, c INT, d INT, e INT, INDEX idx_b(b), INDEX idx_b_c(b, c), INDEX idx_e(e)); +Query OK, 0 rows affected (0.01 sec) + +mysql> EXPLAIN FORMAT = 'verbose' SELECT * FROM t WHERE b = 2 AND c > 4; ++-------------------------------+---------+---------+-----------+------------------------------+----------------------------------------------------+ +| id | estRows | estCost | task | access object | operator info | ++-------------------------------+---------+---------+-----------+------------------------------+----------------------------------------------------+ +| IndexLookUp_10 | 33.33 | 738.29 | root | | | +| ├─IndexRangeScan_8(Build) | 33.33 | 2370.00 | cop[tikv] | table:t, index:idx_b_c(b, c) | range:(2 4,2 +inf], keep order:false, stats:pseudo | +| └─TableRowIDScan_9(Probe) | 33.33 | 2370.00 | cop[tikv] | table:t | keep order:false, stats:pseudo | ++-------------------------------+---------+---------+-----------+------------------------------+----------------------------------------------------+ +3 rows in set, 1 warning (0.00 sec) + +mysql> SHOW WARNINGS; ++-------+------+------------------------------------------------------------------------------------------+ +| Level | Code | Message | ++-------+------+------------------------------------------------------------------------------------------+ +| Note | 1105 | [t,idx_b_c] remain after pruning paths for t given Prop{SortItems: [], TaskTp: rootTask} | ++-------+------+------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +### Cost estimation-based selection After using the skyline-pruning rule to rule out inappropriate indexes, the selection of indexes is based entirely on the cost estimation. 
The cost estimation of accessing tables requires the following considerations: diff --git a/command-line-flags-for-tidb-configuration.md b/command-line-flags-for-tidb-configuration.md index a6a7e05f50b5..13b409dac8a2 100644 --- a/command-line-flags-for-tidb-configuration.md +++ b/command-line-flags-for-tidb-configuration.md @@ -14,12 +14,6 @@ When you start the TiDB cluster, you can use command-line options or environment - Default: "" - This address must be accessible by the rest of the TiDB cluster and the user. -## `--binlog-socket` - -- The TiDB services use the unix socket file for internal connections, such as the Pump service -- Default: "" -- You can use "/tmp/pump.sock" to accept the communication of Pump unix socket file. - ## `--config` - The configuration file @@ -103,11 +97,6 @@ When you start the TiDB cluster, you can use command-line options or environment - Default: "/tmp/tidb" - You can use `tidb-server --store=unistore --path=""` to enable a pure in-memory TiDB. -## `--tmp-storage-path` - -+ TiDB's temporary storage path -+ Default: `/tidb/tmp-storage` - ## `--proxy-protocol-networks` - The list of proxy server's IP addresses allowed to connect to TiDB using the [PROXY protocol](https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt). diff --git a/config-templates/complex-cdc.yaml b/config-templates/complex-cdc.yaml index 682157e7d032..9c4c48bcf56d 100644 --- a/config-templates/complex-cdc.yaml +++ b/config-templates/complex-cdc.yaml @@ -21,7 +21,7 @@ monitored: # # - PD: https://pingcap.com/docs/stable/reference/configuration/pd-server/configuration-file/ # # All configuration items use points to represent the hierarchy, e.g: # # readpool.storage.use-unified-pool -# # +# # # # You can overwrite this configuration via the instance-level `config` field. 
server_configs: @@ -96,15 +96,21 @@ cdc_servers: - host: 10.0.1.1 port: 8300 deploy_dir: "/tidb-deploy/cdc-8300" + data_dir: "/tidb-data/cdc-8300" log_dir: "/tidb-deploy/cdc-8300/log" + # gc-ttl: 86400 - host: 10.0.1.2 port: 8300 deploy_dir: "/tidb-deploy/cdc-8300" + data_dir: "/tidb-data/cdc-8300" log_dir: "/tidb-deploy/cdc-8300/log" + # gc-ttl: 86400 - host: 10.0.1.3 port: 8300 deploy_dir: "/tidb-deploy/cdc-8300" + data_dir: "/tidb-data/cdc-8300" log_dir: "/tidb-deploy/cdc-8300/log" + # gc-ttl: 86400 monitoring_servers: - host: 10.0.1.10 diff --git a/config-templates/complex-tidb-binlog.yaml b/config-templates/complex-tidb-binlog.yaml index d186ece3a3ea..97e5f8d02218 100644 --- a/config-templates/complex-tidb-binlog.yaml +++ b/config-templates/complex-tidb-binlog.yaml @@ -92,35 +92,35 @@ pump_servers: - host: 10.0.1.1 ssh_port: 22 port: 8250 - deploy_dir: "/tidb-deploy/pump-8249" - data_dir: "/tidb-data/pump-8249" + deploy_dir: "/tidb-deploy/pump-8250" + data_dir: "/tidb-data/pump-8250" # The following configs are used to overwrite the `server_configs.pump` values. config: gc: 7 - host: 10.0.1.2 ssh_port: 22 port: 8250 - deploy_dir: "/tidb-deploy/pump-8249" - data_dir: "/tidb-data/pump-8249" + deploy_dir: "/tidb-deploy/pump-8250" + data_dir: "/tidb-data/pump-8250" # The following configs are used to overwrite the `server_configs.pump` values. config: gc: 7 - host: 10.0.1.3 ssh_port: 22 port: 8250 - deploy_dir: "/tidb-deploy/pump-8249" - data_dir: "/tidb-data/pump-8249" + deploy_dir: "/tidb-deploy/pump-8250" + data_dir: "/tidb-data/pump-8250" # The following configs are used to overwrite the `server_configs.pump` values. config: gc: 7 drainer_servers: - host: 10.0.1.12 port: 8249 + deploy_dir: "/tidb-deploy/drainer-8249" data_dir: "/tidb-data/drainer-8249" # If drainer doesn't have a checkpoint, use initial commitTS as the initial checkpoint. # Will get a latest timestamp from pd if commit_ts is set to -1 (the default value). 
commit_ts: -1 - deploy_dir: "/tidb-deploy/drainer-8249" # The following configs are used to overwrite the `server_configs.drainer` values. config: syncer.db-type: "tidb" @@ -128,7 +128,30 @@ drainer_servers: syncer.to.user: "root" syncer.to.password: "" syncer.to.port: 4000 - + syncer.to.checkpoint: + schema: "tidb-binlog" + type: "tidb" + host: "10.0.1.14" + user: "root" + password: "123" + port: 4000 + - host: 10.0.1.13 + port: 8249 + deploy_dir: "/tidb-deploy/drainer-8249" + data_dir: "/tidb-data/drainer-8249" + # If Drainer does not have a checkpoint, use the initial commitTS as the initial checkpoint. + # If commit_ts is set to -1 (the default value), you will get a latest timestamp from PD. + commit_ts: -1 + # The following configurations are used to overwrite the `server_configs.drainer` values. + config: + syncer.db-type: "kafka" + syncer.replicate-do-db: + - db1 + - db2 + syncer.to.kafka-addrs: "10.0.1.20:9092,10.0.1.21:9092,10.0.1.22:9092" + syncer.to.kafka-version: "0.10.0.0" + syncer.to.topic-name: "asyouwish" + monitoring_servers: - host: 10.0.1.10 # ssh_port: 22 diff --git a/config-templates/geo-redundancy-deployment.yaml b/config-templates/geo-redundancy-deployment.yaml index 676145106757..74ad7ecddca3 100644 --- a/config-templates/geo-redundancy-deployment.yaml +++ b/config-templates/geo-redundancy-deployment.yaml @@ -29,7 +29,7 @@ server_configs: pd: replication.location-labels: ["zone","dc","rack","host"] replication.max-replicas: 5 - label-property: + label-property: # Since TiDB 5.2, the `label-property` configuration is not supported by default. To set the replica policy, use the placement rules. 
reject-leader: - key: "dc" value: "sha" diff --git a/configure-memory-usage.md b/configure-memory-usage.md index 414b22aa92ba..c754c221b270 100644 --- a/configure-memory-usage.md +++ b/configure-memory-usage.md @@ -118,3 +118,84 @@ The following example constructs a memory-intensive SQL statement that triggers * `record path` indicates the directory of status files. 5. You can see a set of files in the directory of status files (In the above example, the directory is `/tmp/1000_tidb/MC4wLjAuMDo0MDAwLzAuMC4wLjA6MTAwODA=/tmp-storage/record`), including `goroutinue`, `heap`, and `running_sql`. These three files are suffixed with the time when status files are logged. They respectively record goroutine stack information, the usage status of heap memory, and the running SQL information when the alarm is triggered. For the format of log content in `running_sql`, refer to [`expensive-queries`](/identify-expensive-queries.md). + +## Other memory control behaviors of tidb-server + +### Flow control + +- TiDB supports dynamic memory control for the operator that reads data. By default, this operator uses the maximum number of threads that [`tidb_distsql_scan_concurrency`](/system-variables.md#tidb_distsql_scan_concurrency) allows to read data. When the memory usage of a single SQL execution exceeds [`tidb_mem_quota_query`](/system-variables.md#tidb_mem_quota_query) each time, the operator that reads data stops one thread. + +- This flow control behavior is controlled by the system variable [`tidb_enable_rate_limit_action`](/system-variables.md#tidb_enable_rate_limit_action). +- When the flow control behavior is triggered, TiDB outputs a log containing the keywords `memory exceeds quota, destroy one token now`. + +### Disk spill + +TiDB supports disk spill for execution operators. When the memory usage of a SQL execution exceeds the memory quota, tidb-server can spill the intermediate data of execution operators to the disk to relieve memory pressure. 
Operators supporting disk spill include Sort, MergeJoin, HashJoin, and HashAgg. + +- The disk spill behavior is jointly controlled by the [`mem-quota-query`](/tidb-configuration-file.md#mem-quota-query), [`oom-use-tmp-storage`](/tidb-configuration-file.md#oom-use-tmp-storage), [`tmp-storage-path`](/tidb-configuration-file.md#tmp-storage-path), and [`tmp-storage-quota`](/tidb-configuration-file.md#tmp-storage-quota) parameters. +- When the disk spill is triggered, TiDB outputs a log containing the keywords `memory exceeds quota, spill to disk now` or `memory exceeds quota, set aggregate mode to spill-mode`. +- Disk spill for the Sort, MergeJoin, and HashJoin operator is introduced in v4.0.0; disk spill for the HashAgg operator is introduced in v5.2.0. +- When the SQL executions containing Sort, MergeJoin, or HashJoin cause OOM, TiDB triggers disk spill by default. When SQL executions containing HashAgg cause OOM, TiDB does not trigger disk spill by default. You can configure the system variable `tidb_executor_concurrency = 1` to trigger disk spill for HashAgg. + +> **Note:** +> +> The disk spill for HashAgg does not support SQL executions containing the `DISTINCT` aggregate function. When a SQL execution containing a `DISTINCT` aggregate function uses too much memory, the disk spill does not apply. + +The following example uses a memory-consuming SQL statement to demonstrate the disk spill feature for HashAgg: + +1. Configure the memory quota of a SQL statement to 1GB (1 GB by default): + + {{< copyable "sql" >}} + + ```sql + set tidb_mem_quota_query = 1 << 30; + ``` + +2. Create a single table `CREATE TABLE t(a int);` and insert 256 rows of different data. + +3. 
Execute the following SQL statement: + + {{< copyable "sql" >}} + + ```sql + [tidb]> explain analyze select /*+ HASH_AGG() */ count(*) from t t1 join t t2 join t t3 group by t1.a, t2.a, t3.a; + ``` + + Because executing this SQL statement occupies too much memory, the following "Out of Memory Quota" error message is returned: + + ```sql + ERROR 1105 (HY000): Out Of Memory Quota![conn_id=3] + ``` + +4. Configure the system variable `tidb_executor_concurrency` to 1. With this configuration, when out of memory, HashAgg automatically tries to trigger disk spill. + + {{< copyable "sql" >}} + + ```sql + set tidb_executor_concurrency = 1; + ``` + +5. Execute the same SQL statement. You can find that this time, the statement is successfully executed and no error message is returned. From the following detailed execution plan, you can see that HashAgg has used 600 MB of hard disk space. + + {{< copyable "sql" >}} + + ```sql + [tidb]> explain analyze select /*+ HASH_AGG() */ count(*) from t t1 join t t2 join t t3 group by t1.a, t2.a, t3.a; + ``` + + ```sql + +---------------------------------+-------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+-----------+----------+ + | id | estRows | actRows | task | access object | execution info | operator info | memory | disk | + +---------------------------------+-------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+-----------+----------+ + | HashAgg_11 | 204.80 | 16777216 | root | | time:1m37.4s, loops:16385 | group by:test.t.a, test.t.a, test.t.a, funcs:count(1)->Column#7 | 
1.13 GB | 600.0 MB | + | └─HashJoin_12 | 16777216.00 | 16777216 | root | | time:21.5s, loops:16385, build_hash_table:{total:267.2µs, fetch:228.9µs, build:38.2µs}, probe:{concurrency:1, total:35s, max:35s, probe:35s, fetch:962.2µs} | CARTESIAN inner join | 8.23 KB | 4 KB | + | ├─TableReader_21(Build) | 256.00 | 256 | root | | time:87.2µs, loops:2, cop_task: {num: 1, max: 150µs, proc_keys: 0, rpc_num: 1, rpc_time: 145.1µs, copr_cache_hit_ratio: 0.00} | data:TableFullScan_20 | 885 Bytes | N/A | + | │ └─TableFullScan_20 | 256.00 | 256 | cop[tikv] | table:t3 | tikv_task:{time:23.2µs, loops:256} | keep order:false, stats:pseudo | N/A | N/A | + | └─HashJoin_14(Probe) | 65536.00 | 65536 | root | | time:728.1µs, loops:65, build_hash_table:{total:307.5µs, fetch:277.6µs, build:29.9µs}, probe:{concurrency:1, total:34.3s, max:34.3s, probe:34.3s, fetch:278µs} | CARTESIAN inner join | 8.23 KB | 4 KB | + | ├─TableReader_19(Build) | 256.00 | 256 | root | | time:126.2µs, loops:2, cop_task: {num: 1, max: 308.4µs, proc_keys: 0, rpc_num: 1, rpc_time: 295.3µs, copr_cache_hit_ratio: 0.00} | data:TableFullScan_18 | 885 Bytes | N/A | + | │ └─TableFullScan_18 | 256.00 | 256 | cop[tikv] | table:t2 | tikv_task:{time:79.2µs, loops:256} | keep order:false, stats:pseudo | N/A | N/A | + | └─TableReader_17(Probe) | 256.00 | 256 | root | | time:211.1µs, loops:2, cop_task: {num: 1, max: 295.5µs, proc_keys: 0, rpc_num: 1, rpc_time: 279.7µs, copr_cache_hit_ratio: 0.00} | data:TableFullScan_16 | 885 Bytes | N/A | + | └─TableFullScan_16 | 256.00 | 256 | cop[tikv] | table:t1 | tikv_task:{time:71.4µs, loops:256} | keep order:false, stats:pseudo | N/A | N/A | + +---------------------------------+-------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------+-----------+----------+ + 9 
rows in set (1 min 37.428 sec) + ``` \ No newline at end of file diff --git a/configure-placement-rules.md b/configure-placement-rules.md index f339b1a4411b..b28a4526cf27 100644 --- a/configure-placement-rules.md +++ b/configure-placement-rules.md @@ -298,7 +298,7 @@ For example, to save all configuration to the `rules.json` file, execute the fol {{< copyable "shell-regular" >}} ```bash -pd-ctl config placement-rules rule-bundle load -out="rules.json" +pd-ctl config placement-rules rule-bundle load --out="rules.json" ``` After editing the file, execute the following command to save the configuration to the PD server: @@ -306,7 +306,7 @@ After editing the file, execute the following command to save the configuration {{< copyable "shell-regular" >}} ```bash -pd-ctl config placement-rules rule-bundle save -in="rules.json" +pd-ctl config placement-rules rule-bundle save --in="rules.json" ``` ### Use tidb-ctl to query the table-related key range diff --git a/configure-time-zone.md b/configure-time-zone.md index e46147e0bea8..246e226ca16c 100644 --- a/configure-time-zone.md +++ b/configure-time-zone.md @@ -28,12 +28,12 @@ Each client has its own time zone setting, given by the session `time_zone` vari SET time_zone = timezone; ``` -You can use the following statement to view the current values of the global and client-specific time zones: +You can use the following statement to view the current values of the global, client-specific and system time zones: {{< copyable "sql" >}} ```sql -SELECT @@global.time_zone, @@session.time_zone; +SELECT @@global.time_zone, @@session.time_zone, @@global.system_time_zone; ``` To set the format of the value of the `time_zone`: diff --git a/credits.md b/credits.md index c303dd7af4c5..a37b8ef016cc 100644 --- a/credits.md +++ b/credits.md @@ -5,176 +5,39 @@ summary: Learn about the list and roles of contributors to the TiDB community. 
# Credits -The TiDB developer community uses SIG (Special Interest Group) as a unit to manage and organize developers. Each module has its own SIG which is responsible for new feature development, performance optimization, stability guarantee, etc. If you also want to become a TiDB developer, come and join the interested [SIG](https://github.com/pingcap/community/tree/master/special-interest-groups) and discuss directly with other senior developers! As of TiDB 4.0 GA, the following is the list of contributors to the TiDB community and their corresponding roles: - -## Committers - -| SIG name | GitHub ID | -|:--|:--| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[winoros](https://github.com/winoros)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[jackysp](https://github.com/jackysp)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[cofyc](https://github.com/cofyc)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[rleungx](https://github.com/rleungx)| -|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[breeswish](https://github.com/breeswish)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[lilin90](https://github.com/lilin90)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[qw4990](https://github.com/qw4990)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[kennytm](https://github.com/kennytm)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[lonng](https://github.com/lonng)| -|[web](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-web)|[wd0517](https://github.com/wd0517)| 
-|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[zimulala](https://github.com/zimulala)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[bobotu](https://github.com/bobotu)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[cfzjywxk](https://github.com/cfzjywxk)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[lysu](https://github.com/lysu)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[tiancaiamao](https://github.com/tiancaiamao)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[youjiali1995](https://github.com/youjiali1995)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[coocood](https://github.com/coocood)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[imtbkcat](https://github.com/imtbkcat)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[MyonKeminta](https://github.com/MyonKeminta)| -|[transaction](https://github.com/tikv/community/tree/master/sig/transaction)|[nrc](https://github.com/nrc)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[AstroProfundis](https://github.com/AstroProfundis)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[aylei](https://github.com/aylei)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[DanielZhangQD](https://github.com/DanielZhangQD)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[gregwebs](https://github.com/gregwebs)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[jlerche](https://github.com/jlerche)| 
-|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[LinuxGit](https://github.com/LinuxGit)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[onlymellb](https://github.com/onlymellb)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[qiffang](https://github.com/qiffang)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[sdojjy](https://github.com/sdojjy)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[shuijing198799](https://github.com/shuijing198799)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[tennix](https://github.com/tennix)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[weekface](https://github.com/weekface)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[disksing](https://github.com/disksing)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[hundundm](https://github.com/hundundm)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[lhy1024](https://github.com/lhy1024)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[nolouch](https://github.com/nolouch)| -|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[shafreeck](https://github.com/shafreeck)| -|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[crazycs520](https://github.com/crazycs520)| -|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[Deardrops](https://github.com/Deardrops)| 
-|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[mapleFU](https://github.com/mapleFU)| -|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[reafans](https://github.com/reafans)| -|[Dashboard](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-dashboard)|[rleungx](https://github.com/rleungx)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[CaitinChen](https://github.com/CaitinChen)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[cofyc](https://github.com/cofyc)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[DanielZhangQD](https://github.com/DanielZhangQD)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[dcalvin](https://github.com/dcalvin)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[jackysp](https://github.com/jackysp)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[ran-huang](https://github.com/ran-huang)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[TomShawn](https://github.com/TomShawn)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[toutdesuite](https://github.com/toutdesuite)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[WangXiangUSTC](https://github.com/WangXiangUSTC)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[3pointer](https://github.com/3pointer)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[5kbpers](https://github.com/5kbpers)| 
-|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[amyangfei](https://github.com/amyangfei)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[gmhdbjd](https://github.com/gmhdbjd)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[GregoryIan](https://github.com/GregoryIan)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[july2993](https://github.com/july2993)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[leoppro](https://github.com/leoppro)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[lichunzhu](https://github.com/lichunzhu)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[overvenus](https://github.com/overvenus)| -|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[YuJuncen](https://github.com/YuJuncen)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[AstroProfundis](https://github.com/AstroProfundis)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[july2993](https://github.com/july2993)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[nrc](https://github.com/nrc)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[birdstorm](https://github.com/birdstorm)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[breeswish](https://github.com/breeswish)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[Reminiscent](https://github.com/Reminiscent)| 
-|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[wshwsh12](https://github.com/wshwsh12)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[zz-jason](https://github.com/zz-jason)| -|[web](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-web)|[g1eny0ung](https://github.com/g1eny0ung)| -|[web](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-web)|[YiniXu9506](https://github.com/YiniXu9506)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[AilinKid](https://github.com/AilinKid)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[bb7133](https://github.com/bb7133)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[crazycs520](https://github.com/crazycs520)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[djshow832](https://github.com/djshow832)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[lonng](https://github.com/lonng)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[winkyao](https://github.com/winkyao)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[wjhuang2016](https://github.com/wjhuang2016)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[eurekaka](https://github.com/eurekaka)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[francis0407](https://github.com/francis0407)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[lzmhhh123](https://github.com/lzmhhh123)| 
-|[migrate](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-migrate)|[csuzhangxc](https://github.com/csuzhangxc)| - -## Reviewers - -| SIG name | GitHub ID | -|:--|:--| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[anotherrachel](https://github.com/anotherrachel)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[aylei](https://github.com/aylei)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[crazycs520](https://github.com/crazycs520)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[ericsyh](https://github.com/ericsyh)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[juliezhang1112](https://github.com/juliezhang1112)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[morgo](https://github.com/morgo)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[weekface](https://github.com/weekface)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[YiniXu9506](https://github.com/YiniXu9506)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[b41sh](https://github.com/b41sh)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[js00070](https://github.com/js00070)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[mmyj](https://github.com/mmyj)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[shihongzhi](https://github.com/shihongzhi)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[tangwz](https://github.com/tangwz)| 
-|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[tsthght](https://github.com/tsthght)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[Deardrops](https://github.com/Deardrops)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[lysu](https://github.com/lysu)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[imtbkcat](https://github.com/imtbkcat)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[lamxTyler](https://github.com/lamxTyler)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[SunRunAway](https://github.com/SunRunAway)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[wjhuang2016](https://github.com/wjhuang2016)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[XuHuaiyu](https://github.com/XuHuaiyu)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[zz-jason](https://github.com/zz-jason)| - -## Active Contributors - -| SIG name | GitHub ID | -|:--|:--| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[cwen0](https://github.com/cwen0)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[mikechengwei](https://github.com/mikechengwei)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[shonge](https://github.com/shonge)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[xiaojingchen](https://github.com/xiaojingchen)| -|[k8s](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-k8s)|[shinnosuke-okada](https://github.com/shinnosuke-okada)| 
-|[scheduling](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-scheduling)|[mantuliu](https://github.com/mantuliu)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[3pointer](https://github.com/3pointer)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[amyangfei](https://github.com/amyangfei)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[csuzhangxc](https://github.com/csuzhangxc)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[Deardrops](https://github.com/Deardrops)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[gmhdbjd](https://github.com/gmhdbjd)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[huangxiuyan](https://github.com/huangxiuyan)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[IzabelWang](https://github.com/IzabelWang)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[july2993](https://github.com/july2993)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[kissmydb](https://github.com/kissmydb)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[kolbe](https://github.com/kolbe)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[lamxTyler](https://github.com/lamxTyler)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[lance6716](https://github.com/lance6716)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[lichunzhu](https://github.com/lichunzhu)| 
-|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[liubo0127](https://github.com/liubo0127)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[lysu](https://github.com/lysu)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[superlzs0476](https://github.com/superlzs0476)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[tangenta](https://github.com/tangenta)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[tennix](https://github.com/tennix)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[tiancaiamao](https://github.com/tiancaiamao)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[xiaojingchen](https://github.com/xiaojingchen)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[Yisaer](https://github.com/Yisaer)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[zhouqiang-cl](https://github.com/zhouqiang-cl)| -|[docs](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-docs)|[zimulala](https://github.com/zimulala)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[c4pt0r](https://github.com/c4pt0r)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[YangKeao](https://github.com/YangKeao)| -|[tiup](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-tiup)|[qinzuoyan](https://github.com/qinzuoyan)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[AerysNan](https://github.com/AerysNan)| 
-|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[AndrewDi](https://github.com/AndrewDi)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[ekalinin](https://github.com/ekalinin)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[erjiaqing](https://github.com/erjiaqing)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[hey-kong](https://github.com/hey-kong)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[jacklightChen](https://github.com/jacklightChen)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[k-ye](https://github.com/k-ye)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[pingyu](https://github.com/pingyu)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[Rustin-Liu](https://github.com/Rustin-Liu)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[spongedu](https://github.com/spongedu)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[TennyZhuang](https://github.com/TennyZhuang)| -|[execution](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-exec)|[xiekeyi98](https://github.com/xiekeyi98)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[reafans](https://github.com/reafans)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[Rustin-Liu](https://github.com/Rustin-Liu)| -|[ddl](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-ddl)|[spongedu](https://github.com/spongedu)| 
-|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[Deardrops](https://github.com/Deardrops)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[foreyes](https://github.com/foreyes)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[lonng](https://github.com/lonng)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[SeaRise](https://github.com/SeaRise)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[tiancaiamao](https://github.com/tiancaiamao)| -|[planner](https://github.com/pingcap/community/tree/master/special-interest-groups/sig-planner)|[wshwsh12](https://github.com/wshwsh12)| +Each contributor has played an important role in promoting the robust development of TiDB. We sincerely appreciate all contributors who have submitted code, written and translated documents for TiDB. + +## TiDB developers + +TiDB developers contribute to new feature development, performance improvement, stability guarantee, and bug fixes. 
The following is the list of contributors in TiDB related repos: + +- [pingcap/tidb](https://github.com/pingcap/tidb/graphs/contributors) +- [tikv/tikv](https://github.com/tikv/tikv/graphs/contributors) +- [pingcap/parser](https://github.com/pingcap/parser/graphs/contributors) +- [tikv/pd](https://github.com/tikv/pd/graphs/contributors) +- [pingcap/tidb-operator](https://github.com/pingcap/tidb-operator/graphs/contributors) +- [pingcap/tiup](https://github.com/pingcap/tiup/graphs/contributors) +- [pingcap/br](https://github.com/pingcap/br/graphs/contributors) +- [pingcap/dm](https://github.com/pingcap/dm/graphs/contributors) +- [pingcap/tidb-binlog](https://github.com/pingcap/tidb-binlog/graphs/contributors) +- [pingcap/tidb-dashboard](https://github.com/pingcap/tidb-dashboard/graphs/contributors) +- [pingcap/ticdc](https://github.com/pingcap/ticdc/graphs/contributors) +- [pingcap/tidb-tools](https://github.com/pingcap/tidb-tools/graphs/contributors) +- [pingcap/tidb-lightning](https://github.com/pingcap/tidb-lightning/graphs/contributors) +- [pingcap/tispark](https://github.com/pingcap/tispark/graphs/contributors) +- [pingcap/dumpling](https://github.com/pingcap/dumpling/graphs/contributors) +- [tikv/client-java](https://github.com/tikv/client-java/graphs/contributors) +- [tidb-incubator/TiBigData](https://github.com/tidb-incubator/TiBigData/graphs/contributors) +- [ti-community-infra](https://github.com/orgs/ti-community-infra/people) + +For the full list of contributors, see [SIG | TiDB DevGroup](https://contributor.tidb.io/sig). + +## Writers and translators for TiDB documentation + +Writers and translators write and translate documents for TiDB and the related projects. 
The following is the list of contributors in TiDB documentation related repos: + +- [pingcap/docs-cn](https://github.com/pingcap/docs-cn/graphs/contributors) +- [pingcap/docs](https://github.com/pingcap/docs/graphs/contributors) +- [pingcap/docs-tidb-operator](https://github.com/pingcap/docs-tidb-operator/graphs/contributors) +- [pingcap/docs-dm](https://github.com/pingcap/docs-dm/graphs/contributors) +- [tikv/website](https://github.com/tikv/website/graphs/contributors) \ No newline at end of file diff --git a/dashboard/dashboard-access.md b/dashboard/dashboard-access.md index 7bc968f02e23..0b9061da2f93 100644 --- a/dashboard/dashboard-access.md +++ b/dashboard/dashboard-access.md @@ -28,7 +28,7 @@ You can use TiDB Dashboard in the following common desktop browsers of a relativ ## Sign in -For the first-time access, TiDB Dashboard displays the user sign in interface, as shown in the image below. You can sign in using the TiDB `root` account. +For the first-time access, TiDB Dashboard displays the user sign in interface, as shown in the image below. You can sign in using the TiDB `root` account. By default, the `root` password is empty. ![Login interface](/media/dashboard/dashboard-access-login.png) diff --git a/dashboard/dashboard-faq.md b/dashboard/dashboard-faq.md index e0aeb8ff2d53..4755b616b5c6 100644 --- a/dashboard/dashboard-faq.md +++ b/dashboard/dashboard-faq.md @@ -58,3 +58,25 @@ If your deployment tool is TiUP, take the following steps to solve this problem. ### An `invalid connection` error is shown in **Top SQL Statements** and **Recent Slow Queries** on the Overview page The possible reason is that you have enabled the `prepared-plan-cache` feature of TiDB. As an experimental feature, when enabled, `prepared-plan-cache` might not function properly in specific TiDB versions, which could cause this problem in TiDB Dashboard (and other applications). 
You can disable `prepared-plan-cache` by updating [TiDB Configuration file](/tidb-configuration-file.md#prepared-plan-cache) to solve this problem. + +### An `unknown field` error is shown in **Slow Queries** page + +If the `unknown field` error appears on the **Slow Queries** page after the cluster upgrade, the error is related to a compatibility issue caused by the difference between TiDB Dashboard server fields (which might be updated) and user preferences fields (which are in the browser cache). This issue has been fixed. If your cluster is earlier than v5.0.3 or v4.0.14, perform the following steps to resolve the issue: + +To clear your browser cache, take the following steps: + +1. Open TiDB Dashboard page. + +2. Open Developer Tools. Different browsers have different ways of opening Developer Tools. After clicking the **Menu Bar**: + + - Firefox: Menu ➤ Web Developer ➤ Toggle Tools, or Tools ➤ Web Developer ➤ Toggle Tools. + - Chrome: More tools ➤ Developer tools. + - Safari: Develop ➤ Show Web Inspector. If you can't see the Develop menu, go to Safari ➤ Preferences ➤ Advanced, and check the Show Develop menu in menu bar checkbox. + + In the following example, Chrome is used. + + ![Opening DevTools from Chrome's main menu](/media/dashboard/dashboard-faq-devtools.png) + +3. Select the **Application** panel, expand the **Local Storage** menu and select the **TiDB Dashboard page domain**. Click the **Clear All** button. + + ![Clear the Local Storage](/media/dashboard/dashboard-faq-devtools-application.png) diff --git a/dashboard/dashboard-session-share.md b/dashboard/dashboard-session-share.md new file mode 100644 index 000000000000..53a7fee12481 --- /dev/null +++ b/dashboard/dashboard-session-share.md @@ -0,0 +1,56 @@ +--- +title: Share TiDB Dashboard Sessions +summary: Learn how to share the current TiDB Dashboard session to other users. 
+--- + +# Share TiDB Dashboard Sessions + +You can share the current session of the TiDB Dashboard to other users so that they can access and operate the TiDB Dashboard without entering the user password. + +## Steps for the Inviter + +1. Sign into TiDB Dashboard. + +2. Click the username in the left sidebar to access the configuration page. + +3. Click **Share Current Session**. + + ![Sample Step](/media/dashboard/dashboard-session-share-settings-1.png) + + > **Note:** + > + > For security reasons, the shared session cannot be shared again. + +4. Adjust sharing settings in the popup dialog: + + - Expire in: How long the shared session will be effective. Signing out of the current session does not affect the effective time of the shared session. + + - Share as read-only privilege: The shared session only permits read operations but not write operations (such as modifying configurations). + +5. Click **Generate Authorization Code**. + + ![Sample Step](/media/dashboard/dashboard-session-share-settings-2.png) + +6. Provide the generated **Authorization Code** to the user to whom you want to share the session. + + ![Sample Step](/media/dashboard/dashboard-session-share-settings-3.png) + + > **Warning:** + > + > Keep your authorization code secure and do not send it to anyone who is untrusted. Otherwise, they will be able to access and operate TiDB Dashboard without your authorization. + +## Steps for the Invitee + +1. On the sign-in page of TiDB Dashboard, click **Use Alternative Authentication**. + + ![Sample Step](/media/dashboard/dashboard-session-share-signin-1.png) + +2. Click **Authorization Code** to use it to sign in. + + ![Sample Step](/media/dashboard/dashboard-session-share-signin-2.png) + +3. Enter the authorization code you have received from the inviter. + +4. Click **Sign In**. 
+ + ![Sample Step](/media/dashboard/dashboard-session-share-signin-3.png) diff --git a/dashboard/dashboard-session-sso.md b/dashboard/dashboard-session-sso.md new file mode 100644 index 000000000000..597e3e8d5ade --- /dev/null +++ b/dashboard/dashboard-session-sso.md @@ -0,0 +1,151 @@ +--- +title: Configure SSO for TiDB Dashboard +summary: Learn how to enable SSO to sign into TiDB Dashboard. +--- + +# Configure SSO for TiDB Dashboard + +TiDB Dashboard supports [OIDC](https://openid.net/connect/)-based Single Sign-On (SSO). After enabling the SSO feature of TiDB Dashboard, the configured SSO service is used for your sign-in authentication and then you can access TiDB Dashboard without entering the SQL user password. + +## Configure OIDC SSO + +### Enable SSO + +1. Sign into TiDB Dashboard. + +2. Click the username in the left sidebar to access the configuration page. + +3. In the **Single Sign-On** section, select **Enable to use SSO when sign into TiDB Dashboard**. + +4. Fill the **OIDC Client ID** and the **OIDC Discovery URL** fields in the form. + + Generally, you can obtain the two fields from the SSO service provider: + + - OIDC Client ID is also called OIDC Token Audience. + - OIDC Discovery URL is also called OIDC Token Issuer. + +5. Click **Authorize Impersonation** and input the SQL password. + + TiDB Dashboard will store this SQL password and use it to impersonate a normal SQL sign-in after an SSO sign-in is finished. + + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-1.png) + + > **Note:** + > + > The password you have entered will be encrypted and stored. The SSO sign-in will fail after the password of the SQL user is changed. In this case, you can re-enter the password to bring SSO back. + +6. Click **Authorize and Save**. + + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-2.png) + +7. Click **Update** to save the configuration.
+ + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-3.png) + +Now SSO sign-in has been enabled for TiDB Dashboard. + +> **Note:** +> +> For security reasons, some SSO services require additional configuration for the SSO service, such as the trusted sign-in and sign-out URIs. Refer to the documentation of the SSO service for further information. + +### Disable SSO + +You can disable the SSO, which will completely erase the stored SQL password: + +1. Sign into TiDB Dashboard. + +2. Click the username in the left sidebar to access the configuration page. + +3. In the **Single Sign-On** section, deselect **Enable to use SSO when sign into TiDB Dashboard**. + +4. Click **Update** to save the configuration. + + ![Sample Step](/media/dashboard/dashboard-session-sso-disable.png) + +### Re-enter the password after a password change + +The SSO sign-in will fail once the password of the SQL user is changed. In this case, you can bring back the SSO sign-in by re-entering the SQL password: + +1. Sign into TiDB Dashboard. + +2. Click the username in the left sidebar to access the configuration page. + +3. In the **Single Sign-On** section, click **Authorize Impersonation** and input the updated SQL password. + + ![Sample Step](/media/dashboard/dashboard-session-sso-reauthorize.png) + +4. Click **Authorize and Save**. + +## Sign in via SSO + +Once SSO is configured for TiDB Dashboard, you can sign in via SSO by taking the following steps: + +1. In the sign-in page of TiDB Dashboard, click **Sign in via Company Account**. + + ![Sample Step](/media/dashboard/dashboard-session-sso-signin.png) + +2. Sign into the system with SSO service configured. + +3. You are redirected back to TiDB Dashboard to finish the sign-in. + +## Example: Use Okta for TiDB Dashboard SSO sign-in + +[Okta](https://www.okta.com/) is an OIDC SSO identity service, which is compatible with the SSO feature of TiDB Dashboard.
The steps below demonstrate how to configure Okta and TiDB Dashboard so that Okta can be used as the TiDB Dashboard SSO provider. + +### Step 1: Configure Okta + +First, create an Okta Application Integration to integrate SSO. + +1. Access the Okta administration site. + +2. In the left sidebar, navigate to **Applications** > **Applications**. + +3. Click **Create App Integration**. + + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-1.png) + +4. In the dialog that pops up, choose **OIDC - OpenID Connect** in **Sign-in method**. + +5. Choose **Single-Page Application** in **Application Type**. + +6. Click the **Next** button. + + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-2.png) + +7. Fill **Sign-in redirect URIs** as follows: + + ``` + http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 + ``` + + Substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in the browser. + +8. Fill **Sign-out redirect URIs** as follows: + + ``` + http://DASHBOARD_IP:PORT/dashboard/ + ``` + + Similarly, substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port. + + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-3.png) + +9. Configure which types of users in your organization are allowed for SSO sign-in in the **Assignments** field, and then click **Save** to save the configuration. + + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-4.png) + +### Step 2: Obtain OIDC information and fill in TiDB Dashboard + +1. In the Application Integration just created in Okta, click **Sign On**. + + ![Sample Step 1](/media/dashboard/dashboard-session-sso-okta-info-1.png) + +2. Copy values of the **Issuer** and **Audience** fields from the **OpenID Connect ID Token** section. + + ![Sample Step 2](/media/dashboard/dashboard-session-sso-okta-info-2.png) + +3.
Open the TiDB Dashboard configuration page, fill **OIDC Client ID** with **Audience** obtained from the last step and fill **OIDC Discovery URL** with **Issuer**. Then finish the authorization and save the configuration. For example: + + ![Sample Step 3](/media/dashboard/dashboard-session-sso-okta-info-3.png) + +Now TiDB Dashboard has been configured to use Okta SSO for sign-in. diff --git a/dashboard/dashboard-statement-list.md b/dashboard/dashboard-statement-list.md index 93223fbb2a1f..721468ae0077 100644 --- a/dashboard/dashboard-statement-list.md +++ b/dashboard/dashboard-statement-list.md @@ -22,6 +22,10 @@ You can use one of the following two methods to access the SQL statement summary All the data shown on the SQL statement summary page are from the TiDB statement summary tables. For more details about the tables, see [TiDB Statement Summary Tables](/statement-summary-tables.md). +> **Note:** +> +> In the **Mean Latency** column of the SQL statement summary page, the blue bar indicates the average execution time. If there is a yellow line on the blue bar for an SQL statement, the left and right sides of the yellow line respectively represent the minimum and maximum execution time of the SQL statement during the recent data collection cycle. + ### Change Filters On the top of the SQL statement summary page, you can modify the time range of SQL executions to be displayed. You can also filter the list by database in which SQL statements are executed, or by SQL types. The following image shows all SQL executions over the recent data collection cycle (recent 30 minutes by default). diff --git a/download-ecosystem-tools.md b/download-ecosystem-tools.md index d75f986b65bd..f347a4b7ed20 100644 --- a/download-ecosystem-tools.md +++ b/download-ecosystem-tools.md @@ -18,7 +18,7 @@ If you want to download the latest version of [TiDB Binlog](/tidb-binlog/tidb-bi > **Note:** > -> `{version}` in the above download link indicates the version number of TiDB.
For example, the download link for `v5.1.0` is `https://download.pingcap.org/tidb-v5.1.0-linux-amd64.tar.gz`. +> `{version}` in the above download link indicates the version number of TiDB. For example, the download link for `v5.2.1` is `https://download.pingcap.org/tidb-v5.2.1-linux-amd64.tar.gz`. ## TiDB Lightning @@ -30,7 +30,7 @@ Download [TiDB Lightning](/tidb-lightning/tidb-lightning-overview.md) by using t > **Note:** > -> `{version}` in the above download link indicates the version number of TiDB Lightning. For example, the download link for `v5.1.0` is `https://download.pingcap.org/tidb-toolkit-v5.1.0-linux-amd64.tar.gz`. +> `{version}` in the above download link indicates the version number of TiDB Lightning. For example, the download link for `v5.2.1` is `https://download.pingcap.org/tidb-toolkit-v5.2.1-linux-amd64.tar.gz`. ## BR (backup and restore) @@ -42,7 +42,7 @@ Download [BR](/br/backup-and-restore-tool.md) by using the download link in the > **Note:** > -> `{version}` in the above download link indicates the version number of BR. For example, the download link for `v5.0.0-beta` is `http://download.pingcap.org/tidb-toolkit-v5.0.0-beta-linux-amd64.tar.gz`. +> `{version}` in the above download link indicates the version number of BR. For example, the download link for `v5.2.1` is `https://download.pingcap.org/tidb-toolkit-v5.2.1-linux-amd64.tar.gz`. ## TiDB DM (Data Migration) @@ -54,7 +54,7 @@ Download [DM](https://docs.pingcap.com/tidb-data-migration/stable/overview) by u > **Note:** > -> `{version}` in the above download link indicates the version number of DM. For example, the download link for `v2.0.4` is `https://download.pingcap.org/dm-v2.0.4-linux-amd64.tar.gz`. You can check the published DM versions in the [DM Release](https://github.com/pingcap/dm/releases) page. +> `{version}` in the above download link indicates the version number of DM. 
For example, the download link for `v2.0.6` is `https://download.pingcap.org/dm-v2.0.6-linux-amd64.tar.gz`. You can check the published DM versions in the [DM Release](https://github.com/pingcap/dm/releases) page. ## Dumpling @@ -66,7 +66,7 @@ Download [Dumpling](/dumpling-overview.md) from the links below: > **Note:** > -> The `{version}` in the download link is the version number of Dumpling. For example, the link for downloading the `v5.1.0` version of Dumpling is `https://download.pingcap.org/tidb-toolkit-v5.1.0-linux-amd64.tar.gz`. You can view the currently released versions in [Dumpling Releases](https://github.com/pingcap/dumpling/releases). +> The `{version}` in the download link is the version number of Dumpling. For example, the link for downloading the `v5.2.1` version of Dumpling is `https://download.pingcap.org/tidb-toolkit-v5.2.1-linux-amd64.tar.gz`. You can view the currently released versions in [Dumpling Releases](https://github.com/pingcap/dumpling/releases). > > Dumpling supports arm64 linux. You can replace `amd64` in the download link with `arm64`, which means the `arm64` version of Dumpling. diff --git a/dumpling-overview.md b/dumpling-overview.md index 515594f29c4d..f829eed9b2ed 100644 --- a/dumpling-overview.md +++ b/dumpling-overview.md @@ -51,6 +51,7 @@ Dumpling is also included in the tidb-toolkit installation package and can be [d - RELOAD - LOCK TABLES - REPLICATION CLIENT +- PROCESS ### Export to SQL files @@ -264,9 +265,9 @@ With the above options specified, Dumpling can have a quicker speed of data expo > **Note:** > -> In most scenarios, you do not need to adjust the default data consistency options of Dumpling. +> In most scenarios, you do not need to adjust the default data consistency options of Dumpling (the default value is `auto`). -Dumpling uses the `--consistency ` option to control the way in which data is exported for "consistency assurance". 
For TiDB, data consistency is guaranteed by getting a snapshot of a certain timestamp by default (namely, `--consistency snapshot`). When using snapshot for consistency, you can use the `--snapshot` option to specify the timestamp to be backed up. You can also use the following levels of consistency: +Dumpling uses the `--consistency ` option to control the way in which data is exported for "consistency assurance". When using snapshot for consistency, you can use the `--snapshot` option to specify the timestamp to be backed up. You can also use the following levels of consistency: - `flush`: Use [`FLUSH TABLES WITH READ LOCK`](https://dev.mysql.com/doc/refman/8.0/en/flush.html#flush-tables-with-read-lock) to temporarily interrupt the DML and DDL operations of the replica database, to ensure the global consistency of the backup connection, and to record the binlog position (POS) information. The lock is released after all backup connections start transactions. It is recommended to perform full backups during off-peak hours or on the MySQL replica database. - `snapshot`: Get a consistent snapshot of the specified timestamp and export it. 
diff --git a/dynamic-config.md b/dynamic-config.md index af0791432b65..6ade8ff60646 100644 --- a/dynamic-config.md +++ b/dynamic-config.md @@ -126,7 +126,6 @@ The following TiKV configuration items can be modified online: | Configuration item | Description | | :--- | :--- | -| `raftstore.sync-log` | Determines whether to sync data and logs for persistent storage | | `raftstore.raft-entry-max-size` | The maximum size of a single log | | `raftstore.raft-log-gc-tick-interval` | The time interval at which the polling task of deleting Raft logs is scheduled | | `raftstore.raft-log-gc-threshold` | The soft limit on the maximum allowable number of residual Raft logs | @@ -272,9 +271,11 @@ For detailed parameter description, refer to [PD Configuration File](/pd-configu ### Modify TiDB configuration online -Currently, the method of changing TiDB configuration is different from that of changing TiKV and PD configurations. You can modify TiDB configuration by using [SQL variables](/system-variables.md). +Currently, the method of changing TiDB configuration is different from that of changing TiKV and PD configurations. You can modify TiDB configuration by using [system variables](/system-variables.md). -The following example shows how to modify `slow-threshold` online by using the `tidb_slow_log_threshold` variable. The default value of `slow-threshold` is 200 ms. You can set it to 200 ms by using `tidb_slow_log_threshold`. +The following example shows how to modify `slow-threshold` online by using the `tidb_slow_log_threshold` variable. + +The default value of `slow-threshold` is 300 ms. You can set it to 200 ms by using `tidb_slow_log_threshold`. 
{{< copyable "sql" >}} diff --git a/enable-tls-between-clients-and-servers.md b/enable-tls-between-clients-and-servers.md index 06994c28b7dd..d39981f8ab49 100644 --- a/enable-tls-between-clients-and-servers.md +++ b/enable-tls-between-clients-and-servers.md @@ -38,7 +38,7 @@ Similar to MySQL, the encrypted connections in TiDB consist of single connection ## Configure TiDB to use encrypted connections -See the following desrciptions about the related parameters to enable encrypted connections: +See the following descriptions about the related parameters to enable encrypted connections: - [`ssl-cert`](/tidb-configuration-file.md#ssl-cert): specifies the file path of the SSL certificate - [`ssl-key`](/tidb-configuration-file.md#ssl-key): specifies the private key that matches the certificate @@ -183,4 +183,4 @@ The newly loaded certificate, key, and CA take effect on the connection that is ### See also -- [Enable TLS Between TiDB Components](/enable-tls-between-components.md)。 +- [Enable TLS Between TiDB Components](/enable-tls-between-components.md). diff --git a/encryption-at-rest.md b/encryption-at-rest.md index 251a31f12d3b..923d4edcaa99 100644 --- a/encryption-at-rest.md +++ b/encryption-at-rest.md @@ -4,23 +4,47 @@ summary: Learn how to enable encryption at rest to protect sensitive data. aliases: ['/docs/dev/encryption at rest/'] --- -# Encryption at Rest New in v4.0.0 +# Encryption at Rest + +> **Note:** +> +> If your cluster is deployed on AWS and uses the EBS storage, it is recommended to use the EBS encryption. See [AWS documentation - EBS Encryption](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSEncryption.html). If you are using the non-EBS storage on AWS such as the local NVMe storage, it is recommended to use encryption at rest introduced in this document. Encryption at rest means that data is encrypted when it is stored. For databases, this feature is also referred to as TDE (transparent data encryption).
This is opposed to encryption in flight (TLS) or encryption in use (rarely used). Different things could be doing encryption at rest (SSD drive, file system, cloud vendor, etc), but by having TiKV do the encryption before storage this helps ensure that attackers must authenticate with the database to gain access to data. For example, when an attacker gains access to the physical machine, data cannot be accessed by copying files on disk. -TiKV supports encryption at rest starting from v4.0.0. The feature allows TiKV to transparently encrypt data files using [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation) mode. To enable encryption at rest, an encryption key must be provided by user and this key is called master key. The master key can be provided via AWS KMS (recommended), or specifying a key stored as plaintext in a file. TiKV automatically rotates data keys that it used to encrypt actual data files. Manually rotating the master key can be done occasionally. Note that encryption at rest only encrypts data at rest (namely, on disk) and not while data is transferred over network. It is advised to use TLS together with encryption at rest. +## Encryption support in different TiDB components + +In a TiDB cluster, different components use different encryption methods. This section introduces the encryption supports in different TiDB components such as TiKV, TiFlash, PD, and Backup & Restore (BR). + +When a TiDB cluster is deployed, the majority of user data is stored on TiKV and TiFlash nodes. Some metadata is stored on PD nodes (for example, secondary index keys used as TiKV Region boundaries). To get the full benefits of encryption at rest, you need to enable encryption for all components. Backups, log files, and data transmitted over the network should also be considered when you implement encryption. + +### TiKV + +TiKV supports encryption at rest. 
This feature allows TiKV to transparently encrypt data files using [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation) mode. To enable encryption at rest, an encryption key must be provided by the user and this key is called the master key. TiKV automatically rotates data keys that it uses to encrypt actual data files. Manually rotating the master key can be done occasionally. Note that encryption at rest only encrypts data at rest (namely, on disk) and not while data is transferred over network. It is advised to use TLS together with encryption at rest. + +Optionally, you can use AWS KMS for both cloud and on-premises deployments. You can also supply the plaintext master key in a file. + +TiKV currently does not exclude encryption keys and user data from core dumps. It is advised to disable core dumps for the TiKV process when using encryption at rest. This is not currently handled by TiKV itself. + +TiKV tracks encrypted data files using the absolute path of the files. As a result, once encryption is turned on for a TiKV node, the user should not change data file paths configuration such as `storage.data-dir`, `raftstore.raftdb-path`, `rocksdb.wal-dir` and `raftdb.wal-dir`. + +### TiFlash + +TiFlash supports encryption at rest. Data keys are generated by TiFlash. All files (including data files, schema files, and temporary files) written into TiFlash (including TiFlash Proxy) are encrypted using the current data key. The encryption algorithms, the encryption configuration (in the `tiflash-learner.toml` file) supported by TiFlash, and the meanings of monitoring metrics are consistent with those of TiKV. + +If you have deployed TiFlash with Grafana, you can check the **TiFlash-Proxy-Details** -> **Encryption** panel. + +### PD -Also from v4.0.0, BR supports S3 server-side encryption (SSE) when backing up to S3. 
A customer owned AWS KMS key can also be used together with S3 server-side encryption. +Encryption-at-rest for PD is an experimental feature, which is configured in the same way as in TiKV. -## Warnings +### Backups with BR -The current version of TiKV encryption has the following drawbacks. Be aware of these drawbacks before you get started: +BR supports S3 server-side encryption (SSE) when backing up data to S3. A customer-owned AWS KMS key can also be used together with S3 server-side encryption. See [BR S3 server-side encryption](/encryption-at-rest.md#br-s3-server-side-encryption) for details. -* When a TiDB cluster is deployed, the majority of user data is stored in TiKV nodes, and that data will be encrypted when encryption is enabled. However, a small amount of user data is stored in PD nodes as metadata (for example, secondary index keys used as TiKV region boundaries). As of v4.0.0, PD doesn't support encryption at rest. It is recommended to use storage-level encryption (for example, file system encryption) to help protect sensitive data stored in PD. -* TiFlash supports encryption at rest since v4.0.5. For details, refer to [Encryption at Rest for TiFlash](#encryption-at-rest-for-tiflash-new-in-v405). When deploying TiKV with TiFlash earlier than v4.0.5, data stored in TiFlash is not encrypted. -* TiKV currently does not exclude encryption keys and user data from core dumps. It is advised to disable core dumps for the TiKV process when using encryption at rest. This is not currently handled by TiKV itself. -* TiKV tracks encrypted data files using the absolute path of the files. As a result, once encryption is turned on for a TiKV node, the user should not change data file paths configuration such as `storage.data-dir`, `raftstore.raftdb-path`, `rocksdb.wal-dir` and `raftdb.wal-dir`. -* TiKV info log contains user data for debugging purposes. The info log and this data in it are not encrypted. 
+### Logging + +TiKV, TiDB, and PD info logs might contain user data for debugging purposes. The info log and this data in it are not encrypted. It is recommended to enable [log redaction](/log-redaction.md). ## TiKV encryption at rest @@ -29,24 +53,42 @@ The current version of TiKV encryption has the following drawbacks. Be aware of TiKV currently supports encrypting data using AES128, AES192 or AES256, in CTR mode. TiKV uses envelope encryption. As a result, two types of keys are used in TiKV when encryption is enabled. * Master key. The master key is provided by user and is used to encrypt the data keys TiKV generates. Management of master key is external to TiKV. -* Data key. The data key is generated by TiKV and is the key actually used to encrypt data. The data key is automatically rotated by TiKV. +* Data key. The data key is generated by TiKV and is the key actually used to encrypt data. The same master key can be shared by multiple instances of TiKV. The recommended way to provide a master key in production is via AWS KMS. Create a customer master key (CMK) through AWS KMS, and then provide the CMK key ID to TiKV in the configuration file. The TiKV process needs access to the KMS CMK while it is running, which can be done by using an [IAM role](https://aws.amazon.com/iam/). If TiKV fails to get access to the KMS CMK, it will fail to start or restart. Refer to AWS documentation for [KMS](https://docs.aws.amazon.com/kms/index.html) and [IAM](https://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html) usage. Alternatively, if using custom key is desired, supplying the master key via file is also supported. The file must contain a 256 bits (or 32 bytes) key encoded as hex string, end with a newline (namely, `\n`), and contain nothing else. Persisting the key on disk, however, leaks the key, so the key file is only suitable to be stored on the `tempfs` in RAM. 
-Data keys are generated by TiKV and passed to the underlying storage engine (namely, RocksDB). All files written by RocksDB, including SST files, WAL files, and the MANIFEST file, are encrypted by the current data key. Other temporary files used by TiKV that may include user data are also encrypted using the same data key. Data keys are automatically rotated by TiKV every week by default, but the period is configurable. On key rotation, TiKV does not rewrite all existing files to replace the key, but RocksDB compaction are expected to rewrite old data into new data files, with the most recent data key, if the cluster gets constant write workload. TiKV keeps track of the key and encryption method used to encrypt each of the files and use the information to decrypt the content on reads. +Data keys are passed to the underlying storage engine (namely, RocksDB). All files written by RocksDB, including SST files, WAL files, and the MANIFEST file, are encrypted by the current data key. Other temporary files used by TiKV that may include user data are also encrypted using the same data key. Data keys are automatically rotated by TiKV every week by default, but the period is configurable. On key rotation, TiKV does not rewrite all existing files to replace the key, but RocksDB compaction are expected to rewrite old data into new data files, with the most recent data key, if the cluster gets constant write workload. TiKV keeps track of the key and encryption method used to encrypt each of the files and use the information to decrypt the content on reads. Regardless of data encryption method, data keys are encrypted using AES256 in GCM mode for additional authentication. This required the master key to be 256 bits (32 bytes), when passing from file instead of KMS. +### Key creation + +To create a key on AWS, follow these steps: + +1. Go to the [AWS KMS](https://console.aws.amazon.com/kms) on the AWS console. +2. 
Make sure that you have selected the correct region on the top right corner of your console. +3. Click **Create key** and select **Symmetric** as the key type. +4. Set an alias for the key. + +You can also perform the operations using the AWS CLI: + +```shell +aws --region us-west-2 kms create-key +aws --region us-west-2 kms create-alias --alias-name "alias/tidb-tde" --target-key-id 0987dcba-09fe-87dc-65ba-ab0987654321 +``` + +The `--target-key-id` to enter in the second command is in the output of the first command. + ### Configure encryption -To enable encryption, you can add the encryption section in TiKV's configuration file: +To enable encryption, you can add the encryption section in the configuration files of TiKV and PD: ``` [security.encryption] -data-encryption-method = aes128-ctr -data-key-rotation-period = 7d +data-encryption-method = "aes128-ctr" +data-key-rotation-period = "168h" # 7 days ``` Possible values for `data-encryption-method` are "aes128-ctr", "aes192-ctr", "aes256-ctr" and "plaintext". The default value is "plaintext", which means encryption is not turned on. `data-key-rotation-period` defines how often TiKV rotates the data key. Encryption can be turned on for a fresh TiKV cluster, or an existing TiKV cluster, though only data written after encryption is enabled is guaranteed to be encrypted. To disable encryption, remove `data-encryption-method` in the configuration file, or reset it to "plaintext", and restart TiKV. To change encryption method, update `data-encryption-method` in the configuration file and restart TiKV. @@ -61,7 +103,9 @@ region = "us-west-2" endpoint = "https://kms.us-west-2.amazonaws.com" ``` -The `key-id` specifies the key id for the KMS CMK. The `region` is the AWS region name for the KMS CMK. The `endpoint` is optional and doesn't need to be specified normally, unless you are using a AWS KMS compatible service from a non-AWS vendor. +The `key-id` specifies the key ID for the KMS CMK. 
The `region` is the AWS region name for the KMS CMK. The `endpoint` is optional and you do not need to specify it normally unless you are using an AWS KMS-compatible service from a non-AWS vendor or need to use a [VPC endpoint for KMS](https://docs.aws.amazon.com/kms/latest/developerguide/kms-vpc-endpoint.html). + +You can also use [multi-Region keys](https://docs.aws.amazon.com/kms/latest/developerguide/multi-region-keys-overview.html) in AWS. For this, you need to set up a primary key in a specific region and add replica keys in the regions you require. To specify a master key that's stored in a file, the master key configuration would look like the following: @@ -141,9 +185,3 @@ When restoring the backup, both `--s3.sse` and `--s3.sse-kms-key-id` should NOT ``` ./br restore full --pd --storage "s3:/// --s3.region " ``` - -## Encryption at rest for TiFlash New in v4.0.5 - -TiFlash supports encryption at rest since v4.0.5. Data keys are generated by TiFlash. All files (including data files, schema files, and temporary files) written into TiFlash (including TiFlash Proxy) are encrypted using the current data key. The encryption algorithms, the encryption configuration (in the `tiflash-learner.toml` file) supported by TiFlash, and the meanings of monitoring metrics are consistent with those of TiKV. - -If you have deployed TiFlash with Grafana, you can check the **TiFlash-Proxy-Details** -> **Encryption** panel. diff --git a/error-codes.md b/error-codes.md index 6867b930ba12..49775537e572 100644 --- a/error-codes.md +++ b/error-codes.md @@ -10,7 +10,7 @@ This document describes the problems encountered during the use of TiDB and prov ## Error codes -TiDB is compatible with the error codes in MySQL, and in most cases returns the same error code as MySQL. For a list of error codes for MySQL, see [Server Error Message Reference](https://dev.mysql.com/doc/refman/5.7/en/server-error-reference.html). 
In addition, TiDB has the following unique error codes: +TiDB is compatible with the error codes in MySQL, and in most cases returns the same error code as MySQL. For a list of error codes for MySQL, see [MySQL 5.7 Error Message Reference](https://dev.mysql.com/doc/mysql-errors/5.7/en/). In addition, TiDB has the following unique error codes: > **Note:** > diff --git a/experimental-features.md b/experimental-features.md index fb2f84d057be..aa55505635c1 100644 --- a/experimental-features.md +++ b/experimental-features.md @@ -24,13 +24,12 @@ This document introduces the experimental features of TiDB in different versions + List Partition (Introduced in v5.0) + List COLUMNS Partition (Introduced in v5.0) -+ [Dynamic Mode for Partitioned Tables](/partitioned-table.md#dynamic-mode). (Introduced in v5.1) ++ [Dynamic Pruning Mode for Partitioned Tables](/partitioned-table.md#dynamic-pruning-mode). (Introduced in v5.1) + The expression index feature. The expression index is also called the function-based index. When you create an index, the index fields do not have to be a specific column but can be an expression calculated from one or more columns. This feature is useful for quickly accessing the calculation-based tables. See [Expression index](/sql-statements/sql-statement-create-index.md) for details. (Introduced in v4.0) + [Generated Columns](/generated-columns.md). + [User-Defined Variables](/user-defined-variables.md). + [JSON data type](/data-type-json.md) and [JSON functions](/functions-and-operators/json-functions.md). + [View](/information-schema/information-schema-views.md). -+ [Stale Read](/stale-read.md). ## Configuration management diff --git a/explore-htap.md b/explore-htap.md new file mode 100644 index 000000000000..cd434d955821 --- /dev/null +++ b/explore-htap.md @@ -0,0 +1,104 @@ +--- +title: Explore HTAP +summary: Learn how to explore and use the features of TiDB HTAP. 
+--- + +# Explore HTAP + +This guide describes how to explore and use the features of TiDB Hybrid Transactional and Analytical Processing (HTAP). + +> **Note:** +> +> If you are new to TiDB HTAP and want to start using it quickly, see [Quick start with HTAP](/quick-start-with-htap.md). + +## Use cases + +TiDB HTAP can handle the massive data that increases rapidly, reduce the cost of DevOps, and be deployed in either on-premises or cloud environments easily, which brings the value of data assets in real time. + +The following are the typical use cases of HTAP: + +- Hybrid workload + + When using TiDB for real-time Online Analytical Processing (OLAP) in hybrid load scenarios, you only need to provide an entry point of TiDB to your data. TiDB automatically selects different processing engines based on the specific business. + +- Real-time stream processing + + When using TiDB in real-time stream processing scenarios, TiDB ensures that all the data flowed in constantly can be queried in real time. At the same time, TiDB also can handle highly concurrent data workloads and Business Intelligence (BI) queries. + +- Data hub + + When using TiDB as a data hub, TiDB can meet specific business needs by seamlessly connecting the data for the application and the data warehouse. + +For more information about use cases of TiDB HTAP, see [blogs about HTAP on the PingCAP website](https://en.pingcap.com/blog/tag/HTAP). + +## Architecture + +In TiDB, a row-based storage engine [TiKV](/tikv-overview.md) for Online Transactional Processing (OLTP) and a columnar storage engine [TiFlash](/tiflash/tiflash-overview.md) for Online Analytical Processing (OLAP) co-exist, replicate data automatically, and keep strong consistency. + +For more information about the architecture, see [architecture of TiDB HTAP](/tiflash/tiflash-overview.md#architecture). 
+ +## Environment preparation + +Before exploring the features of TiDB HTAP, you need to deploy TiDB and the corresponding storage engines according to the data volume. If the data volume is large (for example, 100 T), it is recommended to use TiFlash Massively Parallel Processing (MPP) as the primary solution and TiSpark as the supplementary solution. + +- TiFlash + + - If you have deployed a TiDB cluster with no TiFlash node, add the TiFlash nodes in the current TiDB cluster. For detailed information, see [Scale out a TiFlash cluster](/scale-tidb-using-tiup.md#scale-out-a-tiflash-cluster). + - If you have not deployed a TiDB cluster, see [Deploy a TiDB Cluster using TiUP](/production-deployment-using-tiup.md). Based on the minimal TiDB topology, you also need to deploy the [topology of TiFlash](/tiflash-deployment-topology.md). + - When deciding how to choose the number of TiFlash nodes, consider the following scenarios: + + - If your use case requires OLTP with small-scale analytical processing and Ad-Hoc queries, deploy one or several TiFlash nodes. They can dramatically increase the speed of analytic queries. + - If the OLTP throughput does not cause significant pressure to I/O usage rate of the TiFlash nodes, each TiFlash node uses more resources for computation, and thus the TiFlash cluster can have near-linear scalability. The number of TiFlash nodes should be tuned based on expected performance and response time. + - If the OLTP throughput is relatively high (for example, the write or update throughput is higher than 10 million lines/hours), due to the limited write capacity of network and physical disks, the I/O between TiKV and TiFlash becomes a bottleneck and is also prone to read and write hotspots. In this case, the number of TiFlash nodes has a complex non-linear relationship with the computation volume of analytical processing, so you need to tune the number of TiFlash nodes based on the actual status of the system. 
+ +- TiSpark + + - If your data needs to be analyzed with Spark, deploy TiSpark (Spark 3.x is not currently supported). For specific process, see [TiSpark User Guide](/tispark-overview.md). + + + +## Data preparation + +After TiFlash is deployed, TiKV does not replicate data to TiFlash automatically. You need to manually specify which tables need to be replicated to TiFlash. After that, TiDB creates the corresponding TiFlash replicas. + +- If there is no data in the TiDB Cluster, migrate the data to TiDB first. For detailed information, see [data migration](/migration-overview.md). +- If the TiDB cluster already has the replicated data from upstream, after TiFlash is deployed, data replication does not automatically begin. You need to manually specify the tables to be replicated to TiFlash. For detailed information, see [Use TiFlash](/tiflash/use-tiflash.md). + +## Data processing + +With TiDB, you can simply enter SQL statements for query or write requests. For the tables with TiFlash replicas, TiDB uses the front-end optimizer to automatically choose the optimal execution plan. + +> **Note:** +> +> The MPP mode of TiFlash is enabled by default. When an SQL statement is executed, TiDB automatically determines whether to run in the MPP mode through the optimizer. +> +> - To disable the MPP mode of TiFlash, set the value of the [tidb_allow_mpp](/system-variables.md#tidb_allow_mpp-new-in-v50) system variable to `OFF`. +> - To forcibly enable MPP mode of TiFlash for query execution, set the values of [tidb_allow_mpp](/system-variables.md#tidb_allow_mpp-new-in-v50) and [tidb_enforce_mpp](/system-variables.md#tidb_enforce_mpp-new-in-v51) to `ON`. +> - To check whether TiDB chooses the MPP mode to execute a specific query, see [Explain Statements in the MPP Mode](/explain-mpp.md#explain-statements-in-the-mpp-mode). If the output of `EXPLAIN` statement includes the `ExchangeSender` and `ExchangeReceiver` operators, the MPP mode is in use. 
+ +## Performance monitoring + +When using TiDB, you can monitor the TiDB cluster status and performance metrics in either of the following ways: + +- [TiDB Dashboard](/dashboard/dashboard-intro.md): you can see the overall running status of the TiDB cluster, analyze distribution and trends of read and write traffic, and learn the detailed execution information of slow queries. +- [Monitoring system (Prometheus & Grafana)](/grafana-overview-dashboard.md): you can see the monitoring parameters of TiDB cluster-related components including PD, TiDB, TiKV, TiFlash, TiCDC, and Node_exporter. + +To see the alert rules of TiDB cluster and TiFlash cluster, see [TiDB cluster alert rules](/alert-rules.md) and [TiFlash alert rules](/tiflash/tiflash-alert-rules.md). + +## Troubleshooting + +If any issue occurs when using TiDB, refer to the following documents: + +- [Analyze slow queries](/analyze-slow-queries.md) +- [Identify expensive queries](/identify-expensive-queries.md) +- [Troubleshoot hotspot issues](/troubleshoot-hot-spot-issues.md) +- [TiDB cluster troubleshooting guide](/troubleshoot-tidb-cluster.md) +- [Troubleshoot a TiFlash Cluster](/tiflash/troubleshoot-tiflash.md) + +You are also welcome to create [GitHub Issues](https://github.com/pingcap/tiflash/issues) or submit your questions on [AskTUG](https://asktug.com/). + +## What's next + +- To check the TiFlash version, critical logs, system tables, see [Maintain a TiFlash cluster](/tiflash/maintain-tiflash.md). +- To remove a specific TiFlash node, see [Scale in a TiFlash cluster](/scale-tidb-using-tiup.md#scale-in-a-tiflash-cluster). diff --git a/faq/migration-tidb-faq.md b/faq/migration-tidb-faq.md index 50e1939ecca1..7d6559264735 100644 --- a/faq/migration-tidb-faq.md +++ b/faq/migration-tidb-faq.md @@ -5,7 +5,15 @@ summary: Learn about the FAQs related to data migration. # Migration FAQs -This document summarizes the FAQs related to TiDB data migration. 
+This document summarizes the frequently asked questions (FAQs) related to TiDB data migration. + +For the frequently asked questions about migration-related tools, click the corresponding links in the list below: + +- [Backup & Restore FAQ](/br/backup-and-restore-faq.md) +- [TiDB Binlog FAQ](/tidb-binlog/tidb-binlog-faq.md) +- [TiDB Lightning FAQs](/tidb-lightning/tidb-lightning-faq.md) +- [TiDB Data Migration (DM) FAQs](https://docs.pingcap.com/tidb-data-migration/stable/faq) +- [Troubleshoot TiCDC](/ticdc/troubleshoot-ticdc.md) ## Full data export and import diff --git a/faq/sql-faq.md b/faq/sql-faq.md index c8cbd05be770..cfb877072a8e 100644 --- a/faq/sql-faq.md +++ b/faq/sql-faq.md @@ -187,11 +187,12 @@ SELECT column_name FROM table_name USE INDEX(index_name)WHERE where_conditio TiDB handles the SQL statement using the `schema` of the time and supports online asynchronous DDL change. A DML statement and a DDL statement might be executed at the same time and you must ensure that each statement is executed using the same `schema`. Therefore, when the DML operation meets the ongoing DDL operation, the `Information schema is changed` error might be reported. Some improvements have been made to prevent too many error reportings during the DML operation. -Now, there are still a few reasons for this error reporting (the latter two are unrelated to tables): +Now, there are still a few reasons for this error reporting (only the first one is related to tables): + Some tables involved in the DML operation are the same tables involved in the ongoing DDL operation. + The DML operation goes on for a long time. During this period, many DDL statements have been executed, which causes more than 1024 `schema` version changes. You can modify this default value by modifying the `tidb_max_delta_schema_count` variable. 
+ The TiDB server that accepts the DML request is not able to load `schema information` for a long time (possibly caused by the connection failure between TiDB and PD or TiKV). During this period, many DDL statements have been executed, which causes more than 100 `schema` version changes. ++ After TiDB restarts and before the first DDL operation is executed, the DML operation is executed and then encounters the first DDL operation (which means before the first DDL operation is executed, the transaction corresponding to the DML is started. And after the first `schema` version of the DDL is changed, the transaction corresponding to the DML is committed), this DML operation reports this error. > **Note:** > diff --git a/functions-and-operators/aggregate-group-by-functions.md b/functions-and-operators/aggregate-group-by-functions.md index 4094fa952290..7e8094ce82fd 100644 --- a/functions-and-operators/aggregate-group-by-functions.md +++ b/functions-and-operators/aggregate-group-by-functions.md @@ -154,3 +154,7 @@ group by id, val; The following aggregate functions are currently unsupported in TiDB. You can track our progress in [TiDB #7623](https://github.com/pingcap/tidb/issues/7623): - `JSON_ARRAYAGG` + +## Related system variables + +The `group_concat_max_len` variable sets the maximum number of items for the `GROUP_CONCAT()` function. 
\ No newline at end of file diff --git a/functions-and-operators/date-and-time-functions.md b/functions-and-operators/date-and-time-functions.md index eeef80249f96..32c2d2353c05 100644 --- a/functions-and-operators/date-and-time-functions.md +++ b/functions-and-operators/date-and-time-functions.md @@ -74,3 +74,7 @@ TiDB supports all of the [date and time functions](https://dev.mysql.com/doc/ref | [`YEARWEEK()`](https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html#function_yearweek) | Return the year and week | For details, see [Date and Time Functions](https://dev.mysql.com/doc/refman/5.7/en/date-and-time-functions.html). + +## Related system variables + +The `default_week_format` variable affects the `WEEK()` function. \ No newline at end of file diff --git a/functions-and-operators/encryption-and-compression-functions.md b/functions-and-operators/encryption-and-compression-functions.md index 837b051aa82a..d52b7ea79068 100644 --- a/functions-and-operators/encryption-and-compression-functions.md +++ b/functions-and-operators/encryption-and-compression-functions.md @@ -32,6 +32,10 @@ TiDB supports most of the [encryption and compression functions](https://dev.mys | [`ASYMMETRIC_SIGN()`](https://dev.mysql.com/doc/refman/5.7/en/enterprise-encryption-functions.html#function_asymmetric-sign) | Generate signature from digest | | [`ASYMMETRIC_VERIFY()`](https://dev.mysql.com/doc/refman/5.7/en/enterprise-encryption-functions.html#function_asymmetric-verify) | Verify that signature matches digest | +## Related system variables + +The `block_encryption_mode` variable sets the encryption mode that is used for `AES_ENCRYPT()` and `AES_DECRYPT()`. + ## Unsupported functions * `DES_DECRYPT()`, `DES_ENCRYPT()`, `OLD_PASSWORD()`, `ENCRYPT()`: these functions were deprecated in MySQL 5.7 and removed in 8.0. 
diff --git a/functions-and-operators/string-functions.md b/functions-and-operators/string-functions.md index 60b75c3a358d..c9d2539fb8a5 100644 --- a/functions-and-operators/string-functions.md +++ b/functions-and-operators/string-functions.md @@ -6,64 +6,65 @@ aliases: ['/docs/dev/functions-and-operators/string-functions/','/docs/dev/refer # String Functions -TiDB supports most of the [string functions](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html) available in MySQL 5.7. +TiDB supports most of the [string functions](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html) available in MySQL 5.7 and some of the [functions](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlqr/SQL-Functions.html#GUID-93EC62F8-415D-4A7E-B050-5D5B2C127009) available in Oracle 21. ## Supported functions -| Name | Description | -|:------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------| -| [`ASCII()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ascii) | Return numeric value of left-most character | -| [`BIN()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_bin) | Return a string containing binary representation of a number | -| [`BIT_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_bit-length) | Return length of argument in bits | -| [`CHAR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_char) | Return the character for each integer passed | -| [`CHAR_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_char-length) | Return number of characters in argument | -| [`CHARACTER_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_character-length) | Synonym for `CHAR_LENGTH()` | -| 
[`CONCAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat) | Return concatenated string | -| [`CONCAT_WS()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat-ws) | Return concatenate with separator | -| [`ELT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_elt) | Return string at index number | -| [`EXPORT_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_export-set) | Return a string such that for every bit set in the value bits, you get an on string and for every unset bit, you get an off string | -| [`FIELD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_field) | Return the index (position) of the first argument in the subsequent arguments | -| [`FIND_IN_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_find-in-set) | Return the index position of the first argument within the second argument | -| [`FORMAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_format) | Return a number formatted to specified number of decimal places | -| [`FROM_BASE64()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_from-base64) | Decode to a base-64 string and return result | -| [`HEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_hex) | Return a hexadecimal representation of a decimal or string value | -| [`INSERT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_insert) | Insert a substring at the specified position up to the specified number of characters | -| [`INSTR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_instr) | Return the index of the first occurrence of substring | -| [`LCASE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lcase) | Synonym for `LOWER()` | -| 
[`LEFT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_left) | Return the leftmost number of characters as specified | -| [`LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_length) | Return the length of a string in bytes | -| [`LIKE`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#operator_like) | Simple pattern matching | -| [`LOCATE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_locate) | Return the position of the first occurrence of substring | -| [`LOWER()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lower) | Return the argument in lowercase | -| [`LPAD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lpad) | Return the string argument, left-padded with the specified string | -| [`LTRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ltrim) | Remove leading spaces | -| [`MAKE_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_make-set) | Return a set of comma-separated strings that have the corresponding bit in bits set | -| [`MID()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_mid) | Return a substring starting from the specified position | -| [`NOT LIKE`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#operator_not-like) | Negation of simple pattern matching | -| [`NOT REGEXP`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_not-regexp) | Negation of `REGEXP` | -| [`OCT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_oct) | Return a string containing octal representation of a number | -| [`OCTET_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_octet-length) | Synonym for `LENGTH()` | -| [`ORD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ord) | Return character code for leftmost 
character of the argument | -| [`POSITION()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_position) | Synonym for `LOCATE()` | -| [`QUOTE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_quote) | Escape the argument for use in an SQL statement | -| [`REGEXP`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_regexp) | Pattern matching using regular expressions | -| [`REPEAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_repeat) | Repeat a string the specified number of times | -| [`REPLACE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_replace) | Replace occurrences of a specified string | -| [`REVERSE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_reverse) | Reverse the characters in a string | -| [`RIGHT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_right) | Return the specified rightmost number of characters | -| [`RLIKE`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_regexp) | Synonym for `REGEXP` | -| [`RPAD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_rpad) | Append string the specified number of times | -| [`RTRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_rtrim) | Remove trailing spaces | -| [`SPACE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_space) | Return a string of the specified number of spaces | -| [`STRCMP()`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#function_strcmp) | Compare two strings | -| [`SUBSTR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substr) | Return the substring as specified | -| [`SUBSTRING()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substring) | Return the substring as specified | -| 
[`SUBSTRING_INDEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substring-index) | Return a substring from a string before the specified number of occurrences of the delimiter | -| [`TO_BASE64()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_to-base64) | Return the argument converted to a base-64 string | -| [`TRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_trim) | Remove leading and trailing spaces | -| [`UCASE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ucase) | Synonym for `UPPER()` | -| [`UNHEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_unhex) | Return a string containing hex representation of a number | -| [`UPPER()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_upper) | Convert to uppercase | +| Name | Description | +|:----------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------| +| [`ASCII()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ascii) | Return numeric value of left-most character | +| [`BIN()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_bin) | Return a string containing binary representation of a number | +| [`BIT_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_bit-length) | Return length of argument in bits | +| [`CHAR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_char) | Return the character for each integer passed | +| [`CHAR_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_char-length) | Return number of characters in argument | +| 
[`CHARACTER_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_character-length) | Synonym for `CHAR_LENGTH()` | +| [`CONCAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat) | Return concatenated string | +| [`CONCAT_WS()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_concat-ws) | Return concatenate with separator | +| [`ELT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_elt) | Return string at index number | +| [`EXPORT_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_export-set) | Return a string such that for every bit set in the value bits, you get an on string and for every unset bit, you get an off string | +| [`FIELD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_field) | Return the index (position) of the first argument in the subsequent arguments | +| [`FIND_IN_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_find-in-set) | Return the index position of the first argument within the second argument | +| [`FORMAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_format) | Return a number formatted to specified number of decimal places | +| [`FROM_BASE64()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_from-base64) | Decode to a base-64 string and return result | +| [`HEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_hex) | Return a hexadecimal representation of a decimal or string value | +| [`INSERT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_insert) | Insert a substring at the specified position up to the specified number of characters | +| [`INSTR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_instr) | Return the index of the first occurrence of substring | +| 
[`LCASE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lcase) | Synonym for `LOWER()` | +| [`LEFT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_left) | Return the leftmost number of characters as specified | +| [`LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_length) | Return the length of a string in bytes | +| [`LIKE`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#operator_like) | Simple pattern matching | +| [`LOCATE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_locate) | Return the position of the first occurrence of substring | +| [`LOWER()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lower) | Return the argument in lowercase | +| [`LPAD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_lpad) | Return the string argument, left-padded with the specified string | +| [`LTRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ltrim) | Remove leading spaces | +| [`MAKE_SET()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_make-set) | Return a set of comma-separated strings that have the corresponding bit in bits set | +| [`MID()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_mid) | Return a substring starting from the specified position | +| [`NOT LIKE`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#operator_not-like) | Negation of simple pattern matching | +| [`NOT REGEXP`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_not-regexp) | Negation of `REGEXP` | +| [`OCT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_oct) | Return a string containing octal representation of a number | +| [`OCTET_LENGTH()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_octet-length) | Synonym for `LENGTH()` | +| 
[`ORD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ord) | Return character code for leftmost character of the argument | +| [`POSITION()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_position) | Synonym for `LOCATE()` | +| [`QUOTE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_quote) | Escape the argument for use in an SQL statement | +| [`REGEXP`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_regexp) | Pattern matching using regular expressions | +| [`REPEAT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_repeat) | Repeat a string the specified number of times | +| [`REPLACE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_replace) | Replace occurrences of a specified string | +| [`REVERSE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_reverse) | Reverse the characters in a string | +| [`RIGHT()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_right) | Return the specified rightmost number of characters | +| [`RLIKE`](https://dev.mysql.com/doc/refman/5.7/en/regexp.html#operator_regexp) | Synonym for `REGEXP` | +| [`RPAD()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_rpad) | Append string the specified number of times | +| [`RTRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_rtrim) | Remove trailing spaces | +| [`SPACE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_space) | Return a string of the specified number of spaces | +| [`STRCMP()`](https://dev.mysql.com/doc/refman/5.7/en/string-comparison-functions.html#function_strcmp) | Compare two strings | +| [`SUBSTR()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substr) | Return the substring as specified | +| 
[`SUBSTRING()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substring) | Return the substring as specified | +| [`SUBSTRING_INDEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_substring-index) | Return a substring from a string before the specified number of occurrences of the delimiter | +| [`TO_BASE64()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_to-base64) | Return the argument converted to a base-64 string | +| [`TRANSLATE()`](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/TRANSLATE.html#GUID-80F85ACB-092C-4CC7-91F6-B3A585E3A690) | Replace all occurrences of characters by other characters in a string. It does not treat empty strings as `NULL` as Oracle does. | +| [`TRIM()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_trim) | Remove leading and trailing spaces | +| [`UCASE()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_ucase) | Synonym for `UPPER()` | +| [`UNHEX()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_unhex) | Return a string containing hex representation of a number | +| [`UPPER()`](https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_upper) | Convert to uppercase | ## Unsupported functions diff --git a/functions-and-operators/tidb-functions.md b/functions-and-operators/tidb-functions.md index 07a20be367a3..2359dacc1d48 100644 --- a/functions-and-operators/tidb-functions.md +++ b/functions-and-operators/tidb-functions.md @@ -257,3 +257,73 @@ Check Table Before Drop: false ### MySQL compatibility The `TIDB_VERSION` function is TiDB-specific and not compatible with MySQL. If MySQL compatibility is required, you can also use `VERSION` to get version information, but the result does not contain build details. 
+ +## TIDB_DECODE_SQL_DIGESTS + +The `TIDB_DECODE_SQL_DIGESTS` function is used to query the normalized SQL statements (a form without formats and arguments) corresponding to the set of SQL digests in the cluster. This function accepts 1 or 2 arguments: + +* `digests`: A string. This parameter is in the format of a JSON string array, and each string in the array is a SQL digest. +* `stmtTruncateLength`: An integer (optional). It is used to limit the length of each SQL statement in the returned result. If a SQL statement exceeds the specified length, the statement is truncated. `0` means that the length is unlimited. + +This function returns a string, which is in the format of a JSON string array. The *i*-th item in the array is the normalized SQL statement corresponding to the *i*-th element in the `digests` parameter. If an element in the `digests` parameter is not a valid SQL digest or the system cannot find the corresponding SQL statement, the corresponding item in the returned result is `null`. If the truncation length is specified (`stmtTruncateLength > 0`), for each statement in the returned result that exceeds this length, the first `stmtTruncateLength` characters are retained and the suffix `"..."` is added at the end to indicate the truncation. If the `digests` parameter is `NULL`, the returned value of the function is `NULL`. + +> **Note:** +> +> * Only users with the [PROCESS](https://dev.mysql.com/doc/refman/8.0/en/privileges-provided.html#priv_process) privilege can use this function. +> * When `TIDB_DECODE_SQL_DIGESTS` is executed, TiDB queries the statement corresponding to each SQL digest from the statement summary tables, so there is no guarantee that the corresponding statement can always be found for any SQL digest. Only the statements that have been executed in the cluster can be found, and whether these SQL statements can be queried or not is also affected by the related configuration of the statement summary tables. 
For the detailed description of the statement summary table, see [Statement Summary Tables](/statement-summary-tables.md). +> * This function has a high overhead. In queries with a large number of rows (for example, querying the full table of `information_schema.cluster_tidb_trx` on a large and busy cluster), using this function might cause the queries to run for too long. Use it with caution. +> * This function has a high overhead because every time it is called, it internally queries the `STATEMENTS_SUMMARY`, `STATEMENTS_SUMMARY_HISTORY`, `CLUSTER_STATEMENTS_SUMMARY`, and `CLUSTER_STATEMENTS_SUMMARY_HISTORY` tables, and the query involves the `UNION` operation. This function currently does not support vectorization, that is, when calling this function for multiple rows of data, the above query is performed separately for each row. + +### Synopsis + +```ebnf+diagram +DecodeSQLDigestsExpr ::= + "TIDB_DECODE_SQL_DIGESTS" "(" digests ( "," stmtTruncateLength )? ")" +``` + +### Example + +{{< copyable "sql" >}} + +```sql +set @digests = '["e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5","38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821","e5796985ccafe2f71126ed6c0ac939ffa015a8c0744a24b7aee6d587103fd2f7"]'; + +select tidb_decode_sql_digests(@digests); +``` + +```sql ++------------------------------------+ +| tidb_decode_sql_digests(@digests) | ++------------------------------------+ +| ["begin",null,"select * from `t`"] | ++------------------------------------+ +1 row in set (0.00 sec) +``` + +In the above example, the parameter is a JSON array containing 3 SQL digests, and the corresponding SQL statements are the three items in the query results. But the SQL statement corresponding to the second SQL digest cannot be found from the cluster, so the second item in the result is `null`. 
+ +{{< copyable "sql" >}} + +```sql +select tidb_decode_sql_digests(@digests, 10); +``` + +```sql ++---------------------------------------+ +| tidb_decode_sql_digests(@digests, 10) | ++---------------------------------------+ +| ["begin",null,"select * f..."] | ++---------------------------------------+ +1 row in set (0.01 sec) +``` + +The above call specifies the second parameter (that is, the truncation length) as 10, and the length of the third statement in the query result is greater than 10. Therefore, only the first 10 characters are retained, and `"..."` is added at the end, which indicates the truncation. + +### MySQL compatibility + +`TIDB_DECODE_SQL_DIGESTS` is a TiDB-specific function and not compatible with MySQL. + +### See also + +- [`Statement Summary Tables`](/statement-summary-tables.md) +- [`INFORMATION_SCHEMA.TIDB_TRX`](/information-schema/information-schema-tidb-trx.md) diff --git a/geo-distributed-deployment-topology.md b/geo-distributed-deployment-topology.md index 68e4484968f3..858f6e48acc6 100644 --- a/geo-distributed-deployment-topology.md +++ b/geo-distributed-deployment-topology.md @@ -84,9 +84,13 @@ This section describes the key parameter configuration of the TiDB geo-distribut value: "sha" ``` + > **Note:** + > + > Since TiDB 5.2, the `label-property` configuration is not supported by default. To set the replica policy, use the [placement rules](/configure-placement-rules.md). + +For further information about labels and the number of Raft Group replicas, see [Schedule Replicas by Topology Labels](/schedule-replicas-by-topology-labels.md). + > **Note:** > > - You do not need to manually create the `tidb` user in the configuration file. The TiUP cluster component automatically creates the `tidb` user on the target machines. You can customize the user, or keep the user consistent with the control machine. > - If you configure the deployment directory as a relative path, the cluster will be deployed in the home directory of the user. 
- -[Schedule Replicas by Topology Labels](/schedule-replicas-by-topology-labels.md) further explains the use of labels and the number of Raft Group replicas. diff --git a/grafana-overview-dashboard.md b/grafana-overview-dashboard.md index b59dbdf25bd9..887bd68b22c6 100644 --- a/grafana-overview-dashboard.md +++ b/grafana-overview-dashboard.md @@ -34,7 +34,7 @@ To understand the key metrics displayed on the Overview dashboard, check the fol | PD | 99% Region heartbeat latency | The heartbeat latency per TiKV instance (P99). | | TiDB | Statement OPS | The number of different types of SQL statements executed per second, which is counted according to `SELECT`, `INSERT`, `UPDATE`, and other types of statements. | | TiDB | Duration | The execution time.
1. The duration between the time that the client's network request is sent to TiDB and the time that the request is returned to the client after TiDB has executed the request. In general, client requests are sent in the form of SQL statements; however, this duration can include the execution time of commands such as `COM_PING`, `COM_SLEEP`, `COM_STMT_FETCH`, and `COM_SEND_LONG_DATA`.
2. Because TiDB supports Multi-Query, TiDB supports sending multiple SQL statements at one time, such as `select 1; select 1; select 1;`. In this case, the total execution time of this query includes the execution time of all SQL statements. | -| TiDB | QPS By Instance | The QPS on each TiDB instance, which is classified according to the success or failure of command execution results. | +| TiDB | CPS By Instance | CPS By Instance: the command statistics on each TiDB instance, which is classified according to the success or failure of command execution results. | | TiDB | Failed Query OPM | The statistics of error types (such as syntax errors and primary key conflicts) based on the errors occurred when executing SQL statements per second on each TiDB instance. The module in which the error occurs and the error code are included. | | TiDB | Connection Count | The connection number of each TiDB instance. | | TiDB | Memory Usage | The memory usage statistics of each TiDB instance, which is divided into the memory occupied by processes and the memory applied by Golang on the heap. 
| diff --git a/grafana-pd-dashboard.md b/grafana-pd-dashboard.md index 68c346e86532..7b457bfdf5a9 100644 --- a/grafana-pd-dashboard.md +++ b/grafana-pd-dashboard.md @@ -84,8 +84,8 @@ The following is the description of PD Dashboard metrics items: ## Statistics - hot read -- Hot Region's leader distribution: The total number of leader Regions that have become read hotspots on each TiKV instance -- Total read bytes on hot leader Regions: The total read bytes of leaders that have become read hotspots on each TiKV instance +- Hot Region's peer distribution: The total number of peer Regions that have become read hotspots on each TiKV instance +- Total read bytes on hot peer Regions: The total read bytes of peers that have become read hotspots on each TiKV instance - Store read rate bytes: The total read bytes of each TiKV instance - Store read rate keys: The total read keys of each TiKV instance - Hot cache read entry number: The number of peers that are in the read hotspot statistics module on each TiKV instance diff --git a/grafana-tidb-dashboard.md b/grafana-tidb-dashboard.md index 52e04b900ce6..e7b1e086514c 100644 --- a/grafana-tidb-dashboard.md +++ b/grafana-tidb-dashboard.md @@ -165,6 +165,5 @@ To understand the key metrics displayed on the TiDB dashboard, check the followi - Batch Client - Pending Request Count by TiKV: the number of Batch messages that are pending processing - - Wait Duration 95: the waiting time of Batch messages that are pending processing - Batch Client Unavailable Duration 95: the unavailable time of the Batch client - No Available Connection Counter: the number of times the Batch client cannot find an available link diff --git a/grafana-tikv-dashboard.md b/grafana-tikv-dashboard.md index 03bca8a699a3..89d667111fd9 100644 --- a/grafana-tikv-dashboard.md +++ b/grafana-tikv-dashboard.md @@ -100,7 +100,14 @@ This document provides a detailed description of these key metrics on the **TiKV ## Raft process -- Ready handled: The count of handled 
ready operations per second +- Ready handled: The number of handled ready operations per type per second + - count: The number of handled ready operations per second + - has_ready_region: The number of Regions that have ready per second + - pending_region: The operations per second of the Regions being checked for whether it has ready. This metric is deprecated since v3.0.0 + - message: The number of messages that the ready operations per second contain + - append: The number of Raft log entries that the ready operations per second contain + - commit: The number of committed Raft log entries that the ready operations per second contain + - snapshot: The number of snapshots that the ready operations per second contains - 0.99 Duration of Raft store events: The time consumed by Raftstore events (P99) - Process ready duration: The time consumed for processes to be ready in Raft - Process ready duration per server: The time consumed for peer processes to be ready in Raft per TiKV instance. It should be less than 2 seconds (P99.99). diff --git a/information-schema/information-schema-data-lock-waits.md b/information-schema/information-schema-data-lock-waits.md index 71cba4cda575..37051b0d8970 100644 --- a/information-schema/information-schema-data-lock-waits.md +++ b/information-schema/information-schema-data-lock-waits.md @@ -7,10 +7,6 @@ summary: Learn the `DATA_LOCK_WAITS` information_schema table. The `DATA_LOCK_WAITS` table shows the ongoing pessimistic locks waiting on all TiKV nodes in the cluster. -> **Warning:** -> -> Currently, this is an experimental feature. The definition and behavior of the table structure might have major changes in future releases. 
- {{< copyable "sql" >}} ```sql @@ -22,24 +18,55 @@ DESC data_lock_waits; +------------------------+---------------------+------+------+---------+-------+ | Field | Type | Null | Key | Default | Extra | +------------------------+---------------------+------+------+---------+-------+ -| KEY | varchar(64) | NO | | NULL | | +| KEY | text | NO | | NULL | | +| KEY_INFO | text | YES | | NULL | | | TRX_ID | bigint(21) unsigned | NO | | NULL | | | CURRENT_HOLDING_TRX_ID | bigint(21) unsigned | NO | | NULL | | | SQL_DIGEST | varchar(64) | YES | | NULL | | +| SQL_DIGEST_TEXT | text | YES | | NULL | | +------------------------+---------------------+------+------+---------+-------+ ``` The meaning of each column field in the `DATA_LOCK_WAITS` table is as follows: -* `KEY`: The KEY that is waiting for the lock and displayed in the form of hexadecimal string. +* `KEY`: The key that is waiting for the lock and in the hexadecimal form. +* `KEY_INFO`: The detailed information of `KEY`. See the [KEY_INFO](#key_info) section. * `TRX_ID`: The ID of the transaction that is waiting for the lock. This ID is also the `start_ts` of the transaction. * `CURRENT_HOLDING_TRX_ID`: The ID of the transaction that currently holds the lock. This ID is also the `start_ts` of the transaction. * `SQL_DIGEST`: The digest of the SQL statement that is currently blocked in the lock-waiting transaction. +* `SQL_DIGEST_TEXT`: The normalized SQL statement (the SQL statement without arguments and formats) that is currently blocked in the lock-waiting transaction. It corresponds to `SQL_DIGEST`. > **Warning:** > -> * The information in this table is obtained in real time from all TiKV nodes during the query. Currently, even if the `WHERE` condition is added, TiDB might still collect information from all TiKV nodes. If the cluster is large and the load is high, querying this table might cause a potential risk of performance jitter. Therefore, use this table according to your actual situation. 
-> * The information from different TiKV nodes is NOT guaranteed to be the snapshot at the same point in time. +> * Only the users with the [PROCESS](https://dev.mysql.com/doc/refman/8.0/en/privileges-provided.html#priv_process) privilege can query this table. +> * The information in the `DATA_LOCK_WAITS` table is obtained in real time from all TiKV nodes during the query. Currently, even if a query has the `WHERE` condition, the information collection is still performed on all TiKV nodes. If your cluster is large and the load is high, querying this table might cause potential risk of performance jitter. Therefore, use it according to your actual situation. +> * Information from different TiKV nodes is NOT guaranteed to be snapshots of the same time. +> * The information (SQL digest) in the `SQL_DIGEST` column is the hash value calculated from the normalized SQL statement. The information in the `SQL_DIGEST_TEXT` column is internally queried from statements summary tables, so it is possible that the corresponding statement cannot be found internally. For the detailed description of SQL digests and the statements summary tables, see [Statement Summary Tables](/statement-summary-tables.md). + +## `KEY_INFO` + +The `KEY_INFO` column shows the detailed information of the `KEY` column. The information is shown in the JSON format. The description of each field is as follows: + +* `"db_id"`: The ID of the schema to which the key belongs. +* `"db_name"`: The name of the schema to which the key belongs. +* `"table_id"`: The ID of the table to which the key belongs. +* `"table_name"`: The name of the table to which the key belongs. +* `"partition_id"`: The ID of the partition where the key is located. +* `"partition_name"`: The name of the partition where the key is located. +* `"handle_type"`: The handle type of the row key (that is, the key that stores a row of data). 
The possible values are as follows: + * `"int"`: The handle type is int, which means that the handle is the row ID. + * `"common"`: The handle type is not int64. This type is shown in the non-int primary key when clustered index is enabled. + * `"unknown"`: The handle type is currently not supported. +* `"handle_value"`: The handle value. +* `"index_id"`: The index ID to which the index key (the key that stores the index) belongs. +* `"index_name"`: The name of the index to which the index key belongs. +* `"index_values"`: The index value in the index key. + +In the above fields, if the information of a field is not applicable or currently unavailable, the field is omitted in the query result. For example, the row key information does not contain `index_id`, `index_name`, and `index_values`; the index key does not contain `handle_type` and `handle_value`; non-partitioned tables do not display `partition_id` and `partition_name`; the key information in the deleted table cannot obtain schema information such as `table_name`, `db_id`, `db_name`, and `index_name`, and it is unable to distinguish whether the table is a partitioned table. + +> **Note:** +> +> If a key comes from a table with partitioning enabled, and the information of the schema to which the key belongs cannot be queried due to some reasons (for example, the table to which the key belongs has been deleted) during the query, the ID of the partition to which the key belongs might appear in the `table_id` field. This is because TiDB encodes the keys of different partitions in the same way as it encodes the keys of several independent tables. Therefore, when the schema information is missing, TiDB cannot confirm whether the key belongs to an unpartitioned table or to one partition of a table. ## Example @@ -51,36 +78,13 @@ select * from information_schema.data_lock_waits\G ```sql *************************** 1. 
row *************************** - KEY: 7480000000000000355f728000000000000002 - TRX_ID: 425405024158875649 -CURRENT_HOLDING_TRX_ID: 425405016242126849 - SQL_DIGEST: f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb22 -2 rows in set (0.01 sec) -``` - -The above query result shows that the transaction of the ID `425405024158875649` was trying to obtain the pessimistic lock on the key `7480000000000000355f728000000000000002` when the statement with digest `"f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb22"` was being executed, but the lock on this key was held by the transaction of the ID `425405016242126849`. - -## SQL Digest - -The `DATA_LOCK_WAITS` table records the SQL digest but not the original SQL statement. - -SQL digest is the hash value of the normalized SQL statement. To find the original SQL statement corresponding to the SQL digest, perform one of the following operations: - -- For the statements executed on the current TiDB node in the recent period of time, you can find the corresponding original SQL statement in the `STATEMENTS_SUMMARY` or `STATEMENTS_SUMMARY_HISTORY` table according to the SQL digest. -- For the statements executed on all TiDB nodes in the entire cluster in the recent period of time, you can find the corresponding SQL statement in the `CLUSTER_STATEMENTS_SUMMARY` or `CLUSTER_STATEMENTS_SUMMARY_HISTORY` table according to the SQL digest. - -{{< copyable "sql" >}} - -```sql -select digest, digest_text from information_schema.statements_summary where digest = "f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2"; -``` - -```sql -+------------------------------------------------------------------+---------------------------------------+ -| digest | digest_text | -+------------------------------------------------------------------+---------------------------------------+ -| f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2 | update `t` set `v` = ? where `id` = ? 
| -+------------------------------------------------------------------+---------------------------------------+ + KEY: 7480000000000000355F728000000000000001 + KEY_INFO: {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"1"} + TRX_ID: 426790594290122753 +CURRENT_HOLDING_TRX_ID: 426790590082449409 + SQL_DIGEST: 38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821 + SQL_DIGEST_TEXT: update `t` set `v` = `v` + ? where `id` = ? +1 row in set (0.01 sec) ``` -For detailed description of SQL digest, `STATEMENTS_SUMMARY`, `STATEMENTS_SUMMARY_HISTORY`, `CLUSTER_STATEMENTS_SUMMARY`, and `CLUSTER_STATEMENTS_SUMMARY_HISTORY` tables, see [Statement Summary Tables](/statement-summary-tables.md). +The above query result shows that the transaction of the ID `426790594290122753` is trying to obtain the pessimistic lock on the key `"7480000000000000355F728000000000000001"` when executing a statement that has digest `"38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821"` and is in the form of ``update `t` set `v` = `v` + ? where `id` = ?``, but the lock on this key was held by the transaction of the ID `426790590082449409`. diff --git a/information-schema/information-schema-deadlocks.md b/information-schema/information-schema-deadlocks.md index 6133ac398819..67c377ff82ae 100644 --- a/information-schema/information-schema-deadlocks.md +++ b/information-schema/information-schema-deadlocks.md @@ -7,10 +7,6 @@ summary: Learn the `DEADLOCKS` information_schema table. The `DEADLOCKS` table shows the information of the several deadlock errors that have occurred recently on the current TiDB node. -> **Warning:** -> -> Currently, this is an experimental feature. The definition and behavior of the table structure might have major changes in future releases. 
- {{< copyable "sql" >}} ```sql @@ -19,17 +15,19 @@ DESC deadlocks; ``` ```sql -+--------------------+---------------------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+--------------------+---------------------+------+------+---------+-------+ -| DEADLOCK_ID | bigint(21) | NO | | NULL | | -| OCCUR_TIME | timestamp(6) | YES | | NULL | | -| RETRYABLE | tinyint(1) | NO | | NULL | | -| TRY_LOCK_TRX_ID | bigint(21) unsigned | NO | | NULL | | -| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | -| KEY | text | YES | | NULL | | -| TRX_HOLDING_LOCK | bigint(21) unsigned | NO | | NULL | | -+--------------------+---------------------+------+------+---------+-------+ ++-------------------------+---------------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------------------+------+------+---------+-------+ +| DEADLOCK_ID | bigint(21) | NO | | NULL | | +| OCCUR_TIME | timestamp(6) | YES | | NULL | | +| RETRYABLE | tinyint(1) | NO | | NULL | | +| TRY_LOCK_TRX_ID | bigint(21) unsigned | NO | | NULL | | +| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | +| CURRENT_SQL_DIGEST_TEXT | text | YES | | NULL | | +| KEY | text | YES | | NULL | | +| KEY_INFO | text | YES | | NULL | | +| TRX_HOLDING_LOCK | bigint(21) unsigned | NO | | NULL | | ++-------------------------+---------------------+------+------+---------+-------+ ``` The `DEADLOCKS` table uses multiple rows to show the same deadlock event, and each row displays the information about one of the transactions involved in the deadlock event. If the TiDB node records multiple deadlock errors, each error is distinguished using the `DEADLOCK_ID` column. The same `DEADLOCK_ID` indicates the same deadlock event. Note that `DEADLOCK_ID` **does not guarantee global uniqueness and will not be persisted**. It only shows the same deadlock event in the same result set. 
@@ -38,78 +36,51 @@ The meaning of each column field in the `DEADLOCKS` table is as follows: * `DEADLOCK_ID`: The ID of the deadlock event. When multiple deadlock errors exist in the table, you can use this column to distinguish rows that belong to different deadlock errors. * `OCCUR_TIME`: The time when the deadlock error occurs. -* `RETRYABLE`: Whether the deadlock error can be retried. Currently, TiDB does not support collecting the information of the retryable deadlock error, so the value of this field is always `0`. For the description of retryable deadlock errors, see the [Retryable deadlock errors](#retryable-deadlock-errors) section. +* `RETRYABLE`: Whether the deadlock error can be retried. For the description of retryable deadlock errors, see the [Retryable deadlock errors](#retryable-deadlock-errors) section. * `TRY_LOCK_TRX_ID`: The ID of the transaction that tries to acquire lock. This ID is also the `start_ts` of the transaction. * `CURRENT_SQL_DIGEST`: The digest of the SQL statement currently being executed in the lock-acquiring transaction. +* `CURRENT_SQL_DIGEST_TEXT`: The normalized form of the SQL statement that is currently being executed in the lock-acquiring transaction. * `KEY`: The blocked key that the transaction tries to lock. The value of this field is displayed in the form of hexadecimal string. +* `KEY_INFO`: The detailed information of `KEY`. See the [KEY_INFO](#key_info) section. * `TRX_HOLDING_LOCK`: The ID of the transaction that currently holds the lock on the key and causes blocking. This ID is also the `start_ts` of the transaction. To adjust the maximum number of deadlock events that can be recorded in the `DEADLOCKS` table, adjust the [`pessimistic-txn.deadlock-history-capacity`](/tidb-configuration-file.md#deadlock-history-capacity) configuration in the TiDB configuration file. By default, the information of the recent 10 deadlock events is recorded in the table. 
-## Example 1 - -Assume that the table definition and the initial data are as follows: - -{{< copyable "sql" >}} - -```sql -create table t (id int primary key, v int); -insert into t values (1, 10), (2, 20); -``` - -Execute the two transactions in the following order: - -| Transaction 1 | Transaction 2 | Description | -|--------------------------------------|--------------------------------------|----------------------| -| `update t set v = 11 where id = 1;` | | | -| | `update t set v = 21 where id = 2;` | | -| `update t set v = 12 where id = 2;` | | Transaction 1 gets blocked. | -| | `update t set v = 22 where id = 1;` | Transaction 2 reports a deadlock error. | - -Next, transaction 2 reports a deadlock error. At this time, query the `DEADLOCKS` table: - -{{< copyable "sql" >}} - -```sql -select * from information_schema.deadlocks; -``` - -The expected output is as follows: - -```sql -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | KEY | TRX_HOLDING_LOCK | -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904707 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000002 | 425405959304904708 | -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904708 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000001 | 425405959304904707 | 
-+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -``` - -Two rows of data are generated in the `DEADLOCKS` table. The `DEADLOCK_ID` field of both rows is `1`, which means that the information in both rows belongs to the same deadlock error. The first row shows that the transaction of the ID `425405959304904707` is blocked on the key of `"7480000000000000385F728000000000000002"` by the transaction of the ID `"425405959304904708"`. The second row shows that the transaction of the ID `"425405959304904708"` is blocked on the key of `"7480000000000000385F728000000000000001"` by the transaction of the ID `425405959304904707`, which constitutes mutual blocking and forms a deadlock. - -## Example 2 - -Assume that you query the `DEADLOCKS` table and get the following result: - -```sql -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | KEY | TRX_HOLDING_LOCK | -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904707 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000002 | 425405959304904708 | -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904708 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000001 | 425405959304904707 | -| 2 | 2021-06-04 08:22:56.795410 | 0 | 425405961664462853 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 
7480000000000000385F728000000000000002 | 425405961664462854 | -| 2 | 2021-06-04 08:22:56.795410 | 0 | 425405961664462854 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000003 | 425405961664462855 | -| 2 | 2021-06-04 08:22:56.795410 | 0 | 425405961664462855 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000001 | 425405961664462853 | -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -``` +> **Warning:** +> +> * Only users with the [PROCESS](https://dev.mysql.com/doc/refman/8.0/en/privileges-provided.html#priv_process) privilege can query this table. +> * The information (SQL digest) in the `CURRENT_SQL_DIGEST` column is the hash value calculated from the normalized SQL statement. The information in the `CURRENT_SQL_DIGEST_TEXT` column is internally queried from statements summary tables, so it is possible that the corresponding statement cannot be found internally. For the detailed description of SQL digests and the statements summary tables, see [Statement Summary Tables](/statement-summary-tables.md). + +## `KEY_INFO` + +The `KEY_INFO` column shows the detailed information of the `KEY` column. The information is shown in the JSON format. The description of each field is as follows: + +* `"db_id"`: The ID of the schema to which the key belongs. +* `"db_name"`: The name of the schema to which the key belongs. +* `"table_id"`: The ID of the table to which the key belongs. +* `"table_name"`: The name of the table to which the key belongs. +* `"partition_id"`: The ID of the partition where the key is located. +* `"partition_name"`: The name of the partition where the key is located. +* `"handle_type"`: The handle type of the row key (that is, the key that stores a row of data). 
The possible values are as follows: + * `"int"`: The handle type is int, which means that the handle is the row ID. + * `"common"`: The handle type is not int64. This type is shown in the non-int primary key when clustered index is enabled. + * `"unknown"`: The handle type is currently not supported. +* `"handle_value"`: The handle value. +* `"index_id"`: The index ID to which the index key (the key that stores the index) belongs. +* `"index_name"`: The name of the index to which the index key belongs. +* `"index_values"`: The index value in the index key. + +In the above fields, if the information of a field is not applicable or currently unavailable, the field is omitted in the query result. For example, the row key information does not contain `index_id`, `index_name`, and `index_values`; the index key does not contain `handle_type` and `handle_value`; non-partitioned tables do not display `partition_id` and `partition_name`; the key information in the deleted table cannot obtain schema information such as `table_name`, `db_id`, `db_name`, and `index_name`, and it is unable to distinguish whether the table is a partitioned table. -The `DEADLOCK_ID` column in the above query result shows that the first two rows together represent the information of a deadlock error, and the two transactions that wait for each other form the deadlock. The next three rows together represent another deadlock information, and the three transactions that wait in a cycle form the deadlock. +> **Note:** +> +> If a key comes from a table with partitioning enabled, and the information of the schema to which the key belongs cannot be queried due to some reasons (for example, the table to which the key belongs has been deleted) during the query, the ID of the partition to which the key belongs might appear in the `table_id` field. This is because TiDB encodes the keys of different partitions in the same way as it encodes the keys of several independent tables.
Therefore, when the schema information is missing, TiDB cannot confirm whether the key belongs to an unpartitioned table or to one partition of a table. ## Retryable deadlock errors > **Note:** > -> Currently, TiDB does not support collecting retryable deadlock errors in the `DEADLOCKS` table. +> The `DEADLOCKS` table does not collect the information of retryable deadlock errors by default. If you want the table to collect the retryable deadlock error information, you can adjust the value of [`pessimistic-txn.deadlock-history-collect-retryable`](/tidb-configuration-file.md#deadlock-history-collect-retryable) in the TiDB configuration file. When transaction A is blocked by a lock already held by transaction B, and transaction B is directly or indirectly blocked by the lock held by the current transaction A, a deadlock error will occur. In this deadlock, there might be two cases: @@ -146,55 +117,91 @@ For this case, because the statement of transaction A that blocks other transact When a retryable deadlock occurs, the internal automatic retry will not cause a transaction error, so it is transparent to the client. However, if this situation occurs frequently, the performance might be affected. When this occurs, you can see `single statement deadlock, retry statement` in the TiDB log. -## CLUSTER_DEADLOCKS +## Example 1 -The `CLUSTER_DEADLOCKS` table returns information about the recent deadlock errors on each TiDB node in the entire cluster, which is the information of the `DEADLOCKS` table on each node combined together. `CLUSTER_DEADLOCKS` also contains an additional `INSTANCE` column to display the IP address and port of the node to distinguish between different TiDB nodes. 
+Assume that the table definition and the initial data are as follows: -Note that, because `DEADLOCK_ID` does not guarantee global uniqueness, in the query result of the `CLUSTER_DEADLOCKS` table, you need to use the `INSTANCE` and `DEADLOCK_ID` together to distinguish the information of different deadlock errors in the result set. +{{< copyable "sql" >}} + +```sql +create table t (id int primary key, v int); +insert into t values (1, 10), (2, 20); +``` + +Two transactions are executed in the following order: + +| Transaction 1 | Transaction 2 | Description | +|--------------------------------------|--------------------------------------|----------------------| +| `update t set v = 11 where id = 1;` | | | +| | `update t set v = 21 where id = 2;` | | +| `update t set v = 12 where id = 2;` | | Transaction 1 gets blocked. | +| | `update t set v = 22 where id = 1;` | Transaction 2 reports a deadlock error. | + +Next, transaction 2 reports a deadlock error. At this time, query the `DEADLOCKS` table: {{< copyable "sql" >}} ```sql -USE information_schema; -DESC cluster_deadlocks; +select * from information_schema.deadlocks; ``` +The expected output is as follows: + ```sql -+--------------------+---------------------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+--------------------+---------------------+------+------+---------+-------+ -| INSTANCE | varchar(64) | YES | | NULL | | -| DEADLOCK_ID | bigint(21) | NO | | NULL | | -| OCCUR_TIME | timestamp(6) | YES | | NULL | | -| RETRYABLE | tinyint(1) | NO | | NULL | | -| TRY_LOCK_TRX_ID | bigint(21) unsigned | NO | | NULL | | -| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | -| KEY | text | YES | | NULL | | -| TRX_HOLDING_LOCK | bigint(21) unsigned | NO | | NULL | | -+--------------------+---------------------+------+------+---------+-------+ 
++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | CURRENT_SQL_DIGEST_TEXT | KEY | KEY_INFO | TRX_HOLDING_LOCK | ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406216 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000002 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"2"} | 426812829645406217 | +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406217 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000001 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"1"} | 426812829645406216 | ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ ``` -## SQL Digest +Two rows of data are generated in the `DEADLOCKS` table. 
The `DEADLOCK_ID` field of both rows is `1`, which means that the information in both rows belongs to the same deadlock error. The first row shows that on the key of `"7480000000000000355F728000000000000002"`, the transaction of the ID `"426812829645406216"` is blocked by the transaction of the ID `"426812829645406217"`. The second row shows that on the key of `"7480000000000000355F728000000000000001"`, the transaction of the ID `"426812829645406217"` is blocked by the transaction of the ID `426812829645406216`, which constitutes mutual blocking and forms a deadlock. -The `DEADLOCKS` table records the SQL digest but not the original SQL statement. +## Example 2 -SQL digest is the hash value after the SQL normalization. To find the original SQL statement corresponding to the SQL digest, perform one of the following operations: +Assume that you query the `DEADLOCKS` table and get the following result: -- For the statements executed on the current TiDB node in the recent period of time, you can find the corresponding original SQL statement in the `STATEMENTS_SUMMARY` or `STATEMENTS_SUMMARY_HISTORY` table according to the SQL digest. -- For the statements executed on all TiDB nodes in the entire cluster in the recent period of time, you can find the corresponding SQL statement in the `CLUSTER_STATEMENTS_SUMMARY` or `CLUSTER_STATEMENTS_SUMMARY_HISTORY` table according to the SQL digest. 
+```sql ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | CURRENT_SQL_DIGEST_TEXT | KEY | KEY_INFO | TRX_HOLDING_LOCK | ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406216 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000002 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"2"} | 426812829645406217 | +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406217 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000001 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"1"} | 426812829645406216 | +| 2 | 2021-08-05 11:09:21.252154 | 0 | 426812832017809412 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? 
; | 7480000000000000355F728000000000000002 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"2"} | 426812832017809413 | +| 2 | 2021-08-05 11:09:21.252154 | 0 | 426812832017809413 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000003 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"3"} | 426812832017809414 | +| 2 | 2021-08-05 11:09:21.252154 | 0 | 426812832017809414 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000001 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"1"} | 426812832017809412 | ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +``` + +The `DEADLOCK_ID` column in the above query result shows that the first two rows together represent the information of a deadlock error, and the two transactions that wait for each other form the deadlock. The next three rows together represent the information of another deadlock error, and the three transactions that wait in a cycle form the deadlock. + +## CLUSTER_DEADLOCKS + +The `CLUSTER_DEADLOCKS` table returns information about the recent deadlock errors on each TiDB node in the entire cluster, which is the combined information of the `DEADLOCKS` table on each node. `CLUSTER_DEADLOCKS` also includes an additional `INSTANCE` column to display the IP address and port of the node to distinguish between different TiDB nodes. 
+ +Note that, because `DEADLOCK_ID` does not guarantee global uniqueness, in the query result of the `CLUSTER_DEADLOCKS` table, you need to use the `INSTANCE` and `DEADLOCK_ID` together to distinguish the information of different deadlock errors in the result set. {{< copyable "sql" >}} ```sql -select digest, digest_text from information_schema.statements_summary where digest = "f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2"; +USE information_schema; +DESC cluster_deadlocks; ``` ```sql -+------------------------------------------------------------------+---------------------------------------+ -| digest | digest_text | -+------------------------------------------------------------------+---------------------------------------+ -| f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2 | update `t` set `v` = ? where `id` = ? | -+------------------------------------------------------------------+---------------------------------------+ ++-------------------------+---------------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+---------------------+------+------+---------+-------+ +| INSTANCE | varchar(64) | YES | | NULL | | +| DEADLOCK_ID | bigint(21) | NO | | NULL | | +| OCCUR_TIME | timestamp(6) | YES | | NULL | | +| RETRYABLE | tinyint(1) | NO | | NULL | | +| TRY_LOCK_TRX_ID | bigint(21) unsigned | NO | | NULL | | +| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | +| CURRENT_SQL_DIGEST_TEXT | text | YES | | NULL | | +| KEY | text | YES | | NULL | | +| KEY_INFO | text | YES | | NULL | | +| TRX_HOLDING_LOCK | bigint(21) unsigned | NO | | NULL | | ++-------------------------+---------------------+------+------+---------+-------+ ``` - -For detailed description of SQL digest, `STATEMENTS_SUMMARY`, `STATEMENTS_SUMMARY_HISTORY`, `CLUSTER_STATEMENTS_SUMMARY`, and `CLUSTER_STATEMENTS_SUMMARY_HISTORY` tables, see [Statement Summary Tables](/statement-summary-tables.md). 
diff --git a/information-schema/information-schema-referential-constraints.md b/information-schema/information-schema-referential-constraints.md new file mode 100644 index 000000000000..18bb3b9d3d4c --- /dev/null +++ b/information-schema/information-schema-referential-constraints.md @@ -0,0 +1,69 @@ +--- +title: REFERENTIAL_CONSTRAINTS +summary: Learn the `REFERENTIAL_CONSTRAINTS` information_schema table. +--- + +# REFERENTIAL_CONSTRAINTS + +The `REFERENTIAL_CONSTRAINTS` table provides information about `FOREIGN KEY` relationships between tables. Note that TiDB currently does not enforce `FOREIGN KEY` constraints, or perform actions such as `ON DELETE CASCADE`. + +{{< copyable "sql" >}} + +```sql +USE information_schema; +DESC referential_constraints; +``` + +```sql ++---------------------------+--------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------------------+--------------+------+------+---------+-------+ +| CONSTRAINT_CATALOG | varchar(512) | NO | | NULL | | +| CONSTRAINT_SCHEMA | varchar(64) | NO | | NULL | | +| CONSTRAINT_NAME | varchar(64) | NO | | NULL | | +| UNIQUE_CONSTRAINT_CATALOG | varchar(512) | NO | | NULL | | +| UNIQUE_CONSTRAINT_SCHEMA | varchar(64) | NO | | NULL | | +| UNIQUE_CONSTRAINT_NAME | varchar(64) | YES | | NULL | | +| MATCH_OPTION | varchar(64) | NO | | NULL | | +| UPDATE_RULE | varchar(64) | NO | | NULL | | +| DELETE_RULE | varchar(64) | NO | | NULL | | +| TABLE_NAME | varchar(64) | NO | | NULL | | +| REFERENCED_TABLE_NAME | varchar(64) | NO | | NULL | | ++---------------------------+--------------+------+------+---------+-------+ +11 rows in set (0.00 sec) +``` + +{{< copyable "sql" >}} + +```sql +CREATE TABLE test.parent ( + id INT NOT NULL AUTO_INCREMENT, + PRIMARY KEY (id) +); + +CREATE TABLE test.child ( + id INT NOT NULL AUTO_INCREMENT, + name varchar(255) NOT NULL, + parent_id INT DEFAULT NULL, + PRIMARY KEY (id), + CONSTRAINT fk_parent FOREIGN KEY (parent_id) REFERENCES 
parent (id) ON UPDATE CASCADE ON DELETE RESTRICT +); + +SELECT * FROM referential_constraints\G +``` + +``` +*************************** 1. row *************************** + CONSTRAINT_CATALOG: def + CONSTRAINT_SCHEMA: test + CONSTRAINT_NAME: fk_parent +UNIQUE_CONSTRAINT_CATALOG: def + UNIQUE_CONSTRAINT_SCHEMA: test + UNIQUE_CONSTRAINT_NAME: PRIMARY + MATCH_OPTION: NONE + UPDATE_RULE: CASCADE + DELETE_RULE: RESTRICT + TABLE_NAME: child + REFERENCED_TABLE_NAME: parent +1 row in set (0.00 sec) +``` \ No newline at end of file diff --git a/information-schema/information-schema-tidb-trx.md b/information-schema/information-schema-tidb-trx.md index 1a9008bc49f9..f67d881eca3f 100644 --- a/information-schema/information-schema-tidb-trx.md +++ b/information-schema/information-schema-tidb-trx.md @@ -7,11 +7,6 @@ summary: Learn the `TIDB_TRX` information_schema table. The `TIDB_TRX` table provides information about the transactions currently being executed on the TiDB node. -> **Warning:** -> -> * Currently, this is an experimental feature. The definition and behavior of the table structure might have major changes in future releases. -> * Currently, the `TIDB_TRX` table does not support displaying information of TiDB's internal transactions. 
- {{< copyable "sql" >}} ```sql @@ -20,21 +15,22 @@ DESC tidb_trx; ``` ```sql -+--------------------+---------------------------------------------------------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+--------------------+---------------------------------------------------------+------+------+---------+-------+ -| ID | bigint(21) unsigned | NO | PRI | NULL | | -| START_TIME | timestamp(6) | YES | | NULL | | -| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | -| STATE | enum('Normal','LockWaiting','Committing','RollingBack') | YES | | NULL | | -| WAITING_START_TIME | timestamp(6) | YES | | NULL | | -| MEM_BUFFER_KEYS | bigint(64) | YES | | NULL | | -| MEM_BUFFER_BYTES | bigint(64) | YES | | NULL | | -| SESSION_ID | bigint(21) unsigned | YES | | NULL | | -| USER | varchar(16) | YES | | NULL | | -| DB | varchar(64) | YES | | NULL | | -| ALL_SQL_DIGESTS | text | YES | | NULL | | -+--------------------+---------------------------------------------------------+------+------+---------+-------+ ++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ +| ID | bigint(21) unsigned | NO | PRI | NULL | | +| START_TIME | timestamp(6) | YES | | NULL | | +| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | +| CURRENT_SQL_DIGEST_TEXT | text | YES | | NULL | | +| STATE | enum('Idle','Running','LockWaiting','Committing','RollingBack') | YES | | NULL | | +| WAITING_START_TIME | timestamp(6) | YES | | NULL | | +| MEM_BUFFER_KEYS | bigint(64) | YES | | NULL | | +| MEM_BUFFER_BYTES | bigint(64) | YES | | NULL | | +| SESSION_ID | bigint(21) unsigned | YES | | NULL | | +| USER | varchar(16) | YES | | NULL | | +| DB | varchar(64) | YES | | NULL | | +| ALL_SQL_DIGESTS | text | YES | | NULL | | 
++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ ``` The meaning of each column field in the `TIDB_TRX` table is as follows: @@ -42,8 +38,10 @@ The meaning of each column field in the `TIDB_TRX` table is as follows: * `ID`: The transaction ID, which is the `start_ts` (start timestamp) of the transaction. * `START_TIME`: The start time of the transaction, which is the physical time corresponding to the `start_ts` of the transaction. * `CURRENT_SQL_DIGEST`: The digest of the SQL statement currently being executed in the transaction. +* `CURRENT_SQL_DIGEST_TEXT`: The normalized form of the SQL statement currently being executed by the transaction, that is, the SQL statement without arguments and format. It corresponds to `CURRENT_SQL_DIGEST`. * `STATE`: The current state of the transaction. The possible values ​​include: - * `Normal`: The transaction is being executed normally or in an idle state. + * `Idle`: The transaction is in an idle state, that is, it is waiting for the user to input a query. + * `Running`: The transaction is executing a query. * `LockWaiting`: The transaction is waiting for the pessimistic lock to be acquired. Note that the transaction enters this state at the beginning of the pessimistic locking operation, no matter whether it is blocked by other transactions or not. * `Committing`: The transaction is in the process of commit. * `RollingBack`: The transaction is being rolled back. @@ -53,7 +51,14 @@ The meaning of each column field in the `TIDB_TRX` table is as follows: * `SESSION_ID`: The ID of the session to which this transaction belongs. * `USER`: The name of the user who performs the transaction. * `DB`: The current default database name of the session in which the transaction is executed. -* `ALL_SQL_DIGESTS`: The digest list of statements that have been executed in this transaction. For each transaction, the first 50 statements at most are recorded. 
+* `ALL_SQL_DIGESTS`: The digest list of statements that have been executed by the transaction. The list is shown as a string array in JSON format. Each transaction records at most the first 50 statements. Using the [`TIDB_DECODE_SQL_DIGESTS`](/functions-and-operators/tidb-functions.md#tidb_decode_sql_digests) function, you can convert the information in this column into a list of corresponding normalized SQL statements. + +> **Note:** +> +> * Only users with the [PROCESS](https://dev.mysql.com/doc/refman/8.0/en/privileges-provided.html#priv_process) privilege can obtain the complete information in this table. Users without the PROCESS privilege can only query information of the transactions performed by the current user. +> * The information (SQL digest) in the `CURRENT_SQL_DIGEST` and `ALL_SQL_DIGESTS` columns is the hash value calculated from the normalized SQL statement. The information in the `CURRENT_SQL_DIGEST_TEXT` column and the result returned from the `TIDB_DECODE_SQL_DIGESTS` function are internally queried from the statements summary tables, so it is possible that the corresponding statement cannot be found internally. For the detailed description of SQL digests and the statements summary tables, see [Statement Summary Tables](/statement-summary-tables.md). +> * The [`TIDB_DECODE_SQL_DIGESTS`](/functions-and-operators/tidb-functions.md#tidb_decode_sql_digests) function call has a high overhead. If the function is called to query historical SQL statements for a large number of transactions, the query might take a long time. If the cluster is large with many concurrent transactions, avoid directly using this function on the `ALL_SQL_DIGESTS` column while querying the full table of `TIDB_TRX`. This means to avoid an SQL statement like ``select *, tidb_decode_sql_digests(all_sql_digests) from tidb_trx``. +> * Currently the `TIDB_TRX` table does not support showing information of TiDB internal transactions.
## Example @@ -65,73 +70,83 @@ select * from information_schema.tidb_trx\G ```sql *************************** 1. row *************************** - ID: 425403705115541506 - START_TIME: 2021-06-04 05:59:10.691000 -CURRENT_SQL_DIGEST: NULL - STATE: Normal -WAITING_START_TIME: NULL - MEM_BUFFER_KEYS: 2 - MEM_BUFFER_BYTES: 48 - SESSION_ID: 7 - USER: root - DB: test - ALL_SQL_DIGESTS: [e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5, 04fa858fa491c62d194faec2ab427261cc7998b3f1ccf8f6844febca504cb5e9, f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2] -1 row in set (0.00 sec) + ID: 426789913200689153 + START_TIME: 2021-08-04 10:51:54.883000 + CURRENT_SQL_DIGEST: NULL +CURRENT_SQL_DIGEST_TEXT: NULL + STATE: Idle + WAITING_START_TIME: NULL + MEM_BUFFER_KEYS: 1 + MEM_BUFFER_BYTES: 29 + SESSION_ID: 7 + USER: root + DB: test + ALL_SQL_DIGESTS: ["e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5","04fa858fa491c62d194faec2ab427261cc7998b3f1ccf8f6844febca504cb5e9","b83710fa8ab7df8504920e8569e48654f621cf828afbe7527fd003b79f48da9e"] +*************************** 2. row *************************** + ID: 426789921471332353 + START_TIME: 2021-08-04 10:52:26.433000 + CURRENT_SQL_DIGEST: 38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821 +CURRENT_SQL_DIGEST_TEXT: update `t` set `v` = `v` + ? where `id` = ? + STATE: LockWaiting + WAITING_START_TIME: 2021-08-04 10:52:35.106568 + MEM_BUFFER_KEYS: 0 + MEM_BUFFER_BYTES: 0 + SESSION_ID: 9 + USER: root + DB: test + ALL_SQL_DIGESTS: ["e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5","38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821"] +2 rows in set (0.01 sec) ``` -The query result of the above example indicates that a transaction is being executed on the current node (the `STATE` is `Normal`), and this transaction is currently idle (`CURRENT_SQL_DIGEST` is `NULL`). 
This transaction has executed three statements (there are three records in the `ALL_SQL_DIGESTS` list and they are the digests of the three executed statements). - -## CLUSTER_TIDB_TRX - -The `TIDB_TRX` table only provides information about the transactions that are being executed on a single TiDB node. If you want to view the information of the transactions that are being executed on all TiDB nodes in the entire cluster, you need to query the `CLUSTER_TIDB_TRX` table. Compared with the query result of the `TIDB_TRX` table, the query result of the `CLUSTER_TIDB_TRX` table contains an extra `INSTANCE` field. The `INSTANCE` field displays the IP address and port of each node in the cluster, which is used to distinguish the TiDB node where the transaction is located. +From the query result of this example, you can see that: the current node has two on-going transactions. One transaction is in the idle state (`STATE` is `Idle` and `CURRENT_SQL_DIGEST` is `NULL`), and this transaction has executed 3 statements (there are three records in the `ALL_SQL_DIGESTS` list, which are the digests of the three SQL statements that have been executed). Another transaction is executing a statement and waiting for the lock (`STATE` is `LockWaiting` and `WAITING_START_TIME` shows the start time of the waiting lock). The transaction has executed 2 statements, and the statement currently being executed is in the form of ``"update `t` set `v` = `v` + ? where `id` = ?"``. 
{{< copyable "sql" >}} ```sql -USE information_schema; -DESC cluster_tidb_trx; +select id, all_sql_digests, tidb_decode_sql_digests(all_sql_digests) as all_sqls from information_schema.tidb_trx\G ``` ```sql -+--------------------+---------------------------------------------------------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+--------------------+---------------------------------------------------------+------+------+---------+-------+ -| INSTANCE | varchar(64) | YES | | NULL | | -| ID | bigint(21) unsigned | NO | PRI | NULL | | -| START_TIME | timestamp(6) | YES | | NULL | | -| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | -| STATE | enum('Normal','LockWaiting','Committing','RollingBack') | YES | | NULL | | -| WAITING_START_TIME | timestamp(6) | YES | | NULL | | -| MEM_BUFFER_KEYS | bigint(64) | YES | | NULL | | -| MEM_BUFFER_BYTES | bigint(64) | YES | | NULL | | -| SESSION_ID | bigint(21) unsigned | YES | | NULL | | -| USER | varchar(16) | YES | | NULL | | -| DB | varchar(64) | YES | | NULL | | -| ALL_SQL_DIGESTS | text | YES | | NULL | | -+--------------------+---------------------------------------------------------+------+------+---------+-------+ +*************************** 1. row *************************** + id: 426789913200689153 +all_sql_digests: ["e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5","04fa858fa491c62d194faec2ab427261cc7998b3f1ccf8f6844febca504cb5e9","b83710fa8ab7df8504920e8569e48654f621cf828afbe7527fd003b79f48da9e"] + all_sqls: ["begin","insert into `t` values ( ... )","update `t` set `v` = `v` + ?"] +*************************** 2. row *************************** + id: 426789921471332353 +all_sql_digests: ["e6f07d43b5c21db0fbb9a31feac2dc599787763393dd5acbfad80e247eb02ad5","38b03afa5debbdf0326a014dbe5012a62c51957f1982b3093e748460f8b00821"] + all_sqls: ["begin","update `t` set `v` = `v` + ? 
where `id` = ?"] ``` -## SQL Digest +This query calls the [`TIDB_DECODE_SQL_DIGESTS`](/functions-and-operators/tidb-functions.md#tidb_decode_sql_digests) function on the `ALL_SQL_DIGESTS` column of the `TIDB_TRX` table, and converts the SQL digest array into an array of normalized SQL statements through the system internal query. This helps you visually obtain the information of the statements that have been historically executed by the transaction. However, note that the above query scans the entire table of `TIDB_TRX` and calls the `TIDB_DECODE_SQL_DIGESTS` function for each row. Calling the `TIDB_DECODE_SQL_DIGESTS` function has a high overhead. Therefore, if many concurrent transactions exist in the cluster, try to avoid this type of query. -The `TIDB_TRX` table only records SQL digests, not the original SQL statement. - -SQL digest is the hash value after the SQL normalization. To find the original SQL statement corresponding to the SQL digest, perform one of the following operations: ## CLUSTER_TIDB_TRX -- For the statements executed on the current TiDB node in the recent period of time, you can find the corresponding orginal SQL statement from the SQL digest in `STATEMENTS_SUMMARY` or `STATEMENTS_SUMMARY_HISTORY`. -- For the statements executed on all TiDB nodes in the entire cluster in the recent period of time, you can find the corresponding SQL statement from the SQL digest in `CLUSTER_STATEMENTS_SUMMARY` or `CLUSTER_STATEMENTS_SUMMARY_HISTORY`. +The `TIDB_TRX` table only provides information about the transactions that are being executed on a single TiDB node. If you want to view the information of the transactions that are being executed on all TiDB nodes in the entire cluster, you need to query the `CLUSTER_TIDB_TRX` table. Compared with the query result of the `TIDB_TRX` table, the query result of the `CLUSTER_TIDB_TRX` table includes an extra `INSTANCE` field. 
The `INSTANCE` field displays the IP address and port of each node in the cluster, which is used to distinguish the TiDB nodes where the transactions are located. {{< copyable "sql" >}} ```sql -select digest, digest_text from information_schema.statements_summary where digest = "f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2"; +USE information_schema; +DESC cluster_tidb_trx; ``` ```sql -+------------------------------------------------------------------+---------------------------------------+ -| digest | digest_text | -+------------------------------------------------------------------+---------------------------------------+ -| f7530877a35ae65300c42250abd8bc731bbaf0a7cabc05dab843565230611bb2 | update `t` set `v` = ? where `id` = ? | -+------------------------------------------------------------------+---------------------------------------+ +mysql> desc cluster_tidb_trx; ++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ +| INSTANCE | varchar(64) | YES | | NULL | | +| ID | bigint(21) unsigned | NO | PRI | NULL | | +| START_TIME | timestamp(6) | YES | | NULL | | +| CURRENT_SQL_DIGEST | varchar(64) | YES | | NULL | | +| CURRENT_SQL_DIGEST_TEXT | text | YES | | NULL | | +| STATE | enum('Idle','Running','LockWaiting','Committing','RollingBack') | YES | | NULL | | +| WAITING_START_TIME | timestamp(6) | YES | | NULL | | +| MEM_BUFFER_KEYS | bigint(64) | YES | | NULL | | +| MEM_BUFFER_BYTES | bigint(64) | YES | | NULL | | +| SESSION_ID | bigint(21) unsigned | YES | | NULL | | +| USER | varchar(16) | YES | | NULL | | +| DB | varchar(64) | YES | | NULL | | +| ALL_SQL_DIGESTS | text | YES | | NULL | | 
++-------------------------+-----------------------------------------------------------------+------+------+---------+-------+ ``` - -For detailed description of SQL digest, `STATEMENTS_SUMMARY`, `STATEMENTS_SUMMARY_HISTORY`, `CLUSTER_STATEMENTS_SUMMARY`, and `CLUSTER_STATEMENTS_SUMMARY_HISTORY` tables, see [Statement Summary Tables](/statement-summary-tables.md). diff --git a/information-schema/information-schema.md b/information-schema/information-schema.md index d2d763dffe50..c06b656376e6 100644 --- a/information-schema/information-schema.md +++ b/information-schema/information-schema.md @@ -32,7 +32,7 @@ Many `INFORMATION_SCHEMA` tables have a corresponding `SHOW` command. The benefi | `PLUGINS` | Not implemented by TiDB. Returns zero rows. | | [`PROCESSLIST`](/information-schema/information-schema-processlist.md) | Provides similar information to the command `SHOW PROCESSLIST`. | | `PROFILING` | Not implemented by TiDB. Returns zero rows. | -| `REFERENTIAL_CONSTRAINTS` | Not implemented by TiDB. Returns zero rows. | +| `REFERENTIAL_CONSTRAINTS` | Provides information on `FOREIGN KEY` constraints. | | `ROUTINES` | Not implemented by TiDB. Returns zero rows. | | [`SCHEMATA`](/information-schema/information-schema-schemata.md) | Provides similar information to `SHOW DATABASES`. | | `SCHEMA_PRIVILEGES` | Not implemented by TiDB. Returns zero rows. | diff --git a/maintain-tidb-using-tiup.md b/maintain-tidb-using-tiup.md index 03f50ee1662d..874a9a202cdd 100644 --- a/maintain-tidb-using-tiup.md +++ b/maintain-tidb-using-tiup.md @@ -109,7 +109,7 @@ When the cluster is in operation, if you need to modify the parameters of a comp log.slow-threshold: 300 ``` - For the parameter format, see the [TiUP parameter template](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/topology.example.yaml). + For the parameter format, see the [TiUP parameter template](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml). 
**Use `.` to represent the hierarchy of the configuration items**. diff --git a/media/dashboard/dashboard-faq-devtools-application.png b/media/dashboard/dashboard-faq-devtools-application.png new file mode 100644 index 000000000000..812e672bafd3 Binary files /dev/null and b/media/dashboard/dashboard-faq-devtools-application.png differ diff --git a/media/dashboard/dashboard-faq-devtools.png b/media/dashboard/dashboard-faq-devtools.png new file mode 100644 index 000000000000..db2413d913f5 Binary files /dev/null and b/media/dashboard/dashboard-faq-devtools.png differ diff --git a/media/dashboard/dashboard-session-share-settings-1.png b/media/dashboard/dashboard-session-share-settings-1.png new file mode 100644 index 000000000000..6551d803039c Binary files /dev/null and b/media/dashboard/dashboard-session-share-settings-1.png differ diff --git a/media/dashboard/dashboard-session-share-settings-2.png b/media/dashboard/dashboard-session-share-settings-2.png new file mode 100644 index 000000000000..6c90439886cf Binary files /dev/null and b/media/dashboard/dashboard-session-share-settings-2.png differ diff --git a/media/dashboard/dashboard-session-share-settings-3.png b/media/dashboard/dashboard-session-share-settings-3.png new file mode 100644 index 000000000000..372a4ef41049 Binary files /dev/null and b/media/dashboard/dashboard-session-share-settings-3.png differ diff --git a/media/dashboard/dashboard-session-share-signin-1.png b/media/dashboard/dashboard-session-share-signin-1.png new file mode 100644 index 000000000000..00a29527f800 Binary files /dev/null and b/media/dashboard/dashboard-session-share-signin-1.png differ diff --git a/media/dashboard/dashboard-session-share-signin-2.png b/media/dashboard/dashboard-session-share-signin-2.png new file mode 100644 index 000000000000..006e85b115b7 Binary files /dev/null and b/media/dashboard/dashboard-session-share-signin-2.png differ diff --git a/media/dashboard/dashboard-session-share-signin-3.png 
b/media/dashboard/dashboard-session-share-signin-3.png new file mode 100644 index 000000000000..96cc51e59b06 Binary files /dev/null and b/media/dashboard/dashboard-session-share-signin-3.png differ diff --git a/media/dashboard/dashboard-session-sso-disable.png b/media/dashboard/dashboard-session-sso-disable.png new file mode 100644 index 000000000000..7740f1579ca9 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-disable.png differ diff --git a/media/dashboard/dashboard-session-sso-enable-1.png b/media/dashboard/dashboard-session-sso-enable-1.png new file mode 100644 index 000000000000..8b810c0f41a8 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-enable-1.png differ diff --git a/media/dashboard/dashboard-session-sso-enable-2.png b/media/dashboard/dashboard-session-sso-enable-2.png new file mode 100644 index 000000000000..5a721c39a965 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-enable-2.png differ diff --git a/media/dashboard/dashboard-session-sso-enable-3.png b/media/dashboard/dashboard-session-sso-enable-3.png new file mode 100644 index 000000000000..244a2ed84aff Binary files /dev/null and b/media/dashboard/dashboard-session-sso-enable-3.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-1.png b/media/dashboard/dashboard-session-sso-okta-1.png new file mode 100644 index 000000000000..bbea4f0b7759 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-1.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-2.png b/media/dashboard/dashboard-session-sso-okta-2.png new file mode 100644 index 000000000000..6a89ed6444ae Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-2.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-3.png b/media/dashboard/dashboard-session-sso-okta-3.png new file mode 100644 index 000000000000..186931fbf5ec Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-3.png differ diff --git 
a/media/dashboard/dashboard-session-sso-okta-4.png b/media/dashboard/dashboard-session-sso-okta-4.png new file mode 100644 index 000000000000..a33b5c33fe13 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-4.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-info-1.png b/media/dashboard/dashboard-session-sso-okta-info-1.png new file mode 100644 index 000000000000..a0d88c6c9b4c Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-info-1.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-info-2.png b/media/dashboard/dashboard-session-sso-okta-info-2.png new file mode 100644 index 000000000000..d36c5c96018f Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-info-2.png differ diff --git a/media/dashboard/dashboard-session-sso-okta-info-3.png b/media/dashboard/dashboard-session-sso-okta-info-3.png new file mode 100644 index 000000000000..fe3d85df0b6f Binary files /dev/null and b/media/dashboard/dashboard-session-sso-okta-info-3.png differ diff --git a/media/dashboard/dashboard-session-sso-reauthorize.png b/media/dashboard/dashboard-session-sso-reauthorize.png new file mode 100644 index 000000000000..c87891131378 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-reauthorize.png differ diff --git a/media/dashboard/dashboard-session-sso-signin.png b/media/dashboard/dashboard-session-sso-signin.png new file mode 100644 index 000000000000..b213419429e7 Binary files /dev/null and b/media/dashboard/dashboard-session-sso-signin.png differ diff --git a/media/sysbench_v511vsv520_point_select.png b/media/sysbench_v511vsv520_point_select.png new file mode 100644 index 000000000000..1bde357dc4af Binary files /dev/null and b/media/sysbench_v511vsv520_point_select.png differ diff --git a/media/sysbench_v511vsv520_read_write.png b/media/sysbench_v511vsv520_read_write.png new file mode 100644 index 000000000000..6c3017270724 Binary files /dev/null and 
b/media/sysbench_v511vsv520_read_write.png differ diff --git a/media/sysbench_v511vsv520_update_index.png b/media/sysbench_v511vsv520_update_index.png new file mode 100644 index 000000000000..caf5e9a9abf0 Binary files /dev/null and b/media/sysbench_v511vsv520_update_index.png differ diff --git a/media/sysbench_v511vsv520_update_non_index.png b/media/sysbench_v511vsv520_update_non_index.png new file mode 100644 index 000000000000..c037a7f5c2a3 Binary files /dev/null and b/media/sysbench_v511vsv520_update_non_index.png differ diff --git a/media/tidb-v5-tpch-100-vs-gp-spark.png b/media/tidb-v5-tpch-100-vs-gp-spark.png deleted file mode 100644 index c76f4ede8c37..000000000000 Binary files a/media/tidb-v5-tpch-100-vs-gp-spark.png and /dev/null differ diff --git a/media/tidb-v5.2-tpch-100-vs-gp-spark.png b/media/tidb-v5.2-tpch-100-vs-gp-spark.png new file mode 100644 index 000000000000..e743a8f7965e Binary files /dev/null and b/media/tidb-v5.2-tpch-100-vs-gp-spark.png differ diff --git a/media/tpcc_v511_vs_v520.png b/media/tpcc_v511_vs_v520.png new file mode 100644 index 000000000000..9b2808e8e29e Binary files /dev/null and b/media/tpcc_v511_vs_v520.png differ diff --git a/media/two-dc-replication-1.png b/media/two-dc-replication-1.png new file mode 100644 index 000000000000..e04dad91c9b3 Binary files /dev/null and b/media/two-dc-replication-1.png differ diff --git a/migrate-from-aurora-using-lightning.md b/migrate-from-aurora-using-lightning.md index 26106d74a839..e0d8d298d6d2 100644 --- a/migrate-from-aurora-using-lightning.md +++ b/migrate-from-aurora-using-lightning.md @@ -79,12 +79,12 @@ Because the snapshot data exported from Aurora to S3 does not contain the SQL st 1. 
Use Dumpling to export table schema files: ``` - ./dumpling --host 127.0.0.1 --port 4000 --user root --password password --no-data --output ./schema --filter "mydb.*" + ./dumpling --host database-1.cedtft9htlae.us-west-2.rds.amazonaws.com --port 3306 --user root --password password --consistency none --no-data --output ./schema --filter "mydb.*" ``` > **Note:** > - > - Set the parameters of the data source address and the path of output files according to your actual situation. + > - Set the parameters of the data source address and the path of output files according to your actual situation. For example, `database-1.cedtft9htlae.us-west-2.rds.amazonaws.com` is the address of Aurora MySQL. > - If you need to export all database tables, you do not need to set the `--filter` parameter. If you only need to export some of the database tables, configure `--filter` according to [table-filter](https://github.com/pingcap/tidb-tools/blob/master/pkg/table-filter/README.md). 2. Use TiDB Lightning to create table schemas: diff --git a/minimal-deployment-topology.md b/minimal-deployment-topology.md index c17babeb8707..03db82b2c74d 100644 --- a/minimal-deployment-topology.md +++ b/minimal-deployment-topology.md @@ -15,7 +15,7 @@ This document describes the minimal deployment topology of TiDB clusters. | TiDB | 3 | 16 VCore 32GB * 1 | 10.0.1.1
10.0.1.2
10.0.1.3 | Default port
Global directory configuration | | PD | 3 | 4 VCore 8GB * 1 |10.0.1.4
10.0.1.5
10.0.1.6 | Default port
Global directory configuration | | TiKV | 3 | 16 VCore 32GB 2TB (nvme ssd) * 1 | 10.0.1.7
10.0.1.8
10.0.1.9 | Default port
Global directory configuration | -| Monitoring & Grafana | 1 | 4 VCore 8GB * 1 500GB (ssd) | 10.0.1.11 | Default port
Global directory configuration | +| Monitoring & Grafana | 1 | 4 VCore 8GB * 1 500GB (ssd) | 10.0.1.10 | Default port
Global directory configuration | ### Topology templates diff --git a/multi-data-centers-in-one-city-deployment.md b/multi-data-centers-in-one-city-deployment.md index ec19507d81de..3681434d41e7 100644 --- a/multi-data-centers-in-one-city-deployment.md +++ b/multi-data-centers-in-one-city-deployment.md @@ -71,6 +71,10 @@ member leader_priority pdName2 4 member leader_priority pdName3 3 ``` +> **Note:** +> +> Since TiDB 5.2, the `label-property` configuration is not supported by default. To set the replica policy, use the [placement rules](/configure-placement-rules.md). + **Disadvantages:** - Write scenarios are still affected by network latency across DCs. This is because Raft follows the majority protocol and all written data must be replicated to at least two DCs. diff --git a/mysql-compatibility.md b/mysql-compatibility.md index 5505f1e9560d..3058b1c8a303 100644 --- a/mysql-compatibility.md +++ b/mysql-compatibility.md @@ -6,7 +6,7 @@ aliases: ['/docs/dev/mysql-compatibility/','/docs/dev/reference/mysql-compatibil # MySQL Compatibility -TiDB is fully compatible with the MySQL 5.7 protocol and the common features and syntax of MySQL 5.7. The ecosystem tools for MySQL 5.7 (PHPMyAdmin, Navicat, MySQL Workbench, mysqldump, and Mydumper/myloader) and the MySQL client can be used for TiDB. +TiDB is highly compatible with the MySQL 5.7 protocol and the common features and syntax of MySQL 5.7. The ecosystem tools for MySQL 5.7 (PHPMyAdmin, Navicat, MySQL Workbench, mysqldump, and Mydumper/myloader) and the MySQL client can be used for TiDB. However, some features of MySQL are not supported. This could be because there is now a better way to solve the problem (such as XML functions superseded by JSON), or a lack of current demand versus effort required (such as stored procedures and functions). Some features might also be difficult to implement as a distributed system. 
diff --git a/optimistic-transaction.md b/optimistic-transaction.md index 2403dbc608fa..c1ffae47f36b 100644 --- a/optimistic-transaction.md +++ b/optimistic-transaction.md @@ -65,7 +65,7 @@ However, TiDB transactions also have the following disadvantages: ## Transaction retries -In the optimistic transaction model, transactions might fail to be committed because of write–write conflict in heavy contention scenarios. TiDB uses optimistic concurrency control by default, whereas MySQL applies pessimistic concurrency control. This means that MySQL adds locks during SQL execution, and its Repeatable Read isolation level allows for non-repeatable reads, so commits generally do not encounter exceptions. To lower the difficulty of adapting applications, TiDB provides an internal retry mechanism. +In the optimistic transaction model, transactions might fail to be committed because of write–write conflict in heavy contention scenarios. TiDB uses optimistic concurrency control by default, whereas MySQL applies pessimistic concurrency control. This means that MySQL adds locks during the execution of write-type SQL statements, and its Repeatable Read isolation level allows for current reads, so commits generally do not encounter exceptions. To lower the difficulty of adapting applications, TiDB provides an internal retry mechanism. ### Automatic retry diff --git a/optimizer-hints.md b/optimizer-hints.md index a98ef4946108..aa9cb41eb8e0 100644 --- a/optimizer-hints.md +++ b/optimizer-hints.md @@ -189,7 +189,7 @@ select /*+ AGG_TO_COP() */ sum(t1.a) from t t1; ### READ_FROM_STORAGE(TIFLASH[t1_name [, tl_name ...]], TIKV[t2_name [, tl_name ...]]) -The `READ_FROM_STORAGE(TIFLASH[t1_name [, tl_name ...]], TIKV[t2_name [, tl_name ...]])` hint tells the optimizer to read specific table(s) from specific storage engine(s). Currently, this hint supports two storage engine parameters - `TIKV` and `TIFLASH`. 
For example: +The `READ_FROM_STORAGE(TIFLASH[t1_name [, tl_name ...]], TIKV[t2_name [, tl_name ...]])` hint tells the optimizer to read specific table(s) from specific storage engine(s). Currently, this hint supports two storage engine parameters - `TIKV` and `TIFLASH`. If a table has an alias, use the alias as the parameter of `READ_FROM_STORAGE()`; if the table does not have an alias, use the table's original name as the parameter. For example: {{< copyable "sql" >}} diff --git a/overview.md b/overview.md index 4c5a66ce0ca4..c4f69d1e6d40 100644 --- a/overview.md +++ b/overview.md @@ -20,7 +20,7 @@ aliases: ['/docs/dev/key-features/','/tidb/dev/key-features','/docs/dev/overview - **Real-time HTAP** - TiDB provides two storage engines: [TiKV](https://tikv.org/), a row-based storage engine, and [TiFlash](/tiflash/tiflash-overview.md), a columnar storage engine. TiFlash uses the Multi-Raft Learner protocol to replicate data from TiKV in real time, ensuring that the data between the TiKV row-based storage engine and the TiFlash columnar storage engine are consistent. TiKV and TiFlash can be deployed on different machines as needed to solve the problem of HTAP resource isolation. + TiDB provides two storage engines: [TiKV](/tikv-overview.md), a row-based storage engine, and [TiFlash](/tiflash/tiflash-overview.md), a columnar storage engine. TiFlash uses the Multi-Raft Learner protocol to replicate data from TiKV in real time, ensuring that the data between the TiKV row-based storage engine and the TiFlash columnar storage engine are consistent. TiKV and TiFlash can be deployed on different machines as needed to solve the problem of HTAP resource isolation. - **Cloud-native distributed database** @@ -28,7 +28,7 @@ aliases: ['/docs/dev/key-features/','/tidb/dev/key-features','/docs/dev/overview - **Compatible with the MySQL 5.7 protocol and MySQL ecosystem** - TiDB is compatible with the MySQL 5.7 protocol, common features of MySQL, and the MySQL ecosystem. 
To migrate your applications to TiDB, you do not need to change a single line of code in many cases or only need to modify a small amount of code. In addition, TiDB provides a series of [data migration tools](/migration-overview.md) to help migrate application data easily into TiDB. + TiDB is compatible with the MySQL 5.7 protocol, common features of MySQL, and the MySQL ecosystem. To migrate your applications to TiDB, you do not need to change a single line of code in many cases or only need to modify a small amount of code. In addition, TiDB provides a series of [data migration tools](/ecosystem-tool-user-guide.md) to help easily migrate application data into TiDB. ## Use cases diff --git a/partitioned-table.md b/partitioned-table.md index f545ff4435df..5d9b28c9873f 100644 --- a/partitioned-table.md +++ b/partitioned-table.md @@ -579,7 +579,21 @@ You can see that the inserted record `(NULL, 'mothra')` falls into the same part ## Partition management -You can add, drop, merge, split, redefine partitions by using `ALTER TABLE` statements. +For `LIST` and `RANGE` partitioned tables, you can add and drop partitions using the `ALTER TABLE ADD PARTITION ()` or `ALTER TABLE
DROP PARTITION ` statement. + +For `LIST` and `RANGE` partitioned tables, `REORGANIZE PARTITION` is not yet supported. + +For `HASH` partitioned tables, `COALESCE PARTITION` and `ADD PARTITION` are not yet supported. + +`EXCHANGE PARTITION` works by swapping a partition and a non-partitioned table, similar to how renaming a table like `RENAME TABLE t1 TO t1_tmp, t2 TO t1, t1_tmp TO t2` works. + +For example, `ALTER TABLE partitioned_table EXCHANGE PARTITION p1 WITH TABLE non_partitioned_table` swaps the `p1` partition of the `partitioned_table` table with the `non_partitioned_table` table. + +Ensure that all rows that you are exchanging into the partition match the partition definition; otherwise, these rows will not be found and cause unexpected issues. + +> **Warning:** +> +> `EXCHANGE PARTITION` is an experimental feature. It is not recommended to use it in a production environment. To enable it, set the `tidb_enable_exchange_partition` system variable to `ON`. ### Range partition management @@ -1280,7 +1294,7 @@ The `tidb_enable_list_partition` environment variable controls whether to enable This variable is only used in table creation. After the table is created, modify this variable value takes no effect. For details, see [system variables](/system-variables.md#tidb_enable_list_partition-new-in-v50). -### Dynamic mode +### Dynamic pruning mode > **Warning:** > diff --git a/pd-configuration-file.md b/pd-configuration-file.md index ecc76c857017..01f011044fa2 100644 --- a/pd-configuration-file.md +++ b/pd-configuration-file.md @@ -127,10 +127,15 @@ Configuration items related to security Configuration items related to log +### `level` + ++ The log level, which can be specified as "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". 
++ Default value: "INFO" + ### `format` + The log format, which can be specified as "text", "json", or "console" -+ Default value: `text` ++ Default value: "text" ### `disable-timestamp` @@ -263,11 +268,15 @@ Configuration items related to scheduling + Determines whether to enable the merging of cross-table Regions + Default value: `true` -### `region-score-formula-version` +### `region-score-formula-version` New in v5.0 + Controls the version of the Region score formula + Default value: `v2` -+ Optional values: `v1` and `v2` ++ Optional values: `v1` and `v2`. Compared to v1, the changes in v2 are smoother, and the scheduling jitter caused by space reclaim is improved. + +> **Note:** +> +> If you have upgraded your cluster from a TiDB 4.0 version to the current version, the new formula version is automatically disabled by default to ensure consistent PD behavior before and after the upgrading. If you want to change the formula version, you need to manually switch through the `pd-ctl` setting. For details, refer to [PD Control](/pd-control.md#config-show--set-option-value--placement-rules). ### `enable-joint-consensus` New in v5.0 @@ -280,7 +289,7 @@ Configuration items related to replicas ### `max-replicas` -+ The number of replicas ++ The number of replicas, that is, the sum of the number of leaders and followers. The default value `3` means 1 leader and 2 followers. When this configuration is modified online, PD will schedule Regions in the background so that the number of replicas matches this configuration. + Default value: `3` ### `location-labels` @@ -312,6 +321,10 @@ Configuration items related to replicas + Default value: 3 + PD rounds the lowest digits of the flow number, which reduces the update of statistics caused by the changes of the Region flow information. This configuration item is used to specify the number of lowest digits to round for the Region flow information. 
For example, the flow `100512` will be rounded to `101000` because the default value is `3`. This configuration replaces `trace-region-flow`. +> **Note:** +> +> If you have upgraded your cluster from a TiDB 4.0 version to the current version, the behavior of `flow-round-by-digit` after the upgrading and the behavior of `trace-region-flow` before the upgrading are consistent by default. This means that if the value of `trace-region-flow` is false before the upgrading, the value of `flow-round-by-digit` after the upgrading is 127; if the value of `trace-region-flow` is `true` before the upgrading, the value of `flow-round-by-digit` after the upgrading is `3`. + ## `label-property` Configuration items related to labels @@ -359,4 +372,4 @@ Configuration items related to the [TiDB Dashboard](/dashboard/dashboard-intro.m ## `replication-mode` -Configuration items related to the replication mode of all Regions. See [Enable synchronous replication in PD configuration file](/synchronous-replication.md#enable-synchronous-replication-in-the-pd-configuration-file) for details. +Configuration items related to the replication mode of all Regions. See [Enable the DR Auto-Sync mode](/two-data-centers-in-one-city-deployment.md#enable-the-dr-auto-sync-mode) for details. \ No newline at end of file diff --git a/pd-control.md b/pd-control.md index fe003c7457ba..0007849bec7b 100644 --- a/pd-control.md +++ b/pd-control.md @@ -28,7 +28,7 @@ If you want to download the latest version of `pd-ctl`, directly download the Ti > **Note:** > -> `{version}` indicates the version number of TiDB. For example, if `{version}` is `v5.1.0`, the package download link is `https://download.pingcap.org/tidb-v5.1.0-linux-amd64.tar.gz`. +> `{version}` indicates the version number of TiDB. For example, if `{version}` is `v5.2.1`, the package download link is `https://download.pingcap.org/tidb-v5.2.1-linux-amd64.tar.gz`. 
### Compile from source code @@ -179,7 +179,7 @@ Usage: } >> config show cluster-version // Display the current version of the cluster, which is the current minimum version of TiKV nodes in the cluster and does not correspond to the binary version. -"5.1.0" +"5.2.1" ``` - `max-snapshot-count` controls the maximum number of snapshots that a single store receives or sends out at the same time. The scheduler is restricted by this configuration to avoid taking up normal application resources. When you need to improve the speed of adding replicas or balancing, increase this value. @@ -308,7 +308,7 @@ Usage: config set cluster-version 1.0.8 // Set the version of the cluster to 1.0.8 ``` -- `replication-mode` controls the replication mode of Regions in the dual data center scenario. See [Change replication mode manually](/synchronous-replication.md#change-the-replication-mode-manually) for details. +- `replication-mode` controls the replication mode of Regions in the dual data center scenario. See [Enable the DR Auto-Sync mode](/two-data-centers-in-one-city-deployment.md#enable-the-dr-auto-sync-mode) for details. - `leader-schedule-policy` is used to select the scheduling strategy for the leader. You can schedule the leader according to `size` or `count`. 
@@ -700,7 +700,8 @@ Usage: >> scheduler add evict-leader-scheduler 1 // Move all the Region leaders on store 1 out >> scheduler config evict-leader-scheduler // Display the stores in which the scheduler is located since v4.0.0 >> scheduler add shuffle-leader-scheduler // Randomly exchange the leader on different stores ->> scheduler add shuffle-region-scheduler // Randomly scheduling the regions on different stores +>> scheduler add shuffle-region-scheduler // Randomly scheduling the Regions on different stores +>> scheduler add evict-slow-store-scheduler // When there is one and only one slow store, evict all Region leaders of that store >> scheduler remove grant-leader-scheduler-1 // Remove the corresponding scheduler, and `-1` corresponds to the store ID >> scheduler pause balance-region-scheduler 10 // Pause the balance-region scheduler for 10 seconds >> scheduler pause all 10 // Pause all schedulers for 10 seconds @@ -720,30 +721,52 @@ Usage: { "min-hot-byte-rate": 100, "min-hot-key-rate": 10, + "min-hot-query-rate": 10, "max-zombie-rounds": 3, "max-peer-number": 1000, "byte-rate-rank-step-ratio": 0.05, "key-rate-rank-step-ratio": 0.05, + "query-rate-rank-step-ratio": 0.05, "count-rank-step-ratio": 0.01, "great-dec-ratio": 0.95, "minor-dec-ratio": 0.99, - "src-tolerance-ratio": 1.02, - "dst-tolerance-ratio": 1.02 + "src-tolerance-ratio": 1.05, + "dst-tolerance-ratio": 1.05, + "read-priorities": [ + "query", + "byte" + ], + "write-leader-priorities": [ + "key", + "byte" + ], + "write-peer-priorities": [ + "byte", + "key" + ], + "strict-picking-store": "true", + "enable-for-tiflash": "true" } ``` -- `min-hot-byte-rate` means the smallest byte counted, which is usually 100. +- `min-hot-byte-rate` means the smallest number of bytes to be counted, which is usually 100. ```bash >> scheduler config balance-hot-region-scheduler set min-hot-byte-rate 100 ``` -- `min-hot-key-rate` means the smallest key counted, which is usually 10. 
+- `min-hot-key-rate` means the smallest number of keys to be counted, which is usually 10. ```bash >> scheduler config balance-hot-region-scheduler set min-hot-key-rate 10 ``` +- `min-hot-query-rate` means the smallest number of queries to be counted, which is usually 10. + + ```bash + >> scheduler config balance-hot-region-scheduler set min-hot-query-rate 10 + ``` + - `max-zombie-rounds` means the maximum number of heartbeats with which an operator can be considered as the pending influence. If you set it to a larger value, more operators might be included in the pending influence. Usually, you do not need to adjust its value. Pending influence refers to the operator influence that is generated during scheduling but still has an effect. ```bash @@ -756,7 +779,7 @@ Usage: >> scheduler config balance-hot-region-scheduler set max-peer-number 1000 ``` -- `byte-rate-rank-step-ratio`, `key-rate-rank-step-ratio`, and `count-rank-step-ratio` respectively mean the step ranks of byte, key, and count. The rank step ratio decides the step when the rank is calculated. `great-dec-ratio` and `minor-dec-ratio` are used to determine the `dec` rank. Usually, you do not need to modify these items. +- `byte-rate-rank-step-ratio`, `key-rate-rank-step-ratio`, `query-rate-rank-step-ratio`, and `count-rank-step-ratio` respectively mean the step ranks of byte, key, query, and count. The rank-step-ratio decides the step when the rank is calculated. `great-dec-ratio` and `minor-dec-ratio` are used to determine the `dec` rank. Usually, you do not need to modify these items. ```bash >> scheduler config balance-hot-region-scheduler set byte-rate-rank-step-ratio 0.05 @@ -765,7 +788,32 @@ Usage: - `src-tolerance-ratio` and `dst-tolerance-ratio` are configuration items for the expectation scheduler. The smaller the `tolerance-ratio`, the easier it is for scheduling. When redundant scheduling occurs, you can appropriately increase this value. 
```bash - >> scheduler config balance-hot-region-scheduler set src-tolerance-ratio 1.05 + >> scheduler config balance-hot-region-scheduler set src-tolerance-ratio 1.1 + ``` + +- `read-priorities`, `write-leader-priorities`, and `write-peer-priorities` control which dimension the scheduler prioritizes for hot Region scheduling. Two dimensions are supported for configuration. + + - `read-priorities` and `write-leader-priorities` control which dimensions the scheduler prioritizes for scheduling hot Regions of the read and write-leader types. The dimension options are `query`, `byte`, and `key`. + - `write-peer-priorities` controls which dimensions the scheduler prioritizes for scheduling hot Regions of the write-peer type. The dimension options are `byte` and `key`. + + > **Note:** + > + > If a cluster component is earlier than v5.2, the configuration of `query` dimension does not take effect. If some components are upgraded to v5.2 or later, the `byte` and `key` dimensions still by default have the priority for hot Region scheduling. After all components of the cluster are upgraded to v5.2 or later, such a configuration still takes effect for compatibility. You can view the real-time configuration using the `pd-ctl` command. Usually, you do not need to modify these configurations. + + ```bash + >> scheduler config balance-hot-region-scheduler set read-priorities query,byte + ``` + +- `strict-picking-store` controls the search space of hot Region scheduling. Usually, it is enabled. When it is enabled, hot Region scheduling ensures hotspot balance on the two configured dimensions. When it is disabled, hot Region scheduling only ensures the balance on the dimension with the first priority, which might reduce balance on other dimensions. Usually, you do not need to modify this configuration. 
+ + ```bash + >> scheduler config balance-hot-region-scheduler set strict-picking-store true + ``` + +- `enable-for-tiflash` controls whether hot Region scheduling takes effect for TiFlash instances. Usually, it is enabled. When it is disabled, the hot Region scheduling between TiFlash instances is not performed. + + ```bash + >> scheduler config balance-hot-region-scheduler set enable-for-tiflash true ``` ### `store [delete | label | weight | remove-tombstone | limit | limit-scene] [--jq=""]` diff --git a/pd-recover.md b/pd-recover.md index 255fa2f22f00..2630f4a8679c 100644 --- a/pd-recover.md +++ b/pd-recover.md @@ -27,7 +27,7 @@ To download the latest version of PD Recover, directly download the TiDB package > **Note:** > -> `{version}` indicates the version number of TiDB. For example, if `{version}` is `v5.1.0`, the package download link is `https://download.pingcap.org/tidb-v5.1.0-linux-amd64.tar.gz`. +> `{version}` indicates the version number of TiDB. For example, if `{version}` is `v5.2.1`, the package download link is `https://download.pingcap.org/tidb-v5.2.1-linux-amd64.tar.gz`. ## Quick Start diff --git a/post-installation-check.md b/post-installation-check.md index a44ba6300031..b97f3b92fadf 100644 --- a/post-installation-check.md +++ b/post-installation-check.md @@ -53,9 +53,11 @@ Log in to the database by running the following command: {{< copyable "shell-regular" >}} ```shell -mysql -u root -h 10.0.1.4 -P 4000 +mysql -u root -h ${tidb_server_host_IP_address} -P 4000 ``` +`${tidb_server_host_IP_address}` is one of the IP addresses set for `tidb_servers` when you [initialize the cluster topology file](/production-deployment-using-tiup.md#step-3-initialize-cluster-topology-file), such as `10.0.1.7`. 
+ The following information indicates successful login: ```sql diff --git a/predicate-push-down.md b/predicate-push-down.md index c2be2f77c630..b574db739fed 100644 --- a/predicate-push-down.md +++ b/predicate-push-down.md @@ -108,7 +108,7 @@ explain select * from t left join s on t.a = s.a where s.a is null; 6 rows in set (0.00 sec) ``` -In this query,there is a predicate `s.a is null` on the inner table `s`。 +In this query,there is a predicate `s.a is null` on the inner table `s`. From the `explain` results,we can see that the predicate is not pushed below join operator. This is because the outer join fills the inner table with `NULL` values when the `on` condition isn't satisfied, and the predicate `s.a is null` is used to filter the results after the join. If it is pushed down to the inner table below join, the execution plan is not equivalent to the original one. diff --git a/production-deployment-using-tiup.md b/production-deployment-using-tiup.md index 73bc55696759..e4ed3589af37 100644 --- a/production-deployment-using-tiup.md +++ b/production-deployment-using-tiup.md @@ -133,6 +133,52 @@ To prepare the TiUP offline component package, manually pack an offline componen `tidb-community-server-${version}-linux-amd64.tar.gz` is an independent offline environment package. +3. Customize the offline mirror, or adjust the contents of an existing offline mirror. + + If you want to adjust an existing offline mirror (such as adding a new version of a component), take the following steps: + + 1. When pulling an offline mirror, you can get an incomplete offline mirror by specifying specific information via parameters, such as the component and version information. 
For example, you can pull an offline mirror that includes only the offline mirror of TiUP v1.5.2 and TiUP Cluster v1.5.2 by running the following command: + + {{< copyable "shell-regular" >}} + + ```bash + tiup mirror clone tiup-custom-mirror-v1.5.2 --tiup v1.5.2 --cluster v1.5.2 + ``` + + If you only need the components for a particular platform, you can specify them using the `--os` or `--arch` parameters. + + 2. Refer to the step 2 of "Pull the mirror using TiUP", and send this incomplete offline mirror to the control machine in the isolated environment. + + 3. Check the path of the current offline mirror on the control machine in the isolated environment. If your TiUP tool is of a recent version, you can get the current mirror address by running the following command: + + {{< copyable "shell-regular" >}} + + ```bash + tiup mirror show + ``` + + If the output of the above command indicates that the `show` command does not exist, you might be using an older version of TiUP. In this case, you can get the current mirror address from `$HOME/.tiup/tiup.toml`. Record this mirror address. In the following steps, `${base_mirror}` is used to refer to this address. + + 4. Merge an incomplete offline mirror into an existing offline mirror: + + First, copy the `keys` directory in the current offline mirror to the `$HOME/.tiup` directory: + + {{< copyable "shell-regular" >}} + + ```bash + cp -r ${base_mirror}/keys $HOME/.tiup/ + ``` + + Then use the TiUP command to merge the incomplete offline mirror into the mirror in use: + + {{< copyable "shell-regular" >}} + + ```bash + tiup mirror merge tiup-custom-mirror-v1.5.2 + ``` + + 5. When the above steps are completed, check the result by running the `tiup list` command. In this document's example, the outputs of both `tiup list tiup` and `tiup list cluster` show that the corresponding components of `v1.5.2` are available. 
+ #### Step 2: Deploy the offline TiUP component After sending the package to the control machine of the target cluster, install the TiUP component by running the following commands: @@ -228,7 +274,7 @@ The following examples cover six common scenarios. You need to modify the config > > - For parameters that should be globally effective, configure these parameters of corresponding components in the `server_configs` section of the configuration file. > - For parameters that should be effective on a specific node, configure these parameters in the `config` of this node. -> - Use `.` to indicate the subcategory of the configuration, such as `log.slow-threshold`. For more formats, see [TiUP configuration template](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/topology.example.yaml). +> - Use `.` to indicate the subcategory of the configuration, such as `log.slow-threshold`. For more formats, see [TiUP configuration template](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml). > - For more parameter description, see [TiDB `config.toml.example`](https://github.com/pingcap/tidb/blob/master/config/config.toml.example), [TiKV `config.toml.example`](https://github.com/tikv/tikv/blob/master/etc/config-template.toml), [PD `config.toml.example`](https://github.com/pingcap/pd/blob/master/conf/config.toml), and [TiFlash configuration](/tiflash/tiflash-configuration.md). ## Step 4: Execute the deployment command @@ -260,17 +306,17 @@ Then execute the `deploy` command to deploy the TiDB cluster: {{< copyable "shell-regular" >}} ```shell -tiup cluster deploy tidb-test v5.1.0 ./topology.yaml --user root [-p] [-i /home/root/.ssh/gcp_rsa] +tiup cluster deploy tidb-test v5.2.1 ./topology.yaml --user root [-p] [-i /home/root/.ssh/gcp_rsa] ``` In the above command: - The name of the deployed TiDB cluster is `tidb-test`. -- You can see the latest supported versions by running `tiup list tidb`. 
This document takes `v5.1.0` as an example. +- You can see the latest supported versions by running `tiup list tidb`. This document takes `v5.2.1` as an example. - The initialization configuration file is `topology.yaml`. - `--user root`: Log in to the target machine through the `root` key to complete the cluster deployment, or you can use other users with `ssh` and `sudo` privileges to complete the deployment. - `[-i]` and `[-p]`: optional. If you have configured login to the target machine without password, these parameters are not required. If not, choose one of the two parameters. `[-i]` is the private key of the `root` user (or other users specified by `--user`) that has access to the target machine. `[-p]` is used to input the user password interactively. -- If you need to specify the user group name to be created on the target machine, see [this example](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/topology.example.yaml#L7). +- If you need to specify the user group name to be created on the target machine, see [this example](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml#L7). At the end of the output log, you will see ```Deployed cluster `tidb-test` successfully```. This indicates that the deployment is successful. @@ -288,7 +334,7 @@ TiUP supports managing multiple TiDB clusters. 
The command above outputs informa Starting /home/tidb/.tiup/components/cluster/v1.5.0/cluster list Name User Version Path PrivateKey ---- ---- ------- ---- ---------- -tidb-test tidb v5.1.0 /home/tidb/.tiup/storage/cluster/clusters/tidb-test /home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa +tidb-test tidb v5.2.1 /home/tidb/.tiup/storage/cluster/clusters/tidb-test /home/tidb/.tiup/storage/cluster/clusters/tidb-test/ssh/id_rsa ``` ## Step 6: Check the status of the deployed TiDB cluster diff --git a/quick-start-with-htap.md b/quick-start-with-htap.md new file mode 100644 index 000000000000..e5b7d3e8366b --- /dev/null +++ b/quick-start-with-htap.md @@ -0,0 +1,213 @@ +--- +title: Quick start with HTAP +summary: Learn how to quickly get started with the TiDB HTAP. +--- + +# Quick Start Guide for TiDB HTAP + +This guide walks you through the quickest way to get started with TiDB's one-stop solution of Hybrid Transactional and Analytical Processing (HTAP). + +> **Note:** +> +> The steps provided in this guide is ONLY for quick start in the test environment. For production environments, [explore HTAP](/explore-htap.md) is recommended. + +## Basic concepts + +Before using TiDB HTAP, you need to have some basic knowledge about [TiKV](/tikv-overview.md), a row-based storage engine for TiDB Online Transactional Processing (OLTP), and [TiFlash](/tiflash/tiflash-overview.md), a columnar storage engine for TiDB Online Analytical Processing (OLAP). + +- Storage engines of HTAP: The row-based storage engine and the columnar storage engine co-exist for HTAP. Both storage engines can replicate data automatically and keep strong consistency. The row-based storage engine optimizes OLTP performance, and the columnar storage engine optimizes OLAP performance. 
+- Data consistency of HTAP: As a distributed and transactional key-value database, TiKV provides transactional interfaces with ACID compliance, and guarantees data consistency between multiple replicas and high availability with the implementation of the [Raft consensus algorithm](https://raft.github.io/raft.pdf). As a columnar storage extension of TiKV, TiFlash replicates data from TiKV in real time according to the Raft Learner consensus algorithm, which ensures that data is strongly consistent between TiKV and TiFlash. +- Data isolation of HTAP: TiKV and TiFlash can be deployed on different machines as needed to solve the problem of HTAP resource isolation. +- MPP computing engine: [MPP](/tiflash/use-tiflash.md#control-whether-to-select-the-mpp-mode) is a distributed computing framework provided by the TiFlash engine since TiDB 5.0, which allows data exchange between nodes and provides high-performance, high-throughput SQL algorithms. In the MPP mode, the run time of the analytic queries can be significantly reduced. + +## Steps + +In this document, you can experience the convenience and high performance of TiDB HTAP by querying an example table in a popular [TPC-H](http://www.tpc.org/tpch/) dataset. + +### Step 1. Deploy a local test environment + +Before using TiDB HTAP, follow the steps in the [Quick Start Guide for the TiDB Database Platform](/quick-start-with-tidb.md) to prepare a local test environment, and run the following command to deploy a TiDB cluster: + +{{< copyable "shell-regular" >}} + +```shell +tiup playground +``` + +> **Note:** +> +> `tiup playground` command is ONLY for quick start, NOT for production. + +### Step 2. Prepare test data + +In the following steps, you can create a [TPC-H](http://www.tpc.org/tpch/) dataset as the test data to use TiDB HTAP. If you are interested in TPC-H, see [General Implementation Guidelines](http://tpc.org/tpc_documents_current_versions/pdf/tpc-h_v3.0.0.pdf). 
+ +> **Note:** +> +> If you want to use your existing data for analytic queries, you can [migrate your data to TiDB](/migration-overview.md). If you want to design and create your own test data, you can create it by executing SQL statements or using related tools. + +1. Install the test data generation tool by running the following command: + + {{< copyable "shell-regular" >}} + + ```shell + tiup install bench + ``` + +2. Generate the test data by running the following command: + + {{< copyable "shell-regular" >}} + + ```shell + tiup bench tpch --sf=1 prepare + ``` + + If the output of this command shows `Finished`, it indicates that the data is created. + +3. Execute the following SQL statement to view the generated data: + + {{< copyable "sql" >}} + + ```sql + SELECT + CONCAT(table_schema,'.',table_name) AS 'Table Name', + table_rows AS 'Number of Rows', + FORMAT_BYTES(data_length) AS 'Data Size', + FORMAT_BYTES(index_length) AS 'Index Size', + FORMAT_BYTES(data_length+index_length) AS'Total' + FROM + information_schema.TABLES + WHERE + table_schema='test'; + ``` + + As you can see from the output, eight tables are created in total, and the largest table has 6.5 million rows (the number of rows created by the tool depends on the actual SQL query result because the data is randomly generated). 
+ + ```sql + +---------------+----------------+-----------+------------+-----------+ + | Table Name | Number of Rows | Data Size | Index Size | Total | + +---------------+----------------+-----------+------------+-----------+ + | test.nation | 25 | 2.44 KiB | 0 bytes | 2.44 KiB | + | test.region | 5 | 416 bytes | 0 bytes | 416 bytes | + | test.part | 200000 | 25.07 MiB | 0 bytes | 25.07 MiB | + | test.supplier | 10000 | 1.45 MiB | 0 bytes | 1.45 MiB | + | test.partsupp | 800000 | 120.17 MiB| 12.21 MiB | 132.38 MiB| + | test.customer | 150000 | 24.77 MiB | 0 bytes | 24.77 MiB | + | test.orders | 1527648 | 174.40 MiB| 0 bytes | 174.40 MiB| + | test.lineitem | 6491711 | 849.07 MiB| 99.06 MiB | 948.13 MiB| + +---------------+----------------+-----------+------------+-----------+ + 8 rows in set (0.06 sec) + ``` + + This is a database of a commercial ordering system. In which, the `test.nation` table indicates the information about countries, the `test.region` table indicates the information about regions, the `test.part` table indicates the information about parts, the `test.supplier` table indicates the information about suppliers, the `test.partsupp` table indicates the information about parts of suppliers, the `test.customer` table indicates the information about customers, the `test.customer` table indicates the information about orders, and the `test.lineitem` table indicates the information about online items. + +### Step 3. 
Query data with the row-based storage engine + +To know the performance of TiDB with only the row-based storage engine, execute the following SQL statements: + +{{< copyable "sql" >}} + +```sql +SELECT + l_orderkey, + SUM( + l_extendedprice * (1 - l_discount) + ) AS revenue, + o_orderdate, + o_shippriority +FROM + customer, + orders, + lineitem +WHERE + c_mktsegment = 'BUILDING' +AND c_custkey = o_custkey +AND l_orderkey = o_orderkey +AND o_orderdate < DATE '1996-01-01' +AND l_shipdate > DATE '1996-02-01' +GROUP BY + l_orderkey, + o_orderdate, + o_shippriority +ORDER BY + revenue DESC, + o_orderdate +limit 10; +``` + +This is a shipping priority query, which provides the priority and potential revenue of the highest-revenue order that has not been shipped before a specified date. The potential revenue is defined as the sum of `l_extendedprice * (1-l_discount)`. The orders are listed in the descending order of revenue. In this example, this query lists the unshipped orders with potential query revenue in the top 10. + +### Step 4. Replicate the test data to the columnar storage engine + +After TiFlash is deployed, TiKV does not replicate data to TiFlash immediately. You need to execute the following DDL statements in a MySQL client of TiDB to specify which tables need to be replicated. After that, TiDB will create the specified replicas in TiFlash accordingly. 
+ +{{< copyable "sql" >}} + +```sql +ALTER TABLE test.customer SET TIFLASH REPLICA 1; +ALTER TABLE test.orders SET TIFLASH REPLICA 1; +ALTER TABLE test.lineitem SET TIFLASH REPLICA 1; +``` + +To check the replication status of the specific tables, execute the following statements: + +{{< copyable "sql" >}} + +```sql +SELECT * FROM information_schema.tiflash_replica WHERE TABLE_SCHEMA = 'test' and TABLE_NAME = 'customer'; +SELECT * FROM information_schema.tiflash_replica WHERE TABLE_SCHEMA = 'test' and TABLE_NAME = 'orders'; +SELECT * FROM information_schema.tiflash_replica WHERE TABLE_SCHEMA = 'test' and TABLE_NAME = 'lineitem'; +``` + +In the result of the above statements: + +- `AVAILABLE` indicates whether the TiFlash replica of a specific table is available or not. `1` means available and `0` means unavailable. Once a replica becomes available, this status does not change any more. If you use DDL statements to modify the number of replicas, the replication status will be recalculated. +- `PROGRESS` means the progress of the replication. The value is between 0.0 and 1.0. 1 means at least one replica is replicated. + +### Step 5. Analyze data faster using HTAP + +Execute the SQL statements in [Step 3](#step-3-query-data-with-the-row-based-storage-engine) again, and you can see the performance of TiDB HTAP. + +For tables with TiFlash replicas, the TiDB optimizer automatically determines whether to use TiFlash replicas based on the cost estimation. To check whether or not a TiFlash replica is selected, you can use the `desc` or `explain analyze` statement. 
For example: + +{{< copyable "sql" >}} + +```sql +explain analyze SELECT + l_orderkey, + SUM( + l_extendedprice * (1 - l_discount) + ) AS revenue, + o_orderdate, + o_shippriority +FROM + customer, + orders, + lineitem +WHERE + c_mktsegment = 'BUILDING' +AND c_custkey = o_custkey +AND l_orderkey = o_orderkey +AND o_orderdate < DATE '1996-01-01' +AND l_shipdate > DATE '1996-02-01' +GROUP BY + l_orderkey, + o_orderdate, + o_shippriority +ORDER BY + revenue DESC, + o_orderdate +limit 10; +``` + +If the result of the `EXPLAIN` statement shows `ExchangeSender` and `ExchangeReceiver` operators, it indicates that the MPP mode has taken effect. + +In addition, you can specify that each part of the entire query is computed using only the TiFlash engine. For detailed information, see [Use TiDB to read TiFlash replicas](/tiflash/use-tiflash.md#use-tidb-to-read-tiflash-replicas). + +You can compare query results and query performance of these two methods. + +## What's next + +- [Architecture of TiDB HTAP](/tiflash/tiflash-overview.md#architecture) +- [Explore HTAP](/explore-htap.md) +- [Use TiFlash](/tiflash/use-tiflash.md#use-tiflash) diff --git a/quick-start-with-tidb.md b/quick-start-with-tidb.md index 70b24fdc8814..ab5d4c134b53 100644 --- a/quick-start-with-tidb.md +++ b/quick-start-with-tidb.md @@ -21,10 +21,6 @@ This guide walks you through the quickest way to get started with TiDB. You will ## Deploy a local test environment on Mac OS -> **Note:** -> -> Currently, some TiDB components do not have a released version that supports the Apple M1 chip. Therefore, the `tiup playground` command currently cannot be executed on the local Mac machine that uses the Apple M1 chip. - As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB instances, 3 TiKV instances, 3 PD instances, and optional TiFlash instances. With TiUP Playground, you can quickly build the test cluster by taking the following steps: 1. 
Download and install TiUP: @@ -62,10 +58,10 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in {{< copyable "shell-regular" >}} ```shell - tiup playground v5.1.0 --db 2 --pd 3 --kv 3 --monitor + tiup playground v5.2.1 --db 2 --pd 3 --kv 3 --monitor ``` - The command downloads a version cluster to the local machine and starts it, such as v5.1.0. `--monitor` means that the monitoring component is also deployed. + The command downloads a version cluster to the local machine and starts it, such as v5.2.1. `--monitor` means that the monitoring component is also deployed. To view the latest version, run `tiup list tidb`. @@ -81,8 +77,9 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in > **Note:** > - > For the playground operated in this way, after the test deployment is finished, TiUP will clean up the original cluster data. You will get a new cluster after re-running the command. - > If you want the data to be persisted on storage,run `tiup --tag playground ...`. For details, refer to [TiUP Reference Guide](/tiup/tiup-reference.md#-t---tag). + > + Since v5.2.1, TiDB supports running `tiup playground` on the machine that uses the Apple M1 chip. + > + For the playground operated in this way, after the test deployment is finished, TiUP will clean up the original cluster data. You will get a new cluster after re-running the command. + > + If you want the data to be persisted on storage,run `tiup --tag playground ...`. For details, refer to [TiUP Reference Guide](/tiup/tiup-reference.md#-t---tag). 4. Start a new session to access TiDB: @@ -167,10 +164,10 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in {{< copyable "shell-regular" >}} ```shell - tiup playground v5.1.0 --db 2 --pd 3 --kv 3 --monitor + tiup playground v5.2.1 --db 2 --pd 3 --kv 3 --monitor ``` - The command downloads a version cluster to the local machine and starts it, such as v5.1.0. 
`--monitor` means that the monitoring component is also deployed. + The command downloads a version cluster to the local machine and starts it, such as v5.2.1. `--monitor` means that the monitoring component is also deployed. To view the latest version, run `tiup list tidb`. @@ -178,10 +175,11 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in ```log CLUSTER START SUCCESSFULLY, Enjoy it ^-^ - To connect TiDB: mysql --host 127.0.0.1 --port 4000 -u root - To connect TiDB: mysql --host 127.0.0.1 --port 4001 -u root + To connect TiDB: mysql --host 127.0.0.1 --port 4000 -u root -p (no password) --comments To view the dashboard: http://127.0.0.1:2379/dashboard - To view the monitor: http://127.0.0.1:9090 + PD client endpoints: [127.0.0.1:2379] + To view the Prometheus: http://127.0.0.1:9090 + To view the Grafana: http://127.0.0.1:3000 ``` > **Note:** @@ -211,9 +209,11 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in 6. Access the [TiDB Dashboard](/dashboard/dashboard-intro.md) at . The default username is `root`, with an empty password. -7. (Optional) [Load data to TiFlash](/tiflash/use-tiflash.md) for analysis. +7. Access the Grafana dashboard of TiDB through . Both the default username and password are `admin`. -8. Clean up the cluster after the test deployment: +8. (Optional) [Load data to TiFlash](/tiflash/use-tiflash.md) for analysis. + +9. Clean up the cluster after the test deployment: 1. Stop the process by pressing `ctrl-c`. diff --git a/releases/release-4.0.14.md b/releases/release-4.0.14.md new file mode 100644 index 000000000000..69e680504a18 --- /dev/null +++ b/releases/release-4.0.14.md @@ -0,0 +1,166 @@ +--- +title: TiDB 4.0.14 Release Notes +--- + +# TiDB 4.0.14 Release Notes + +Release date: July 27, 2021 + +TiDB version: 4.0.14 + +## Compatibility changes + ++ TiDB + + - Change the default value of `tidb_multi_statement_mode` from `WARN` to `OFF` in v4.0. 
It is recommended to use the multi-statement feature of your client library instead. See [the documentation on `tidb_multi_statement_mode`](/system-variables.md#tidb_multi_statement_mode-new-in-v4011) for details. [#25749](https://github.com/pingcap/tidb/pull/25749) + - Upgrade Grafana dashboard from v6.1.16 to v7.5.7 to solve two security vulnerabilities. See the [Grafana blog post](https://grafana.com/blog/2020/06/03/grafana-6.7.4-and-7.0.2-released-with-important-security-fix/) for details. + - Change the default value of the `tidb_stmt_summary_max_stmt_count` variable from `200` to `3000` [#25872](https://github.com/pingcap/tidb/pull/25872) + ++ TiKV + + - Change the default value of `merge-check-tick-interval` from `10` to `2` to speed up the Region merge process [#9676](https://github.com/tikv/tikv/pull/9676) + +## Feature enhancements + ++ TiKV + + - Add a metric `pending` to monitor the number of pending PD heartbeats, which helps locate the issue of slow PD threads [#10008](https://github.com/tikv/tikv/pull/10008) + - Support using the virtual-host addressing mode to make BR support the S3-compatible storage [#10242](https://github.com/tikv/tikv/pull/10242) + ++ TiDB Dashboard + + - Support OIDC SSO. By setting the OIDC-compatible SSO services (such as Okta and Auth0), users can log into TiDB Dashboard without entering the SQL password. 
[#960](https://github.com/pingcap/tidb-dashboard/pull/960) + - Add the **Debug API** UI, which is an alternative method to the command line to call several common TiDB and PD internal APIs for advanced debugging [#927](https://github.com/pingcap/tidb-dashboard/pull/927) + +## Improvements + ++ TiDB + + - Change the `LOCK` record into the `PUT` record for the index keys using `point get` or `batch point get` for `UPDATE` reads [#26223](https://github.com/pingcap/tidb/pull/26223) + - Support the MySQL system variable `init_connect` and its associated features [#26031](https://github.com/pingcap/tidb/pull/26031) + - Support the stable result mode to make the query results more stable [#26003](https://github.com/pingcap/tidb/pull/26003) + - Support pushing down the built-in function `json_unquote()` to TiKV [#25721](https://github.com/pingcap/tidb/pull/25721) + - Make the SQL Plan Management (SPM) not affected by the character set [#23295](https://github.com/pingcap/tidb/pull/23295) + ++ TiKV + + - Shutdown the status server first to make sure that the client can correctly check the shutdown status [#10504](https://github.com/tikv/tikv/pull/10504) + - Always respond to stale peers to make sure that these peers are cleared quicker [#10400](https://github.com/tikv/tikv/pull/10400) + - Limit the TiCDC sink's memory consumption [#10147](https://github.com/tikv/tikv/pull/10147) + - When a Region is too large, use the even split to speed up the split process [#10275](https://github.com/tikv/tikv/pull/10275) + ++ PD + + - Reduce the conflicts among multiple schedulers that run at the same time [#3858](https://github.com/pingcap/pd/pull/3858) [#3854](https://github.com/tikv/pd/pull/3854) + ++ TiDB Dashboard + + - Update TiDB Dashboard to v2021.07.17.1 [#3882](https://github.com/pingcap/pd/pull/3882) + - Support sharing the current session as a read-only session to avoid further modification to it [#960](https://github.com/pingcap/tidb-dashboard/pull/960) + ++ Tools + + + 
Backup & Restore (BR) + + - Speed up restore by merging small backup files [#655](https://github.com/pingcap/br/pull/655) + + + Dumpling + + - Always split tables using `_tidb_rowid` when the upstream is a TiDB v3.x cluster, which helps reduce TiDB's memory usage [#306](https://github.com/pingcap/dumpling/pull/306) + + + TiCDC + + - Improve the error message returned when a PD endpoint misses the certificate [#1973](https://github.com/pingcap/ticdc/issues/1973) + - Make the sorter I/O errors more user-friendly [#1976](https://github.com/pingcap/ticdc/pull/1976) + - Add a concurrency limit on the Region incremental scan in the KV client to reduce the pressure of TiKV [#1926](https://github.com/pingcap/ticdc/pull/1926) + - Add metrics for the table memory consumption [#1884](https://github.com/pingcap/ticdc/pull/1884) + - Add `capture-session-ttl` to the TiCDC server configuration [#2169](https://github.com/pingcap/ticdc/pull/2169) + +## Bug fixes + ++ TiDB + + - Fix the issue that the `SELECT` result is incompatible with MySQL when joining a subquery with a `WHERE` clause evaluated to `false` [#24865](https://github.com/pingcap/tidb/issues/24865) + - Fix the calculation error of the `ifnull` function that occurs when the argument is the `ENUM` or `SET` type [#24944](https://github.com/pingcap/tidb/issues/24944) + - Fix the wrong aggregate pruning in some cases [#25202](https://github.com/pingcap/tidb/issues/25202) + - Fix the incorrect result of the merge join operation that might occur when the column is the `SET` type [#25669](https://github.com/pingcap/tidb/issues/25669) + - Fix the issue that TiDB returns wrong results for cartesian join [#25591](https://github.com/pingcap/tidb/issues/25591) + - Fix the panic issue that occurs when `SELECT ... 
FOR UPDATE` works on a join operation and the join uses a partitioned table [#20028](https://github.com/pingcap/tidb/issues/20028) + - Fix the issue that the cached `prepared` plan is incorrectly used for `point get` [#24741](https://github.com/pingcap/tidb/issues/24741) + - Fix the issue that the `LOAD DATA` statement can abnormally import non-utf8 data [#25979](https://github.com/pingcap/tidb/issues/25979) + - Fix a potential memory leak issue that occurs when accessing the statistics via an HTTP API [#24650](https://github.com/pingcap/tidb/pull/24650) + - Fix a security issue that occurs when executing the `ALTER USER` statement [#25225](https://github.com/pingcap/tidb/issues/25225) + - Fix a bug that the `TIKV_REGION_PEERS` table cannot correctly handle the `DOWN` status [#24879](https://github.com/pingcap/tidb/issues/24879) + - Fix the issue that invalid strings are not truncated when parsing `DateTime` [#22231](https://github.com/pingcap/tidb/issues/22231) + - Fix the issue that the `select into outfile` statement might have no result when the column type is `YEAR` [#22159](https://github.com/pingcap/tidb/issues/22159) + - Fix the issue that the query result might be wrong when `NULL` is in the `UNION` subquery [#26532](https://github.com/pingcap/tidb/issues/26532) + - Fix the issue that the projection operator in execution might cause panic in some cases [#26534](https://github.com/pingcap/tidb/pull/26534) + ++ TiKV + + - Fix the issue that the duration calculation might panic on certain platforms [#related-issue](https://github.com/rust-lang/rust/issues/86470#issuecomment-877557654) + - Fix the wrong function that casts `DOUBLE` to `DOUBLE` [#25200](https://github.com/pingcap/tidb/issues/25200) + - Fix the issue that the panic log might be lost when using the async logger [#8998](https://github.com/tikv/tikv/issues/8998) + - Fix the panic issue that occurs when building a snapshot twice if encryption is enabled 
[#9786](https://github.com/tikv/tikv/issues/9786) [#10407](https://github.com/tikv/tikv/issues/10407) + - Fix the wrong arguments type of the `json_unquote()` function in the coprocessor [#10176](https://github.com/tikv/tikv/issues/10176) + - Fix the issues of suspicious warnings during shutdown and the non-deterministic response from Raftstore [#10353](https://github.com/tikv/tikv/issues/10353) [#10307](https://github.com/tikv/tikv/issues/10307) + - Fix the issue of backup threads leak [#10287](https://github.com/tikv/tikv/issues/10287) + - Fix the issue that Region split might panic and corrupt the metadata if the split process is too slow and Region merge is on-going [#8456](https://github.com/tikv/tikv/issues/8456) [#8783](https://github.com/tikv/tikv/issues/8783) + - Fix the issue that the Region heartbeats prevent TiKV from splitting large Regions in some situations [#10111](https://github.com/tikv/tikv/issues/10111) + - Fix the wrong statistics caused by the format inconsistency of CM Sketch between TiKV and TiDB [#25638](https://github.com/pingcap/tidb/issues/25638) + - Fix the wrong statistics of the `apply wait duration` metric [#9893](https://github.com/tikv/tikv/issues/9893) + - Fix the "Missing Blob" error after using `delete_files_in_range` in Titan [#10232](https://github.com/tikv/tikv/pull/10232) + ++ PD + + - Fix a bug that the scheduler might reappear after executing the delete operation [#2572](https://github.com/tikv/pd/issues/2572) + - Fix the data race issue that might occur when the scheduler is started before the temporary configuration is loaded [#3771](https://github.com/tikv/pd/issues/3771) + - Fix a PD panic issue that might occur during the Region scattering operation [#3761](https://github.com/pingcap/pd/pull/3761) + - Fix the issue that the priority of some operators is not set correctly [#3703](https://github.com/pingcap/pd/pull/3703) + - Fix a PD panic issue that might occur when deleting the `evict-leader` scheduler from a 
non-existent store [#3660](https://github.com/tikv/pd/issues/3660) + - Fix the issue that the PD Leader re-election is slow when there are many stores [#3697](https://github.com/tikv/pd/issues/3697) + ++ TiDB Dashboard + + - Fix the issue that the **Profiling** UI cannot profile all TiDB instances [#944](https://github.com/pingcap/tidb-dashboard/pull/944) + - Fix the issue that the **Statements** UI does not display "Plan Count" [#939](https://github.com/pingcap/tidb-dashboard/pull/939) + - Fix the issue that the **Slow Query** UI might display the "unknown field" error after cluster upgrade [#902](https://github.com/pingcap/tidb-dashboard/issues/902) + ++ TiFlash + + - Fix the potential panic issue that occurs when compiling DAG requests + - Fix the panic issue that occurs when the read load is heavy + - Fix the issue that TiFlash keeps restarting because of the split failure in column storage + - Fix a potential bug that TiFlash cannot delete the delta data + - Fix the incorrect results that occur when cloning the shared delta index concurrently + - Fix a bug that TiFlash fails to restart in the case of incomplete data + - Fix the issue that the old dm files cannot be removed automatically + - Fix the panic issue that occurs when executing the `SUBSTRING` function with specific arguments + - Fix the issue of incorrect results when casting the `INTEGER` type to the `TIME` type + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that the data restore from the `mysql` schema might fail [#1142](https://github.com/pingcap/br/pull/1142) + + + TiDB Lightning + + - Fix the issue that TiDB Lightning fails to parse the `DECIMAL` type data in Parquet files [#1276](https://github.com/pingcap/br/pull/1276) + - Fix the EOF error reported when TiDB Lightning splits the imported large CSV files [#1133](https://github.com/pingcap/br/issues/1133) + - Fix a bug that an excessively large base value is generated when TiDB Lightning imports tables with the `auto_increment` 
column of the `FLOAT` or `DOUBLE` type [#1185](https://github.com/pingcap/br/pull/1185) + - Fix the issue of TiDB Lightning panic that occurs when generating KV data larger than 4 GB [#1128](https://github.com/pingcap/br/pull/1128) + + + Dumpling + + - When using Dumpling to export data to the S3 storage, the `s3:ListBucket` permission is no longer required on the entire bucket. The permission is required only on the data source prefix. [#898](https://github.com/pingcap/br/issues/898) + + + TiCDC + + - Fix the issue of extra partition dispatching after adding new table partitions [#2205](https://github.com/pingcap/ticdc/pull/2205) + - Fix the panic issue that occurs when TiCDC fails to read `/proc/meminfo` [#2023](https://github.com/pingcap/ticdc/pull/2023) + - Reduce TiCDC's runtime memory consumption [#2011](https://github.com/pingcap/ticdc/pull/2011) [#1957](https://github.com/pingcap/ticdc/pull/1957) + - Fix a bug that some MySQL connection might leak after MySQL sink meets the error and pauses [#1945](https://github.com/pingcap/ticdc/pull/1945) + - Fix the issue that TiCDC changefeed cannot be created when start TS is less than current TS minus GC TTL [#1839](https://github.com/pingcap/ticdc/issues/1839) + - Reduce memory `malloc` in sort heap to avoid too much CPU overhead [#1853](https://github.com/pingcap/ticdc/issues/1853) + - Fix a bug that the replication task might stop when moving a table [#1827](https://github.com/pingcap/ticdc/pull/1827) diff --git a/releases/release-4.0.15.md b/releases/release-4.0.15.md new file mode 100644 index 000000000000..6d2b718b7516 --- /dev/null +++ b/releases/release-4.0.15.md @@ -0,0 +1,139 @@ +--- +title: TiDB 4.0.15 Release Notes +--- + +# TiDB 4.0.15 Release Notes + +Release Date: September 27, 2021 + +TiDB version: 4.0.15 + +## Compatibility changes + ++ TiDB + + - Fix the issue that executing `SHOW VARIABLES` in a new session is slow. 
This fix reverts some changes made in [#21045](https://github.com/pingcap/tidb/pull/21045) and might cause compatibility issues. [#24326](https://github.com/pingcap/tidb/issues/24326) + + The following bug fixes change execution results, which might cause upgrade incompatibilities: + - Fix the issue that `greatest(datetime) union null` returns empty string [#26532](https://github.com/pingcap/tidb/issues/26532) + - Fix the issue that the `having` clause might not work correctly [#26496](https://github.com/pingcap/tidb/issues/26496) + - Fix the wrong execution results that occur when the collations around the `between` expression are different [#27146](https://github.com/pingcap/tidb/issues/27146) + - Fix the wrong result that occurs when the argument of the `extract` function is a negative duration [#27236](https://github.com/pingcap/tidb/issues/27236) + - Fix the wrong execution results that occur when the column in the `group_concat` function has a non-bin collation [#27429](https://github.com/pingcap/tidb/issues/27429) + - Fix the issue of wrong character set and collation for the `case when` expression [#26662](https://github.com/pingcap/tidb/issues/26662) + - Fix the issue that column information is missed when converting the `Apply` operator to `Join` [#27233](https://github.com/pingcap/tidb/issues/27233) + - Fix the issue of unexpected behavior when casting the invalid string to `DATE` [#26762](https://github.com/pingcap/tidb/issues/26762) + - Fix a bug that the `count distinct` result on multiple columns is wrong when the new collation is enabled [#27091](https://github.com/pingcap/tidb/issues/27091) + +## Feature enhancements + ++ TiKV + + - Support changing TiCDC configurations dynamically [#10645](https://github.com/tikv/tikv/issues/10645) + +## Improvements + ++ TiDB + + - Trigger auto-analyze based on the histogram row count [#24237](https://github.com/pingcap/tidb/issues/24237) + ++ TiKV + + - Handle read ready and write ready separately to reduce read
latency [#10475](https://github.com/tikv/tikv/issues/10475) + - The slow log of TiKV coprocessor only considers the time spent on processing requests. [#10841](https://github.com/tikv/tikv/issues/10841) + - Drop log instead of blocking threads when the slogger thread is overloaded and the queue is filled up [#10841](https://github.com/tikv/tikv/issues/10841) + - Reduce the size of Resolved TS messages to save network bandwidth [#2448](https://github.com/pingcap/ticdc/issues/2448) + ++ PD + + - Improve the performance of synchronizing Region information between PDs [#3932](https://github.com/tikv/pd/pull/3932) + ++ Tools + + + Backup & Restore (BR) + + - Split and scatter Regions concurrently to improve restore speed [#1363](https://github.com/pingcap/br/pull/1363) + - Retry BR tasks when encountering the PD request error or the TiKV I/O timeout error [#27787](https://github.com/pingcap/tidb/issues/27787) + - Reduce empty Regions when restoring many small tables to avoid affecting cluster operations after the restore [#1374](https://github.com/pingcap/br/issues/1374) + - Perform the `rebase auto id` operation while creating tables, which saves the separate `rebase auto id` DDL operation and speeds up restore [#1424](https://github.com/pingcap/br/pull/1424) + + + Dumpling + + - Filter the skipped databases before getting the table information to improve the filtering efficiency of `SHOW TABLE STATUS` [#337](https://github.com/pingcap/dumpling/pull/337) + - Use `SHOW FULL TABLES` to get table information for tables to be exported, because `SHOW TABLE STATUS` cannot work properly in some MySQL versions [#322](https://github.com/pingcap/dumpling/issues/322) + - Support backing up MySQL-compatible databases that do not support the `START TRANSACTION ... 
WITH CONSISTENT SNAPSHOT` or the `SHOW CREATE TABLE` syntax [#309](https://github.com/pingcap/dumpling/issues/309) + - Refine the Dumpling warning log to avoid the misleading information that a dump fails [#340](https://github.com/pingcap/dumpling/pull/340) + + + TiDB Lightning + + - Support importing data into tables that have expression index or the index that depends on virtual generated columns [#1404](https://github.com/pingcap/br/issues/1404) + + + TiCDC + + - Always pulls old values from TiKV internally to improve usability [#2397](https://github.com/pingcap/ticdc/pull/2397) + - Reduce the goroutine usage when a table's Regions are all transferred away from a TiKV node [#2284](https://github.com/pingcap/ticdc/issues/2284) + - Optimize workerpool for fewer goroutines when concurrency is high [#2211](https://github.com/pingcap/ticdc/issues/2211) + - Execute DDL statements asynchronously to avoid affecting other changefeeds [#2295](https://github.com/pingcap/ticdc/issues/2295) + - Add a global gRPC connection pool and share gRPC connections among KV clients [#2531](https://github.com/pingcap/ticdc/pull/2531) + - Fail fast for unrecoverable DML errors [#1724](https://github.com/pingcap/ticdc/issues/1724) + - Optimize memory management when the Unified Sorter is using memory to sort data [#2553](https://github.com/pingcap/ticdc/issues/2553) + - Add Prometheus metrics for DDL executions [#2595](https://github.com/pingcap/ticdc/issues/2595) [#2669](https://github.com/pingcap/ticdc/issues/2669) + - Prohibit operating TiCDC clusters across major or minor versions [#2601](https://github.com/pingcap/ticdc/pull/2601) + - Remove `file sorter` [#2325](https://github.com/pingcap/ticdc/pull/2325) + - Clean up changefeed metrics when a changefeed is removed, and clean up processor metrics when a processor exits [#2156](https://github.com/pingcap/ticdc/issues/2156) + - Optimize the lock-resolving algorithm after a Region is initialized 
[#2188](https://github.com/pingcap/ticdc/issues/2188) + +## Bug fixes + ++ TiDB + + - Fix a bug that collation is incorrectly set for binary literals when building ranges [#23672](https://github.com/pingcap/tidb/issues/23672) + + - Fix the "index out of range" error that occurs when a query includes both `GROUP BY` and `UNION` [#26553](https://github.com/pingcap/tidb/pull/26553) + - Fix the issue that TiDB might fail to send requests if TiKV has tombstone stores [#23676](https://github.com/pingcap/tidb/issues/23676) [#24648](https://github.com/pingcap/tidb/issues/24648) + - Remove the undocumented `/debug/sub-optimal-plan` HTTP API [#27264](https://github.com/pingcap/tidb/pull/27264) + ++ TiKV + + - Fix the issue that BR reports the "file already exists" error when TDE is enabled during data restore [#1179](https://github.com/pingcap/br/issues/1179) + - Fix the potential disk full issue caused by corrupted snapshot files [#10813](https://github.com/tikv/tikv/issues/10813) + - Fix the issue that TiKV deletes stale Regions too frequently [#10680](https://github.com/tikv/tikv/issues/10680) + - Fix the issue that TiKV frequently reconnects the PD client [#9690](https://github.com/tikv/tikv/issues/9690) + - Check stale file information from the encryption file dictionary [#9115](https://github.com/tikv/tikv/issues/9115) + ++ PD + + - Fix the issue that PD does not fix the down peers in time [#4077](https://github.com/tikv/pd/issues/4077) + - Fix a bug that PD might panic when scaling out TiKV [#3868](https://github.com/tikv/pd/issues/3868) + ++ TiFlash + + - Fix the potential issue of data inconsistency that occurs when TiFlash is deployed on multiple disks + - Fix a bug of incorrect results that occurs when queries contain filters like `CONSTANT`, `<`, `<=`, `>`, `>=`, or `COLUMN` + - Fix the issue that the store size in metrics is inaccurate under heavy writing + - Fix a potential bug that TiFlash cannot restore data when deployed on multiple disks + - Fix the 
potential issue that TiFlash cannot garbage-collect the delta data after running for a long time + ++ Tools + + + Backup & Restore (BR) + + - Fix a bug that the average speed is inaccurately calculated for backup and restore [#1405](https://github.com/pingcap/br/issues/1405) + + + TiCDC + + - Fix the `ErrSchemaStorageTableMiss` error that occurs when the DDL Job duplication is encountered in the integrated test [#2422](https://github.com/pingcap/ticdc/issues/2422) + - Fix a bug that a changefeed cannot be removed if the `ErrGCTTLExceeded` error occurs [#2391](https://github.com/pingcap/ticdc/issues/2391) + - Fix the issue that outdated capture might appear in the output of the `capture list` command [#2388](https://github.com/pingcap/ticdc/issues/2388) + - Fix the deadlock issue in the TiCDC processor [#2017](https://github.com/pingcap/ticdc/pull/2017) + - Fix a data inconsistency issue that occurs because multiple processors might write data to the same table when this table is being re-scheduled [#2230](https://github.com/pingcap/ticdc/issues/2230) + - Fix a bug that the `EtcdWorker` snapshot isolation is violated in metadata management [#2557](https://github.com/pingcap/ticdc/pull/2557) + - Fix the issue that the changefeed cannot be stopped due to the DDL sink error [#2552](https://github.com/pingcap/ticdc/issues/2552) + - Fix the issue of TiCDC Open Protocol: TiCDC outputs an empty value when there is no change in a transaction [#2612](https://github.com/pingcap/ticdc/issues/2612) + - Fix a bug that causes TiCDC to panic on the unsigned `TINYINT` type [#2648](https://github.com/pingcap/ticdc/issues/2648) + - Decrease the gRPC window size to avoid the OOM that occurs when TiCDC captures too many Regions [#2202](https://github.com/pingcap/ticdc/issues/2202) + - Fix the OOM issue that occurs when TiCDC captures too many Regions [#2673](https://github.com/pingcap/ticdc/issues/2673) + - Fix the issue of process panic that occurs when encoding the data types such as 
`mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar` into JSON [#2758](https://github.com/pingcap/ticdc/issues/2758) + - Fix a memory leak issue that might occur when creating a new changefeed [#2389](https://github.com/pingcap/ticdc/issues/2389) + - Fix a bug that DDL handling fails when a changefeed starts at the finish TS of a schema change [#2603](https://github.com/pingcap/ticdc/issues/2603) + - Fix the issue of potential DDL loss when the owner crashes when executing DDL statements [#1260](https://github.com/pingcap/ticdc/issues/1260) + - Fix the issue of insecure concurrent access to the map in `SinkManager` [#2298](https://github.com/pingcap/ticdc/pull/2298) diff --git a/releases/release-5.0.0.md b/releases/release-5.0.0.md index 1c4575493084..9cd384e80999 100644 --- a/releases/release-5.0.0.md +++ b/releases/release-5.0.0.md @@ -68,6 +68,7 @@ In v5.0, the key new features or improvements are as follows: ### Others ++ Before the upgrade, check the value of the TiDB configuration [`feedback-probability`](/tidb-configuration-file.md#feedback-probability). If the value is not 0, the "panic in the recoverable goroutine" error will occur after the upgrade, but this error does not affect the upgrade. + Forbid conversion between `VARCHAR` type and `CHAR` type during the column type change to avoid data correctness issues. ## New features diff --git a/releases/release-5.0.4.md b/releases/release-5.0.4.md new file mode 100644 index 000000000000..3387995e133f --- /dev/null +++ b/releases/release-5.0.4.md @@ -0,0 +1,197 @@ +--- +title: TiDB 5.0.4 Release Notes +--- + +# TiDB 5.0.4 Release Notes + +Release Date: September 27, 2021 + +TiDB version: 5.0.4 + +## Compatibility changes + ++ TiDB + + - Fix the issue that executing `SHOW VARIABLES` in a new session is slow. This fix reverts some changes made in [#19341](https://github.com/pingcap/tidb/pull/19341) and might cause compatibility issues. 
[#24326](https://github.com/pingcap/tidb/issues/24326) + - Change the default value of the `tidb_stmt_summary_max_stmt_count` variable from `200` to `3000` [#25873](https://github.com/pingcap/tidb/pull/25873) + + The following bug fixes change execution results, which might cause upgrade incompatibilities: + - Fix the issue that TiDB returns wrong result when the children of `UNION` contain the `NULL` value [#26559](https://github.com/pingcap/tidb/issues/26559) + - Fix the issue that `greatest(datetime) union null` returns empty string [#26532](https://github.com/pingcap/tidb/issues/26532) + - Fix the issue that the behavior of the `last_day` function is incompatible in SQL mode [#26000](https://github.com/pingcap/tidb/pull/26000) + - Fix the issue that the `having` clause might not work correctly [#26496](https://github.com/pingcap/tidb/issues/26496) + - Fix the wrong execution results that occur when the collations around the `between` expression are different [#27146](https://github.com/pingcap/tidb/issues/27146) + - Fix the wrong execution results that occur when the column in the `group_concat` function has a non-bin collation [#27429](https://github.com/pingcap/tidb/issues/27429) + - Fix an issue that using a `count(distinct)` expression on multiple columns returns wrong result when the new collation is enabled [#27091](https://github.com/pingcap/tidb/issues/27091) + - Fix the wrong result that occurs when the argument of the `extract` function is a negative duration [#27236](https://github.com/pingcap/tidb/issues/27236) + - Fix the issue that inserting an invalid date does not report an error when the `SQL_MODE` is 'STRICT_TRANS_TABLES' [#26762](https://github.com/pingcap/tidb/issues/26762) + - Fix the issue that using an invalid default date does not report an error when the `SQL_MODE` is 'NO_ZERO_IN_DATE' [#26766](https://github.com/pingcap/tidb/issues/26766) + - Fix a bug on the query range of prefix index 
[#26029](https://github.com/pingcap/tidb/issues/26029) + - Fix the issue that the `LOAD DATA` statement might abnormally import non-utf8 data [#25979](https://github.com/pingcap/tidb/issues/25979) + - Fix the issue that `insert ignore on duplicate update` might insert wrong data when the secondary index has the same column with the primary key [#25809](https://github.com/pingcap/tidb/issues/25809) + - Fix the issue that `insert ignore duplicate update` might insert wrong data when a partitioned table has a clustered index [#25846](https://github.com/pingcap/tidb/issues/25846) + - Fix the issue that the query result might be wrong when the key is the `ENUM` type in point get or batch point get [#24562](https://github.com/pingcap/tidb/issues/24562) + - Fix the wrong result that occurs when dividing a `BIT`-type value [#23479](https://github.com/pingcap/tidb/issues/23479) + - Fix the issue that the results of `prepared` statements and direct queries might be inconsistent [#22949](https://github.com/pingcap/tidb/issues/22949) + - Fix the issue that the query result might be wrong when a `YEAR` type is compared with a string or an integer type [#23262](https://github.com/pingcap/tidb/issues/23262) + +## Feature enhancements + ++ TiDB + + - Support setting `tidb_enforce_mpp=1` to ignore the optimizer estimation and forcibly use the MPP mode [#26382](https://github.com/pingcap/tidb/pull/26382) + ++ TiKV + + - Support changing TiCDC configurations dynamically [#10645](https://github.com/tikv/tikv/issues/10645) + ++ PD + + - Add OIDC-based SSO support for TiDB Dashboard [#3884](https://github.com/tikv/pd/pull/3884) + ++ TiFlash + + - Support the `HAVING()` function in DAG requests + - Support the `DATE()` function + - Add Grafana panels for write throughput per instance + +## Improvements + ++ TiDB + + - Trigger auto-analyze based on the histogram row count [#24237](https://github.com/pingcap/tidb/issues/24237) + - Stop sending requests to a TiFlash node for a period if the 
node has failed and restarted before [#26757](https://github.com/pingcap/tidb/pull/26757) + - Increase the `split region` upper limit to make `split table` and `presplit` more stable [#26657](https://github.com/pingcap/tidb/pull/26657) + - Support retry for MPP queries [#26483](https://github.com/pingcap/tidb/pull/26483) + - Check the availability of TiFlash before launching MPP queries [#1807](https://github.com/pingcap/tics/issues/1807) + - Support the stable result mode to make the query result more stable [#26084](https://github.com/pingcap/tidb/pull/26084) + - Support the MySQL system variable `init_connect` and its associated features [#18894](https://github.com/pingcap/tidb/issues/18894) + - Thoroughly push down the `COUNT(DISTINCT)` aggregation function in the MPP mode [#25861](https://github.com/pingcap/tidb/pull/25861) + - Print log warnings when the aggregation function cannot be pushed down in `EXPLAIN` statements [#25736](https://github.com/pingcap/tidb/pull/25736) + - Add error labels for `TiFlashQueryTotalCounter` in Grafana dashboard [#25327](https://github.com/pingcap/tidb/pull/25327) + - Support getting the MVCC data of a clustered index table through a secondary index by HTTP API [#24209](https://github.com/pingcap/tidb/issues/24209) + - Optimize the memory allocation of `prepared` statement in parser [#24371](https://github.com/pingcap/tidb/pull/24371) + ++ TiKV + + - Handle read ready and write ready separately to reduce read latency [#10475](https://github.com/tikv/tikv/issues/10475) + - Reduce the size of Resolved TS messages to save network bandwidth [#2448](https://github.com/pingcap/ticdc/issues/2448) + - Drop log instead of blocking threads when the slogger thread is overloaded and the queue is filled up [#10841](https://github.com/tikv/tikv/issues/10841) + - Make the slow log of TiKV coprocessor only consider the time spent on processing requests [#10841](https://github.com/tikv/tikv/issues/10841) + - Make prewrite as idempotent as 
possible to reduce the chance of undetermined errors [#10587](https://github.com/tikv/tikv/pull/10587) + - Avoid the false "GC can not work" alert under low write flow [#10662](https://github.com/tikv/tikv/pull/10662) + - Make the database to be restored always match the original cluster size during backup [#10643](https://github.com/tikv/tikv/pull/10643) + - Ensure that the panic output is flushed to the log [#9955](https://github.com/tikv/tikv/pull/9955) + ++ PD + + - Improve the performance of synchronizing Region information between PDs [#3993](https://github.com/tikv/pd/pull/3993) + ++ Tools + + + Dumpling + + - Support backing up MySQL-compatible databases that do not support the `START TRANSACTION ... WITH CONSISTENT SNAPSHOT` or the `SHOW CREATE TABLE` syntax [#309](https://github.com/pingcap/dumpling/issues/309) + + + TiCDC + + - Optimize memory management when Unified Sorter is using memory to sort [#2553](https://github.com/pingcap/ticdc/issues/2553) + - Prohibit operating TiCDC clusters across major or minor versions [#2598](https://github.com/pingcap/ticdc/pull/2598) + - Reduce the goroutine usage when a table's Regions are all transferred away from a TiKV node [#2284](https://github.com/pingcap/ticdc/issues/2284) + - Remove `file sorter` [#2326](https://github.com/pingcap/ticdc/pull/2326) + - Always pull the old values from TiKV and the output is adjusted according to `enable-old-value` [#2301](https://github.com/pingcap/ticdc/issues/2301) + - Improve the error message returned when a PD endpoint misses the certificate [#1973](https://github.com/pingcap/ticdc/issues/1973) + - Optimize workerpool for fewer goroutines when concurrency is high [#2211](https://github.com/pingcap/ticdc/issues/2211) + - Add a global gRPC connection pool and share gRPC connections among KV clients [#2533](https://github.com/pingcap/ticdc/pull/2533) + +## Bug fixes + ++ TiDB + + - Fix the issue that TiDB might panic when querying a partitioned table and the partition key has 
the `IS NULL` condition [#23802](https://github.com/pingcap/tidb/issues/23802) + - Fix the issue that the overflow check of the `FLOAT64` type is different with that of MySQL [#23897](https://github.com/pingcap/tidb/issues/23897) + - Fix the wrong character set and collation for the `case when` expression [#26662](https://github.com/pingcap/tidb/issues/26662) + - Fix the issue that committing pessimistic transactions might cause write conflicts [#25964](https://github.com/pingcap/tidb/issues/25964) + - Fix a bug that the index keys in a pessimistic transaction might be repeatedly committed [#26359](https://github.com/pingcap/tidb/issues/26359) [#10600](https://github.com/tikv/tikv/pull/10600) + - Fix the issue that TiDB might panic when resolving the async commit locks [#25778](https://github.com/pingcap/tidb/issues/25778) + - Fix a bug that a column might not be found when using `INDEX MERGE` [#25045](https://github.com/pingcap/tidb/issues/25045) + - Fix a bug that `ALTER USER REQUIRE SSL` clears users' `authentication_string` [#25225](https://github.com/pingcap/tidb/issues/25225) + - Fix a bug that the value of the `tidb_gc_scan_lock_mode` global variable on a new cluster shows "PHYSICAL" instead of the actual default mode "LEGACY" [#25100](https://github.com/pingcap/tidb/issues/25100) + - Fix the bug that the `TIKV_REGION_PEERS` system table does not show the correct `DOWN` status [#24879](https://github.com/pingcap/tidb/issues/24879) + - Fix the issue of memory leaks that occurs when HTTP API is used [#24649](https://github.com/pingcap/tidb/pull/24649) + - Fix the issue that views do not support `DEFINER` [#24414](https://github.com/pingcap/tidb/issues/24414) + - Fix the issue that `tidb-server --help` exits with the code `2` [#24046](https://github.com/pingcap/tidb/issues/24046) + - Fix the issue that setting the global variable `dml_batch_size` does not take effect [#24709](https://github.com/pingcap/tidb/issues/24709) + - Fix the issue that using 
`read_from_storage` and partitioned table at the same time causes an error [#20372](https://github.com/pingcap/tidb/issues/20372) + - Fix the issue that TiDB panics when executing the projection operator [#24264](https://github.com/pingcap/tidb/issues/24264) + - Fix the issue that statistics might cause queries to panic [#24061](https://github.com/pingcap/tidb/pull/24061) + - Fix the issue that using the `approx_percentile` function on a `BIT` column might panic [#23662](https://github.com/pingcap/tidb/issues/23662) + - Fix the issue that the metrics on the **Coprocessor Cache** panel in Grafana are wrong [#26338](https://github.com/pingcap/tidb/issues/26338) + - Fix the issue that concurrently truncating the same partition causes DDL statements to get stuck [#26229](https://github.com/pingcap/tidb/issues/26229) + - Fix the issue of wrong query results that occurs when the session variable is used as the `GROUP BY` item [#27106](https://github.com/pingcap/tidb/issues/27106) + - Fix the wrong implicit conversion between `VARCHAR` and timestamp when joining tables [#25902](https://github.com/pingcap/tidb/issues/25902) + - Fix the wrong results in associated subquery statements [#27233](https://github.com/pingcap/tidb/issues/27233) + ++ TiKV + + - Fix the potential disk full issue caused by corrupted snapshot files [#10813](https://github.com/tikv/tikv/issues/10813) + - Fix the TiKV panic issue that occurs when upgrading from a pre-5.0 version with Titan enabled [#10843](https://github.com/tikv/tikv/pull/10843) + - Fix the issue that TiKV of a newer version cannot be rolled back to v5.0.x [#10843](https://github.com/tikv/tikv/pull/10843) + - Fix the TiKV panic issue that occurs when upgrading from a pre-5.0 version to a 5.0 version or later. If a cluster was upgraded from TiKV v3.x with Titan enabled before the upgrade, this cluster might encounter the issue. 
[#10774](https://github.com/tikv/tikv/issues/10774) + - Fix the parsing failure caused by the left pessimistic locks [#26404](https://github.com/pingcap/tidb/issues/26404) + - Fix the panic that occurs when calculating duration on certain platforms [#10571](https://github.com/tikv/tikv/pull/10571) + - Fix the issue that the keys of `batch_get_command` in Load Base Split are unencoded [#10542](https://github.com/tikv/tikv/issues/10542) + ++ PD + + - Fix the issue that PD does not fix the down peers in time [#4077](https://github.com/tikv/pd/issues/4077) + - Fix the issue that the replica count of the default placement rules stays constant after `replication.max-replicas` is updated [#3886](https://github.com/tikv/pd/issues/3886) + - Fix a bug that PD might panic when scaling out TiKV [#3868](https://github.com/tikv/pd/issues/3868) + - Fix the scheduling conflict issue that occurs when multiple schedulers are running at the same time [#3807](https://github.com/tikv/pd/issues/3807) + - Fix the issue that the scheduler might appear again even if it has been deleted [#2572](https://github.com/tikv/pd/issues/2572) + ++ TiFlash + + - Fix the potential panic issue that occurs when running table scan tasks + - Fix the potential memory leak issue that occurs when executing MPP tasks + - Fix a bug that TiFlash raises the `duplicated region` error when handling DAG requests + - Fix the issue of unexpected results when executing the aggregation functions `COUNT` or `COUNT DISTINCT` + - Fix the potential panic issue that occurs when executing MPP tasks + - Fix a potential bug that TiFlash cannot restore data when deployed on multiple disks + - Fix the potential panic issue that occurs when deconstructing `SharedQueryBlockInputStream` + - Fix the potential panic issue that occurs when deconstructing `MPPTask` + - Fix the issue of unexpected results when TiFlash fails to establish MPP connections + - Fix the potential panic issue that occurs when resolving locks + - Fix the issue that
the store size in metrics is inaccurate under heavy writing + - Fix a bug of incorrect results that occurs when queries contain filters like `CONSTANT`, `<`, `<=`, `>`, `>=`, or `COLUMN` + - Fix the potential issue that TiFlash cannot garbage-collect the delta data after running for a long time + - Fix a potential bug that metrics display wrong values + - Fix the potential issue of data inconsistency that occurs when TiFlash is deployed on multiple disks + ++ Tools + + + Dumpling + + - Fix the issue that the execution of `show table status` is stuck in MySQL 8.0.3 or a later version [#322](https://github.com/pingcap/dumpling/issues/322) + + + TiCDC + + - Fix the issue of process panic that occurs when encoding the data types such as `mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar` into JSON [#2758](https://github.com/pingcap/ticdc/issues/2758) + - Fix a data inconsistency issue that occurs because multiple processors might write data to the same table when this table is being re-scheduled [#2417](https://github.com/pingcap/ticdc/pull/2417) + - Decrease the gRPC window size to avoid the OOM that occurs when TiCDC captures too many Regions [#2724](https://github.com/pingcap/ticdc/pull/2724) + - Fix the error that the gRPC connection is frequently broken when the memory pressure is high [#2202](https://github.com/pingcap/ticdc/issues/2202) + - Fix a bug that causes TiCDC to panic on the unsigned `TINYINT` type [#2648](https://github.com/pingcap/ticdc/issues/2648) + - Fix the issue that TiCDC Open Protocol outputs an empty value when inserting a transaction and deleting data of the same row in the upstream [#2612](https://github.com/pingcap/ticdc/issues/2612) + - Fix a bug that DDL handling fails when a changefeed starts at the finish TS of a schema change [#2603](https://github.com/pingcap/ticdc/issues/2603) + - Fix the issue that irresponsive downstreams interrupt the replication task in old owner until the task times out 
[#2295](https://github.com/pingcap/ticdc/issues/2295) + - Fix a bug in metadata management [#2558](https://github.com/pingcap/ticdc/pull/2558) + - Fix the issue of data inconsistency that occurs after the TiCDC owner switch [#2230](https://github.com/pingcap/ticdc/issues/2230) + - Fix the issue that outdated capture might appear in the output of the `capture list` command [#2388](https://github.com/pingcap/ticdc/issues/2388) + - Fix the `ErrSchemaStorageTableMiss` error that occurs when the DDL Job duplication is encountered in the integrated test [#2422](https://github.com/pingcap/ticdc/issues/2422) + - Fix the bug that a changefeed cannot be removed if the `ErrGCTTLExceeded` error occurs [#2391](https://github.com/pingcap/ticdc/issues/2391) + - Fix a bug that replicating large tables to cdclog fails [#1259](https://github.com/pingcap/ticdc/issues/1259) [#2424](https://github.com/pingcap/ticdc/issues/2424) + - Fix the CLI backward compatibility issue [#2373](https://github.com/pingcap/ticdc/issues/2373) + - Fix the issue of insecure concurrent access to the map in `SinkManager` [#2299](https://github.com/pingcap/ticdc/pull/2299) + - Fix the issue of potential DDL loss when the owner crashes when executing DDL statements [#1260](https://github.com/pingcap/ticdc/issues/1260) + - Fix the issue that the lock is resolved immediately after a Region is initialized [#2188](https://github.com/pingcap/ticdc/issues/2188) + - Fix the issue of extra partition dispatching that occurs when adding a new partitioned table [#2263](https://github.com/pingcap/ticdc/pull/2263) + - Fix the issue that TiCDC keeps warning on removed changefeeds [#2156](https://github.com/pingcap/ticdc/issues/2156) diff --git a/releases/release-5.1.0.md b/releases/release-5.1.0.md index a114fea2b341..b4d53dd2dff2 100644 --- a/releases/release-5.1.0.md +++ b/releases/release-5.1.0.md @@ -33,7 +33,7 @@ In v5.1, the key new features or improvements are as follows: | 
[`tidb_analyze_version`](/system-variables.md#tidb_analyze_version-new-in-v510) | Newly added | Controls how TiDB collects statistics. The default value of this variable is `2`. This is an experimental feature. | | [`tidb_enable_enhanced_security`](/system-variables.md#tidb_enable_enhanced_security) | Newly added | Indicates whether the TiDB server you are connected to has the Security Enhanced Mode (SEM) enabled. This variable setting cannot be changed without restarting the TiDB server. | | [`tidb_enforce_mpp`](/system-variables.md#tidb_enforce_mpp-new-in-v51) | Newly added | Controls whether to ignore the optimizer's cost estimation and to forcibly use the MPP mode for query execution. The data type of this variable is `BOOL` and the default value is `false`. | -| [`tidb_partition_prune_mode`](/system-variables.md#tidb_partition_prune_mode-new-in-v51) | Newly added | Specifies whether to enable dynamic prune mode for partitioned tables. This feature is experimental. The default value of this variable is `static`, which means the dynamic mode for partitioned tables is disabled by default. | +| [`tidb_partition_prune_mode`](/system-variables.md#tidb_partition_prune_mode-new-in-v51) | Newly added | Specifies whether to enable dynamic pruning mode for partitioned tables. This feature is experimental. The default value of this variable is `static`, which means the dynamic pruning mode for partitioned tables is disabled by default. | ### Configuration file parameters @@ -58,11 +58,13 @@ In v5.1, the key new features or improvements are as follows: ### Others +- Before the upgrade, check the value of the TiDB configuration [`feedback-probability`](/tidb-configuration-file.md#feedback-probability). If the value is not 0, the "panic in the recoverable goroutine" error will occur after the upgrade, but this error does not affect the upgrade. - Upgrade the Go compiler version of TiDB from go1.13.7 to go1.16.4, which improves the TiDB performance. 
If you are a TiDB developer, upgrade your Go compiler version to ensure a smooth compilation. - Avoid creating tables with clustered indexes in the cluster that uses TiDB Binlog during the TiDB rolling upgrade. - Avoid executing statements like `alter table ... modify column` or `alter table ... change column` during the TiDB rolling upgrade. - Since v5.1, setting the replica of system tables, when building TiFlash replicas for each table, is no longer supported. Before upgrading the cluster, you need to clear the relevant system table replicas; otherwise, the upgrade will fail. - Deprecate the `--sort-dir` parameter in the `cdc cli changefeed` command of TiCDC. Instead, you can set `--sort-dir` in the `cdc server` command. [#1795](https://github.com/pingcap/ticdc/pull/1795) +- After upgrading to TiDB 5.1, if TiDB returns the "function READ ONLY has only noop implementation" error, you can let TiDB ignore this error by setting the value of [`tidb_enable_noop_functions`](/system-variables.md#tidb_enable_noop_functions-new-in-v40) to `ON`. This is because the `read_only` variable in MySQL does not yet take effect in TiDB (which is a 'noop' behavior in TiDB). Therefore, even if this variable is set in TiDB, you can still write data into the TiDB cluster. ## New features @@ -174,7 +176,7 @@ In v5.1, the key new features or improvements are as follows: TiDB adds the running status of TiDB cluster requests in telemetry, including execution status, failure status, etc. -To learn more about the information and how to disable this behavior, refer to [Telemetry](https://docs.pingcap.com/zh/tidb/stable/telemetry). +To learn more about the information and how to disable this behavior, refer to [Telemetry](/telemetry.md). 
## Improvements @@ -184,7 +186,7 @@ To learn more about the information and how to disable this behavior, refer to [ - Support pushing down data of the enumerated type to TiKV to improve performance when using enumerated types in `WHERE` clauses [#23619](https://github.com/pingcap/tidb/issues/23619) - Optimize the calculation of Window Function to solve TiDB OOM problems when paging data with ROW_NUMBER() [#23807](https://github.com/pingcap/tidb/issues/23807) - Optimize the calculation of `UNION ALL` to solve the TiDB OOM problems when using `UNION ALL` to join a large number of `SELECT` statements [#21441](https://github.com/pingcap/tidb/issues/21441) - - Optimize the dynamic mode of partitioned tables to improve performance and stability [#24150](https://github.com/pingcap/tidb/issues/24150) + - Optimize the dynamic pruning mode of partitioned tables to improve performance and stability [#24150](https://github.com/pingcap/tidb/issues/24150) - Fix the `Region is Unavailable` issue that occurs in multiple scenarios [project#62](https://github.com/pingcap/tidb/projects/62) - Fix multiple `Region is Unavailable` issues that might occur in frequent scheduling situations - Fix `Region is Unavailable` issue that might occur in some high stress write situations diff --git a/releases/release-5.1.1.md b/releases/release-5.1.1.md new file mode 100644 index 000000000000..9bde0fd1acd1 --- /dev/null +++ b/releases/release-5.1.1.md @@ -0,0 +1,156 @@ +--- +title: TiDB 5.1.1 Release Notes +--- + +# TiDB 5.1.1 Release Notes + +Release Date: July 30, 2021 + +TiDB version: 5.1.1 + +## Compatibility changes + ++ TiDB + + - For TiDB clusters upgraded from v4.0 to v5.1, the default value of `tidb_multi_statement_mode` is `OFF`. It is recommended to use the multi-statement feature of your client library instead. See [the documentation on `tidb_multi_statement_mode`](/system-variables.md#tidb_multi_statement_mode-new-in-v4011) for details.
[#25751](https://github.com/pingcap/tidb/pull/25751) + - Change the default value of the `tidb_stmt_summary_max_stmt_count` variable from `200` to `3000` [#25874](https://github.com/pingcap/tidb/pull/25874) + - Require the `SUPER` privilege to access the `table_storage_stats` table [#26352](https://github.com/pingcap/tidb/pull/26352) + - Require the `SELECT` privilege on `mysql.user` to access the `information_schema.user_privileges` table to show other user's privileges [#26311](https://github.com/pingcap/tidb/pull/26311) + - Require the `CONFIG` privilege to access the `information_schema.cluster_hardware` table [#26297](https://github.com/pingcap/tidb/pull/26297) + - Require the `PROCESS` privilege to access the `information_schema.cluster_info` table [#26297](https://github.com/pingcap/tidb/pull/26297) + - Require the `PROCESS` privilege to access the `information_schema.cluster_load` table [#26297](https://github.com/pingcap/tidb/pull/26297) + - Require the `PROCESS` privilege to access the `information_schema.cluster_systeminfo` table [#26297](https://github.com/pingcap/tidb/pull/26297) + - Require the `PROCESS` privilege to access the `information_schema.cluster_log` table [#26297](https://github.com/pingcap/tidb/pull/26297) + - Require the `CONFIG` privilege to access the `information_schema.cluster_config` table [#26150](https://github.com/pingcap/tidb/pull/26150) + +## Feature enhancements + ++ TiDB Dashboard + + - Support OIDC SSO. By setting the OIDC-compatible SSO services (such as Okta and Auth0), users can log into TiDB Dashboard without entering the SQL password. 
[#3883](https://github.com/tikv/pd/pull/3883) + ++ TiFlash + + - Support the `HAVING()` function in DAG requests + +## Improvements + ++ TiDB + + - Announce the general availability (GA) of the Stale Read feature + - Avoid allocation for `paramMarker` to speed up data insertion [#26076](https://github.com/pingcap/tidb/pull/26076) + - Support the stable result mode to make the query results more stable [#25995](https://github.com/pingcap/tidb/pull/25995) + - Support pushing down the built-in function `json_unquote()` to TiKV [#26265](https://github.com/pingcap/tidb/pull/26265) + - Support retrying MPP queries [#26480](https://github.com/pingcap/tidb/pull/26480) + - Change the `LOCK` record into the `PUT` record for the index keys using `point get` or `batch point get` for `UPDATE` reads [#26225](https://github.com/pingcap/tidb/pull/26225) + - Forbid creating views from stale queries [#26200](https://github.com/pingcap/tidb/pull/26200) + - Thoroughly push down the `COUNT(DISTINCT)` aggregation function in the MPP mode [#26194](https://github.com/pingcap/tidb/pull/26194) + - Check the availability of TiFlash before launching MPP queries [#26192](https://github.com/pingcap/tidb/pull/26192) + - Do not allow setting the read timestamp to a future time [#25763](https://github.com/pingcap/tidb/pull/25763) + - Print log warnings when aggregation functions cannot be pushed down in `EXPLAIN` statements [#25737](https://github.com/pingcap/tidb/pull/25737) + - Add the `statements_summary_evicted` table to record the evicted count information of a cluster [#25587](https://github.com/pingcap/tidb/pull/25587) + - Improve the MySQL compatibility of the built-in function `str_to_date` for the format specifiers `%b/%M/%r/%T` [#25768](https://github.com/pingcap/tidb/pull/25768) + ++ TiKV + + - Make the prewrite requests as idempotent as possible to reduce the chance of undetermined errors [#10586](https://github.com/tikv/tikv/pull/10586) + - Prevent the risk of stack overflow when 
handling many expired commands [#10502](https://github.com/tikv/tikv/pull/10502) + - Avoid excessive commit request retrying by not using the Stale Read request's `start_ts` to update `max_ts` [#10451](https://github.com/tikv/tikv/pull/10451) + - Handle read ready and write ready separately to reduce read latency [#10592](https://github.com/tikv/tikv/pull/10592) + - Reduce the impact on data import speed when the I/O rate limiting is enabled [#10390](https://github.com/tikv/tikv/pull/10390) + - Improve the load balance between Raft gRPC connections [#10495](https://github.com/tikv/tikv/pull/10495) + ++ Tools + + + TiCDC + + - Remove `file sorter` [#2327](https://github.com/pingcap/ticdc/pull/2327) + - Improve the error message returned when a PD endpoint misses the certificate [#1973](https://github.com/pingcap/ticdc/issues/1973) + + + TiDB Lightning + + - Add a retry mechanism for restoring schemas [#1294](https://github.com/pingcap/br/pull/1294) + + + Dumpling + + - Always split tables using `_tidb_rowid` when the upstream is a TiDB v3.x cluster, which helps reduce TiDB's memory usage [#295](https://github.com/pingcap/dumpling/issues/295) + - Reduce the frequency of accessing the database metadata to improve Dumpling's performance and stability [#315](https://github.com/pingcap/dumpling/pull/315) + +## Bug fixes + ++ TiDB + + - Fix the data loss issue that might occur when changing the column type with `tidb_enable_amend_pessimistic_txn=on` [#26203](https://github.com/pingcap/tidb/issues/26203) + - Fix the issue that the behavior of the `last_day` function is incompatible in the SQL mode [#26001](https://github.com/pingcap/tidb/pull/26001) + - Fix the panic issue that might occur when `LIMIT` is on top of window functions [#25344](https://github.com/pingcap/tidb/issues/25344) + - Fix the issue that committing pessimistic transactions might cause write conflict [#25964](https://github.com/pingcap/tidb/issues/25964) + - Fix the issue that the result of index join 
in correlated subqueries is wrong [#25799](https://github.com/pingcap/tidb/issues/25799) + - Fix a bug that the successfully committed optimistic transactions might report commit errors [#10468](https://github.com/tikv/tikv/issues/10468) + - Fix the issue that an incorrect result is returned when using merge join on the `SET` type column [#25669](https://github.com/pingcap/tidb/issues/25669) + - Fix a bug that the index keys in a pessimistic transaction might be repeatedly committed [#26359](https://github.com/pingcap/tidb/issues/26359) + - Fix the risk of integer overflow when the optimizer is locating partitions [#26227](https://github.com/pingcap/tidb/issues/26227) + - Fix the issue that invalid values might be written when casting `DATE` to timestamp [#26292](https://github.com/pingcap/tidb/issues/26292) + - Fix the issue that the Coprocessor Cache metrics are not displayed on Grafana [#26338](https://github.com/pingcap/tidb/issues/26338) + - Fix the issue of annoying logs caused by telemetry [#25760](https://github.com/pingcap/tidb/issues/25760) [#25785](https://github.com/pingcap/tidb/issues/25785) + - Fix a bug on the query range of prefix index [#26029](https://github.com/pingcap/tidb/issues/26029) + - Fix the issue that concurrently truncating the same partition hangs DDL executions [#26229](https://github.com/pingcap/tidb/issues/26229) + - Fix the issue of duplicate `ENUM` items [#25955](https://github.com/pingcap/tidb/issues/25955) + - Fix a bug that the CTE iterator is not correctly closed [#26112](https://github.com/pingcap/tidb/issues/26112) + - Fix the issue that the `LOAD DATA` statement might abnormally import non-utf8 data [#25979](https://github.com/pingcap/tidb/issues/25979) + - Fix the panic issue that might occur when using the window function on the unsigned integer columns [#25956](https://github.com/pingcap/tidb/issues/25956) + - Fix the issue that TiDB might panic when resolving async commit locks 
[#25778](https://github.com/pingcap/tidb/issues/25778) + - Fix the issue that Stale Read is not fully compatible with the `PREPARE` statements [#25800](https://github.com/pingcap/tidb/pull/25800) + - Fix the issue that the ODBC-styled constant (for example, `{d '2020-01-01'}`) cannot be used as the expression [#25531](https://github.com/pingcap/tidb/issues/25531) + - Fix an error that occurs when running TiDB alone [#25555](https://github.com/pingcap/tidb/pull/25555) + ++ TiKV + + - Fix the issue that the duration calculation might panic on certain platforms [#10569](https://github.com/tikv/tikv/pull/10569) + - Fix the issue that Load Base Split mistakenly uses the unencoded keys of `batch_get_command` [#10542](https://github.com/tikv/tikv/issues/10542) + - Fix the issue that changing the `resolved-ts.advance-ts-interval` configuration online cannot take effect immediately [#10426](https://github.com/tikv/tikv/issues/10426) + - Fix the issue of follower metadata corruption in rare cases with more than 4 replicas [#10225](https://github.com/tikv/tikv/issues/10225) + - Fix the panic issue that occurs when building a snapshot twice if encryption is enabled [#9786](https://github.com/tikv/tikv/issues/9786) [#10407](https://github.com/tikv/tikv/issues/10407) + - Fix the wrong `tikv_raftstore_hibernated_peer_state` metric [#10330](https://github.com/tikv/tikv/issues/10330) + - Fix the wrong arguments type of the `json_unquote()` function in the coprocessor [#10176](https://github.com/tikv/tikv/issues/10176) + - Fix a bug that the index keys in a pessimistic transaction might be repeatedly committed [#10468](https://github.com/tikv/tikv/issues/10468#issuecomment-869491061) + - Fix the issue that the `ReadIndex` request returns stale result right after the leader is transferred [#9351](https://github.com/tikv/tikv/issues/9351) + ++ PD + + - Fix the issue that the expected scheduling cannot be generated when the conflict occurs due to multiple schedulers running at the same time
[#3807](https://github.com/tikv/pd/issues/3807) [#3778](https://github.com/tikv/pd/issues/3778) + - Fix the issue that the scheduler might appear again even if the scheduler is already deleted [#2572](https://github.com/tikv/pd/issues/2572) + ++ TiFlash + + - Fix the potential panic issue that occurs when running table scan tasks + - Fix a bug that TiFlash raises the error about `duplicated region` when handling DAG requests + - Fix the panic issue that occurs when the read load is heavy + - Fix the potential panic issue that occurs when executing the `DateFormat` function + - Fix the potential memory leak issue that occurs when executing MPP tasks + - Fix the issue of unexpected results when executing the aggregation functions `COUNT` or `COUNT DISTINCT` + - Fix a potential bug that TiFlash cannot restore data when deployed on multiple disks + - Fix the issue that TiDB Dashboard cannot display the disk information of TiFlash correctly + - Fix the potential panic issue that occurs when deconstructing `SharedQueryBlockInputStream` + - Fix the potential panic issue that occurs when deconstructing `MPPTask` + - Fix the potential issue of data inconsistency after synchronizing data via snapshot + ++ Tools + + + TiCDC + + - Fix the support for the new collation feature [#2301](https://github.com/pingcap/ticdc/issues/2301) + - Fix the issue that an unsynchronized access to a shared map at runtime might cause panic [#2300](https://github.com/pingcap/ticdc/pull/2300) + - Fix the potential DDL loss issue that occurs when the owner crashes while executing the DDL statement [#2290](https://github.com/pingcap/ticdc/pull/2290) + - Fix the issue of trying to resolve locks in TiDB prematurely [#2188](https://github.com/pingcap/ticdc/issues/2188) + - Fix a bug that might cause data loss if a TiCDC node is killed immediately after a table migration [#2033](https://github.com/pingcap/ticdc/pull/2033) + - Fix the handling logic of `changefeed update` on `--sort-dir` and `--start-ts`
[#1921](https://github.com/pingcap/ticdc/pull/1921) + + + Backup & Restore (BR) + + - Fix the issue that the size of the data to restore is incorrectly calculated [#1270](https://github.com/pingcap/br/issues/1270) + - Fix the issue of missed DDL events that occurs when restoring from cdclog [#870](https://github.com/pingcap/br/issues/870) + + + TiDB Lightning + + - Fix the issue that TiDB fails to parse the `DECIMAL` type data in Parquet files [#1275](https://github.com/pingcap/br/pull/1275) + - Fix the issue of integer overflow when calculating key intervals [#1291](https://github.com/pingcap/br/issues/1291) [#1290](https://github.com/pingcap/br/issues/1290) diff --git a/releases/release-5.1.2.md b/releases/release-5.1.2.md new file mode 100644 index 000000000000..b3699df80159 --- /dev/null +++ b/releases/release-5.1.2.md @@ -0,0 +1,154 @@ +--- +title: TiDB 5.1.2 Release Notes +--- + +# TiDB 5.1.2 Release Notes + +Release Date: September 27, 2021 + +TiDB version: 5.1.2 + +## Compatibility changes + ++ TiDB + + + The following bug fixes change execution results, which might cause upgrade incompatibilities: + + - Fix the issue that `greatest(datetime) union null` returns empty string [#26532](https://github.com/pingcap/tidb/issues/26532) + - Fix the issue that the `having` clause might not work correctly [#26496](https://github.com/pingcap/tidb/issues/26496) + - Fix the wrong execution results that occur when the collations around the `between` expression are different [#27146](https://github.com/pingcap/tidb/issues/27146) + - Fix the wrong execution results that occur when the column in the `group_concat` function has a non-bin collation [#27429](https://github.com/pingcap/tidb/issues/27429) + - Fix an issue that using a `count(distinct)` expression on multiple columns returns wrong result when the new collation is enabled [#27091](https://github.com/pingcap/tidb/issues/27091) + - Fix the wrong result that occurs when the argument of the `extract` function is a
negative duration [#27236](https://github.com/pingcap/tidb/issues/27236) + - Fix the issue that inserting an invalid date does not report an error when the `SQL_MODE` is 'STRICT_TRANS_TABLES' [#26762](https://github.com/pingcap/tidb/issues/26762) + - Fix the issue that using an invalid default date does not report an error when the `SQL_MODE` is 'NO_ZERO_IN_DATE' [#26766](https://github.com/pingcap/tidb/issues/26766) + ++ Tools + + + TiCDC + + - Set the compatible version from `5.1.0-alpha` to `5.2.0-alpha` [#2659](https://github.com/pingcap/ticdc/pull/2659) + +## Improvements + ++ TiDB + + - Trigger auto-analyze by histogram row count and increase the accuracy of this trigger action [#24237](https://github.com/pingcap/tidb/issues/24237) + ++ TiKV + + - Support dynamically modifying TiCDC configurations [#10645](https://github.com/tikv/tikv/issues/10645) + - Reduce the size of Resolved TS message to save network bandwidth [#2448](https://github.com/pingcap/ticdc/issues/2448) + - Limit the counts of peer stats in the heartbeat message reported by a single store [#10621](https://github.com/tikv/tikv/pull/10621) + ++ PD + + - Allow empty Regions to be scheduled and use a separate tolerance configuration in scatter range scheduler [#4117](https://github.com/tikv/pd/pull/4117) + - Improve the performance of synchronizing Region information between PDs [#3933](https://github.com/tikv/pd/pull/3933) + - Support dynamically adjusting the retry limit of a store based on the generated operator [#3744](https://github.com/tikv/pd/issues/3744) + ++ TiFlash + + - Support the `DATE()` function + - Add Grafana panels for write throughput per instance + - Optimize the performance of the `leader-read` process + - Accelerate the process of canceling MPP tasks + ++ Tools + + + TiCDC + + - Optimize memory management when the Unified Sorter is using memory to sort data [#2553](https://github.com/pingcap/ticdc/issues/2553) + - Optimize workerpool for fewer goroutines when concurrency is 
high [#2211](https://github.com/pingcap/ticdc/issues/2211) + - Reduce goroutine usage when a table's Region is transferred away from a TiKV node [#2284](https://github.com/pingcap/ticdc/issues/2284) + + + Dumpling + + - Support backing up MySQL-compatible databases that do not support `START TRANSACTION ... WITH CONSISTENT SNAPSHOT` and `SHOW CREATE TABLE` [#309](https://github.com/pingcap/dumpling/issues/309) + - Add a global gRPC connection pool and share gRPC connections among KV clients [#2534](https://github.com/pingcap/ticdc/pull/2534) + - Prohibit operating TiCDC clusters across major and minor versions [#2599](https://github.com/pingcap/ticdc/pull/2599) + +## Bug fixes + ++ TiDB + + - Fix the potential wrong results of index hash join when the hash column is the `ENUM` type [#27893](https://github.com/pingcap/tidb/issues/27893) + - Fix a batch client bug that recycling idle connections might block sending requests in some rare cases [#27678](https://github.com/pingcap/tidb/pull/27678) + - Fix the issue that the overflow check of the `FLOAT64` type is different from that of MySQL [#23897](https://github.com/pingcap/tidb/issues/23897) + - Fix the issue that TiDB returns an `unknow` error while it should return the `pd is timeout` error [#26147](https://github.com/pingcap/tidb/issues/26147) + - Fix the wrong character set and collation for the `case when` expression [#26662](https://github.com/pingcap/tidb/issues/26662) + - Fix the potential `can not found column in Schema column` error for MPP queries [#28148](https://github.com/pingcap/tidb/pull/28148) + - Fix a bug that TiDB might panic when TiFlash is shutting down [#28096](https://github.com/pingcap/tidb/issues/28096) + - Fix the issue of wrong range caused by using `enum like 'x%'` [#27130](https://github.com/pingcap/tidb/issues/27130) + - Fix the Common Table Expression (CTE) dead lock issue when used with IndexLookupJoin [#27410](https://github.com/pingcap/tidb/issues/27410) + - Fix a bug that retryable
deadlocks are incorrectly recorded into the `INFORMATION_SCHEMA.DEADLOCKS` table [#27400](https://github.com/pingcap/tidb/issues/27400) + - Fix the issue that the `TABLESAMPLE` query result from partitioned tables is not sorted as expected [#27349](https://github.com/pingcap/tidb/issues/27349) + - Remove the unused `/debug/sub-optimal-plan` HTTP API [#27265](https://github.com/pingcap/tidb/pull/27265) + - Fix a bug that the query might return wrong results when the hash partitioned table deals with unsigned data [#26569](https://github.com/pingcap/tidb/issues/26569) + - Fix a bug that creating partition fails if `NO_UNSIGNED_SUBTRACTION` is set [#26765](https://github.com/pingcap/tidb/issues/26765) + - Fix the issue that the `distinct` flag is missing when `Apply` is converted to `Join` [#26958](https://github.com/pingcap/tidb/issues/26958) + - Set a block duration for the newly recovered TiFlash node to avoid blocking queries during this time [#26897](https://github.com/pingcap/tidb/pull/26897) + - Fix a bug that might occur when the CTE is referenced more than once [#26212](https://github.com/pingcap/tidb/issues/26212) + - Fix a CTE bug when MergeJoin is used [#25474](https://github.com/pingcap/tidb/issues/25474) + - Fix a bug that the `SELECT FOR UPDATE` statement does not correctly lock the data when a normal table joins a partitioned table [#26251](https://github.com/pingcap/tidb/issues/26251) + - Fix the issue that the `SELECT FOR UPDATE` statement returns an error when a normal table joins a partitioned table [#26250](https://github.com/pingcap/tidb/issues/26250) + - Fix the issue that `PointGet` does not use the lite version of resolving lock [#26562](https://github.com/pingcap/tidb/pull/26562) + ++ TiKV + + - Fix a panic issue that occurs after TiKV is upgraded from v3.x to later versions [#10902](https://github.com/tikv/tikv/issues/10902) + - Fix the potential disk full issue caused by corrupted snapshot files 
[#10813](https://github.com/tikv/tikv/issues/10813) + - Make the slow log of TiKV coprocessor only consider the time spent on processing requests [#10841](https://github.com/tikv/tikv/issues/10841) + - Drop log instead of blocking threads when the slogger thread is overloaded and the queue is filled up [#10841](https://github.com/tikv/tikv/issues/10841) + - Fix a panic issue that occurs when processing Coprocessor requests times out [#10852](https://github.com/tikv/tikv/issues/10852) + - Fix the TiKV panic issue that occurs when upgrading from a pre-5.0 version with Titan enabled [#10842](https://github.com/tikv/tikv/pull/10842) + - Fix the issue that TiKV of a newer version cannot be rolled back to v5.0.x [#10842](https://github.com/tikv/tikv/pull/10842) + - Fix the issue that TiKV might delete files before ingesting data to RocksDB [#10438](https://github.com/tikv/tikv/issues/10438) + - Fix the parsing failure caused by the left pessimistic locks [#26404](https://github.com/pingcap/tidb/issues/26404) + ++ PD + + - Fix the issue that PD does not fix the down peers in time [#4077](https://github.com/tikv/pd/issues/4077) + - Fix the issue that the replica count of the default placement rules stays constant after `replication.max-replicas` is updated [#3886](https://github.com/tikv/pd/issues/3886) + - Fix a bug that PD might panic when scaling out TiKV [#3868](https://github.com/tikv/pd/issues/3868) + - Fix a bug that the hot Region scheduler cannot work when the cluster has the evict leader scheduler [#3697](https://github.com/tikv/pd/issues/3697) + ++ TiFlash + + - Fix the issue of unexpected results when TiFlash fails to establish MPP connections + - Fix the potential issue of data inconsistency that occurs when TiFlash is deployed on multiple disks + - Fix a bug that MPP queries get wrong results when TiFlash server is under high load + - Fix a potential bug that MPP queries hang forever + - Fix the panic issue when operating store initialization and DDL 
simultaneously + - Fix a bug of incorrect results that occurs when queries contain filters like `CONSTANT`, `<`, `<=`, `>`, `>=`, or `COLUMN` + - Fix the potential panic issue when `Snapshot` is applied simultaneously with multiple DDL operations + - Fix the issue that the store size in metrics is inaccurate under heavy writing + - Fix the potential issue that TiFlash cannot garbage-collect the delta data after running for a long time + - Fix the issue of wrong results when the new collation is enabled + - Fix the potential panic issue that occurs when resolving locks + - Fix a potential bug that metrics display wrong values + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that the average speed is not accurate during data backup and restore [#1405](https://github.com/pingcap/br/issues/1405) + + + Dumpling + + - Fix the issue that Dumpling is pending when `show table status` returns incorrect results in some MySQL versions (8.0.3 and 8.0.23) [#322](https://github.com/pingcap/dumpling/issues/322) + - Fix the CLI compatibility issue with 4.0.x clusters on the default `sort-engine` option [#2373](https://github.com/pingcap/ticdc/issues/2373) + + + TiCDC + + - Fix a bug that the JSON encoding might cause panic when processing a string type value that is `string` or `[]byte` [#2758](https://github.com/pingcap/ticdc/issues/2758) + - Reduce gRPC window size to avoid OOM [#2202](https://github.com/pingcap/ticdc/issues/2202) + - Fix a gRPC `keepalive` error under high memory pressure [#2202](https://github.com/pingcap/ticdc/issues/2202) + - Fix a bug that an unsigned `tinyint` causes TiCDC to panic [#2648](https://github.com/pingcap/ticdc/issues/2648) + - Fix an empty value issue in TiCDC Open Protocol. An empty value is no longer output when there is no change in one transaction. 
[#2612](https://github.com/pingcap/ticdc/issues/2612) + - Fix a bug in DDL handling during manual restarts [#2603](https://github.com/pingcap/ticdc/issues/2603) + - Fix the issue that `EtcdWorker`'s snapshot isolation might be wrongly violated when managing the metadata [#2559](https://github.com/pingcap/ticdc/pull/2559) + - Fix a bug that multiple processors might write data to the same table when TiCDC is rescheduling the table [#2230](https://github.com/pingcap/ticdc/issues/2230) + - Fix a bug that changefeed might be reset unexpectedly when TiCDC gets the `ErrSchemaStorageTableMiss` error [#2422](https://github.com/pingcap/ticdc/issues/2422) + - Fix a bug that changefeed cannot be removed when TiCDC gets the `ErrGCTTLExceeded` error [#2391](https://github.com/pingcap/ticdc/issues/2391) + - Fix a bug that TiCDC fails to synchronize large tables to cdclog [#1259](https://github.com/pingcap/ticdc/issues/1259) [#2424](https://github.com/pingcap/ticdc/issues/2424) diff --git a/releases/release-5.2.0.md b/releases/release-5.2.0.md new file mode 100644 index 000000000000..7da9187a1c01 --- /dev/null +++ b/releases/release-5.2.0.md @@ -0,0 +1,325 @@ +--- +title: TiDB 5.2 Release Notes +--- + +# TiDB 5.2 Release Notes + +Release date: August 27, 2021 + +TiDB version: 5.2.0 + +> **Warning:** +> +> Some known issues are found in this version, and these issues are fixed in new versions. It is recommended that you use the latest 5.2.x version. 
+ +In v5.2, the key new features and improvements are as follows: + +- Support using several functions in expression indexes to greatly improve query performance +- Improve the accuracy of optimizer cardinality estimation to help to select optimal execution plans +- Announce the general availability (GA) for the Lock View feature to observe transaction locking events and troubleshoot deadlock problems +- Add the TiFlash I/O traffic limit feature to improve the stability of read and write for TiFlash +- TiKV introduces a new flow control mechanism to replace the previous RocksDB write stall mechanism to improve the stability of TiKV flow control +- Simplify the operation and maintenance of Data Migration (DM) to reduce the management cost. +- TiCDC supports HTTP protocol OpenAPI to manage TiCDC tasks. It provides a more user-friendly operation method for both Kubernetes and on-premises environments. (Experimental feature) + +## Compatibility changes + +> **Note:** +> +> When upgrading from an earlier TiDB version to v5.2, if you want to know the compatibility change notes of all intermediate versions, you can check the [Release Note](/releases/release-notes.md) for the corresponding version. + +### System variables + +| Variable name | Change type | Description | +| :---------- | :----------- | :----------- | +| [`default_authentication_plugin`](/system-variables.md#default_authentication_plugin) | Newly added | Sets the authentication method that the server advertises. The default value is `mysql_native_password`. | +| [`tidb_enable_auto_increment_in_generated`](/system-variables.md#tidb_enable_auto_increment_in_generated) | Newly added | Determines whether to include the `AUTO_INCREMENT` columns when creating a generated column or an expression index. The default value is `OFF`. 
| +| [`tidb_opt_enable_correlation_adjustment`](/system-variables.md#tidb_opt_enable_correlation_adjustment) | Newly added | Controls whether the optimizer estimates the number of rows based on column order correlation. The default value is `ON`. | +| [`tidb_opt_limit_push_down_threshold`](/system-variables.md#tidb_opt_limit_push_down_threshold) | Newly added | Sets the threshold that determines whether to push the Limit or TopN operator down to TiKV. The default value is `100`. | +| [`tidb_stmt_summary_max_stmt_count`](/system-variables.md#tidb_stmt_summary_max_stmt_count-new-in-v40) | Modified | Sets the maximum number of statements that the statement summary tables store in memory. The default value is changed from `200` to `3000`. | +| `tidb_enable_streaming` | Deprecated | The system variable `enable-streaming` is deprecated and it is not recommended to use it any more. | + +### Configuration file parameters + +| Configuration file | Configuration item | Change type | Description | +| :---------- | :----------- | :----------- | :----------- | +| TiDB configuration file | [`pessimistic-txn.deadlock-history-collect-retryable`](/tidb-configuration-file.md#deadlock-history-collect-retryable) | Newly added | Controls whether the [`INFORMATION\_SCHEMA.DEADLOCKS`](/information-schema/information-schema-deadlocks.md) table collects retryable deadlock error messages or not. | +| TiDB configuration file | [`security.auto-tls`](/tidb-configuration-file.md#auto-tls) | Newly added | Determines whether to automatically generate the TLS certificates on startup. The default value is `false`. | +| TiDB configuration file | [`stmt-summary.max-stmt-count`](/tidb-configuration-file.md#max-stmt-count) | Modified | Indicates the maximum number of SQL categories allowed to be saved in the statement summary tables. The default value is changed from `200` to `3000`. 
| +| TiDB configuration file | `experimental.allow-expression-index` | Deprecated | The `allow-expression-index` configuration in the TiDB configuration file is deprecated. | +| TiKV configuration file | [`raftstore.cmd-batch`](/tikv-configuration-file.md#cmd-batch) | Newly added | Controls whether to enable batch processing of the requests. When it is enabled, the write performance is significantly improved. The default value is `true`. | +| TiKV configuration file | [`raftstore.inspect-interval`](/tikv-configuration-file.md#inspect-interval) | Newly added | At a certain interval, TiKV inspects the latency of the Raftstore component. This configuration item specifies the interval of the inspection. The default value is `500ms`. | +| TiKV configuration file | [`raftstore.max-peer-down-duration`](/tikv-configuration-file.md#max-peer-down-duration) | Modified | Indicates the longest inactive duration allowed for a peer. A peer with timeout is marked as `down`, and PD tries to delete it later. The default value is changed from `5m` to `10m`. | +| TiKV configuration file | [`server.raft-client-queue-size`](/tikv-configuration-file.md#raft-client-queue-size) | Newly added | Specifies the queue size of the Raft messages in TiKV. The default value is `8192`. | +| TiKV configuration file | [`storage.flow-control.enable`](/tikv-configuration-file.md#enable) | Newly added | Determines whether to enable the flow control mechanism. The default value is `true`. | +| TiKV configuration file | [`storage.flow-control.memtables-threshold`](/tikv-configuration-file.md#memtables-threshold) | Newly added | When the number of kvDB memtables reaches this threshold, the flow control mechanism starts to work. The default value is `5`. | +| TiKV configuration file | [`storage.flow-control.l0-files-threshold`](/tikv-configuration-file.md#l0-files-threshold) | Newly added | When the number of kvDB L0 files reaches this threshold, the flow control mechanism starts to work. 
The default value is `9`. | +| TiKV configuration file | [`storage.flow-control.soft-pending-compaction-bytes-limit`](/tikv-configuration-file.md#soft-pending-compaction-bytes-limit) | Newly added | When the pending compaction bytes in KvDB reach this threshold, the flow control mechanism starts to reject some write requests and reports the `ServerIsBusy` error. The default value is "192GB". | +| TiKV configuration file | [`storage.flow-control.hard-pending-compaction-bytes-limit`](/tikv-configuration-file.md#hard-pending-compaction-bytes-limit) | Newly added | When the pending compaction bytes in KvDB reach this threshold, the flow control mechanism rejects all write requests and reports the `ServerIsBusy` error. The default value is "1024GB". | + +### Others + +- Before the upgrade, check whether the value of the [`tidb_evolve_plan_baselines`](/system-variables.md#tidb_evolve_plan_baselines-new-in-v40) system variable is `ON`. If the value is `ON`, set it to `OFF`; otherwise, the upgrade will fail. +- For TiDB clusters upgraded from v4.0 to v5.2, the default value of [`tidb_multi_statement_mode`](/system-variables.md#tidb_multi_statement_mode-new-in-v4011) changes from `WARN` to `OFF`. +- Before the upgrade, check the value of the TiDB configuration [`feedback-probability`](/tidb-configuration-file.md#feedback-probability). If the value is not `0`, the "panic in the recoverable goroutine" error will occur after the upgrade, but this error does not affect the upgrade. +- TiDB is now compatible with MySQL 5.7's noop variable `innodb_default_row_format`. Setting this variable has no effect. [#23541](https://github.com/pingcap/tidb/issues/23541) +- Starting from TiDB 5.2, to improve system security, it is recommended (but not mandatory) to encrypt the transport layer for connections from clients. TiDB provides the Auto TLS feature to automatically configure and enable encryption in TiDB. 
To use the Auto TLS feature, before the TiDB upgrade, set [`security.auto-tls`](/tidb-configuration-file.md#auto-tls) in the TiDB configuration file to `true`. +- Support the `caching_sha2_password` authentication method to make migration from MySQL 8.0 easier and to improve security. + +## New features + +### SQL + +- **Support using several functions in expression indexes** + + The expression index is a type of special index that can be created on an expression. After an expression index is created, TiDB supports expression-based queries, which greatly improves query performance. + + [User document](/sql-statements/sql-statement-create-index.md), [#25150](https://github.com/pingcap/tidb/issues/25150) + +- **Support the `translate` function in Oracle** + + The `translate` function replaces all occurrences of characters by other characters in a string. In TiDB, this function does not treat empty strings as `NULL` as Oracle does. + + [User document](/functions-and-operators/string-functions.md) + +- **Support spilling HashAgg** + + Support spilling HashAgg into disk. When a SQL statement that includes an HashAgg operator causes out of memory (OOM), you can try to set the concurrency of this operator to `1` to trigger disk spill, which alleviates memory stress. + + [User document](/configure-memory-usage.md#other-memory-control-behaviors-of-tidb-server), [#25882](https://github.com/pingcap/tidb/issues/25882) + +- **Improve the accuracy of optimizer cardinality estimation** + + - Improve the accuracy of TiDB's estimation of TopN/Limit. For example, for pagination queries on a large table that contain the `order by col limit x` condition, TiDB can more easily select the right index and reduce query response time. + - Improve the accuracy of out-of-range estimation. For example, even if the statistics for a day have not been updated, TiDB can accurately select the corresponding index for a query that contains `where date=Now()`. 
+ - Introduce the `tidb_opt_limit_push_down_threshold` variable to control the optimizer's behavior of pushing down Limit/TopN, which resolves the issue that Limit/TopN cannot be pushed down in some situations due to wrong estimation. + + [User document](/system-variables.md#tidb_opt_limit_push_down_threshold), [#26085](https://github.com/pingcap/tidb/issues/26085) + +- **Improve index selection of the optimizer** + + Add pruning rules for index selection. Before using the statistics for comparison, TiDB uses these rules to narrow down the scope of possible indexes to be selected, which reduces the possibility of selecting non-optimal indexes. + + [User document](/choose-index.md) + +### Transaction + +- **General availability (GA) for Lock View** + + The Lock View feature provides more information about lock conflicts and lock waits of pessimistic locks, which helps DBAs to observe transaction locking events and troubleshoot deadlock problems. + + In v5.2, the following enhancements are made to Lock View: + + - In addition to the SQL digest column in the Lock View-related tables, add a column to these tables that shows the corresponding normalized SQL text. You do not have to manually query the statement corresponding to a SQL digest. + - Add the `TIDB_DECODE_SQL_DIGESTS` function to query the normalized SQL statements (a form without formats and arguments) corresponding to a set of SQL digests in the cluster. This simplifies the operation of querying the statements that have been historically executed by a transaction. + - Add a column in the `DATA_LOCK_WAITS` and `DEADLOCKS` system tables to show the table name, row ID, index value, and other key information interpreted from a key. This simplifies the operations such as locating the table to which a key belongs and interpreting the key information. + - Support collecting the information of retryable deadlock errors in the `DEADLOCKS` table, which makes it easier to troubleshoot issues caused by such errors. 
The error collection is disabled by default and can be enabled using the `pessimistic-txn.deadlock-history-collect-retryable` configuration. + - Support distinguishing query-executing transactions from idle transactions on the `TIDB_TRX` system table. The `Normal` state is now divided into `Running` and `Idle` states. + + User documents: + + - View the pessimistic lock-waiting events that are occurring on all TiKV nodes in the cluster: [`DATA_LOCK_WAITS`](/information-schema/information-schema-data-lock-waits.md) + - View the deadlock errors recently occurred on a TiDB node: [`DEADLOCKS`](/information-schema/information-schema-deadlocks.md) + - View the executing transaction on a TiDB node: [`TIDB_TRX`](/information-schema/information-schema-tidb-trx.md) + +- Optimize the user scenarios of adding indexes on tables with the `AUTO_RANDOM` or `SHARD_ROW_ID_BITS` attribute. + +### Stability + +- **Add TiFlash I/O traffic limit** + + This new feature is suitable for cloud storage with disk bandwidth of a small and specific size. It is disabled by default. + + TiFlash I/O Rate Limiter provides a new mechanism to avoid excessive race for I/O resources between read and write tasks. It balances the responses to read and write tasks, and limits the rate automatically according to read/write workload. + + [User document](/tiflash/tiflash-configuration.md) + +- **Improve stability of TiKV flow control** + + TiKV introduces a new flow control mechanism to replace the previous RocksDB write stall mechanism. Compared with the write stall mechanism, this new mechanism reduces the impact on the stability of foreground write. + + In specific, when the stress of RocksDB compaction accumulates, flow control is performed at the TiKV scheduler layer instead of the RocksDB layer, to avoid the following issues: + + - Raftstore is stuck, which is caused by RocksDB write stall. + - Raft election times out, and the node leader is transferred as a result. 
+ + This new mechanism improves the flow control algorithm to mitigate QPS decrease when the write traffic is high. + + [User document](/tikv-configuration-file.md#storageflow-control), [#10137](https://github.com/tikv/tikv/issues/10137) + +- **Detect and recover automatically from impact caused by a single slow TiKV node in a cluster** + + TiKV introduces the slow node detection mechanism. This mechanism calculates a score by inspecting the rate of TiKV Raftstore, and then reports the score to PD through store heartbeats. Meanwhile, it adds the `evict-slow-store-scheduler` scheduler on PD to automatically evict the leader on a single slow TiKV node. In this way, the impact on the whole cluster is mitigated. At the same time, more alert items about slow nodes are introduced to help you quickly pinpoint and solve problems. + + [User document]( /tikv-configuration-file.md#inspect-interval), [#10539](https://github.com/tikv/tikv/issues/10539) + +### Data Migration + +- **Simplify operations of Data Migration (DM)** + + DM v2.0.6 can automatically identify the change event (failover or plan change) of the data source using VIP, and can automatically connect to a new data source instance, to reduce data replication latency and simplify operation procedures. + +- TiDB Lightning supports customized line terminators in the CSV data, and is compatible with the MySQL LOAD DATA CSV data formats. You can then use TiDB Lightning directly in your data flow architecture. + + [#1297](https://github.com/pingcap/br/pull/1297) + +### TiDB data share subscription + +TiCDC supports using the HTTP protocol (OpenAPI) to manage TiCDC tasks, which is a more user-friendly operation method for both Kubernetes and on-premises environments. (Experimental feature) + +[#2411](https://github.com/pingcap/ticdc/issues/2411) + +### Deployment and operations + +Support running the `tiup playground` command on Mac computers with Apple M1 chips. 
+ +## Feature Enhancements + ++ Tools + + + TiCDC + + - Add the binary MQ format designed for TiDB. It is more compact than the open protocols based on JSON [#1621](https://github.com/pingcap/ticdc/pull/1621) + - Remove support for file sorter [#2114](https://github.com/pingcap/ticdc/pull/2114) + - Support log rotation configurations [#2182](https://github.com/pingcap/ticdc/pull/2182) + + + TiDB Lightning + + - Support customized line terminators (except `\r` and `\n`) [#1297](https://github.com/pingcap/br/pull/1297) + - Support expression index and the index that depends on virtual generated columns [#1407](https://github.com/pingcap/br/pull/1407) + + + Dumpling + + - Support backing up MySQL compatible databases but does not support `START TRANSACTION ... WITH CONSISTENT SNAPSHOT` or `SHOW CREATE TABLE` [#311](https://github.com/pingcap/dumpling/pull/311) + +## Improvements + ++ TiDB + + - Support pushing down the built-in function `json_unquote()` to TiKV [#24415](https://github.com/pingcap/tidb/issues/24415) + - Support removing the `union` branch from the dual table [#25614](https://github.com/pingcap/tidb/pull/25614) + - Optimize the aggregate operator's cost factor [#25241](https://github.com/pingcap/tidb/pull/25241) + - Allow the MPP outer join to choose the build table based on the table row count [#25142](https://github.com/pingcap/tidb/pull/25142) + - Support balancing the MPP query workload among different TiFlash nodes based on Regions [#24724](https://github.com/pingcap/tidb/pull/24724) + - Support invalidating stale Regions in the cache after the MPP query is executed [#24432](https://github.com/pingcap/tidb/pull/24432) + - Improve the MySQL compatibility of the built-in function `str_to_date` for the format specifiers `%b/%M/%r/%T` [#25767](https://github.com/pingcap/tidb/pull/25767) + - Fix the issue that inconsistent binding caches might be created in multiple TiDB after recreating different bindings for the same query
[#26015](https://github.com/pingcap/tidb/pull/26015) + - Fix the issue that the existing bindings cannot be loaded into cache after upgrade [#23295](https://github.com/pingcap/tidb/pull/23295) + - Support ordering the result of `SHOW BINDINGS` by (`original_sql`, `update_time`) [#26139](https://github.com/pingcap/tidb/pull/26139) + - Improve the logic of query optimization when bindings exist, and reduce optimization times of a query [#26141](https://github.com/pingcap/tidb/pull/26141) + - Support completing the garbage collection automatically for the bindings in the "deleted" status [#26206](https://github.com/pingcap/tidb/pull/26206) + - Support showing whether a binding is used for query optimization in the result of `EXPLAIN VERBOSE` [#26930](https://github.com/pingcap/tidb/pull/26930) + - Add a new status variable `last_plan_binding_update_time` to view the timestamp corresponding to the binding cache in the current TiDB instance [#26340](https://github.com/pingcap/tidb/pull/26340) + - Support reporting an error when starting binding evolution or running `admin evolve bindings` to ban the baseline evolution (currently disabled in the on-premises TiDB version because it is an experimental feature) affecting other features [#26333](https://github.com/pingcap/tidb/pull/26333) + ++ PD + + - Add more QPS dimensions for hot Region scheduling, and support adjusting the priority of the scheduling [#3869](https://github.com/tikv/pd/issues/3869) + - Support hot Region balance scheduling for the write hotspot of TiFlash [#3900](https://github.com/tikv/pd/pull/3900) + ++ TiFlash + + - Add operators: `MOD / %`, `LIKE` + - Add string functions: `ASCII()`, `COALESCE()`, `LENGTH()`, `POSITION()`, `TRIM()` + - Add mathematical functions: `CONV()`, `CRC32()`, `DEGREES()`, `EXP()`, `LN()`, `LOG()`, `LOG10()`, `LOG2()`, `POW()`, `RADIANS()`, `ROUND(decimal)`, `SIN()`, `MOD()` + - Add date functions: `ADDDATE(string, real)`, `DATE_ADD(string, real)`, `DATE()` + - Add other
functions: `INET_NTOA()`, `INET_ATON()`, `INET6_ATON`, `INET6_NTOA()` + - Support Shuffled Hash Join calculation and Shuffled Hash Aggregation calculation in the MPP mode when a new collation is enabled + - Optimize basic code to improve MPP performance + - Support casting the `STRING` type to the `DOUBLE` type + - Optimize the non-joined data in right outer join using multiple threads + - Support automatically invalidating stale Regions in MPP queries + ++ Tools + + + TiCDC + + - Add the concurrency limit to the incremental scan of kv client [#1899](https://github.com/pingcap/ticdc/pull/1899) + - TiCDC can always pull the old value internally [#2271](https://github.com/pingcap/ticdc/pull/2271) + - TiCDC can fail and exit fast when unrecoverable DML errors occur [#1928](https://github.com/pingcap/ticdc/pull/1928) + - `resolve lock` cannot be run immediately after a Region is initialized [#2235](https://github.com/pingcap/ticdc/pull/2235) + - Optimize workerpool to reduce the number of goroutines under high concurrency [#2201](https://github.com/pingcap/ticdc/pull/2201) + + + Dumpling + + - Support always splitting TiDB v3.x tables through `tidb_rowid` to save TiDB memory [#301](https://github.com/pingcap/dumpling/pull/301) + - Reduce access of Dumpling to the `information_schema` to improve stability [#305](https://github.com/pingcap/dumpling/pull/305) + +## Bug Fixes + ++ TiDB + + - Fix the issue that an incorrect result is returned when using merge join on the `SET` type column [#25669](https://github.com/pingcap/tidb/issues/25669) + - Fix the data corruption issue in the `IN` expression's arguments [#25591](https://github.com/pingcap/tidb/issues/25591) + - Avoid the sessions of GC being affected by global variables [#24976](https://github.com/pingcap/tidb/issues/24976) + - Fix the panic issue that occurs when using `limit` in the window function queries [#25344](https://github.com/pingcap/tidb/issues/25344) + - Fix the wrong value returned when querying a 
partitioned table using `Limit` [#24636](https://github.com/pingcap/tidb/issues/24636) + - Fix the issue that `IFNULL` does not correctly take effect on the `ENUM` or `SET` type column [#24944](https://github.com/pingcap/tidb/issues/24944) + - Fix the wrong results caused by changing the `count` in the join subqueries to `first_row` [#24865](https://github.com/pingcap/tidb/issues/24865) + - Fix the query hang issue that occurs when `ParallelApply` is used under the `TopN` operator [#24930](https://github.com/pingcap/tidb/issues/24930) + - Fix the issue that more results than expected are returned when executing SQL statements using multi-column prefix indexes [#24356](https://github.com/pingcap/tidb/issues/24356) + - Fix the issue that the `<=>` operator cannot correctly take effect [#24477](https://github.com/pingcap/tidb/issues/24477) + - Fix the data race issue of the parallel `Apply` operator [#23280](https://github.com/pingcap/tidb/issues/23280) + - Fix the issue that the `index out of range` error is reported when sorting the IndexMerge results of the PartitionUnion operator [#23919](https://github.com/pingcap/tidb/issues/23919) + - Fix the issue that setting the `tidb_snapshot` variable to an unexpectedly large value might damage the transaction isolation [#25680](https://github.com/pingcap/tidb/issues/25680) + - Fix the issue that the ODBC-styled constant (for example, `{d '2020-01-01'}`) cannot be used as the expression [#25531](https://github.com/pingcap/tidb/issues/25531) + - Fix the issue that `SELECT DISTINCT` converted to `Batch Get` causes incorrect results [#25320](https://github.com/pingcap/tidb/issues/25320) + - Fix the issue that backing off queries from TiFlash to TiKV cannot be triggered [#23665](https://github.com/pingcap/tidb/issues/23665) [#24421](https://github.com/pingcap/tidb/issues/24421) + - Fix the `index-out-of-range` error that occurs when checking `only_full_group_by` [#23839](https://github.com/pingcap/tidb/issues/23839) + - Fix
the issue that the result of index join in correlated subqueries is wrong [#25799](https://github.com/pingcap/tidb/issues/25799) + ++ TiKV + + - Fix the wrong `tikv_raftstore_hibernated_peer_state` metric [#10330](https://github.com/tikv/tikv/issues/10330) + - Fix the wrong arguments type of the `json_unquote()` function in the coprocessor [#10176](https://github.com/tikv/tikv/issues/10176) + - Skip clearing callback during graceful shutdown to avoid breaking ACID in some cases [#10353](https://github.com/tikv/tikv/issues/10353) [#10307](https://github.com/tikv/tikv/issues/10307) + - Fix a bug that the read index is shared for replica reads on a Leader [#10347](https://github.com/tikv/tikv/issues/10347) + - Fix the wrong function that casts `DOUBLE` to `DOUBLE` [#25200](https://github.com/pingcap/tidb/issues/25200) + ++ PD + + - Fix the issue that the expected scheduling cannot be generated due to scheduling conflicts among multiple schedulers [#3807](https://github.com/tikv/pd/issues/3807) [#3778](https://github.com/tikv/pd/issues/3778) + ++ TiFlash + + - Fix the issue that TiFlash keeps restarting because of the split failure + - Fix the potential issue that TiFlash cannot delete the delta data + - Fix a bug that TiFlash adds wrong padding for non-binary characters in the `CAST` function + - Fix the issue of incorrect results when handling aggregation queries with complex `GROUP BY` columns + - Fix the TiFlash panic issue that occurs under heavy write pressure + - Fix the panic that occurs when the right join key is not nullable and the left join key is nullable + - Fix the potential issue that the `read-index` requests take a long time + - Fix the panic issue that occurs when the read load is heavy + - Fix the panic issue that might occur when the `Date_Format` function is called with the `STRING` type argument and `NULL` values + ++ Tools + + + TiCDC + + - Fix a bug that TiCDC owner exits abnormally when refreshing the checkpoint
[#1902](https://github.com/pingcap/ticdc/issues/1902) + - Fix a bug that changefeed fails immediately after its successful creation [#2113](https://github.com/pingcap/ticdc/issues/2113) + - Fix a bug that changefeed fails due to the invalid format of rules filter [#1625](https://github.com/pingcap/ticdc/issues/1625) + - Fix the potential DDL loss issue when the TiCDC owner panics [#1260](https://github.com/pingcap/ticdc/issues/1260) + - Fix the CLI compatibility issue with 4.0.x clusters on the default sort-engine option [#2373](https://github.com/pingcap/ticdc/issues/2373) + - Fix a bug that changefeed might be reset unexpectedly when TiCDC gets the `ErrSchemaStorageTableMiss` error [#2422](https://github.com/pingcap/ticdc/issues/2422) + - Fix a bug that changefeed cannot be removed when TiCDC gets the `ErrGCTTLExceeded` error [#2391](https://github.com/pingcap/ticdc/issues/2391) + - Fix a bug that TiCDC fails to synchronize large tables to cdclog [#1259](https://github.com/pingcap/ticdc/issues/1259) [#2424](https://github.com/pingcap/ticdc/issues/2424) + - Fix a bug that multiple processors might write data to the same table when TiCDC is rescheduling the table [#2230](https://github.com/pingcap/ticdc/issues/2230) + + + Backup & Restore (BR) + + - Fix a bug that BR skips restoring all system tables during the restore [#1197](https://github.com/pingcap/br/issues/1197) [#1201](https://github.com/pingcap/br/issues/1201) + - Fix a bug that BR misses DDL operations when restoring cdclog [#870](https://github.com/pingcap/br/issues/870) + + + TiDB Lightning + + - Fix a bug that TiDB Lightning fails to parse the `DECIMAL` data type in Parquet file [#1272](https://github.com/pingcap/br/pull/1272) + - Fix a bug that TiDB Lightning reports the "Error 9007: Write conflict" error when restoring table schemas [#1290](https://github.com/pingcap/br/issues/1290) + - Fix a bug that TiDB Lightning fails to import data due to the overflow of int handle 
[#1291](https://github.com/pingcap/br/issues/1291) + - Fix a bug that TiDB Lightning might get a checksum mismatching error due to data loss in the local backend mode [#1403](https://github.com/pingcap/br/issues/1403) + - Fix the Lightning incompatibility issue with clustered index when TiDB Lightning is restoring table schemas [#1362](https://github.com/pingcap/br/issues/1362) + + + Dumpling + + - Fix a bug that the data export fails because the Dumpling GC safepoint is set too late [#290](https://github.com/pingcap/dumpling/pull/290) + - Fix the Dumpling getting stuck issue when exporting table names from the upstream database in certain MySQL versions [#322](https://github.com/pingcap/dumpling/issues/322) diff --git a/releases/release-5.2.1.md b/releases/release-5.2.1.md new file mode 100644 index 000000000000..55e7ee9b1c90 --- /dev/null +++ b/releases/release-5.2.1.md @@ -0,0 +1,19 @@ +--- +title: TiDB 5.2.1 Release Notes +--- + +# TiDB 5.2.1 Release Notes + +Release date: September 9, 2021 + +TiDB version: 5.2.1 + +## Bug fixes + ++ TiDB + + - Fix an error that occurs during execution caused by the wrong execution plan. The wrong execution plan is caused by the shallow copy of schema columns when pushing down the aggregation operators on partitioned tables. [#27797](https://github.com/pingcap/tidb/issues/27797) [#26554](https://github.com/pingcap/tidb/issues/26554) + ++ TiKV + + - Fix the issue of unavailable TiKV caused by Raftstore deadlock when migrating Regions. The workaround is to disable the scheduling and restart the unavailable TiKV.
[#10909](https://github.com/tikv/tikv/issues/10909) diff --git a/releases/release-notes.md b/releases/release-notes.md index ca02d43d472e..b374e82c5989 100644 --- a/releases/release-notes.md +++ b/releases/release-notes.md @@ -5,12 +5,20 @@ aliases: ['/docs/dev/releases/release-notes/','/docs/dev/releases/rn/'] # TiDB Release Notes +## 5.2 + +- [5.2.1](/releases/release-5.2.1.md) +- [5.2.0](/releases/release-5.2.0.md) + ## 5.1 +- [5.1.2](/releases/release-5.1.2.md) +- [5.1.1](/releases/release-5.1.1.md) - [5.1.0](/releases/release-5.1.0.md) ## 5.0 +- [5.0.4](/releases/release-5.0.4.md) - [5.0.3](/releases/release-5.0.3.md) - [5.0.2](/releases/release-5.0.2.md) - [5.0.1](/releases/release-5.0.1.md) @@ -19,6 +27,8 @@ aliases: ['/docs/dev/releases/release-notes/','/docs/dev/releases/rn/'] ## 4.0 +- [4.0.15](/releases/release-4.0.15.md) +- [4.0.14](/releases/release-4.0.14.md) - [4.0.13](/releases/release-4.0.13.md) - [4.0.12](/releases/release-4.0.12.md) - [4.0.11](/releases/release-4.0.11.md) diff --git a/roadmap.md b/roadmap.md deleted file mode 100644 index 430c89a8c9bf..000000000000 --- a/roadmap.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: TiDB Roadmap -aliases: ['/tidb/v5.0/roadmap','/docs/ROADMAP/','/docs/roadmap/','/docs/stable/roadmap/','/docs/v4.0/roadmap/','/tidb/stable/roadmap','/docs/v3.1/roadmap/','/tidb/v3.1/roadmap','/docs/v3.0/roadmap/','/tidb/v3.0/roadmap','/docs/v2.1/roadmap/','/tidb/v2.1/roadmap'] ---- - - - -# TiDB Roadmap - -## Improve system stability - -- [x] Create bindings for `UPDATE`/`DELETE`/`INSERT` queries [#15827](https://github.com/pingcap/tidb/issues/15827) -- [x] Optimize transaction commits to avoid commit failures caused by DDL execution [#18098](https://github.com/pingcap/tidb/issues/18098) -- [x] Reduce latency jitter [#18005](https://github.com/pingcap/tidb/issues/18005) - -## Improve system performance and reduce latency - -- [x] Optimize the performance and efficiency of bulk deletion 
[#18028](https://github.com/pingcap/tidb/issues/18028) -- [x] Improve memory management [#17479](https://github.com/pingcap/tidb/issues/17479) -- [ ] Improve the accuracy and robustness of index selection [#18065](https://github.com/pingcap/tidb/issues/18065) -- [ ] Improve the performance of partition pruning and data access on the partitioned table [#18016](https://github.com/pingcap/tidb/issues/18016) -- [x] Async Commit. This feature means that the statement being written can return to the client as soon as possible after the prewrite stage finishes, which reduces system latency. [#8316](https://github.com/tikv/tikv/issues/8316) -- [x] Clustered index [#4841](https://github.com/pingcap/tidb/issues/4841) -- [ ] Support cross-region deployment and geo-partition [#18273](https://github.com/pingcap/tidb/issues/18273) - -## Improve system security - -### Authentication - -- [x] Transport Layer Security (TLS) for TiFlash [#18080](https://github.com/pingcap/tidb/issues/18080) -- [x] TLS for internal communication in the TiDB cluster [#529](https://github.com/pingcap/tiup/issues/529) -- [x] SSH LDAP extension for TiUP [#528](https://github.com/pingcap/tiup/issues/528) - -### Transparent Data Encryption (TDE) - -- [ ] Transparent Data Encryption (TDE) for TiFlash [#18082](https://github.com/pingcap/tidb/issues/18082) -- [ ] TDE for PD [#18262](https://github.com/pingcap/tidb/issues/18262) - -### Mask - -- [x] Desensitize the TiDB general log [#18034](https://github.com/pingcap/tidb/issues/18034) - -## Cost-effectiveness - -- [ ] Optimize the performance and stability of TiDB running on AWS i3.xlarge/i3.2xlarge [#18025](https://github.com/pingcap/tidb/issues/18025) -- [ ] Optimize the performance and stability of TiDB running on non-NVMe SSD or on cloud disk (such as AWS EBS gp2) [#18024](https://github.com/pingcap/tidb/issues/18024) - -## New features - -- [ ] Point-in-time recovery [#325](https://github.com/pingcap/br/issues/325) -- [x] Change column types 
[#17526](https://github.com/pingcap/tidb/issues/17526) -- [ ] Easier to discover performance issues and diagnose the causes [#18867](https://github.com/pingcap/tidb/issues/18867) -- [x] Support the collations of `utf8mb4_unicode_ci` and `utf8_unicode_ci` [#17596](https://github.com/pingcap/tidb/issues/17596) -- [ ] Data import - - [ ] TiDB Lightning supports detecting conflicts in the imported data. - - [ ] TiDB Lightning supports distributed parallel import. -- [ ] Data replication: TiCDC is compatible with TiDB Binlog features, which makes using TiCDC easier for TiDB Binlog users [#690](https://github.com/pingcap/ticdc/issues/690) - - [ ] Support exporting data to file systems and cloud storages (S3/GCS) -- [ ] Data disaster recovery: Support snapshot-level consistent data replication in the disaster recovery [#691](https://github.com/pingcap/ticdc/issues/691) - - [ ] The TiDB disaster recovery cluster supports replicating the backup cluster to a consistent state at the snapshot level when a disaster occurs in the main cluster -- [ ] Manage TiCDC using API [#736](https://github.com/pingcap/ticdc/issues/736) -- [x] Data sharing: TiCDC supports the Avro sink, which makes TiCDC compatible with Kafka connect [#660](https://github.com/pingcap/ticdc/issues/660) -- [ ] Support Spark 3.0 [#1173](https://github.com/pingcap/tispark/issues/1173) -- [x] Support `EXCEPT`/`INTERSECT` operators [#18031](https://github.com/pingcap/tidb/issues/18031) -- [x] Support migrating the RDS (such as MySQL/Aurora) on cloud to TiDB [#18629](https://github.com/pingcap/tidb/issues/18629) - -## TiDB Operator - -See [TiDB Operator Roadmap](https://docs.pingcap.com/tidb-in-kubernetes/dev/roadmap). diff --git a/scale-tidb-using-tiup.md b/scale-tidb-using-tiup.md index 761de3d818d2..d0de352bdf7a 100644 --- a/scale-tidb-using-tiup.md +++ b/scale-tidb-using-tiup.md @@ -65,12 +65,12 @@ If you want to add a TiDB node to the `10.0.1.5` host, take the following steps. 
```ini tikv_servers: - host: 10.0.1.5 - ssh_port: 22 - port: 20160 - status_port: 20180 - deploy_dir: /data/deploy/install/deploy/tikv-20160 - data_dir: /data/deploy/install/data/tikv-20160 - log_dir: /data/deploy/install/log/tikv-20160 + ssh_port: 22 + port: 20160 + status_port: 20180 + deploy_dir: /data/deploy/install/deploy/tikv-20160 + data_dir: /data/deploy/install/data/tikv-20160 + log_dir: /data/deploy/install/log/tikv-20160 ``` Here is a PD configuration file template: @@ -80,13 +80,13 @@ If you want to add a TiDB node to the `10.0.1.5` host, take the following steps. ```ini pd_servers: - host: 10.0.1.5 - ssh_port: 22 - name: pd-1 - client_port: 2379 - peer_port: 2380 - deploy_dir: /data/deploy/install/deploy/pd-2379 - data_dir: /data/deploy/install/data/pd-2379 - log_dir: /data/deploy/install/log/pd-2379 + ssh_port: 22 + name: pd-1 + client_port: 2379 + peer_port: 2380 + deploy_dir: /data/deploy/install/deploy/pd-2379 + data_dir: /data/deploy/install/data/pd-2379 + log_dir: /data/deploy/install/log/pd-2379 ``` To view the configuration of the current cluster, run `tiup cluster edit-config `. Because the parameter configuration of `global` and `server_configs` is inherited by `scale-out.yaml` and thus also takes effect in `scale-out.yaml`. @@ -247,6 +247,10 @@ If you want to remove a TiKV node from the `10.0.1.5` host, take the following s > > You can take similar steps to remove the TiDB and PD node. +> **Note:** +> +> The PD Client in TiKV caches the list of PD nodes. The current version of TiKV has a mechanism to automatically and regularly update PD nodes, which can help mitigate the issue of an expired list of PD nodes cached by TiKV. However, after scaling out PD, you should try to avoid directly removing all PD nodes at once that exist before the scaling. If necessary, before making all the previously existing PD nodes offline, make sure to switch the PD leader to a newly added PD node. + 1. 
View the node ID information: {{< copyable "shell-regular" >}} @@ -258,7 +262,7 @@ If you want to remove a TiKV node from the `10.0.1.5` host, take the following s ``` Starting /root/.tiup/components/cluster/v1.5.0/cluster display TiDB Cluster: - TiDB Version: v5.1.0 + TiDB Version: v5.2.1 ID Role Host Ports Status Data Dir Deploy Dir -- ---- ---- ----- ------ -------- ---------- 10.0.1.3:8300 cdc 10.0.1.3 8300 Up - deploy/cdc-8300 diff --git a/scripts/check-conflicts.py b/scripts/check-conflicts.py new file mode 100644 index 000000000000..0f003e5f9659 --- /dev/null +++ b/scripts/check-conflicts.py @@ -0,0 +1,73 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-conflicts.py. 
+ +import re +import sys +import os + +lineNum = 0 +flag = 0 +pos = [] +single = [] +mark = 0 + +for filename in sys.argv[1:]: + single = [] + lineNum = 0 + if os.path.isfile(filename): + with open(filename,'r', encoding='utf-8') as file: + for line in file: + lineNum += 1 + if re.match(r'<{7}.*\n', line): + flag = 1 + single.append(lineNum) + elif re.match(r'={7}\n', line) : + flag = 2 + elif re.match(r'>{7}', line) and flag == 2: + single.append(lineNum) + pos.append(single) + single = [] + flag = 0 + else: + continue + + + if len(pos): + mark = 1 + print("\n" + filename + ": this file has conflicts in the following lines:\n") + for conflict in pos: + if len(conflict) == 2: + print("CONFLICTS: line " + str(conflict[0]) + " to line " + str(conflict[1]) + "\n") + + pos = [] + +if mark: + print("The above conflicts will cause website build failure. Please fix them.") + exit(1) diff --git a/scripts/check-control-char.py b/scripts/check-control-char.py new file mode 100644 index 000000000000..3bf7784c3b71 --- /dev/null +++ b/scripts/check-control-char.py @@ -0,0 +1,69 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-control-char.py. + +import re, sys, os + +# Check control characters. +def check_control_char(filename): + + lineNum = 0 + pos = [] + flag = 0 + + with open(filename,'r', encoding='utf-8') as file: + for line in file: + + lineNum += 1 + + if re.search(r'[\b]', line): + pos.append(lineNum) + flag = 1 + + if flag: + print("\n" + filename + ": this file has control characters in the following lines:\n") + for cc in pos: + print("CONTROL CHARACTERS: L" + str(cc)) + print("\nPlease delete these control characters.") + + return flag + +if __name__ == "__main__": + + count = 0 + + for filename in sys.argv[1:]: + if os.path.isfile(filename): + flag = check_control_char(filename) + if flag: + count+=1 + + if count: + print("\nThe above issues will cause website build failure. Please fix them.") + exit(1) \ No newline at end of file diff --git a/scripts/check-file-encoding.py b/scripts/check-file-encoding.py new file mode 100644 index 000000000000..b207659cfc8b --- /dev/null +++ b/scripts/check-file-encoding.py @@ -0,0 +1,57 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-file-encoding.py. + +import sys, os, codecs + +# Convert the file encoding to the default UTF-8 without BOM. 
+def check_BOM(filename): + BUFSIZE = 4096 + BOMLEN = len(codecs.BOM_UTF8) + + with open(filename, "r+b") as fp: + chunk = fp.read(BUFSIZE) + if chunk.startswith(codecs.BOM_UTF8): + i = 0 + chunk = chunk[BOMLEN:] + while chunk: + fp.seek(i) + fp.write(chunk) + i += len(chunk) + fp.seek(BOMLEN, os.SEEK_CUR) + chunk = fp.read(BUFSIZE) + fp.seek(-BOMLEN, os.SEEK_CUR) + fp.truncate() + print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.") + +if __name__ == "__main__": + + for filename in sys.argv[1:]: + if os.path.isfile(filename): + check_BOM(filename) \ No newline at end of file diff --git a/scripts/check-manual-line-breaks.py b/scripts/check-manual-line-breaks.py new file mode 100644 index 000000000000..771e1658f852 --- /dev/null +++ b/scripts/check-manual-line-breaks.py @@ -0,0 +1,115 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-manual-line-breaks.py. + +import re, sys, os + +# Check manual line break within a paragraph. +def check_manual_break(filename): + + two_lines = [] + metadata = 0 + toggle = 0 + ctoggle = 0 + lineNum = 0 + mark = 0 + + with open(filename,'r', encoding='utf-8') as file: + for line in file: + + lineNum += 1 + + # Count the number of '---' to skip metadata. + if metadata < 2 : + if re.match(r'(\s|\t)*(-){3}', line): + metadata += 1 + continue + else: + # Skip tables and notes. + if re.match(r'(\s|\t)*(\||>)\s*\w*',line): + continue + + # Skip html tags and markdownlint tags. + if re.match(r'(\s|\t)*((<\/*(.*)>)|)\s*\w*',line): + if re.match(r'(\s|\t)*(
|
)',line): + ctoggle = 1 + elif re.match(r'(\s|\t)*(<\/code><\/pre>|<\/table>)',line): + ctoggle = 0 + else: + continue + + # Skip image links. + if re.match(r'(\s|\t)*!\[.+\](\(.+\)|: [a-zA-z]+://[^\s]*)',line): + continue + + # Set a toggle to skip code blocks. + if re.match(r'(\s|\t)*`{3}', line): + toggle = abs(1-toggle) + + if toggle == 1 or ctoggle == 1: + continue + else: + # Keep a record of the current line and the former line. + if len(two_lines)<1: + two_lines.append(line) + continue + elif len(two_lines) == 1: + two_lines.append(line) + else: + two_lines.append(line) + two_lines.pop(0) + + # Compare if there is a manual line break between the two lines. + if re.match(r'(\s|\t)*\n', two_lines[0]) or re.match(r'(\s|\t)*\n', two_lines[1]): + continue + else: + if re.match(r'(\s|\t)*(-|\+|(\d+|\w{1})\.|\*)\s*\w*',two_lines[0]) and re.match(r'(\s|\t)*(-|\+|\d+|\w{1}\.|\*)\s*\w*',two_lines[1]): + continue + + if mark == 0: + print("\n" + filename + ": this file has manual line breaks in the following lines:\n") + mark = 1 + + print("MANUAL LINE BREAKS: L" + str(lineNum)) + return mark + + +if __name__ == "__main__": + + count = 0 + + for filename in sys.argv[1:]: + if os.path.isfile(filename): + mark = check_manual_break(filename) + if mark : + count+=1 + + if count: + print("\nThe above issues will cause website build failure. Please fix them.") + exit(1) \ No newline at end of file diff --git a/scripts/check-tags.py b/scripts/check-tags.py new file mode 100644 index 000000000000..1b1f8f84fd7e --- /dev/null +++ b/scripts/check-tags.py @@ -0,0 +1,184 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-tags.py. 
+ +import re +import sys +import os + +# reference: https://stackoverflow.com/questions/35761133/python-how-to-check-for-open-and-close-tags +def stack_tag(tag, stack): + t = tag[1:-1] + first_space = t.find(' ') + #print(t) + if t[-1:] == '/': + self_closed_tag = True + elif t[:1] != '/': + # Add tag to stack + if first_space == -1: + stack.append(t) + # print("TRACE open", stack) + else: + stack.append(t[:first_space]) + # print("TRACE open", stack) + else: + if first_space != -1: + t = t[1:first_space] + else: + t = t[1:] + + if len(stack) == 0: + # print("No blocks are open; tried to close", t) + closed_tag = True + else: + if stack[-1] == t: + # Close the block + stack.pop() + # print("TRACE close", t, stack) + else: + # print("Tried to close", t, "but most recent open block is", stack[-1]) + if t in stack: + stack.remove(t) + # print("Prior block closed; continuing") + + # if len(stack): + # print("Blocks still open at EOF:", stack) + return stack + +def tag_is_wrapped(pos, content): + tag_start = pos[0] + tag_end = pos[1] + content_previous = content[:tag_start][::-1] # reverse content_previous + content_later = content[tag_end:] + + left_wraps_findall = re.findall(r'`', content_previous) + left_single_backtick = len(left_wraps_findall) % 2 + right_wraps_findall = re.findall(r'`', content_later) + right_single_backtick = len(right_wraps_findall) % 2 + # print(left_single_backtick, right_single_backtick) + + if left_single_backtick != 0 and right_single_backtick != 0: + # print(content_previous.find('`'), content_later.find('`')) + # print(content_previous) + # print(content_later) + return True + else: + # print(content_previous.find('`'), content_later.find('`')) + # print(content_previous) + # print(content_later) + return False + +def filter_frontmatter(content): + # if there is frontmatter, remove it + if content.startswith('---'): + collect = [] + content_finditer = re.finditer(r'---\n', content) + for i in content_finditer: + meta_pos = i.span()[1] + 
collect.append(meta_pos) + + # if the number of "---" >= 2 + if len(collect) >= 2: + filter_point = collect[1] + content = content[filter_point:] + + return content + +def filter_backticks(content, filename): + # remove content wrapped by backticks + backticks = [] + content_findall = re.findall(r'```', content) + if len(content_findall): + content_finditer = re.finditer(r'```', content) + for i in content_finditer: + pos = i.span() + backticks.append(pos) + # e.g. backticks = [[23, 26],[37, 40],[123, 126],[147, 150]] + if len(backticks) % 2 != 0: + # print(len(content_findall)) + # print(backticks) + # print(backticks[0][0], backticks[0][1]) + print(filename, ": Some of your code blocks ``` ``` are not closed. Please close them.") + exit(1) + elif len(backticks) != 0: + backticks_start = backticks[0][0] + backticks_end = backticks[1][1] + # print(backticks_start, backticks_end) + content = content.replace(content[backticks_start:backticks_end],'') + content = filter_backticks(content, filename) + return content + +status_code = 0 + +# print(sys.argv[1:]) +for filename in sys.argv[1:]: + # print("Checking " + filename + "......\n") + if os.path.isfile(filename): + file = open(filename, "r", encoding='utf-8') + content = file.read() + file.close() + + content = filter_frontmatter(content) + content = filter_backticks(content, filename) + # print(content) + result_findall = re.findall(r'<([^\n`>]*)>', content) + if len(result_findall) == 0: + # print("The edited markdown file " + filename + " has no tags!\n") + continue + else: + result_finditer = re.finditer(r'<([^\n`>]*)>', content) + stack = [] + for i in result_finditer: + # print(i.group(), i.span()) + tag = i.group() + pos = i.span() + + if tag[:4] == '': + continue + elif content[pos[0]-2:pos[0]] == '{{' and content[pos[1]:pos[1]+2] == '}}': + # print(tag) # filter copyable shortcodes + continue + elif tag[:5] == '' for i in stack] + print("ERROR: " + filename + ' has unclosed tags: ' + ', '.join(stack) + 
'.\n') + status_code = 1 + +if status_code: + print("HINT: Unclosed tags will cause website build failure. Please fix the reported unclosed tags. You can use backticks `` to wrap them or close them. Thanks.") + exit(1) diff --git a/scripts/check-zh-punctuation.py b/scripts/check-zh-punctuation.py new file mode 100644 index 000000000000..87fc0a58b989 --- /dev/null +++ b/scripts/check-zh-punctuation.py @@ -0,0 +1,84 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-zh-punctuation.py. + +import sys, os, zhon.hanzi + +# Check Chinese punctuation in English files. 
+ +def check_zh_punctuation(filename): + + lineNum = 0 + pos = [] + zh_punc = [] + acceptable_punc = ['–','—'] # em dash and en dash + flag = 0 + + with open(filename, 'r') as file: + for line in file: + + count = 0 + lineNum += 1 + punc_inline = "" + + for char in line: + + if char in zhon.hanzi.punctuation and char not in acceptable_punc : + flag = 1 + if count != 1: + pos.append(lineNum) + punc_inline += char + count = 1 + + if punc_inline != "": + zh_punc.append(punc_inline) + + if flag: + print("\n" + filename + ": this file has Chinese punctuation in the following lines:\n") + + count = 0 + for lineNum in pos: + print("Chinese punctuation: L" + str(lineNum) + " has " + zh_punc[count]) + count += 1 + + return flag + +if __name__ == "__main__": + + count = 0 + + for filename in sys.argv[1:]: + if os.path.isfile(filename): + flag = check_zh_punctuation(filename) + if flag: + count+=1 + + if count: + print("\nThe above issues will ruin your article. Please convert these marks into English punctuation.") + exit(1) \ No newline at end of file diff --git a/scripts/file-format-lint.py b/scripts/file-format-lint.py new file mode 100644 index 000000000000..e4ba16b3e176 --- /dev/null +++ b/scripts/file-format-lint.py @@ -0,0 +1,156 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/file-format-lint.py. + +import re, sys, os, codecs + +# Convert the file encoding to the default UTF-8 without BOM. +def check_BOM(filename): + BUFSIZE = 4096 + BOMLEN = len(codecs.BOM_UTF8) + + with open(filename, "r+b") as fp: + chunk = fp.read(BUFSIZE) + if chunk.startswith(codecs.BOM_UTF8): + i = 0 + chunk = chunk[BOMLEN:] + while chunk: + fp.seek(i) + fp.write(chunk) + i += len(chunk) + fp.seek(BOMLEN, os.SEEK_CUR) + chunk = fp.read(BUFSIZE) + fp.seek(-BOMLEN, os.SEEK_CUR) + fp.truncate() + print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.") + +# Check control characters. +def check_control_char(filename): + + lineNum = 0 + pos = [] + flag = 0 + + with open(filename,'r') as file: + for line in file: + + lineNum += 1 + + if re.search(r'[\b]', line): + pos.append(lineNum) + flag = 1 + + if flag: + print("\n" + filename + ": this file has control characters in the following lines:\n") + for cc in pos: + print("CONTROL CHARACTERS IN L" + str(cc)) + print("Please delete these control characters.") + + return flag + + +# Check manual line break within a paragraph. 
+def check_manual_break(filename): + + two_lines = [] + metadata = 0 + toggle = 0 + lineNum = 0 + mark = 0 + + with open(filename,'r') as file: + for line in file: + + lineNum += 1 + + # Count the number of '---' to skip metadata. + if metadata < 2 : + if re.match(r'(\s|\t)*(-){3}', line): + metadata += 1 + continue + else: + # Skip tables and notes. + if re.match(r'(\s|\t)*(\||>)\s*\w*',line): + continue + + # Skip html tags and markdownlint tags. + if re.match(r'(\s|\t)*((<\/*\w+>)|)\s*\w*',line): + continue + + # Skip links and images. + if re.match(r'(\s|\t)*!*\[.+\](\(.+\)|: [a-zA-z]+://[^\s]*)',line): + continue + + # Set a toggle to skip code blocks. + if re.match(r'(\s|\t)*`{3}', line): + toggle = abs(1-toggle) + + if toggle == 1: + continue + else: + # Keep a record of the current line and the former line. + if len(two_lines)<1: + two_lines.append(line) + continue + elif len(two_lines) == 1: + two_lines.append(line) + else: + two_lines.append(line) + two_lines.pop(0) + + # Compare if there is a manual line break between the two lines. + if re.match(r'(\s|\t)*\n', two_lines[0]) or re.match(r'(\s|\t)*\n', two_lines[1]): + continue + else: + if re.match(r'(\s|\t)*(-|\+|(\d+|\w{1})\.|\*)\s*\w*',two_lines[0]) and re.match(r'(\s|\t)*(-|\+|\d+|\w{1}\.|\*)\s*\w*',two_lines[1]): + continue + + if mark == 0: + print("\n" + filename + ": this file has manual line breaks in the following lines:\n") + mark = 1 + + print("MANUAL LINE BREAKS: L" + str(lineNum)) + return mark + + +if __name__ == "__main__": + + count = 0 + + for filename in sys.argv[1:]: + if os.path.isfile(filename): + check_BOM(filename) + flag = check_control_char(filename) + mark = check_manual_break(filename) + if mark or flag: + count+=1 + + if count: + print("\nThe above issues will cause website build failure. 
Please fix them.") + exit(1) \ No newline at end of file diff --git a/scripts/get-issue-number.py b/scripts/get-issue-number.py new file mode 100644 index 000000000000..e8f8dde36831 --- /dev/null +++ b/scripts/get-issue-number.py @@ -0,0 +1,122 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/get-issue-number.py. 
import re, os, sys
import requests
from tempfile import mkstemp
from shutil import move
from os import remove
from bs4 import BeautifulSoup


def get_issue_link(pr_url):
    """Fetch a GitHub PR page and return the link of its related issue.

    Scrapes the PR description (the first "d-block" table on the page) for a
    paragraph that mentions "Issue Number", "fix" or "bug" (case-insensitive)
    or carries an issue-keyword span, and returns the href of the issue link
    found in it. Returns 0 when the page cannot be fetched or no related
    issue is referenced.
    """

    print("Connecting to " + pr_url + " ...")

    response = requests.get(pr_url)

    if not response:
        print('Connection failed. No html content')
        return 0

    resp = BeautifulSoup(response.text, "html.parser")

    table = resp.find("table", "d-block")
    paragraphs = table.findAll("p")

    match = 0

    for p in paragraphs:
        if isinstance(p.contents[0], str):
            match = re.search(r'(Issue Number)|(fix)|(bug).*', p.contents[0], re.I)

        # NOTE(review): `match` deliberately keeps its value from earlier
        # paragraphs (original behavior) — a later paragraph without text can
        # still be searched for an issue link; confirm this is intended.
        if match or p.find('span', attrs={"class": "issue-keyword"}):
            issue_link = (p.find('a', attrs={"data-hovercard-type": "issue"})
                          or p.find('a', attrs={"class": "issue-link"}))
            if issue_link:
                link = issue_link['href']
                print('Related issue number: ' + link)
                return link

    print("No related issue number.\n")
    return 0


def change_pr_to_issue(filename):
    """Rewrite PR links in a release-notes file to their related issue links.

    Only `[#123](https://...)` links appearing after the '## Bug' heading are
    rewritten; each PR URL is resolved via get_issue_link(). The file is
    rewritten in place through a temporary file.
    """

    fh, target_file_path = mkstemp()
    source_file_path = filename
    match_start = 1  # becomes 0 once the '## Bug' section starts

    # Bug fix: wrap the fd returned by mkstemp with os.fdopen instead of
    # re-opening the path, so the descriptor is not leaked.
    with os.fdopen(fh, 'w', encoding='utf-8') as target_file:
        with open(source_file_path, 'r', encoding='utf-8') as source_file:

            for line in source_file:

                if re.match(r'## Bug', line):
                    match_start = 0
                    print("Match Start\n")

                if match_start == 0:
                    # Bug fix (twice below): "[a-zA-z]" -> "[a-zA-Z]".
                    matchObj = re.search(r'\[#\d+\]\([a-zA-Z]+://[^\s]*\)', line)
                    if matchObj:
                        link = re.search(r'[a-zA-Z]+://[^\s]*[^\)]', matchObj.group())
                        pr_url = link.group()
                        issue_url = get_issue_link(pr_url)

                        # If a related issue link was found, substitute it for
                        # the PR link in the original line.
                        if issue_url:
                            issue_num = re.search(r'\d+', issue_url)
                            issue_md = '[#' + issue_num.group() + '](' + issue_url + ')'
                            line = re.sub(r'\[#\d+\]\([a-zA-Z]+://[^\s]*\)', issue_md, line)
                            print(issue_md + '\n')

                target_file.write(line)

    remove(source_file_path)
    move(target_file_path, source_file_path)


# Example usage:
# get_issue_link("https://github.com/pingcap/tidb/pull/22924")
# change_pr_to_issue('./releases/release-4.0.13.md')

if __name__ == "__main__":

    for filename in sys.argv[1:]:
        if os.path.isfile(filename):
            change_pr_to_issue(filename)
The permission attributes `max_questions`, `max_updated`, `max_connections`, `max_user_connections` are not supported - Password validation is not currently supported [#9741](https://github.com/pingcap/tidb/issues/9741) + +## Authentication plugin status + +TiDB supports multiple authentication methods. These methods can be specified on a per user basis using [`CREATE USER`](/sql-statements/sql-statement-create-user.md) and [`ALTER USER`](/sql-statements/sql-statement-create-user.md). These methods are compatible with the authentication methods of MySQL with the same names. + +You can use one of the following supported authentication methods in the table. To specify a default method that the server advertises when the client-server connection is being established, set the [`default_authentication_plugin`](/system-variables.md#default_authentication_plugin) variable. + +The support for TLS authentication is configured differently. For detailed information, see [Enable TLS between TiDB Clients and Servers](/enable-tls-between-clients-and-servers.md). + +| Authentication Method | Supported | +| :------------------------| :--------------- | +| `mysql_native_password` | Yes | +| `sha256_password` | No | +| `caching_sha2_password` | Yes, since 5.2.0 | +| `auth_socket` | No | +| [TLS Certificates] | Yes | +| LDAP | No | +| PAM | No | +| ed25519 (MariaDB) | No | +| GSSAPI (MariaDB) | No | + +[TLS Certificates]: /enable-tls-between-clients-and-servers.md diff --git a/sql-plan-management.md b/sql-plan-management.md index b2cf69acd896..c93c772b04cd 100644 --- a/sql-plan-management.md +++ b/sql-plan-management.md @@ -162,6 +162,10 @@ explain select * from t1,t2 where t1.id = t2.id; In the example above, the dropped binding in the SESSION scope shields the corresponding binding in the GLOBAL scope. The optimizer does not add the `sm_join(t1, t2)` hint to the statement. The top node of the execution plan in the `explain` result is not fixed to MergeJoin by this hint. 
Instead, the top node is independently selected by the optimizer according to the cost estimation. +> **Note:** +> +> Executing `DROP GLOBAL BINDING` drops the binding in the current tidb-server instance cache and changes the status of the corresponding row in the system table to 'deleted'. This statement does not directly delete the records in the system table, because other tidb-server instances need to read the 'deleted' status to drop the corresponding binding in their cache. For the records in these system tables with the status of 'deleted', at every 100 `bind-info-lease` (the default value is `3s`, and `300s` in total) interval, the background thread triggers an operation of reclaiming and clearing on the bindings of `update_time` before 10 `bind-info-lease` (to ensure that all tidb-server instances have read the 'deleted' status and updated the cache). + ### View binding {{< copyable "sql" >}} @@ -170,7 +174,7 @@ In the example above, the dropped binding in the SESSION scope shields the corre SHOW [GLOBAL | SESSION] BINDINGS [ShowLikeOrWhere] ``` -This statement outputs the execution plan bindings at the GLOBAL or SESSION level. The default scope is SESSION. Currently `SHOW BINDINGS` outputs eight columns, as shown below: +This statement outputs the execution plan bindings at the GLOBAL or SESSION level according to the order of binding update time from the latest to earliest. The default scope is SESSION. 
Currently `SHOW BINDINGS` outputs eight columns, as shown below: | Column Name | Note | | :-------- | :------------- | @@ -184,6 +188,32 @@ This statement outputs the execution plan bindings at the GLOBAL or SESSION leve | collation | Ordering rule | | source | The way in which a binding is created, including `manual` (created by the `create [global] binding` SQL statement), `capture` (captured automatically by TiDB), and `evolve` (evolved automatically by TiDB) | +### Troubleshoot binding + +{{< copyable "sql" >}} + +```sql +SELECT @@[SESSION.]last_plan_from_binding; +``` + +This statement uses the system variable [`last_plan_from_binding`](/system-variables.md#last_plan_from_binding-new-in-v40) to show whether the execution plan used by the last executed statement is from the binding. + +In addition, when you use the `explain format = 'verbose'` statement to view the query plan of a SQL statement, if the SQL statement uses binding, the `explain` statement will return a warning. In this situation, you can check the warning message to learn which binding is used in the SQL statement. + +```sql +-- Create a global binding. + +create global binding for + select * from t +using + select * from t; + +-- Use the `explain format = 'verbose'` statement to check the SQL execution plan. Check the warning message to view the binding used in the query. + +explain format = 'verbose' select * from t; +show warnings; +``` + ## Baseline capturing To enable baseline capturing, set `tidb_capture_plan_baselines` to `on`. The default value is `off`. @@ -226,9 +256,10 @@ set global tidb_evolve_plan_baselines = on; The default value of `tidb_evolve_plan_baselines` is `off`. -> **Note:** +> **Warning:** > -> The feature baseline evolution is not generally available for now. It is **NOT RECOMMENDED** to use it in the production environment. +> + Baseline evolution is an experimental feature. Unknown risks might exist. 
It is **NOT** recommended that you use it in the production environment. +> + This variable is forcibly set to `off` until the baseline evolution feature becomes generally available (GA). If you try to enable this feature, an error is returned. If you have already used this feature in a production environment, disable it as soon as possible. If you find that the binding status is not as expected, contact PingCAP's technical support for help. After the automatic binding evolution feature is enabled, if the optimal execution plan selected by the optimizer is not among the binding execution plans, the optimizer marks the plan as an execution plan that waits for verification. At every `bind-info-lease` (the default value is `3s`) interval, an execution plan to be verified is selected and compared with the binding execution plan that has the least cost in terms of the actual execution time. If the plan to be verified has shorter execution time (the current criterion for the comparison is that the execution time of the plan to be verified is no longer than 2/3 that of the binding execution plan), this plan is marked as a usable binding. The following example describes the process above. @@ -287,3 +318,40 @@ Because the baseline evolution automatically creates a new binding, when the que | `max_execution_time` | The longest duration for a query. | + `read_from_storage` is a special hint in that it specifies whether to read data from TiKV or from TiFlash when reading tables. Because TiDB provides isolation reads, when the isolation condition changes, this hint has a great influence on the evolved execution plan. Therefore, when this hint exists in the initially created binding, TiDB ignores all its evolved bindings. + +## Upgrade checklist + +During cluster upgrade, SQL Plan Management (SPM) might cause compatibility issues and make the upgrade fail. 
To ensure a successful upgrade, you need to include the following list for upgrade precheck: + +* When you upgrade from a version earlier than v5.2.0 (that is, v4.0, v5.0, and v5.1) to the current version, make sure that `tidb_evolve_plan_baselines` is disabled before the upgrade. To disable this variable, perform the following steps. + + {{< copyable "sql" >}} + + ```sql + -- Check whether `tidb_evolve_plan_baselines` is disabled in the earlier version. + + select @@global.tidb_evolve_plan_baselines; + + -- If `tidb_evolve_plan_baselines` is still enabled, disable it. + + set global tidb_evolve_plan_baselines = off; + ``` + +* Before you upgrade from v4.0 to the current version, you need to check whether the syntax of all queries corresponding to the available SQL bindings is correct in the new version. If any syntax errors exist, delete the corresponding SQL binding. To do that, perform the following steps. + + {{< copyable "sql" >}} + + ```sql + -- Check the query corresponding to the available SQL binding in the version to be upgraded. + + select bind_sql from mysql.bind_info where status = 'using'; + + -- Verify the result from the above SQL query in the test environment of the new version. + + bind_sql_0; + bind_sql_1; + ... + + -- In the case of a syntax error (ERROR 1064 (42000): You have an error in your SQL syntax), delete the corresponding binding. + -- For any other errors (for example, tables are not found), it means that the syntax is compatible. No other operation is needed. + ``` diff --git a/sql-statements/sql-statement-admin-show-telemetry.md b/sql-statements/sql-statement-admin-show-telemetry.md new file mode 100644 index 000000000000..960ca3fcd553 --- /dev/null +++ b/sql-statements/sql-statement-admin-show-telemetry.md @@ -0,0 +1,431 @@ +--- +title: ADMIN SHOW TELEMETRY | TiDB SQL Statement Reference +summary: An overview of the usage of ADMIN SHOW TELEMETRY for the TiDB database. 
+--- + +# ADMIN SHOW TELEMETRY + +The `ADMIN SHOW TELEMETRY` statement shows the information that will be reported back to PingCAP as part of the [telemetry](/telemetry.md) feature. + +## Synopsis + +```ebnf+diagram +AdminStmt ::= + 'ADMIN' ( 'SHOW' ( 'DDL' ( 'JOBS' Int64Num? WhereClauseOptional | 'JOB' 'QUERIES' NumList )? | TableName 'NEXT_ROW_ID' | 'SLOW' AdminShowSlow | 'TELEMETRY' ) | 'CHECK' ( 'TABLE' TableNameList | 'INDEX' TableName Identifier ( HandleRange ( ',' HandleRange )* )? ) | 'RECOVER' 'INDEX' TableName Identifier | 'CLEANUP' ( 'INDEX' TableName Identifier | 'TABLE' 'LOCK' TableNameList ) | 'CHECKSUM' 'TABLE' TableNameList | 'CANCEL' 'DDL' 'JOBS' NumList | 'RELOAD' ( 'EXPR_PUSHDOWN_BLACKLIST' | 'OPT_RULE_BLACKLIST' | 'BINDINGS' ) | 'PLUGINS' ( 'ENABLE' | 'DISABLE' ) PluginNameList | 'REPAIR' 'TABLE' TableName CreateTableStmt | ( 'FLUSH' | 'CAPTURE' | 'EVOLVE' ) 'BINDINGS' ) + +``` + +## Examples + +{{< copyable "sql" >}} + +```sql +ADMIN SHOW TELEMETRY\G +``` + +```sql +*************************** 1. 
row *************************** + TRACKING_ID: a1ba1d97-b940-4d5b-a9d5-ddb0f2ac29e7 + LAST_STATUS: { + "check_at": "2021-08-11T08:23:38+02:00", + "is_error": false, + "error_msg": "", + "is_request_sent": true +} +DATA_PREVIEW: { + "hardware": [ + { + "instanceType": "tidb", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "4000", + "cpu": { + "cache": "8192", + "cpuFrequency": "2301.00MHz", + "cpuLogicalCores": "8", + "cpuPhysicalCores": "4" + }, + "memory": { + "capacity": "16410021888" + }, + "disk": { + "ebbca862689fa9fef7c55c3112e375c4ce575fe4": { + "deviceName": "ebbca862689fa9fef7c55c3112e375c4ce575fe4", + "free": "624438726656", + "freePercent": "0.61", + "fstype": "btrfs", + "opts": "bind,rw,relatime", + "path": "fb365c1216b59e1cfc86950425867007a60f4435", + "total": "1022488477696", + "used": "397115568128", + "usedPercent": "0.39" + }, + "nvme0n1p1": { + "deviceName": "nvme0n1p1", + "free": "582250496", + "freePercent": "0.93", + "fstype": "vfat", + "opts": "rw,relatime", + "path": "0fc8c8d71702d81a02e216fb6ef19f4dda4973df", + "total": "627900416", + "used": "45649920", + "usedPercent": "0.07" + }, + "nvme0n1p2": { + "deviceName": "nvme0n1p2", + "free": "701976576", + "freePercent": "0.74", + "fstype": "ext4", + "opts": "rw,relatime", + "path": "/boot", + "total": "1023303680", + "used": "250863616", + "usedPercent": "0.26" + } + } + }, + { + "instanceType": "pd", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "2379", + "cpu": { + "cache": "8192", + "cpuFrequency": "2301.00MHz", + "cpuLogicalCores": "8", + "cpuPhysicalCores": "4" + }, + "memory": { + "capacity": "16410021888" + }, + "disk": { + "ebbca862689fa9fef7c55c3112e375c4ce575fe4": { + "deviceName": "ebbca862689fa9fef7c55c3112e375c4ce575fe4", + "free": "624438726656", + "freePercent": "0.61", + "fstype": "btrfs", + "opts": "bind,rw,relatime", + "path": "fb365c1216b59e1cfc86950425867007a60f4435", + "total": "1022488477696", + "used": 
"397115568128", + "usedPercent": "0.39" + }, + "nvme0n1p1": { + "deviceName": "nvme0n1p1", + "free": "582250496", + "freePercent": "0.93", + "fstype": "vfat", + "opts": "rw,relatime", + "path": "0fc8c8d71702d81a02e216fb6ef19f4dda4973df", + "total": "627900416", + "used": "45649920", + "usedPercent": "0.07" + }, + "nvme0n1p2": { + "deviceName": "nvme0n1p2", + "free": "701976576", + "freePercent": "0.74", + "fstype": "ext4", + "opts": "rw,relatime", + "path": "/boot", + "total": "1023303680", + "used": "250863616", + "usedPercent": "0.26" + } + } + }, + { + "instanceType": "tikv", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "20160", + "cpu": { + "cpuFrequency": "3730MHz", + "cpuLogicalCores": "8", + "cpuPhysicalCores": "4", + "cpuVendorId": "GenuineIntel", + "l1CacheLineSize": "64", + "l1CacheSize": "32768", + "l2CacheLineSize": "64", + "l2CacheSize": "262144", + "l3CacheLineSize": "64", + "l3CacheSize": "8388608" + }, + "memory": { + "capacity": "16803861504" + }, + "disk": { + "36e7dfacbb83843f83075d78aeb4cf850a4882a1": { + "deviceName": "36e7dfacbb83843f83075d78aeb4cf850a4882a1", + "free": "624438726656", + "freePercent": "0.61", + "fstype": "btrfs", + "path": "fb365c1216b59e1cfc86950425867007a60f4435", + "total": "1022488477696", + "used": "398049751040", + "usedPercent": "0.39" + }, + "nvme0n1p1": { + "deviceName": "nvme0n1p1", + "free": "582250496", + "freePercent": "0.93", + "fstype": "vfat", + "path": "0fc8c8d71702d81a02e216fb6ef19f4dda4973df", + "total": "627900416", + "used": "45649920", + "usedPercent": "0.07" + }, + "nvme0n1p2": { + "deviceName": "nvme0n1p2", + "free": "701976576", + "freePercent": "0.69", + "fstype": "ext4", + "path": "/boot", + "total": "1023303680", + "used": "321327104", + "usedPercent": "0.31" + } + } + }, + { + "instanceType": "tiflash", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "3930", + "cpu": { + "cpuFrequency": "3400MHz", + "cpuLogicalCores": "8", + 
"cpuPhysicalCores": "4", + "l1CacheLineSize": "64", + "l1CacheSize": "32768", + "l2CacheLineSize": "64", + "l2CacheSize": "262144", + "l3CacheLineSize": "64", + "l3CacheSize": "8388608" + }, + "memory": { + "capacity": "16410021888" + }, + "disk": { + "36e7dfacbb83843f83075d78aeb4cf850a4882a1": { + "deviceName": "36e7dfacbb83843f83075d78aeb4cf850a4882a1", + "free": "624438726656", + "freePercent": "0.61", + "fstype": "btrfs", + "path": "fb365c1216b59e1cfc86950425867007a60f4435", + "total": "1022488477696", + "used": "398049751040", + "usedPercent": "0.39" + } + } + } + ], + "instances": [ + { + "instanceType": "tidb", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "4000", + "statusHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "statusPort": "10080", + "version": "5.1.1", + "gitHash": "797bddd25310ed42f0791c8eccb78be8cce2f502", + "startTime": "2021-08-11T08:23:38+02:00", + "upTime": "22.210217487s" + }, + { + "instanceType": "pd", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "2379", + "statusHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "statusPort": "2379", + "version": "5.1.1", + "gitHash": "7cba1912b317a533e18b16ea2ba9a14ed2891129", + "startTime": "2021-08-11T08:23:32+02:00", + "upTime": "28.210220368s" + }, + { + "instanceType": "tikv", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "20160", + "statusHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "statusPort": "20180", + "version": "5.1.1", + "gitHash": "4705d7c6e9c42d129d3309e05911ec6b08a25a38", + "startTime": "2021-08-11T08:23:33+02:00", + "upTime": "27.210221447s" + }, + { + "instanceType": "tiflash", + "listenHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "listenPort": "3930", + "statusHostHash": "4b84b15bff6ee5796152495a230e45e3d7e947d9", + "statusPort": "20292", + "version": "v5.1.1", + "gitHash": "c8fabfb50fe28db17cc5118133a69be255c40efd", + "startTime": 
"2021-08-11T08:23:40+02:00", + "upTime": "20.210222452s" + } + ], + "hostExtra": { + "cpuFlags": [ + "fpu", + "vme", + "de", + "pse", + "tsc", + "msr", + "pae", + "mce", + "cx8", + "apic", + "sep", + "mtrr", + "pge", + "mca", + "cmov", + "pat", + "pse36", + "clflush", + "dts", + "acpi", + "mmx", + "fxsr", + "sse", + "sse2", + "ss", + "ht", + "tm", + "pbe", + "syscall", + "nx", + "pdpe1gb", + "rdtscp", + "lm", + "constant_tsc", + "art", + "arch_perfmon", + "pebs", + "bts", + "rep_good", + "nopl", + "xtopology", + "nonstop_tsc", + "cpuid", + "aperfmperf", + "pni", + "pclmulqdq", + "dtes64", + "monitor", + "ds_cpl", + "vmx", + "est", + "tm2", + "ssse3", + "sdbg", + "fma", + "cx16", + "xtpr", + "pdcm", + "pcid", + "sse4_1", + "sse4_2", + "x2apic", + "movbe", + "popcnt", + "tsc_deadline_timer", + "aes", + "xsave", + "avx", + "f16c", + "rdrand", + "lahf_lm", + "abm", + "3dnowprefetch", + "cpuid_fault", + "epb", + "invpcid_single", + "ssbd", + "ibrs", + "ibpb", + "stibp", + "ibrs_enhanced", + "tpr_shadow", + "vnmi", + "flexpriority", + "ept", + "vpid", + "ept_ad", + "fsgsbase", + "tsc_adjust", + "sgx", + "bmi1", + "avx2", + "smep", + "bmi2", + "erms", + "invpcid", + "mpx", + "rdseed", + "adx", + "smap", + "clflushopt", + "intel_pt", + "xsaveopt", + "xsavec", + "xgetbv1", + "xsaves", + "dtherm", + "ida", + "arat", + "pln", + "pts", + "hwp", + "hwp_notify", + "hwp_act_window", + "hwp_epp", + "md_clear", + "flush_l1d", + "arch_capabilities" + ], + "cpuModelName": "Intel(R) Core(TM) i7-10510U CPU @ 1.80GHz", + "os": "linux", + "platform": "fedora", + "platformFamily": "fedora", + "platformVersion": "34", + "kernelVersion": "5.13.5-200.fc34.x86_64", + "kernelArch": "x86_64", + "virtualizationSystem": "kvm", + "virtualizationRole": "host" + }, + "reportTimestamp": 1628663040, + "trackingId": "a1ba1d97-b940-4d5b-a9d5-ddb0f2ac29e7", + "featureUsage": { + "txn": { + "asyncCommitUsed": true, + "onePCUsed": true, + "txnCommitCounter": { + "twoPC": 9, + "asyncCommit": 0, + "onePC": 0 
+ } + }, + "clusterIndex": {}, + "temporaryTable": false, + "cte": { + "nonRecursiveCTEUsed": 0, + "recursiveUsed": 0, + "nonCTEUsed": 13 + } + }, + "windowedStats": [], + "slowQueryStats": { + "slowQueryBucket": {} + } +} +1 row in set (0.0259 sec) +``` + +## MySQL compatibility + +The `ADMIN` statement is a TiDB extension to MySQL syntax. + +## See also + +* [Telemetry](/telemetry.md) +* [`tidb_enable_telemetry` System Variable](/system-variables.md#tidb_enable_telemetry-new-in-v402) diff --git a/sql-statements/sql-statement-create-index.md b/sql-statements/sql-statement-create-index.md index f59aba3a17dd..d725c687ad8d 100644 --- a/sql-statements/sql-statement-create-index.md +++ b/sql-statements/sql-statement-create-index.md @@ -104,43 +104,135 @@ Query OK, 0 rows affected (0.31 sec) ## Expression index +In some scenarios, the filtering condition of a query is based on a certain expression. In these scenarios, the query performance is relatively poor because ordinary indexes cannot take effect, the query can only be executed by scanning the entire table. The expression index is a type of special index that can be created on an expression. Once an expression index is created, TiDB can use the index for the expression-based query, which significantly improves the query performance. + +For example, if you want to create an index based on `lower(col1)`, execute the following SQL statement: + +{{< copyable "sql" >}} + +```sql +CREATE INDEX idx1 ON t1 ((lower(col1))); +``` + +Or you can execute the following equivalent statement: + +{{< copyable "sql" >}} + +```sql +ALTER TABLE t1 ADD INDEX idx1((lower(col1))); +``` + +You can also specify the expression index when you create the table: + +{{< copyable "sql" >}} + +```sql +CREATE TABLE t1(col1 char(10), col2 char(10), key index((lower(col1)))); +``` + +> **Note** +> +> The expression in an expression index must be surrounded by '(' and ')'. Otherwise, a syntax error is reported. 
+ +You can drop an expression index in the same way as dropping an ordinary index: + +{{< copyable "sql" >}} + +```sql +DROP INDEX idx1 ON t1; +``` + > **Note:** +> +> Expression index involves various kinds of expressions. To ensure correctness, only some fully tested functions are allowed for creating an expression index. This means that only these functions are allowed in expressions in a production environment. You can get these functions by querying `tidb_allow_function_for_expression_index` variable. In future versions, more functions might be added to the list. +> +> {{< copyable "sql" >}} +> +> ```sql +> mysql> select @@tidb_allow_function_for_expression_index; +> +--------------------------------------------+ +> | @@tidb_allow_function_for_expression_index | +> +--------------------------------------------+ +> | lower, md5, reverse, upper, vitess_hash | +> +--------------------------------------------+ +> 1 row in set (0.00 sec) +> ``` +> +> For the functions that are not included in the returned result above, those functions are not fully tested and not recommended for a production environment, which can be seen as experimental. Other expressions such as operators, `cast`, and `case when` are also seen as experimental and not recommended for production. However, if you still want to use those expressions, you can make the following configuration in the [TiDB configuration file](/tidb-configuration-file.md#allow-expression-index-new-in-v400): +> +> {{< copyable "sql" >}} +> +> ```sql +> allow-expression-index = true +> ``` +> +> An expression index cannot be created on a primary key. +> +> The expression in an expression index cannot contain the following content: +> +> - Volatile functions, such as `rand()` and `now()`. +> - System variables and user variables. +> - Subqueries. +> - `AUTO_INCREMENT` column. You can remove this restriction by setting the value of `tidb_enable_auto_increment_in_generated` (system variable) to `true`. +> - Window functions. 
+> - ROW functions, such as `create table t (j json, key k (((j,j))));`. +> - Aggregate functions. +> +> An expression index implicitly takes up a name (for example, `_V$_{index_name}_{index_offset}`). If you try to create a new expression index with the name that a column has already had, an error occurs. In addition, if you add a new column with the same name, an error also occurs. > -> Expression index is still an experimental feature. It is **NOT** recommended that you use it in the production environment. +> Make sure that the number of function parameters in the expression of an expression index is correct. +> +> When the expression of an index contains a string-related function, affected by the returned type and the length, creating the expression index might fail. In this situation, you can use the `cast()` function to explicitly specify the returned type and the length. For example, to create an expression index based on the `repeat(a, 3)` expression, you need to modify this expression to `cast(repeat(a, 3) as char(20))`. + +When the expression in a query statement matches the expression in an expression index, the optimizer can choose the expression index for the query. In some cases, the optimizer might not choose an expression index depending on statistics. In this situation, you can force the optimizer to select an expression index by using optimizer hints. + +In the following examples, suppose that you create the expression index `idx` on the expression `lower(col1)`: -To use this feature, make the following setting in [TiDB Configuration File](/tidb-configuration-file.md#allow-expression-index-new-in-v400): +If the results of the query statement are the same expressions, the expression index applies. Take the following statement as an example: {{< copyable "sql" >}} ```sql -allow-expression-index = true +SELECT lower(col1) FROM t; ``` -TiDB can build indexes not only on one or more columns in a table, but also on an expression. 
When queries involve expressions, expression indexes can speed up those queries. +If the same expression is included in the filtering conditions, the expression index applies. Take the following statements as an example: + +{{< copyable "sql" >}} + +```sql +SELECT * FROM t WHERE lower(col1) = "a"; +SELECT * FROM t WHERE lower(col1) > "a"; +SELECT * FROM t WHERE lower(col1) BETWEEN "a" AND "b"; +SELECT * FROM t WHERE lower(col1) in ("a", "b"); +SELECT * FROM t WHERE lower(col1) > "a" AND lower(col1) < "b"; +SELECT * FROM t WHERE lower(col1) > "b" OR lower(col1) < "a"; +``` -Take the following query as an example: +When the queries are sorted by the same expression, the expression index applies. Take the following statement as an example: {{< copyable "sql" >}} ```sql -SELECT * FROM t WHERE lower(name) = "pingcap"; +SELECT * FROM t ORDER BY lower(col1); ``` -If the following expression index is built, you can use the index to speed up the above query: +If the same expression is included in the aggregate (`GROUP BY`) functions, the expression index applies. Take the following statements as an example: {{< copyable "sql" >}} ```sql -CREATE INDEX idx ON t ((lower(name))); +SELECT max(lower(col1)) FROM t; +SELECT min(col1) FROM t GROUP BY lower(col1); ``` +To see the expression corresponding to the expression index, execute `show index`, or check the system tables `information_schema.tidb_indexes` and the table `information_schema.STATISTICS`. The `Expression` column in the output indicates the corresponded expression. For the non-expression indexes, the column shows `NULL`. + The cost of maintaining an expression index is higher than that of maintaining other indexes, because the value of the expression needs to be calculated whenever a row is inserted or updated. The value of the expression is already stored in the index, so this value does not require recalculation when the optimizer selects the expression index. 
Therefore, when the query performance outweighs the insert and update performance, you can consider indexing the expressions. -Expression indexes have the same syntax and limitations as in MySQL. They are implemented by building indexes on generated virtual columns that are invisible, so the supported expressions inherit all [limitations of virtual generated columns](/generated-columns.md#limitations). - -Currently, the optimizer can use the indexed expressions when the expressions are only in the `FIELD` clause, `WHERE` clause, and `ORDER BY` clause. The `GROUP BY` clause will be supported in future updates. +Expression indexes have the same syntax and limitations as in MySQL. They are implemented by creating indexes on generated virtual columns that are invisible, so the supported expressions inherit all [limitations of virtual generated columns](/generated-columns.md#limitations). ## Invisible index @@ -153,15 +245,17 @@ CREATE UNIQUE INDEX c1 ON t1 (c1) INVISIBLE; For details, see [`ALTER INDEX`](/sql-statements/sql-statement-alter-index.md). -## Associated session variables +## Associated system variables -The global variables associated with the `CREATE INDEX` statement are `tidb_ddl_reorg_worker_cnt`, `tidb_ddl_reorg_batch_size` and `tidb_ddl_reorg_priority`. Refer to [system variables](/system-variables.md#tidb_ddl_reorg_worker_cnt) for details. +The system variables associated with the `CREATE INDEX` statement are `tidb_ddl_reorg_worker_cnt`, `tidb_ddl_reorg_batch_size`, `tidb_enable_auto_increment_in_generated`, and `tidb_ddl_reorg_priority`. Refer to [system variables](/system-variables.md#tidb_ddl_reorg_worker_cnt) for details. ## MySQL compatibility * `FULLTEXT`, `HASH` and `SPATIAL` indexes are not supported. * Descending indexes are not supported (similar to MySQL 5.7). * Adding the primary key of the `CLUSTERED` type to a table is not supported. 
For more details about the primary key of the `CLUSTERED` type, refer to [clustered index](/clustered-indexes.md). +* Expression indexes are incompatible with views. When a query is executed using a view, the expression index cannot be used at the same time. +* Expression indexes have compatibility issues with bindings. When the expression of an expression index has a constant, the binding created for the corresponding query expands its scope. For example, suppose that the expression in the expression index is `a+1`, and the corresponding query condition is `a+1 > 2`. In this case, the created binding is `a+? > ?`, which means that the query with the condition such as `a+2 > 2` is also forced to use the expression index and results in a poor execution plan. In addition, this also affects the baseline capturing and baseline evolution in SQL Plan Management (SPM). ## See also diff --git a/sql-statements/sql-statement-kill.md b/sql-statements/sql-statement-kill.md index 2d986bc64db1..9396e4ab3946 100644 --- a/sql-statements/sql-statement-kill.md +++ b/sql-statements/sql-statement-kill.md @@ -34,8 +34,9 @@ Query OK, 0 rows affected (0.00 sec) ## MySQL compatibility -* By design, this statement is not compatible with MySQL by default. This helps prevent against a case of a connection being terminated on the wrong TiDB server, because it is common to place multiple TiDB servers behind a load balancer. -* The `KILL TIDB` statement is a TiDB extension. If you are certain that the session you are attempting to kill is on the same TiDB server, set [`compatible-kill-query = true`](/tidb-configuration-file.md#compatible-kill-query) in your configuration file. +* By design, `KILL` is not compatible with MySQL by default. This helps prevent against a case of a connection being terminated on the wrong TiDB server, because it is common to place multiple TiDB servers behind a load balancer. 
+* DO NOT set [`compatible-kill-query = true`](/tidb-configuration-file.md#compatible-kill-query) in your configuration file UNLESS you are certain that clients will always be connected to the same TiDB node. This is because pressing ctrl+c in the default MySQL client opens a new connection in which `KILL` is executed. If there are proxies in between, the new connection might be routed to a different TiDB node, which possibly kills a different session. +* The `KILL TIDB` statement is a TiDB extension. The feature of this statement is similar to the MySQL `KILL [CONNECTION|QUERY]` command and the MySQL command-line ctrl+c feature. It is safe to use `KILL TIDB` on the same TiDB node. ## See also diff --git a/sql-statements/sql-statement-select.md b/sql-statements/sql-statement-select.md index fc2811a18b60..64afe8dbdca1 100644 --- a/sql-statements/sql-statement-select.md +++ b/sql-statements/sql-statement-select.md @@ -98,7 +98,7 @@ AsOfClause ::= |`ORDER BY` | The `ORDER BY` clause is used to sort the data in ascending or descending order, based on columns, expressions or items in the `select_expr` list.| |`LIMIT` | The `LIMIT` clause can be used to constrain the number of rows. `LIMIT` takes one or two numeric arguments. With one argument, the argument specifies the maximum number of rows to return, the first row to return is the first row of the table by default; with two arguments, the first argument specifies the offset of the first row to return, and the second specifies the maximum number of rows to return. TiDB also supports the `FETCH FIRST/NEXT n ROW/ROWS ONLY` syntax, which has the same effect as `LIMIT n`. You can omit `n` in this syntax and its effect is the same as `LIMIT 1`. | |`Window window_definition`| This is the syntax for window function, which is usually used to do some analytical computation. For more information, refer to [Window Function](/functions-and-operators/window-functions.md).
| -| `FOR UPDATE` | The `SELECT FOR UPDATE` clause locks all the data in the result sets to detect concurrent updates from other transactions. Data that match the query conditions but do not exist in the result sets are not read-locked, such as the row data written by other transactions after the current transaction is started. TiDB uses the [Optimistic Transaction Model](/optimistic-transaction.md). The transaction conflicts are not detected in the statement execution phase. Therefore, the current transaction does not block other transactions from executing `UPDATE`, `DELETE` or `SELECT FOR UPDATE` like other databases such as PostgreSQL. In the committing phase, the rows read by `SELECT FOR UPDATE` are committed in two phases, which means they can also join the conflict detection. If write conflicts occur, the commit fails for all transactions that include the `SELECT FOR UPDATE` clause. If no conflict is detected, the commit succeeds. And a new version is generated for the locked rows, so that write conflicts can be detected when other uncommitted transactions are being committed later. When using pessimistic transaction model, the behavior is basically the same as other databases. Refer to [Difference with MySQL InnoDB](/pessimistic-transaction.md#difference-with-mysql-innodb) to see the details. | +| `FOR UPDATE` | The `SELECT FOR UPDATE` clause locks all the data in the result sets to detect concurrent updates from other transactions. Data that match the query conditions but do not exist in the result sets are not read-locked, such as the row data written by other transactions after the current transaction is started. TiDB uses the [Optimistic Transaction Model](/optimistic-transaction.md). The transaction conflicts are not detected in the statement execution phase. Therefore, the current transaction does not block other transactions from executing `UPDATE`, `DELETE` or `SELECT FOR UPDATE` like other databases such as PostgreSQL. 
In the committing phase, the rows read by `SELECT FOR UPDATE` are committed in two phases, which means they can also join the conflict detection. If write conflicts occur, the commit fails for all transactions that include the `SELECT FOR UPDATE` clause. If no conflict is detected, the commit succeeds. And a new version is generated for the locked rows, so that write conflicts can be detected when other uncommitted transactions are being committed later. When using pessimistic transaction model, the behavior is basically the same as other databases. Refer to [Difference with MySQL InnoDB](/pessimistic-transaction.md#difference-with-mysql-innodb) to see the details. TiDB supports the `NOWAIT` modifier for `FOR UPDATE`. See [TiDB Pessimistic Transaction Model](/pessimistic-transaction.md) for details. | |`LOCK IN SHARE MODE` | To guarantee compatibility, TiDB parses these three modifiers, but will ignore them. | ## Examples diff --git a/sql-statements/sql-statement-show-analyze-status.md b/sql-statements/sql-statement-show-analyze-status.md index 4d31e91e7a8a..874a70595ea0 100644 --- a/sql-statements/sql-statement-show-analyze-status.md +++ b/sql-statements/sql-statement-show-analyze-status.md @@ -1,6 +1,6 @@ --- title: SHOW ANALYZE STATUS -summary: An overview of the usage of SHOW ANALYZE STATUS for the TiDB database。 +summary: An overview of the usage of SHOW ANALYZE STATUS for the TiDB database. aliases: ['/docs/dev/sql-statements/sql-statement-show-analyze-status/'] --- diff --git a/sql-statements/sql-statement-show-histograms.md b/sql-statements/sql-statement-show-histograms.md index ee3ee952f669..333a791532c6 100644 --- a/sql-statements/sql-statement-show-histograms.md +++ b/sql-statements/sql-statement-show-histograms.md @@ -1,6 +1,6 @@ --- title: SHOW STATS_HISTOGRAMS -summary: An overview of the usage of SHOW HISTOGRAMS for TiDB database。 +summary: An overview of the usage of SHOW HISTOGRAMS for TiDB database. 
aliases: ['/docs/dev/sql-statements/sql-statement-show-histograms/'] --- diff --git a/sql-statements/sql-statement-show-variables.md b/sql-statements/sql-statement-show-variables.md index df3237ed7bb9..6a8120d4d8f6 100644 --- a/sql-statements/sql-statement-show-variables.md +++ b/sql-statements/sql-statement-show-variables.md @@ -130,7 +130,7 @@ mysql> SHOW GLOBAL VARIABLES LIKE 'tidb%'; | tidb_stmt_summary_history_size | 24 | | tidb_stmt_summary_internal_query | 0 | | tidb_stmt_summary_max_sql_length | 4096 | -| tidb_stmt_summary_max_stmt_count | 200 | +| tidb_stmt_summary_max_stmt_count | 3000 | | tidb_stmt_summary_refresh_interval | 1800 | | tidb_store_limit | 0 | | tidb_txn_mode | | diff --git a/stale-read.md b/stale-read.md index 3736c20ca58e..ddccc76b31d0 100644 --- a/stale-read.md +++ b/stale-read.md @@ -9,10 +9,6 @@ This document describes the usage scenarios of Stale Read. Stale Read is a mecha When you are using Stale Read, TiDB will randomly select a replica for data reading, which means that all replicas are available for data reading. If your application cannot tolerate reading non-real-time data, do not use Stale Read; otherwise, the data read from the replica might not be the latest data written into TiDB. -> **Warning:** -> -> Currently, Stale Read is an experimental feature. It is not recommended to use it in the production environment. - ## Scenario examples + Scenario one: If a transaction only involves read operations and is tolerant of data staleness to some extent, you can use Stale Read to get historical data. Using Stale Read, TiDB allows query requests to be sent to any replica at the expense of some real-time performance, and thus increases the throughput of query executions. Especially in some scenarios where small tables are queried, if strongly consistent reads are used, the leader might be concentrated on a certain storage node, causing the query pressure to be concentrated on that node as well.
Therefore, that node might become a bottleneck for the whole query. Stale Read, however, can improve the overall query throughput and significantly improve the query performance. diff --git a/statement-summary-tables.md b/statement-summary-tables.md index a0ac5c7e3add..a67732c0f8a9 100644 --- a/statement-summary-tables.md +++ b/statement-summary-tables.md @@ -8,18 +8,19 @@ aliases: ['/docs/dev/statement-summary-tables/','/docs/dev/reference/performance To better handle SQL performance issues, MySQL has provided [statement summary tables](https://dev.mysql.com/doc/refman/5.6/en/statement-summary-tables.html) in `performance_schema` to monitor SQL with statistics. Among these tables, `events_statements_summary_by_digest` is very useful in locating SQL problems with its abundant fields such as latency, execution times, rows scanned, and full table scans. -Therefore, starting from v4.0.0-rc.1, TiDB provides system tables in `information_schema`. These system tables are similar to `events_statements_summary_by_digest` in terms of features. +Therefore, starting from v4.0.0-rc.1, TiDB provides system tables in `information_schema` (_not_ `performance_schema`) that are similar to `events_statements_summary_by_digest` in terms of features. - [`statements_summary`](#statements_summary) - [`statements_summary_history`](#statements_summary_history) -- [`cluster_statements_summary`](#cluster_statements_summary-and-cluster_statements_summary_history) -- [`cluster_statements_summary_history`](#cluster_statements_summary-and-cluster_statements_summary_history) +- [`cluster_statements_summary`](#statements_summary_evicted) +- [`cluster_statements_summary_history`](#statements_summary_evicted) +- [`statements_summary_evicted`](#statements_summary_evicted) This document details these tables and introduces how to use them to troubleshoot SQL performance issues. ## `statements_summary` -`statements_summary` is a system table in `performance_schema`. 
`statements_summary` groups the SQL statements by the SQL digest and the plan digest, and provides statistics for each SQL category. +`statements_summary` is a system table in `information_schema`. `statements_summary` groups the SQL statements by the SQL digest and the plan digest, and provides statistics for each SQL category. The "SQL digest" here means the same as used in slow logs, which is a unique identifier calculated through normalized SQL statements. The normalization process ignores constant, blank characters, and is case insensitive. Therefore, statements with consistent syntaxes have the same digest. For example: @@ -89,11 +90,17 @@ The table schema of `statements_summary_history` is identical to that of `statem The fields `SUMMARY_BEGIN_TIME` and `SUMMARY_END_TIME` represent the start time and the end time of the historical time range. -## `cluster_statements_summary` and `cluster_statements_summary_history` +## `statements_summary_evicted` -`statements_summary` and `statements_summary_history` display the statement summary data of only a single TiDB server. To query the data of the entire cluster, you need to query `cluster_statements_summary` and `cluster_statements_summary_history`. +The `tidb_stmt_summary_max_stmt_count` variable controls the maximum number of statements that the `statement_summary` table stores in memory. The `statement_summary` table uses the LRU algorithm. Once the number of SQL statements exceeds the `tidb_stmt_summary_max_stmt_count` value, the longest unused record is evicted from the table. The number of evicted SQL statements during each period is recorded in the `statements_summary_evicted` table. -`cluster_statements_summary` displays the `statements_summary` data of each TiDB server, and `cluster_statements_summary_history` displays the `statements_summary_history` data of each TiDB server. These two tables use the `INSTANCE` field to represent the address of the TiDB server. 
The other fields are the same as those in `statements_summary`. +The `statements_summary_evicted` table is updated only when a SQL record is evicted from the `statement_summary` table. The `statements_summary_evicted` table only records the period during which the eviction occurs and the number of evicted SQL statements. + +## The `cluster` tables for statement summary + +The `statements_summary`, `statements_summary_history`, and `statements_summary_evicted` tables only show the statement summary of a single TiDB server. To query the data of the entire cluster, you need to query the `cluster_statements_summary`, `cluster_statements_summary_history`, or `cluster_statements_summary_evicted` tables. + +`cluster_statements_summary` displays the `statements_summary` data of each TiDB server. `cluster_statements_summary_history` displays the `statements_summary_history` data of each TiDB server. `cluster_statements_summary_evicted` displays the `statements_summary_evicted` data of each TiDB server. These tables use the `INSTANCE` field to represent the address of the TiDB server. The other fields are the same as those in `statements_summary`. ## Parameter configuration @@ -101,14 +108,14 @@ The following system variables are used to control the statement summary: - `tidb_enable_stmt_summary`: Determines whether to enable the statement summary feature. `1` represents `enable`, and `0` means `disable`. The feature is enabled by default. The statistics in the system table are cleared if this feature is disabled. The statistics are re-calculated next time this feature is enabled. Tests have shown that enabling this feature has little impact on performance. - `tidb_stmt_summary_refresh_interval`: The interval at which the `statements_summary` table is refreshed. The time unit is second (s). The default value is `1800`. -- `tidb_stmt_summary_history_size`: The size of each SQL statement category stored in the `statements_summary_history` table. The default value is `24`.
-- `tidb_stmt_summary_max_stmt_count`: Limits the number of SQL statements that can be stored in statement summary tables. The default value is `200`. If the limit is exceeded, those SQL statements that recently remain unused are cleared. +- `tidb_stmt_summary_history_size`: The size of each SQL statement category stored in the `statements_summary_history` table, which is also the maximum number of records in the `statement_summary_evicted` table. The default value is `24`. +- `tidb_stmt_summary_max_stmt_count`: Limits the number of SQL statements that can be stored in statement summary tables. The default value is `3000`. If the limit is exceeded, those SQL statements that recently remain unused are cleared. These cleared SQL statements are recorded in the `statement_summary_evicted` table. - `tidb_stmt_summary_max_sql_length`: Specifies the longest display length of `DIGEST_TEXT` and `QUERY_SAMPLE_TEXT`. The default value is `4096`. - `tidb_stmt_summary_internal_query`: Determines whether to count the TiDB SQL statements. `1` means to count, and `0` means not to count. The default value is `0`. > **Note:** > -> When a category of SQL statement needs to be removed because the `tidb_stmt_summary_max_stmt_count` limit is exceeded, TiDB removes the data of that SQL statement category of all time ranges from the `statement summary history` table. Therefore, even if the number of SQL statement categories in a certain time range does not reach the limit, the number of SQL statements stored in the `statement summary history` table is less than the actual number of SQL statements. If this situation occurs, you are recommended to increase the value of `tidb_stmt_summary_max_stmt_count`. +> When a category of SQL statement needs to be removed because the `tidb_stmt_summary_max_stmt_count` limit is exceeded, TiDB removes the data of that SQL statement category of all time ranges from the `statement_summary_history` table. 
Therefore, even if the number of SQL statement categories in a certain time range does not reach the limit, the number of SQL statements stored in the `statement_summary_history` table is less than the actual number of SQL statements. If this situation occurs and affects performance, you are recommended to increase the value of `tidb_stmt_summary_max_stmt_count`. An example of the statement summary configuration is shown as follows: @@ -122,6 +129,8 @@ set global tidb_stmt_summary_history_size = 24; After the configuration above takes effect, every 30 minutes the `statements_summary` table is cleared. The `statements_summary_history` table stores data generated over the recent 12 hours. +The `statements_summary_evicted` table records the recent 24 periods during which SQL statements are evicted from the statement summary. The `statements_summary_evicted` table is updated every 30 minutes. + The system variables above have two scopes: global and session. These scopes work differently from other system variables: - After setting the global variable, your setting applies to the whole cluster immediately. @@ -133,6 +142,50 @@ The system variables above have two scopes: global and session. These scopes wor > > The `tidb_stmt_summary_history_size`, `tidb_stmt_summary_max_stmt_count`, and `tidb_stmt_summary_max_sql_length` configuration items affect memory usage. It is recommended that you adjust these configurations based on your needs. It is not recommended to set them too large values. +### Set a proper size for statement summary + +After the system has run for a period of time, you can check the `statement_summary` table to see whether SQL eviction has occurred. 
For example: + +```sql +select @@global.tidb_stmt_summary_max_stmt_count; +select count(*) from information_schema.statements_summary; +``` + +```sql ++-------------------------------------------+ +| @@global.tidb_stmt_summary_max_stmt_count | ++-------------------------------------------+ +| 3000 | ++-------------------------------------------+ +1 row in set (0.001 sec) + ++----------+ +| count(*) | ++----------+ +| 3001 | ++----------+ +1 row in set (0.001 sec) +``` + +You can see that the `statements_summary` table is full of records. Then check the evicted data from the `statements_summary_evicted` table: + +```sql +select * from information_schema.statements_summary_evicted; +``` + +```sql ++---------------------+---------------------+---------------+ +| BEGIN_TIME | END_TIME | EVICTED_COUNT | ++---------------------+---------------------+---------------+ +| 2020-01-02 16:30:00 | 2020-01-02 17:00:00 | 59 | ++---------------------+---------------------+---------------+ +| 2020-01-02 16:00:00 | 2020-01-02 16:30:00 | 45 | ++---------------------+---------------------+---------------+ +2 rows in set (0.001 sec) +``` + +From the result above, you can see that a maximum of 59 SQL categories are evicted, which indicates that you need to increase the size of the statement summary by at least 59 records (that is, to at least 3059 records in total). + ## Limitation The statement summary tables have the following limitation: @@ -197,7 +250,9 @@ The result shows that the following three categories of SQL statements consume t 3 rows in set (0.00 sec) ``` -### Fields description +## Fields description + +### `statements_summary` fields description The following are descriptions of fields in the `statements_summary` table. @@ -285,3 +340,9 @@ Transaction-related fields: - `BACKOFF_TYPES`: All types of errors that require retries and the number of retries for each type. The format of the field is `type:number`. If there is more than one error type, each is separated by a comma, like `txnLock:2,pdRPC:1`.
- `AVG_AFFECTED_ROWS`: The average number of rows affected. - `PREV_SAMPLE_TEXT`: When the current SQL statement is `COMMIT`, `PREV_SAMPLE_TEXT` is the previous statement to `COMMIT`. In this case, SQL statements are grouped by the digest and `prev_sample_text`. This means that `COMMIT` statements with different `prev_sample_text` are grouped to different rows. When the current SQL statement is not `COMMIT`, the `PREV_SAMPLE_TEXT` field is an empty string. + +### `statements_summary_evicted` fields description + +- `BEGIN_TIME`: Records the starting time. +- `END_TIME`: Records the ending time. +- `EVICTED_COUNT`: The number of SQL categories that are evicted during the record period. diff --git a/support.md b/support.md index d2bd71a71989..ba8cf097a1eb 100644 --- a/support.md +++ b/support.md @@ -8,7 +8,7 @@ aliases: ['/docs/dev/support/','/docs/dev/support-resources/'] You can reach out to the community members via any one of the following ways: -+ Slack Channel: [https://pingcap.com/tidbslack](https://pingcap.com/tidbslack) ++ Slack Channel: [https://slack.tidb.io/](https://slack.tidb.io/invite?team=tidb-community&channel=everyone&ref=pingcap) + Google Groups: [https://groups.google.com/forum/#!forum/tidb-user](https://groups.google.com/forum/#!forum/tidb-user) + Stack Overflow: [https://stackoverflow.com/questions/tagged/tidb](https://stackoverflow.com/questions/tagged/tidb) + Twitter: [https://twitter.com/PingCAP](https://twitter.com/PingCAP) diff --git a/sync-diff-inspector/sync-diff-inspector-overview.md b/sync-diff-inspector/sync-diff-inspector-overview.md index a2674c04f05e..bcbecd9df8cb 100644 --- a/sync-diff-inspector/sync-diff-inspector-overview.md +++ b/sync-diff-inspector/sync-diff-inspector-overview.md @@ -142,7 +142,7 @@ fix-sql-file = "fix.sql" # Specifies the column used to divide data into chunks. If you do not configure it, # sync-diff-inspector chooses an appropriate column (primary key, unique key, or a field with index). 
- index-field = "id" + index-fields = "id" # Specifies the range of the data to be checked # It needs to comply with the syntax of the WHERE clause in SQL. diff --git a/synchronous-replication.md b/synchronous-replication.md deleted file mode 100644 index 16b51c2a31b3..000000000000 --- a/synchronous-replication.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -title: Synchronous Replication for Dual Data Centers -summary: Learn how to configure synchronous replication for dual data centers. ---- - -# Synchronous Replication for Dual Data Centers - -This document introduces how to configure synchronous replication for dual data centers. - -> **Warning:** -> -> Synchronous replication is still an experimental feature. Do not use it in a production environment. - -In the scenario of dual data centers, one is the primary center and the other is the DR (data recovery) center. When a Region has an odd number of replicas, more replicas are placed in the primary center. When the DR center is down for more than a specified period of time, the asynchronous mode is used by default for the replication between two centers. - -To use the synchronous mode, you can configure it in the PD configuration file or change the replication mode manually using pd-ctl. - -## Enable synchronous replication in the PD configuration file - -The replication mode is controlled by PD. You can configure it in the PD configuration file when deploying a cluster. See the following example: - -{{< copyable "" >}} - -```toml -[replication-mode] -replication-mode = "dr-auto-sync" -[replication-mode.dr-auto-sync] -label-key = "zone" -primary = "z1" -dr = "z2" -primary-replicas = 2 -dr-replicas = 1 -wait-store-timeout = "1m" -wait-sync-timeout = "1m" -``` - -In the configuration above: - -+ `dr-auto-sync` is the mode to enable synchronous replication. -+ The label key `zone` is used to distinguish different data centers. 
-+ TiKV instances with the `"z1"` value are considered in the primary data center, and TiKV instances with `"z2"` are in the DR data center. -+ `primary-replicas` is the number of replicas that should be placed in the primary data center. -+ `dr-replicas` is the number of replicas that should be placed in the DR data center. -+ `wait-store-timeout` is the time to wait before falling back to asynchronous replication. - -To check the current replication state of the cluster, use the following URL: - -{{< copyable "shell-regular" >}} - -```bash -% curl http://pd_ip:pd_port/pd/api/v1/replication_mode/status -``` - -```bash -{ - "mode": "dr-auto-sync", - "dr-auto-sync": { - "label-key": "zone", - "state": "sync" - } -} -``` - -> **Note:** -> -> The replication state of the cluster indicates how all Regions are replicated, with the options of `async`, `sync-recover`, and `sync`. - -After the cluster state becomes `sync`, it will not become `async` unless the number of down instances is larger than the specified number of replicas in either data center. Once the cluster state becomes `async`, PD requests TiKV to change the replication mode to `asynchronous` and checks whether TiKV instances are recovered from time to time. When the number of down instances is smaller than the number of replicas in both data centers, the cluster enters the `sync-recover` state, and then requests TiKV to change the replication mode to `synchronous`. After all Regions become `synchronous`, the cluster becomes `sync` again. - -## Change the replication mode manually - -You can use [`pd-ctl`](/pd-control.md) to change a cluster from `asynchronous` to `synchronous`. 
- -{{< copyable "shell-regular" >}} - -```bash ->> config set replication-mode dr-auto-sync -``` - -Or change back to `asynchronous`: - -{{< copyable "shell-regular" >}} - -```bash ->> config set replication-mode majority -``` - -You can also update the label key: - -{{< copyable "shell-regular" >}} - -```bash ->> config set replication-mode dr-auto-sync label-key dc -``` diff --git a/system-variables.md b/system-variables.md index ea7a58394fda..d3985068ba98 100644 --- a/system-variables.md +++ b/system-variables.md @@ -84,6 +84,12 @@ mysql> SELECT * FROM t1; - Default value: `ON` - Controls whether statements should automatically commit when not in an explicit transaction. See [Transaction Overview](/transaction-overview.md#autocommit) for more information. +### block_encryption_mode + +- Scope: SESSION | GLOBAL +- Default value: `aes-128-ecb` +- Defines the encryption mode for the `AES_ENCRYPT()` and `AES_DECRYPT()` functions. + ### character_set_client - Scope: SESSION | GLOBAL @@ -112,12 +118,31 @@ mysql> SELECT * FROM t1; - Scope: SESSION | GLOBAL - Default value: `utf8mb4` -- The character set used for new schemas when no character set is specified in the `CREATE SCHEMA` statement. +- The default character set for the server. + +### collation_connection + +- Scope: SESSION | GLOBAL +- Default value: `utf8mb4_bin` +- This variable indicates the collation for string literals that do not have a specified collation. + +### collation_database + +- Scope: SESSION | GLOBAL +- Default value: `utf8mb4_bin` +- This variable indicates the collation of the default database in use. **It is NOT recommended to set this variable**. When a new default database is selected, the server changes the variable value. -### `cte_max_recursion_depth` +### collation_server -- Scope:SESSION | GLOBAL -- Default value:1000 +- Scope: SESSION | GLOBAL +- Default value: `utf8mb4_bin` +- The default collation for the server. 
+ +### cte_max_recursion_depth + +- Scope: SESSION | GLOBAL +- Default value: `1000` +- Range: `[0, 4294967295]` - Controls the maximum recursion depth in Common Table Expressions. ### datadir @@ -133,12 +158,46 @@ mysql> SELECT * FROM t1; - Default value: `300` - DDL operations whose execution time exceeds the threshold value are output to the log. The unit is millisecond. +### default_authentication_plugin + +- Scope: GLOBAL +- Default value: `mysql_native_password` +- Possible values: `mysql_native_password`, `caching_sha2_password` +- This variable sets the authentication method that the server advertises when the server-client connection is being established. Possible values for this variable are documented in [Authentication plugin status](/security-compatibility-with-mysql.md#authentication-plugin-status). +- Value options: `mysql_native_password` and `caching_sha2_password`. For more details, see [Authentication plugin status](/security-compatibility-with-mysql.md#authentication-plugin-status). + +### default_week_format + +- Scope: SESSION | GLOBAL +- Default value: `0` +- Range: `[0, 7]` +- Sets the week format used by the `WEEK()` function. + ### foreign_key_checks - Scope: SESSION | GLOBAL - Default value: `OFF` - For compatibility, TiDB returns foreign key checks as `OFF`. +### group_concat_max_len + +- Scope: SESSION | GLOBAL +- Default value: `1024` +- Range: `[4, 18446744073709551615]` +- The maximum buffer size for items in the `GROUP_CONCAT()` function. + +### have_openssl + +- Scope: NONE +- Default value: `DISABLED` +- A read-only variable for MySQL compatibility. Set to `YES` by the server when the server has TLS enabled. + +### have_ssl + +- Scope: NONE +- Default value: `DISABLED` +- A read-only variable for MySQL compatibility. Set to `YES` by the server when the server has TLS enabled. 
+ ### hostname - Scope: NONE @@ -166,6 +225,13 @@ mysql> SELECT * FROM t1; - Unit: Seconds - This variable represents the idle timeout of the interactive user session, which is measured in seconds. Interactive user session refers to the session established by calling [`mysql_real_connect()`](https://dev.mysql.com/doc/c-api/5.7/en/mysql-real-connect.html) API using the `CLIENT_INTERACTIVE` option (for example, MySQL shell client). This variable is fully compatible with MySQL. +### last_insert_id + +- Scope: SESSION +- Default value: `0` +- This variable returns the last `AUTO_INCREMENT` or `AUTO_RANDOM` value generated by an insert statement. +- The value of `last_insert_id` is the same as the value returned by the function `LAST_INSERT_ID()`. + ### last_plan_from_binding New in v4.0 - Scope: SESSION @@ -181,9 +247,22 @@ mysql> SELECT * FROM t1; ### license - Scope: NONE -- Default value: Apache License 2.0 +- Default value: `Apache License 2.0` - This variable indicates the license of your TiDB server installation. +### log_bin + +- Scope: NONE +- Default value: `OFF` +- This variable indicates whether [TiDB Binlog](/tidb-binlog/tidb-binlog-overview.md) is used. + +### max_allowed_packet + +- Scope: GLOBAL | SESSION +- Default value: `67108864` +- Range: `[1024, 1073741824]` +- The maximum size of a packet for the MySQL protocol. + ### max_execution_time - Scope: SESSION | GLOBAL @@ -196,6 +275,18 @@ mysql> SELECT * FROM t1; > > Unlike in MySQL, the `max_execution_time` system variable currently works on all kinds of statements in TiDB, not only restricted to the `SELECT` statement. The precision of the timeout value is roughly 100ms. This means the statement might not be terminated in accurate milliseconds as you specify. +### plugin_dir + +- Scope: INSTANCE +- Default value: "" +- Indicates the directory to load plugins as specified by a command-line flag. 
+ +### plugin_load + +- Scope: INSTANCE +- Default value: "" +- Indicates the plugins to load when TiDB is started. These plugins are specified by a command-line flag and separated by commas. + ### port - Scope: NONE @@ -203,12 +294,39 @@ mysql> SELECT * FROM t1; - Range: `[0, 65535]` - The port that the `tidb-server` is listening on when speaking the MySQL protocol. +### skip_name_resolve New in v5.2.0 + +- Scope: GLOBAL +- Default value: `OFF` +- This variable controls whether the `tidb-server` instance resolves hostnames as a part of the connection handshake. +- When the DNS is unreliable, you can enable this option to improve network performance. + +> **Note:** +> +> When `skip_name_resolve=ON`, users with a hostname in their identity will no longer be able to log into the server. For example: +> +> ```sql +> CREATE USER 'appuser'@'apphost' IDENTIFIED BY 'app-password'; +> ``` +> +> In this example, it is recommended to replace `apphost` with an IP address or the wildcard (`%`). + ### socket - Scope: NONE - Default value: "" - The local unix socket file that the `tidb-server` is listening on when speaking the MySQL protocol. +### sql_log_bin + +- Scope: SESSION | GLOBAL +- Default value: `ON` +- Indicates whether to write changes to [TiDB Binlog](/tidb-binlog/tidb-binlog-overview.md) or not. + +> **Note:** +> +> It is not recommended to set `sql_log_bin` as a global variable because the future versions of TiDB might only allow setting this as a session variable. + ### sql_mode - Scope: SESSION | GLOBAL @@ -222,6 +340,24 @@ mysql> SELECT * FROM t1; - Range: `[0, 18446744073709551615]` - The maximum number of rows returned by the `SELECT` statements. +### ssl_ca + +- Scope: NONE +- Default value: "" +- The location of the certificate authority file (if there is one). + +### ssl_cert + +- Scope: NONE +- Default value: "" +- The location of the certificate file (if there is a file) that is used for SSL/TLS connections. 
+ +### ssl_key + +- Scope: NONE +- Default value: "" +- The location of the private key file (if there is one) that is used for SSL/TLS connections. + ### system_time_zone - Scope: NONE @@ -245,6 +381,11 @@ mysql> SELECT * FROM t1; - Default value: "" - This variable is used to specify a list of storage engines that might fall back to TiKV. If the execution of a SQL statement fails due to a failure of the specified storage engine in the list, TiDB retries executing this SQL statement with TiKV. This variable can be set to "" or "tiflash". When this variable is set to "tiflash", if the execution of a SQL statement fails due to a failure of TiFlash, TiDB retries executing this SQL statement with TiKV. +### tidb_allow_function_for_expression_index New in v5.2.0 + +- Scope: NONE +- This variable is used to show the functions that are allowed to be used for creating expression indexes. + ### tidb_allow_mpp New in v5.0 - Scope: SESSION | GLOBAL @@ -264,8 +405,8 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a ### tidb_analyze_version New in v5.1.0 - Scope: SESSION | GLOBAL -- Value options: `1` and `2` - Default value: `2` +- Range: `[1, 2]` - Controls how TiDB collects statistics. - In versions before v5.1.0, the default value of this variable is `1`. In v5.1.0, the default value of this variable is `2`, which serves as an experimental feature. For detailed introduction, see [Introduction to Statistics](/statistics.md). @@ -493,11 +634,21 @@ Constraint checking is always performed in place for pessimistic transactions (d > - If you have enabled TiDB Binlog, enabling this variable cannot improve the performance. To improve the performance, it is recommended to use [TiCDC](/ticdc/ticdc-overview.md) instead. > - Enabling this parameter only means that Async Commit becomes an optional mode of transaction commit. In fact, the most suitable mode of transaction commit is determined by TiDB. 
+### tidb_enable_auto_increment_in_generated + +- Scope: SESSION | GLOBAL +- Default value: `OFF` +- This variable is used to determine whether to include the `AUTO_INCREMENT` columns when creating a generated column or an expression index. + ### tidb_enable_cascades_planner +> **Warning:** +> +> Currently, cascades planner is an experimental feature. It is not recommended that you use it in the production environment. + - Scope: SESSION | GLOBAL - Default value: `OFF` -- This variable is used to control whether to enable the cascades planner, which is currently considered experimental. +- This variable is used to control whether to enable the cascades planner. ### tidb_enable_chunk_rpc New in v4.0 @@ -534,6 +685,10 @@ Constraint checking is always performed in place for pessimistic transactions (d ### tidb_enable_fast_analyze +> **Warning:** +> +> Currently, `Fast Analyze` is an experimental feature. It is not recommended that you use it in the production environment. + - Scope: SESSION | GLOBAL - Default value: `OFF` - This variable is used to set whether to enable the statistics `Fast Analyze` feature. @@ -551,20 +706,10 @@ Constraint checking is always performed in place for pessimistic transactions (d > > Currently, List partition and List COLUMNS partition are experimental features. It is not recommended that you use it in the production environment. -- Scope: SESSION +- Scope: SESSION | GLOBAL - Default value: `OFF` - This variable is used to set whether to enable the `LIST (COLUMNS) TABLE PARTITION` feature. -### `tidb_partition_prune_mode` New in v5.1 - -> **Warning:** -> -> Currently, the dynamic mode for partitioned tables is an experimental feature. It is not recommended that you use it in the production environment. - -- Scope: SESSION | GLOBAL -- Default value: `static` -- Specifies whether to enable `dynamic` mode for partitioned tables. For details about the dynamic mode, see [Dynamic Mode for Partitioned Tables](/partitioned-table.md#dynamic-mode). 
- ### tidb_enable_noop_functions New in v4.0 - Scope: SESSION | GLOBAL @@ -577,11 +722,11 @@ Constraint checking is always performed in place for pessimistic transactions (d * `CREATE TEMPORARY TABLE` syntax * `DROP TEMPORARY TABLE` syntax * `START TRANSACTION READ ONLY` and `SET TRANSACTION READ ONLY` syntax - * The `tx_read_only`, `transaction_read_only`, `offline_mode`, `super_read_only` and `read_only` system variables + * The `tx_read_only`, `transaction_read_only`, `offline_mode`, `super_read_only`, `read_only` and `sql_auto_is_null` system variables -> **Note:** +> **Warning:** > -> Only the default value of `OFF` can be considered safe. Setting `tidb_enable_noop_functions=1` might lead to unexpected behaviors in your application, because it permits TiDB to ignore certain syntax without providing an error. +> Only the default value of `OFF` can be considered safe. Setting `tidb_enable_noop_functions=1` might lead to unexpected behaviors in your application, because it permits TiDB to ignore certain syntax without providing an error. For example, the syntax `START TRANSACTION READ ONLY` is permitted, but the transaction remains in read-write mode. ### tidb_enable_parallel_apply New in v5.0 @@ -660,10 +805,11 @@ Query OK, 0 rows affected (0.09 sec) - Default value: `ON` - This variable is used to control whether to enable the support for window functions. Note that window functions may use reserved keywords. This might cause SQL statements that could be executed normally cannot be parsed after upgrading TiDB. In this case, you can set `tidb_enable_window_function` to `OFF`. -### `tidb_enforce_mpp` New in v5.1 +### tidb_enforce_mpp New in v5.1 - Scope: SESSION -- Default value: `OFF`. To change this default value, modify the [`performance.enforce-mpp`](/tidb-configuration-file.md#enforce-mpp) configuration value. 
+- Default value: `OFF` +- To change this default value, modify the [`performance.enforce-mpp`](/tidb-configuration-file.md#enforce-mpp) configuration value. - Controls whether to ignore the optimizer's cost estimation and to forcibly use TiFlash's MPP mode for query execution. The value options are as follows: - `0` or `OFF`, which means that the MPP mode is not forcibly used (by default). - `1` or `ON`, which means that the cost estimation is ignored and the MPP mode is forcibly used. Note that this setting only takes effect when `tidb_allow_mpp=true`. @@ -760,6 +906,7 @@ For a system upgraded to v5.0 from an earlier version, if you have not modified - Scope: GLOBAL - Default value: `10m0s` +- Range: `[10m0s, 8760h0m0s]` - The time limit during which data is retained for each GC, in the format of Go Duration. When a GC happens, the current time minus this value is the safe point. > **Note:** @@ -773,6 +920,7 @@ For a system upgraded to v5.0 from an earlier version, if you have not modified - Scope: GLOBAL - Default value: `10m0s` +- Range: `[10m0s, 8760h0m0s]` - Specifies the GC interval, in the format of Go Duration, for example, `"1h30m"`, and `"15m"` ### tidb_gc_scan_lock_mode New in v5.0 @@ -945,7 +1093,7 @@ For a system upgraded to v5.0 from an earlier version, if you have not modified ### tidb_memory_usage_alarm_ratio -- Scope: SESSION +- Scope: INSTANCE - Default value: `0.8` - TiDB triggers an alarm when the percentage of the memory it takes exceeds a certain threshold. For the detailed usage description of this feature, see [`memory-usage-alarm-ratio`](/tidb-configuration-file.md#memory-usage-alarm-ratio-new-in-v409). - You can set the initial value of this variable by configuring [`memory-usage-alarm-ratio`](/tidb-configuration-file.md#memory-usage-alarm-ratio-new-in-v409). 
@@ -1048,6 +1196,12 @@ mysql> desc select count(distinct a) from test.t; 4 rows in set (0.00 sec) ``` +### tidb_opt_enable_correlation_adjustment + +- Scope: SESSION | GLOBAL +- Default value: `ON` +- This variable is used to control whether the optimizer estimates the number of rows based on column order correlation + ### tidb_opt_insubq_to_join_and_agg - Scope: SESSION | GLOBAL @@ -1071,12 +1225,20 @@ mysql> desc select count(distinct a) from test.t; select * from t, t1 where t.a=t1.a ``` +### tidb_opt_limit_push_down_threshold + +- Scope: SESSION | GLOBAL +- Default value: `100` +- Range: `[0, 2147483647]` +- This variable is used to set the threshold that determines whether to push the Limit or TopN operator down to TiKV. +- If the value of the Limit or TopN operator is smaller than or equal to this threshold, these operators are forcibly pushed down to TiKV. This variable resolves the issue that the Limit or TopN operator cannot be pushed down to TiKV partly due to wrong estimation. + ### tidb_opt_prefer_range_scan New in v5.0 -- Scope: SESSION +- Scope: SESSION | GLOBAL - Default value: `OFF` -- After you set the value of this variable to `1`, the optimizer always prefers index scans over full table scans. -- In the following example, before you enable `tidb_opt_prefer_range_scan`, the TiDB optimizer performs a full table scan. After you enable `tidb_opt_prefer_range_scan`, the optimizer selects an index scan. +- After you set the value of this variable to `ON`, the optimizer always prefers range scans over full table scans. +- In the following example, before you enable `tidb_opt_prefer_range_scan`, the TiDB optimizer performs a full table scan. After you enable `tidb_opt_prefer_range_scan`, the optimizer selects an index range scan. 
```sql explain select * from t where age=5; @@ -1108,6 +1270,16 @@ explain select * from t where age=5; - Default value: `OFF` - This variable is used to control whether to allow `INSERT`, `REPLACE`, and `UPDATE` statements to operate on the `_tidb_rowid` column. This variable can be used only when you import data using TiDB tools. +### tidb_partition_prune_mode New in v5.1 + +> **Warning:** + +> Currently, the dynamic pruning mode for partitioned tables is an experimental feature. It is not recommended that you use it in the production environment. + +- Scope: SESSION | GLOBAL +- Default value: `static` +- Specifies whether to enable `dynamic` mode for partitioned tables. For details about the dynamic pruning mode, see [Dynamic Pruning Mode for Partitioned Tables](/partitioned-table.md#dynamic-pruning-mode). + ### tidb_pprof_sql_cpu New in v4.0 - Scope: INSTANCE @@ -1156,7 +1328,7 @@ SET tidb_query_log_max_len = 20 ### tidb_replica_read New in v4.0 -- Scope: SESSION +- Scope: SESSION | GLOBAL - Default value: `leader` - Possible values: `leader`, `follower`, `leader-and-follower` - This variable is used to control where TiDB reads data. Here are three options: @@ -1248,34 +1420,35 @@ SET tidb_slow_log_threshold = 200; - Scope: SESSION | GLOBAL - Default value: `24` - Range: `[0, 255]` -- This variable is used to set the history capacity of the statement summary. +- This variable is used to set the history capacity of [statement summary tables](/statement-summary-tables.md). ### tidb_stmt_summary_internal_query New in v4.0 - Scope: SESSION | GLOBAL - Default value: `OFF` -- This variable is used to control whether to include the SQL information of TiDB in the statement summary. +- This variable is used to control whether to include the SQL information of TiDB in [statement summary tables](/statement-summary-tables.md). 
### tidb_stmt_summary_max_sql_length New in v4.0 - Scope: SESSION | GLOBAL - Default value: `4096` - Range: `[0, 2147483647]` -- This variable is used to control the length of the SQL string in the statement summary. +- This variable is used to control the length of the SQL string in [statement summary tables](/statement-summary-tables.md). ### tidb_stmt_summary_max_stmt_count New in v4.0 - Scope: SESSION | GLOBAL -- Default value: `200` +- Default value: `3000` - Range: `[1, 32767]` -- This variable is used to set the maximum number of statements that the statement summary stores in memory. +- This variable is used to set the maximum number of statements that [statement summary tables](/statement-summary-tables.md) store in memory. ### tidb_stmt_summary_refresh_interval New in v4.0 - Scope: SESSION | GLOBAL - Default value: `1800` - Range: `[1, 2147483647]` -- This variable is used to set the refresh time of the statement summary. The unit is second. +- Unit: Seconds +- This variable is used to set the refresh time of [statement summary tables](/statement-summary-tables.md). ### tidb_store_limit New in v3.0.4 and v4.0 @@ -1313,7 +1486,8 @@ SET tidb_slow_log_threshold = 200; - Scope: SESSION - Default value: `300` - Range: `[1, 2147483647]` -- This variable is used to set the timeout for executing the `SPLIT REGION` statement. The unit is second. If a statement is not executed completely within the specified time value, a timeout error is returned. +- Unit: Seconds +- This variable is used to set the timeout for executing the `SPLIT REGION` statement. If a statement is not executed completely within the specified time value, a timeout error is returned. ### tidb_window_concurrency New in v4.0 @@ -1334,6 +1508,19 @@ SET tidb_slow_log_threshold = 200; - This variable returns the current time zone. Values can be specified as either an offset such as '-8:00' or a named zone 'America/Los_Angeles'. 
- The value `SYSTEM` means that the time zone should be the same as the system host, which is available via the [`system_time_zone`](#system_time_zone) variable. +### timestamp + +- Scope: SESSION +- Default value: "" +- A non-empty value of this variable indicates the UNIX epoch that is used as the timestamp for `CURRENT_TIMESTAMP()`, `NOW()`, and other functions. This variable might be used in data restore or replication. + +### tmp_table_size + +- Scope: SESSION | GLOBAL +- Default value: `16777216` +- Unit: Bytes +- Indicates the maximum size of a temporary table. + ### transaction_isolation - Scope: SESSION | GLOBAL @@ -1368,7 +1555,7 @@ This variable is an alias for _transaction_isolation_. ### warning_count - Scope: SESSION -- Default value: 0 +- Default value: `0` - This read-only variable indicates the number of warnings that occurred in the statement that was previously executed. ### windowing_use_high_precision diff --git a/three-data-centers-in-two-cities-deployment.md b/three-data-centers-in-two-cities-deployment.md index 620496c7491b..148dff84a77e 100644 --- a/three-data-centers-in-two-cities-deployment.md +++ b/three-data-centers-in-two-cities-deployment.md @@ -10,11 +10,11 @@ This document introduces the architecture and configuration of the three data ce ## Overview -The architecture of three DCs in two cities is a highly available and disaster tolerant deployment solution that provides a production data center, a disaster recovery center in the same city, and a disaster recovery centers in another city. In this mode, the three DCs in two cities are interconnected. If one DC fails or suffers from a disaster, other DCs can still operate well and take over the the key applications or all applications. Compared with the the multi-DC in one city deployment, this solution has the advantage of cross-city high availability and can survive city-level natural disasters. 
+The architecture of three DCs in two cities is a highly available and disaster tolerant deployment solution that provides a production data center, a disaster recovery center in the same city, and a disaster recovery center in another city. In this mode, the three DCs in two cities are interconnected. If one DC fails or suffers from a disaster, other DCs can still operate well and take over the key applications or all applications. Compared with the multi-DC in one city deployment, this solution has the advantage of cross-city high availability and can survive city-level natural disasters.
To avoid the situation where the PD leader is in another city (IDC3), you can increase the priority of local PD (in Seattle) and decrease the priority of PD in another city (San Francisco). The larger the number, the higher the priority. diff --git a/ticdc/manage-ticdc.md b/ticdc/manage-ticdc.md index 2f2634009b24..cbb807fc157f 100644 --- a/ticdc/manage-ticdc.md +++ b/ticdc/manage-ticdc.md @@ -6,18 +6,20 @@ aliases: ['/docs/dev/ticdc/manage-ticdc/','/docs/dev/reference/tools/ticdc/manag # Manage TiCDC Cluster and Replication Tasks -This document describes how to manage the TiCDC cluster and replication tasks using the command line tool `cdc cli` and the HTTP interface. +This document describes how to upgrade TiCDC cluster and modify the configuration of TiCDC cluster using TiUP, and how to manage the TiCDC cluster and replication tasks using the command-line tool `cdc cli`. + +You can also use the HTTP interface (the TiCDC OpenAPI feature) to manage the TiCDC cluster and replication tasks. For details, see [TiCDC OpenAPI](/ticdc/ticdc-open-api.md). ## Upgrade TiCDC using TiUP -This section introduces how to upgrade the TiCDC cluster using TiUP. In the following example, assume that you need to upgrade TiCDC and the entire TiDB cluster to v5.1.0. +This section introduces how to upgrade the TiCDC cluster using TiUP. In the following example, assume that you need to upgrade TiCDC and the entire TiDB cluster to v5.2.1. {{< copyable "shell-regular" >}} ```shell tiup update --self && \ tiup update --all && \ -tiup cluster upgrade v5.1.0 +tiup cluster upgrade v5.2.1 ``` ### Notes for upgrade @@ -25,6 +27,35 @@ tiup cluster upgrade v5.1.0 * The `changefeed` configuration has changed in TiCDC v4.0.2. See [Compatibility notes for the configuration file](/production-deployment-using-tiup.md#step-3-initialize-cluster-topology-file) for details. * If you encounter any issues, see [Upgrade TiDB using TiUP - FAQ](/upgrade-tidb-using-tiup.md#faq). 
+## Modify TiCDC configuration using TiUP + +This section introduces how to modify the configuration of TiCDC cluster using the [`tiup cluster edit-config`](/tiup/tiup-component-cluster-edit-config.md) command of TiUP. The following example changes the value of `gc-ttl` from the default `86400` to `3600`, namely, one hour. + +First, execute the following command. You need to replace `` with your actual cluster name. + +{{< copyable "shell-regular" >}} + +```shell +tiup cluster edit-config +``` + +Then, enter the vi editor page and modify the `cdc` configuraion under [`server-configs`](/tiup/tiup-cluster-topology-reference.md#server_configs). The configuration is shown below: + +```shell + server_configs: + tidb: {} + tikv: {} + pd: {} + tiflash: {} + tiflash-learner: {} + pump: {} + drainer: {} + cdc: + gc-ttl: 3600 +``` + +After the modification, execute the `tiup cluster reload -R cdc` command to reload the configuration. + ## Use TLS For details about using encrypted data transmission (TLS), see [Enable TLS Between TiDB Components](/enable-tls-between-components.md). @@ -536,95 +567,6 @@ Currently, you can modify the following configuration items: - `resolved-ts`: The largest TSO among the sorted data in the current processor. - `checkpoint-ts`: The largest TSO that has been successfully written to the downstream in the current processor. -## Use HTTP interface to manage cluster status and data replication task - -Currently, the HTTP interface provides some basic features for query and maintenance. - -In the following examples, suppose that the TiCDC server listens on `127.0.0.1`, and the port is `8300` (you can specify the IP and port in `--addr=ip:port` when starting the TiCDC server). 
- -### Get the TiCDC server status - -Use the following command to get the TiCDC server status: - -{{< copyable "shell-regular" >}} - -```shell -curl http://127.0.0.1:8300/status -``` - -``` -{ -"version": "0.0.1", -"git_hash": "863f8ea889b144244ff53593a45c47ad22d37396", -"id": "6d92386a-73fc-43f3-89de-4e337a42b766", # capture id -"pid": 12102 # cdc server pid -} -``` - -### Evict the owner node - -{{< copyable "shell-regular" >}} - -```shell -curl -X POST http://127.0.0.1:8300/capture/owner/resign -``` - -The above command takes effect only for requesting on the **owner node**. - -``` -{ - "status": true, - "message": "" -} -``` - -{{< copyable "shell-regular" >}} - -```shell -curl -X POST http://127.0.0.1:8301/capture/owner/resign -``` - -For nodes other than owner nodes, executing the above command will return the following error. - -``` -election: not leader -``` - -### Manually schedule a table to other node - -{{< copyable "shell-regular" >}} - -```shell -curl -X POST http://127.0.0.1:8300/capture/owner/move_table -d 'cf-id=cf060953-036c-4f31-899f-5afa0ad0c2f9&target-cp-id=6f19a6d9-0f8c-4dc9-b299-3ba7c0f216f5&table-id=49' -``` - -Parameter description: - -| Parameter name | Description | -| :----------- | :--- | -| `cf-id` | The ID of the `changefeed` to be scheduled | -| `target-cp-id` | The ID of the target `capture` | -| `table-id` | The ID of the table to be scheduled | - -For nodes other than owner nodes, executing the above command will return the following error. - -``` -{ - "status": true, - "message": "" -} -``` - -### Dynamically change the log level of TiCDC server - -{{< copyable "shell-regular" >}} - -```shell -curl -X POST -d '"debug"' http://127.0.0.1:8301/admin/log -``` - -In the command above, the `POST` parameter indicates the new log level. The [zap-provided](https://godoc.org/go.uber.org/zap#UnmarshalText) log level options are supported: "debug", "info", "warn", "error", "dpanic", "panic", and "fatal". 
This interface parameter is JSON-encoded and you need to pay attention to the use of quotation marks. For example: `'"debug"'`. - ## Task configuration file This section introduces the configuration of a replication task. @@ -844,6 +786,6 @@ In the output of the above command, if the value of `sort-engine` is "unified", > **Note:** > > + If your servers use mechanical hard drives or other storage devices that have high latency or limited bandwidth, use the unified sorter with caution. -> + The total free capacity of hard drives must be greater than or equal to 128G. If you need to replicate a large amount of historical data, make sure that the free capacity on each node is greater than or equal to the size of the incremental data that needs to be replicated. +> + The total free capacity of hard drives must be greater than or equal to 500G. If you need to replicate a large amount of historical data, make sure that the free capacity on each node is greater than or equal to the size of the incremental data that needs to be replicated. > + Unified sorter is enabled by default. If your servers do not match the above requirements and you want to disable the unified sorter, you need to manually set `sort-engine` to `memory` for the changefeed. > + To enable Unified Sorter on an existing changefeed, see the methods provided in [How do I handle the OOM that occurs after TiCDC is restarted after a task interruption?](/ticdc/troubleshoot-ticdc.md#how-do-i-handle-the-oom-that-occurs-after-ticdc-is-restarted-after-a-task-interruption). diff --git a/ticdc/ticdc-open-api.md b/ticdc/ticdc-open-api.md new file mode 100644 index 000000000000..e43892a9d3a8 --- /dev/null +++ b/ticdc/ticdc-open-api.md @@ -0,0 +1,620 @@ +--- +title: TiCDC OpenAPI +summary: Learn how to use the OpenAPI interface to manage the cluster status and data replication. +--- + +# TiCDC OpenAPI + + + +> **Warning:** +> +> TiCDC OpenAPI is still an experimental feature. 
It is not recommended to use it in a production environment. + +TiCDC provides the OpenAPI feature for querying and operating the TiCDC cluster, which is similar to the feature of [`cdc cli` tool](/ticdc/manage-ticdc.md#use-cdc-cli-to-manage-cluster-status-and-data-replication-task). + +You can use the APIs to perform the following maintenance operations on the TiCDC cluster: + +- [Get the status information of a TiCDC node](#get-the-status-information-of-a-ticdc-node) +- [Check the health status of a TiCDC cluster](#check-the-health-status-of-a-ticdc-cluster) +- [Create a replication task](#create-a-replication-task) +- [Remove a replication task](#remove-a-replication-task) +- [Update the replication configuration](#update-the-replication-configuration) +- [Query the replication task list](#query-the-replication-task-list) +- [Query a specific replication task](#query-a-specific-replication-task) +- [Pause a replication task](#pause-a-replication-task) +- [Resume a replication task](#resume-a-replication-task) +- [Query the replication subtask list](#query-the-replication-subtask-list) +- [Query a specific replication subtask](#query-a-specific-replication-subtask) +- [Query the TiCDC service process list](#query-the-ticdc-service-process-list) +- [Evict an owner node](#evict-an-owner-node) +- [Manually trigger the load balancing of all tables in a replication task](#manually-trigger-the-load-balancing-of-all-tables-in-a-replication-task) +- [Manually schedule a table to another node](#manually-schedule-a-table-to-another-node) +- [Dynamically adjust the log level of the TiCDC server](#dynamically-adjust-the-log-level-of-the-ticdc-server) + +The request body and returned value of all APIs are in JSON format. The following sections describe the specific usage of the APIs. + +In the following examples, the listening IP address of the TiCDC server is `127.0.0.1` and the port is `8300`. 
You can bind a specified IP and port via `--addr=ip:port` when starting the TiCDC server. + +## API error message template + +After sending an API request, if an error occurs, the returned error message is in the following format: + +```json +{ + "error_msg": "", + "error_code": "" +} +``` + +From the above JSON output, `error_msg` describes the error message and `error_code` is the corresponding error code. + +## Get the status information of a TiCDC node + +This API is a synchronous interface. If the request is successful, the status information of the corresponding node is returned. + +### Request URI + +`GET /api/v1/status` + +### Example + +The following request gets the status information of the TiCDC node whose IP address is `127.0.0.1` and port number is `8300`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/status +``` + +```json +{ + "version": "v5.2.0-master-dirty", + "git_hash": "f191cd00c53fdf7a2b1c9308a355092f9bf8824e", + "id": "c6a43c16-0717-45af-afd6-8b3e01e44f5d", + "pid": 25432, + "is_owner": true +} +``` + +The fields of the above output are described as follows: + +- version: The current TiCDC version number. +- git_hash: The Git hash value. +- id: The capture ID of the node. +- pid: The capture process PID of the node. +- is_owner: Indicates whether the node is an owner. + +## Check the health status of a TiCDC cluster + +This API is a synchronous interface. If the cluster is healthy, `200 OK` is returned. + +### Request URI + +`GET /api/v1/health` + +### Example + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/health +``` + +## Create a replication task + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. 
+ +### Request URI + +`POST /api/v1/changefeeds` + +### Parameter description + +Compared to the optional parameters for creating a replication task using the `cli` command, the optional parameters for creating such task using the API are not as complete. This API supports the following parameters. + +#### Parameters for the request body + +| Parameter name | Description | +| :------------------------ | :---------------------- ------------------------------- | +| `changefeed_id` | `STRING` type. The ID of the replication task. (Optional) | +| `start_ts` | `UINT64` type. Specifies the start TSO of the changefeed. (Optional) | +| `target_ts` | `UINT64` type. Specifies the target TSO of the changefeed. (Optional) | +| **`sink_uri`** | `STRING` type. The downstream address of the replication task. (**Required**) | +| `force_replicate` | `BOOLEAN` type. Determines whether to forcibly replicate the tables without unique indexes. (Optional) | +| `ignore_ineligible_table` | `BOOLEAN` type. Determines whether to ignore the tables that cannot be replicated. (Optional) | +| `filter_rules` | `STRING` type array. The rules for table schema filtering. (Optional) | +| `ignore_txn_start_ts` | `UINT64` type array. Ignores the transaction of a specified start_ts. (Optional) | +| `mounter_worker_num` | `INT` type. The mounter thread number. (Optional) | +| `sink_config` | The configuration parameters of sink. (Optional) | + +The meaning and format of `changefeed_id`, `start_ts`, `target_ts`, and `sink_uri` are the same as those described in the [Use `cdc cli` to create a replication task](/ticdc/manage-ticdc.md#create-a-replication-task) document. For the detailed description of these parameters, see this document. Note that when you specify the certificate path in `sink_uri`, make sure you have uploaded the corresponding certificate to the corresponding TiCDC server. + +Some other parameters in the above table are described further as follows. 
+ +`force_replicate`: This parameter defaults to `false`. When it is specified as `true`, TiCDC tries to forcibly replicate tables that do not have a unique index. + +`ignore_ineligible_table`: This parameter defaults to `false`. When it is specified as `true`, TiCDC ignores tables that cannot be replicated. + +`filter_rules`: The rules for table schema filtering, such as `filter_rules = ['foo*.*','bar*.*']`. For details, see the [Table Filter](/table-filter.md) document. + +`ignore_txn_start_ts`: When this parameter is specified, the specified start_ts is ignored. For example, `ignore-txn-start-ts = [1, 2]`. + +`mounter_worker_num`: The thread number of mounter. Mounter is used to decode the data output from TiKV. The default value is `16`. + +The configuration parameters of sink are as follows: + +```json +{ + "dispatchers":[ + {"matcher":["test1.*", "test2.*"], "dispatcher":"ts"}, + {"matcher":["test3.*", "test4.*"], "dispatcher":"rowid"}, + ], + "protocal":"default", +} +``` + +`dispatchers`: For the sink of MQ type, you can use dispatchers to configure the event dispatcher. Four dispatchers are supported: `default`, `ts`, `rowid`, and `table`. The dispatcher rules are as follows: + +- `default`: When multiple unique indexes (including the primary key) exist or the Old Value feature is enabled, events are dispatched in the `table` mode. When only one unique index (or the primary key) exists, events are dispatched in the `rowid` mode. +- `ts`: Uses the commitTs of the row change to create the hash value and dispatch events. +- `rowid`: Uses the name and value of the selected HandleKey column to create the hash value and dispatch events. +- `table`: Uses the schema name of the table and the table name to create the hash value and dispatch events. + +`matcher`: The matching syntax of matcher is the same as the filter rule syntax. + +`protocal`: For the sink of MQ type, you can specify the protocol format of the message. 
Currently four protocols are supported: `default`, `canal`, `avro`, and `maxwell`. The default protocol is the TiCDC Open Protocol. + +### Example + +The following request creates a replication task with an ID of `test5` and a `sink_uri` of `blackhole://`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X POST -H "'Content-type':'application/json'" http://127.0.0.1:8300/api/v1/changefeeds -d '{"changefeed_id":"test5","sink_uri":"blackhole://"}' +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Remove a replication task + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`DELETE /api/v1/changefeeds/{changefeed_id}` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be removed. | + +### Example + +The following request removes the replication task with the ID `test1`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X DELETE http://127.0.0.1:8300/api/v1/changefeeds/test1 +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Update the replication configuration + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +To modify the changefeed configuration, follow the steps of `pause the replication task -> modify the configuration -> resume the replication task`. 
+ +### Request URI + +`PUT /api/v1/changefeeds/{changefeed_id}` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be updated. | + +#### Parameters for the request body + +Currently, only the following configuration can be modified via the API. + +| Parameter name | Description | +| :-------------------- | :------------------------------------------------------- | +| `target_ts` | `UINT64` type. Specifies the target TSO of the changefeed. (Optional) | +| `sink_uri` | `STRING` type. The downstream address of the replication task. (Optional) | +| `filter_rules` | `STRING` type array. The rules for table schema filtering. (Optional) | +| `ignore_txn_start_ts` | `UINT64` type array. Ignores the transaction of a specified start_ts. (Optional) | +| `mounter_worker_num` | `INT` type. The mounter thread number. (Optional) | +| `sink_config` | The configuration parameters of sink. (Optional) | + +The meanings of the above parameters are the same as those in the [Create a replication task](#create-a-replication-task) section. See that section for details. + +### Example + +The following request updates the `mounter_worker_num` of the replication task with the ID `test1` to `32`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X PUT -H "'Content-type':'application/json'" http://127.0.0.1:8300/api/v1/changefeeds/test1 -d '{"mounter_worker_num":32}' +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Query the replication task list + +This API is a synchronous interface. If the request is successful, the basic information of all replication tasks in the TiCDC cluster is returned. 
+ +### Request URI + +`GET /api/v1/changefeeds` + +### Parameter description + +#### Query parameters + +| Parameter name | Description | +| :------ | :--------------------------------------------- | +| `state` | When this parameter is specified, the replication status information only of this state is returned. (Optional) | + +The value options for `state` are `all`, `normal`, `stopped`, `error`, `failed`, and `finished`. + +If this parameter is not specified, the basic information of replication tasks whose state is normal, stopped, or failed is returned by default. + +### Example + +The following request queries the basic information of all replication tasks whose state is `normal`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/changefeeds?state=normal +``` + +```json +[ + { + "id": "test1", + "state": "normal", + "checkpoint_tso": 426921294362574849, + "checkpoint_time": "2021-08-10 14:04:54.242", + "error": null + }, + { + "id": "test2", + "state": "normal", + "checkpoint_tso": 426921294362574849, + "checkpoint_time": "2021-08-10 14:04:54.242", + "error": null + } +] +``` + +The fields in the returned result above are described as follows: + +- id: The ID of the replication task. +- state: The current [state](/ticdc/manage-ticdc.md#state-transfer-of-replication-tasks) of the replication task. +- checkpoint_tso: The TSO representation of the current checkpoint of the replication task. +- checkpoint_time: The formatted time representation of the current checkpoint of the replication task. +- error: The error information of the replication task. + +## Query a specific replication task + +This API is a synchronous interface. If the request is successful, the detailed information of the specified replication task is returned. 
+ +### Request URI + +`GET /api/v1/changefeeds/{changefeed_id}` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be queried. | + +### Example + +The following request queries the detailed information of the replication task with the ID `test1`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/changefeeds/test1 +``` + +```json +{ + "id": "test1", + "sink_uri": "blackhole://", + "create_time": "2021-08-10 11:41:30.642", + "start_ts": 426919038970232833, + "target_ts": 0, + "checkpoint_tso": 426921014615867393, + "checkpoint_time": "2021-08-10 13:47:07.093", + "sort_engine": "unified", + "state": "normal", + "error": null, + "error_history": null, + "creator_version": "", + "task_status": [ + { + "capture_id": "d8924259-f52f-4dfb-97a9-c48d26395945", + "table_ids": [ + 63, + 65 + ], + "table_operations": {} + } + ] +} +``` + +## Pause a replication task + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`POST /api/v1/changefeeds/{changefeed_id}/pause` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be paused. | + +### Example + +The following request pauses the replication task with the ID `test1`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X POST http://127.0.0.1:8300/api/v1/changefeeds/test1/pause +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. 
+ +## Resume a replication task + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`POST /api/v1/changefeeds/{changefeed_id}/resume` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be resumed. | + +### Example + +The following request resumes the replication task with the ID `test1`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X POST http://127.0.0.1:8300/api/v1/changefeeds/test1/resume +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Query the replication subtask list + +This API is a synchronous interface. If the request is successful, the basic information of all replication subtasks (`processor`) is returned. + +### Request URI + +`GET /api/v1/processors` + +### Example + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/processors +``` + +```json +[ + { + "changefeed_id": "test1", + "capture_id": "561c3784-77f0-4863-ad52-65a3436db6af" + } +] +``` + +## Query a specific replication subtask + +This API is a synchronous interface. If the request is successful, the detailed information of the specified replication subtask (`processor`) is returned. + +### Request URI + +`GET /api/v1/processors/{changefeed_id}/{capture_id}` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The changefeed ID of the replication subtask to be queried. | +| `capture_id` | The capture ID of the replication subtask to be queried. 
| + +### Example + +The following request queries the detailed information of a subtask whose `changefeed_id` is `test1` and `capture_id` is `561c3784-77f0-4863-ad52-65a3436db6af`. A subtask can be identified by `changefeed_id` and `capture_id`. + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/processors/test1/561c3784-77f0-4863-ad52-65a3436db6af +``` + +```json +{ + "checkpoint_ts": 426919123303006208, + "resolved_ts": 426919123369066496, + "table_ids": [ + 63, + 65 + ], + "error": null +} +``` + +## Query the TiCDC service process list + +This API is a synchronous interface. If the request is successful, the basic information of all replication processes (`capture`) is returned. + +### Request URI + +`GET /api/v1/captures` + +### Example + +{{< copyable "shell-regular" >}} + +```shell +curl -X GET http://127.0.0.1:8300/api/v1/captures +``` + +```json +[ + { + "id": "561c3784-77f0-4863-ad52-65a3436db6af", + "is_owner": true, + "address": "127.0.0.1:8300" + } +] +``` + +## Evict an owner node + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`POST /api/v1/owner/resign` + +### Example + +The following request evicts the current owner node of TiCDC and triggers a new round of elections to generate a new owner node. + +{{< copyable "shell-regular" >}} + +```shell +curl -X POST http://127.0.0.1:8300/api/v1/owner/resign +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Manually trigger the load balancing of all tables in a replication task + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. 
The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`POST /api/v1/changefeeds/{changefeed_id}/tables/rebalance_table` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be scheduled. | + +### Example + +The following request triggers the load balancing of all tables in the changefeed with the ID `test1`. + +{{< copyable "shell-regular" >}} + +```shell + curl -X POST http://127.0.0.1:8300/api/v1/changefeeds/test1/tables/rebalance_table +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Manually schedule a table to another node + +This API is an asynchronous interface. If the request is successful, `202 Accepted` is returned. The returned result only means that the server agrees to run the command but does not guarantee that the command will be run successfully. + +### Request URI + +`POST /api/v1/changefeeds/{changefeed_id}/tables/move_table` + +### Parameter description + +#### Path parameters + +| Parameter name | Description | +| :-------------- | :----------------------------------- | +| `changefeed_id` | The ID of the replication task (changefeed) to be scheduled. | + +#### Parameters for the request body + +| Parameter name | Description | +| :------------------ | :------------------ | +| `target_capture_id` | The ID of the target capture. | +| `table_id` | The ID of the table to be scheduled. | + +### Example + +The following request schedules the table with the ID `49` in the changefeed with the ID `test1` to the capture with the ID `6f19a6d9-0f8c-4dc9-b299-3ba7c0f216f5`. 
+ +{{< copyable "shell-regular" >}} + +```shell +curl -X POST -H "'Content-type':'application/json'" http://127.0.0.1:8300/api/v1/changefeeds/changefeed-test1/tables/move_table -d '{"capture_id":"6f19a6d9-0f8c-4dc9-b299-3ba7c0f216f5","table_id":49}' + +``` + +If the request is successful, `202 Accepted` is returned. If the request fails, an error message and error code are returned. + +## Dynamically adjust the log level of the TiCDC server + +This API is a synchronous interface. If the request is successful, `200 OK` is returned. + +### Request URI + +`POST /api/v1/log` + +### Request parameters + +#### Parameters for the request body + +| Parameter name | Description | +| :---------- | :----------------- | +| `log_level` | The log level you want to set. | + +`log_level` supports the [log levels provided by zap](https://godoc.org/go.uber.org/zap#UnmarshalText): "debug", "info", "warn", "error", "dpanic", "panic", and "fatal". + +### Example + +{{< copyable "shell-regular" >}} + +```shell +curl -X POST -H "'Content-type':'application/json'" http://127.0.0.1:8300/api/v1/log -d '{"log_level":"debug"}' + +``` + +If the request is successful, `200 OK` is returned. If the request fails, an error message and error code are returned. diff --git a/ticdc/ticdc-open-protocol.md b/ticdc/ticdc-open-protocol.md index 22c5d9e6aa53..3e514ce565db 100644 --- a/ticdc/ticdc-open-protocol.md +++ b/ticdc/ticdc-open-protocol.md @@ -274,7 +274,7 @@ COMMIT; Currently, TiCDC does not provide the standard parsing library for TiCDC Open Protocol, but the Golang version and Java version of parsing demonstrations are provided. You can refer to the data format provided in this document and the following demonstrations to implement the protocol parsing for consumers. 
-- [Golang demo](https://github.com/pingcap/ticdc/tree/master/kafka_consumer) +- [Golang demo](https://github.com/pingcap/ticdc/tree/master/cmd/kafka-consumer) - [Java demo](https://github.com/pingcap/ticdc/tree/master/demo/java) ## Column type code diff --git a/ticdc/ticdc-overview.md b/ticdc/ticdc-overview.md index da926141a934..3f61b8083adb 100644 --- a/ticdc/ticdc-overview.md +++ b/ticdc/ticdc-overview.md @@ -120,7 +120,7 @@ You can either deploy TiCDC along with a new TiDB cluster or add the TiCDC compo Currently, you can use the `cdc cli` tool to manage the status of a TiCDC cluster and data replication tasks. For details, see: - [Use `cdc cli` to manage cluster status and data replication task](/ticdc/manage-ticdc.md#use-cdc-cli-to-manage-cluster-status-and-data-replication-task) -- [Use HTTP interface to manage cluster status and data replication task](/ticdc/manage-ticdc.md#use-http-interface-to-manage-cluster-status-and-data-replication-task) +- [Use OpenAPI to manage cluster status and data replication task](/ticdc/ticdc-open-api.md) ## Troubleshoot TiCDC @@ -129,3 +129,13 @@ For details, refer to [Troubleshoot TiCDC](/ticdc/troubleshoot-ticdc.md). ## TiCDC Open Protocol TiCDC Open Protocol is a row-level data change notification protocol that provides data sources for monitoring, caching, full-text indexing, analysis engines, and primary-secondary replication between different databases. TiCDC complies with TiCDC Open Protocol and replicates data changes of TiDB to third-party data medium such as MQ (Message Queue). For more information, see [TiCDC Open Protocol](/ticdc/ticdc-open-protocol.md). + +## Compatibility notes for `sort-dir` and `data-dir` + +The `sort-dir` configuration is used to specify the temporary file directory for the TiCDC sorter. Its functionalities might vary in different versions. The following table lists `sort-dir`'s compatibility changes across versions. 
+ +| Version | `sort-dir` functionality | Note | Recommendation | +| :--- | :--- | :-- | :-- | +| v4.0.11 or an earlier v4.0 version, v5.0.0-rc | It is a changefeed configuration item and specifies the temporary file directory for the `file` sorter and `unified` sorter. | In these versions, `file` sorter and `unified` sorter are **experimental features** and **NOT** recommended for the production environment.

If multiple changefeeds use the `unified` sorter as its `sort-engine`, the actual temporary file directory might be the `sort-dir` configuration of any changefeed, and the directory used for each TiCDC node might be different. | It is not recommended to use `unified` sorter in the production environment. | +| v4.0.12, v4.0.13, v5.0.0, and v5.0.1 | It is a configuration item of changefeed or of `cdc server`. | By default, the `sort-dir` configuration of a changefeed does not take effect, and the `sort-dir` configuration of `cdc server` defaults to `/tmp/cdc_sort`. It is recommended to only configure `cdc server` in the production environment.

If you use TiUP to deploy TiCDC, it is recommended to use the latest TiUP version and set `sorter.sort-dir` in the TiCDC server configuration.

The `unified` sorter is enabled by default in v4.0.13, v5.0.0, and v5.0.1. If you want to upgrade your cluster to these versions, make sure that you have correctly configured `sorter.sort-dir` in the TiCDC server configuration. | You need to configure `sort-dir` using the `cdc server` command-line parameter (or TiUP). | +| v4.0.14 and later v4.0 versions, v5.0.2 and later v5.0 versions, later TiDB versions | `sort-dir` is deprecated. It is recommended to configure `data-dir`. | You can configure `data-dir` using the latest version of TiUP. In these TiDB versions, `unified` sorter is enabled by default. Make sure that `data-dir` has been configured correctly when you upgrade your cluster. Otherwise, `/tmp/cdc_data` will be used by default as the temporary file directory.

If the storage capacity of the device where the directory is located is insufficient, the problem of insufficient hard disk space might occur. In this situation, the previous `sort-dir` configuration of changefeed will become invalid.| You need to configure `data-dir` using the `cdc server` command-line parameter (or TiUP). | diff --git a/ticdc/troubleshoot-ticdc.md b/ticdc/troubleshoot-ticdc.md index 351da4a9223c..be47a53d9caf 100644 --- a/ticdc/troubleshoot-ticdc.md +++ b/ticdc/troubleshoot-ticdc.md @@ -398,3 +398,43 @@ If you want to skip this DDL statement that goes wrong, set the start-ts of the cdc cli changefeed update -c test-cf --pd=http://10.0.10.25:2379 --start-ts 415241823337054210 cdc cli changefeed resume -c test-cf --pd=http://10.0.10.25:2379 ``` + +## The default value of the time type field is inconsistent when replicating a DDL statement to the downstream MySQL 5.7. What can I do? + +Suppose that the `create table test (id int primary key, ts timestamp)` statement is executed in the upstream TiDB. When TiCDC replicates this statement to the downstream MySQL 5.7, MySQL uses the default configuration. The table schema after the replication is as follows. 
The default value of the `timestamp` field becomes `CURRENT_TIMESTAMP`: + +{{< copyable "sql" >}} + +```sql +mysql root@127.0.0.1:test> show create table test; ++-------+----------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+----------------------------------------------------------------------------------+ +| test | CREATE TABLE `test` ( | +| | `id` int(11) NOT NULL, | +| | `ts` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, | +| | PRIMARY KEY (`id`) | +| | ) ENGINE=InnoDB DEFAULT CHARSET=latin1 | ++-------+----------------------------------------------------------------------------------+ +1 row in set +``` + +From the result, you can see that the table schema before and after the replication is inconsistent. This is because the default value of `explicit_defaults_for_timestamp` in TiDB is different from that in MySQL. See [MySQL Compatibility](/mysql-compatibility.md#default-differences) for details. + +Since v5.0.1 or v4.0.13, for each replication to MySQL, TiCDC automatically sets `explicit_defaults_for_timestamp = ON` to ensure that the time type is consistent between the upstream and downstream. For versions earlier than v5.0.1 or v4.0.13, pay attention to the compatibility issue caused by the inconsistent `explicit_defaults_for_timestamp` value when using TiCDC to replicate the time type data. + +## When the sink of the replication downstream is TiDB or MySQL, what permissions do users of the downstream database need? + +When the sink is TiDB or MySQL, the users of the downstream database need the following permissions: + +- `Select` +- `Index` +- `Insert` +- `Update` +- `Delete` +- `Create` +- `Drop` +- `Alter` +- `Create View` + +If you need to replicate `recover table` to the downstream TiDB, the `Super` permission is required. 
diff --git a/tidb-binlog/deploy-tidb-binlog.md b/tidb-binlog/deploy-tidb-binlog.md index e2d5095c7d58..75d805a69cf2 100644 --- a/tidb-binlog/deploy-tidb-binlog.md +++ b/tidb-binlog/deploy-tidb-binlog.md @@ -19,6 +19,10 @@ In environments of development, testing and production, the requirements on serv | Pump | 3 | 8 core+ | SSD, 200 GB+ | 16G | | Drainer | 1 | 8 core+ | SAS, 100 GB+ (If binlogs are output as local files, the disk size depends on how long these files are retained.) | 16G | +## Deploy TiDB Binlog using TiUP + +It is recommended to deploy TiDB Binlog using TiUP. To do that, when deploying TiDB using TiUP, you need to add the node information of `drainer` and `pump` of TiDB Binlog in [TiDB Binlog Deployment Topology](/tidb-binlog-deployment-topology.md). For detailed deployment information, refer to [Deploy a TiDB Cluster Using TiUP](/production-deployment-using-tiup.md). + ## Deploy TiDB Binlog using a Binary package ### Download the official Binary package @@ -331,9 +335,9 @@ The following part shows how to use Pump and Drainer based on the nodes above. [syncer.to] host = "192.168.0.13" user = "root" + # If you do not want to set a cleartext `password` in the configuration file, you can create `encrypted_password` using `./binlogctl -cmd encrypt -text string`. + # When you have created an `encrypted_password` that is not empty, the `password` above will be ignored, because `encrypted_password` and `password` cannot take effect at the same time. password = "" - # `encrypted_password` is encrypted using `./binlogctl -cmd encrypt -text string`. - # When `encrypted_password` is not empty, the `password` above will be ignored. 
encrypted_password = "" port = 3306 diff --git a/tidb-binlog/tidb-binlog-configuration-file.md b/tidb-binlog/tidb-binlog-configuration-file.md index bdc710c2c437..802ea1e85ece 100644 --- a/tidb-binlog/tidb-binlog-configuration-file.md +++ b/tidb-binlog/tidb-binlog-configuration-file.md @@ -173,8 +173,12 @@ This section introduces the configuration items of Drainer. For the example of a ### initial-commit-ts -* Specifies from which commit timestamp the replication task starts. This configuration is only applicable to the Drainer node that starts replication for the first time. If a checkpoint already exists downstream, the replication will be performed according to the time recorded in the checkpoint. -* Default value: `-1`. Drainer will get a new timestamp from PD as the starting time. +* Specifies from which commit timestamp of the transaction the replication process starts. This configuration is applicable only to the Drainer node that is in the replication process for the first time. If a checkpoint already exists in the downstream, the replication will be performed according to the time recorded in the checkpoint. +* commit ts (commit timestamp) is a specific point in time for [transaction](/transaction-overview.md#transactions) commits in TiDB. It is a globally unique and increasing timestamp from PD as the unique ID of the current transaction. You can get the `initial-commit-ts` configuration in the following typical ways: + - If BR is used, you can get `initial-commit-ts` from the backup TS recorded in the metadata backed up by BR (backupmeta). + - If Dumpling is used, you can get `initial-commit-ts` from the Pos recorded in the metadata backed up by Dumpling (metadata), + - If PD Control is used, `initial-commit-ts` is in the output of the `tso` command. +* Default value: `-1`. Drainer will get a new timestamp from PD as the starting time, which means that the replication process starts from the current time. 
### synced-check-time @@ -331,22 +335,17 @@ When the downstream is Kafka, the valid configuration items are as follows: ### syncer.to.checkpoint -This section introduces a configuration item related to `syncer.to.checkpoint`. +* `type`: Specifies in what way the replication progress is saved. Currently, the available options are `mysql`, `tidb`, and `file`. -### type + This configuration item is the same as the downstream type by default. For example, when the downstream is `file`, the checkpoint progress is saved in the local file `/savepoint`; when the downstream is `mysql`, the progress is saved in the downstream database. If you need to explicitly specify using `mysql` or `tidb` to store the progress, make the following configuration: -* Specifies in what way the replication progress is saved. -* Available options: `mysql` and `tidb`. +* `schema`: `"tidb_binlog"` by default. -* Default value: The same as the downstream type. For example, when the downstream is `file`, the progress is saved in the local file system; when the downstream is `mysql`, the progress is saved in the downstream database. If you explicitly specify using `mysql` or `tidb` to store the progress, make the following configuration: + > **Note:** + > + > When deploying multiple Drainer nodes in the same TiDB cluster, you need to specify a different checkpoint schema for each node. Otherwise, the replication progress of two instances will overwrite each other. - * `schema`: `tidb_binlog` by default. - - > **Note:** - > - > When deploying multiple Drainer nodes in the same TiDB cluster, you need to specify a different checkpoint schema for each node. Otherwise, the replication progress of two instances will overwrite each other. 
- - * `host` - * `user` - * `password` - * `port` +* `host` +* `user` +* `password` +* `port` \ No newline at end of file diff --git a/tidb-binlog/tidb-binlog-faq.md b/tidb-binlog/tidb-binlog-faq.md index a8fbdcdb015e..596e2169e08e 100644 --- a/tidb-binlog/tidb-binlog-faq.md +++ b/tidb-binlog/tidb-binlog-faq.md @@ -244,7 +244,7 @@ To solve the problem, follow these steps: 3. Check `drainer.log`. Search for the failed DDL operation and find the `commit-ts` of this operation. For example: ``` - [2020/05/21 09:51:58.019 +08:00] [INFO] [syncer.go:398] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="ALTER TABLE `test` ADD INDEX (`index1`)"] ["commit ts"=416815754209656834]。 + [2020/05/21 09:51:58.019 +08:00] [INFO] [syncer.go:398] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="ALTER TABLE `test` ADD INDEX (`index1`)"] ["commit ts"=416815754209656834]. ``` 4. Modify the `drainer.toml` configuration file. Add the `commit-ts` in the `ignore-txn-commit-ts` item and restart the Drainer node. diff --git a/tidb-configuration-file.md b/tidb-configuration-file.md index 49fb30aa8798..b658a018f9a9 100644 --- a/tidb-configuration-file.md +++ b/tidb-configuration-file.md @@ -274,6 +274,11 @@ Configuration items related to log files. Configuration items related to security. +### `require-secure-transport` + +- Determines whether to require the client to use the secure mode for data transport. +- Default value: `false` + ### `enable-sem` - Enables the Security Enhanced Mode (SEM). @@ -321,6 +326,11 @@ Configuration items related to security. + Default value: `"plaintext"`, which disables encryption. + Optional values: `"plaintext"` and `"aes128-ctr"` +### `auto-tls` + +- Determines whether to automatically generate the TLS certificates on startup. +- Default value: `false` + ## Performance Configuration items related to performance. 
@@ -358,14 +368,14 @@ Configuration items related to performance. ### `committer-concurrency` + The number of goroutines for requests related to executing commit in the commit phase of the single transaction. -+ Default value: `16` ++ Default value: `128` + If the transaction to commit is too large, the waiting time for the flow control queue when the transaction is committed might be too long. In this situation, you can increase the configuration value to speed up the commit. ### `stmt-count-limit` - The maximum number of statements allowed in a single TiDB transaction. - Default value: `5000` -- If a transaction does not roll back or commit after the number of statements exceeds `stmt-count-limit`, TiDB returns the `statement count 5001 exceeds the transaction limitation, autocommit = false` error. This configuration takes effect **only** in the retriable optimistic transaction. If you use the pessimistic transaction or have disabled the transaction retry, the number of statements in a transaction is not limited by this configuration. +- If a transaction does not roll back or commit after the number of statements exceeds `stmt-count-limit`, TiDB returns the `statement count 5001 exceeds the transaction limitation, autocommit = false` error. This configuration takes effect **only** in the retryable optimistic transaction. If you use the pessimistic transaction or have disabled the transaction retry, the number of statements in a transaction is not limited by this configuration. ### `txn-entry-size-limit` New in v5.0 @@ -492,13 +502,13 @@ The Plan Cache configuration of the `PREPARE` statement. - The `keepalive` time interval of the RPC connection between TiDB and TiKV nodes. If there is no network packet within the specified time interval, the gRPC client executes `ping` command to TiKV to see if it is alive. - Default: `10` -- unit: second +- Unit: second ### `grpc-keepalive-timeout` - The timeout of the RPC `keepalive` check between TiDB and TiKV nodes. 
- Default value: `3` -- unit: second +- Unit: second ### `commit-timeout` @@ -515,7 +525,7 @@ The Plan Cache configuration of the `PREPARE` statement. - Waits for `max-batch-wait-time` to encapsulate the data packets into a large packet in batch and send it to the TiKV node. It is valid only when the value of `tikv-client.max-batch-size` is greater than `0`. It is recommended not to modify this value. - Default value: `0` -- unit: nanoseconds +- Unit: nanoseconds ### `batch-wait-size` @@ -600,16 +610,16 @@ Configuration related to the status of TiDB service. ## stmt-summary New in v3.0.4 -Configurations related to the `events_statement_summary_by_digest` table. +Configurations related to [statement summary tables](/statement-summary-tables.md). ### max-stmt-count -- The maximum number of SQL categories allowed to be saved in the `events_statement_summary_by_digest` table. -- Default value: `100` +- The maximum number of SQL categories allowed to be saved in [statement summary tables](/statement-summary-tables.md). +- Default value: `3000` ### max-sql-length -- The longest display length for the `DIGEST_TEXT` and `QUERY_SAMPLE_TEXT` columns in the `events_statement_summary_by_digest` table. +- The longest display length for the `DIGEST_TEXT` and `QUERY_SAMPLE_TEXT` columns in [statement summary tables](/statement-summary-tables.md). - Default value: `4096` ## pessimistic-txn @@ -628,11 +638,16 @@ For pessimistic transaction usage, refer to [TiDB Pessimistic Transaction Mode]( + Minimum value: `0` + Maximum value: `10000` +### deadlock-history-collect-retryable + ++ Controls whether the [`INFORMATION_SCHEMA.DEADLOCKS`](/information-schema/information-schema-deadlocks.md) table collects the information of retryable deadlock errors. For the description of retryable deadlock errors, see [Retryable deadlock errors](/information-schema/information-schema-deadlocks.md#retryable-deadlock-errors). 
++ Default value: `false` + ## experimental -The `experimental` section, introduced in v3.1.0, describes configurations related to the experimental features of TiDB. +The `experimental` section, introduced in v3.1.0, describes the configurations related to the experimental features of TiDB. ### `allow-expression-index` New in v4.0.0 -- Determines whether to create the expression index. -- Default value: `false` ++ Controls whether an expression index can be created. Since TiDB v5.2.0, if the function in an expression is safe, you can create an expression index directly based on this function without enabling this configuration. If you want to create an expression index based on other functions, you can enable this configuration, but correctness issues might exist. By querying the `tidb_allow_function_for_expression_index` variable, you can get the functions that are safe to be directly used for creating an expression. ++ Default value: `false` diff --git a/tidb-lightning/tidb-lightning-backends.md b/tidb-lightning/tidb-lightning-backends.md index 64991a0c6773..eb0aab23a2f5 100644 --- a/tidb-lightning/tidb-lightning-backends.md +++ b/tidb-lightning/tidb-lightning-backends.md @@ -10,14 +10,14 @@ The backend determines how TiDB Lightning imports data into the target cluster. TiDB Lightning supports the following [backends](/tidb-lightning/tidb-lightning-glossary.md#back-end): -+ [Importer-backend](#tidb-lightning-importer-backend) (default) + [Local-backend](#tidb-lightning-local-backend) ++ [Importer-backend](#tidb-lightning-importer-backend) + [TiDB-backend](#tidb-lightning-tidb-backend) -The **Importer-backend** (default): `tidb-lightning` first encodes the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. 
- The **Local-backend**: `tidb-lightning` first encodes data into key-value pairs, sorts and stores them in a local temporary directory, and *upload* these key-value pairs to each TiKV node *as SST files*. Then, TiKV ingests these *SST files* into the cluster. The implementation of Local-backend is the same with that of Importer-backend but does not rely on the external `tikv-importer` component. +The **Importer-backend**: `tidb-lightning` first encodes the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. + The **TiDB-backend**: `tidb-lightning` first encodes these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. | Backend | Local-backend | Importer-backend | TiDB-backend | diff --git a/tidb-lightning/tidb-lightning-configuration.md b/tidb-lightning/tidb-lightning-configuration.md index 47975da4d7a1..d2432d520ae2 100644 --- a/tidb-lightning/tidb-lightning-configuration.md +++ b/tidb-lightning/tidb-lightning-configuration.md @@ -98,8 +98,8 @@ driver = "file" #keep-after-success = false [tikv-importer] -# Delivery backend, can be "importer", "local", or "tidb". -# backend = "importer" +# Delivery backend, can be "local", "importer" or "tidb". +# backend = "local" # The listening address of tikv-importer when backend is "importer". Change it to the actual address. addr = "172.16.31.10:8287" # Action to do when trying to insert a duplicated entry in the "tidb" backend. 
@@ -362,7 +362,7 @@ min-available-ratio = 0.05 | -d *directory* | Directory or [external storage URL](/br/backup-and-restore-storages.md) of the data dump to read from | `mydumper.data-source-dir` | | -L *level* | Log level: debug, info, warn, error, fatal (default = info) | `lightning.log-level` | | -f *rule* | [Table filter rules](/table-filter.md) (can be specified multiple times) | `mydumper.filter` | -| --backend *backend* | [Delivery backend](/tidb-lightning/tidb-lightning-backends.md) (`importer`, `local`, or `tidb`) | `tikv-importer.backend` | +| --backend *backend* | [Delivery backend](/tidb-lightning/tidb-lightning-backends.md) (`local`, `importer`, or `tidb`) | `tikv-importer.backend` | | --log-file *file* | Log file path (default = a temporary file in `/tmp`) | `lightning.log-file` | | --status-addr *ip:port* | Listening address of the TiDB Lightning server | `lightning.status-port` | | --importer *host:port* | Address of TiKV Importer | `tikv-importer.addr` | diff --git a/tidb-lightning/tidb-lightning-faq.md b/tidb-lightning/tidb-lightning-faq.md index 1033900cf127..d24584f24ae0 100644 --- a/tidb-lightning/tidb-lightning-faq.md +++ b/tidb-lightning/tidb-lightning-faq.md @@ -348,3 +348,19 @@ This error occurs usually because the CSV data file does not contain a header (t [mydumper.csv] header = false ``` + +## How to get the runtime goroutine information of TiDB Lightning + +1. If [`status-port`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-configuration) has been specified in the configuration file of TiDB Lightning, skip this step. Otherwise, you need to send the USR1 signal to TiDB Lightning to enable `status-port`. + + Get the process ID (PID) of TiDB Lightning using commands like `ps`, and then run the following command: + + {{< copyable "shell-regular" >}} + + ```sh + kill -USR1 + ``` + + Check the log of TiDB Lightning. 
The log of `starting HTTP server` / `start HTTP server` / `started HTTP server` shows the newly enabled `status-port`. + +2. Access `http://:/debug/pprof/goroutine?debug=2` to get the goroutine information. diff --git a/tidb-monitoring-api.md b/tidb-monitoring-api.md index f5e6313b60b2..13dc89b2f47d 100644 --- a/tidb-monitoring-api.md +++ b/tidb-monitoring-api.md @@ -8,7 +8,8 @@ aliases: ['/docs/dev/tidb-monitoring-api/'] You can use the following two types of interfaces to monitor the TiDB cluster state: -- [The state interface](#use-the-state-interface): this interface uses the HTTP interface to get the component information. +- [The state interface](#running-status): this interface uses the HTTP interface to get the component information. +- [Storage information](#storage-information): this interface uses the HTTP interface to get the storage information of data tables. - [The metrics interface](#use-the-metrics-interface): this interface uses Prometheus to record the detailed information of the various operations in components and views these metrics using Grafana. ## Use the state interface @@ -20,7 +21,9 @@ The state interface monitors the basic information of a specific component in th - TiDB API address: `http://${host}:${port}` - Default port: `10080` -The following example uses `http://${host}:${port}/status` to get the current state of the TiDB server and to determine whether the server is alive. The result is returned in JSON format. +### Running status + +The following example uses `http://${host}:${port}/status` to get the current state of the TiDB server and to determine whether the server is alive. The result is returned in **JSON** format. ```bash curl http://127.0.0.1:10080/status @@ -31,6 +34,48 @@ curl http://127.0.0.1:10080/status } ``` +#### Storage information + +The following example uses `http://${host}:${port}/schema_storage/${db}/${table}` to get the storage information of the specific data table. 
The result is returned in **JSON** format. + +{{< copyable "shell-regular" >}} + +```bash +curl http://127.0.0.1:10080/schema_storage/mysql/stats_histograms +``` + +``` +{ + "table_schema": "mysql", + "table_name": "stats_histograms", + "table_rows": 0, + "avg_row_length": 0, + "data_length": 0, + "max_data_length": 0, + "index_length": 0, + "data_free": 0 +} +``` + +```bash +curl http://127.0.0.1:10080/schema_storage/test +``` + +``` +[ + { + "table_schema": "test", + "table_name": "test", + "table_rows": 0, + "avg_row_length": 0, + "data_length": 0, + "max_data_length": 0, + "index_length": 0, + "data_free": 0 + } +] +``` + ### PD server - PD API address: `http://${host}:${port}/pd/api/v1/${api_name}` diff --git a/tidb-scheduling.md b/tidb-scheduling.md index a011bdbb3030..f8820ed0275e 100644 --- a/tidb-scheduling.md +++ b/tidb-scheduling.md @@ -45,7 +45,7 @@ The above situations can be classified into two types: * Storage capacity of all TiKV peers are balanced; * Hot spots are balanced; * Speed of load balancing for the Regions needs to be limited to ensure that online services are stable; - * Maintainers are able to to take peers online/offline manually. + * Maintainers are able to take peers online/offline manually. After the first type of requirements is satisfied, the system will be failure tolerable. After the second type of requirements is satisfied, resources will be utilized more efficiently and the system will have better scalability. diff --git a/tiflash/monitor-tiflash.md b/tiflash/monitor-tiflash.md index 90d28c38238e..bdf3c0d1f51f 100644 --- a/tiflash/monitor-tiflash.md +++ b/tiflash/monitor-tiflash.md @@ -73,6 +73,20 @@ The following sections introduce the default monitoring information of **TiFlash > > These metrics only cover the monitoring information of the TiFlash storage layer and do not cover that in TiFlash-Proxy. 
+## Storage Write Stall + +- Write & Delta Management Throughput: The throughput of write and data compaction for all instances. + - `throughput_write` means the throughput of data synchronization through Raft. + - `throughput_delta-management` means the throughput of data compaction. + - `total_write` means the total bytes written since the last start. + - `total_delta-management` means the total bytes of data compacted since the last start. +- Write Stall Duration: The stall duration of write and removing Region data (deleting ranges) by instance. +- Write Throughput By Instance: The throughput of write by instance. It includes the throughput by applying the Raft write commands and Raft snapshots. +- Write Command OPS By Instance: The total count of different kinds of commands received by instance. + - `write block` means the data logs synchronized through Raft. + - `delete_range` means that some Regions are removed from or moved to this instance. + - `ingest` means some Region snapshots are applied to this instance. + ## Raft - Read Index OPS: The number of times that each TiFlash instance triggers the `read_index` request per second, which equals to the number of Regions triggered. diff --git a/tiflash/tiflash-configuration.md b/tiflash/tiflash-configuration.md index db418a6a8a82..b46b9ff75cc8 100644 --- a/tiflash/tiflash-configuration.md +++ b/tiflash/tiflash-configuration.md @@ -51,29 +51,27 @@ minmax_index_cache_size = 5368709120 delta_index_cache_size = 0 ## The storage path of TiFlash data. If there are multiple directories, separate each directory with a comma. -## `path` and `path_realtime_mode` are deprecated since v4.0.9. Use the configurations -## in the `[storage]` section to get better performance in the multi-disk deployment scenarios +## path and path_realtime_mode are deprecated since v4.0.9. 
Use the configurations +## in the [storage] section to get better performance in the multi-disk deployment scenarios +## Since TiDB v5.2.0, if you need to use the storage.io_rate_limit configuration, you need to set the storage path of TiFlash data to storage.main.dir at the same time. +## When the [storage] configurations exist, both path and path_realtime_mode configurations are ignored. # path = "/tidb-data/tiflash-9000" ## or # path = "/ssd0/tidb-data/tiflash,/ssd1/tidb-data/tiflash,/ssd2/tidb-data/tiflash" -## The default value is `false`. If you set it to `true` and multiple directories +## The default value is false. If you set it to true and multiple directories ## are set in the path, the latest data is stored in the first directory and older ## data is stored in the rest directories. # path_realtime_mode = false -## The path in which the TiFlash temporary files are stored. By default it is the first directory in `path` -## or in `storage.latest.dir` appended with "/tmp". +## The path in which the TiFlash temporary files are stored. By default it is the first directory in path +## or in storage.latest.dir appended with "/tmp". # tmp_path = "/tidb-data/tiflash-9000/tmp" ## Storage paths settings take effect starting from v4.0.9 [storage] - ## [Experimental] New in v5.0. This item limits the total write rate of background tasks in bytes per second. It is not recommended to use this experimental feature in a production environment. - ## The unit is bytes. Currently, the setting such as "10GB" is not supported. - ## The default value is 0, which means no limit. - ## This parameter is used to control the usage of machine disk bandwidth by background tasks mainly for the scenario where TiFlash is deployed on the AWS EBS (gp2/gp3) disk. - ## This parameter can be used to improve the stability of the TiFlash query performance. The recommended configuration in this scenario is 50% of the disk bandwidth. 
- ## It is not recommended to modify this configuration in other scenarios. - bg_task_io_rate_limit = 0 + ## This configuration item is deprecated since v5.2.0. You can use the [storage.io_rate_limit] settings below instead. + + # bg_task_io_rate_limit = 0 [storage.main] ## The list of directories to store the main data. More than 90% of the total data is stored in @@ -82,30 +80,54 @@ delta_index_cache_size = 0 ## or # dir = [ "/ssd0/tidb-data/tiflash", "/ssd1/tidb-data/tiflash" ] - ## The maximum storage capacity of each directory in `storage.main.dir`. + ## The maximum storage capacity of each directory in storage.main.dir. ## If it is not set, or is set to multiple 0, the actual disk (the disk where the directory is located) capacity is used. ## Note that human-readable numbers such as "10GB" are not supported yet. ## Numbers are specified in bytes. - ## The size of the `capacity` list should be the same with the `dir` size. + ## The size of the capacity list should be the same with the dir size. ## For example: # capacity = [ 10737418240, 10737418240 ] [storage.latest] ## The list of directories to store the latest data. About 10% of the total data is stored in ## the directory list. The directories (or directory) listed here require higher IOPS - ## metrics than those in `storage.main.dir`. - ## If it is not set (by default), the values of `storage.main.dir` are used. + ## metrics than those in storage.main.dir. + ## If it is not set (by default), the values of storage.main.dir are used. # dir = [ ] - ## The maximum storage capacity of each directory in `storage.latest.dir`. + ## The maximum storage capacity of each directory in storage.latest.dir. ## If it is not set, or is set to multiple 0, the actual disk (the disk where the directory is located) capacity is used. # capacity = [ 10737418240, 10737418240 ] + ## [storage.io_rate_limit] settings are new in v5.2.0. 
+ [storage.io_rate_limit] + ## This configuration item determines whether to limit the I/O traffic, which is disabled by default. This traffic limit in TiFlash is suitable for cloud storage that has the disk bandwidth of a small and specific size. + ## The total I/O bandwidth for disk reads and writes. The unit is bytes and the default value is 0, which means the I/O traffic is not limited by default. + # max_bytes_per_sec = 0 + ## max_read_bytes_per_sec and max_write_bytes_per_sec have similar meanings to max_bytes_per_sec. max_read_bytes_per_sec means the total I/O bandwidth for disk reads, and max_write_bytes_per_sec means the total I/O bandwidth for disk writes. + ## These configuration items limit I/O bandwidth for disk reads and writes separately. You can use them for cloud storage that calculates the limit of I/O bandwidth for disk reads and writes separately, such as the Persistent Disk provided by Google Cloud Platform. + ## When the value of max_bytes_per_sec is not 0, max_bytes_per_sec is prioritized. + # max_read_bytes_per_sec = 0 + # max_write_bytes_per_sec = 0 + + ## The following parameters control the bandwidth weights assigned to different I/O traffic types. Generally, you do not need to adjust these parameters. + ## TiFlash internally divides I/O requests into four types: foreground writes, background writes, foreground reads, background reads. + ## When the I/O traffic limit is initialized, TiFlash assigns the bandwidth according to the following weight ratio. + ## The following default configurations indicate that each type of traffic gets a weight of 25% (25 / (25 + 25 + 25 + 25) = 25%). + ## If the weight is configured to 0, the corresponding I/O traffic is not limited. + # foreground_write_weight = 25 + # background_write_weight = 25 + # foreground_read_weight = 25 + # background_read_weight = 25 + ## TiFlash supports automatically tuning the traffic limit for different I/O types according to the current I/O load. 
Sometimes, the tuned bandwidth might exceed the weight ratio set above. + ## auto_tune_sec indicates the interval of automatic tuning. The unit is seconds. If the value of auto_tune_sec is 0, the automatic tuning is disabled. + # auto_tune_sec = 5 + [flash] tidb_status_addr = TiDB status port and address. # Multiple addresses are separated with commas. service_addr = The listening address of TiFlash Raft services and coprocessor services. ## Multiple TiFlash nodes elect a master to add or delete placement rules to PD, -## and the configurations in `flash.flash_cluster` control this process. +## and the configurations in flash.flash_cluster control this process. [flash.flash_cluster] refresh_interval = Master regularly refreshes the valid period. update_rule_interval = Master regularly gets the status of TiFlash replicas and interacts with PD. @@ -140,7 +162,7 @@ delta_index_cache_size = 0 [profiles] [profiles.default] - ## The default value is `true`. This parameter determines whether the segment + ## The default value is true. This parameter determines whether the segment ## of DeltaTree Storage Engine uses logical split. ## Using the logical split can reduce the write amplification, and improve the write speed. ## However, these are at the cost of disk space waste. @@ -162,12 +184,12 @@ delta_index_cache_size = 0 ## Security settings take effect starting from v4.0.5. [security] ## New in v5.0. This configuration item enables or disables log redaction. If the configuration value - ## is set to `true`, all user data in the log will be replaced by `?`. - ## Note that you also need to set `security.redact-info-log` for tiflash-learner's logging in tiflash-learner.toml. + ## is set to true, all user data in the log will be replaced by ?. + ## Note that you also need to set security.redact-info-log for tiflash-learner's logging in tiflash-learner.toml. # redact_info_log = false ## Path of the file that contains a list of trusted SSL CAs. 
If set, the following settings - ## `cert_path` and `key_path` are also needed. + ## cert_path and key_path are also needed. # ca_path = "/path/to/ca.pem" ## Path of the file that contains X509 certificate in PEM format. # cert_path = "/path/to/tiflash-server.pem" @@ -175,8 +197,8 @@ delta_index_cache_size = 0 # key_path = "/path/to/tiflash-server-key.pem" ## New in v5.0. This configuration item enables or disables log redaction. If the configuration value - ## is set to `true`, all user data in the log will be replaced by `?`. - ## Note that you also need to set `security.redact-info-log` for tiflash-learner's logging in tiflash-learner.toml. + ## is set to true, all user data in the log will be replaced by ?. + ## Note that you also need to set security.redact-info-log for tiflash-learner's logging in tiflash-learner.toml. # redact_info_log = false ``` diff --git a/tiflash/tune-tiflash-performance.md b/tiflash/tune-tiflash-performance.md index 9bbe13d0dcc5..be517d1805d0 100644 --- a/tiflash/tune-tiflash-performance.md +++ b/tiflash/tune-tiflash-performance.md @@ -52,14 +52,4 @@ If you want to save machine resources and have no requirement on isolation, you ```sql set @@tidb_opt_distinct_agg_push_down = 1; - ``` - -5. If the `JOIN` operator does not choose the MPP mode, you can modify the value of `tidb_opt_network_factor` to make the`JOIN` operator choose the MPP mode: - - The variable `tidb_opt_network_factor` is used to set the ratio of network overhead that the optimizer takes into account when calculating the cost. The smaller the variable value is, the smaller the estimated cost for a large amount of network transmissions is, and the more TiDB inclined to choose the MPP operator. 
- - {{< copyable "sql" >}} - - ```sql - set @@tidb_opt_network_factor = 0.001; - ``` + ``` \ No newline at end of file diff --git a/tiflash/use-tiflash.md b/tiflash/use-tiflash.md index d7adc9cf3853..32b05252e262 100644 --- a/tiflash/use-tiflash.md +++ b/tiflash/use-tiflash.md @@ -242,16 +242,15 @@ In TiDB, operators are organized in a tree structure. For an operator to be push Currently, TiFlash supports the following push-down expressions: -* Mathematical functions: `+, -, /, *, >=, <=, =, !=, <, >, round(int), round(double), abs, floor(int), ceil(int), ceiling(int)` -* Logical functions: `and, or, not, case when, if, ifnull, isnull, in` +* Mathematical functions: `+, -, /, *, %, >=, <=, =, !=, <, >, round(int), round(double), round(decimal), abs, floor(int), ceil(int), ceiling(int), sqrt, log, log2, log10, ln, exp, pow, sign, radians, degrees, conv, crc32` +* Logical functions: `and, or, not, case when, if, ifnull, isnull, in, like, coalesce` * Bitwise operations: `bitand, bitor, bigneg, bitxor` -* String functions: `substr, char_length, replace, concat, concat_ws, left, right` -* Date functions: `date_format, timestampdiff, from_unixtime, unix_timestamp(int), unix_timestamp(decimal), str_to_date(date), str_to_date(datetime), date_add(string, int), date_add(datetime, int), date_sub(datetime, int), date_sub(string, int), datediff, year, month, day, extract(datetime)` +* String functions: `substr, char_length, replace, concat, concat_ws, left, right, ascii, length, trim, position` +* Date functions: `date_format, timestampdiff, from_unixtime, unix_timestamp(int), unix_timestamp(decimal), str_to_date(date), str_to_date(datetime), datediff, year, month, day, extract(datetime), date` * JSON function: `json_length` * Conversion functions: `cast(int as double), cast(int as decimal), cast(int as string), cast(int as time), cast(double as int), cast(double as decimal), cast(double as string), cast(double as time), cast(string as int), cast(string as double), cast(string 
as decimal), cast(string as time), cast(decimal as int), cast(decimal as string), cast(decimal as time), cast(time as int), cast(time as decimal), cast(time as string)` * Aggregate functions: `min, max, sum, count, avg, approx_count_distinct` - -Among them, the push-down of `cast` and `date_add` is not enabled by default. To enable it, refer to [Blocklist of Optimization Rules and Expression Pushdown](/blocklist-control-plan.md). +* Miscellaneous functions: `inetntoa, inetaton, inet6ntoa, inet6aton` In addition, expressions that contain the Time/Bit/Set/Enum/Geometry type cannot be pushed down to TiFlash. @@ -389,6 +388,3 @@ Currently, TiFlash does not support some features. These features might be incom ``` In the example above, `a/b`'s inferred type from the compiling is `Decimal(7,4)` both in TiDB and in TiFlash. Constrained by `Decimal(7,4)`, `a/b`'s returned type should be `0.0000`. In TiDB, `a/b`'s runtime precision is higher than `Decimal(7,4)`, so the original table data is not filtered by the `where a/b` condition. However, in TiFlash, the calculation of `a/b` uses `Decimal(7,4)` as the result type, so the original table data is filtered by the `where a/b` condition. - -* The MPP mode in TiFlash does not support the following features: - * If the [`new_collations_enabled_on_first_bootstrap`](/tidb-configuration-file.md#new_collations_enabled_on_first_bootstrap) configuration item's value is `true`, the MPP mode does not support the string-type join key or the string column type in the `group by` aggregation. For these two query types, the MPP mode is not selected by default. 
diff --git a/tikv-configuration-file.md b/tikv-configuration-file.md index 9d29c99c2b94..3851a1281010 100644 --- a/tikv-configuration-file.md +++ b/tikv-configuration-file.md @@ -14,7 +14,7 @@ This document only describes the parameters that are not included in command-lin ## Global configuration -### abort-on-panic +### `abort-on-panic` + Sets whether to call `abort()` to exit the process when TiKV panics. This option affects whether TiKV allows the system to generate core dump files. @@ -23,7 +23,45 @@ This document only describes the parameters that are not included in command-lin + Default value: `false` -### server +### `log-level` + ++ The log level ++ Value options: "trace", "debug", "info", "warning", "error", "critical" ++ Default value: "info" + +### `log-file` + ++ The log file. If this configuration is not set, logs are output to "stderr" by default. ++ Default value: "" + +### `log-format` + ++ The log format ++ Value options: "json", "text" ++ Default value: "text" + +### `log-rotation-timespan` + ++ The timespan between log rotations. When this timespan passes, log files are rotated, that is, a timestamp is appended to the file name of the current log file, and a new file is created. ++ Default value: "24h" + +### `log-rotation-size` + ++ The size of a log file that triggers log rotation. Once the size of a log file is bigger than the specified threshold value, log files are rotated. The old log file is placed into the new file, and the new file name is the old file name with a timestamp suffix. ++ Default value: "300MB" + +### `slow-log-file` + ++ The file to store slow logs ++ If this configuration is not set but `log-file` is set, slow logs are output to the log file specified by `log-file`. If neither `slow-log-file` nor `log-file` are set, all logs are output to "stderr". ++ Default value: "" + +### `slow-log-threshold` + ++ The threshold for outputing slow logs. If the processing time is longer than this threshold, slow logs are output. 
++ Default value: "1s" + +## server + Configuration items related to the server @@ -116,10 +154,15 @@ This document only describes the parameters that are not included in command-lin ### `end-point-slow-log-threshold` -+ The time threshold for a TiDB's push down request to print slow log ++ The time threshold for a TiDB's push-down request to output slow log. If the processing time is longer than this threshold, the slow logs are output. + Default value: `"1s"` + Minimum value: `0` +### `raft-client-queue-size` + ++ Specifies the queue size of the Raft messages in TiKV. If too many messages not sent in time result in a full buffer, or messages discarded, you can specify a greater value to improve system stability. ++ Default value: `8192` + ## readpool.unified Configuration items related to the single thread pool serving read requests. This thread pool supersedes the original storage thread pool and coprocessor thread pool since the 4.0 version. @@ -312,6 +355,35 @@ Configuration items related to the sharing of block cache among multiple RocksDB + Default value: 45% of the size of total system memory + Unit: KB|MB|GB +## storage.flow-control + +Configuration items related to the flow control mechanism in TiKV. This mechanism replaces the write stall mechanism in RocksDB and controls flow at the scheduler layer, which avoids the issue of QPS drop caused by the stuck Raftstore or Apply threads when the write traffic is high. + +### `enable` + ++ Determines whether to enable the flow control mechanism. After it is enabled, TiKV automatically disables the write stall mechanism of KvDB and the write stall mechanism of RaftDB (excluding memtable). ++ Default value: `true` + +### `memtables-threshold` + ++ When the number of kvDB memtables reaches this threshold, the flow control mechanism starts to work. ++ Default value: `5` + +### `l0-files-threshold` + ++ When the number of kvDB L0 files reaches this threshold, the flow control mechanism starts to work. 
++ Default value: `9` + +### `soft-pending-compaction-bytes-limit` + ++ When the pending compaction bytes in KvDB reach this threshold, the flow control mechanism starts to reject some write requests and reports the `ServerIsBusy` error. ++ Default value: `"192GB"` + +### `hard-pending-compaction-bytes-limit` + ++ When the pending compaction bytes in KvDB reach this threshold, the flow control mechanism rejects all write requests and reports the `ServerIsBusy` error. ++ Default value: `"1024GB"` + ## storage.io-rate-limit Configuration items related to the I/O rate limiter. @@ -429,14 +501,9 @@ Configuration items related to Raftstore ### `hibernate-regions` -+ Enables or disables Hibernate Region. When this option is enabled, a Region idle for a long time is automatically set as hibernated. This reduces the extra overhead caused by heartbeat messages between the Raft leader and the followers for idle Regions. You can use `raftstore.peer-stale-state-check-interval` to modify the heartbeat interval between the leader and the followers of hibernated Regions. ++ Enables or disables Hibernate Region. When this option is enabled, a Region idle for a long time is automatically set as hibernated. This reduces the extra overhead caused by heartbeat messages between the Raft leader and the followers for idle Regions. You can use `peer-stale-state-check-interval` to modify the heartbeat interval between the leader and the followers of hibernated Regions. + Default value: `true` in v5.0.2 and later versions; `false` in versions before v5.0.2 -### `raftstore.peer-stale-state-check-interval` - -+ Modifies the state check interval for Regions. -+ Default value: 5 min - ### `split-region-check-tick-interval` + Specifies the interval at which to check whether the Region split is needed. `0` means that this feature is disabled. 
@@ -637,6 +704,18 @@ Configuration items related to Raftstore + Default value: `1` + Minimum value: greater than `0` +### `cmd-batch` + ++ Controls whether to enable batch processing of the requests. When it is enabled, the write performance is significantly improved. ++ Default value: `true` + +### `inspect-interval` + ++ At a certain interval, TiKV inspects the latency of the Raftstore component. This parameter specifies the interval of the inspection. If the latency exceeds this value, this inspection is marked as timeout. ++ Judges whether the TiKV node is slow based on the ratio of timeout inspection. ++ Default value: `"500ms"` ++ Minimum value: `"1ms"` + ## Coprocessor Configuration items related to Coprocessor @@ -717,7 +796,11 @@ Configuration items related to RocksDB ### `wal-recovery-mode` + WAL recovery mode -+ Optional values: `0` (`TolerateCorruptedTailRecords`), `1` (`AbsoluteConsistency`), `2` (`PointInTimeRecovery`), `3` (`SkipAnyCorruptedRecords`) ++ Value options: `0`, `1`, `2`, `3` ++ `0` (`TolerateCorruptedTailRecords`): tolerates and discards the records that have incomplete trailing data on all logs. ++ `1` (`AbsoluteConsistency`): abandons recovery when corrupted logs are found. ++ `2` (`PointInTimeRecovery`): recovers log sequentially until the first corrupted log is encountered. ++ `3` (`SkipAnyCorruptedRecords`): recovery after a disaster. Corrupted records are skipped + Default value: `2` + Minimum value: `0` + Maximum value: `3` @@ -753,7 +836,7 @@ Configuration items related to RocksDB ### `compaction-readahead-size` -+ The size of `readahead` when compaction is being performed ++ Enables the readahead feature during RocksDB compaction and specifies the size of readahead data. If you are using mechanical disks, it is recommended to set the value to 2MB at least. 
+ Default value: `0` + Minimum value: `0` + Unit: B|KB|MB|GB @@ -767,7 +850,7 @@ Configuration items related to RocksDB ### `use-direct-io-for-flush-and-compaction` -+ Determines whether to use `O_DIRECT` for both reads and writes in background flush and compactions ++ Determines whether to use `O_DIRECT` for both reads and writes in the background flush and compactions. The performance impact of this option: enabling `O_DIRECT` bypasses and prevents contamination of the OS buffer cache, but the subsequent file reads require re-reading the contents to the buffer cache. + Default value: `false` ### `rate-bytes-per-sec` @@ -890,7 +973,7 @@ Configuration items related to `rocksdb.defaultcf`, `rocksdb.writecf`, and `rock ### `pin-l0-filter-and-index-blocks` -+ Determines whether to pin the index and filter at L0 ++ Determines whether to pin the index and filter blocks of the level 0 SST files in memory. + Default value: `true` ### `use-bloom-filter` @@ -1240,7 +1323,7 @@ Configuration items related to TiDB Lightning import and BR restore. ### `enable-compaction-filter` New in v5.0 + Controls whether to enable the GC in Compaction Filter feature -+ Default value: `false` ++ Default value: `true` ## backup diff --git a/tikv-control.md b/tikv-control.md index fd70acaf1b16..d5cb2f52325b 100644 --- a/tikv-control.md +++ b/tikv-control.md @@ -518,3 +518,40 @@ Type "I consent" to continue, anything else to exit: I consent > **Note** > > The command will expose data encryption keys as plaintext. In production, DO NOT redirect the output to a file. Even deleting the output file afterward may not cleanly wipe out the content from disk. + +### Print information related to damaged SST files + +Damaged SST files in TiKV might cause the TiKV process to panic. To clean up the damaged SST files, you will need the information of these files. To get the information, you can execute the `bad-ssts` command in TiKV Control. The needed information is shown in the output. 
The following is an example command and output. + +> **Note:** +> +> Before running this command, stop the running TiKV instance. + +```bash +$ tikv-ctl bad-ssts --db --pd +``` + +```bash +-------------------------------------------------------- +corruption info: +data/tikv-21107/db/000014.sst: Corruption: Bad table magic number: expected 9863518390377041911, found 759105309091689679 in data/tikv-21107/db/000014.sst + +sst meta: +14:552997[1 .. 5520]['0101' seq:1, type:1 .. '7A7480000000000000FF0F5F728000000000FF0002160000000000FAFA13AB33020BFFFA' seq:2032, type:1] at level 0 for Column family "default" (ID 0) +it isn't easy to handle local data, start key:0101 + +overlap region: +RegionInfo { region: id: 4 end_key: 7480000000000000FF0500000000000000F8 region_epoch { conf_ver: 1 version: 2 } peers { id: 5 store_id: 1 }, leader: Some(id: 5 store_id: 1) } + +suggested operations: +tikv-ctl ldb --db=data/tikv-21107/db unsafe_remove_sst_file "data/tikv-21107/db/000014.sst" +tikv-ctl --db=data/tikv-21107/db tombstone -r 4 --pd +-------------------------------------------------------- +corruption analysis has completed +``` + +From the output above, you can see that the information of the damaged SST file is printed first and then the meta-information is printed. + ++ In the `sst meta` part, `14` means the SST file number; `552997` means the file size, followed by the smallest and largest sequence numbers and other meta-information. ++ The `overlap region` part shows the information of the Region involved. This information is obtained through the PD server. ++ The `suggested operations` part provides you suggestion to clean up the damaged SST file. You can take the suggestion to clean up files and restart the TiKV instance. 
diff --git a/tiup/tiup-cluster-topology-reference.md b/tiup/tiup-cluster-topology-reference.md index 8a4c90365cf1..bdfe33bff30d 100644 --- a/tiup/tiup-cluster-topology-reference.md +++ b/tiup/tiup-cluster-topology-reference.md @@ -4,7 +4,7 @@ title: Topology Configuration File for TiDB Deployment Using TiUP # Topology Configuration File for TiDB Deployment Using TiUP -To deploy or scale TiDB using TiUP, you need to provide a topology file ([sample](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/topology.example.yaml)) to describe the cluster topology. +To deploy or scale TiDB using TiUP, you need to provide a topology file ([sample](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml)) to describe the cluster topology. Similarly, to modify the cluster topology, you need to modify the topology file. The difference is that, after the cluster is deployed, you can only modify a part of the fields in the topology file. This document introduces each section of the topology file and each field in each section. @@ -58,7 +58,7 @@ The `global` section corresponds to the cluster's global configuration and has t - If `data_dir` is a relative path, the component data is placed in `/`. For the calculation rules of ``, see the application rules of the `deploy_dir` field. -- `log_dir`: The data directory. Default value: `"log"`. Its application rules are as follows: +- `log_dir`: The log directory. Default value: `"log"`. Its application rules are as follows: - If the absolute path `log_dir` is configured at the instance level, the actual log directory is the `log_dir` configured for the instance. @@ -493,6 +493,8 @@ drainer_servers: - `deploy_dir`: Specifies the deployment directory. If it is not specified or specified as a relative directory, the directory is generated according to the `deploy_dir` directory configured in `global`. +- `data_dir`: Specifies the data directory.
If it is not specified or specified as a relative directory, the directory is generated according to the `data_dir` directory configured in `global`. + - `log_dir`: Specifies the log directory. If it is not specified or specified as a relative directory, the log is generated according to the `log_dir` directory configured in `global`. - `gc-ttl`: The Time To Live (TTL) duration of the service level GC safepoint set by TiCDC in PD, in seconds. The default value is `86400`, which is 24 hours. @@ -514,6 +516,7 @@ For the above fields, you cannot modify these configured fields after the deploy - `host` - `port` - `deploy_dir` +- `data_dir` - `log_dir` - `gc-ttl` - `tz` diff --git a/tiup/tiup-cluster.md b/tiup/tiup-cluster.md index e5c685d32d24..dd6c219f3ed4 100644 --- a/tiup/tiup-cluster.md +++ b/tiup/tiup-cluster.md @@ -63,7 +63,7 @@ tiup cluster deploy [flags] This command requires you to provide the cluster name, the TiDB cluster version, and a topology file of the cluster. -To write a topology file, refer to [the example](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/topology.example.yaml). The following file is an example of the simplest topology: +To write a topology file, refer to [the example](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml). The following file is an example of the simplest topology: > **Note:** > diff --git a/tiup/tiup-component-cluster-deploy.md b/tiup/tiup-component-cluster-deploy.md index 19eb1bd01c34..92acb198c48c 100644 --- a/tiup/tiup-component-cluster-deploy.md +++ b/tiup/tiup-component-cluster-deploy.md @@ -13,7 +13,7 @@ tiup cluster deploy [flags] ``` - ``: the name of the new cluster, which cannot be the same as the existing cluster names. -- ``: the version number of the TiDB cluster to deploy, such as `v5.1.0`. +- ``: the version number of the TiDB cluster to deploy, such as `v5.2.1`. - ``: the prepared [topology file](/tiup/tiup-cluster-topology-reference.md). 
## Options diff --git a/tiup/tiup-component-cluster-import.md b/tiup/tiup-component-cluster-import.md index d9cf8054aacf..bd2fedc7fb34 100644 --- a/tiup/tiup-component-cluster-import.md +++ b/tiup/tiup-component-cluster-import.md @@ -10,13 +10,14 @@ Before TiDB v4.0, TiDB clusters were mainly deployed using TiDB Ansible. For TiD > > + After importing the TiDB Ansible configuration to TiUP for management, **DO NOT** use TiDB Ansible for cluster operations anymore. Otherwise, conflicts might be caused due to inconsistent meta information. > + If the clusters deployed using TiDB Ansible are in any of the following situations, do not use the `import` command. -> + Clusters with TLS encryption enabled -> + Pure KV clusters (clusters without TiDB instances) -> + Clusters with Kafka enabled -> + Clusters with Spark enabled -> + Clusters with TiDB Lightning/TiKV Importer enabled -> + Clusters still using the old `push` mode to collect monitoring metrics (if you keep the default mode `pull` unchanged, using the `import` command is supported) -> + Clusters in which the non-default ports (the ports configured in the `group_vars` directory are compatible) are separately configured in the `inventory.ini` configuration file using `node_exporter_port` / `blackbox_exporter_port` +> + Clusters with TLS encryption enabled +> + Pure KV clusters (clusters without TiDB instances) +> + Clusters with Kafka enabled +> + Clusters with Spark enabled +> + Clusters with TiDB Lightning/TiKV Importer enabled +> + Clusters still using the old `push` mode to collect monitoring metrics (if you keep the default mode `pull` unchanged, using the `import` command is supported) +> + Clusters in which the non-default ports (the ports configured in the `group_vars` directory are compatible) are separately configured in the `inventory.ini` configuration file using `node_exporter_port` / `blackbox_exporter_port` +> + If some nodes in the cluster deployed using TiDB Ansible are deployed without monitoring 
components, you should first use TiDB Ansible to add the corresponding node information in the `monitored_servers` section of the `inventory.ini` file, and then use the `deploy.yaml` playbook to fully deploy monitoring components. Otherwise, when you perform maintenance operations after the cluster is imported into TiUP, errors might occur due to the lack of monitoring components. ## Syntax diff --git a/tiup/tiup-component-dm-edit-config.md b/tiup/tiup-component-dm-edit-config.md index b2cd02bc55f4..30f48a84fb86 100644 --- a/tiup/tiup-component-dm-edit-config.md +++ b/tiup/tiup-component-dm-edit-config.md @@ -9,7 +9,7 @@ If you need to modify the cluster service configuration after the cluster is dep > **Note:** > > + When you modify the configuration, you cannot add or delete machines. For how to add machines, see [Scale out a cluster](/tiup/tiup-component-dm-scale-out.md). For how to delete machines, see [Scale in a cluster](/tiup/tiup-component-dm-scale-in.md). -> + After you execute the `tiup dm edit-config` command, the configuration is modified only on the control machine. Then you need to execute the `tiup dm relaod` command to reload the configuration. +> + After you execute the `tiup dm edit-config` command, the configuration is modified only on the control machine. Then you need to execute the `tiup dm reload` command to reload the configuration. ## Syntax diff --git a/tiup/tiup-component-management.md b/tiup/tiup-component-management.md index 342e757e295c..0aa46aca25fa 100644 --- a/tiup/tiup-component-management.md +++ b/tiup/tiup-component-management.md @@ -70,12 +70,12 @@ Example 2: Use TiUP to install the nightly version of TiDB. tiup install tidb:nightly ``` -Example 3: Use TiUP to install TiKV v5.1.0. +Example 3: Use TiUP to install TiKV v5.2.1. 
{{< copyable "shell-regular" >}} ```shell -tiup install tikv:v5.1.0 +tiup install tikv:v5.2.1 ``` ## Upgrade components @@ -128,12 +128,12 @@ Before the component is started, TiUP creates a directory for it, and then puts If you want to start the same component multiple times and reuse the previous working directory, you can use `--tag` to specify the same name when the component is started. After the tag is specified, the working directory will *not be automatically deleted* when the instance is terminated, which makes it convenient to reuse the working directory. -Example 1: Operate TiDB v5.1.0. +Example 1: Operate TiDB v5.2.1. {{< copyable "shell-regular" >}} ```shell -tiup tidb:v5.1.0 +tiup tidb:v5.2.1 ``` Example 2: Specify the tag with which TiKV operates. @@ -219,12 +219,12 @@ The following flags are supported in this command: - If the version is ignored, adding `--all` means to uninstall all versions of this component. - If the version and the component are both ignored, adding `--all` means to uninstall all components of all versions. -Example 1: Uninstall TiDB v5.0.0. +Example 1: Uninstall TiDB v5.2.1. {{< copyable "shell-regular" >}} ```shell -tiup uninstall tidb:v5.0.0 +tiup uninstall tidb:v5.2.1 ``` Example 2: Uninstall TiKV of all versions. diff --git a/tiup/tiup-dm-topology-reference.md b/tiup/tiup-dm-topology-reference.md index a135e4583acd..b87d23b04d24 100644 --- a/tiup/tiup-dm-topology-reference.md +++ b/tiup/tiup-dm-topology-reference.md @@ -4,7 +4,7 @@ title: Topology Configuration File for DM Cluster Deployment Using TiUP # Topology Configuration File for DM Cluster Deployment Using TiUP -To deploy or scale a TiDB Data Migration (DM) cluster, you need to provide a topology file ([sample](https://github.com/pingcap/tiup/blob/master/embed/templates/examples/dm/topology.example.yaml)) to describe the cluster topology. 
+To deploy or scale a TiDB Data Migration (DM) cluster, you need to provide a topology file ([sample](https://github.com/pingcap/tiup/blob/master/embed/examples/dm/topology.example.yaml)) to describe the cluster topology. Similarly, to modify the cluster topology, you need to modify the topology file. The difference is that, after the cluster is deployed, you can only modify a part of the fields in the topology file. This document introduces each section of the topology file and each field in each section. diff --git a/tiup/tiup-mirror.md b/tiup/tiup-mirror.md index efa07e9092cb..a3c626211b98 100644 --- a/tiup/tiup-mirror.md +++ b/tiup/tiup-mirror.md @@ -77,9 +77,9 @@ The `tiup mirror clone` command provides many optional flags (might provide more If you want to clone only one version (not all versions) of a component, use `--=` to specify this version. For example: - - Execute the `tiup mirror clone --tidb v5.1.0` command to clone the v5.1.0 version of the TiDB component. - - Execute the `tiup mirror clone --tidb v5.1.0 --tikv all` command to clone the v5.1.0 version of the TiDB component and all versions of the TiKV component. - - Execute the `tiup mirror clone v5.1.0` command to clone the v5.1.0 version of all components in a cluster. + - Execute the `tiup mirror clone --tidb v5.2.1` command to clone the v5.2.1 version of the TiDB component. + - Execute the `tiup mirror clone --tidb v5.2.1 --tikv all` command to clone the v5.2.1 version of the TiDB component and all versions of the TiKV component. + - Execute the `tiup mirror clone v5.2.1` command to clone the v5.2.1 version of all components in a cluster. 
## Usage examples diff --git a/tiup/tiup-playground.md b/tiup/tiup-playground.md index f907980aec6b..582bbd080827 100644 --- a/tiup/tiup-playground.md +++ b/tiup/tiup-playground.md @@ -22,7 +22,7 @@ This command actually performs the following operations: - Because this command does not specify the version of the playground component, TiUP first checks the latest version of the installed playground component. Assume that the latest version is v1.5.0, then this command works the same as `tiup playground:v1.5.0`. - If you have not used TiUP playground to install the TiDB, TiKV, and PD components, the playground component installs the latest stable version of these components, and then start these instances. -- Because this command does not specify the version of the TiDB, PD, and TiKV component, TiUP playground uses the latest version of each component by default. Assume that the latest version is v5.0.0, then this command works the same as `tiup playground:v1.5.0 v5.1.0`. +- Because this command does not specify the version of the TiDB, PD, and TiKV component, TiUP playground uses the latest version of each component by default. Assume that the latest version is v5.0.0, then this command works the same as `tiup playground:v1.5.0 v5.2.1`. - Because this command does not specify the number of each component, TiUP playground, by default, starts a smallest cluster that consists of one TiDB instance, one TiKV instance, and one PD instance. - After starting each TiDB component, TiUP playground reminds you that the cluster is successfully started and provides you some useful information, such as how to connect to the TiDB cluster through the MySQL client and how to access the [TiDB Dashboard](/dashboard/dashboard-intro.md). @@ -64,7 +64,7 @@ Flags: tiup playground nightly ``` -In the command above, `nightly` is the version number of the cluster. Similarly, you can replace `nightly` with `v5.1.0`, and the command is `tiup playground v5.1.0`. 
+In the command above, `nightly` is the version number of the cluster. Similarly, you can replace `nightly` with `v5.2.1`, and the command is `tiup playground v5.2.1`. ### Start a cluster with monitor diff --git a/transaction-isolation-levels.md b/transaction-isolation-levels.md index 3b2ec9ca8f8d..f6dc9540010a 100644 --- a/transaction-isolation-levels.md +++ b/transaction-isolation-levels.md @@ -50,9 +50,7 @@ The Repeatable Read isolation level in TiDB differs from ANSI Repeatable Read is ### Difference between TiDB and MySQL Repeatable Read -The Repeatable Read isolation level in TiDB differs from that in MySQL. The MySQL Repeatable Read isolation level does not check whether the current version is visible when updating, which means it can continue to update even if the row has been updated after the transaction starts. In contrast, if the row has been updated after the transaction starts, the TiDB transaction is rolled back and retried. Transaction Retries in TiDB might fail, leading to a final failure of the transaction, while in MySQL the updating transaction can be successful. - -The MySQL Repeatable Read isolation level is not the snapshot isolation level. The consistency of MySQL Repeatable Read isolation level is weaker than both the snapshot isolation level and TiDB Repeatable Read isolation level. +The Repeatable Read isolation level in TiDB differs from that in MySQL. The MySQL Repeatable Read isolation level does not check whether the current version is visible when updating, which means it can continue to update even if the row has been updated after the transaction starts. In contrast, if the row has been updated after the transaction starts, the TiDB optimistic transaction is rolled back and retried. Transaction retries in TiDB's optimistic concurrency control might fail, leading to a final failure of the transaction, while in TiDB's pessimistic concurrency control and MySQL, the updating transaction can be successful. 
## Read Committed isolation level diff --git a/troubleshoot-lock-conflicts.md b/troubleshoot-lock-conflicts.md index 16646e062855..eb3c31ff23f8 100644 --- a/troubleshoot-lock-conflicts.md +++ b/troubleshoot-lock-conflicts.md @@ -241,9 +241,9 @@ Since v5.1, TiDB supports the Lock View feature. This feature has several system * [`DATA_LOCK_WAITS`](/information-schema/information-schema-data-lock-waits.md): Provides the pessimistic lock-waiting information in TiKV, including the `start_ts` of the blocking and blocked transaction, the digest of the blocked SQL statement, and the key on which the waiting occurs. * [`DEADLOCKS` and `CLUSTER_DEADLOCKS`](/information-schema/information-schema-deadlocks.md): Provides the information of several deadlock events that have recently occurred on the current TiDB node or in the entire cluster, including the waiting relationship among transactions in the deadlock loops, the digest of the statement currently being executed in the transaction, and the key on which the waiting occurs. -> **Warning:** +> **Note:** > -> Currently, this is an experimental feature. The definition and behavior of the table structure might have major changes in future releases. +> The SQL statements shown in the Lock View-related system tables are normalized SQL statements (that is, SQL statements without formats and arguments), which are obtained by internal queries according to SQL digests, so the tables cannot obtain the complete statements that include the format and arguments. For the detailed description of SQL digests and normalized SQL statement, see [Statement Summary Tables](/statement-summary-tables.md). The following sections show the examples of troubleshooting some issues using these tables. 
@@ -258,32 +258,17 @@ select * from information_schema.deadlocks; ``` ```sql -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | KEY | TRX_HOLDING_LOCK | -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904707 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000002 | 425405959304904708 | -| 1 | 2021-06-04 08:22:38.765699 | 0 | 425405959304904708 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | 7480000000000000385F728000000000000001 | 425405959304904707 | -+-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+----------------------------------------+--------------------+ ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| DEADLOCK_ID | OCCUR_TIME | RETRYABLE | TRY_LOCK_TRX_ID | CURRENT_SQL_DIGEST | CURRENT_SQL_DIGEST_TEXT | KEY | KEY_INFO | TRX_HOLDING_LOCK | 
++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406216 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000002 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"2"} | 426812829645406217 | +| 1 | 2021-08-05 11:09:03.230341 | 0 | 426812829645406217 | 22230766411edb40f27a68dadefc63c6c6970d5827f1e5e22fc97be2c4d8350d | update `t` set `v` = ? where `id` = ? ; | 7480000000000000355F728000000000000001 | {"db_id":1,"db_name":"test","table_id":53,"table_name":"t","handle_type":"int","handle_value":"1"} | 426812829645406216 | ++-------------+----------------------------+-----------+--------------------+------------------------------------------------------------------+-----------------------------------------+----------------------------------------+----------------------------------------------------------------------------------------------------+--------------------+ ``` -The query result above shows the waiting relationship among multiple transactions in the deadlock error, the digest of the SQL statement currently being executed in each transaction, and the key on which the conflict occurs. +The query result above shows the waiting relationship among multiple transactions in the deadlock error, the normalized form of the SQL statements currently being executed in each transaction (statements without formats and arguments), the key on which the conflict occurs, and the information of the key. 
-You can get the text of the normalized SQL statement corresponding to the digest of the SQL statements executed recently from the `STATEMENTS_SUMMARY` or `STATEMENTS_SUMMARY_HISTORY` table. For details, see [`STATEMENTS_SUMMARY` and `STATEMENTS_SUMMARY_HISTORY` tables](/statement-summary-tables.md). You can also join the obtained results directly with the `DEADLOCKS` table. Note that the `STATEMENTS_SUMMARY` table might not contain the information of all SQL statements, so left join is used in the following example: - -{{< copyable "sql" >}} - -```sql -select l.deadlock_id, l.occur_time, l.try_lock_trx_id, l.trx_holding_lock, s.digest_text from information_schema.deadlocks as l left join information_schema.statements_summary as s on l.current_sql_digest = s.digest; -``` - -```sql -+-------------+----------------------------+--------------------+--------------------+-----------------------------------------+ -| deadlock_id | occur_time | try_lock_trx_id | trx_holding_lock | digest_text | -+-------------+----------------------------+--------------------+--------------------+-----------------------------------------+ -| 1 | 2021-06-04 08:22:38.765699 | 425405959304904707 | 425405959304904708 | update `t` set `v` = ? where `id` = ? ; | -| 1 | 2021-06-04 08:22:38.765699 | 425405959304904708 | 425405959304904707 | update `t` set `v` = ? where `id` = ? ; | -+-------------+----------------------------+--------------------+--------------------+-----------------------------------------+ -``` +In the above example, the first row means that the transaction with the ID of `426812829645406216` is executing a statement like ``update `t` set `v` = ? where `id` = ? ;`` but is blocked by another transaction with the ID of `426812829645406217`. The transaction with the ID of `426812829645406217` is also executing a statement that is in the form of ``update `t` set `v` = ? where `id` = ? ;`` but is blocked by the transaction with the ID of `426812829645406216`.
The two transactions thus form a deadlock. #### A few hot keys cause queueing locks @@ -299,8 +284,8 @@ select `key`, count(*) as `count` from information_schema.data_lock_waits group +----------------------------------------+-------+ | key | count | +----------------------------------------+-------+ -| 7480000000000000415f728000000000000001 | 2 | -| 7480000000000000415f728000000000000002 | 1 | +| 7480000000000000415F728000000000000001 | 2 | +| 7480000000000000415F728000000000000002 | 1 | +----------------------------------------+-------+ ``` @@ -313,36 +298,36 @@ Note that the information displayed in the `TIDB_TRX` and `CLUSTER_TIDB_TRX` tab {{< copyable "sql" >}} ```sql -select trx.* from information_schema.data_lock_waits as l left join information_schema.tidb_trx as trx on l.trx_id = trx.id where l.key = "7480000000000000415f728000000000000001"\G +select trx.* from information_schema.data_lock_waits as l left join information_schema.tidb_trx as trx on l.trx_id = trx.id where l.key = "7480000000000000415F728000000000000001"\G ``` ```sql *************************** 1. row *************************** - ID: 425496938634543111 - START_TIME: 2021-06-08 08:46:48.341000 -CURRENT_SQL_DIGEST: a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9 - STATE: LockWaiting -WAITING_START_TIME: 2021-06-08 08:46:48.388024 - MEM_BUFFER_KEYS: 1 - MEM_BUFFER_BYTES: 19 - SESSION_ID: 87 - USER: root - DB: test - ALL_SQL_DIGESTS: [0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3, a4e28cc182bdd18288e2a34180499b9404cd0 -ba07e3cc34b6b3be7b7c2de7fe9, a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9] + ID: 426831815660273668 + START_TIME: 2021-08-06 07:16:00.081000 + CURRENT_SQL_DIGEST: 06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7 +CURRENT_SQL_DIGEST_TEXT: update `t` set `v` = `v` + ? where `id` = ? 
; + STATE: LockWaiting + WAITING_START_TIME: 2021-08-06 07:16:00.087720 + MEM_BUFFER_KEYS: 0 + MEM_BUFFER_BYTES: 0 + SESSION_ID: 77 + USER: root + DB: test + ALL_SQL_DIGESTS: ["0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3","06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7"] *************************** 2. row *************************** - ID: 425496940994101249 - START_TIME: 2021-06-08 08:46:57.342000 -CURRENT_SQL_DIGEST: a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9 - STATE: LockWaiting -WAITING_START_TIME: 2021-06-08 08:46:57.590060 - MEM_BUFFER_KEYS: 0 - MEM_BUFFER_BYTES: 0 - SESSION_ID: 85 - USER: root - DB: test - ALL_SQL_DIGESTS: [0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3, a4e28cc182bdd18288e2a34180499b9404cd0 -ba07e3cc34b6b3be7b7c2de7fe9] + ID: 426831818019569665 + START_TIME: 2021-08-06 07:16:09.081000 + CURRENT_SQL_DIGEST: 06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7 +CURRENT_SQL_DIGEST_TEXT: update `t` set `v` = `v` + ? where `id` = ? ; + STATE: LockWaiting + WAITING_START_TIME: 2021-08-06 07:16:09.290271 + MEM_BUFFER_KEYS: 0 + MEM_BUFFER_BYTES: 0 + SESSION_ID: 75 + USER: root + DB: test + ALL_SQL_DIGESTS: ["0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3","06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7"] 2 rows in set (0.00 sec) ``` @@ -353,25 +338,29 @@ If a transaction is known to be blocked by another transaction (or multiple tran {{< copyable "sql" >}} ```sql -select l.key, trx.* from information_schema.data_lock_waits as l join information_schema.tidb_trx as trx on l.current_holding_trx_id = trx.id where l.trx_id = 425497223886536705\G +select l.key, trx.*, tidb_decode_sql_digests(trx.all_sql_digests) as sqls from information_schema.data_lock_waits as l join information_schema.cluster_tidb_trx as trx on l.current_holding_trx_id = trx.id where l.trx_id = 426831965449355272\G ``` ```sql *************************** 1. 
row *************************** - key: 7480000000000000475f728000000000000002 - ID: 425497219115778059 - START_TIME: 2021-06-08 09:04:38.292000 -CURRENT_SQL_DIGEST: a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9 - STATE: LockWaiting -WAITING_START_TIME: 2021-06-08 09:04:38.336264 - MEM_BUFFER_KEYS: 1 - MEM_BUFFER_BYTES: 19 - SESSION_ID: 97 - USER: root - DB: test - ALL_SQL_DIGESTS: [0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3, a4e28cc182bdd18288e2a34180499b9404cd0 -ba07e3cc34b6b3be7b7c2de7fe9, a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9] + key: 74800000000000004D5F728000000000000001 + INSTANCE: 127.0.0.1:10080 + ID: 426832040186609668 + START_TIME: 2021-08-06 07:30:16.581000 + CURRENT_SQL_DIGEST: 06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7 +CURRENT_SQL_DIGEST_TEXT: update `t` set `v` = `v` + ? where `id` = ? ; + STATE: LockWaiting + WAITING_START_TIME: 2021-08-06 07:30:16.592763 + MEM_BUFFER_KEYS: 1 + MEM_BUFFER_BYTES: 19 + SESSION_ID: 113 + USER: root + DB: test + ALL_SQL_DIGESTS: ["0fdc781f19da1c6078c9de7eadef8a307889c001e05f107847bee4cfc8f3cdf3","a4e28cc182bdd18288e2a34180499b9404cd0ba07e3cc34b6b3be7b7c2de7fe9","06da614b93e62713bd282d4685fc5b88d688337f36e88fe55871726ce0eb80d7"] + sqls: ["begin ;","select * from `t` where `id` = ? for update ;","update `t` set `v` = `v` + ? where `id` = ? ;"] 1 row in set (0.01 sec) ``` +In the above query, the [`TIDB_DECODE_SQL_DIGESTS`](/functions-and-operators/tidb-functions.md#tidb_decode_sql_digests) function is used on the `ALL_SQL_DIGESTS` column of the `CLUSTER_TIDB_TRX` table. This function tries to convert this column (the value is a set of SQL digests) to the normalized SQL statements, which improves readability. 
+ If the `start_ts` of the current transaction is unknown, you can try to find it out from the information in the `TIDB_TRX` / `CLUSTER_TIDB_TRX` table or in the [`PROCESSLIST` / `CLUSTER_PROCESSLIST`](/information-schema/information-schema-processlist.md) table. diff --git a/troubleshoot-write-conflicts.md b/troubleshoot-write-conflicts.md index 97bcb5a90095..944c42dbba4c 100644 --- a/troubleshoot-write-conflicts.md +++ b/troubleshoot-write-conflicts.md @@ -46,7 +46,7 @@ In the TiDB Grafana panel, check the following monitoring metrics under **KV Err ![kv-retry-duration](/media/troubleshooting-write-conflict-kv-retry-duration.png) -You can also use `[kv:9007]Write conflict` as the key word to search in the TiDB log. The key word also indicates the write conflict exists in the cluster. +You can also use `[kv:9007]Write conflict` as the keywords to search in the TiDB log. The keywords also indicate the write conflict exists in the cluster. ## Resolve write conflicts diff --git a/tune-tikv-memory-performance.md b/tune-tikv-memory-performance.md index 8da49518bd93..726c0aea4439 100644 --- a/tune-tikv-memory-performance.md +++ b/tune-tikv-memory-performance.md @@ -99,10 +99,6 @@ address = "" job = "tikv" [raftstore] -# The default value is true, which means writing the data on the disk compulsorily. If it is not in a business scenario -# of the financial security level, it is recommended to set the value to false to achieve better performance. -sync-log = true - # Raft RocksDB directory. The default value is Raft subdirectory of [storage.data-dir]. # If there are multiple disks on the machine, store the data of Raft RocksDB on different disks to improve TiKV performance. 
# raftdb-path = "/tmp/tikv/store/raft" diff --git a/tune-tikv-thread-performance.md b/tune-tikv-thread-performance.md index b6ddfa986854..0ed2c23d20b4 100644 --- a/tune-tikv-thread-performance.md +++ b/tune-tikv-thread-performance.md @@ -37,7 +37,7 @@ Starting from TiKV v5.0, all read requests use the unified thread pool for queri * The gRPC thread pool. - The default size (configured by `server.grpc-concurrency`) of the gRPC thread pool is `4`. This thread pool has almost no computing overhead and is mainly responsible for network I/O and deserialization requests, so generally you do not need to adjust the default configuration. + The default size (configured by `server.grpc-concurrency`) of the gRPC thread pool is `5`. This thread pool has almost no computing overhead and is mainly responsible for network I/O and deserialization requests, so generally you do not need to adjust the default configuration. - If the machine deployed with TiKV has a small number (less than or equal to 8) of CPU cores, consider setting the `server.grpc-concurrency` configuration item to `2`. - If the machine deployed with TiKV has very high configuration, TiKV undertakes a large number of read and write requests, and the value of `gRPC poll CPU` that monitors Thread CPU on Grafana exceeds 80% of `server.grpc-concurrency`, then consider increasing the value of `server.grpc-concurrency` to keep the thread pool usage rate below 80% (that is, the metric on Grafana is lower than `80% * server.grpc-concurrency`). @@ -55,7 +55,7 @@ Starting from TiKV v5.0, all read requests use the unified thread pool for queri * The Raftstore thread pool. - The Raftstore thread pool is the most complex thread pool in TiKV. The default size (configured by `raftstore.store-pool-size`) is `2`. All write requests are written into RocksDB in the way of `fsync` from the Raftstore thread, unless you manually set `raftstore.sync-log` to `false`. 
Setting `raftstore.sync-log` to `false` improves write performance to a certain degree, but increases the risk of data loss in the case of machine failure). + The Raftstore thread pool is the most complex thread pool in TiKV. The default size (configured by `raftstore.store-pool-size`) is `2`. All write requests are written into RocksDB in the way of `fsync` from the Raftstore thread. Due to I/O, Raftstore threads cannot reach 100% CPU usage theoretically. To reduce disk writes as much as possible, you can put together multiple write requests and write them to RocksDB. It is recommended to keep the overall CPU usage below 60% (If the default number of threads is `2`, it is recommended to keep `TiKV-Details.Thread CPU.Raft store CPU` on Grafana within 120%). Do not increase the size of the Raftstore thread pool to improve write performance without thinking, because this might increase the disk burden and degrade performance. diff --git a/two-data-centers-in-one-city-deployment.md b/two-data-centers-in-one-city-deployment.md new file mode 100644 index 000000000000..88b8db1e739d --- /dev/null +++ b/two-data-centers-in-one-city-deployment.md @@ -0,0 +1,233 @@ +--- +title: Two Data Centers in One City Deployment +summary: Learn the deployment solution of two data centers in one city. +aliases: ['/tidb/dev/synchronous-replication'] +--- + +# Two Data Centers in One City Deployment + +This document introduces the deployment mode of two data centers (DCs) in one city, including the architecture, configuration, how to enable this deployment mode, and how to use replicas in this mode. + +In an on-premises environment, TiDB usually adopts the multi-data-center deployment solution to ensure high availability and disaster recovery capability. The multi-data-center deployment solution includes multiple deployment modes, such as three data centers in two cities and three data centers in one city. This document introduces the deployment mode of two data centers in one city. 
Deployed in this mode, TiDB can also meet the requirements of high availability and disaster recovery, with a lower cost. This deployment solution adopts Data Replication Auto Synchronous mode, or the DR Auto-Sync mode. + +Under the mode of two data centers in one city, the two data centers are less than 50 kilometers apart. They are usually located in the same city or in two adjacent cities. The network latency between the two data centers is lower than 1.5 milliseconds and the bandwidth is higher than 10 Gbps. + +## Deployment architecture + +This section takes the example of a city where two data centers IDC1 and IDC2 are located respectively in the east and west. + +The architecture of the cluster deployment is as follows: + +- The TiDB cluster is deployed to two DCs in one city: the primary IDC1 in the east, and the disaster recovery (DR) IDC2 in the west. +- The cluster has 4 replicas: 2 Voter replicas in IDC1, 1 Voter replica and 1 Learner replica in IDC2. For the TiKV component, each rack has a proper label. +- The Raft protocol is adopted to ensure consistency and high availability of data, which is transparent to users. + +![2-DC-in-1-city architecture](/media/two-dc-replication-1.png) + +This deployment solution defines three statuses to control and identify the replication status of the cluster, which restricts the replication mode of TiKV. The replication mode of the cluster can automatically and adaptively switch between the three statuses. For details, see the [Status switch](#status-switch) section. + +- **sync**: Synchronous replication mode. In this mode, at least one replica in the disaster recovery (DR) data center synchronizes with the primary data center. The Raft algorithm ensures that each log is replicated to the DR based on the label. +- **async**: Asynchronous replication mode. In this mode, the DR data center is not fully synchronized with the primary data center. The Raft algorithm follows the majority protocol to replicate logs. 
+- **sync-recover**: Synchronous recovery mode. In this mode, the DR data center is not fully synchronized with the primary data center. Raft gradually switches to the label replication mode and then reports the label information to PD. + +## Configuration + +### Example + +The following `tiup topology.yaml` example file is a typical topology configuration for the two data centers in one city deployment mode: + +``` +# # Global variables are applied to all deployments and used as the default value of +# # the deployments if a specific deployment value is missing. +global: + user: "tidb" + ssh_port: 22 + deploy_dir: "/data/tidb_cluster/tidb-deploy" + data_dir: "/data/tidb_cluster/tidb-data" +server_configs: + pd: + replication.location-labels: ["zone","rack","host"] +pd_servers: + - host: 10.63.10.10 + name: "pd-10" + - host: 10.63.10.11 + name: "pd-11" + - host: 10.63.10.12 + name: "pd-12" +tidb_servers: + - host: 10.63.10.10 + - host: 10.63.10.11 + - host: 10.63.10.12 +tikv_servers: + - host: 10.63.10.30 + config: + server.labels: { zone: "east", rack: "east-1", host: "30" } + - host: 10.63.10.31 + config: + server.labels: { zone: "east", rack: "east-2", host: "31" } + - host: 10.63.10.32 + config: + server.labels: { zone: "west", rack: "west-1", host: "32" } + - host: 10.63.10.33 + config: + server.labels: { zone: "west", rack: "west-2", host: "33" } +monitoring_servers: + - host: 10.63.10.60 +grafana_servers: + - host: 10.63.10.60 +alertmanager_servers: + - host: 10.63.10.60 +``` + +### Placement Rules + +To deploy a cluster based on the planned topology, you need to use [placement rules](/configure-placement-rules.md) to determine the locations of the cluster replicas. 
If the cluster has 4 replicas, with 2 Voter replicas at the primary center and 1 Voter replica and 1 Learner replica at the DR center, you can use the placement rules to configure the replicas as follows: + +``` +cat rule.json +[ + { + "group_id": "pd", + "id": "zone-east", + "start_key": "", + "end_key": "", + "role": "voter", + "count": 2, + "label_constraints": [ + { + "key": "zone", + "op": "in", + "values": [ + "east" + ] + } + ], + "location_labels": [ + "zone", + "rack", + "host" + ] + }, + { + "group_id": "pd", + "id": "zone-west", + "start_key": "", + "end_key": "", + "role": "voter", + "count": 1, + "label_constraints": [ + { + "key": "zone", + "op": "in", + "values": [ + "west" + ] + } + ], + "location_labels": [ + "zone", + "rack", + "host" + ] + }, + { + "group_id": "pd", + "id": "zone-west-learner", + "start_key": "", + "end_key": "", + "role": "learner", + "count": 1, + "label_constraints": [ + { + "key": "zone", + "op": "in", + "values": [ + "west" + ] + } + ], + "location_labels": [ + "zone", + "rack", + "host" + ] + } +] +``` + +### Enable the DR Auto-Sync mode + +The replication mode is controlled by PD. When deploying a cluster, you can configure the replication mode in the PD configuration file. For example: + +{{< copyable "" >}} + +```toml +[replication-mode] +replication-mode = "dr-auto-sync" +[replication-mode.dr-auto-sync] +label-key = "zone" +primary = "east" +dr = "west" +primary-replicas = 2 +dr-replicas = 1 +wait-store-timeout = "1m" +wait-sync-timeout = "1m" +``` + +In the configuration above: + ++ `replication-mode` is the replication mode to be enabled. In the above example, it is set to `dr-auto-sync`. By default, the majority protocol is used. ++ `label-key` is used to distinguish different data centers and needs to match placement rules. In this example, the primary data center is "east" and the DR data center is "west". ++ `primary-replicas` is the number of Voter replicas in the primary data center.
++ `dr-replicas` is the number of Voter replicas in the DR data center. ++ `wait-store-timeout` is the waiting time for switching to asynchronous replication mode when network isolation or failure occurs. If the time of network failure exceeds the waiting time, asynchronous replication mode is enabled. The default waiting time is 60 seconds. + +To check the current replication status of the cluster, use the following API: + +{{< copyable "shell-regular" >}} + +```bash +curl http://pd_ip:pd_port/pd/api/v1/replication_mode/status +``` + +{{< copyable "shell-regular" >}} + +```bash +{ + "mode": "dr-auto-sync", + "dr-auto-sync": { + "label-key": "zone", + "state": "sync" + } +} +``` + +#### Status switch + +The replication mode of a cluster can automatically and adaptively switch between three statuses: + +- When the cluster is normal, the synchronous replication mode is enabled to maximize the data integrity of the disaster recovery data center. +- When the network connection between the two data centers fails or the DR data center breaks down, after a pre-set protective interval, the cluster enables the asynchronous replication mode to ensure the availability of the application. +- When the network reconnects or the DR data center recovers, the TiKV node joins the cluster again and gradually replicates the data. Finally, the cluster switches to the synchronous replication mode. + +The details for the status switch are as follows: + +1. **Initialization**: At the initialization stage, the cluster is in the synchronous replication mode. PD sends the status information to TiKV, and all TiKV nodes strictly follow the synchronous replication mode to work. + +2. **Switch from sync to async**: PD regularly checks the heartbeat information of TiKV to judge whether the TiKV node fails or is disconnected. 
If the number of failed nodes exceeds the number of replicas of the primary data center (`primary-replicas`) and the DR data center (`dr-replicas`), the synchronous replication mode can no longer serve the data replication and it is necessary to switch the status. When the failure or disconnect time exceeds the time set by `wait-store-timeout`, PD switches the status of the cluster to the async mode. Then PD sends the status of async to all TiKV nodes, and the replication mode for TiKV switches from two-center replication to the native Raft majority. + +3. **Switch from async to sync**: PD regularly checks the heartbeat information of TiKV to judge whether the TiKV node is reconnected. If the number of failed nodes is less than the number of replicas of the primary data center (`primary-replicas`) and the DR data center (`dr-replicas`), the synchronous replication mode can be enabled again. PD first switches the status of the cluster to sync-recover and sends the status information to all TiKV nodes. All Regions of TiKV gradually switch to the two-data-center synchronous replication mode and then report the heartbeat information to PD. PD records the status of TiKV Regions and calculates the recovery progress. When all TiKV Regions finish the switching, PD switches the replication mode to sync. + +### Disaster recovery + +This section introduces the disaster recovery solution of the two data centers in one city deployment. + +When a disaster occurs to a cluster in the synchronous replication mode, you can perform data recovery with `RPO = 0`: + +- If the primary data center fails and most of the Voter replicas are lost, but complete data exists in the DR data center, the lost data can be recovered from the DR data center. At this time, manual intervention is required with professional tools. You can contact the TiDB team for a recovery solution. 
+ +- If the DR center fails and a few Voter replicas are lost, the cluster automatically switches to the asynchronous replication mode. + +When a disaster occurs to a cluster that is not in the synchronous replication mode and you cannot perform data recovery with `RPO = 0`: + +- If most of the Voter replicas are lost, manual intervention is required with professional tools. You can contact the TiDB team for a recovery solution. \ No newline at end of file diff --git a/upgrade-tidb-using-tiup.md b/upgrade-tidb-using-tiup.md index 0c49c9d5b49c..a491cf3b8d5f 100644 --- a/upgrade-tidb-using-tiup.md +++ b/upgrade-tidb-using-tiup.md @@ -8,22 +8,23 @@ aliases: ['/docs/dev/upgrade-tidb-using-tiup/','/docs/dev/how-to/upgrade/using-t This document is targeted for the following upgrade paths: -- Upgrade from TiDB 4.0 versions to TiDB 5.1 versions. -- Upgrade from TiDB 5.0 versions to TiDB 5.1 versions. +- Upgrade from TiDB 4.0 versions to TiDB 5.2 versions. +- Upgrade from TiDB 5.0 versions to TiDB 5.2 versions. +- Upgrade from TiDB 5.1 versions to TiDB 5.2 versions. > **Note:** > -> If your cluster to be upgraded is v3.1 or an earlier version (v3.0 or v2.1), the direct upgrade to v5.1 or its patch versions is not supported. You need to upgrade your cluster first to v4.0 and then to v5.1. +> If your cluster to be upgraded is v3.1 or an earlier version (v3.0 or v2.1), the direct upgrade to v5.2 or its patch versions is not supported. You need to upgrade your cluster first to v4.0 and then to v5.2. ## Upgrade caveat - TiDB currently does not support version downgrade or rolling back to an earlier version after the upgrade. - For the v4.0 cluster managed using TiDB Ansible, you need to import the cluster to TiUP (`tiup cluster`) for new management according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup).
Then you can upgrade the cluster to v5.1 or its patch versions according to this document. -- To update versions earlier than 3.0 to 5.1: +- For the v4.0 cluster managed using TiDB Ansible, you need to import the cluster to TiUP (`tiup cluster`) for new management according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup). Then you can upgrade the cluster to v5.2 or its patch versions according to this document. +- To update versions earlier than 3.0 to 5.2: 1. Update this version to 3.0 using [TiDB Ansible](https://docs.pingcap.com/tidb/v3.0/upgrade-tidb-using-ansible). 2. Use TiUP (`tiup cluster`) to import the TiDB Ansible configuration. 3. Update the 3.0 version to 4.0 according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup). - 4. Upgrade the cluster to v5.1 according to this document. + 4. Upgrade the cluster to v5.2 according to this document. - Support upgrading the versions of TiDB Binlog, TiCDC, TiFlash, and other components. - For detailed compatibility changes of different versions, see the [Release Notes](/releases/release-notes.md) of each version. Modify your cluster configuration according to the "Compatibility Changes" section of the corresponding release notes. @@ -96,7 +97,7 @@ Now, the offline mirror has been upgraded successfully. If an error occurs durin > Skip this step if one of the following situations applies: > > + You have not modified the configuration parameters of the original cluster. Or you have modified the configuration parameters using `tiup cluster` but no more modification is needed. -> + After the upgrade, you want to use v5.1's default parameter values for the unmodified configuration items. +> + After the upgrade, you want to use v5.2's default parameter values for the unmodified configuration items. 1. 
Enter the `vi` editing mode to edit the topology file: @@ -106,15 +107,15 @@ Now, the offline mirror has been upgraded successfully. If an error occurs durin tiup cluster edit-config ``` -2. Refer to the format of [topology](https://github.com/pingcap/tiup/blob/release-1.4/embed/templates/examples/topology.example.yaml) configuration template and fill the parameters you want to modify in the `server_configs` section of the topology file. +2. Refer to the format of [topology](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml) configuration template and fill the parameters you want to modify in the `server_configs` section of the topology file. 3. After the modification, enter : + w + q to save the change and exit the editing mode. Enter Y to confirm the change. > **Note:** > -> Before you upgrade the cluster to v5.1, make sure that the parameters you have modified in v4.0 are compatible in v5.1. For details, see [TiKV Configuration File](/tikv-configuration-file.md). +> Before you upgrade the cluster to v5.2, make sure that the parameters you have modified in v4.0 are compatible in v5.2. For details, see [TiKV Configuration File](/tikv-configuration-file.md). > -> The following three TiKV parameters are obsolete in TiDB v5.1. If the following parameters have been configured in your original cluster, you need to delete these parameters through `edit-config`: +> The following three TiKV parameters are obsolete in TiDB v5.2. 
If the following parameters have been configured in your original cluster, you need to delete these parameters through `edit-config`: > > - pessimistic-txn.enabled > - server.request-batch-enable-cross-command @@ -155,12 +156,12 @@ If your application has a maintenance window for the database to be stopped for tiup cluster upgrade ``` -For example, if you want to upgrade the cluster to v5.1.0: +For example, if you want to upgrade the cluster to v5.2.1: {{< copyable "shell-regular" >}} ```shell -tiup cluster upgrade v5.1.0 +tiup cluster upgrade v5.2.1 ``` > **Note:** @@ -210,7 +211,7 @@ tiup cluster display ``` Cluster type: tidb Cluster name: -Cluster version: v5.1.0 +Cluster version: v5.2.1 ``` > **Note:** @@ -260,10 +261,10 @@ You can upgrade the tool version by using TiUP to install the `ctl` component of {{< copyable "shell-regular" >}} ```shell -tiup install ctl:v5.1.0 +tiup install ctl:v5.2.1 ``` -## TiDB 5.1 compatibility changes +## TiDB 5.2 compatibility changes -- See TiDB 5.1 Release Notes for the compatibility changes. -- Try to avoid creating a new clustered index table when you apply rolling updates to the clusters using TiDB-Binlog. +- See TiDB 5.2 Release Notes for the compatibility changes. +- Try to avoid creating a new clustered index table when you apply rolling updates to the clusters using TiDB Binlog. diff --git a/user-defined-variables.md b/user-defined-variables.md index 470287ea2c60..43bd94f8b7bd 100644 --- a/user-defined-variables.md +++ b/user-defined-variables.md @@ -86,7 +86,7 @@ SELECT @a1, @a2, @a3, @a4 := @a1+@a2+@a3; +------+------+------+--------------------+ ``` -Before the variable `@a4` is modified or the connection is closed, its value is always `7`。 +Before the variable `@a4` is modified or the connection is closed, its value is always `7`. If a hexadecimal literal or binary literal is used when setting the user-defined variable, TiDB will treat it as a binary string. 
If you want to set it to a number, you can manually add the `CAST` conversion, or use the numeric operator in the expression: