From f319ffe1fdc31064dc2cd25816f6e3977b212025 Mon Sep 17 00:00:00 2001 From: lidezhu Date: Fri, 4 Aug 2023 14:56:02 +0800 Subject: [PATCH 1/7] fix region range key not in standard format --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 19 +++++++++++- .../DeltaMerge/tests/gtest_key_range.cpp | 30 +++++++++---------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index 46e2024565d..422c5d78f01 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -72,7 +72,9 @@ struct RowKeyValue : is_common_handle(is_common_handle_) , value(value_) , int_value(int_value_) - {} + { + RUNTIME_CHECK_MSG(is_common_handle || value->size() == sizeof(Int64), "Invalid int handle value {}", Redact::keyToHexString(value->data(), value->size())); + } RowKeyValue(bool is_common_handle_, HandleValuePtr value_) : is_common_handle(is_common_handle_) @@ -84,6 +86,21 @@ struct RowKeyValue { size_t cursor = 0; int_value = DB::DecodeInt64(cursor, *value); + if (unlikely(value->size() != sizeof(Int64))) + { + // For int type handle, the standard key enconding format should be t{table_id}_r{handle_value}. + // But TiKV may generate region range keys which are not strictly following the standard format. + // More concretely, the key may be t{table_id}_r{handle_value} + some other bytes. + // We need to adapt the key to the standard format. + // For example, the key may be t100_r1000 + 0x00, we need to adapt it to t100_r1001. + // This is ok, because + // 1) if the key is the start range, then [t100_r1000 + 0x00, xxx) has the same semantics with [t100_r1001, xxx) + // 1) if the key is the end range, then [xxx, t100_r1000 + 0x00) also has the same semantics with [xxx, t100_r1001) + int_value = int_value + 1; + WriteBufferFromOwnString ss; + DB::EncodeInt64(int_value, ss); + value = std::make_shared(ss.releaseStr()); + } } } diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp index a64ad3cb5b7..2ae57fafb3b 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_key_range.cpp @@ -119,21 +119,21 @@ TEST(RowKey, ToNextKeyIntHandle) /* row_key_column_size */ 1); EXPECT_EQ(0, compare(next.toRowKeyValueRef(), range.getEnd())); } - // Note: The following does not work, because {20,00} will be regarded as Key=20 in RowKeyRange::fromRegionRange. - // { - // auto key_end = RecordKVFormat::genRawKey(1, 20); - // key_end.push_back(0); - // auto tikv_key_end = RecordKVFormat::encodeAsTiKVKey(key_end); - // const auto range_keys = std::make_shared( - // RecordKVFormat::genKey(1, 0), - // std::move(tikv_key_end)); - // const auto range = RowKeyRange::fromRegionRange( - // range_keys, - // /* table_id */ 1, - // /* is_common_handle */ false, - // /* row_key_column_size */ 1); - // EXPECT_EQ(0, compare(next.toRowKeyValueRef(), range.getEnd())); - // } + // Note: {20,00} will be regarded as Key=21 in RowKeyRange::fromRegionRange. + { + auto key_end = RecordKVFormat::genRawKey(1, 20); + key_end.push_back(0); + auto tikv_key_end = RecordKVFormat::encodeAsTiKVKey(key_end); + const auto range_keys = std::make_shared( + RecordKVFormat::genKey(1, 0), + std::move(tikv_key_end)); + const auto range = RowKeyRange::fromRegionRange( + range_keys, + /* table_id */ 1, + /* is_common_handle */ false, + /* row_key_column_size */ 1); + EXPECT_EQ(0, compare(next.toRowKeyValueRef(), range.getEnd())); + } } TEST(RowKey, ToNextKeyCommonHandle) From b973a3a958a56fb5ee62a99ef426bc74cb6945d6 Mon Sep 17 00:00:00 2001 From: lidezhu Date: Fri, 4 Aug 2023 22:59:46 +0800 Subject: [PATCH 2/7] fix test --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index 422c5d78f01..a7199107e09 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -72,9 +72,7 @@ struct RowKeyValue : is_common_handle(is_common_handle_) , value(value_) , int_value(int_value_) - { - RUNTIME_CHECK_MSG(is_common_handle || value->size() == sizeof(Int64), "Invalid int handle value {}", Redact::keyToHexString(value->data(), value->size())); - } + {} RowKeyValue(bool is_common_handle_, HandleValuePtr value_) : is_common_handle(is_common_handle_) From 06ae23d278dfc576f4e6a06c708018401aa3989d Mon Sep 17 00:00:00 2001 From: lidezhu Date: Sat, 5 Aug 2023 12:04:22 +0800 Subject: [PATCH 3/7] fix unit test --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index a7199107e09..a2744898d50 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -94,10 +94,14 @@ struct RowKeyValue // This is ok, because // 1) if the key is the start range, then [t100_r1000 + 0x00, xxx) has the same semantics with [t100_r1001, xxx) // 1) if the key is the end range, then [xxx, t100_r1000 + 0x00) also has the same semantics with [xxx, t100_r1001) - int_value = int_value + 1; - WriteBufferFromOwnString ss; - DB::EncodeInt64(int_value, ss); - value = std::make_shared(ss.releaseStr()); + // FIXME: check the case when int_value == Int64::max_value + if (int_value < std::numeric_limits::max()) + { + int_value = int_value + 1; + WriteBufferFromOwnString ss; + DB::EncodeInt64(int_value, ss); + value = std::make_shared(ss.releaseStr()); + } } } } From d8f16db8334a796fdbb0c73d6f14da4484665bf7 Mon Sep 17 00:00:00 2001 From: lidezhu Date: Mon, 7 Aug 2023 18:39:16 +0800 Subject: [PATCH 4/7] add more comment --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index a2744898d50..a37b256151c 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -93,8 +93,11 @@ struct RowKeyValue // For example, the key may be t100_r1000 + 0x00, we need to adapt it to t100_r1001. // This is ok, because // 1) if the key is the start range, then [t100_r1000 + 0x00, xxx) has the same semantics with [t100_r1001, xxx) - // 1) if the key is the end range, then [xxx, t100_r1000 + 0x00) also has the same semantics with [xxx, t100_r1001) - // FIXME: check the case when int_value == Int64::max_value + // 2) if the key is the end range, then [xxx, t100_r1000 + 0x00) also has the same semantics with [xxx, t100_r1001) + // + // Note if the `int_value` is Int64::max_value, + // it is a value generated by tiflash itself to means +inf()(which is RowKeyValue::INT_HANDLE_MAX_KEY). + // So we can just ignore it. if (int_value < std::numeric_limits::max()) { int_value = int_value + 1; From 54116904745a60c937fe364a1dde995b62060f8c Mon Sep 17 00:00:00 2001 From: lidezhu Date: Mon, 7 Aug 2023 18:50:12 +0800 Subject: [PATCH 5/7] add warning log --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index a37b256151c..71cc5f96da6 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -100,6 +100,10 @@ struct RowKeyValue // So we can just ignore it. if (int_value < std::numeric_limits::max()) { + LOG_WARNING( + Logger::get(), + "Meet rowkey {} which is not strictly following the standard encoding format", + Redact::keyToDebugString(value->data(), value->size())); int_value = int_value + 1; WriteBufferFromOwnString ss; DB::EncodeInt64(int_value, ss); From ac403b150868be743c7b87366750f15f0d28fb4d Mon Sep 17 00:00:00 2001 From: lidezhu Date: Tue, 8 Aug 2023 11:49:42 +0800 Subject: [PATCH 6/7] address comment --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index 71cc5f96da6..6d52031246a 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -98,16 +98,26 @@ struct RowKeyValue // Note if the `int_value` is Int64::max_value, // it is a value generated by tiflash itself to means +inf()(which is RowKeyValue::INT_HANDLE_MAX_KEY). // So we can just ignore it. - if (int_value < std::numeric_limits::max()) + if (value->size() == sizeof(UInt64) + 1) + { + if (int_value < std::numeric_limits::max() && value->back() == 0x00) + { + LOG_WARNING( + Logger::get(), + "Meet rowkey {} which has an extra zero suffix", + Redact::keyToDebugString(value->data(), value->size())); + int_value = int_value + 1; + WriteBufferFromOwnString ss; + DB::EncodeInt64(int_value, ss); + value = std::make_shared(ss.releaseStr()); + } + } + else { LOG_WARNING( Logger::get(), - "Meet rowkey {} which is not strictly following the standard encoding format", + "Meet rowkey {} with unexpected encoding format", Redact::keyToDebugString(value->data(), value->size())); - int_value = int_value + 1; - WriteBufferFromOwnString ss; - DB::EncodeInt64(int_value, ss); - value = std::make_shared(ss.releaseStr()); } } } From c3c643d20c36442eabb1e2ffc66a5621afd97ebd Mon Sep 17 00:00:00 2001 From: lidezhu Date: Tue, 8 Aug 2023 13:12:10 +0800 Subject: [PATCH 7/7] fix log --- dbms/src/Storages/DeltaMerge/RowKeyRange.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/RowKeyRange.h b/dbms/src/Storages/DeltaMerge/RowKeyRange.h index 6d52031246a..51ddbc7f279 100644 --- a/dbms/src/Storages/DeltaMerge/RowKeyRange.h +++ b/dbms/src/Storages/DeltaMerge/RowKeyRange.h @@ -98,9 +98,16 @@ struct RowKeyValue // Note if the `int_value` is Int64::max_value, // it is a value generated by tiflash itself to means +inf()(which is RowKeyValue::INT_HANDLE_MAX_KEY). // So we can just ignore it. - if (value->size() == sizeof(UInt64) + 1) + if (value->size() != sizeof(UInt64) + 1 || value->back() != 0x00) { - if (int_value < std::numeric_limits::max() && value->back() == 0x00) + LOG_WARNING( + Logger::get(), + "Meet rowkey {} with unexpected encoding format", + Redact::keyToDebugString(value->data(), value->size())); + } + else + { + if (int_value < std::numeric_limits::max()) { LOG_WARNING( Logger::get(), @@ -111,13 +118,10 @@ struct RowKeyValue DB::EncodeInt64(int_value, ss); value = std::make_shared(ss.releaseStr()); } - } - else - { - LOG_WARNING( - Logger::get(), - "Meet rowkey {} with unexpected encoding format", - Redact::keyToDebugString(value->data(), value->size())); + else + { + // ignore RowKeyValue::INT_HANDLE_MAX_KEY + } } } }