From 815763b87527f67c2a90b72445bb0946cf93c02d Mon Sep 17 00:00:00 2001 From: Yang Keao Date: Thu, 14 Sep 2023 23:05:37 +0800 Subject: [PATCH] modify the collation of multi-valued index to binary Signed-off-by: Yang Keao --- ddl/ddl_api.go | 6 +++ ddl/tests/multivaluedindex/BUILD.bazel | 19 ++++++++ ddl/tests/multivaluedindex/main_test.go | 35 ++++++++++++++ .../multi_valued_index_test.go | 47 +++++++++++++++++++ planner/core/plan_to_pb_test.go | 15 ++++++ util/misc.go | 6 +++ 6 files changed, 128 insertions(+) create mode 100644 ddl/tests/multivaluedindex/BUILD.bazel create mode 100644 ddl/tests/multivaluedindex/main_test.go create mode 100644 ddl/tests/multivaluedindex/multi_valued_index_test.go diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index efb41f6285751..aacb4e1e76aaa 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -7158,6 +7158,12 @@ func BuildHiddenColumnInfo(ctx sessionctx.Context, indexPartSpecifications []*as colInfo.FieldType.SetDecimal(types.MaxFsp) } } + // For an array, the collation is set to "binary". The collation has no effect on the array itself (as it's usually + // regarded as a JSON), but will influence how TiKV handles the index value. 
+ if colInfo.FieldType.IsArray() { + colInfo.SetCharset("binary") + colInfo.SetCollate("binary") + } checkDependencies := make(map[string]struct{}) for _, colName := range FindColumnNamesInExpr(idxPart.Expr) { colInfo.Dependences[colName.Name.L] = struct{}{} diff --git a/ddl/tests/multivaluedindex/BUILD.bazel b/ddl/tests/multivaluedindex/BUILD.bazel new file mode 100644 index 0000000000000..25df7f9079d69 --- /dev/null +++ b/ddl/tests/multivaluedindex/BUILD.bazel @@ -0,0 +1,19 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_test") + +go_test( + name = "multivaluedindex_test", + timeout = "short", + srcs = [ + "main_test.go", + "multi_valued_index_test.go", + ], + flaky = True, + deps = [ + "//infoschema", + "//parser/model", + "//testkit", + "//testkit/testsetup", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/ddl/tests/multivaluedindex/main_test.go b/ddl/tests/multivaluedindex/main_test.go new file mode 100644 index 0000000000000..17eda6ca0900b --- /dev/null +++ b/ddl/tests/multivaluedindex/main_test.go @@ -0,0 +1,35 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package multivaluedindex + +import ( + "testing" + + "github.com/pingcap/tidb/testkit/testsetup" + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + testsetup.SetupForCommonTest() + + opts := []goleak.Option{ + goleak.IgnoreTopFunction("github.com/golang/glog.(*fileSink).flushDaemon"), + goleak.IgnoreTopFunction("github.com/lestrrat-go/httprc.runFetchWorker"), + goleak.IgnoreTopFunction("go.etcd.io/etcd/client/pkg/v3/logutil.(*MergeLogger).outputLoop"), + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), + } + + goleak.VerifyTestMain(m, opts...) +} diff --git a/ddl/tests/multivaluedindex/multi_valued_index_test.go b/ddl/tests/multivaluedindex/multi_valued_index_test.go new file mode 100644 index 0000000000000..6442c8df40445 --- /dev/null +++ b/ddl/tests/multivaluedindex/multi_valued_index_test.go @@ -0,0 +1,47 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package multivaluedindex + +import ( + "testing" + + "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tidb/testkit" + "github.com/stretchr/testify/require" +) + +func TestCreateMultiValuedIndexHasBinaryCollation(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("create table test.t (pk varchar(4) primary key clustered, j json, str varchar(255), value int, key idx((cast(j as char(100) array)), str));") + is := tk.Session().GetDomainInfoSchema().(infoschema.InfoSchema) + require.NotNil(t, is) + + tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + + foundIndex := false + for _, c := range tbl.Cols() { + if c.Hidden { + foundIndex = true + require.True(t, c.FieldType.IsArray()) + require.Equal(t, c.FieldType.GetCharset(), "binary") + require.Equal(t, c.FieldType.GetCollate(), "binary") + } + } + require.True(t, foundIndex) +} diff --git a/planner/core/plan_to_pb_test.go b/planner/core/plan_to_pb_test.go index cb108678629f2..62b13e4fece59 100644 --- a/planner/core/plan_to_pb_test.go +++ b/planner/core/plan_to_pb_test.go @@ -79,4 +79,19 @@ func TestColumnToProto(t *testing.T) { } pc = util.ColumnToProto(col2, false) require.Len(t, pc.Elems, 2) + + tp = types.NewFieldTypeBuilder(). + SetType(mysql.TypeString). + SetCharset("utf8mb4"). + SetCollate("utf8mb4_bin"). + SetFlen(100). + SetFlag(10). + SetArray(true). 
+ BuildP() + col3 := &model.ColumnInfo{ + FieldType: *tp, + } + pc = util.ColumnToProto(col3, true) + expect = &tipb.ColumnInfo{ColumnId: 0, Tp: 0xfe, Collation: 63, ColumnLen: 100, Decimal: 0, Flag: 10, Elems: []string(nil), DefaultVal: []uint8(nil), PkHandle: false, XXX_unrecognized: []uint8(nil)} + require.Equal(t, expect, pc) } diff --git a/util/misc.go b/util/misc.go index ea955f38ebd32..8e5d99b417fea 100644 --- a/util/misc.go +++ b/util/misc.go @@ -423,6 +423,12 @@ func ColumnToProto(c *model.ColumnInfo, forIndex bool) *tipb.ColumnInfo { if forIndex { // Use array type for read the multi-valued index. pc.Tp = int32(c.FieldType.ArrayType().GetType()) + if c.FieldType.IsArray() { + // Use the "binary" collation to read the multi-valued index. Most of the time, the `Collation` of this hidden + // column should already have been set to "binary". However, in old versions, the collation was set to the default + // value. See https://github.com/pingcap/tidb/issues/46717 + pc.Collation = int32(mysql.CollationNames["binary"]) + } } else { pc.Tp = int32(c.GetType()) }