Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression, util: add KeyWithoutTrimRightSpace for collator #35475

Merged
merged 4 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions expression/integration_serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,9 +812,16 @@ func TestCollateStringFunction(t *testing.T) {
tk.MustQuery("select locate('S', 'a' collate utf8mb4_general_ci);").Check(testkit.Rows("0"))
// MySQL returns 0 here; I believe this is a bug in MySQL, since 'ß' == 's' under the utf8mb4_general_ci collation.
tk.MustQuery("select locate('ß', 's' collate utf8mb4_general_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('world', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("7"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("6"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_general_ci);").Check(testkit.Rows("0"))

tk.MustQuery("select locate('S', 's' collate utf8mb4_unicode_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('S', 'a' collate utf8mb4_unicode_ci);").Check(testkit.Rows("0"))
tk.MustQuery("select locate('ß', 'ss' collate utf8mb4_unicode_ci);").Check(testkit.Rows("1"))
tk.MustQuery("select locate('world', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("7"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("6"))
tk.MustQuery("select locate(' ', 'hello world' collate utf8mb4_unicode_ci);").Check(testkit.Rows("0"))

tk.MustExec("truncate table t1;")
tk.MustExec("insert into t1 (a) values (1);")
Expand Down
6 changes: 3 additions & 3 deletions expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,8 +547,8 @@ func SubstituteCorCol2Constant(expr Expression) (Expression, error) {

func locateStringWithCollation(str, substr, coll string) int64 {
collator := collate.GetCollator(coll)
strKey := collator.Key(str)
subStrKey := collator.Key(substr)
strKey := collator.KeyWithoutTrimRightSpace(str)
subStrKey := collator.KeyWithoutTrimRightSpace(substr)

index := bytes.Index(strKey, subStrKey)
if index == -1 || index == 0 {
Expand All @@ -560,7 +560,7 @@ func locateStringWithCollation(str, substr, coll string) int64 {
for {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, may i ask. What is the effect of this loop? Can we return below code directly?

index := bytes.Index(strKey, subStrKey)
	return int64(index + 1)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, some of the characters have more than one-byte weight

r, size := utf8.DecodeRuneInString(str)
count += 1
index -= len(collator.Key(string(r)))
index -= len(collator.KeyWithoutTrimRightSpace(string(r)))
if index == 0 {
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
return count + 1
}
Expand Down
10 changes: 10 additions & 0 deletions util/collate/bin.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ func (bc *binCollator) Key(str string) []byte {
return []byte(str)
}

// KeyWithoutTrimRightSpace implements Collator interface.
// It returns the bytes of str unchanged; no trailing-space trimming is applied.
func (bc *binCollator) KeyWithoutTrimRightSpace(str string) []byte {
return []byte(str)
}

// Pattern implements Collator interface.
func (bc *binCollator) Pattern() WildcardPattern {
return &binPattern{}
Expand All @@ -49,6 +54,11 @@ func (bpc *binPaddingCollator) Key(str string) []byte {
return []byte(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
// Unlike Key, which passes str through truncateTailingSpace first, it returns
// the bytes of str as-is, so trailing spaces remain part of the key.
func (bpc *binPaddingCollator) KeyWithoutTrimRightSpace(str string) []byte {
return []byte(str)
}

// Pattern implements Collator interface.
// Notice that trailing spaces are significant.
func (bpc *binPaddingCollator) Pattern() WildcardPattern {
Expand Down
2 changes: 2 additions & 0 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ type Collator interface {
Compare(a, b string) int
// Key returns the collate key for str. If the collation is padding, make sure the PadLen >= len(rune[]str) in opt.
Key(str string) []byte
// KeyWithoutTrimRightSpace returns the collate key for str. Unlike Key, it does not trim the trailing spaces of str.
KeyWithoutTrimRightSpace(str string) []byte
// Pattern get a collation-aware WildcardPattern.
Pattern() WildcardPattern
}
Expand Down
6 changes: 5 additions & 1 deletion util/collate/gbk_bin.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ func (g *gbkBinCollator) Compare(a, b string) int {

// Key implements Collator interface.
// It passes str through truncateTailingSpace and then builds the key with
// KeyWithoutTrimRightSpace.
// NOTE(review): truncateTailingSpace is applied twice below (in the assignment
// and again in the return expression); this looks like the old and new lines of
// the change shown together — confirm that only one of them is intended.
func (g *gbkBinCollator) Key(str string) []byte {
str = truncateTailingSpace(str)
return g.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (g *gbkBinCollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str))
for len(str) > 0 {
l := runeLen(str[0])
Expand Down
6 changes: 5 additions & 1 deletion util/collate/gbk_chinese_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ func (g *gbkChineseCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It passes str through truncateTailingSpace and then builds the key with
// KeyWithoutTrimRightSpace.
// NOTE(review): truncateTailingSpace is applied twice below (in the assignment
// and again in the return expression); this looks like the old and new lines of
// the change shown together — confirm that only one of them is intended.
func (g *gbkChineseCICollator) Key(str string) []byte {
str = truncateTailingSpace(str)
return g.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (g *gbkChineseCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str)*2)
i := 0
r := rune(0)
Expand Down
6 changes: 5 additions & 1 deletion util/collate/general_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ func (gc *generalCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It passes str through truncateTailingSpace and then builds the key with
// KeyWithoutTrimRightSpace.
// NOTE(review): truncateTailingSpace is applied twice below (in the assignment
// and again in the return expression); this looks like the old and new lines of
// the change shown together — confirm that only one of them is intended.
func (gc *generalCICollator) Key(str string) []byte {
str = truncateTailingSpace(str)
return gc.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (gc *generalCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str))
i := 0
r := rune(0)
Expand Down
5 changes: 5 additions & 0 deletions util/collate/pinyin_tidb_as_cs.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ func (py *zhPinyinTiDBASCSCollator) Key(str string) []byte {
panic("implement me")
}

// KeyWithoutTrimRightSpace implements Collator interface. It is not implemented yet and panics when called.
func (py *zhPinyinTiDBASCSCollator) KeyWithoutTrimRightSpace(str string) []byte {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let me remove it

panic("implement me")
}

// Pattern implements Collator interface. It is not implemented yet and panics when called.
func (py *zhPinyinTiDBASCSCollator) Pattern() WildcardPattern {
panic("implement me")
Expand Down
6 changes: 5 additions & 1 deletion util/collate/unicode_ci.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ func (uc *unicodeCICollator) Compare(a, b string) int {

// Key implements Collator interface.
// It passes str through truncateTailingSpace and then builds the key with
// KeyWithoutTrimRightSpace.
// NOTE(review): truncateTailingSpace is applied twice below (in the assignment
// and again in the return expression); this looks like the old and new lines of
// the change shown together — confirm that only one of them is intended.
func (uc *unicodeCICollator) Key(str string) []byte {
str = truncateTailingSpace(str)
return uc.KeyWithoutTrimRightSpace(truncateTailingSpace(str))
}

// KeyWithoutTrimRightSpace implements Collator interface.
func (uc *unicodeCICollator) KeyWithoutTrimRightSpace(str string) []byte {
buf := make([]byte, 0, len(str)*2)
r := rune(0)
si := 0 // decode index of s
Expand Down