New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ranger: fix prefix index when charset is UTF-8 #7194
Changes from all commits
979f987
e2320b8
2f2b51b
d81c10e
91effa8
e0034f9
afa3245
7224e7c
fd2d6e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ import ( | |
"bytes" | ||
"math" | ||
"sort" | ||
"unicode/utf8" | ||
|
||
"github.com/juju/errors" | ||
"github.com/pingcap/tidb/ast" | ||
|
@@ -25,6 +26,7 @@ import ( | |
"github.com/pingcap/tidb/mysql" | ||
"github.com/pingcap/tidb/sessionctx/stmtctx" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/charset" | ||
"github.com/pingcap/tidb/util/codec" | ||
) | ||
|
||
|
@@ -327,7 +329,7 @@ func buildCNFIndexRange(sc *stmtctx.StatementContext, cols []*expression.Column, | |
|
||
// Take prefix index into consideration. | ||
if hasPrefix(lengths) { | ||
fixPrefixColRange(ranges, lengths) | ||
fixPrefixColRange(ranges, lengths, newTp) | ||
} | ||
|
||
if len(ranges) > 0 && len(ranges[0].LowVal) < len(cols) { | ||
|
@@ -410,23 +412,37 @@ func hasPrefix(lengths []int) bool { | |
return false | ||
} | ||
|
||
func fixPrefixColRange(ranges []*Range, lengths []int) { | ||
func fixPrefixColRange(ranges []*Range, lengths []int, tp []*types.FieldType) { | ||
for _, ran := range ranges { | ||
for i := 0; i < len(ran.LowVal); i++ { | ||
fixRangeDatum(&ran.LowVal[i], lengths[i]) | ||
fixRangeDatum(&ran.LowVal[i], lengths[i], tp[i]) | ||
} | ||
ran.LowExclude = false | ||
for i := 0; i < len(ran.HighVal); i++ { | ||
fixRangeDatum(&ran.HighVal[i], lengths[i]) | ||
fixRangeDatum(&ran.HighVal[i], lengths[i], tp[i]) | ||
} | ||
ran.HighExclude = false | ||
} | ||
} | ||
|
||
func fixRangeDatum(v *types.Datum, length int) { | ||
func fixRangeDatum(v *types.Datum, length int, tp *types.FieldType) { | ||
// If this column is prefix and the prefix length is smaller than the range, cut it. | ||
if length != types.UnspecifiedLength && length < len(v.GetBytes()) { | ||
v.SetBytes(v.GetBytes()[:length]) | ||
// In case of UTF8, prefix should be cut by characters rather than bytes | ||
if v.Kind() == types.KindString || v.Kind() == types.KindBytes { | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For other types, should we consider the length?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I doubt it is possible to have a prefix index on other types...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For string columns, indexes can be created that use only the leading part of column values, using col_name(length) syntax to specify an index prefix length:
|
||
colCharset := tp.Charset | ||
colValue := v.GetBytes() | ||
isUTF8Charset := colCharset == charset.CharsetUTF8 || colCharset == charset.CharsetUTF8MB4 | ||
if isUTF8Charset { | ||
if length != types.UnspecifiedLength && utf8.RuneCount(colValue) > length { | ||
rs := bytes.Runes(colValue) | ||
truncateStr := string(rs[:length]) | ||
// truncate value and limit its length | ||
v.SetString(truncateStr) | ||
} | ||
} else if length != types.UnspecifiedLength && len(colValue) > length { | ||
// truncate value and limit its length | ||
v.SetBytes(colValue[:length]) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -438,11 +454,14 @@ func newFieldType(tp *types.FieldType) *types.FieldType { | |
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong: | ||
newTp := types.NewFieldType(mysql.TypeLonglong) | ||
newTp.Flag = tp.Flag | ||
newTp.Charset = tp.Charset | ||
return newTp | ||
// To avoid data truncate error. | ||
case mysql.TypeFloat, mysql.TypeDouble, mysql.TypeBlob, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, | ||
mysql.TypeString, mysql.TypeVarchar, mysql.TypeVarString: | ||
return types.NewFieldType(tp.Tp) | ||
newTp := types.NewFieldType(tp.Tp) | ||
newTp.Charset = tp.Charset | ||
return newTp | ||
default: | ||
return tp | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this changed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
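When the charset is UTF-8, the prefix index range value is now truncated by characters (as a string) rather than by bytes, so the new field type has to carry over the column's charset for fixRangeDatum to check.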