Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/ranger: consider good non-point ranges from CNF item #44384

Merged
merged 8 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,8 @@ var (

// TiDBOptFixControl44262 controls whether dynamic-mode may be used to access partitioned tables without global-stats (#44262).
TiDBOptFixControl44262 uint64 = 44262
// TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges.
TiDBOptFixControl44389 uint64 = 44389
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
Expand Down
2 changes: 2 additions & 0 deletions util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ go_library(
"//parser/terror",
"//sessionctx",
"//sessionctx/stmtctx",
"//sessionctx/variable",
"//types",
"//types/parser_driver",
"//util/chunk",
"//util/codec",
"//util/collate",
"//util/dbterror",
"//util/mathutil",
"@com_github_pingcap_errors//:errors",
"@org_golang_x_exp//slices",
],
Expand Down
137 changes: 94 additions & 43 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/mathutil"
)

// detachColumnCNFConditions detaches the condition for calculating range from the other conditions.
Expand Down Expand Up @@ -185,19 +187,67 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres
return -1
}

// extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item
// is totally composed of point range filters.
// cnfItemRangeResult bundles the ranges built from a single CNF item with summary
// information about those ranges, so that candidate CNF items can be compared.
type cnfItemRangeResult struct {
// rangeResult is the DetachRangeResult built for this CNF item.
rangeResult *DetachRangeResult
// offset is the position of this CNF item in the input CNF conditions.
offset int
// sameLenPointRanges means that every range is a point range and all of them cover the same number of columns (i.e., maxColNum == minColNum).
sameLenPointRanges bool
// maxColNum is the largest len(LowVal) among the ranges of rangeResult.
maxColNum int
// minColNum is the smallest len(LowVal) among the ranges of rangeResult.
minColNum int
}

// getCNFItemRangeResult wraps rangeResult (the ranges built for the CNF item at
// position offset) in a cnfItemRangeResult and fills in its summary fields:
//   - maxColNum / minColNum: the largest / smallest len(LowVal) over all ranges;
//   - sameLenPointRanges: true only if every range is a point range and
//     minColNum equals maxColNum.
//
// When rangeResult has no ranges, both column counts stay 0 and
// sameLenPointRanges stays true.
func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResult, offset int) *cnfItemRangeResult {
	item := &cnfItemRangeResult{
		rangeResult:        rangeResult,
		offset:             offset,
		sameLenPointRanges: true,
	}
	for idx, r := range rangeResult.Ranges {
		if !r.IsPoint(sctx) {
			item.sameLenPointRanges = false
		}
		colNum := len(r.LowVal)
		switch {
		case idx == 0:
			// The first range seeds both bounds.
			item.maxColNum, item.minColNum = colNum, colNum
		case colNum > item.maxColNum:
			item.maxColNum = colNum
		case colNum < item.minColNum:
			item.minColNum = colNum
		}
	}
	// Ranges of differing lengths never count as "same length point ranges",
	// even if each of them is a point range.
	if item.minColNum != item.maxColNum {
		item.sameLenPointRanges = false
	}
	return item
}

// compareCNFItemRangeResult reports whether curResult should replace bestResult
// as the preferred CNF item ranges.
//
// Ordering rules:
//  1. If exactly one of the two consists of same-length point ranges, that one
//     wins: point ranges are better than non-point ranges since subsequent
//     column ranges can be appended to point ranges.
//  2. Otherwise the one with the larger minColNum wins.
//  3. On a minColNum tie, non-point results are further compared by maxColNum;
//     point results are considered equal, so curResult is not better.
func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIsBetter bool) {
	if curResult.sameLenPointRanges != bestResult.sameLenPointRanges {
		return curResult.sameLenPointRanges
	}
	if curResult.minColNum != bestResult.minColNum {
		return curResult.minColNum > bestResult.minColNum
	}
	if curResult.sameLenPointRanges {
		// Both are point ranges with the same column count: no strict winner.
		return false
	}
	return curResult.maxColNum > bestResult.maxColNum
}

// extractBestCNFItemRanges builds ranges for each CNF item from the input CNF expressions and returns the best CNF
// item ranges.
// e.g., for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in ((1,1,1),(2,2,2))),
// ((a,b,c) in ((1,1,1),(2,2,2))) would be extracted.
func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, []*valueInfo, error) {
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
if len(conds) < 2 {
return nil, -1, nil, nil
return nil, nil, nil
}
var r *DetachRangeResult
var bestRes *cnfItemRangeResult
columnValues := make([]*valueInfo, len(cols))
maxNumCols := int(0)
offset := int(-1)
for i, cond := range conds {
tmpConds := []expression.Expression{cond}
colSets := expression.ExtractColumnSet(cond)
Expand All @@ -213,43 +263,25 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
// which are not point ranges, and we cannot append `c = 1` anymore.
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
if err != nil {
return nil, -1, nil, err
return nil, nil, err
}
if len(res.Ranges) == 0 {
return &DetachRangeResult{}, -1, nil, nil
return &cnfItemRangeResult{rangeResult: res, offset: i}, nil, nil
}
// take the union of the two columnValues
columnValues = unionColumnValues(columnValues, res.ColumnValues)
if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 {
continue
}
sameLens, allPoints := true, true
numCols := int(0)
for j, ran := range res.Ranges {
if !ran.IsPoint(sctx) {
allPoints = false
break
}
if j == 0 {
numCols = len(ran.LowVal)
} else if numCols != len(ran.LowVal) {
sameLens = false
break
}
}
if !allPoints || !sameLens {
continue
}
if numCols > maxNumCols {
r = res
offset = i
maxNumCols = numCols
curRes := getCNFItemRangeResult(sctx, res, i)
if bestRes == nil || compareCNFItemRangeResult(curRes, bestRes) {
bestRes = curRes
}
}
if r != nil {
r.IsDNFCond = false
if bestRes != nil && bestRes.rangeResult != nil {
bestRes.rangeResult.IsDNFCond = false
}
return r, offset, columnValues, nil
return bestRes, columnValues, nil
}

func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo {
Expand Down Expand Up @@ -344,27 +376,46 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan,
}
if considerDNF {
pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
if err != nil {
return nil, err
}
res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues)
if pointRes != nil {
if len(pointRes.Ranges) == 0 {
if bestCNFItemRes != nil && bestCNFItemRes.rangeResult != nil {
if len(bestCNFItemRes.rangeResult.Ranges) == 0 {
return &DetachRangeResult{}, nil
}
if len(pointRes.Ranges[0].LowVal) > eqOrInCount {
pointRes.ColumnValues = res.ColumnValues
res = pointRes
pointRanges = pointRes.Ranges
if bestCNFItemRes.sameLenPointRanges && bestCNFItemRes.minColNum > eqOrInCount {
bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues
res = bestCNFItemRes.rangeResult
pointRanges = bestCNFItemRes.rangeResult.Ranges
eqOrInCount = len(res.Ranges[0].LowVal)
newConditions = newConditions[:0]
newConditions = append(newConditions, conditions[:offset]...)
newConditions = append(newConditions, conditions[offset+1:]...)
newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...)
newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...)
if eqOrInCount == len(d.cols) || len(newConditions) == 0 {
res.RemainedConds = append(res.RemainedConds, newConditions...)
return res, nil
}
} else {
considerCNFItemNonPointRanges := false
fixValue, ok := d.sctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44389)
if ok && variable.TiDBOptOn(fixValue) {
considerCNFItemNonPointRanges = true
}
if considerCNFItemNonPointRanges && !bestCNFItemRes.sameLenPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 {
// When eqOrInCount is 0, if we don't enter the IF branch, we would use detachColumnCNFConditions to build
// ranges on the first index column.
// Considering minColNum > 0 and maxColNum > 1, bestCNFItemRes is better than the ranges built by detachColumnCNFConditions
// in most cases.
bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues
res = bestCNFItemRes.rangeResult
newConditions = newConditions[:0]
newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...)
newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...)
res.RemainedConds = append(res.RemainedConds, newConditions...)
return res, nil
}
}
}
if eqOrInCount > 0 {
Expand Down
30 changes: 29 additions & 1 deletion util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,7 @@ func TestIssue41572(t *testing.T) {
testdata.OnRecord(func() {
output[i].SQL = tt
output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows())
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows())
})
testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...))
testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...))
Expand Down Expand Up @@ -2592,3 +2592,31 @@ create table t(
require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr))
}
}

// TestIssue44389 runs the ranger suite cases registered for this test with
// optimizer fix control 44389 switched on for the session. For each case it
// checks both the explain output and the sorted query result against the
// recorded expectations (or records them when the suite is in record mode).
func TestIssue44389(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)

	// Schema, data and session setup, executed in order.
	setup := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(100), b int, c int, index idx_ab(a, b))",
		"insert into t values ('kk', 1, 10), ('kk', 1, 20), ('hh', 2, 10), ('hh', 3, 10), ('xx', 4, 10), ('yy', 5, 10), ('yy', 6, 20), ('zz', 7, 10)",
		"set @@tidb_opt_fix_control = '44389:ON'",
	}
	for _, stmt := range setup {
		tk.MustExec(stmt)
	}

	var (
		input  []string
		output []struct {
			SQL    string
			Plan   []string
			Result []string
		}
	)
	rangerSuiteData.LoadTestCases(t, &input, &output)

	for i, tt := range input {
		explainSQL := "explain " + tt
		testdata.OnRecord(func() {
			rec := &output[i]
			rec.SQL = tt
			rec.Plan = testdata.ConvertRowsToStrings(tk.MustQuery(explainSQL).Rows())
			// Results are stored sorted so they match the sorted comparison below.
			rec.Result = testdata.ConvertRowsToStrings(tk.MustQuery(tt).Sort().Rows())
		})
		want := output[i]
		tk.MustQuery(explainSQL).Check(testkit.Rows(want.Plan...))
		tk.MustQuery(tt).Sort().Check(testkit.Rows(want.Result...))
	}
}
7 changes: 7 additions & 0 deletions util/ranger/testdata/ranger_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,12 @@
"select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73) and col3 != \"2024-10-19 08:55:32\"",
"select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73, 74) and col3 != \"2024-10-19 08:55:32\""
]
},
{
"name": "TestIssue44389",
"cases": [
"select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))",
"select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))"
]
}
]
33 changes: 33 additions & 0 deletions util/ranger/testdata/ranger_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -723,5 +723,38 @@
]
}
]
},
{
"Name": "TestIssue44389",
"Cases": [
{
"SQL": "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))",
"Plan": [
"IndexLookUp_11 0.01 root ",
"├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:[\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], keep order:false, stats:pseudo",
"└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)",
" └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Result": [
"kk 1 10",
"xx 4 10"
]
},
{
"SQL": "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))",
"Plan": [
"IndexLookUp_11 0.02 root ",
"├─IndexRangeScan_8(Build) 20.20 cop[tikv] table:t, index:idx_ab(a, b) range:[\"hh\" 2,\"hh\" 2], [\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], [\"yy\",\"yy\"], keep order:false, stats:pseudo",
"└─Selection_10(Probe) 0.02 cop[tikv] eq(test.t.c, 10)",
" └─TableRowIDScan_9 20.20 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Result": [
"hh 2 10",
"kk 1 10",
"xx 4 10",
"yy 5 10"
]
}
]
}
]