From ef15ed3edf188d5306fcf83112f00862bfb16b5d Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Wed, 22 Feb 2023 15:27:38 +0800 Subject: [PATCH 1/6] don't merge consecutive ranges when building ranges for cnf item --- util/ranger/detacher.go | 26 ++++++++++++++---- util/ranger/ranger_test.go | 31 +++++++++++++++++++++ util/ranger/testdata/ranger_suite_in.json | 7 +++++ util/ranger/testdata/ranger_suite_out.json | 32 ++++++++++++++++++++++ 4 files changed, 90 insertions(+), 6 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 606f53c40265f..acd1359ad1cb2 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -204,7 +204,14 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E if colSets.Len() == 0 { continue } - res, err := DetachCondAndBuildRangeForIndex(sctx, tmpConds, cols, lengths, rangeMaxSize) + // When we build ranges for the CNF item, we choose not to merge consecutive ranges because we hope to get point + // ranges here. See https://github.com/pingcap/tidb/issues/41572 for more details. + // + // Here is an example. Assume that the index is `idx(a,b,c)` and the condition is `((a,b) in ((1,1),(1,2)) and c = 1`. + // We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can + // append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`, + // which are not point ranges, and we cannot append `c = 1` anymore. + res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize) if err != nil { return nil, -1, nil, err } @@ -821,11 +828,9 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre return d.detachCondAndBuildRangeForCols() } -// DetachCondAndBuildRangeForPartition will detach the index filters from table filters. -// rangeMaxSize is the max memory limit for ranges. O indicates no memory limit. If you ask that all conditions must be used -// for building ranges, set rangeMemQuota to 0 to avoid range fallback. -// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation. -func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, +// detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges. +// When building ranges, it doesn't merge consecutive ranges. +func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) { d := &rangeDetacher{ sctx: sctx, @@ -838,6 +843,15 @@ func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []e return d.detachCondAndBuildRangeForCols() } +// DetachCondAndBuildRangeForPartition will detach the index filters from table filters. +// rangeMaxSize is the max memory limit for ranges. O indicates no memory limit. If you ask that all conditions must be used +// for building ranges, set rangeMemQuota to 0 to avoid range fallback. +// The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation. +func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, + lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) { + return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize) +} + type rangeDetacher struct { sctx sessionctx.Context allConds []expression.Expression diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 74af841563b0f..3ea217aff2242 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1000,6 +1000,33 @@ func TestCompIndexMultiColDNF2(t *testing.T) { } } +func TestCompIndexMultiColDNF3(t *testing.T) { + store := testkit.CreateMockStore(t) + + testKit := testkit.NewTestKit(t, store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a varchar(100), b int, c int, d int, index idx(a, b, c))") + testKit.MustExec("insert into t values ('t',1,1,1),('t',1,3,3),('t',2,1,3),('t',2,3,1),('w',0,3,3),('z',0,1,1)") + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + rangerSuiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) + }) + testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...)) + testKit.MustQuery(tt).Check(testkit.Rows(output[i].Result...)) + } +} + func TestPrefixIndexMultiColDNF(t *testing.T) { store := testkit.CreateMockStore(t) @@ -2565,3 +2592,7 @@ create table t( require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr)) } } + +func TestConsiderDNFPath(t *testing.T) { + +} diff --git a/util/ranger/testdata/ranger_suite_in.json b/util/ranger/testdata/ranger_suite_in.json index c1641c0f251f7..81e1cfb72ef58 100644 --- a/util/ranger/testdata/ranger_suite_in.json +++ b/util/ranger/testdata/ranger_suite_in.json @@ -59,6 +59,13 @@ "select * from t where ((a = 1 and b = 1) or (a = 2 and b = 2)) and c > 2;" ] }, + { + "name": "TestCompIndexMultiColDNF3", + "cases": [ + "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and c > 2", + "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and d > 2" + ] + }, { "name": "TestPrefixIndexMultiColDNF", "cases": [ diff --git a/util/ranger/testdata/ranger_suite_out.json b/util/ranger/testdata/ranger_suite_out.json index 4a4f0c85e2682..0834f4be40ece 100644 --- a/util/ranger/testdata/ranger_suite_out.json +++ b/util/ranger/testdata/ranger_suite_out.json @@ -333,6 +333,38 @@ } ] }, + { + "Name": "TestCompIndexMultiColDNF3", + "Cases": [ + { + "SQL": "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and c > 2", + "Plan": [ + "IndexLookUp_7 1.00 root ", + "├─IndexRangeScan_5(Build) 1.00 cop[tikv] table:t, index:idx(a, b, c) range:(\"t\" 1 2,\"t\" 1 +inf], (\"t\" 2 2,\"t\" 2 +inf], (\"w\" 0 2,\"w\" 0 +inf], keep order:false, stats:pseudo", + "└─TableRowIDScan_6(Probe) 1.00 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Result": [ + "t 1 3 3", + "t 2 3 1", + "w 0 3 3" + ] + }, + { + "SQL": "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and d > 2", + "Plan": [ + "IndexLookUp_8 0.10 root ", + "├─IndexRangeScan_5(Build) 0.30 cop[tikv] table:t, index:idx(a, b, c) range:[\"t\" 1,\"t\" 1], [\"t\" 2,\"t\" 2], [\"w\" 0,\"w\" 0], keep order:false, stats:pseudo", + "└─Selection_7(Probe) 0.10 cop[tikv] gt(test.t.d, 2)", + " └─TableRowIDScan_6 0.30 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Result": [ + "t 1 3 3", + "t 2 1 3", + "w 0 3 3" + ] + } + ] + }, { "Name": "TestPrefixIndexMultiColDNF", "Cases": [ From f035de406ee162b81d0ef35de3fb88eb28f590c1 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 29 May 2023 09:49:53 +0800 Subject: [PATCH 2/6] upd --- util/ranger/detacher.go | 50 ++++++++++++++-------- util/ranger/ranger_test.go | 6 +-- util/ranger/testdata/ranger_suite_in.json | 2 +- util/ranger/testdata/ranger_suite_out.json | 2 +- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index f825f9363b53d..94c9a12066d6e 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -186,18 +186,20 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres } // extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item -// is totally composed of point range filters. +// has the maximum range length among all the CNF items. If two CNF items has the same range length, we choose the one +// with totally point ranges. // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, - lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, []*valueInfo, error) { + lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, bool, []*valueInfo, error) { if len(conds) < 2 { - return nil, -1, nil, nil + return nil, -1, false, nil, nil } var r *DetachRangeResult columnValues := make([]*valueInfo, len(cols)) maxNumCols := int(0) offset := int(-1) + allPoints := false for i, cond := range conds { tmpConds := []expression.Expression{cond} colSets := expression.ExtractColumnSet(cond) @@ -213,22 +215,21 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E // which are not point ranges, and we cannot append `c = 1` anymore. res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize) if err != nil { - return nil, -1, nil, err + return nil, -1, false, nil, err } if len(res.Ranges) == 0 { - return &DetachRangeResult{}, -1, nil, nil + return &DetachRangeResult{}, -1, false, nil, nil } // take the union of the two columnValues columnValues = unionColumnValues(columnValues, res.ColumnValues) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } - sameLens, allPoints := true, true + sameLens, curAllPoints := true, true numCols := int(0) for j, ran := range res.Ranges { if !ran.IsPoint(sctx) { - allPoints = false - break + curAllPoints = false } if j == 0 { numCols = len(ran.LowVal) @@ -237,19 +238,22 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E break } } - if !allPoints || !sameLens { + if !sameLens { continue } - if numCols > maxNumCols { + // The meaning of `numCols == maxNumCols && !allPoints && curAllPoints` is as follows: + // If two CNF items has the same range length, we choose the one with totally point ranges. + if numCols > maxNumCols || (numCols == maxNumCols && !allPoints && curAllPoints) { r = res offset = i maxNumCols = numCols + allPoints = curAllPoints } } if r != nil { r.IsDNFCond = false } - return r, offset, columnValues, nil + return r, offset, allPoints, columnValues, nil } func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { @@ -344,19 +348,19 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { - pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) + bestCNFItemRes, offset, allPoints, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) if err != nil { return nil, err } res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) - if pointRes != nil { - if len(pointRes.Ranges) == 0 { + if bestCNFItemRes != nil { + if len(bestCNFItemRes.Ranges) == 0 { return &DetachRangeResult{}, nil } - if len(pointRes.Ranges[0].LowVal) > eqOrInCount { - pointRes.ColumnValues = res.ColumnValues - res = pointRes - pointRanges = pointRes.Ranges + if allPoints && len(bestCNFItemRes.Ranges[0].LowVal) > eqOrInCount { + bestCNFItemRes.ColumnValues = res.ColumnValues + res = bestCNFItemRes + pointRanges = bestCNFItemRes.Ranges eqOrInCount = len(res.Ranges[0].LowVal) newConditions = newConditions[:0] newConditions = append(newConditions, conditions[:offset]...) @@ -365,6 +369,16 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil } + } else if !allPoints && len(bestCNFItemRes.Ranges[0].LowVal) > eqOrInCount+1 { + // For the case `!allPoints && len(bestCNFItemRes.Ranges[0].LowVal) == eqOrInCount+1`, we don't choose to + // do early return here because the subsequent code can build better tail range. + bestCNFItemRes.ColumnValues = res.ColumnValues + res = bestCNFItemRes + newConditions = newConditions[:0] + newConditions = append(newConditions, conditions[:offset]...) + newConditions = append(newConditions, conditions[offset+1:]...) + res.RemainedConds = append(res.RemainedConds, newConditions...) + return res, nil } } if eqOrInCount > 0 { diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 750af759dbaa5..ab2bb87f6ab8d 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1000,7 +1000,7 @@ func TestCompIndexMultiColDNF2(t *testing.T) { } } -func TestCompIndexMultiColDNF3(t *testing.T) { +func TestComplexCondForConsiderDNFPath(t *testing.T) { store := testkit.CreateMockStore(t) testKit := testkit.NewTestKit(t, store) @@ -2592,7 +2592,3 @@ create table t( require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr)) } } - -func TestConsiderDNFPath(t *testing.T) { - -} diff --git a/util/ranger/testdata/ranger_suite_in.json b/util/ranger/testdata/ranger_suite_in.json index 81e1cfb72ef58..7eac585e51d55 100644 --- a/util/ranger/testdata/ranger_suite_in.json +++ b/util/ranger/testdata/ranger_suite_in.json @@ -60,7 +60,7 @@ ] }, { - "name": "TestCompIndexMultiColDNF3", + "name": "TestComplexCondForConsiderDNFPath", "cases": [ "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and c > 2", "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and d > 2" diff --git a/util/ranger/testdata/ranger_suite_out.json b/util/ranger/testdata/ranger_suite_out.json index 0834f4be40ece..aa434a7755bac 100644 --- a/util/ranger/testdata/ranger_suite_out.json +++ b/util/ranger/testdata/ranger_suite_out.json @@ -334,7 +334,7 @@ ] }, { - "Name": "TestCompIndexMultiColDNF3", + "Name": "TestComplexCondForConsiderDNFPath", "Cases": [ { "SQL": "select * from t use index (idx) where ((a = 't' and b = 1) or (a = 't' and b = 2) or (a = 'w' and b = 0)) and c > 2", From a81194dd77f32a92a81ab714df4f13b8f7eb012c Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Sun, 4 Jun 2023 16:19:05 +0800 Subject: [PATCH 3/6] upd --- util/ranger/detacher.go | 132 ++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 53 deletions(-) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 94c9a12066d6e..459ddcb690551 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -185,21 +185,69 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres return -1 } -// extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item -// has the maximum range length among all the CNF items. If two CNF items has the same range length, we choose the one -// with totally point ranges. +type cnfItemRangeResult struct { + emptyRange bool + rangeResult *DetachRangeResult + offset int + // isPointRanges means that each range is point range and all of them have the same column numbers(i.e., maxColNum = minColNum). + isPointRanges bool + maxColNum int + minColNum int +} + +func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResult, offset int) *cnfItemRangeResult { + isPointRanges := true + var maxColNum, minColNum int + for i, ran := range rangeResult.Ranges { + if !ran.IsPoint(sctx) { + isPointRanges = false + } + if i == 0 { + maxColNum = len(ran.LowVal) + minColNum = len(ran.LowVal) + } else { + if len(ran.LowVal) > maxColNum { + maxColNum = len(ran.LowVal) + } + if len(ran.LowVal) < minColNum { + minColNum = len(ran.LowVal) + } + } + } + return &cnfItemRangeResult{ + rangeResult: rangeResult, + offset: offset, + isPointRanges: isPointRanges, + maxColNum: maxColNum, + minColNum: minColNum, + } +} + +func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIsBetter bool) { + if curResult.isPointRanges && bestResult.isPointRanges { + return curResult.minColNum > bestResult.minColNum + } + if !curResult.isPointRanges && !bestResult.isPointRanges { + if curResult.minColNum == bestResult.minColNum { + return curResult.maxColNum > bestResult.maxColNum + } + return curResult.minColNum > bestResult.minColNum + } + // Point ranges is better than non-point ranges since we can append subsequent column ranges to point ranges. + return curResult.isPointRanges +} + +// extractBestCNFItemRanges builds ranges for each CNF item from the input CNF expressions and returns the best CNF +// item ranges. // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. -func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, - lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, bool, []*valueInfo, error) { +func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, + lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) { if len(conds) < 2 { - return nil, -1, false, nil, nil + return nil, nil, nil } - var r *DetachRangeResult + var bestRes *cnfItemRangeResult columnValues := make([]*valueInfo, len(cols)) - maxNumCols := int(0) - offset := int(-1) - allPoints := false for i, cond := range conds { tmpConds := []expression.Expression{cond} colSets := expression.ExtractColumnSet(cond) @@ -215,45 +263,25 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E // which are not point ranges, and we cannot append `c = 1` anymore. res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize) if err != nil { - return nil, -1, false, nil, err + return nil, nil, err } if len(res.Ranges) == 0 { - return &DetachRangeResult{}, -1, false, nil, nil + return &cnfItemRangeResult{emptyRange: true, offset: i}, nil, nil } // take the union of the two columnValues columnValues = unionColumnValues(columnValues, res.ColumnValues) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } - sameLens, curAllPoints := true, true - numCols := int(0) - for j, ran := range res.Ranges { - if !ran.IsPoint(sctx) { - curAllPoints = false - } - if j == 0 { - numCols = len(ran.LowVal) - } else if numCols != len(ran.LowVal) { - sameLens = false - break - } - } - if !sameLens { - continue - } - // The meaning of `numCols == maxNumCols && !allPoints && curAllPoints` is as follows: - // If two CNF items has the same range length, we choose the one with totally point ranges. - if numCols > maxNumCols || (numCols == maxNumCols && !allPoints && curAllPoints) { - r = res - offset = i - maxNumCols = numCols - allPoints = curAllPoints + curRes := getCNFItemRangeResult(sctx, res, i) + if bestRes == nil || compareCNFItemRangeResult(curRes, bestRes) { + bestRes = curRes } } - if r != nil { - r.IsDNFCond = false + if bestRes != nil && bestRes.rangeResult != nil { + bestRes.rangeResult.IsDNFCond = false } - return r, offset, allPoints, columnValues, nil + return bestRes, columnValues, nil } func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { @@ -348,35 +376,33 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { - bestCNFItemRes, offset, allPoints, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) + bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) if err != nil { return nil, err } res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) if bestCNFItemRes != nil { - if len(bestCNFItemRes.Ranges) == 0 { + if bestCNFItemRes.emptyRange { return &DetachRangeResult{}, nil } - if allPoints && len(bestCNFItemRes.Ranges[0].LowVal) > eqOrInCount { - bestCNFItemRes.ColumnValues = res.ColumnValues - res = bestCNFItemRes - pointRanges = bestCNFItemRes.Ranges + if bestCNFItemRes.isPointRanges && bestCNFItemRes.minColNum > eqOrInCount { + bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues + res = bestCNFItemRes.rangeResult + pointRanges = bestCNFItemRes.rangeResult.Ranges eqOrInCount = len(res.Ranges[0].LowVal) newConditions = newConditions[:0] - newConditions = append(newConditions, conditions[:offset]...) - newConditions = append(newConditions, conditions[offset+1:]...) + newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) + newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) if eqOrInCount == len(d.cols) || len(newConditions) == 0 { res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil } - } else if !allPoints && len(bestCNFItemRes.Ranges[0].LowVal) > eqOrInCount+1 { - // For the case `!allPoints && len(bestCNFItemRes.Ranges[0].LowVal) == eqOrInCount+1`, we don't choose to - // do early return here because the subsequent code can build better tail range. - bestCNFItemRes.ColumnValues = res.ColumnValues - res = bestCNFItemRes + } else if !bestCNFItemRes.isPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 { + bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues + res = bestCNFItemRes.rangeResult newConditions = newConditions[:0] - newConditions = append(newConditions, conditions[:offset]...) - newConditions = append(newConditions, conditions[offset+1:]...) + newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) + newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil } From be361b396094bc673287fcabac6774873c5fead8 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Sun, 4 Jun 2023 18:38:56 +0800 Subject: [PATCH 4/6] fix and add ut --- sessionctx/variable/session.go | 2 + util/ranger/detacher.go | 61 ++++++++++++++-------- util/ranger/ranger_test.go | 30 ++++++++++- util/ranger/testdata/ranger_suite_in.json | 7 +++ util/ranger/testdata/ranger_suite_out.json | 33 ++++++++++++ 5 files changed, 109 insertions(+), 24 deletions(-) diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 80bc1681db74f..20b1766fab8b7 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1497,6 +1497,8 @@ var ( // TiDBOptFixControl44262 controls whether to allow to use dynamic-mode to access partitioning tables without global-stats (#44262). TiDBOptFixControl44262 uint64 = 44262 + // TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges. + TiDBOptFixControl44389 uint64 = 44389 ) // GetOptimizerFixControlValue returns the specified value of the optimizer fix control. diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 459ddcb690551..ac2f38641565f 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -15,6 +15,7 @@ package ranger import ( + "github.com/pingcap/tidb/sessionctx/variable" "math" "github.com/pingcap/errors" @@ -189,18 +190,18 @@ type cnfItemRangeResult struct { emptyRange bool rangeResult *DetachRangeResult offset int - // isPointRanges means that each range is point range and all of them have the same column numbers(i.e., maxColNum = minColNum). - isPointRanges bool - maxColNum int - minColNum int + // sameLenPointRanges means that each range is point range and all of them have the same column numbers(i.e., maxColNum = minColNum). + sameLenPointRanges bool + maxColNum int + minColNum int } func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResult, offset int) *cnfItemRangeResult { - isPointRanges := true + sameLenPointRanges := true var maxColNum, minColNum int for i, ran := range rangeResult.Ranges { if !ran.IsPoint(sctx) { - isPointRanges = false + sameLenPointRanges = false } if i == 0 { maxColNum = len(ran.LowVal) @@ -214,27 +215,30 @@ func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResu } } } + if minColNum != maxColNum { + sameLenPointRanges = false + } return &cnfItemRangeResult{ - rangeResult: rangeResult, - offset: offset, - isPointRanges: isPointRanges, - maxColNum: maxColNum, - minColNum: minColNum, + rangeResult: rangeResult, + offset: offset, + sameLenPointRanges: sameLenPointRanges, + maxColNum: maxColNum, + minColNum: minColNum, } } func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIsBetter bool) { - if curResult.isPointRanges && bestResult.isPointRanges { + if curResult.sameLenPointRanges && bestResult.sameLenPointRanges { return curResult.minColNum > bestResult.minColNum } - if !curResult.isPointRanges && !bestResult.isPointRanges { + if !curResult.sameLenPointRanges && !bestResult.sameLenPointRanges { if curResult.minColNum == bestResult.minColNum { return curResult.maxColNum > bestResult.maxColNum } return curResult.minColNum > bestResult.minColNum } // Point ranges is better than non-point ranges since we can append subsequent column ranges to point ranges. - return curResult.isPointRanges + return curResult.sameLenPointRanges } // extractBestCNFItemRanges builds ranges for each CNF item from the input CNF expressions and returns the best CNF @@ -385,7 +389,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi if bestCNFItemRes.emptyRange { return &DetachRangeResult{}, nil } - if bestCNFItemRes.isPointRanges && bestCNFItemRes.minColNum > eqOrInCount { + if bestCNFItemRes.sameLenPointRanges && bestCNFItemRes.minColNum > eqOrInCount { bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues res = bestCNFItemRes.rangeResult pointRanges = bestCNFItemRes.rangeResult.Ranges @@ -397,14 +401,25 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi res.RemainedConds = append(res.RemainedConds, newConditions...) return res, nil } - } else if !bestCNFItemRes.isPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 { - bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues - res = bestCNFItemRes.rangeResult - newConditions = newConditions[:0] - newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) - newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) - res.RemainedConds = append(res.RemainedConds, newConditions...) - return res, nil + } else { + considerCNFItemNonPointRanges := false + fixValue, ok := d.sctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44389) + if ok && variable.TiDBOptOn(fixValue) { + considerCNFItemNonPointRanges = true + } + if considerCNFItemNonPointRanges && !bestCNFItemRes.sameLenPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 { + // When eqOrInCount is 0, if we don't enter the IF branch, we would use detachColumnCNFConditions to build + // ranges on the first index column. + // Considering minColNum > 0 and maxColNum > 1, bestCNFItemRes is better than the ranges built by detachColumnCNFConditions + // in most cases. + bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues + res = bestCNFItemRes.rangeResult + newConditions = newConditions[:0] + newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) + newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) + res.RemainedConds = append(res.RemainedConds, newConditions...) + return res, nil + } } } if eqOrInCount > 0 { diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index 63d99ac5c3f9b..db54c134b6cfd 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1020,7 +1020,7 @@ func TestIssue41572(t *testing.T) { testdata.OnRecord(func() { output[i].SQL = tt output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows()) - output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows()) }) testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...)) testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) @@ -2592,3 +2592,31 @@ create table t( require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr)) } } + +func TestIssue44389(t *testing.T) { + store := testkit.CreateMockStore(t) + + testKit := testkit.NewTestKit(t, store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a varchar(100), b int, c int, index idx_ab(a, b))") + testKit.MustExec("insert into t values ('kk', 1, 10), ('kk', 1, 20), ('hh', 2, 10), ('hh', 3, 10), ('xx', 4, 10), ('yy', 5, 10), ('yy', 6, 20), ('zz', 7, 10)") + testKit.MustExec("set @@tidb_opt_fix_control = '44389:ON'") + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + rangerSuiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows()) + }) + testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...)) + testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) + } +} diff --git a/util/ranger/testdata/ranger_suite_in.json b/util/ranger/testdata/ranger_suite_in.json index fbfc86083e661..a862e85f2450e 100644 --- a/util/ranger/testdata/ranger_suite_in.json +++ b/util/ranger/testdata/ranger_suite_in.json @@ -116,5 +116,12 @@ "select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73) and col3 != \"2024-10-19 08:55:32\"", "select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73, 74) and col3 != \"2024-10-19 08:55:32\"" ] + }, + { + "name": "TestIssue44389", + "cases": [ + "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))", + "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))" + ] } ] diff --git a/util/ranger/testdata/ranger_suite_out.json b/util/ranger/testdata/ranger_suite_out.json index 6a86b7b19c4c4..8e5385a843848 100644 --- a/util/ranger/testdata/ranger_suite_out.json +++ b/util/ranger/testdata/ranger_suite_out.json @@ -723,5 +723,38 @@ ] } ] + }, + { + "Name": "TestIssue44389", + "Cases": [ + { + "SQL": "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))", + "Plan": [ + "IndexLookUp_11 0.01 root ", + "├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:[\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], keep order:false, stats:pseudo", + "└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)", + " └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Result": [ + "kk 1 10", + "xx 4 10" + ] + }, + { + "SQL": "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))", + "Plan": [ + "IndexLookUp_11 0.02 root ", + "├─IndexRangeScan_8(Build) 20.20 cop[tikv] table:t, index:idx_ab(a, b) range:[\"hh\" 2,\"hh\" 2], [\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], [\"yy\",\"yy\"], keep order:false, stats:pseudo", + "└─Selection_10(Probe) 0.02 cop[tikv] eq(test.t.c, 10)", + " └─TableRowIDScan_9 20.20 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Result": [ + "hh 2 10", + "kk 1 10", + "xx 4 10", + "yy 5 10" + ] + } + ] } ] From 8b0e2084bee03dc8d27ca36271de606792b9d183 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Sun, 4 Jun 2023 19:27:59 +0800 Subject: [PATCH 5/6] fix --- util/ranger/BUILD.bazel | 1 + util/ranger/detacher.go | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/util/ranger/BUILD.bazel b/util/ranger/BUILD.bazel index 78d6091bb5749..95c6d42c0bafa 100644 --- a/util/ranger/BUILD.bazel +++ b/util/ranger/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "//parser/terror", "//sessionctx", "//sessionctx/stmtctx", + "//sessionctx/variable", "//types", "//types/parser_driver", "//util/chunk", diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index ac2f38641565f..ac8afbf8f857b 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -15,7 +15,6 @@ package ranger import ( - "github.com/pingcap/tidb/sessionctx/variable" "math" "github.com/pingcap/errors" @@ -25,6 +24,7 @@ import ( "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" + "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/collate" @@ -187,7 +187,6 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres } type cnfItemRangeResult struct { - emptyRange bool rangeResult *DetachRangeResult offset int // sameLenPointRanges means that each range is point range and all of them have the same column numbers(i.e., maxColNum = minColNum). @@ -270,7 +269,7 @@ func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expres return nil, nil, err } if len(res.Ranges) == 0 { - return &cnfItemRangeResult{emptyRange: true, offset: i}, nil, nil + return &cnfItemRangeResult{rangeResult: res, offset: i}, nil, nil } // take the union of the two columnValues columnValues = unionColumnValues(columnValues, res.ColumnValues) @@ -385,8 +384,8 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return nil, err } res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) - if bestCNFItemRes != nil { - if bestCNFItemRes.emptyRange { + if bestCNFItemRes != nil && bestCNFItemRes.rangeResult != nil { + if len(bestCNFItemRes.rangeResult.Ranges) == 0 { return &DetachRangeResult{}, nil } if bestCNFItemRes.sameLenPointRanges && bestCNFItemRes.minColNum > eqOrInCount { From 8ae29ed62bd1556a516cce30dd0e09b539ba1210 Mon Sep 17 00:00:00 2001 From: xuyifan <675434007@qq.com> Date: Mon, 5 Jun 2023 08:17:47 +0800 Subject: [PATCH 6/6] address comment --- util/ranger/BUILD.bazel | 1 + util/ranger/detacher.go | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/util/ranger/BUILD.bazel b/util/ranger/BUILD.bazel index 95c6d42c0bafa..be470b8c15d6c 100644 --- a/util/ranger/BUILD.bazel +++ b/util/ranger/BUILD.bazel @@ -30,6 +30,7 @@ go_library( "//util/codec", "//util/collate", "//util/dbterror", + "//util/mathutil", "@com_github_pingcap_errors//:errors", "@org_golang_x_exp//slices", ], diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index ac8afbf8f857b..8ac79b9502ba8 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/collate" + "github.com/pingcap/tidb/util/mathutil" ) // detachColumnCNFConditions detaches the condition for calculating range from the other conditions. @@ -206,12 +207,8 @@ func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResu maxColNum = len(ran.LowVal) minColNum = len(ran.LowVal) } else { - if len(ran.LowVal) > maxColNum { - maxColNum = len(ran.LowVal) - } - if len(ran.LowVal) < minColNum { - minColNum = len(ran.LowVal) - } + maxColNum = mathutil.Max(maxColNum, len(ran.LowVal)) + minColNum = mathutil.Min(minColNum, len(ran.LowVal)) } } if minColNum != maxColNum {