diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 3e170f6655f8..16f630cde85e 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1335,6 +1335,8 @@ var ( // TiDBOptFixControl44262 controls whether to allow to use dynamic-mode to access partitioning tables without global-stats (#44262). TiDBOptFixControl44262 uint64 = 44262 + // TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges. + TiDBOptFixControl44389 uint64 = 44389 ) // GetOptimizerFixControlValue returns the specified value of the optimizer fix control. diff --git a/util/ranger/BUILD.bazel b/util/ranger/BUILD.bazel index 2183e7f361b7..2a46c199c266 100644 --- a/util/ranger/BUILD.bazel +++ b/util/ranger/BUILD.bazel @@ -23,12 +23,14 @@ go_library( "//parser/terror", "//sessionctx", "//sessionctx/stmtctx", + "//sessionctx/variable", "//types", "//types/parser_driver", "//util/chunk", "//util/codec", "//util/collate", "//util/dbterror", + "//util/mathutil", "@com_github_pingcap_errors//:errors", "@org_golang_x_exp//slices", ], diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index acd1359ad1cb..4639a142e3f7 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -24,9 +24,11 @@ import ( "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" + "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/collate" + "github.com/pingcap/tidb/util/mathutil" ) // detachColumnCNFConditions detaches the condition for calculating range from the other conditions. @@ -185,19 +187,67 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres return -1 } -// extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item -// is totally composed of point range filters. 
+type cnfItemRangeResult struct { + rangeResult *DetachRangeResult + offset int + // sameLenPointRanges means that every range is a point range and all of them have the same number of columns (i.e., maxColNum == minColNum). + sameLenPointRanges bool + maxColNum int + minColNum int +} + +func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResult, offset int) *cnfItemRangeResult { + sameLenPointRanges := true + var maxColNum, minColNum int + for i, ran := range rangeResult.Ranges { + if !ran.IsPoint(sctx) { + sameLenPointRanges = false + } + if i == 0 { + maxColNum = len(ran.LowVal) + minColNum = len(ran.LowVal) + } else { + maxColNum = mathutil.Max(maxColNum, len(ran.LowVal)) + minColNum = mathutil.Min(minColNum, len(ran.LowVal)) + } + } + if minColNum != maxColNum { + sameLenPointRanges = false + } + return &cnfItemRangeResult{ + rangeResult: rangeResult, + offset: offset, + sameLenPointRanges: sameLenPointRanges, + maxColNum: maxColNum, + minColNum: minColNum, + } +} + +func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIsBetter bool) { + if curResult.sameLenPointRanges && bestResult.sameLenPointRanges { + return curResult.minColNum > bestResult.minColNum + } + if !curResult.sameLenPointRanges && !bestResult.sameLenPointRanges { + if curResult.minColNum == bestResult.minColNum { + return curResult.maxColNum > bestResult.maxColNum + } + return curResult.minColNum > bestResult.minColNum + } + // Point ranges are better than non-point ranges since we can append subsequent column ranges to point ranges. + return curResult.sameLenPointRanges +} + +// extractBestCNFItemRanges builds ranges for each CNF item from the input CNF expressions and returns the best CNF +// item ranges. // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. 
-func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, - lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, []*valueInfo, error) { +func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, + lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) { if len(conds) < 2 { - return nil, -1, nil, nil + return nil, nil, nil } - var r *DetachRangeResult + var bestRes *cnfItemRangeResult columnValues := make([]*valueInfo, len(cols)) - maxNumCols := int(0) - offset := int(-1) for i, cond := range conds { tmpConds := []expression.Expression{cond} colSets := expression.ExtractColumnSet(cond) @@ -213,43 +263,25 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E // which are not point ranges, and we cannot append `c = 1` anymore. res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize) if err != nil { - return nil, -1, nil, err + return nil, nil, err } if len(res.Ranges) == 0 { - return &DetachRangeResult{}, -1, nil, nil + return &cnfItemRangeResult{rangeResult: res, offset: i}, nil, nil } // take the union of the two columnValues columnValues = unionColumnValues(columnValues, res.ColumnValues) if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 { continue } - sameLens, allPoints := true, true - numCols := int(0) - for j, ran := range res.Ranges { - if !ran.IsPoint(sctx) { - allPoints = false - break - } - if j == 0 { - numCols = len(ran.LowVal) - } else if numCols != len(ran.LowVal) { - sameLens = false - break - } - } - if !allPoints || !sameLens { - continue - } - if numCols > maxNumCols { - r = res - offset = i - maxNumCols = numCols + curRes := getCNFItemRangeResult(sctx, res, i) + if bestRes == nil || compareCNFItemRangeResult(curRes, bestRes) { + bestRes = curRes } } - if r != nil { - r.IsDNFCond = false + if bestRes != nil && bestRes.rangeResult 
!= nil { + bestRes.rangeResult.IsDNFCond = false } - return r, offset, columnValues, nil + return bestRes, columnValues, nil } func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { @@ -344,27 +376,46 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { - pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) + bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) if err != nil { return nil, err } res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues) - if pointRes != nil { - if len(pointRes.Ranges) == 0 { + if bestCNFItemRes != nil && bestCNFItemRes.rangeResult != nil { + if len(bestCNFItemRes.rangeResult.Ranges) == 0 { return &DetachRangeResult{}, nil } - if len(pointRes.Ranges[0].LowVal) > eqOrInCount { - pointRes.ColumnValues = res.ColumnValues - res = pointRes - pointRanges = pointRes.Ranges + if bestCNFItemRes.sameLenPointRanges && bestCNFItemRes.minColNum > eqOrInCount { + bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues + res = bestCNFItemRes.rangeResult + pointRanges = bestCNFItemRes.rangeResult.Ranges eqOrInCount = len(res.Ranges[0].LowVal) newConditions = newConditions[:0] - newConditions = append(newConditions, conditions[:offset]...) - newConditions = append(newConditions, conditions[offset+1:]...) + newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) + newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) if eqOrInCount == len(d.cols) || len(newConditions) == 0 { res.RemainedConds = append(res.RemainedConds, newConditions...) 
return res, nil } + } else { + considerCNFItemNonPointRanges := false + fixValue, ok := d.sctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44389) + if ok && variable.TiDBOptOn(fixValue) { + considerCNFItemNonPointRanges = true + } + if considerCNFItemNonPointRanges && !bestCNFItemRes.sameLenPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 { + // When eqOrInCount is 0, if we don't enter the IF branch, we would use detachColumnCNFConditions to build + // ranges on the first index column. + // Considering minColNum > 0 and maxColNum > 1, bestCNFItemRes is better than the ranges built by detachColumnCNFConditions + // in most cases. + bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues + res = bestCNFItemRes.rangeResult + newConditions = newConditions[:0] + newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...) + newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...) + res.RemainedConds = append(res.RemainedConds, newConditions...) 
+ return res, nil + } } } if eqOrInCount > 0 { diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index f6ffb681a9e4..3ff16f32a44d 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1020,7 +1020,7 @@ func TestIssue41572(t *testing.T) { testdata.OnRecord(func() { output[i].SQL = tt output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows()) - output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows()) }) testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...)) testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) @@ -2592,3 +2592,31 @@ create table t( require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr)) } } + +func TestIssue44389(t *testing.T) { + store := testkit.CreateMockStore(t) + + testKit := testkit.NewTestKit(t, store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a varchar(100), b int, c int, index idx_ab(a, b))") + testKit.MustExec("insert into t values ('kk', 1, 10), ('kk', 1, 20), ('hh', 2, 10), ('hh', 3, 10), ('xx', 4, 10), ('yy', 5, 10), ('yy', 6, 20), ('zz', 7, 10)") + testKit.MustExec("set @@tidb_opt_fix_control = '44389:ON'") + + var input []string + var output []struct { + SQL string + Plan []string + Result []string + } + rangerSuiteData.LoadTestCases(t, &input, &output) + for i, tt := range input { + testdata.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows()) + output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows()) + }) + testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...)) + testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...)) + } +} diff --git a/util/ranger/testdata/ranger_suite_in.json 
b/util/ranger/testdata/ranger_suite_in.json index fbfc86083e66..a862e85f2450 100644 --- a/util/ranger/testdata/ranger_suite_in.json +++ b/util/ranger/testdata/ranger_suite_in.json @@ -116,5 +116,12 @@ "select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73) and col3 != \"2024-10-19 08:55:32\"", "select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73, 74) and col3 != \"2024-10-19 08:55:32\"" ] + }, + { + "name": "TestIssue44389", + "cases": [ + "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))", + "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))" + ] } ] diff --git a/util/ranger/testdata/ranger_suite_out.json b/util/ranger/testdata/ranger_suite_out.json index 6a86b7b19c4c..8e5385a84384 100644 --- a/util/ranger/testdata/ranger_suite_out.json +++ b/util/ranger/testdata/ranger_suite_out.json @@ -723,5 +723,38 @@ ] } ] + }, + { + "Name": "TestIssue44389", + "Cases": [ + { + "SQL": "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))", + "Plan": [ + "IndexLookUp_11 0.01 root ", + "├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:[\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], keep order:false, stats:pseudo", + "└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)", + " └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo" + ], + "Result": [ + "kk 1 10", + "xx 4 10" + ] + }, + { + "SQL": "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))", + "Plan": [ + "IndexLookUp_11 0.02 root ", + "├─IndexRangeScan_8(Build) 20.20 cop[tikv] table:t, index:idx_ab(a, b) range:[\"hh\" 2,\"hh\" 2], [\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], [\"yy\",\"yy\"], keep order:false, stats:pseudo", + "└─Selection_10(Probe) 0.02 cop[tikv] eq(test.t.c, 10)", + " └─TableRowIDScan_9 20.20 cop[tikv] table:t keep order:false, stats:pseudo" 
+ ], + "Result": [ + "hh 2 10", + "kk 1 10", + "xx 4 10", + "yy 5 10" + ] + } + ] } ]