Skip to content

Commit

Permalink
util/ranger: consider good non-point ranges from CNF item (#44384)
Browse files Browse the repository at this point in the history
close #44389
  • Loading branch information
xuyifangreeneyes committed Jun 5, 2023
1 parent 9bac155 commit 85d6323
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 44 deletions.
2 changes: 2 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1497,6 +1497,8 @@ var (

// TiDBOptFixControl44262 controls whether dynamic-mode is allowed to access partitioned tables without global-stats (#44262).
TiDBOptFixControl44262 uint64 = 44262
// TiDBOptFixControl44389 controls whether to consider non-point ranges of some CNF item when building ranges.
TiDBOptFixControl44389 uint64 = 44389
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
Expand Down
2 changes: 2 additions & 0 deletions util/ranger/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ go_library(
"//parser/terror",
"//sessionctx",
"//sessionctx/stmtctx",
"//sessionctx/variable",
"//types",
"//types/parser_driver",
"//util/chunk",
"//util/codec",
"//util/collate",
"//util/dbterror",
"//util/mathutil",
"@com_github_pingcap_errors//:errors",
"@org_golang_x_exp//slices",
],
Expand Down
137 changes: 94 additions & 43 deletions util/ranger/detacher.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import (
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/mathutil"
)

// detachColumnCNFConditions detaches the condition for calculating range from the other conditions.
Expand Down Expand Up @@ -185,19 +187,67 @@ func getPotentialEqOrInColOffset(sctx sessionctx.Context, expr expression.Expres
return -1
}

// extractIndexPointRangesForCNF extracts a CNF item from the input CNF expressions, such that the CNF item
// is totally composed of point range filters.
// cnfItemRangeResult pairs the ranges built from one CNF item with the summary
// facts that are used to rank it against the ranges built from other CNF items.
type cnfItemRangeResult struct {
	// rangeResult holds the ranges (and related detach output) built from the CNF item.
	rangeResult *DetachRangeResult
	// offset is the position of the CNF item inside the condition list it was taken from.
	offset int
	// sameLenPointRanges reports that every range is a point range and that all
	// ranges span the same number of columns (i.e. maxColNum == minColNum).
	sameLenPointRanges bool
	// maxColNum and minColNum are the largest and smallest len(LowVal) seen
	// across rangeResult.Ranges.
	maxColNum, minColNum int
}

// getCNFItemRangeResult summarizes rangeResult.Ranges into a cnfItemRangeResult.
//
// It records the smallest and largest len(LowVal) found across the ranges and
// sets sameLenPointRanges only when every range satisfies IsPoint and those two
// counts are equal. rangeResult and offset are stored as given.
func getCNFItemRangeResult(sctx sessionctx.Context, rangeResult *DetachRangeResult, offset int) *cnfItemRangeResult {
	summary := &cnfItemRangeResult{
		rangeResult:        rangeResult,
		offset:             offset,
		sameLenPointRanges: true,
	}
	for idx, r := range rangeResult.Ranges {
		// Every range is inspected; a single non-point range clears the flag.
		if !r.IsPoint(sctx) {
			summary.sameLenPointRanges = false
		}
		colNum := len(r.LowVal)
		if idx == 0 {
			// The first range seeds both bounds.
			summary.maxColNum, summary.minColNum = colNum, colNum
			continue
		}
		summary.maxColNum = mathutil.Max(summary.maxColNum, colNum)
		summary.minColNum = mathutil.Min(summary.minColNum, colNum)
	}
	// Ranges of differing lengths never count as "same length point ranges".
	if summary.minColNum != summary.maxColNum {
		summary.sameLenPointRanges = false
	}
	return summary
}

// compareCNFItemRangeResult reports whether curResult should replace bestResult.
//
// Ordering rules, in priority order:
//  1. If exactly one of the two consists of same-length point ranges, that one
//     wins: point ranges are better than non-point ranges because subsequent
//     column ranges can be appended to point ranges.
//  2. Otherwise the one with the larger minColNum wins.
//  3. On equal minColNum, non-point results are ranked by larger maxColNum;
//     same-length point results are considered tied, so the current best is kept.
func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIsBetter bool) {
	if curResult.sameLenPointRanges != bestResult.sameLenPointRanges {
		return curResult.sameLenPointRanges
	}
	if curResult.minColNum != bestResult.minColNum {
		return curResult.minColNum > bestResult.minColNum
	}
	if curResult.sameLenPointRanges {
		// Both are same-length point ranges over the same number of columns.
		return false
	}
	return curResult.maxColNum > bestResult.maxColNum
}

// extractBestCNFItemRanges builds ranges for each CNF item from the input CNF expressions and returns the best CNF
// item ranges.
// e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2))
// ((a,b,c) in (1,1,1),(2,2,2)) would be extracted.
func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*DetachRangeResult, int, []*valueInfo, error) {
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column,
lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) {
if len(conds) < 2 {
return nil, -1, nil, nil
return nil, nil, nil
}
var r *DetachRangeResult
var bestRes *cnfItemRangeResult
columnValues := make([]*valueInfo, len(cols))
maxNumCols := int(0)
offset := int(-1)
for i, cond := range conds {
tmpConds := []expression.Expression{cond}
colSets := expression.ExtractColumnSet(cond)
Expand All @@ -213,43 +263,25 @@ func extractIndexPointRangesForCNF(sctx sessionctx.Context, conds []expression.E
// which are not point ranges, and we cannot append `c = 1` anymore.
res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize)
if err != nil {
return nil, -1, nil, err
return nil, nil, err
}
if len(res.Ranges) == 0 {
return &DetachRangeResult{}, -1, nil, nil
return &cnfItemRangeResult{rangeResult: res, offset: i}, nil, nil
}
// take the union of the two columnValues
columnValues = unionColumnValues(columnValues, res.ColumnValues)
if len(res.AccessConds) == 0 || len(res.RemainedConds) > 0 {
continue
}
sameLens, allPoints := true, true
numCols := int(0)
for j, ran := range res.Ranges {
if !ran.IsPoint(sctx) {
allPoints = false
break
}
if j == 0 {
numCols = len(ran.LowVal)
} else if numCols != len(ran.LowVal) {
sameLens = false
break
}
}
if !allPoints || !sameLens {
continue
}
if numCols > maxNumCols {
r = res
offset = i
maxNumCols = numCols
curRes := getCNFItemRangeResult(sctx, res, i)
if bestRes == nil || compareCNFItemRangeResult(curRes, bestRes) {
bestRes = curRes
}
}
if r != nil {
r.IsDNFCond = false
if bestRes != nil && bestRes.rangeResult != nil {
bestRes.rangeResult.IsDNFCond = false
}
return r, offset, columnValues, nil
return bestRes, columnValues, nil
}

func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo {
Expand Down Expand Up @@ -344,27 +376,46 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi
optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan,
}
if considerDNF {
pointRes, offset, columnValues, err := extractIndexPointRangesForCNF(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize)
if err != nil {
return nil, err
}
res.ColumnValues = unionColumnValues(res.ColumnValues, columnValues)
if pointRes != nil {
if len(pointRes.Ranges) == 0 {
if bestCNFItemRes != nil && bestCNFItemRes.rangeResult != nil {
if len(bestCNFItemRes.rangeResult.Ranges) == 0 {
return &DetachRangeResult{}, nil
}
if len(pointRes.Ranges[0].LowVal) > eqOrInCount {
pointRes.ColumnValues = res.ColumnValues
res = pointRes
pointRanges = pointRes.Ranges
if bestCNFItemRes.sameLenPointRanges && bestCNFItemRes.minColNum > eqOrInCount {
bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues
res = bestCNFItemRes.rangeResult
pointRanges = bestCNFItemRes.rangeResult.Ranges
eqOrInCount = len(res.Ranges[0].LowVal)
newConditions = newConditions[:0]
newConditions = append(newConditions, conditions[:offset]...)
newConditions = append(newConditions, conditions[offset+1:]...)
newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...)
newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...)
if eqOrInCount == len(d.cols) || len(newConditions) == 0 {
res.RemainedConds = append(res.RemainedConds, newConditions...)
return res, nil
}
} else {
considerCNFItemNonPointRanges := false
fixValue, ok := d.sctx.GetSessionVars().GetOptimizerFixControlValue(variable.TiDBOptFixControl44389)
if ok && variable.TiDBOptOn(fixValue) {
considerCNFItemNonPointRanges = true
}
if considerCNFItemNonPointRanges && !bestCNFItemRes.sameLenPointRanges && eqOrInCount == 0 && bestCNFItemRes.minColNum > 0 && bestCNFItemRes.maxColNum > 1 {
// When eqOrInCount is 0, if we don't enter the IF branch, we would use detachColumnCNFConditions to build
// ranges on the first index column.
// Considering minColNum > 0 and maxColNum > 1, bestCNFItemRes is better than the ranges built by detachColumnCNFConditions
// in most cases.
bestCNFItemRes.rangeResult.ColumnValues = res.ColumnValues
res = bestCNFItemRes.rangeResult
newConditions = newConditions[:0]
newConditions = append(newConditions, conditions[:bestCNFItemRes.offset]...)
newConditions = append(newConditions, conditions[bestCNFItemRes.offset+1:]...)
res.RemainedConds = append(res.RemainedConds, newConditions...)
return res, nil
}
}
}
if eqOrInCount > 0 {
Expand Down
30 changes: 29 additions & 1 deletion util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,7 @@ func TestIssue41572(t *testing.T) {
testdata.OnRecord(func() {
output[i].SQL = tt
output[i].Plan = testdata.ConvertRowsToStrings(testKit.MustQuery("explain " + tt).Rows())
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Rows())
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(tt).Sort().Rows())
})
testKit.MustQuery("explain " + tt).Check(testkit.Rows(output[i].Plan...))
testKit.MustQuery(tt).Sort().Check(testkit.Rows(output[i].Result...))
Expand Down Expand Up @@ -2592,3 +2592,31 @@ create table t(
require.Equal(t, tt.resultStr, got, fmt.Sprintf("different for expr %s", tt.exprStr))
}
}

// TestIssue44389 runs the recorded TestIssue44389 cases with optimizer fix
// control 44389 switched on and checks both the explain output and the sorted
// query results against the recorded expectations.
func TestIssue44389(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)

	// Schema, data and the fix-control switch, executed in this exact order.
	setupStmts := []string{
		"use test",
		"drop table if exists t",
		"create table t(a varchar(100), b int, c int, index idx_ab(a, b))",
		"insert into t values ('kk', 1, 10), ('kk', 1, 20), ('hh', 2, 10), ('hh', 3, 10), ('xx', 4, 10), ('yy', 5, 10), ('yy', 6, 20), ('zz', 7, 10)",
		"set @@tidb_opt_fix_control = '44389:ON'",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	var (
		input  []string
		output []struct {
			SQL    string
			Plan   []string
			Result []string
		}
	)
	rangerSuiteData.LoadTestCases(t, &input, &output)

	for i, sql := range input {
		explainSQL := "explain " + sql
		// In record mode the expectations are regenerated from the live output.
		testdata.OnRecord(func() {
			output[i].SQL = sql
			output[i].Plan = testdata.ConvertRowsToStrings(tk.MustQuery(explainSQL).Rows())
			output[i].Result = testdata.ConvertRowsToStrings(tk.MustQuery(sql).Sort().Rows())
		})
		tk.MustQuery(explainSQL).Check(testkit.Rows(output[i].Plan...))
		// Results are sorted before comparison so row order does not matter.
		tk.MustQuery(sql).Sort().Check(testkit.Rows(output[i].Result...))
	}
}
7 changes: 7 additions & 0 deletions util/ranger/testdata/ranger_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,12 @@
"select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73) and col3 != \"2024-10-19 08:55:32\"",
"select * from IDT_20755 use index (u_m_col) where col1 = \"xxxxxxxxxxxxxxx\" and col2 in (72, 73, 74) and col3 != \"2024-10-19 08:55:32\""
]
},
{
"name": "TestIssue44389",
"cases": [
"select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))",
"select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))"
]
}
]
33 changes: 33 additions & 0 deletions util/ranger/testdata/ranger_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -723,5 +723,38 @@
]
}
]
},
{
"Name": "TestIssue44389",
"Cases": [
{
"SQL": "select * from t where c = 10 and (a = 'xx' or (a = 'kk' and b = 1))",
"Plan": [
"IndexLookUp_11 0.01 root ",
"├─IndexRangeScan_8(Build) 10.10 cop[tikv] table:t, index:idx_ab(a, b) range:[\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], keep order:false, stats:pseudo",
"└─Selection_10(Probe) 0.01 cop[tikv] eq(test.t.c, 10)",
" └─TableRowIDScan_9 10.10 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Result": [
"kk 1 10",
"xx 4 10"
]
},
{
"SQL": "select * from t where c = 10 and ((a = 'xx' or a = 'yy') or ((a = 'kk' and b = 1) or (a = 'hh' and b = 2)))",
"Plan": [
"IndexLookUp_11 0.02 root ",
"├─IndexRangeScan_8(Build) 20.20 cop[tikv] table:t, index:idx_ab(a, b) range:[\"hh\" 2,\"hh\" 2], [\"kk\" 1,\"kk\" 1], [\"xx\",\"xx\"], [\"yy\",\"yy\"], keep order:false, stats:pseudo",
"└─Selection_10(Probe) 0.02 cop[tikv] eq(test.t.c, 10)",
" └─TableRowIDScan_9 20.20 cop[tikv] table:t keep order:false, stats:pseudo"
],
"Result": [
"hh 2 10",
"kk 1 10",
"xx 4 10",
"yy 5 10"
]
}
]
}
]

0 comments on commit 85d6323

Please sign in to comment.