Skip to content

Commit

Permalink
planner: fix row count estimation for unique composite IndexScan of IndexJoin (#14167)
Browse files Browse the repository at this point in the history
  • Loading branch information
eurekaka authored and sre-bot committed Dec 25, 2019
1 parent 980f72d commit f18abc9
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 18 deletions.
14 changes: 7 additions & 7 deletions cmd/explaintest/r/explain_complex.result
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ id count task operator info
Projection_13 1.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
└─Limit_16 1.00 root offset:0, count:2500
└─HashAgg_19 1.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
└─IndexMergeJoin_30 0.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, test.st.ip), gt(test.dd.t, test.st.t)
├─IndexLookUp_28 0.00 root
│ ├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
│ └─Selection_27 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
│ └─TableScan_26 1.00 cop[tikv] table:dd, keep order:false, stats:pseudo
└─HashRightJoin_34 0.00 root inner join, inner:IndexLookUp_52, equal:[eq(test.dd.aid, test.st.aid) eq(test.dd.ip, test.st.ip)], other cond:gt(test.dd.t, test.st.t)
├─IndexLookUp_52 0.00 root
│ ├─IndexScan_49 3333.33 cop[tikv] table:dd, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
│ └─Selection_51 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), not(isnull(test.dd.ip))
│ └─TableScan_50 3333.33 cop[tikv] table:dd, keep order:false, stats:pseudo
└─IndexLookUp_41 3.33 root
├─IndexScan_38 3333.33 cop[tikv] table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
└─Selection_40 3.33 cop[tikv] eq(test.st.pt, "android"), not(isnull(test.st.ip))
Expand All @@ -137,9 +137,9 @@ Projection_10 0.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd
│ └─Selection_34 0.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), not(isnull(test.st.dic))
│ └─TableScan_33 3333.33 cop[tikv] table:gad, keep order:false, stats:pseudo
└─IndexLookUp_22 0.00 root
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
├─IndexScan_19 10000.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
└─Selection_21 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
└─TableScan_20 10000.00 cop[tikv] table:sdk, keep order:false, stats:pseudo
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
id count task operator info
Projection_5 1.00 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
Expand Down
13 changes: 6 additions & 7 deletions cmd/explaintest/r/explain_complex_stats.result
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,13 @@ id count task operator info
Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
└─Limit_16 424.00 root offset:0, count:2500
└─HashAgg_19 424.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
└─IndexMergeJoin_30 424.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.st.ip, test.dd.ip), gt(test.dd.t, test.st.t)
└─HashRightJoin_34 424.00 root inner join, inner:TableReader_37, equal:[eq(test.st.aid, test.dd.aid) eq(test.st.ip, test.dd.ip)], other cond:gt(test.dd.t, test.st.t)
├─TableReader_37 424.00 root data:Selection_36
│ └─Selection_36 424.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip))
│ └─TableScan_35 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_28 1.00 root
├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
└─Selection_27 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
└─TableScan_26 1.00 cop[tikv] table:dd, keep order:false
└─TableReader_44 455.80 root data:Selection_43
└─Selection_43 455.80 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
└─TableScan_42 2000.00 cop[tikv] table:dd, range:[0,+inf], keep order:false
explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
id count task operator info
Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext
Expand All @@ -145,9 +144,9 @@ Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.
│ └─Selection_30 170.34 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), gt(test.st.t, 1477971479), not(isnull(test.st.dic))
│ └─TableScan_29 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false
└─IndexLookUp_22 1.00 root
├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
├─IndexScan_19 3.93 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
└─Selection_21 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
└─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false
└─TableScan_20 3.93 cop[tikv] table:sdk, keep order:false
explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
id count task operator info
Projection_5 39.28 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
Expand Down
16 changes: 13 additions & 3 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,15 +594,25 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
}
joins = make([]PhysicalPlan, 0, 3)
rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys)
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt)
maxOneRow := false
if helper.chosenPath.Index.Unique && helper.maxUsedCols == len(helper.chosenPath.FullIdxCols) {
l := len(helper.chosenAccess)
if l == 0 {
maxOneRow = true
} else {
sf, ok := helper.chosenAccess[l-1].(*expression.ScalarFunction)
maxOneRow = ok && (sf.FuncName.L == ast.EQ)
}
}
innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)

joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
// The index merge join's inner plan is different from index join, so we
// should construct another inner plan for it.
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt)
innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt, maxOneRow)
joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
}
// We can reuse the `innerTask` here since index nested loop hash join
Expand Down Expand Up @@ -742,6 +752,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
keepOrder bool,
desc bool,
rowCount float64,
maxOneRow bool,
) task {
is := PhysicalIndexScan{
Table: ds.tableInfo,
Expand Down Expand Up @@ -793,7 +804,6 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
if rowCount <= 0 {
rowCount = ds.tableStats.RowCount
}
maxOneRow := path.Index.Unique && len(outerJoinKeys) == len(path.FullIdxCols)
if maxOneRow {
// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
Expand Down
27 changes: 26 additions & 1 deletion planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,5 +303,30 @@ func (s *testIntegrationSuite) TestINLJHintSmallTable(c *C) {
tk.MustExec("insert into t2 values(1,1),(2,2),(3,3),(4,4),(5,5)")
tk.MustExec("analyze table t1, t2")
tk.MustExec("explain select /*+ TIDB_INLJ(t1) */ * from t1 join t2 on t1.a = t2.a")
tk.MustQuery("show warnings").Check(testkit.Rows())
}

// TestIndexJoinUniqueCompositeIndex runs the explain statements registered for
// this test in the suite's test data against a table with a two-column primary
// key (a, b), and compares every resulting plan with the stored expectation.
//
// t1 holds a single row and t2 holds two rows sharing the same value of `a`,
// so an index join that binds only the `a` prefix of the primary key matches
// more than one inner row.
func (s *testIntegrationSuite) TestIndexJoinUniqueCompositeIndex(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	// Schema and data setup; statements are executed strictly in this order.
	setupStmts := []string{
		"use test",
		"drop table if exists t1, t2",
		"create table t1(a int not null, c int not null)",
		"create table t2(a int not null, b int not null, c int not null, primary key(a,b))",
		"insert into t1 values(1,1)",
		"insert into t2 values(1,1,1),(1,2,1)",
		"analyze table t1,t2",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	var (
		input  []string
		output []struct {
			SQL  string
			Plan []string
		}
	)
	s.testData.GetTestCases(c, &input, &output)

	for idx, query := range input {
		// NOTE(review): OnRecord presumably invokes the callback only when the
		// suite is regenerating expected output; confirm against testData.
		s.testData.OnRecord(func() {
			output[idx].SQL = query
			output[idx].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(query).Rows())
		})
		expected := testkit.Rows(output[idx].Plan...)
		tk.MustQuery(query).Check(expected)
	}
}
11 changes: 11 additions & 0 deletions planner/core/testdata/integration_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@
"explain select * from t t1 left join t t2 on t1.a = t2.a where cast(t1.b as date) >= '2019-01-01'"
]
},
{
"name": "TestIndexJoinUniqueCompositeIndex",
"cases": [
// Row count of IndexScan should be 2.
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
// Row count of IndexScan should be 2.
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
// Row count of IndexScan should be 1.
"explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1"
]
},
{
"name": "TestPartitionTableStats",
"cases": [
Expand Down
38 changes: 38 additions & 0 deletions planner/core/testdata/integration_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,44 @@
}
]
},
{
"Name": "TestIndexJoinUniqueCompositeIndex",
"Cases": [
{
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
"Plan": [
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:eq(test.t1.c, test.t2.c)",
"├─TableReader_19 1.00 root data:TableScan_18",
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
"└─IndexLookUp_8 2.00 root ",
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false",
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
]
},
{
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
"Plan": [
"IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:le(test.t1.c, test.t2.b)",
"├─TableReader_19 1.00 root data:TableScan_18",
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
"└─IndexLookUp_8 2.00 root ",
" ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) le(test.t1.c, test.t2.b)], keep order:false",
" └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
]
},
{
"SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1",
"Plan": [
"IndexJoin_9 1.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a",
"├─TableReader_19 1.00 root data:TableScan_18",
"│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
"└─IndexLookUp_8 1.00 root ",
" ├─IndexScan_6 1.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) eq(test.t2.b, 1)], keep order:false",
" └─TableScan_7 1.00 cop[tikv] table:t2, keep order:false"
]
}
]
},
{
"Name": "TestPartitionTableStats",
"Cases": [
Expand Down

0 comments on commit f18abc9

Please sign in to comment.