From f18abc980ddf632872ea5d8ecc0eeffd8469d3f5 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 25 Dec 2019 19:53:10 +0800 Subject: [PATCH] planner: fix row count estimation for unique composite IndexScan of IndexJoin (#14167) --- cmd/explaintest/r/explain_complex.result | 14 +++---- .../r/explain_complex_stats.result | 13 +++---- planner/core/exhaust_physical_plans.go | 16 ++++++-- planner/core/integration_test.go | 27 ++++++++++++- .../core/testdata/integration_suite_in.json | 11 ++++++ .../core/testdata/integration_suite_out.json | 38 +++++++++++++++++++ 6 files changed, 101 insertions(+), 18 deletions(-) diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index 682c6405bbf1e..1499e1af4c6ce 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -118,11 +118,11 @@ id count task operator info Projection_13 1.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t └─Limit_16 1.00 root offset:0, count:2500 └─HashAgg_19 1.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t - └─IndexMergeJoin_30 0.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, test.st.ip), gt(test.dd.t, test.st.t) - ├─IndexLookUp_28 0.00 root - │ ├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo - │ └─Selection_27 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) - │ └─TableScan_26 1.00 cop[tikv] table:dd, keep order:false, stats:pseudo + └─HashRightJoin_34 0.00 root inner join, inner:IndexLookUp_52, equal:[eq(test.dd.aid, test.st.aid) eq(test.dd.ip, test.st.ip)], other cond:gt(test.dd.t, test.st.t) + ├─IndexLookUp_52 0.00 root + │ ├─IndexScan_49 3333.33 cop[tikv] table:dd, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo + │ └─Selection_51 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), not(isnull(test.dd.ip)) + │ └─TableScan_50 3333.33 cop[tikv] table:dd, keep order:false, stats:pseudo └─IndexLookUp_41 3.33 root ├─IndexScan_38 3333.33 cop[tikv] table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo └─Selection_40 3.33 cop[tikv] eq(test.st.pt, "android"), not(isnull(test.st.ip)) @@ -137,9 +137,9 @@ Projection_10 0.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd │ └─Selection_34 0.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), not(isnull(test.st.dic)) │ └─TableScan_33 3333.33 cop[tikv] table:gad, keep order:false, stats:pseudo └─IndexLookUp_22 0.00 root - ├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo + ├─IndexScan_19 10000.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo └─Selection_21 0.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t)) - └─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false, stats:pseudo + └─TableScan_20 10000.00 cop[tikv] table:sdk, keep order:false, stats:pseudo explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; id count task operator info Projection_5 1.00 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21 diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 67e9f52f7d9b4..ffc36a4b49c0d 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -128,14 +128,13 @@ id count task operator info Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t └─Limit_16 424.00 root offset:0, count:2500 └─HashAgg_19 424.00 root group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t - └─IndexMergeJoin_30 424.00 root inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.st.ip, test.dd.ip), gt(test.dd.t, test.st.t) + └─HashRightJoin_34 424.00 root inner join, inner:TableReader_37, equal:[eq(test.st.aid, test.dd.aid) eq(test.st.ip, test.dd.ip)], other cond:gt(test.dd.t, test.st.t) ├─TableReader_37 424.00 root data:Selection_36 │ └─Selection_36 424.00 cop[tikv] eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip)) │ └─TableScan_35 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false - └─IndexLookUp_28 1.00 root - ├─IndexScan_25 1.00 cop[tikv] table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true - └─Selection_27 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) - └─TableScan_26 1.00 cop[tikv] table:dd, keep order:false + └─TableReader_44 455.80 root data:Selection_43 + └─Selection_43 455.80 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) + └─TableScan_42 2000.00 cop[tikv] table:dd, range:[0,+inf], keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; id count task operator info Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext @@ -145,9 +144,9 @@ Projection_10 170.34 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test. │ └─Selection_30 170.34 cop[tikv] eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), gt(test.st.t, 1477971479), not(isnull(test.st.dic)) │ └─TableScan_29 1999.00 cop[tikv] table:gad, range:[0,+inf], keep order:false └─IndexLookUp_22 1.00 root - ├─IndexScan_19 1.00 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true + ├─IndexScan_19 3.93 cop[tikv] table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true └─Selection_21 1.00 cop[tikv] eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t)) - └─TableScan_20 1.00 cop[tikv] table:sdk, keep order:false + └─TableScan_20 3.93 cop[tikv] table:sdk, keep order:false explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; id count task operator info Projection_5 39.28 root test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21 diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index ecc566a0693cc..d17d8adc7d946 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -594,7 +594,17 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan( } joins = make([]PhysicalPlan, 0, 3) rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys) - innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt) + maxOneRow := false + if helper.chosenPath.Index.Unique && helper.maxUsedCols == len(helper.chosenPath.FullIdxCols) { + l := len(helper.chosenAccess) + if l == 0 { + maxOneRow = true + } else { + sf, ok := helper.chosenAccess[l-1].(*expression.ScalarFunction) + maxOneRow = ok && (sf.FuncName.L == ast.EQ) + } + } + innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow) joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...) // The index merge join's inner plan is different from index join, so we @@ -602,7 +612,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan( // Because we can't keep order for union scan, if there is a union scan in inner task, // we can't construct index merge join. if us == nil { - innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt) + innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt, maxOneRow) joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...) } // We can reuse the `innerTask` here since index nested loop hash join @@ -742,6 +752,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask( keepOrder bool, desc bool, rowCount float64, + maxOneRow bool, ) task { is := PhysicalIndexScan{ Table: ds.tableInfo, @@ -793,7 +804,6 @@ func (p *LogicalJoin) constructInnerIndexScanTask( if rowCount <= 0 { rowCount = ds.tableStats.RowCount } - maxOneRow := path.Index.Unique && len(outerJoinKeys) == len(path.FullIdxCols) if maxOneRow { // Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger // than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 424961cbd828e..d0da765a00960 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -303,5 +303,30 @@ func (s *testIntegrationSuite) TestINLJHintSmallTable(c *C) { tk.MustExec("insert into t2 values(1,1),(2,2),(3,3),(4,4),(5,5)") tk.MustExec("analyze table t1, t2") tk.MustExec("explain select /*+ TIDB_INLJ(t1) */ * from t1 join t2 on t1.a = t2.a") - tk.MustQuery("show warnings").Check(testkit.Rows()) +} + +func (s *testIntegrationSuite) TestIndexJoinUniqueCompositeIndex(c *C) { + tk := testkit.NewTestKit(c, s.store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(a int not null, c int not null)") + tk.MustExec("create table t2(a int not null, b int not null, c int not null, primary key(a,b))") + tk.MustExec("insert into t1 values(1,1)") + tk.MustExec("insert into t2 values(1,1,1),(1,2,1)") + tk.MustExec("analyze table t1,t2") + + var input []string + var output []struct { + SQL string + Plan []string + } + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows()) + }) + tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...)) + } } diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json index 350b3860140c0..e20b75f153df3 100644 --- a/planner/core/testdata/integration_suite_in.json +++ b/planner/core/testdata/integration_suite_in.json @@ -26,6 +26,17 @@ "explain select * from t t1 left join t t2 on t1.a = t2.a where cast(t1.b as date) >= '2019-01-01'" ] }, + { + "name": "TestIndexJoinUniqueCompositeIndex", + "cases": [ + // Row count of IndexScan should be 2. + "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c", + // Row count of IndexScan should be 2. + "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b", + // Row count of IndexScan should be 1. + "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1" + ] + }, { "name": "TestPartitionTableStats", "cases": [ diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 32441f7b05f0d..c4b846647615e 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -78,6 +78,44 @@ } ] }, + { + "Name": "TestIndexJoinUniqueCompositeIndex", + "Cases": [ + { + "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c", + "Plan": [ + "IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:eq(test.t1.c, test.t2.c)", + "├─TableReader_19 1.00 root data:TableScan_18", + "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false", + "└─IndexLookUp_8 2.00 root ", + " ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false", + " └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false" + ] + }, + { + "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b", + "Plan": [ + "IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:le(test.t1.c, test.t2.b)", + "├─TableReader_19 1.00 root data:TableScan_18", + "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false", + "└─IndexLookUp_8 2.00 root ", + " ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) le(test.t1.c, test.t2.b)], keep order:false", + " └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false" + ] + }, + { + "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1", + "Plan": [ + "IndexJoin_9 1.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a", + "├─TableReader_19 1.00 root data:TableScan_18", + "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false", + "└─IndexLookUp_8 1.00 root ", + " ├─IndexScan_6 1.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) eq(test.t2.b, 1)], keep order:false", + " └─TableScan_7 1.00 cop[tikv] table:t2, keep order:false" + ] + } + ] + }, { "Name": "TestPartitionTableStats", "Cases": [