From f18abc980ddf632872ea5d8ecc0eeffd8469d3f5 Mon Sep 17 00:00:00 2001
From: Kenan Yao <cauchy1992@gmail.com>
Date: Wed, 25 Dec 2019 19:53:10 +0800
Subject: [PATCH] planner: fix row count estimation for unique composite
 IndexScan of IndexJoin (#14167)

---
 cmd/explaintest/r/explain_complex.result      | 14 +++----
 .../r/explain_complex_stats.result            | 13 +++----
 planner/core/exhaust_physical_plans.go        | 16 ++++++--
 planner/core/integration_test.go              | 27 ++++++++++++-
 .../core/testdata/integration_suite_in.json   | 11 ++++++
 .../core/testdata/integration_suite_out.json  | 38 +++++++++++++++++++
 6 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result
index 682c6405bbf1e..1499e1af4c6ce 100644
--- a/cmd/explaintest/r/explain_complex.result
+++ b/cmd/explaintest/r/explain_complex.result
@@ -118,11 +118,11 @@ id	count	task	operator info
 Projection_13	1.00	root	test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
 └─Limit_16	1.00	root	offset:0, count:2500
   └─HashAgg_19	1.00	root	group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
-    └─IndexMergeJoin_30	0.00	root	inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, test.st.ip), gt(test.dd.t, test.st.t)
-      ├─IndexLookUp_28	0.00	root	
-      │ ├─IndexScan_25	1.00	cop[tikv]	table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
-      │ └─Selection_27	0.00	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
-      │   └─TableScan_26	1.00	cop[tikv]	table:dd, keep order:false, stats:pseudo
+    └─HashRightJoin_34	0.00	root	inner join, inner:IndexLookUp_52, equal:[eq(test.dd.aid, test.st.aid) eq(test.dd.ip, test.st.ip)], other cond:gt(test.dd.t, test.st.t)
+      ├─IndexLookUp_52	0.00	root	
+      │ ├─IndexScan_49	3333.33	cop[tikv]	table:dd, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
+      │ └─Selection_51	0.00	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), not(isnull(test.dd.ip))
+      │   └─TableScan_50	3333.33	cop[tikv]	table:dd, keep order:false, stats:pseudo
       └─IndexLookUp_41	3.33	root	
         ├─IndexScan_38	3333.33	cop[tikv]	table:gad, index:t, range:(1478143908,+inf], keep order:false, stats:pseudo
         └─Selection_40	3.33	cop[tikv]	eq(test.st.pt, "android"), not(isnull(test.st.ip))
@@ -137,9 +137,9 @@ Projection_10	0.00	root	test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd
     │ └─Selection_34	0.00	cop[tikv]	eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), not(isnull(test.st.dic))
     │   └─TableScan_33	3333.33	cop[tikv]	table:gad, keep order:false, stats:pseudo
     └─IndexLookUp_22	0.00	root	
-      ├─IndexScan_19	1.00	cop[tikv]	table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
+      ├─IndexScan_19	10000.00	cop[tikv]	table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true, stats:pseudo
       └─Selection_21	0.00	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
-        └─TableScan_20	1.00	cop[tikv]	table:sdk, keep order:false, stats:pseudo
+        └─TableScan_20	10000.00	cop[tikv]	table:sdk, keep order:false, stats:pseudo
 explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
 id	count	task	operator info
 Projection_5	1.00	root	test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
index 67e9f52f7d9b4..ffc36a4b49c0d 100644
--- a/cmd/explaintest/r/explain_complex_stats.result
+++ b/cmd/explaintest/r/explain_complex_stats.result
@@ -128,14 +128,13 @@ id	count	task	operator info
 Projection_13	424.00	root	test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
 └─Limit_16	424.00	root	offset:0, count:2500
   └─HashAgg_19	424.00	root	group by:test.dd.dic, test.st.aid, funcs:firstrow(test.st.id)->test.st.id, funcs:firstrow(test.st.aid)->test.st.aid, funcs:firstrow(test.st.cm)->test.st.cm, funcs:firstrow(test.st.p1)->test.st.p1, funcs:firstrow(test.st.p2)->test.st.p2, funcs:firstrow(test.st.p3)->test.st.p3, funcs:firstrow(test.st.p4)->test.st.p4, funcs:firstrow(test.st.p5)->test.st.p5, funcs:firstrow(test.st.p6_md5)->test.st.p6_md5, funcs:firstrow(test.st.p7_md5)->test.st.p7_md5, funcs:firstrow(test.st.ext)->test.st.ext, funcs:firstrow(test.st.t)->test.st.t, funcs:firstrow(test.dd.id)->test.dd.id, funcs:firstrow(test.dd.dic)->test.dd.dic, funcs:firstrow(test.dd.ip)->test.dd.ip, funcs:firstrow(test.dd.t)->test.dd.t
-    └─IndexMergeJoin_30	424.00	root	inner join, inner:IndexLookUp_28, outer key:test.st.aid, inner key:test.dd.aid, other cond:eq(test.st.ip, test.dd.ip), gt(test.dd.t, test.st.t)
+    └─HashRightJoin_34	424.00	root	inner join, inner:TableReader_37, equal:[eq(test.st.aid, test.dd.aid) eq(test.st.ip, test.dd.ip)], other cond:gt(test.dd.t, test.st.t)
       ├─TableReader_37	424.00	root	data:Selection_36
       │ └─Selection_36	424.00	cop[tikv]	eq(test.st.bm, 0), eq(test.st.pt, "android"), gt(test.st.t, 1478143908), not(isnull(test.st.ip))
       │   └─TableScan_35	1999.00	cop[tikv]	table:gad, range:[0,+inf], keep order:false
-      └─IndexLookUp_28	1.00	root	
-        ├─IndexScan_25	1.00	cop[tikv]	table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
-        └─Selection_27	1.00	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
-          └─TableScan_26	1.00	cop[tikv]	table:dd, keep order:false
+      └─TableReader_44	455.80	root	data:Selection_43
+        └─Selection_43	455.80	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t))
+          └─TableScan_42	2000.00	cop[tikv]	table:dd, range:[0,+inf], keep order:false
 explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000;
 id	count	task	operator info
 Projection_10	170.34	root	test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext
@@ -145,9 +144,9 @@ Projection_10	170.34	root	test.st.id, test.dd.id, test.st.aid, test.st.cm, test.
     │ └─Selection_30	170.34	cop[tikv]	eq(test.st.bm, 0), eq(test.st.dit, "mac"), eq(test.st.pt, "ios"), gt(test.st.t, 1477971479), not(isnull(test.st.dic))
     │   └─TableScan_29	1999.00	cop[tikv]	table:gad, range:[0,+inf], keep order:false
     └─IndexLookUp_22	1.00	root	
-      ├─IndexScan_19	1.00	cop[tikv]	table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
+      ├─IndexScan_19	3.93	cop[tikv]	table:sdk, index:aid, dic, range: decided by [eq(test.dd.aid, test.st.aid)], keep order:true
       └─Selection_21	1.00	cop[tikv]	eq(test.dd.bm, 0), eq(test.dd.pt, "ios"), gt(test.dd.t, 1477971479), not(isnull(test.dd.mac)), not(isnull(test.dd.t))
-        └─TableScan_20	1.00	cop[tikv]	table:sdk, keep order:false
+        └─TableScan_20	3.93	cop[tikv]	table:sdk, keep order:false
 explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5;
 id	count	task	operator info
 Projection_5	39.28	root	test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, Column#20, Column#21
diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go
index ecc566a0693cc..d17d8adc7d946 100644
--- a/planner/core/exhaust_physical_plans.go
+++ b/planner/core/exhaust_physical_plans.go
@@ -594,7 +594,17 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
 	}
 	joins = make([]PhysicalPlan, 0, 3)
 	rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys)
-	innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt)
+	maxOneRow := false
+	if helper.chosenPath.Index.Unique && helper.maxUsedCols == len(helper.chosenPath.FullIdxCols) {
+		l := len(helper.chosenAccess)
+		if l == 0 {
+			maxOneRow = true
+		} else {
+			sf, ok := helper.chosenAccess[l-1].(*expression.ScalarFunction)
+			maxOneRow = ok && (sf.FuncName.L == ast.EQ)
+		}
+	}
+	innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt, maxOneRow)
 
 	joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
 	// The index merge join's inner plan is different from index join, so we
@@ -602,7 +612,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
 	// Because we can't keep order for union scan, if there is a union scan in inner task,
 	// we can't construct index merge join.
 	if us == nil {
-		innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt)
+		innerTask2 := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, true, !prop.IsEmpty() && prop.Items[0].Desc, avgInnerRowCnt, maxOneRow)
 		joins = append(joins, p.constructIndexMergeJoin(prop, outerIdx, innerTask2, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...)
 	}
 	// We can reuse the `innerTask` here since index nested loop hash join
@@ -742,6 +752,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 	keepOrder bool,
 	desc bool,
 	rowCount float64,
+	maxOneRow bool,
 ) task {
 	is := PhysicalIndexScan{
 		Table:            ds.tableInfo,
@@ -793,7 +804,6 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
 	if rowCount <= 0 {
 		rowCount = ds.tableStats.RowCount
 	}
-	maxOneRow := path.Index.Unique && len(outerJoinKeys) == len(path.FullIdxCols)
 	if maxOneRow {
 		// Theoretically, this line is unnecessary because row count estimation of join should guarantee rowCount is not larger
 		// than 1.0; however, there may be rowCount larger than 1.0 in reality, e.g, pseudo statistics cases, which does not reflect
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index 424961cbd828e..d0da765a00960 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -303,5 +303,30 @@ func (s *testIntegrationSuite) TestINLJHintSmallTable(c *C) {
 	tk.MustExec("insert into t2 values(1,1),(2,2),(3,3),(4,4),(5,5)")
 	tk.MustExec("analyze table t1, t2")
 	tk.MustExec("explain select /*+ TIDB_INLJ(t1) */ * from t1 join t2 on t1.a = t2.a")
-	tk.MustQuery("show warnings").Check(testkit.Rows())
+}
+
+func (s *testIntegrationSuite) TestIndexJoinUniqueCompositeIndex(c *C) {
+	tk := testkit.NewTestKit(c, s.store)
+	// Fixture: t2 has a composite primary key (a, b) and two rows sharing a = 1; t1 has a single row with a = 1.
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1, t2")
+	tk.MustExec("create table t1(a int not null, c int not null)")
+	tk.MustExec("create table t2(a int not null, b int not null, c int not null, primary key(a,b))")
+	tk.MustExec("insert into t1 values(1,1)")
+	tk.MustExec("insert into t2 values(1,1,1),(1,2,1)")
+	tk.MustExec("analyze table t1,t2")
+	// Statements and their expected result rows are filled in by s.testData.GetTestCases below.
+	var input []string
+	var output []struct {
+		SQL  string   // the statement that was run
+		Plan []string // result rows of that statement, one string per row
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		s.testData.OnRecord(func() { // NOTE(review): presumably invoked only when expected output is being (re)recorded - confirm
+			output[i].SQL = tt
+			output[i].Plan = s.testData.ConvertRowsToStrings(tk.MustQuery(tt).Rows())
+		})
+		tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...)) // actual rows must match the expected plan rows
+	}
 }
diff --git a/planner/core/testdata/integration_suite_in.json b/planner/core/testdata/integration_suite_in.json
index 350b3860140c0..e20b75f153df3 100644
--- a/planner/core/testdata/integration_suite_in.json
+++ b/planner/core/testdata/integration_suite_in.json
@@ -26,6 +26,17 @@
       "explain select * from t t1 left join t t2 on t1.a = t2.a where cast(t1.b as date) >= '2019-01-01'"
     ]
   },
+  {
+    "name": "TestIndexJoinUniqueCompositeIndex",
+    "cases": [
+      // Only the prefix column a of the unique index (a, b) is matched (c is not an index column), so row count of IndexScan should be 2.
+      "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
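+      // Both index columns are used, but the condition on the last column b is a range rather than an equality, so row count of IndexScan should be 2.
+      "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",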
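+      // Every column of the unique index is matched by an equality (a by the join key, b by the constant 1), so row count of IndexScan should be 1.
+      "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1"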
+    ]
+  },
   {
     "name": "TestPartitionTableStats",
     "cases": [
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 32441f7b05f0d..c4b846647615e 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -78,6 +78,44 @@
       }
     ]
   },
+  {
+    "Name": "TestIndexJoinUniqueCompositeIndex",
+    "Cases": [
+      {
+        "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c = t2.c",
+        "Plan": [
+          "IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:eq(test.t1.c, test.t2.c)",
+          "├─TableReader_19 1.00 root data:TableScan_18",
+          "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
+          "└─IndexLookUp_8 2.00 root ",
+          "  ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false",
+          "  └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
+        ]
+      },
+      {
+        "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t1.c <= t2.b",
+        "Plan": [
+          "IndexJoin_9 2.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a, other cond:le(test.t1.c, test.t2.b)",
+          "├─TableReader_19 1.00 root data:TableScan_18",
+          "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
+          "└─IndexLookUp_8 2.00 root ",
+          "  ├─IndexScan_6 2.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) le(test.t1.c, test.t2.b)], keep order:false",
+          "  └─TableScan_7 2.00 cop[tikv] table:t2, keep order:false"
+        ]
+      },
+      {
+        "SQL": "explain select /*+ TIDB_INLJ(t2) */ * from t1 join t2 on t1.a = t2.a and t2.b = 1",
+        "Plan": [
+          "IndexJoin_9 1.00 root inner join, inner:IndexLookUp_8, outer key:test.t1.a, inner key:test.t2.a",
+          "├─TableReader_19 1.00 root data:TableScan_18",
+          "│ └─TableScan_18 1.00 cop[tikv] table:t1, range:[-inf,+inf], keep order:false",
+          "└─IndexLookUp_8 1.00 root ",
+          "  ├─IndexScan_6 1.00 cop[tikv] table:t2, index:a, b, range: decided by [eq(test.t2.a, test.t1.a) eq(test.t2.b, 1)], keep order:false",
+          "  └─TableScan_7 1.00 cop[tikv] table:t2, keep order:false"
+        ]
+      }
+    ]
+  },
   {
     "Name": "TestPartitionTableStats",
     "Cases": [