pingcap · alivxxx · Oct 24, 2018 · Oct 23, 2018 · Oct 24, 2018 · Oct 24, 2018
diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
@@ -158,11 +158,11 @@ Projection_5	39.28	root	test.st.cm, test.st.p1, test.st.p2, test.st.p3, test.st.
       └─TableScan_14	160.23	cop	table:st, keep order:false
 explain select dt.id as id, dt.aid as aid, dt.pt as pt, dt.dic as dic, dt.cm as cm, rr.gid as gid, rr.acd as acd, rr.t as t,dt.p1 as p1, dt.p2 as p2, dt.p3 as p3, dt.p4 as p4, dt.p5 as p5, dt.p6_md5 as p6, dt.p7_md5 as p7 from dt dt join rr rr on (rr.pt = 'ios' and rr.t > 1478185592 and dt.aid = rr.aid and dt.dic = rr.dic) where dt.pt = 'ios' and dt.t > 1478185592 and dt.bm = 0 limit 2000;
 id	count	task	operator info
-Projection_9	428.55	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
-└─Limit_12	428.55	root	offset:0, count:2000
-  └─IndexJoin_18	428.55	root	inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
-    ├─TableReader_42	428.55	root	data:Selection_41
-    │ └─Selection_41	428.55	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592)
+Projection_9	428.32	root	dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr.t, dt.p1, dt.p2, dt.p3, dt.p4, dt.p5, dt.p6_md5, dt.p7_md5
+└─Limit_12	428.32	root	offset:0, count:2000
+  └─IndexJoin_18	428.32	root	inner join, inner:IndexLookUp_17, outer key:dt.aid, dt.dic, inner key:rr.aid, rr.dic
+    ├─TableReader_42	428.32	root	data:Selection_41
+    │ └─Selection_41	428.32	cop	eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592)
     │   └─TableScan_40	2000.00	cop	table:dt, range:[0,+inf], keep order:false
     └─IndexLookUp_17	970.00	root	
       ├─IndexScan_14	1.00	cop	table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false

diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result
@@ -47,10 +47,10 @@ explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	count	task	operator info
 Projection_6	2481.25	root	test.t1.c1, test.t1.c2, test.t1.c3, test.t2.c1, test.t2.c2
 └─MergeJoin_7	2481.25	root	left outer join, left key:test.t1.c2, right key:test.t2.c1
-  ├─IndexLookUp_17	1999.00	root	
-  │ ├─Selection_16	1999.00	cop	gt(test.t1.c1, 1)
+  ├─IndexLookUp_17	1998.00	root	
+  │ ├─Selection_16	1998.00	cop	gt(test.t1.c1, 1)
   │ │ └─IndexScan_14	1999.00	cop	table:t1, index:c2, range:[NULL,+inf], keep order:true
-  │ └─TableScan_15	1999.00	cop	table:t1, keep order:false
+  │ └─TableScan_15	1998.00	cop	table:t1, keep order:false
   └─IndexLookUp_21	1985.00	root	
     ├─IndexScan_19	1985.00	cop	table:t2, index:c1, range:[NULL,+inf], keep order:true
     └─TableScan_20	1985.00	cop	table:t2, keep order:false

diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result
@@ -251,7 +251,7 @@ limit 10;
 id	count	task	operator info
 Projection_14	10.00	root	tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderdate, tpch.orders.o_shippriority
 └─TopN_17	10.00	root	7_col_0:desc, tpch.orders.o_orderdate:asc, offset:0, count:10
-  └─HashAgg_20	40256361.71	root	group by:tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority, funcs:sum(mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))), firstrow(tpch.orders.o_orderdate), firstrow(tpch.orders.o_shippriority), firstrow(tpch.lineitem.l_orderkey)
+  └─HashAgg_20	40227041.09	root	group by:tpch.lineitem.l_orderkey, tpch.orders.o_orderdate, tpch.orders.o_shippriority, funcs:sum(mul(tpch.lineitem.l_extendedprice, minus(1, tpch.lineitem.l_discount))), firstrow(tpch.orders.o_orderdate), firstrow(tpch.orders.o_shippriority), firstrow(tpch.lineitem.l_orderkey)
     └─IndexJoin_26	91515927.49	root	inner join, inner:IndexLookUp_25, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey
       ├─HashRightJoin_46	22592975.51	root	inner join, inner:TableReader_52, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
       │ ├─TableReader_52	1498236.00	root	data:Selection_51
@@ -260,9 +260,9 @@ Projection_14	10.00	root	tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd
       │ └─TableReader_49	36870000.00	root	data:Selection_48
       │   └─Selection_48	36870000.00	cop	lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000)
       │     └─TableScan_47	75000000.00	cop	table:orders, range:[-inf,+inf], keep order:false
-      └─IndexLookUp_25	163063881.42	root	
+      └─IndexLookUp_25	162945114.27	root	
         ├─IndexScan_22	1.00	cop	table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
-        └─Selection_24	163063881.42	cop	gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
+        └─Selection_24	162945114.27	cop	gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000)
           └─TableScan_23	1.00	cop	table:lineitem, keep order:false
 /*
 Q4 Order Priority Checking Query
@@ -922,13 +922,13 @@ p_brand,
 p_type,
 p_size;
 id	count	task	operator info
-Sort_13	15.00	root	supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc
-└─Projection_14	15.00	root	tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0
-  └─HashAgg_17	15.00	root	group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size)
-    └─HashLeftJoin_22	4022816.68	root	anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)]
-      ├─IndexJoin_26	5028520.85	root	inner join, inner:IndexReader_25, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey
-      │ ├─TableReader_41	1249969.60	root	data:Selection_40
-      │ │ └─Selection_40	1249969.60	cop	in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92))
+Sort_13	14.41	root	supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_type:asc, tpch.part.p_size:asc
+└─Projection_14	14.41	root	tpch.part.p_brand, tpch.part.p_type, tpch.part.p_size, 9_col_0
+  └─HashAgg_17	14.41	root	group by:tpch.part.p_brand, tpch.part.p_size, tpch.part.p_type, funcs:count(distinct tpch.partsupp.ps_suppkey), firstrow(tpch.part.p_brand), firstrow(tpch.part.p_type), firstrow(tpch.part.p_size)
+    └─HashLeftJoin_22	3863988.24	root	anti semi join, inner:TableReader_46, equal:[eq(tpch.partsupp.ps_suppkey, tpch.supplier.s_suppkey)]
+      ├─IndexJoin_26	4829985.30	root	inner join, inner:IndexReader_25, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey
+      │ ├─TableReader_41	1200618.43	root	data:Selection_40
+      │ │ └─Selection_40	1200618.43	cop	in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92))
       │ │   └─TableScan_39	10000000.00	cop	table:part, range:[-inf,+inf], keep order:false
       │ └─IndexReader_25	1.00	root	index:IndexScan_24
       │   └─IndexScan_24	1.00	cop	table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false

diff --git a/statistics/ddl_test.go b/statistics/ddl_test.go
@@ -137,7 +137,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
 	c.Assert(count, Equals, float64(2))
 	count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID)
 	c.Assert(err, IsNil)
-	c.Assert(count, Equals, float64(2))
+	c.Assert(count, Equals, float64(0))
 
 	testKit.MustExec("alter table t add column c4 datetime NOT NULL default CURRENT_TIMESTAMP")
 	err = h.HandleDDLEvent(<-h.DDLEventCh())

diff --git a/statistics/histogram.go b/statistics/histogram.go
@@ -729,7 +729,7 @@ func (c *Column) String() string {
 	return c.Histogram.ToString(0)
 }
 
-func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (float64, error) {
+func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, modifyCount int64) (float64, error) {
 	if val.IsNull() {
 		return float64(c.NullCount), nil
 	}
@@ -738,7 +738,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum) (f
 		return 0.0, nil
 	}
 	if c.NDV > 0 && c.outOfRange(val) {
-		return c.totalRowCount() / (float64(c.NDV)), nil
+		return float64(modifyCount) / float64(c.NDV), nil
 	}
 	if c.CMSketch != nil {
 		count, err := c.CMSketch.queryValue(sc, val)
@@ -759,7 +759,7 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
 			// the point case.
 			if !rg.LowExclude && !rg.HighExclude {
 				var cnt float64
-				cnt, err = c.equalRowCount(sc, rg.LowVal[0])
+				cnt, err = c.equalRowCount(sc, rg.LowVal[0], modifyCount)
 				if err != nil {
 					return 0, errors.Trace(err)
 				}
@@ -773,14 +773,14 @@ func (c *Column) getColumnRowCount(sc *stmtctx.StatementContext, ranges []*range
 			cnt += float64(modifyCount) / outOfRangeBetweenRate
 		}
 		if rg.LowExclude {
-			lowCnt, err := c.equalRowCount(sc, rg.LowVal[0])
+			lowCnt, err := c.equalRowCount(sc, rg.LowVal[0], modifyCount)
 			if err != nil {
 				return 0, errors.Trace(err)
 			}
 			cnt -= lowCnt
 		}
 		if !rg.HighExclude {
-			highCnt, err := c.equalRowCount(sc, rg.HighVal[0])
+			highCnt, err := c.equalRowCount(sc, rg.HighVal[0], modifyCount)
 			if err != nil {
 				return 0, errors.Trace(err)
 			}
@@ -809,10 +809,10 @@ func (idx *Index) String() string {
 	return idx.Histogram.ToString(len(idx.Info.Columns))
 }
 
-func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte) float64 {
+func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) float64 {
 	val := types.NewBytesDatum(b)
 	if idx.NDV > 0 && idx.outOfRange(val) {
-		return idx.totalRowCount() / (float64(idx.NDV))
+		return float64(modifyCount) / (float64(idx.NDV))
 	}
 	if idx.CMSketch != nil {
 		return float64(idx.CMSketch.QueryBytes(b))
@@ -834,7 +834,7 @@ func (idx *Index) getRowCount(sc *stmtctx.StatementContext, indexRanges []*range
 		fullLen := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == len(idx.Info.Columns)
 		if fullLen && bytes.Equal(lb, rb) {
 			if !indexRange.LowExclude && !indexRange.HighExclude {
-				totalCount += idx.equalRowCount(sc, lb)
+				totalCount += idx.equalRowCount(sc, lb, modifyCount)
 			}
 			continue
 		}

diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
@@ -158,7 +158,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
 		},
 		{
 			exprs:       "a >= 1 and b > 1 and a < 2",
-			selectivity: 0.01817558299,
+			selectivity: 0.01783264746,
 		},
 		{
 			exprs:       "a >= 1 and c > 1 and a < 2",
@@ -174,7 +174,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
 		},
 		{
 			exprs:       "b > 1",
-			selectivity: 0.98148148148,
+			selectivity: 0.96296296296,
 		},
 		{
 			exprs:       "a > 1 and b < 2 and c > 3 and d < 4 and e > 5",
@@ -304,6 +304,24 @@ func (s *testSelectivitySuite) TestEstimationForUnknownValues(c *C) {
 	count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(1, 30))
 	c.Assert(err, IsNil)
 	c.Assert(count, Equals, 0.0)
+
+	testKit.MustExec("drop table t")
+	testKit.MustExec("create table t(a int, b int, index idx(b))")
+	testKit.MustExec("insert into t values (1,1)")
+	testKit.MustExec("analyze table t")
+	table, err = s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	statsTbl = h.GetTableStats(table.Meta())
+
+	colID = table.Meta().Columns[0].ID
+	count, err = statsTbl.GetRowCountByColumnRanges(sc, colID, getRange(2, 2))
+	c.Assert(err, IsNil)
+	c.Assert(count, Equals, 0.0)
+
+	idxID = table.Meta().Indices[0].ID
+	count, err = statsTbl.GetRowCountByIndexRanges(sc, idxID, getRange(2, 2))
+	c.Assert(err, IsNil)
+	c.Assert(count, Equals, 0.0)
 }
 
 func BenchmarkSelectivity(b *testing.B) {

diff --git a/statistics/table.go b/statistics/table.go
@@ -394,7 +394,7 @@ func (t *Table) ColumnEqualRowCount(sc *stmtctx.StatementContext, value types.Da
 		return float64(t.Count) / pseudoEqualRate, nil
 	}
 	c := t.Columns[colID]
-	result, err := c.equalRowCount(sc, value)
+	result, err := c.equalRowCount(sc, value, t.ModifyCount)
 	result *= c.getIncreaseFactor(t.Count)
 	return result, errors.Trace(err)
 }
@@ -551,7 +551,7 @@ func (coll *HistColl) getIndexRowCount(sc *stmtctx.StatementContext, idxID int64
 			// so we use heuristic methods to estimate the selectivity.
 			if idx.NDV > 0 && len(ran.LowVal) == len(idx.Info.Columns) && rangePosition == len(ran.LowVal) {
 				// for equality queries
-				selectivity = 1.0 / float64(idx.NDV)
+				selectivity = float64(coll.ModifyCount) / float64(idx.NDV) / idx.totalRowCount()
 			} else {
 				// for range queries
 				selectivity = float64(coll.ModifyCount) / outOfRangeBetweenRate / idx.totalRowCount()