From 64ea885bbe68c91efd0e50d8f558d92cc2d607f6 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Thu, 4 Jan 2018 11:15:32 +0800
Subject: [PATCH 1/2] stats: fix bug when building histograms for null json
 (#5545)

---
 statistics/builder.go         |  9 +++++++--
 statistics/statistics_test.go | 19 ++++++++++++++-----
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/statistics/builder.go b/statistics/builder.go
index bd6258bed582..e7830dbcd564 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -164,8 +164,13 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, collector *SampleCol
 	}
 	bucketIdx := 0
 	var lastCount int64
-	hg.Buckets[0].LowerBound = samples[0]
-	for i := int64(0); i < int64(len(samples)); i++ {
+	hg.Buckets[0] = Bucket{
+		LowerBound: samples[0],
+		UpperBound: samples[0],
+		Count:      int64(sampleFactor),
+		Repeats:    int64(ndvFactor),
+	}
+	for i := int64(1); i < int64(len(samples)); i++ {
 		cmp, err := hg.Buckets[bucketIdx].UpperBound.CompareDatum(sc, &samples[i])
 		if err != nil {
 			return nil, errors.Trace(err)
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 023e41974181..2ea1bf05e25e 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -28,6 +28,7 @@ import (
 	"github.com/pingcap/tidb/util/codec"
 	"github.com/pingcap/tidb/util/mock"
 	"github.com/pingcap/tidb/util/types"
+	"github.com/pingcap/tidb/util/types/json"
 )
 
 func TestT(t *testing.T) {
@@ -43,11 +44,6 @@ type testStatisticsSuite struct {
 	pk      ast.RecordSet
 }
 
-type dataTable struct {
-	count   int64
-	samples []types.Datum
-}
-
 type recordSet struct {
 	data   []types.Datum
 	count  int64
@@ -271,6 +267,19 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
 	count, err = col.lessRowCount(sc, types.NewIntDatum(99999))
 	c.Check(err, IsNil)
 	c.Check(int(count), Equals, 99999)
+
+	datum := types.Datum{}
+	datum.SetMysqlJSON(json.JSON{TypeCode: json.TypeCodeLiteral})
+	collector = &SampleCollector{
+		Count:     1,
+		NullCount: 0,
+		Samples:   []types.Datum{datum},
+		Sketch:    sketch,
+	}
+	col, err = BuildColumn(ctx, bucketCount, 2, collector)
+	c.Assert(err, IsNil)
+	c.Assert(len(col.Buckets), Equals, 1)
+	c.Assert(col.Buckets[0].LowerBound, DeepEquals, col.Buckets[0].UpperBound)
 }
 
 func (s *testStatisticsSuite) TestHistogramProtoConversion(c *C) {

From 20973c966a5b76e9f7fe22a95c018e0ab83f69d8 Mon Sep 17 00:00:00 2001
From: Haibin Xie <lambdax.tyler@gmail.com>
Date: Thu, 4 Jan 2018 14:31:25 +0800
Subject: [PATCH 2/2] stats: fix bug when estimating row count for outdated
 histograms (#5552)

---
 plan/cbo_test.go               | 30 ++++++++++++++++++++++++++++++
 statistics/histogram.go        |  9 ++++-----
 statistics/selectivity_test.go |  8 +++++++-
 statistics/statistics_test.go  |  5 +++++
 statistics/table.go            |  8 ++++++--
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/plan/cbo_test.go b/plan/cbo_test.go
index b07f900e6ef2..e3754cb69819 100644
--- a/plan/cbo_test.go
+++ b/plan/cbo_test.go
@@ -324,6 +324,36 @@ func (s *testAnalyzeSuite) TestAnalyze(c *C) {
 	}
 }
 
+func (s *testAnalyzeSuite) TestOutdatedAnalyze(c *C) {
+	defer testleak.AfterTest(c)()
+	store, dom, err := newStoreWithBootstrap()
+	c.Assert(err, IsNil)
+	testKit := testkit.NewTestKit(c, store)
+	defer func() {
+		dom.Close()
+		store.Close()
+	}()
+	testKit.MustExec("use test")
+	testKit.MustExec("create table t (a int, b int, index idx(a))")
+	for i := 0; i < 10; i++ { // seed rows (0,0) .. (9,9) before analyzing
+		testKit.MustExec(fmt.Sprintf("insert into t values (%d,%d)", i, i))
+	}
+	h := dom.StatsHandle()
+	h.DumpStatsDeltaToKV()
+	testKit.MustExec("analyze table t") // analyze runs while the table holds only the 10 seeded rows
+	testKit.MustExec("insert into t select * from t")
+	testKit.MustExec("insert into t select * from t")
+	testKit.MustExec("insert into t select * from t") // three self-inserts: 10 -> 20 -> 40 -> 80 rows, all added after analyze
+	h.DumpStatsDeltaToKV()
+	c.Assert(h.Update(dom.InfoSchema()), IsNil) // NOTE(review): presumably refreshes the handle so it sees the new row count - confirm
+	// FIXME: The count for table scan is wrong: the expected rows below give it the same estimate as the Selection despite its (-inf,+inf) range.
+	testKit.MustQuery("explain select * from t where a <= 5 and b <= 5").Check(testkit.Rows(
+		"TableScan_4 Selection_5  cop table:t, range:(-inf,+inf), keep order:false 28.799999999999997",
+		"Selection_5  TableScan_4 cop le(test.t.a, 5), le(test.t.b, 5) 28.799999999999997",
+		"TableReader_6   root data:Selection_5 28.799999999999997",
+	))
+}
+
 func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
 	store, err := tikv.NewMockTikvStore()
 	if err != nil {
diff --git a/statistics/histogram.go b/statistics/histogram.go
index bb2536dc6a79..ed056c00a7f6 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -442,14 +442,13 @@ func (c *Column) String() string {
 }
 
 // getIntColumnRowCount estimates the row count by a slice of IntColumnRange.
-func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges []types.IntColumnRange,
-	totalRowCount float64) (float64, error) {
+func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges []types.IntColumnRange) (float64, error) {
 	var rowCount float64
 	for _, rg := range intRanges {
 		var cnt float64
 		var err error
 		if rg.LowVal == math.MinInt64 && rg.HighVal == math.MaxInt64 {
-			cnt = totalRowCount
+			cnt = c.totalRowCount()
 		} else if rg.LowVal == math.MinInt64 {
 			cnt, err = c.lessAndEqRowCount(sc, types.NewIntDatum(rg.HighVal))
 		} else if rg.HighVal == math.MaxInt64 {
@@ -469,8 +468,8 @@ func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges [
 		}
 		rowCount += cnt
 	}
-	if rowCount > totalRowCount {
-		rowCount = totalRowCount
+	if rowCount > c.totalRowCount() {
+		rowCount = c.totalRowCount()
 	}
 	return rowCount, nil
 }
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index 82488d591cec..d4eeb8e04f3d 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -180,6 +180,12 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
 		c.Assert(sel, NotNil, comment)
 		ratio, err := statsTbl.Selectivity(ctx, sel.Conditions)
 		c.Assert(err, IsNil, comment)
-		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, comment)
+		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
+
+		statsTbl.Count *= 10
+		ratio, err = statsTbl.Selectivity(ctx, sel.Conditions)
+		c.Assert(err, IsNil, comment)
+		c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
+		statsTbl.Count /= 10
 	}
 }
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 2ea1bf05e25e..39fd02884ce4 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -524,6 +524,11 @@ func (s *testStatisticsSuite) TestIntColumnRanges(c *C) {
 	count, err = tbl.GetRowCountByIntColumnRanges(sc, 0, ran)
 	c.Assert(err, IsNil)
 	c.Assert(int(count), Equals, 1)
+
+	tbl.Count *= 10
+	count, err = tbl.GetRowCountByIntColumnRanges(sc, 0, ran)
+	c.Assert(err, IsNil)
+	c.Assert(int(count), Equals, 10)
 }
 
 func (s *testStatisticsSuite) TestIndexRanges(c *C) {
diff --git a/statistics/table.go b/statistics/table.go
index f9f100aeea27..1edc45610261 100644
--- a/statistics/table.go
+++ b/statistics/table.go
@@ -214,7 +214,9 @@ func (t *Table) GetRowCountByIntColumnRanges(sc *variable.StatementContext, colI
 	if t.Pseudo || c == nil || len(c.Buckets) == 0 {
 		return getPseudoRowCountByIntRanges(intRanges, float64(t.Count)), nil
 	}
-	return c.getIntColumnRowCount(sc, intRanges, float64(t.Count))
+	result, err := c.getIntColumnRowCount(sc, intRanges)
+	result *= c.getIncreaseFactor(t.Count)
+	return result, errors.Trace(err)
 }
 
 // GetRowCountByColumnRanges estimates the row count by a slice of ColumnRange.
@@ -223,7 +225,9 @@ func (t *Table) GetRowCountByColumnRanges(sc *variable.StatementContext, colID i
 	if t.Pseudo || c == nil || len(c.Buckets) == 0 {
 		return getPseudoRowCountByColumnRanges(sc, float64(t.Count), colRanges)
 	}
-	return c.getColumnRowCount(sc, colRanges)
+	result, err := c.getColumnRowCount(sc, colRanges)
+	result *= c.getIncreaseFactor(t.Count)
+	return result, errors.Trace(err)
 }
 
 // GetRowCountByIndexRanges estimates the row count by a slice of IndexRange.