From 64ea885bbe68c91efd0e50d8f558d92cc2d607f6 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Thu, 4 Jan 2018 11:15:32 +0800 Subject: [PATCH 1/2] stats: fix bug when build histograms for null json (#5545) --- statistics/builder.go | 9 +++++++-- statistics/statistics_test.go | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/statistics/builder.go b/statistics/builder.go index bd6258bed582..e7830dbcd564 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -164,8 +164,13 @@ func BuildColumn(ctx context.Context, numBuckets, id int64, collector *SampleCol } bucketIdx := 0 var lastCount int64 - hg.Buckets[0].LowerBound = samples[0] - for i := int64(0); i < int64(len(samples)); i++ { + hg.Buckets[0] = Bucket{ + LowerBound: samples[0], + UpperBound: samples[0], + Count: int64(sampleFactor), + Repeats: int64(ndvFactor), + } + for i := int64(1); i < int64(len(samples)); i++ { cmp, err := hg.Buckets[bucketIdx].UpperBound.CompareDatum(sc, &samples[i]) if err != nil { return nil, errors.Trace(err) diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 023e41974181..2ea1bf05e25e 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/mock" "github.com/pingcap/tidb/util/types" + "github.com/pingcap/tidb/util/types/json" ) func TestT(t *testing.T) { @@ -43,11 +44,6 @@ type testStatisticsSuite struct { pk ast.RecordSet } -type dataTable struct { - count int64 - samples []types.Datum -} - type recordSet struct { data []types.Datum count int64 @@ -271,6 +267,19 @@ func (s *testStatisticsSuite) TestBuild(c *C) { count, err = col.lessRowCount(sc, types.NewIntDatum(99999)) c.Check(err, IsNil) c.Check(int(count), Equals, 99999) + + datum := types.Datum{} + datum.SetMysqlJSON(json.JSON{TypeCode: json.TypeCodeLiteral}) + collector = &SampleCollector{ + Count: 1, + NullCount: 0, + Samples: []types.Datum{datum}, + Sketch: sketch, + } + col, err = BuildColumn(ctx, bucketCount, 2, collector) + c.Assert(err, IsNil) + c.Assert(len(col.Buckets), Equals, 1) + c.Assert(col.Buckets[0].LowerBound, DeepEquals, col.Buckets[0].UpperBound) } func (s *testStatisticsSuite) TestHistogramProtoConversion(c *C) { From 20973c966a5b76e9f7fe22a95c018e0ab83f69d8 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Thu, 4 Jan 2018 14:31:25 +0800 Subject: [PATCH 2/2] stats: fix bug when estimating row count for outdated histograms (#5552) --- plan/cbo_test.go | 30 ++++++++++++++++++++++++++++++ statistics/histogram.go | 9 ++++----- statistics/selectivity_test.go | 8 +++++++- statistics/statistics_test.go | 5 +++++ statistics/table.go | 8 ++++++-- 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/plan/cbo_test.go b/plan/cbo_test.go index b07f900e6ef2..e3754cb69819 100644 --- a/plan/cbo_test.go +++ b/plan/cbo_test.go @@ -324,6 +324,36 @@ func (s *testAnalyzeSuite) TestAnalyze(c *C) { } } +func (s *testAnalyzeSuite) TestOutdatedAnalyze(c *C) { + defer testleak.AfterTest(c)() + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + testKit := testkit.NewTestKit(c, store) + defer func() { + dom.Close() + store.Close() + }() + testKit.MustExec("use test") + testKit.MustExec("create table t (a int, b int, index idx(a))") + for i := 0; i < 10; i++ { + testKit.MustExec(fmt.Sprintf("insert into t values (%d,%d)", i, i)) + } + h := dom.StatsHandle() + h.DumpStatsDeltaToKV() + testKit.MustExec("analyze table t") + testKit.MustExec("insert into t select * from t") + testKit.MustExec("insert into t select * from t") + testKit.MustExec("insert into t select * from t") + h.DumpStatsDeltaToKV() + c.Assert(h.Update(dom.InfoSchema()), IsNil) + // FIXME: The count for table scan is wrong. + testKit.MustQuery("explain select * from t where a <= 5 and b <= 5").Check(testkit.Rows( + "TableScan_4 Selection_5 cop table:t, range:(-inf,+inf), keep order:false 28.799999999999997", + "Selection_5 TableScan_4 cop le(test.t.a, 5), le(test.t.b, 5) 28.799999999999997", + "TableReader_6 root data:Selection_5 28.799999999999997", + )) +} + func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) { store, err := tikv.NewMockTikvStore() if err != nil { diff --git a/statistics/histogram.go b/statistics/histogram.go index bb2536dc6a79..ed056c00a7f6 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -442,14 +442,13 @@ func (c *Column) String() string { } // getIntColumnRowCount estimates the row count by a slice of IntColumnRange. -func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges []types.IntColumnRange, - totalRowCount float64) (float64, error) { +func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges []types.IntColumnRange) (float64, error) { var rowCount float64 for _, rg := range intRanges { var cnt float64 var err error if rg.LowVal == math.MinInt64 && rg.HighVal == math.MaxInt64 { - cnt = totalRowCount + cnt = c.totalRowCount() } else if rg.LowVal == math.MinInt64 { cnt, err = c.lessAndEqRowCount(sc, types.NewIntDatum(rg.HighVal)) } else if rg.HighVal == math.MaxInt64 { @@ -469,8 +468,8 @@ func (c *Column) getIntColumnRowCount(sc *variable.StatementContext, intRanges [ } rowCount += cnt } - if rowCount > totalRowCount { - rowCount = totalRowCount + if rowCount > c.totalRowCount() { + rowCount = c.totalRowCount() } return rowCount, nil } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 82488d591cec..d4eeb8e04f3d 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -180,6 +180,12 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { c.Assert(sel, NotNil, comment) ratio, err := statsTbl.Selectivity(ctx, sel.Conditions) c.Assert(err, IsNil, comment) - c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, comment) + c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) + + statsTbl.Count *= 10 + ratio, err = statsTbl.Selectivity(ctx, sel.Conditions) + c.Assert(err, IsNil, comment) + c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) + statsTbl.Count /= 10 } } diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 2ea1bf05e25e..39fd02884ce4 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -524,6 +524,11 @@ func (s *testStatisticsSuite) TestIntColumnRanges(c *C) { count, err = tbl.GetRowCountByIntColumnRanges(sc, 0, ran) c.Assert(err, IsNil) c.Assert(int(count), Equals, 1) + + tbl.Count *= 10 + count, err = tbl.GetRowCountByIntColumnRanges(sc, 0, ran) + c.Assert(err, IsNil) + c.Assert(int(count), Equals, 10) } func (s *testStatisticsSuite) TestIndexRanges(c *C) { diff --git a/statistics/table.go b/statistics/table.go index f9f100aeea27..1edc45610261 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -214,7 +214,9 @@ func (t *Table) GetRowCountByIntColumnRanges(sc *variable.StatementContext, colI if t.Pseudo || c == nil || len(c.Buckets) == 0 { return getPseudoRowCountByIntRanges(intRanges, float64(t.Count)), nil } - return c.getIntColumnRowCount(sc, intRanges, float64(t.Count)) + result, err := c.getIntColumnRowCount(sc, intRanges) + result *= c.getIncreaseFactor(t.Count) + return result, errors.Trace(err) } // GetRowCountByColumnRanges estimates the row count by a slice of ColumnRange. @@ -223,7 +225,9 @@ func (t *Table) GetRowCountByColumnRanges(sc *variable.StatementContext, colID i if t.Pseudo || c == nil || len(c.Buckets) == 0 { return getPseudoRowCountByColumnRanges(sc, float64(t.Count), colRanges) } - return c.getColumnRowCount(sc, colRanges) + result, err := c.getColumnRowCount(sc, colRanges) + result *= c.getIncreaseFactor(t.Count) + return result, errors.Trace(err) } // GetRowCountByIndexRanges estimates the row count by a slice of IndexRange.