stats: load column histograms by need #4847

Merged: 9 commits, Oct 24, 2017
7 changes: 7 additions & 0 deletions domain/domain.go
@@ -574,6 +574,8 @@ func (do *Domain) updateStatsWorker(ctx context.Context, lease time.Duration) {
defer loadTicker.Stop()
deltaUpdateTicker := time.NewTicker(deltaUpdateDuration)
defer deltaUpdateTicker.Stop()
loadHistogramTicker := time.NewTicker(lease)
defer loadHistogramTicker.Stop()
statsHandle := do.StatsHandle()
for {
select {
@@ -600,6 +602,11 @@ func (do *Domain) updateStatsWorker(ctx context.Context, lease time.Duration) {
}
case <-deltaUpdateTicker.C:
statsHandle.DumpStatsDeltaToKV()
case <-loadHistogramTicker.C:
err := statsHandle.LoadNeededHistograms()
if err != nil {
log.Error("[stats] load histograms fail: ", errors.ErrorStack(err))
}
}
}
}
4 changes: 2 additions & 2 deletions plan/stats.go
@@ -74,8 +74,8 @@ func (p *DataSource) getStatsProfileByFilter(conds expression.CNFExprs) *statsPr
}
for i, col := range p.Columns {
hist, ok := p.statisticTable.Columns[col.ID]
if ok && hist.NDV > 0 && len(hist.Buckets) > 0 {
factor := float64(p.statisticTable.Count) / float64(hist.Buckets[len(hist.Buckets)-1].Count)
if ok && hist.Count > 0 {
factor := float64(p.statisticTable.Count) / float64(hist.Count)
profile.cardinality[i] = float64(hist.NDV) * factor
} else {
profile.cardinality[i] = profile.count * distinctFactor
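The change above bases the column's cardinality estimate on hist.Count, the number of rows the histogram covers, instead of the cumulative count of its last bucket, so the estimate still works when the buckets themselves have not been loaded yet. The arithmetic is unchanged in spirit: scale the histogram's NDV by the ratio of the full table size to the rows the histogram covers. A small self-contained example with made-up numbers:

package main

import "fmt"

func main() {
    // Made-up example values, not taken from the PR.
    tableCount := 10000.0 // p.statisticTable.Count: total rows in the table
    histCount := 2500.0   // hist.Count: rows covered by the column's histogram
    ndv := 500.0          // hist.NDV: distinct values the histogram observed

    factor := tableCount / histCount // 4: scale the histogram's view up to the table
    cardinality := ndv * factor      // 2000: estimated distinct values in the full table
    fmt.Println(factor, cardinality)
}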
2 changes: 1 addition & 1 deletion session.go
@@ -737,7 +737,7 @@ func (s *session) Execute(sql string) (recordSets []ast.RecordSet, err error) {
sessionExecuteCompileDuration.Observe(time.Since(startTS).Seconds())

// Step3: Cache the physical plan if possible.
if cache.PlanCacheEnabled && stmt.Cacheable && len(stmtNodes) == 1 {
if cache.PlanCacheEnabled && stmt.Cacheable && len(stmtNodes) == 1 && !s.GetSessionVars().StmtCtx.HistogramsNotLoad() {
cache.GlobalPlanCache.Put(cacheKey, cache.NewSQLCacheValue(stmtNode, stmt.Plan, stmt.Expensive))
}

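The new HistogramsNotLoad condition keeps this statement's plan out of the plan cache whenever the optimizer had to fall back to pseudo estimates because a histogram was not loaded yet; otherwise the weaker plan would stay cached even after the background loader fills the histogram in. A minimal usage sketch of the per-statement flag; demoHistogramsNotLoadFlag is an illustrative name and the fmt and sessionctx/variable imports are assumed:

// Sketch only: histogramsNotLoad is per-statement state on the StatementContext.
func demoHistogramsNotLoadFlag() {
    sc := new(variable.StatementContext)
    fmt.Println(sc.HistogramsNotLoad()) // false: nothing missing so far
    sc.SetHistogramsNotLoad()           // some column's histogram was not loaded
    fmt.Println(sc.HistogramsNotLoad()) // true: skip the plan cache for this statement
}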
22 changes: 19 additions & 3 deletions sessionctx/variable/session.go
@@ -344,9 +344,10 @@ type StatementContext struct {
// mu struct holds variables that change during execution.
mu struct {
sync.Mutex
affectedRows uint64
foundRows uint64
warnings []error
affectedRows uint64
foundRows uint64
warnings []error
histogramsNotLoad bool
}

// Copied from SessionVars.TimeZone.
@@ -421,6 +422,21 @@ func (sc *StatementContext) AppendWarning(warn error) {
sc.mu.Unlock()
}

// SetHistogramsNotLoad sets histogramsNotLoad.
func (sc *StatementContext) SetHistogramsNotLoad() {
sc.mu.Lock()
sc.mu.histogramsNotLoad = true
sc.mu.Unlock()
}

// HistogramsNotLoad gets histogramsNotLoad.
func (sc *StatementContext) HistogramsNotLoad() bool {
sc.mu.Lock()
notLoad := sc.mu.histogramsNotLoad
sc.mu.Unlock()
return notLoad
}

// HandleTruncate ignores or returns the error based on the StatementContext state.
func (sc *StatementContext) HandleTruncate(err error) error {
// TODO: At present we have not checked whether the error can be ignored or treated as warning.
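Only the flag and its accessors are added here; the code that sets the flag lives elsewhere in the PR (for example the ColumnIsInvalid calls visible in the test and selectivity hunks below). The intended pattern is presumably: when a lookup finds a column histogram with no buckets loaded, mark the statement and queue the column for the background loader. A hedged sketch of that call site; markHistogramNeeded, tableColumnID, and the insert method are illustrative names (only histogramNeededColumns.allCols and delete appear in this diff):

// Sketch only, not the PR's exact code.
func markHistogramNeeded(sc *variable.StatementContext, tableID, columnID int64) {
    // The plan for this statement was built on pseudo estimates, so do not cache it.
    sc.SetHistogramsNotLoad()
    // The background worker (domain.go above) will fetch this histogram on its
    // next tick via Handle.LoadNeededHistograms.
    histogramNeededColumns.insert(tableColumnID{tableID: tableID, columnID: columnID})
}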
10 changes: 5 additions & 5 deletions statistics/ddl_test.go
@@ -49,10 +49,10 @@ func (s *testStatsCacheSuite) TestDDLAfterLoad(c *C) {
tableInfo = tbl.Meta()

sc := new(variable.StatementContext)
count, err := statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(recordCount+1), tableInfo.Columns[0])
count, err := statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(recordCount+1), tableInfo.Columns[0].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)
count, err = statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(recordCount+1), tableInfo.Columns[2])
count, err = statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(recordCount+1), tableInfo.Columns[2].ID)
c.Assert(err, IsNil)
c.Assert(int(count), Equals, 333)
}
@@ -124,7 +124,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl := do.StatsHandle().GetTableStats(tableInfo.ID)
c.Assert(statsTbl.Pseudo, IsFalse)
sc := new(variable.StatementContext)
c.Assert(statsTbl.ColumnIsInvalid(tableInfo.Columns[2]), IsTrue)
c.Assert(statsTbl.ColumnIsInvalid(sc, tableInfo.Columns[2].ID), IsTrue)
c.Check(statsTbl.Columns[tableInfo.Columns[2].ID].NDV, Equals, int64(0))

testKit.MustExec("alter table t add column c3 int NOT NULL")
@@ -138,10 +138,10 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl = do.StatsHandle().GetTableStats(tableInfo.ID)
c.Assert(statsTbl.Pseudo, IsFalse)
sc = new(variable.StatementContext)
count, err := statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(0), tableInfo.Columns[3])
count, err := statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(0), tableInfo.Columns[3].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(2))
count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3])
count, err = statsTbl.ColumnEqualRowCount(sc, types.NewIntDatum(1), tableInfo.Columns[3].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(0))

21 changes: 21 additions & 0 deletions statistics/handle.go
@@ -156,3 +156,24 @@ func (h *Handle) UpdateTableStats(tables []*Table, deletedIDs []int64) {
}
h.statsCache.Store(newCache)
}

// LoadNeededHistograms will load histograms for those needed columns.
func (h *Handle) LoadNeededHistograms() error {
cols := histogramNeededColumns.allCols()
for _, col := range cols {
tbl := h.GetTableStats(col.tableID).copy()
c, ok := tbl.Columns[col.columnID]
if !ok || len(c.Buckets) > 0 {
histogramNeededColumns.delete(col)
continue
}
hg, err := histogramFromStorage(h.ctx, col.tableID, c.ID, &c.Info.FieldType, c.NDV, 0, c.LastUpdateVersion, c.NullCount)
if err != nil {
return errors.Trace(err)
}
tbl.Columns[c.ID] = &Column{Histogram: *hg, Info: c.Info, Count: int64(hg.totalRowCount())}
h.UpdateTableStats([]*Table{tbl}, nil)
Inline review comment (Member): remove one key if the update succeeds.
histogramNeededColumns.delete(col)
}
return nil
}
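LoadNeededHistograms drains histogramNeededColumns, whose definition is not part of this diff. From its usage above (allCols, delete, and elements carrying tableID and columnID) it is presumably a small mutex-protected set keyed by table and column ID, roughly as sketched below, living in the statistics package and using sync.Mutex; the type names and the insert method are assumptions:

// Sketch only: a concurrency-safe registry of columns whose histograms still
// need loading. Inferred from how it is used above, not copied from the PR.
type tableColumnID struct {
    tableID  int64
    columnID int64
}

type neededColumnMap struct {
    m    sync.Mutex
    cols map[tableColumnID]struct{}
}

func (n *neededColumnMap) allCols() []tableColumnID {
    n.m.Lock()
    defer n.m.Unlock()
    keys := make([]tableColumnID, 0, len(n.cols))
    for col := range n.cols {
        keys = append(keys, col)
    }
    return keys
}

func (n *neededColumnMap) insert(col tableColumnID) {
    n.m.Lock()
    n.cols[col] = struct{}{}
    n.m.Unlock()
}

func (n *neededColumnMap) delete(col tableColumnID) {
    n.m.Lock()
    delete(n.cols, col)
    n.m.Unlock()
}

var histogramNeededColumns = neededColumnMap{cols: map[tableColumnID]struct{}{}}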
46 changes: 40 additions & 6 deletions statistics/handle_test.go
@@ -15,6 +15,7 @@ package statistics_test

import (
"fmt"
"time"

"github.com/juju/errors"
. "github.com/pingcap/check"
@@ -40,7 +41,7 @@ type testStatsCacheSuite struct {
func (s *testStatsCacheSuite) SetUpSuite(c *C) {
testleak.BeforeTest()
var err error
s.store, s.do, err = newStoreWithBootstrap()
s.store, s.do, err = newStoreWithBootstrap(0)
c.Assert(err, IsNil)
}

Expand Down Expand Up @@ -170,7 +171,7 @@ func (s *testStatsCacheSuite) TestEmptyTable(c *C) {
tableInfo := tbl.Meta()
statsTbl := do.StatsHandle().GetTableStats(tableInfo.ID)
sc := new(variable.StatementContext)
count, err := statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(1), tableInfo.Columns[0])
count, err := statsTbl.ColumnGreaterRowCount(sc, types.NewDatum(1), tableInfo.Columns[0].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)
}
@@ -189,7 +190,7 @@ func (s *testStatsCacheSuite) TestColumnIDs(c *C) {
tableInfo := tbl.Meta()
statsTbl := do.StatsHandle().GetTableStats(tableInfo.ID)
sc := new(variable.StatementContext)
count, err := statsTbl.ColumnLessRowCount(sc, types.NewDatum(2), tableInfo.Columns[0])
count, err := statsTbl.ColumnLessRowCount(sc, types.NewDatum(2), tableInfo.Columns[0].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, float64(1))

@@ -203,7 +204,7 @@ func (s *testStatsCacheSuite) TestColumnIDs(c *C) {
tableInfo = tbl.Meta()
statsTbl = do.StatsHandle().GetTableStats(tableInfo.ID)
// At that time, we should get c2's stats instead of c1's.
count, err = statsTbl.ColumnLessRowCount(sc, types.NewDatum(2), tableInfo.Columns[0])
count, err = statsTbl.ColumnLessRowCount(sc, types.NewDatum(2), tableInfo.Columns[0].ID)
c.Assert(err, IsNil)
c.Assert(count, Equals, 0.0)
}
@@ -337,13 +338,46 @@ func (s *testStatsCacheSuite) TestLoadHist(c *C) {
c.Assert(newStatsTbl2.Columns[int64(3)].LastUpdateVersion, Greater, newStatsTbl2.Columns[int64(1)].LastUpdateVersion)
}

func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
func (s *testStatsUpdateSuite) TestLoadHistogram(c *C) {
store, do, err := newStoreWithBootstrap(10 * time.Millisecond)
c.Assert(err, IsNil)
defer store.Close()
defer do.Close()
testKit := testkit.NewTestKit(c, store)
testKit.MustExec("use test")
testKit.MustExec("create table t(a int, b int, c int, primary key(a), key idx(b))")
testKit.MustExec("insert into t values (1,1,1),(2,2,2),(3,3,3)")
testKit.MustExec("analyze table t")

is := do.InfoSchema()
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo := tbl.Meta()
h := do.StatsHandle()
time.Sleep(1 * time.Second)
stat := h.GetTableStats(tableInfo.ID)
hg := stat.Columns[tableInfo.Columns[0].ID].Histogram
c.Assert(len(hg.Buckets), Greater, 0)
hg = stat.Indices[tableInfo.Indices[0].ID].Histogram
c.Assert(len(hg.Buckets), Greater, 0)
hg = stat.Columns[tableInfo.Columns[2].ID].Histogram
c.Assert(len(hg.Buckets), Equals, 0)
_, err = stat.ColumnEqualRowCount(testKit.Se.GetSessionVars().StmtCtx, types.NewIntDatum(1), tableInfo.Columns[2].ID)
c.Assert(err, IsNil)
time.Sleep(1 * time.Second)
stat = h.GetTableStats(tableInfo.ID)
hg = stat.Columns[tableInfo.Columns[2].ID].Histogram
c.Assert(len(hg.Buckets), Greater, 0)
}

func newStoreWithBootstrap(statsLease time.Duration) (kv.Storage, *domain.Domain, error) {
store, err := tikv.NewMockTikvStore()
if err != nil {
return nil, nil, errors.Trace(err)
}
tidb.SetSchemaLease(0)
tidb.SetStatsLease(0)
tidb.SetStatsLease(statsLease)
domain.RunAutoAnalyze = false
do, err := tidb.BootstrapSession(store)
return store, do, errors.Trace(err)
}
15 changes: 14 additions & 1 deletion statistics/histogram.go
@@ -160,6 +160,18 @@ func histogramFromStorage(ctx context.Context, tableID int64, colID int64, tp *t
return hg, nil
}

func columnCountFromStorage(ctx context.Context, tableID, colID int64) (int64, error) {
selSQL := fmt.Sprintf("select sum(count) from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d", tableID, 0, colID)
rows, _, err := ctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(ctx, selSQL)
if err != nil {
return 0, errors.Trace(err)
}
if rows[0].Data[0].IsNull() {
return 0, nil
}
return rows[0].Data[0].GetMysqlDecimal().ToInt()
}

func (hg *Histogram) toString(isIndex bool) string {
strs := make([]string, 0, len(hg.Buckets)+1)
if isIndex {
@@ -443,7 +455,8 @@ func MergeHistograms(sc *variable.StatementContext, lh *Histogram, rh *Histogram
// Column represents a column histogram.
type Column struct {
Histogram
Info *model.ColumnInfo
Count int64
Info *model.ColumnInfo
}

func (c *Column) String() string {
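The new Count field records how many rows a column's histogram covers even when its buckets are not loaded, and columnCountFromStorage derives that number by summing the per-bucket counts stored in mysql.stats_buckets. A hedged sketch of how the two presumably fit together on the stats-loading path; lazyColumn is an illustrative helper name, and the real wiring lives in parts of the PR not shown in this view:

// Sketch only: build a Column whose buckets are not yet loaded. ndv would come
// from mysql.stats_histograms; LoadNeededHistograms fills Buckets on demand.
func lazyColumn(ctx context.Context, tableID int64, colInfo *model.ColumnInfo, ndv int64) (*Column, error) {
    count, err := columnCountFromStorage(ctx, tableID, colInfo.ID)
    if err != nil {
        return nil, errors.Trace(err)
    }
    return &Column{
        Histogram: Histogram{ID: colInfo.ID, NDV: ndv}, // Buckets intentionally left empty
        Info:      colInfo,
        Count:     count,
    }, nil
}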
2 changes: 1 addition & 1 deletion statistics/selectivity.go
@@ -99,7 +99,7 @@ func (t *Table) Selectivity(ctx context.Context, exprs []expression.Expression)
for _, colInfo := range t.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
// This column should have histogram.
if col != nil && len(colInfo.Histogram.Buckets) > 0 {
if col != nil && !t.ColumnIsInvalid(ctx.GetSessionVars().StmtCtx, col.ID) {
maskCovered, ranges, err := getMaskAndRanges(ctx, exprs, ranger.ColumnRangeType, nil, col)
if err != nil {
return 0, errors.Trace(err)
2 changes: 1 addition & 1 deletion statistics/selectivity_test.go
@@ -89,7 +89,7 @@ func mockStatsTable(tbl *model.TableInfo, rowCount int64) *statistics.Table {
}

func (s *testSelectivitySuite) TestSelectivity(c *C) {
store, dom, err := newStoreWithBootstrap()
store, dom, err := newStoreWithBootstrap(0)
defer func() {
dom.Close()
store.Close()
6 changes: 3 additions & 3 deletions statistics/statistics_test.go
@@ -373,13 +373,13 @@ func (s *testStatisticsSuite) TestPseudoTable(c *C) {
tbl := PseudoTable(ti.ID)
c.Assert(tbl.Count, Greater, int64(0))
sc := new(variable.StatementContext)
count, err := tbl.ColumnLessRowCount(sc, types.NewIntDatum(100), colInfo)
count, err := tbl.ColumnLessRowCount(sc, types.NewIntDatum(100), colInfo.ID)
c.Assert(err, IsNil)
c.Assert(int(count), Equals, 3333)
count, err = tbl.ColumnEqualRowCount(sc, types.NewIntDatum(1000), colInfo)
count, err = tbl.ColumnEqualRowCount(sc, types.NewIntDatum(1000), colInfo.ID)
c.Assert(err, IsNil)
c.Assert(int(count), Equals, 10)
count, err = tbl.ColumnBetweenRowCount(sc, types.NewIntDatum(1000), types.NewIntDatum(5000), colInfo)
count, err = tbl.ColumnBetweenRowCount(sc, types.NewIntDatum(1000), types.NewIntDatum(5000), colInfo.ID)
c.Assert(err, IsNil)
c.Assert(int(count), Equals, 250)
}