Skip to content

Commit

Permalink
*: provide a variable to ignore the real-time stats in the planner (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate committed Sep 1, 2023
1 parent 3036b6d commit 7119410
Show file tree
Hide file tree
Showing 22 changed files with 332 additions and 165 deletions.
2 changes: 1 addition & 1 deletion planner/cardinality/BUILD.bazel
Expand Up @@ -59,7 +59,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":cardinality"],
flaky = True,
shard_count = 31,
shard_count = 32,
deps = [
"//config",
"//domain",
Expand Down
6 changes: 5 additions & 1 deletion planner/cardinality/row_count_column.go
Expand Up @@ -282,9 +282,13 @@ func GetColumnRowCount(sctx sessionctx.Context, c *statistics.Column, ranges []*
// If the current table row count has changed, we should scale the row count accordingly.
cnt *= c.GetIncreaseFactor(realtimeRowCount)

histNDV := c.NDV
if c.StatsVer == statistics.Version2 {
histNDV = histNDV - int64(c.TopN.Num())
}
// handling the out-of-range part
if (c.OutOfRange(lowVal) && !lowVal.IsNull()) || c.OutOfRange(highVal) {
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount)
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV)
}

if debugTrace {
Expand Down
6 changes: 5 additions & 1 deletion planner/cardinality/row_count_index.go
Expand Up @@ -320,9 +320,13 @@ func getIndexRowCountForStatsV2(sctx sessionctx.Context, idx *statistics.Index,
// If the current table row count has changed, we should scale the row count accordingly.
count *= idx.GetIncreaseFactor(realtimeRowCount)

histNDV := idx.NDV
if idx.StatsVer == statistics.Version2 {
histNDV = histNDV - int64(idx.TopN.Num())
}
// handling the out-of-range part
if (outOfRangeOnIndex(idx, l) && !(isSingleCol && lowIsNull)) || outOfRangeOnIndex(idx, r) {
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount)
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV)
}

if debugTrace {
Expand Down
4 changes: 2 additions & 2 deletions planner/cardinality/row_count_test.go
Expand Up @@ -33,7 +33,7 @@ func TestPseudoTable(t *testing.T) {
State: model.StatePublic,
}
ti.Columns = append(ti.Columns, colInfo)
tbl := statistics.PseudoTable(ti)
tbl := statistics.PseudoTable(ti, false)
require.Len(t, tbl.Columns, 1)
require.Greater(t, tbl.RealtimeCount, int64(0))
sctx := mock.NewContext()
Expand All @@ -50,7 +50,7 @@ func TestPseudoTable(t *testing.T) {
Hidden: true,
State: model.StatePublic,
})
tbl = statistics.PseudoTable(ti)
tbl = statistics.PseudoTable(ti, false)
// We added a hidden column. The pseudo table still only has one column.
require.Equal(t, len(tbl.Columns), 1)
}
84 changes: 81 additions & 3 deletions planner/cardinality/selectivity_test.go
Expand Up @@ -173,11 +173,15 @@ func TestOutOfRangeEstimationAfterDelete(t *testing.T) {
testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int unsigned)")
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
// [300, 900)
// 5 rows for each value, 3000 rows in total.
for i := 0; i < 3000; i++ {
testKit.MustExec(fmt.Sprintf("insert into t values (%v)", i/5+300)) // [300, 900)
testKit.MustExec(fmt.Sprintf("insert into t values (%v)", i/5+300))
}
require.Nil(t, h.DumpStatsDeltaToKV(handle.DumpAll))
testKit.MustExec("analyze table t with 1 samplerate, 0 topn")
// Data in [300, 500), 1000 rows in total, are deleted.
// 2000 rows left.
testKit.MustExec("delete from t where a < 500")
require.Nil(t, h.DumpStatsDeltaToKV(handle.DumpAll))
require.Nil(t, h.Update(dom.InfoSchema()))
Expand All @@ -193,9 +197,15 @@ func TestOutOfRangeEstimationAfterDelete(t *testing.T) {
for i := range input {
testdata.OnRecord(func() {
output[i].SQL = input[i]
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
})
testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
if strings.HasPrefix(input[i], "explain") {
testdata.OnRecord(func() {
output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(input[i]).Rows())
})
testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
} else {
testKit.MustExec(input[i])
}
}
}

Expand Down Expand Up @@ -1321,3 +1331,71 @@ func TestCrossValidationSelectivity(t *testing.T) {
"└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)",
" └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false"))
}

// TestIgnoreRealtimeStats checks how the tidb_opt_objective session variable affects row count estimation.
//
// Under 'moderate' the planner uses the real-time stats, so the estimated total row count follows the
// inserted data. Under 'determinate' the real-time stats are ignored: before ANALYZE the pseudo stats table
// is used, and after ANALYZE the estimation stays at what ANALYZE collected, even if more rows are inserted.
func TestIgnoreRealtimeStats(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int, b int, index ib(b))")
	h := dom.StatsHandle()
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))

	// The single statement whose plan is inspected throughout this test.
	const explainSQL = "explain select * from t where a = 1 and b > 2"

	// checkPlan switches the optimizer objective and asserts the resulting plan of explainSQL.
	checkPlan := func(objective string, expected ...string) {
		t.Helper()
		testKit.MustExec("set @@tidb_opt_objective = '" + objective + "'")
		testKit.MustQuery(explainSQL).Check(testkit.Rows(expected...))
	}
	// flushAndReloadStats persists the pending stats delta and reloads the stats cache.
	flushAndReloadStats := func() {
		t.Helper()
		require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
		require.NoError(t, h.Update(dom.InfoSchema()))
	}

	// 1. Insert 11 rows of data without ANALYZE.
	testKit.MustExec("insert into t values(1,1),(1,2),(1,3),(1,4),(1,5),(2,1),(2,2),(2,3),(2,4),(2,5),(3,1)")
	flushAndReloadStats()

	// 1-1. Use real-time stats.
	// From the real-time stats, we are able to know the total count is 11.
	checkPlan("moderate",
		"TableReader_7 0.00 root data:Selection_6",
		"└─Selection_6 0.00 cop[tikv] eq(test.t.a, 1), gt(test.t.b, 2)",
		" └─TableFullScan_5 11.00 cop[tikv] table:t keep order:false, stats:pseudo",
	)

	// 1-2. Ignore real-time stats.
	// Use pseudo stats table. The total row count is 10000.
	checkPlan("determinate",
		"TableReader_7 3.33 root data:Selection_6",
		"└─Selection_6 3.33 cop[tikv] eq(test.t.a, 1), gt(test.t.b, 2)",
		" └─TableFullScan_5 10000.00 cop[tikv] table:t keep order:false, stats:pseudo",
	)

	// 2. After ANALYZE.
	testKit.MustExec("analyze table t with 1 samplerate")
	require.NoError(t, h.Update(dom.InfoSchema()))

	// The execution plans are the same regardless of whether we ignore the real-time stats or not.
	analyzedPlan := []string{
		"TableReader_7 2.73 root data:Selection_6",
		"└─Selection_6 2.73 cop[tikv] eq(test.t.a, 1), gt(test.t.b, 2)",
		" └─TableFullScan_5 11.00 cop[tikv] table:t keep order:false",
	}
	checkPlan("moderate", analyzedPlan...)
	checkPlan("determinate", analyzedPlan...)

	// 3. Insert another 4 rows of data.
	testKit.MustExec("insert into t values(3,2),(3,3),(3,4),(3,5)")
	flushAndReloadStats()

	// 3-1. Use real-time stats.
	// From the real-time stats, we are able to know the total count is 15.
	// Selectivity is not changed: 15 * (2.73 / 11) = 3.72
	checkPlan("moderate",
		"TableReader_7 3.72 root data:Selection_6",
		"└─Selection_6 3.72 cop[tikv] eq(test.t.a, 1), gt(test.t.b, 2)",
		" └─TableFullScan_5 15.00 cop[tikv] table:t keep order:false",
	)

	// 3-2. Ignore real-time stats.
	// The execution plan is the same as case 2.
	checkPlan("determinate", analyzedPlan...)
}
7 changes: 6 additions & 1 deletion planner/cardinality/testdata/cardinality_suite_in.json
Expand Up @@ -111,7 +111,12 @@
"explain format = 'brief' select * from t where a > 900 and a < 1000",
"explain format = 'brief' select * from t where a > 900 and a < 1100",
"explain format = 'brief' select * from t where a > 200 and a < 300",
"explain format = 'brief' select * from t where a > 100 and a < 300"
"explain format = 'brief' select * from t where a > 100 and a < 300",
"set @@tidb_opt_objective = 'determinate'",
"explain format = 'brief' select * from t where a <= 300",
"explain format = 'brief' select * from t where a <= 500",
"explain format = 'brief' select * from t where a > 900",
"explain format = 'brief' select * from t where a <= 900"
]
},
{
Expand Down
36 changes: 36 additions & 0 deletions planner/cardinality/testdata/cardinality_suite_out.json
Expand Up @@ -234,6 +234,42 @@
"└─Selection 832.49 cop[tikv] gt(test.t.a, 100), lt(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "set @@tidb_opt_objective = 'determinate'",
"Result": null
},
{
"SQL": "explain format = 'brief' select * from t where a <= 300",
"Result": [
"TableReader 10.00 root data:Selection",
"└─Selection 10.00 cop[tikv] le(test.t.a, 300)",
" └─TableFullScan 3000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a <= 500",
"Result": [
"TableReader 1010.00 root data:Selection",
"└─Selection 1010.00 cop[tikv] le(test.t.a, 500)",
" └─TableFullScan 3000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 900",
"Result": [
"TableReader 5.00 root data:Selection",
"└─Selection 5.00 cop[tikv] gt(test.t.a, 900)",
" └─TableFullScan 3000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a <= 900",
"Result": [
"TableReader 3000.00 root data:Selection",
"└─Selection 3000.00 cop[tikv] le(test.t.a, 900)",
" └─TableFullScan 3000.00 cop[tikv] table:t keep order:false"
]
}
]
},
Expand Down
67 changes: 65 additions & 2 deletions planner/core/logical_plan_builder.go
Expand Up @@ -4695,6 +4695,7 @@ func (ds *DataSource) AddExtraPhysTblIDColumn() *expression.Column {
// 1. tidb-server started and statistics handle has not been initialized.
// 2. table row count from statistics is zero.
// 3. statistics is outdated.
// Note: please also update getLatestVersionFromStatsTable() when logic in this function changes.
func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64) *statistics.Table {
statsHandle := domain.GetDomain(ctx).StatsHandle()
var usePartitionStats, countIs0, pseudoStatsForUninitialized, pseudoStatsForOutdated bool
Expand All @@ -4717,7 +4718,7 @@ func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64)
}
// 1. tidb-server started and statistics handle has not been initialized.
if statsHandle == nil {
return statistics.PseudoTable(tblInfo)
return statistics.PseudoTable(tblInfo, false)
}

if pid == tblInfo.ID || ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() {
Expand All @@ -4727,11 +4728,35 @@ func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64)
statsTbl = statsHandle.GetPartitionStats(tblInfo, pid, cache.WithTableStatsByQuery())
}

allowPseudoTblTriggerLoading := false
// In OptObjectiveDeterminate mode, we need to ignore the real-time stats.
// To achieve this, we copy the statsTbl and reset the real-time stats fields (set ModifyCount to 0 and set
// RealtimeCount to the row count from the ANALYZE, which is fetched from loaded stats in GetAnalyzeRowCount()).
if ctx.GetSessionVars().GetOptObjective() == variable.OptObjectiveDeterminate {
analyzeCount := max(int64(statsTbl.GetAnalyzeRowCount()), 0)
// If the two fields are already the values we want, we don't need to modify it, and also we don't need to copy.
if statsTbl.RealtimeCount != analyzeCount || statsTbl.ModifyCount != 0 {
// Here is a case that we need to take special care of:
// The original stats table from the stats cache is not a pseudo table, but the analyze row count is 0 (probably
// because no col/idx stats are loaded), which will make it a pseudo table according to rule 2 below.
// Normally, a pseudo table won't trigger stats loading since we assume it means "no stats available", but
// in such a case, we need it to be able to trigger stats loading.
// That's why we use the special allowPseudoTblTriggerLoading flag here.
if !statsTbl.Pseudo && statsTbl.RealtimeCount > 0 && analyzeCount == 0 {
allowPseudoTblTriggerLoading = true
}
// Copy it so we can modify the ModifyCount and the RealtimeCount safely.
statsTbl = statsTbl.ShallowCopy()
statsTbl.RealtimeCount = analyzeCount
statsTbl.ModifyCount = 0
}
}

// 2. table row count from statistics is zero.
if statsTbl.RealtimeCount == 0 {
countIs0 = true
core_metrics.PseudoEstimationNotAvailable.Inc()
return statistics.PseudoTable(tblInfo)
return statistics.PseudoTable(tblInfo, allowPseudoTblTriggerLoading)
}

// 3. statistics is uninitialized or outdated.
Expand All @@ -4751,6 +4776,44 @@ func getStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64)
return statsTbl
}

// getLatestVersionFromStatsTable looks up the statistics of the table (or partition) identified by pid and
// returns the largest LastUpdateVersion found among its Columns and Indices.
//
// It mirrors the decision steps of getStatsTable(), but plan cache matching only needs the latest version,
// so this function skips the copies, allocations and extra checks done there, and it does not touch any
// metrics. It returns 0 in every case where the stats table is treated as a pseudo table here: the stats
// handle is not initialized yet, or the effective row count is zero.
func getLatestVersionFromStatsTable(ctx sessionctx.Context, tblInfo *model.TableInfo, pid int64) (version uint64) {
	handle := domain.GetDomain(ctx).StatsHandle()
	if handle == nil {
		// 1. tidb-server started and statistics handle has not been initialized. Pseudo stats table.
		return 0
	}

	var tbl *statistics.Table
	switch {
	case pid == tblInfo.ID, ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune():
		tbl = handle.GetTableStats(tblInfo, cache.WithTableStatsByQuery())
	default:
		tbl = handle.GetPartitionStats(tblInfo, pid, cache.WithTableStatsByQuery())
	}

	// 2. Table row count from statistics is zero. Pseudo stats table.
	// In OptObjectiveDeterminate mode the row count from ANALYZE (clamped to be non-negative) is used
	// instead of the real-time row count.
	var rowCount int64
	if ctx.GetSessionVars().GetOptObjective() == variable.OptObjectiveDeterminate {
		if analyzed := int64(tbl.GetAnalyzeRowCount()); analyzed > 0 {
			rowCount = analyzed
		}
	} else {
		rowCount = tbl.RealtimeCount
	}
	if rowCount == 0 {
		return 0
	}

	// 3. Not pseudo stats table. Return the max LastUpdateVersion among all Columns and Indices.
	for _, c := range tbl.Columns {
		if c.LastUpdateVersion > version {
			version = c.LastUpdateVersion
		}
	}
	for _, i := range tbl.Indices {
		if i.LastUpdateVersion > version {
			version = i.LastUpdateVersion
		}
	}
	return version
}

func (b *PlanBuilder) tryBuildCTE(ctx context.Context, tn *ast.TableName, asName *model.CIStr) (LogicalPlan, error) {
for i := len(b.outerCTEs) - 1; i >= 0; i-- {
cte := b.outerCTEs[i]
Expand Down
22 changes: 1 addition & 21 deletions planner/core/plan_cache_utils.go
Expand Up @@ -36,7 +36,6 @@ import (
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
driver "github.com/pingcap/tidb/types/parser_driver"
"github.com/pingcap/tidb/util/codec"
Expand Down Expand Up @@ -478,24 +477,6 @@ func GetPreparedStmt(stmt *ast.ExecuteStmt, vars *variable.SessionVars) (*PlanCa
return nil, ErrStmtNotFound
}

// tableStatsVersionForPlanCache reduces the statistics of a table to a single version number: the largest
// LastUpdateVersion found among its columns and indices. A nil table yields 0.
func tableStatsVersionForPlanCache(tStats *statistics.Table) (tableStatsVer uint64) {
	// raise lifts the running maximum to v when v is newer.
	raise := func(v uint64) {
		if tableStatsVer < v {
			tableStatsVer = v
		}
	}
	if tStats != nil {
		for _, c := range tStats.Columns {
			raise(c.LastUpdateVersion)
		}
		for _, i := range tStats.Indices {
			raise(i.LastUpdateVersion)
		}
	}
	return tableStatsVer
}

// GetMatchOpts get options to fetch plan or generate new plan
// we can add more options here
func GetMatchOpts(sctx sessionctx.Context, is infoschema.InfoSchema, stmt *PlanCacheStmt, params []expression.Expression) (*utilpc.PlanCacheMatchOpts, error) {
Expand All @@ -508,8 +489,7 @@ func GetMatchOpts(sctx sessionctx.Context, is infoschema.InfoSchema, stmt *PlanC
if err != nil { // CTE in this case
continue
}
tStats := getStatsTable(sctx, t.Meta(), t.Meta().ID)
statsVerHash += tableStatsVersionForPlanCache(tStats) // use '+' as the hash function for simplicity
statsVerHash += getLatestVersionFromStatsTable(sctx, t.Meta(), t.Meta().ID) // use '+' as the hash function for simplicity
}

for _, node := range stmt.QueryFeatures.limits {
Expand Down
6 changes: 3 additions & 3 deletions planner/core/planbuilder.go
Expand Up @@ -1807,10 +1807,10 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(_ context.Context, dbName m
Ranges: ranger.FullRange(),
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
tblColHists: &(statistics.PseudoTable(tblInfo, false)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
// There is no alternative plan choices, so just use pseudo stats to avoid panic.
is.SetStats(&property.StatsInfo{HistColl: &(statistics.PseudoTable(tblInfo)).HistColl})
is.SetStats(&property.StatsInfo{HistColl: &(statistics.PseudoTable(tblInfo, false)).HistColl})
if hasCommonCols {
for _, c := range commonInfos {
is.Columns = append(is.Columns, c.ColumnInfo)
Expand All @@ -1826,7 +1826,7 @@ func (b *PlanBuilder) buildPhysicalIndexLookUpReader(_ context.Context, dbName m
DBName: dbName,
physicalTableID: physicalID,
isPartition: isPartition,
tblColHists: &(statistics.PseudoTable(tblInfo)).HistColl,
tblColHists: &(statistics.PseudoTable(tblInfo, false)).HistColl,
}.Init(b.ctx, b.getSelectOffset())
ts.SetSchema(idxColSchema)
ts.Columns = ExpandVirtualColumn(ts.Columns, ts.schema, ts.Table.Columns)
Expand Down
2 changes: 1 addition & 1 deletion planner/core/stats.go
Expand Up @@ -68,7 +68,7 @@ func (p *LogicalMemTable) DeriveStats(_ []*property.StatsInfo, selfSchema *expre
if p.StatsInfo() != nil {
return p.StatsInfo(), nil
}
statsTable := statistics.PseudoTable(p.TableInfo)
statsTable := statistics.PseudoTable(p.TableInfo, false)
stats := &property.StatsInfo{
RowCount: float64(statsTable.RealtimeCount),
ColNDVs: make(map[int64]float64, len(p.TableInfo.Columns)),
Expand Down

0 comments on commit 7119410

Please sign in to comment.