Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: adjust estimated rows to account for modified rows #50970

Merged
merged 35 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
fc6fe67
first prototype
terry1purcell Feb 7, 2024
85a081c
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 7, 2024
d370704
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 8, 2024
7544227
second prototype
terry1purcell Feb 8, 2024
dae9e59
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 10, 2024
141c392
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 12, 2024
3b12bee
third prototype
terry1purcell Feb 12, 2024
0ca986e
fourth prototype
terry1purcell Feb 12, 2024
5083417
fiftth prototype
terry1purcell Feb 13, 2024
4d83321
sixth prototype
terry1purcell Feb 13, 2024
aceed63
seventh prototype
terry1purcell Feb 13, 2024
49ec171
format updates
terry1purcell Feb 13, 2024
1e986c8
format updates2
terry1purcell Feb 13, 2024
6e24577
testing updates1
terry1purcell Feb 13, 2024
1bd486b
testing updates2
terry1purcell Feb 13, 2024
2f711a1
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 17, 2024
05d0eea
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 19, 2024
d1bb73b
Merge branch 'pingcap:master' into outofrange
terry1purcell Feb 28, 2024
6c44b60
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 4, 2024
399e8db
Updates
terry1purcell Mar 4, 2024
82a07e0
Correct minor formatting error
terry1purcell Mar 5, 2024
138216a
Correct minor formatting error2
terry1purcell Mar 5, 2024
4d3d0a0
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 6, 2024
61f1194
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 6, 2024
2bb2b5c
update from march 8 review comments
terry1purcell Mar 9, 2024
638a027
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 9, 2024
fd3e01c
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 10, 2024
2ee1658
update from march 8 testcases
terry1purcell Mar 10, 2024
1940437
update from march 8 testcases2
terry1purcell Mar 10, 2024
59f17e5
update from march 8 testcases3
terry1purcell Mar 10, 2024
fde5749
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 11, 2024
56a3ef8
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 11, 2024
7b1c074
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 12, 2024
24b154d
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 12, 2024
143e3fe
Merge branch 'pingcap:master' into outofrange
terry1purcell Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions pkg/planner/cardinality/row_count_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,17 @@ func GetColumnRowCount(sctx context.PlanContext, c *statistics.Column, ranges []
cnt = mathutil.Clamp(cnt, 0, c.TotalRowCount())

// If the current table row count has changed, we should scale the row count accordingly.
cnt *= c.GetIncreaseFactor(realtimeRowCount)
increaseFactor := c.GetIncreaseFactor(realtimeRowCount)
cnt *= increaseFactor

histNDV := c.NDV
if c.StatsVer == statistics.Version2 {
histNDV = histNDV - int64(c.TopN.Num())
}
// handling the out-of-range part
if (c.OutOfRange(lowVal) && !lowVal.IsNull()) || c.OutOfRange(highVal) {
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV)
histNDV := c.NDV
// Exclude the TopN
if c.StatsVer == statistics.Version2 {
histNDV -= int64(c.TopN.Num())
}
cnt += c.Histogram.OutOfRangeRowCount(sctx, &lowVal, &highVal, modifyCount, histNDV, increaseFactor)
}

if debugTrace {
Expand Down
27 changes: 18 additions & 9 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
defer debugtrace.LeaveContextCommon(sctx)
}
totalCount := float64(0)
isSingleCol := len(idx.Info.Columns) == 1
isSingleColIdx := len(idx.Info.Columns) == 1
for _, indexRange := range indexRanges {
var count float64
lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
Expand Down Expand Up @@ -278,7 +278,7 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
l := types.NewBytesDatum(lb)
r := types.NewBytesDatum(rb)
lowIsNull := bytes.Equal(lb, nullKeyBytes)
if isSingleCol && lowIsNull {
if isSingleColIdx && lowIsNull {
count += float64(idx.Histogram.NullCount)
}
expBackoffSuccess := false
Expand Down Expand Up @@ -325,15 +325,24 @@ func getIndexRowCountForStatsV2(sctx context.PlanContext, idx *statistics.Index,
}

// If the current table row count has changed, we should scale the row count accordingly.
count *= idx.GetIncreaseFactor(realtimeRowCount)
increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
count *= increaseFactor

histNDV := idx.NDV
if idx.StatsVer == statistics.Version2 {
histNDV = histNDV - int64(idx.TopN.Num())
}
// handling the out-of-range part
if (outOfRangeOnIndex(idx, l) && !(isSingleCol && lowIsNull)) || outOfRangeOnIndex(idx, r) {
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV)
if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
histNDV := idx.NDV
// Exclude the TopN in Stats Version 2
if idx.StatsVer == statistics.Version2 {
c, ok := coll.Columns[idx.Histogram.ID]
// If this is single column of a multi-column index - use the column's NDV rather than index NDV
isSingleColRange := len(indexRange.LowVal) == len(indexRange.HighVal) && len(indexRange.LowVal) == 1
if isSingleColRange && !isSingleColIdx && ok && c != nil && c.Histogram.NDV > 0 {
histNDV = c.Histogram.NDV - int64(c.TopN.Num())
} else {
histNDV -= int64(idx.TopN.Num())
}
}
count += idx.Histogram.OutOfRangeRowCount(sctx, &l, &r, modifyCount, histNDV, increaseFactor)
}

if debugTrace {
Expand Down
6 changes: 3 additions & 3 deletions pkg/planner/cardinality/testdata/cardinality_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
{
"Start": 800,
"End": 900,
"Count": 723.504166655054
"Count": 735.504166655054
},
{
"Start": 900,
Expand Down Expand Up @@ -79,7 +79,7 @@
{
"Start": 800,
"End": 1000,
"Count": 1181.696869573942
"Count": 1193.696869573942
},
{
"Start": 900,
Expand All @@ -104,7 +104,7 @@
{
"Start": 200,
"End": 400,
"Count": 1190.2788209899081
"Count": 1237.5288209899081
},
{
"Start": 200,
Expand Down
51 changes: 24 additions & 27 deletions pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ func (hg *Histogram) OutOfRange(val types.Datum) bool {
func (hg *Histogram) OutOfRangeRowCount(
sctx context.PlanContext,
lDatum, rDatum *types.Datum,
modifyCount, histNDV int64,
modifyCount, histNDV int64, increaseFactor float64,
) (result float64) {
debugTrace := sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace
if debugTrace {
Expand Down Expand Up @@ -1052,38 +1052,35 @@ func (hg *Histogram) OutOfRangeRowCount(
rightPercent = (math.Pow(boundR-actualL, 2) - math.Pow(boundR-actualR, 2)) / math.Pow(histWidth, 2)
}

totalPercent := leftPercent*0.5 + rightPercent*0.5
if totalPercent > 1 {
totalPercent = 1
}
totalPercent := min(leftPercent*0.5+rightPercent*0.5, 1.0)
rowCount = totalPercent * hg.NotNullCount()

// Upper bound logic
// Upper & lower bound logic.
upperBound := rowCount
if histNDV > 0 {
upperBound = hg.NotNullCount() / float64(histNDV)
}

allowUseModifyCount := sctx.GetSessionVars().GetOptObjective() != variable.OptObjectiveDeterminate
// Use the modifyCount as the upper bound. Note that modifyCount contains insert, delete and update. So this is
// a rather loose upper bound.
// There are some scenarios where we need to handle out-of-range estimation after both insert and delete happen.
// But we don't know how many increases are in the modifyCount. So we have to use this loose bound to ensure it
// can produce a reasonable results in this scenario.
if rowCount > float64(modifyCount) && allowUseModifyCount {
return float64(modifyCount)
}

// In OptObjectiveDeterminate mode, we can't rely on the modify count anymore.
// An upper bound is necessary to make the estimation make sense for predicates with bound on only one end, like a > 1.
// But it's impossible to have a reliable upper bound in all cases.
// We use 1/NDV here (only the Histogram part is considered) and it seems reasonable and good enough for now.

if !allowUseModifyCount {
var upperBound float64
if histNDV > 0 {
upperBound = hg.NotNullCount() / float64(histNDV)
}
if rowCount > upperBound {
return upperBound
}
// In OptObjectiveDeterminate mode, we can't rely on the modify count anymore.
// An upper bound is necessary to make the estimation make sense for predicates with bound on only one end, like a > 1.
// We use 1/NDV here (only the Histogram part is considered) and it seems reasonable and good enough for now.
return min(rowCount, upperBound)
}

// If the modifyCount is large (compared to original table rows), then any out of range estimate is unreliable.
// Assume at least 1/NDV is returned
if float64(modifyCount) > hg.NotNullCount() && rowCount < upperBound {
rowCount = upperBound
} else if rowCount < upperBound {
// Adjust by increaseFactor if our estimate is low
rowCount *= increaseFactor
}
return rowCount

// Use modifyCount as a final bound
return min(rowCount, float64(modifyCount))
}

// Copy deep copies the histogram.
Expand Down