Skip to content

Commit

Permalink
add comments and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
time-and-fate committed Nov 10, 2022
1 parent 686257f commit 355b2ae
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 0 deletions.
6 changes: 6 additions & 0 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,12 @@ func (hg *Histogram) outOfRangeRowCount(lDatum, rDatum *types.Datum, modifyCount
totalPercent = 1
}
rowCount := totalPercent * hg.notNullCount()

// Use the modifyCount as the upper bound. Note that modifyCount contains insert, delete and update. So this is
// a rather loose upper bound.
// There are some scenarios where we need to handle out-of-range estimation after both insert and delete happen.
// But we don't know how many increases are in the modifyCount. So we have to use this loose bound to ensure it
// can produce reasonable results in this scenario.
if rowCount > float64(modifyCount) {
return float64(modifyCount)
}
Expand Down
36 changes: 36 additions & 0 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,42 @@ func TestOutOfRangeEstimation(t *testing.T) {
}
}

// TestOutOfRangeEstimationAfterDelete tests the out-of-range estimation after a deletion happens.
// The test result doesn't perfectly reflect the actual data distribution, but this is the expected behavior for now.
//
// The queries to run and their expected EXPLAIN output are loaded from the stats suite test data.
func TestOutOfRangeEstimationAfterDelete(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	testKit := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	testKit.MustExec("use test")
	testKit.MustExec("drop table if exists t")
	testKit.MustExec("create table t(a int unsigned)")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))

	// Insert 3000 rows; i/5+300 yields every value in [300, 900) exactly 5 times.
	for i := 0; i < 3000; i++ {
		testKit.MustExec(fmt.Sprintf("insert into t values (%v)", i/5+300)) // [300, 900)
	}
	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
	testKit.MustExec("analyze table t with 1 samplerate, 0 topn")

	// The delete runs after analyze, so the statistics collected above predate it.
	// It removes the values in [300, 500), i.e. 1000 of the 3000 rows.
	testKit.MustExec("delete from t where a < 500")
	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
	require.NoError(t, h.Update(dom.InfoSchema()))

	var (
		input  []string
		output []struct {
			SQL    string
			Result []string
		}
	)
	statsSuiteData := statistics.GetStatsSuiteData()
	statsSuiteData.LoadTestCases(t, &input, &output)
	for i, sql := range input {
		// NOTE(review): OnRecord presumably only invokes the callback when the expected
		// output is being regenerated — confirm against the testdata package.
		testdata.OnRecord(func() {
			output[i].SQL = sql
			output[i].Result = testdata.ConvertRowsToStrings(testKit.MustQuery(sql).Rows())
		})
		testKit.MustQuery(sql).Check(testkit.Rows(output[i].Result...))
	}
}

func TestEstimationForUnknownValues(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
testKit := testkit.NewTestKit(t, store)
Expand Down
18 changes: 18 additions & 0 deletions statistics/testdata/stats_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,23 @@
"End": 0
}
]
},
{
"name": "TestOutOfRangeEstimationAfterDelete",
"cases": [
"explain format = 'brief' select * from t where a <= 300",
"explain format = 'brief' select * from t where a < 300",
"explain format = 'brief' select * from t where a <= 500",
"explain format = 'brief' select * from t where a >= 300 and a <= 900",
"explain format = 'brief' select * from t where a >= 900",
"explain format = 'brief' select * from t where a > 900",
"explain format = 'brief' select * from t where a >= 300",
"explain format = 'brief' select * from t where a <= 900",
"explain format = 'brief' select * from t where a > 800 and a < 1000",
"explain format = 'brief' select * from t where a > 900 and a < 1000",
"explain format = 'brief' select * from t where a > 900 and a < 1100",
"explain format = 'brief' select * from t where a > 200 and a < 300",
"explain format = 'brief' select * from t where a > 100 and a < 300"
]
}
]
109 changes: 109 additions & 0 deletions statistics/testdata/stats_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -759,5 +759,114 @@
"Count": 7.5
}
]
},
{
"Name": "TestOutOfRangeEstimationAfterDelete",
"Cases": [
{
"SQL": "explain format = 'brief' select * from t where a <= 300",
"Result": [
"TableReader 1003.33 root data:Selection",
"└─Selection 1003.33 cop[tikv] le(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a < 300",
"Result": [
"TableReader 1000.00 root data:Selection",
"└─Selection 1000.00 cop[tikv] lt(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a <= 500",
"Result": [
"TableReader 1670.00 root data:Selection",
"└─Selection 1670.00 cop[tikv] le(test.t.a, 500)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a >= 300 and a <= 900",
"Result": [
"TableReader 2000.00 root data:Selection",
"└─Selection 2000.00 cop[tikv] ge(test.t.a, 300), le(test.t.a, 900)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a >= 900",
"Result": [
"TableReader 1000.00 root data:Selection",
"└─Selection 1000.00 cop[tikv] ge(test.t.a, 900)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 900",
"Result": [
"TableReader 1000.00 root data:Selection",
"└─Selection 1000.00 cop[tikv] gt(test.t.a, 900)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a >= 300",
"Result": [
"TableReader 2000.00 root data:Selection",
"└─Selection 2000.00 cop[tikv] ge(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a <= 900",
"Result": [
"TableReader 2000.00 root data:Selection",
"└─Selection 2000.00 cop[tikv] le(test.t.a, 900)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 800 and a < 1000",
"Result": [
"TableReader 793.13 root data:Selection",
"└─Selection 793.13 cop[tikv] gt(test.t.a, 800), lt(test.t.a, 1000)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 900 and a < 1000",
"Result": [
"TableReader 458.12 root data:Selection",
"└─Selection 458.12 cop[tikv] gt(test.t.a, 900), lt(test.t.a, 1000)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 900 and a < 1100",
"Result": [
"TableReader 832.49 root data:Selection",
"└─Selection 832.49 cop[tikv] gt(test.t.a, 900), lt(test.t.a, 1100)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 200 and a < 300",
"Result": [
"TableReader 458.12 root data:Selection",
"└─Selection 458.12 cop[tikv] gt(test.t.a, 200), lt(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
},
{
"SQL": "explain format = 'brief' select * from t where a > 100 and a < 300",
"Result": [
"TableReader 832.49 root data:Selection",
"└─Selection 832.49 cop[tikv] gt(test.t.a, 100), lt(test.t.a, 300)",
" └─TableFullScan 2000.00 cop[tikv] table:t keep order:false"
]
}
]
}
]

0 comments on commit 355b2ae

Please sign in to comment.