From 1fb5a9ae14a540fdc5c537f3772dc62ea4388cd3 Mon Sep 17 00:00:00 2001
From: Yuanjia Zhang
Date: Wed, 27 Dec 2023 11:10:57 +0800
Subject: [PATCH] planner: a better way to round scale factor when collecting TopN stats (#49808)

close pingcap/tidb#49801
---
 pkg/statistics/BUILD.bazel      |  2 +-
 pkg/statistics/builder.go       |  4 +---
 pkg/statistics/cmsketch.go      |  7 +++++++
 pkg/statistics/cmsketch_test.go | 20 ++++++++++++++++++++
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel
index eef0f98427c10..6997f6ceb6d1e 100644
--- a/pkg/statistics/BUILD.bazel
+++ b/pkg/statistics/BUILD.bazel
@@ -78,7 +78,7 @@ go_test(
     data = glob(["testdata/**"]),
     embed = [":statistics"],
     flaky = True,
-    shard_count = 34,
+    shard_count = 35,
     deps = [
         "//pkg/config",
         "//pkg/parser/ast",
diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go
index 00e5956239da8..c71247f9bb078 100644
--- a/pkg/statistics/builder.go
+++ b/pkg/statistics/builder.go
@@ -429,12 +429,10 @@ func BuildHistAndTopN(
 				}
 			}
 		}
-		for i := 0; i < len(topNList); i++ {
-			topNList[i].Count *= uint64(sampleFactor)
-		}
 	}
 
 	topn := &TopN{TopN: topNList}
+	topn.Scale(sampleFactor)
 
 	if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) {
 		// TopN includes all sample data
diff --git a/pkg/statistics/cmsketch.go b/pkg/statistics/cmsketch.go
index 5851749238052..eecffae89e09d 100644
--- a/pkg/statistics/cmsketch.go
+++ b/pkg/statistics/cmsketch.go
@@ -536,6 +536,13 @@ type TopN struct {
 	TopN []TopNMeta
 }
 
+// Scale scales the TopN by the given factor.
+func (c *TopN) Scale(scaleFactor float64) {
+	for i := range c.TopN {
+		c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor)
+	}
+}
+
 // AppendTopN appends a topn into the TopN struct.
 func (c *TopN) AppendTopN(data []byte, count uint64) {
 	if c == nil {
diff --git a/pkg/statistics/cmsketch_test.go b/pkg/statistics/cmsketch_test.go
index 0258fc0630102..f97663e443ae6 100644
--- a/pkg/statistics/cmsketch_test.go
+++ b/pkg/statistics/cmsketch_test.go
@@ -266,3 +266,23 @@ func TestSortTopnMeta(t *testing.T) {
 	SortTopnMeta(data)
 	require.Equal(t, uint64(2), data[0].Count)
 }
+
+func TestTopNScale(t *testing.T) {
+	for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} {
+		var data []TopNMeta
+		sumCount := uint64(0)
+		for i := 0; i < 20; i++ {
+			cnt := uint64(rand.Intn(100000))
+			data = append(data, TopNMeta{
+				Count: cnt,
+			})
+			sumCount += cnt
+		}
+		topN := TopN{TopN: data}
+		topN.Scale(scaleFactor)
+		scaleCount := float64(sumCount) * scaleFactor
+		delta := math.Abs(float64(topN.TotalCount()) - scaleCount)
+		roundErrorRatio := delta / scaleCount
+		require.Less(t, roundErrorRatio, 0.0001)
+	}
+}