diff --git a/statistics/builder.go b/statistics/builder.go
index ec116803e952d..22946dd922db6 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -419,10 +419,8 @@ func BuildHistAndTopN(
 		}
 	}
 
-	for i := 0; i < len(topNList); i++ {
-		topNList[i].Count *= uint64(sampleFactor)
-	}
 	topn := &TopN{TopN: topNList}
+	topn.Scale(sampleFactor)
 
 	if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) {
 		// TopN includes all sample data
diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go
index 9406d9eb7a5b2..0757e7a0a3d4d 100644
--- a/statistics/cmsketch.go
+++ b/statistics/cmsketch.go
@@ -495,6 +495,19 @@ type TopN struct {
 	TopN []TopNMeta
 }
 
+// Scale multiplies the count of every TopN item by scaleFactor, in place.
+// Each product is computed in float64 and converted back to uint64, so any
+// fractional part of the scaled count is truncated.
+// Calling Scale on a nil TopN is a no-op.
+func (c *TopN) Scale(scaleFactor float64) {
+	if c == nil {
+		return
+	}
+	for i := range c.TopN {
+		c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor)
+	}
+}
+
 // AppendTopN appends a topn into the TopN struct.
 func (c *TopN) AppendTopN(data []byte, count uint64) {
 	if c == nil {
diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go
index 1585342d8826b..8f9bccf3cfb56 100644
--- a/statistics/cmsketch_test.go
+++ b/statistics/cmsketch_test.go
@@ -390,3 +390,31 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) {
 	require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55")
 	require.Len(t, leftTopN, 1, "should have 1 left topN")
 }
+
+// TestTopNScale checks that scaling the counts of 20 random TopN items keeps
+// the scaled total within a relative error of 0.0001 of the exact product.
+func TestTopNScale(t *testing.T) {
+	for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} {
+		var data []TopNMeta
+		sumCount := uint64(0)
+		for i := 0; i < 20; i++ {
+			cnt := uint64(rand.Intn(100000))
+			data = append(data, TopNMeta{
+				Count: cnt,
+			})
+			sumCount += cnt
+		}
+		topN := TopN{TopN: data}
+		topN.Scale(scaleFactor)
+		// Each count is truncated after scaling, so compare against the exact
+		// product with a relative tolerance instead of requiring equality.
+		scaleCount := float64(sumCount) * scaleFactor
+		delta := math.Abs(float64(topN.TotalCount()) - scaleCount)
+		roundErrorRatio := delta / scaleCount
+		require.Less(t, roundErrorRatio, 0.0001)
+	}
+
+	// Scaling a nil TopN must be a no-op rather than a panic.
+	var nilTopN *TopN
+	require.NotPanics(t, func() { nilTopN.Scale(2) })
+}