forked from ChannelMeter/gohistogram
/
numerichistogram.go
175 lines (144 loc) · 3.65 KB
/
numerichistogram.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
package gohistogram
// Copyright (c) 2013 VividCortex, Inc. All rights reserved.
// Please see the LICENSE file for applicable license terms.
import (
"fmt"
)
type NumericHistogram struct {
bins []bin
maxbins int
total uint64
}
// NewHistogram returns a new NumericHistogram with a maximum of n bins.
//
// There is no "optimal" bin count, but somewhere between 20 and 80 bins
// should be sufficient.
func NewHistogram(n int) *NumericHistogram {
return &NumericHistogram{
bins: make([]bin, 0, n+1),
maxbins: n,
total: 0,
}
}
func (h *NumericHistogram) Reset() {
h.total = 0
h.bins = h.bins[:0]
}
func (h *NumericHistogram) Add(n float64) {
h.total++
for i, bini := range h.bins {
if bini.value == n {
h.bins[i].count++
return
}
if bini.value > n {
// Insert a bin at i
tail := h.bins[i:]
h.bins = h.bins[0 : len(h.bins)+1]
copy(h.bins[i+1:], tail)
h.bins[i] = bin{value: n, count: 1}
h.trim()
return
}
}
h.bins = append(h.bins, bin{count: 1, value: n})
h.trim()
}
func (h *NumericHistogram) Quantile(q float64) float64 {
count := q * float64(h.total)
for i := range h.bins {
count -= float64(h.bins[i].count)
if count <= 0 {
return h.bins[i].value
}
}
return -1
}
// CDF returns the value of the cumulative distribution function
// at x
func (h *NumericHistogram) CDF(x float64) float64 {
count := 0.0
for i := range h.bins {
if h.bins[i].value <= x {
count += float64(h.bins[i].count)
}
}
return count / float64(h.total)
}
// Mean returns the sample mean of the distribution
func (h *NumericHistogram) Mean() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
for i := range h.bins {
sum += h.bins[i].value * h.bins[i].count
}
return sum / float64(h.total)
}
// Variance returns the variance of the distribution
func (h *NumericHistogram) Variance() float64 {
if h.total == 0 {
return 0
}
sum := 0.0
mean := h.Mean()
for i := range h.bins {
sum += (h.bins[i].count * (h.bins[i].value - mean) * (h.bins[i].value - mean))
}
return sum / float64(h.total)
}
func (h *NumericHistogram) Count() float64 {
return float64(h.total)
}
// trim merges adjacent bins to decrease the bin count to the maximum value
func (h *NumericHistogram) trim() {
if len(h.bins) <= 1 {
return
}
for len(h.bins) > h.maxbins {
// Find closest bins in terms of value
minDelta := h.bins[1].value - h.bins[0].value
minDeltaIndex := 1
for i := 2; i < len(h.bins); i++ {
if i <= 1 {
continue
}
if delta := h.bins[i].value - h.bins[i-1].value; delta < minDelta {
minDelta = delta
minDeltaIndex = i
}
}
binMinDeltaIdxSub1 := h.bins[minDeltaIndex-1]
binMinDeltaIdx := h.bins[minDeltaIndex]
// We need to merge bins minDeltaIndex-1 and minDeltaIndex
totalCount := binMinDeltaIdxSub1.count + binMinDeltaIdx.count
mergedbin := bin{
value: (binMinDeltaIdxSub1.value*
binMinDeltaIdxSub1.count +
binMinDeltaIdx.value*
binMinDeltaIdx.count) /
totalCount, // weighted average
count: totalCount, // summed heights
}
// Now remove bin minDeltaIndex by replacing minDeltaIndex-1 with mergedbin
head := h.bins[0:minDeltaIndex]
tail := h.bins[minDeltaIndex+1:]
head[minDeltaIndex-1] = mergedbin
copy(h.bins[minDeltaIndex:], tail)
h.bins = h.bins[:len(h.bins)-1]
}
}
// String returns a string reprentation of the histogram,
// which is useful for printing to a terminal.
func (h *NumericHistogram) String() (str string) {
str += fmt.Sprintln("Total:", h.total)
for i := range h.bins {
var bar string
for j := 0; j < int(float64(h.bins[i].count)/float64(h.total)*200); j++ {
bar += "."
}
str += fmt.Sprintln(h.bins[i].value, "\t", bar)
}
return
}