/
hyperlog.go
83 lines (77 loc) · 1.42 KB
/
hyperlog.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
package main
import (
"math"
"sync"
)
type hyperlog struct {
slot []uint8
numslot uint32
lock *sync.RWMutex
}
const (
SLOT uint32 = 256
SLOTF float64 = float64(SLOT)
twopo32 uint64 = 0x00000100000000
twopo32f float64 = float64(twopo32)
fval float64 = (0.7213 / (1 + 1.079/SLOTF)) * SLOTF * SLOTF
cmp1 float64 = 2.5 * SLOTF
cmp2 float64 = twopo32f / 30.0
)
func newHyperLog() *hyperlog {
slot := make([]uint8, SLOT)
return &hyperlog{slot: slot, numslot: SLOT, lock: &sync.RWMutex{}}
}
func (hpl *hyperlog) addhash(val uint32) {
idx := uint8(val >> 24)
var andwth uint32 = 0x00800000
var leadzs uint32 = 0
i := 23
for {
if (val & andwth) != 0 {
break
}
leadzs += 1
if i == 0 {
break
}
i--
andwth = andwth >> 1
}
leadzs += 1
hpl.lock.Lock()
if hpl.slot[idx] < uint8(leadzs) {
hpl.slot[idx] = uint8(leadzs)
}
hpl.lock.Unlock()
}
func (hpl *hyperlog) count_cardinality() uint64 {
var i uint32 = 0
sum := 0.0
hpl.lock.RLock()
defer hpl.lock.RUnlock()
for i < SLOT {
x := 1 << hpl.slot[i]
sum += 1.0 / float64(x)
i++
}
ret := fval / sum
if ret <= cmp1 {
i = 0
var v float64 = 0
for i < SLOT {
if hpl.slot[i] == 0 {
v++
}
i++
}
if v != 0 {
return uint64(SLOTF * math.Log(SLOTF/v))
} else {
return uint64(ret)
}
} else if ret <= cmp2 {
return uint64(ret)
}
ret = -(twopo32f * math.Log(1.0-ret/twopo32f))
return uint64(ret)
}