forked from matrixorigin/mocache
-
Notifications
You must be signed in to change notification settings - Fork 0
/
robin_hood.go
295 lines (266 loc) · 8.12 KB
/
robin_hood.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
package mocache
import (
"fmt"
"math/bits"
"os"
"runtime/debug"
"strings"
"time"
"github.com/cespare/xxhash"
"github.com/nnsgmsone/mocache/internal/invariants"
)
var hashSeed = uint64(time.Now().UnixNano())
func robinHoodHash(k key, shift uint32) uint32 {
const m = 11400714819323198485
h := hashSeed
h ^= xxhash.Sum64String(k.id)
h ^= k.offset * m
return uint32(h >> shift)
}
type robinHoodEntry struct {
key key
// Note that value may point to a Go allocated object (if the "invariants"
// build tag was specified), even though the memory for the entry itself is
// manually managed. This is technically a volation of the Cgo pointer rules:
//
// https://golang.org/cmd/cgo/#hdr-Passing_pointers
//
// Specifically, Go pointers should not be stored in C allocated memory. The
// reason for this rule is that the Go GC will not look at C allocated memory
// to find pointers to Go objects. If the only reference to a Go object is
// stored in C allocated memory, the object will be reclaimed. What makes
// this "safe" is that the Cache guarantees that there are other pointers to
// the entry and shard which will keep them alive. In particular, every Go
// allocated entry in the cache is referenced by the shard.entries map. And
// every shard is referenced by the Cache.shards map.
value *entry
// The distance the entry is from its desired position.
dist uint32
}
type robinHoodEntries struct {
entries []robinHoodEntry
len uint32
}
func newRobinHoodEntries(n uint32) robinHoodEntries {
return robinHoodEntries{
entries: make([]robinHoodEntry, n),
len: n,
}
}
func (e robinHoodEntries) at(i uint32) *robinHoodEntry {
return &e.entries[i]
}
func (e robinHoodEntries) free() {
}
// robinHoodMap is an implementation of Robin Hood hashing. Robin Hood hashing
// is an open-address hash table using linear probing. The twist is that the
// linear probe distance is reduced by moving existing entries when inserting
// and deleting. This is accomplished by keeping track of how far an entry is
// from its "desired" slot (hash of key modulo number of slots). During
// insertion, if the new entry being inserted is farther from its desired slot
// than the target entry, we swap the target and new entry. This effectively
// steals from the "rich" target entry and gives to the "poor" new entry (thus
// the origin of the name).
//
// An extension over the base Robin Hood hashing idea comes from
// https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/. A cap
// is placed on the max distance an entry can be from its desired slot. When
// this threshold is reached during insertion, the size of the table is doubled
// and insertion is restarted. Additionally, the entries slice is given "max
// dist" extra entries on the end. The very last entry in the entries slice is
// never used and acts as a sentinel which terminates loops. The previous
// maxDist-1 entries act as the extra entries. For example, if the size of the
// table is 2, maxDist is computed as 4 and the actual size of the entry slice
// is 6.
//
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 4 | 5 |
// +---+---+---+---+---+---+
// ^
// size
//
// In this scenario, the target entry for a key will always be in the range
// [0,1]. Valid entries may reside in the range [0,4] due to the linear probing
// of up to maxDist entries. The entry at index 5 will never contain a value,
// and instead acts as a sentinel (its distance is always 0). The max distance
// threshold is set to log2(num-entries). This ensures that retrieval is O(log
// N), though note that N is the number of total entries, not the count of
// valid entries.
//
// Deletion is implemented via the backward shift delete mechanism instead of
// tombstones. This preserves the performance of the table in the presence of
// deletions. See
// http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion
// for details.
type robinHoodMap struct {
entries robinHoodEntries
size uint32
shift uint32
count uint32
maxDist uint32
}
func maxDistForSize(size uint32) uint32 {
desired := uint32(bits.Len32(size))
if desired < 4 {
desired = 4
}
return desired
}
func newRobinHoodMap(initialCapacity int) *robinHoodMap {
m := &robinHoodMap{}
m.init(initialCapacity)
return m
}
func (m *robinHoodMap) init(initialCapacity int) {
if initialCapacity < 1 {
initialCapacity = 1
}
targetSize := 1 << (uint(bits.Len(uint(2*initialCapacity-1))) - 1)
m.rehash(uint32(targetSize))
}
func (m *robinHoodMap) free() {
}
func (m *robinHoodMap) rehash(size uint32) {
oldEntries := m.entries
m.size = size
m.shift = uint32(64 - bits.Len32(m.size-1))
m.maxDist = maxDistForSize(size)
m.entries = newRobinHoodEntries(size + m.maxDist)
m.count = 0
for i := uint32(0); i < oldEntries.len; i++ {
e := oldEntries.at(i)
if e.value != nil {
m.Put(e.key, e.value)
}
}
}
// Find an entry containing the specified value. This is intended to be used
// from debug and test code.
func (m *robinHoodMap) findByValue(v *entry) *robinHoodEntry {
for i := uint32(0); i < m.entries.len; i++ {
e := m.entries.at(i)
if e.value == v {
return e
}
}
return nil
}
func (m *robinHoodMap) Count() int {
return int(m.count)
}
func (m *robinHoodMap) Put(k key, v *entry) {
maybeExists := true
n := robinHoodEntry{key: k, value: v, dist: 0}
for i := robinHoodHash(k, m.shift); ; i++ {
e := m.entries.at(i)
if maybeExists && k == e.key {
// Entry already exists: overwrite.
e.value = n.value
m.checkEntry(i)
return
}
if e.value == nil {
// Found an empty entry: insert here.
*e = n
m.count++
m.checkEntry(i)
return
}
if e.dist < n.dist {
// Swap the new entry with the current entry because the current is
// rich. We then continue to loop, looking for a new location for the
// current entry. Note that this is also the not-found condition for
// retrieval, which means that "k" is not present in the map. See Get().
n, *e = *e, n
m.checkEntry(i)
maybeExists = false
}
// The new entry gradually moves away from its ideal position.
n.dist++
// If we've reached the max distance threshold, grow the table and restart
// the insertion.
if n.dist == m.maxDist {
m.rehash(2 * m.size)
i = robinHoodHash(n.key, m.shift) - 1
n.dist = 0
maybeExists = false
}
}
}
func (m *robinHoodMap) Get(k key) *entry {
var dist uint32
for i := robinHoodHash(k, m.shift); ; i++ {
e := m.entries.at(i)
if k == e.key {
// Found.
return e.value
}
if e.dist < dist {
// Not found.
return nil
}
dist++
}
}
func (m *robinHoodMap) Delete(k key) {
var dist uint32
for i := robinHoodHash(k, m.shift); ; i++ {
e := m.entries.at(i)
if k == e.key {
m.checkEntry(i)
// We found the entry to delete. Shift the following entries backwards
// until the next empty value or entry with a zero distance. Note that
// empty values are guaranteed to have "dist == 0".
m.count--
for j := i + 1; ; j++ {
t := m.entries.at(j)
if t.dist == 0 {
*e = robinHoodEntry{}
return
}
e.key = t.key
e.value = t.value
e.dist = t.dist - 1
e = t
m.checkEntry(j)
}
}
if dist > e.dist {
// Not found.
return
}
dist++
}
}
func (m *robinHoodMap) checkEntry(i uint32) {
if invariants.Enabled {
e := m.entries.at(i)
if e.value != nil {
pos := robinHoodHash(e.key, m.shift)
if (uint32(i) - pos) != e.dist {
fmt.Fprintf(os.Stderr, "%d: invalid dist=%d, expected %d: %s\n%s",
i, e.dist, uint32(i)-pos, e.key, debug.Stack())
os.Exit(1)
}
if e.dist > m.maxDist {
fmt.Fprintf(os.Stderr, "%d: invalid dist=%d > maxDist=%d: %s\n%s",
i, e.dist, m.maxDist, e.key, debug.Stack())
os.Exit(1)
}
}
}
}
func (m *robinHoodMap) String() string {
var buf strings.Builder
fmt.Fprintf(&buf, "count: %d\n", m.count)
for i := uint32(0); i < m.entries.len; i++ {
e := m.entries.at(i)
if e.value != nil {
fmt.Fprintf(&buf, "%d: [%s,%p,%d]\n", i, e.key, e.value, e.dist)
}
}
return buf.String()
}