This repository has been archived by the owner on May 29, 2018. It is now read-only.
/
phtable.go
233 lines (205 loc) · 4.36 KB
/
phtable.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
package phtable
import (
"bytes"
"encoding/binary"
"io"
"io/ioutil"
)
// CHD hash table lookup.
type CHD struct {
// Random hash function table.
r []uint64
// Array of indices into hash function table r. We assume there aren't
// more than 2^16 hash functions O_o
indices []uint16
// Final table of values.
keys [][]byte
values [][]byte
el uint32
StoreKeys bool
ValuesAreVarints bool
valueVarints []uint64
}
func hasher(data []byte) uint64 {
var hash uint64 = 14695981039346656037
for _, c := range data {
hash ^= uint64(c)
hash *= 1099511628211
}
return hash
}
// Read a serialized CHD.
func Read(r io.Reader) (*CHD, error) {
b, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
return Mmap(b, false)
}
func ReadVarints(r io.Reader) (*CHD, error) {
b, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
return Mmap(b, true)
}
// Mmap creates a new CHD aliasing the CHD structure over an existing byte region (typically mmapped).
func Mmap(b []byte, isVarints bool) (*CHD, error) {
c := &CHD{ValuesAreVarints: isVarints}
bi := &sliceReader{b: b}
// Read vector of hash functions.
rl := bi.ReadInt()
c.r = bi.ReadUint64Array(rl)
// Read hash function indices.
il := bi.ReadInt()
c.indices = bi.ReadUint16Array(il)
c.el = bi.ReadInt32()
c.StoreKeys = bi.ReadInt() != 0
if c.StoreKeys {
c.keys = make([][]byte, c.el)
}
if c.ValuesAreVarints {
c.valueVarints = make([]uint64, c.el)
} else {
c.values = make([][]byte, c.el)
}
for i := uint32(0); i < c.el; i++ {
if c.StoreKeys {
kl := bi.ReadUvarint()
c.keys[i] = bi.Read(kl)
}
vl := bi.ReadUvarint()
if c.ValuesAreVarints {
c.valueVarints[i] = vl
} else {
c.values[i] = bi.Read(vl)
}
}
return c, nil
}
func (c *CHD) getIndex(key []byte) (uint64, bool) {
r0 := c.r[0]
h := hasher(key) ^ r0
i := h % uint64(len(c.indices))
ri := c.indices[i]
// This can occur if there were unassigned slots in the hash table.
if ri >= uint16(len(c.r)) {
return 0, false
}
r := c.r[ri]
ti := (h ^ r) % uint64(c.el)
// fmt.Printf("r[0]=%d, h=%d, i=%d, ri=%d, r=%d, ti=%d\n", c.r[0], h, i, ri, r, ti)
return ti, true
}
// Get an entry from the hash table.
func (c *CHD) Get(key []byte) []byte {
if c.ValuesAreVarints {
panic("ValuesAreVarints")
}
ti, found := c.getIndex(key)
if !found {
return nil
}
if c.StoreKeys {
k := c.keys[ti]
if bytes.Compare(k, key) != 0 {
return nil
}
}
return c.values[ti]
}
func (c *CHD) GetUint64(key []byte) (uint64, bool) {
if !c.ValuesAreVarints {
panic("!ValuesAreVarints")
}
ti, found := c.getIndex(key)
if !found {
return 0, false
}
if c.StoreKeys {
k := c.keys[ti]
if bytes.Compare(k, key) != 0 {
return 0, false
}
}
return c.valueVarints[ti], true
}
func (c *CHD) Len() int {
return len(c.keys)
}
// Iterate over entries in the hash table.
func (c *CHD) Iterate() *Iterator {
if len(c.keys) == 0 {
return nil
}
return &Iterator{c: c}
}
// Serialize the CHD. The serialized form is conducive to mmapped access. See
// the Mmap function for details.
func (c *CHD) Write(w io.Writer) error {
write := func(nd ...interface{}) error {
for _, d := range nd {
if err := binary.Write(w, binary.LittleEndian, d); err != nil {
return err
}
}
return nil
}
var storeKeys uint32
if c.StoreKeys {
storeKeys = 1
}
data := []interface{}{
uint32(len(c.r)), c.r,
uint32(len(c.indices)), c.indices,
uint32(c.el),
storeKeys,
}
if err := write(data...); err != nil {
return err
}
vb := make([]byte, binary.MaxVarintLen64)
for i := 0; i < int(c.el); i++ {
if c.StoreKeys {
k := c.keys[i]
n := binary.PutUvarint(vb, uint64(len(k)))
if _, err := w.Write(vb[:n]); err != nil {
return err
}
if _, err := w.Write(k); err != nil {
return err
}
}
if c.ValuesAreVarints {
v := c.valueVarints[i]
n := binary.PutUvarint(vb, v)
if _, err := w.Write(vb[:n]); err != nil {
return err
}
} else {
v := c.values[i]
n := binary.PutUvarint(vb, uint64(len(v)))
if _, err := w.Write(vb[:n]); err != nil {
return err
}
if _, err := w.Write(v); err != nil {
return err
}
}
}
return nil
}
type Iterator struct {
i int
c *CHD
}
func (c *Iterator) Get() (key []byte, value []byte) {
return c.c.keys[c.i], c.c.values[c.i]
}
func (c *Iterator) Next() *Iterator {
c.i++
if c.i >= len(c.c.keys) {
return nil
}
return c
}