//
// Copyright 2024 The Project Oak Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//! This is a key/value hash table optimized for large numbers of small k/v pairs. We assume we
//! bust out of on-chip cache, so speed is limited by cache misses, not the actual operations such
//! as hashing and bit manipulation.
//!
//! The k/v pairs are loaded before any are read, and once they are loaded cannot be modified. We
//! take advantage of these restrictions to build a hash table with far less memory overhead per
//! k/v pair (21 bytes vs ~128 bytes for hashbrown<Bytes, Bytes>). Speed for k/v lookups is ~90ns
//! on manual tests.
//!
//! The hash table consists of 2 vectors. The first is a table of "entries", which is a packed
//! array of entries with the following layout:
//!
//! ```ignore
//! hash_byte: u8,
//! data_index: u40, // A 5-byte index, so we support up to 1TiB of data.
//! ```
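//!
//! Each entry is therefore 6 bytes: the 1-byte hash plus the 5-byte index.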
//!
//! This is a "swiss" table, meaning that instead of the usual "buckets" of entries, the entries
//! are collapsed into the hash table itself. By adding a 1-byte hash, we can eliminate 99.6% of
//! key comparisons against the wrong keys. This matters for speed because comparing keys usually
//! causes a page fault, while comparing the 1-byte hash is usually an L1 cache hit. The number
//! of cache misses per lookup should be close to 2 for values found in the table, and 1 for
//! values not in the table.
//!
//! The storage for k/v pairs is chunked into 2MiB vectors:
//!
//! ```ignore
//! data_chunks: Vec<Box<[u8]>>,
//! ```
//!
//! `data_chunks` starts empty, and as data is added to the table, we allocate another chunk. The
//! format for each k/v pair of data in each chunk is:
//!
//! ```ignore
//! key_len: <compressed u30>
//! key: [u8]
//! value_len: <compressed u30>
//! value: [u8]
//! ```
//!
//! Another memory savings comes from using a swiss table with exactly a 60% load factor. Swiss
//! tables require a significant portion of the table be empty, and normally as the table grows,
//! they dynamically double the size. At least when using the alloc crate, this resize causes a
//! new table to be allocated at 2X the size, data is copied from the old table to the new, and the
//! old table is freed. This results in a 3X expansion in memory usage vs the prior array size.
//! In contrast, this scheme has only 40% of the table empty and is never resized.
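//! For example, reserving 1M entries allocates about 1.67M table slots (~10MB at 6 bytes per
//! entry) up front, and that allocation is never moved or resized.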
//!
//! There are certain limits imposed by this structure:
//!
//! * Every k/v pair must fit into a 2MiB chunk.
//! * The total memory allocated to keys and values, and their compressed lengths, is < 1TiB.
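//!
//! A minimal usage sketch (illustrative, exercising only the public API defined below):
//!
//! ```ignore
//! let mut table = LookupHtbl::default();
//! table.reserve(2); // Size the table up front; growing it later is expensive.
//! table.insert(b"key1", b"value1");
//! table.insert(b"key2", b"value2");
//! assert_eq!(table.get(b"key1"), Some(&b"value1"[..]));
//! assert!(table.get(b"missing").is_none());
//! ```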
use alloc::{boxed::Box, vec, vec::Vec};
use core::mem;
use bytes::Bytes;
use rand_core::{OsRng, RngCore};
// To save memory, we use 5 byte array index values instead of 8 bytes. This saves 12 bytes per
// k/v pair. It is referred to as u40 below.
const INDEX_SIZE: usize = 5;
#[derive(Clone, Copy, Default)]
struct Entry {
hash_byte: u8,
data_index: [u8; INDEX_SIZE],
}
#[derive(Default)]
pub struct LookupHtbl {
table: Vec<Entry>, // Contains fixed-size entries with index of k/v data in data_chunks.
data_chunks: Vec<Box<[u8]>>,
max_entries: usize, // The number at which we must grow the table.
used_data: usize,
used_entries: usize,
hash_secret: u64, // For defense against DDoS, and some defense vs cache timing.
// A "data index" is a 5-byte value where the upper bits select the chunk, and the lower bits
// index into the chunk to find a k/v pair. The chunk index is data_index >> CHUNK_BITS.
// The index within the chunk is data_index & CHUNK_MASK. CHUNK_BITS defines how large a
// chunk is, which is computed as 2^CHUNK_BITS. To change the chunk size, just change
// CHUNK_BITS. This could be important for performance as the data_chunk array grows
// beyond what fits in cache.
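// For example, with chunk_bits = 21 (2MiB chunks), data_index 0x40_0005 refers to byte
// offset 5 within chunk 2.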
chunk_bits: usize,
chunk_size: usize,
chunk_mask: usize,
}
// Returned by LookupHtbl::lookup.
enum LookupResult {
Found(usize, usize), // (table_index: u40, data_index: u40)
NotFound(usize, u8), // (table_index: u40, hash_byte: u8)
}
impl LookupHtbl {
/// Set the initial size of self.table. For best speed and memory use, this should be as large
/// as the number of key/value entries that will be loaded. NOTE: Only call this once, before
/// storing anything in the table.
pub fn reserve(&mut self, max_entries: usize) {
if self.used_entries != 0 {
return; // Too late to reserve memory.
}
// Use a load factor of 60%.
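// allocated_entries ≈ max_entries / 0.6, i.e. (5 / 3) * max_entries.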
let allocated_entries = (5 * max_entries + 1) / 3;
self.max_entries = max_entries;
self.table = vec![Entry::default(); allocated_entries];
self.data_chunks = vec![];
// Initialize used_data to 1 so that we can use 0 as the null value. As a result, the first
// byte of the first chunk is never used.
self.used_data = 1usize;
self.used_entries = 0usize;
self.hash_secret = OsRng.next_u64();
if self.hash_secret == 0u64 {
panic!("There is something very wrong with OsRng");
}
// Set the chunk size such that chunks are about 1/10th the size of the entries table.
let mut chunk_size = allocated_entries.next_power_of_two();
if chunk_size < 1 << 21 {
// The minimum chunk size is 2MiB.
chunk_size = 1 << 21;
}
self.chunk_size = chunk_size;
self.chunk_mask = chunk_size - 1;
self.chunk_bits = self.chunk_size.ilog2() as usize;
}
// Look up the key/value pair. If found, return LookupResult::Found(table_index, data_index).
// Otherwise, return LookupResult::NotFound(table_index, hash_byte).
fn lookup(&self, key: &[u8]) -> LookupResult {
if self.table.is_empty() {
panic!("Check for emtpy before calling lookup");
}
let key_hash: u64 = hash(key, self.hash_secret);
// The lower 32 bits are used by reduce to compute the table index, so values next to each
// other in the table have their lower 32 bits close to each other. The next higher byte
// should have no significant correlation for unequal keys.
let key_hash_byte = (key_hash >> 32) as u8;
let mut table_index = reduce(key_hash as u32, self.table.len() as u32);
// To quickly find the correct entry, we skip entries whose hash_byte values don't match the
// key's hash. This avoids 99.6% of the cache misses caused by comparing the key against the
// wrong key. Once we find an entry with a matching hash byte, we compare the two keys and
// return Found if they match. Otherwise, we start back at the top of the loop.
loop {
// We try to load only the hash byte to save time. Once we have a matching hash byte,
// the loop ends and we compare keys.
let mut entry = &self.table[table_index];
while entry.hash_byte != key_hash_byte && entry.hash_byte != 0 {
table_index += 1;
if table_index == self.table.len() {
table_index = 0;
}
entry = &self.table[table_index];
}
let data_index = read_index(entry);
if data_index == 0usize {
return LookupResult::NotFound(table_index, key_hash_byte);
}
if entry.hash_byte == key_hash_byte && key == self.read_key(data_index) {
return LookupResult::Found(table_index, data_index);
}
table_index += 1;
if table_index == self.table.len() {
table_index = 0;
}
}
}
// Read the key from its data chunk.
#[inline]
fn read_key(&self, data_index: usize) -> &[u8] {
let chunk: &[u8] = &self.data_chunks[data_index >> self.chunk_bits];
let index = data_index & self.chunk_mask;
let (key_len, key_len_size) = read_len(chunk, index);
let key_index = index + key_len_size;
&chunk[key_index..key_index + key_len]
}
/// This is like HashMap::contains_key.
pub fn contains_key(&self, key: &[u8]) -> bool {
if self.table.is_empty() {
return false;
}
matches!(self.lookup(key), LookupResult::Found(_, _))
}
/// This is like HashMap::get.
pub fn get(&self, key: &[u8]) -> Option<&[u8]> {
if self.is_empty() {
return None;
}
match self.lookup(key) {
LookupResult::Found(_, data_index) => Some(self.read_value(data_index)),
LookupResult::NotFound(_, _) => None,
}
}
// Return the value associated with the k/v pair at data_index.
fn read_value(&self, data_index: usize) -> &[u8] {
let chunk: &[u8] = &self.data_chunks[data_index >> self.chunk_bits];
let index = data_index & self.chunk_mask;
let (key_len, key_len_size) = read_len(chunk, index);
let value_len_index = index + key_len_size + key_len;
let (value_len, value_len_size) = read_len(chunk, value_len_index);
let value_index = value_len_index + value_len_size;
&chunk[value_index..value_index + value_len]
}
/// Insert a k/v pair into the table. Returns None if the key was not already in the table,
/// and Some(&[u8]) referring to the prior value otherwise.
pub fn insert(&mut self, key: &[u8], value: &[u8]) -> Option<&[u8]> {
if self.table.is_empty() {
self.reserve(1);
} else if self.used_entries >= self.max_entries {
// This should never happen if the needed space is reserved first.
self.grow_table();
}
let data_index = self.new_data(key, value);
match self.lookup(key) {
LookupResult::Found(table_index, old_data_index) => {
let entry = &mut self.table[table_index];
write_index(entry, data_index);
Some(self.read_value(old_data_index))
}
LookupResult::NotFound(table_index, hash_byte) => {
let entry = &mut self.table[table_index];
entry.hash_byte = hash_byte;
write_index(entry, data_index);
self.used_entries += 1;
None
}
}
}
// Growing temporarily increases the table's memory usage by 3X, and permanently by 2X.
fn grow_table(&mut self) {
if self.max_entries == 0 {
self.reserve(1);
return;
}
let old_table = mem::take(&mut self.table);
self.table = vec![Entry::default(); old_table.len() << 1];
self.max_entries <<= 1;
#[allow(clippy::all)]
for i in 0..old_table.len() {
let raw_index = read_index(&old_table[i]);
if raw_index != 0 {
match self.lookup(self.read_key(raw_index)) {
LookupResult::NotFound(table_index, hash_byte) => {
let entry = &mut self.table[table_index];
entry.hash_byte = hash_byte;
write_index(entry, raw_index);
}
_ => panic!("New table should not already have this key"),
}
}
}
}
// Create a k/v pair in the most recent chunk. Create a new chunk if this would overflow.
fn new_data(&mut self, key: &[u8], value: &[u8]) -> usize {
// Check to see if we need a new chunk. INDEX_SIZE is also the max for compressed ints.
let additional_data_len = 2 * INDEX_SIZE + key.len() + value.len();
assert!(additional_data_len < self.chunk_size);
let end_index = self.used_data + additional_data_len;
let chunk_index = end_index >> self.chunk_bits;
if self.data_chunks.len() <= chunk_index {
let values = Box::<[u8]>::new_zeroed_slice(self.chunk_size);
// Safety:
// This is safe because we allocated the slice zeroed, which initializes every u8 to 0,
// and 0 is a valid value for u8, so the slice is fully initialized.
let values = unsafe { values.assume_init() };
self.data_chunks.push(values);
if chunk_index == 0 {
self.used_data = 1; // Address 0 is NULL.
} else {
self.used_data = self.chunk_size * chunk_index;
}
}
let chunk: &mut [u8] = &mut self.data_chunks[chunk_index];
let index = self.used_data & self.chunk_mask;
let key_index = index + write_len(chunk, index, key.len());
chunk[key_index..key_index + key.len()].copy_from_slice(key);
let value_len_index = key_index + key.len();
let value_index = value_len_index + write_len(chunk, value_len_index, value.len());
chunk[value_index..value_index + value.len()].copy_from_slice(value);
let used_data = self.used_data;
self.used_data += value_index + value.len() - index;
used_data
}
/// This is like HashMap::extend.
pub fn extend<T: IntoIterator<Item = (Bytes, Bytes)>>(&mut self, new_data: T) {
for (key, value) in new_data {
self.insert(&key, &value);
}
}
/// Return the number of entries in the hash table.
pub fn len(&self) -> usize {
self.used_entries
}
/// Return true if there are no entries in the hash table.
pub fn is_empty(&self) -> bool {
self.used_entries == 0
}
/// Return an iterator that can be used to iterate through k/v pairs.
pub fn iter(&self) -> LookupHtblIter {
LookupHtblIter {
htbl: self,
table_index: 0,
}
}
}
pub struct LookupHtblIter<'a> {
htbl: &'a LookupHtbl,
table_index: usize,
}
impl<'a> Iterator for LookupHtblIter<'a> {
type Item = (&'a [u8], &'a [u8]);
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.table_index >= self.htbl.table.len() {
return None;
}
let raw_index = read_index(&self.htbl.table[self.table_index]);
self.table_index += 1;
if raw_index != 0usize {
let chunk: &[u8] = &self.htbl.data_chunks[raw_index >> self.htbl.chunk_bits];
let index = raw_index & self.htbl.chunk_mask;
let (key_len, key_len_size) = read_len(chunk, index);
let key_index = index + key_len_size;
let value_len_index = key_index + key_len;
let (value_len, value_len_size) = read_len(chunk, value_len_index);
let value_index = value_len_index + value_len_size;
return Some((
&chunk[key_index..key_index + key_len],
&chunk[value_index..value_index + value_len],
));
}
}
}
}
impl<'a> IntoIterator for &'a LookupHtbl {
type Item = (&'a [u8], &'a [u8]);
type IntoIter = LookupHtblIter<'a>;
// Required method
fn into_iter(self) -> Self::IntoIter {
LookupHtblIter {
htbl: self,
table_index: 0usize,
}
}
}
// reduce maps a random value in [0, 2^32) to [0, n), which is used to compute the index into
// the hash table. This is faster than computing x % n.
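// For example, reduce(1 << 31, 100) == 50, since (2^31 * 100) >> 32 == 50.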
fn reduce(x: u32, n: u32) -> usize {
(((x as u64) * (n as u64)) >> 32) as usize
}
// Read a u40 index from unaligned memory LE, and return it as a usize.
#[inline]
fn read_index(entry: &Entry) -> usize {
let mut val = [0u8; 8];
val[0..INDEX_SIZE].copy_from_slice(&entry.data_index);
u64::from_le_bytes(val) as usize
}
// Write a u40 index to unaligned memory LE.
#[inline]
fn write_index(entry: &mut Entry, value: usize) {
entry
.data_index
.copy_from_slice(&value.to_le_bytes()[0..INDEX_SIZE]);
}
// Read a compressed integer length. Returns (length, num bytes of length). Compressed integers
// for lengths are <2-bit num_bytes><6-bit MSBs>. num_bytes is 0 for small integers < 64.
// Lengths up to 2^30 can be represented, limiting keys and values to 1GiB each.
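// For example, write_len stores 300 as [0x41, 0x2C]: the first byte packs num_bytes = 1 into
// its top two bits and the MSBs (1) into its low six bits, and read_len reassembles
// (1 << 8) | 0x2C = 300.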
#[inline]
fn read_len(data: &[u8], index: usize) -> (usize, usize) {
let first_byte = data[index] as usize;
if first_byte < 64 {
return (first_byte, 1usize);
}
let num_bytes = first_byte >> 6;
let mut val = [0u8; 8];
val[0..num_bytes].copy_from_slice(&data[index + 1..index + 1 + num_bytes]);
let len = u64::from_le_bytes(val) as usize;
let len = len | ((first_byte & 0x3f) << (8 * num_bytes));
(len, 1 + num_bytes)
}
// Write a compressed length to data. Return the number of bytes used to represent the length.
#[inline]
fn write_len(data: &mut [u8], index: usize, mut len: usize) -> usize {
if len < 64 {
data[index] = len as u8;
return 1usize;
}
let mut num_bytes = 1usize;
while len >= 64 {
data[index + num_bytes] = len as u8;
len >>= 8;
num_bytes += 1;
}
let first_byte = ((num_bytes - 1) << 6 | len) as u8;
data[index] = first_byte;
num_bytes
}
// This hash defends against DDoS attacks and performs 2 rounds of mixing in order to pass the
// dieharder tests.
#[inline]
fn hash_u64(v: u64, hash_secret: u64) -> u64 {
let v1 = u64::wrapping_mul((v ^ hash_secret) ^ (v >> 32), 0x9d46_0858_ea81_ac79);
u64::wrapping_add(v1 ^ v, v1 >> 32)
}
#[inline]
fn hash(v: &[u8], hash_secret: u64) -> u64 {
let mut i = 0usize;
let mut val = 0u64;
let mut bytes = [0u8; 8];
while i + 8 <= v.len() {
bytes.copy_from_slice(&v[i..i + 8]);
let digit = u64::from_le_bytes(bytes);
val = hash_u64(val ^ digit, hash_secret);
i += 8;
}
if i < v.len() {
bytes[0..v.len() - i].copy_from_slice(&v[i..v.len()]);
let digit: u64 = u64::from_le_bytes(bytes);
val = hash_u64(val ^ digit, hash_secret);
}
val
}
#[cfg(test)]
mod tests {
use super::*;
const NUM_KEYS: usize = 1_000_000usize;
const NUM_LOOKUPS: usize = 1_000_000usize;
const AVE_VALUE_SIZE: usize = 60;
#[test]
fn test_lookup_htbl() {
let mut table = LookupHtbl::default();
table.reserve(NUM_KEYS);
let mask: u64 = (NUM_KEYS.next_power_of_two() - 1) as u64;
let mut num_keys = 0usize;
let mut key_bytes = [0u8; 8];
while num_keys < NUM_KEYS {
let key = num_keys as u64;
key_bytes.copy_from_slice(&key.to_le_bytes());
if !table.contains_key(&key_bytes) {
let mut value = [key_bytes[1]; AVE_VALUE_SIZE];
value[0] = key_bytes[0];
value[AVE_VALUE_SIZE - 1] = key_bytes[7];
table.insert(&key_bytes, &value);
num_keys += 1;
} else {
panic!("Should not already have this key");
}
}
let mut hits = 0;
let mut misses = 0;
let mut total = 0u64;
for i in 0u64..NUM_LOOKUPS as u64 {
let key = hash_u64(i, 0) & mask;
match table.get(&key.to_le_bytes()) {
Some(val) => {
hits += 1;
total += val[5] as u64;
assert!(val[0] == (key as u8));
}
None => misses += 1,
}
}
assert!(hits != 0 && misses != 0 && total != 0);
}
#[test]
fn test_for_loop() {
let keys = ["key1".as_bytes(), "key2".as_bytes(), "key3".as_bytes()];
let values = [
"value1".as_bytes(),
"value2".as_bytes(),
"value3".as_bytes(),
];
let mut table = LookupHtbl::default();
table.reserve(3);
for i in 0..3 {
table.insert(keys[i], values[i]);
}
let mut found = [false, false, false];
for (key, value) in &table {
if key == keys[0] {
found[0] = true;
assert!(value == values[0]);
} else if key == keys[1] {
found[1] = true;
assert!(value == values[1]);
} else if key == keys[2] {
found[2] = true;
assert!(value == values[2]);
}
}
assert!(found[0] && found[1] && found[2]);
}
// This further tests the hash function.
struct Rand {
seed: u64,
hash_secret: u64,
}
impl Rand {
fn rand64(&mut self) -> u64 {
self.seed = hash_u64(self.seed, self.hash_secret);
self.seed
}
fn rand_range(&mut self, n: usize) -> usize {
self.rand64() as usize % n
}
fn rand_bytes(&mut self, len: usize) -> Vec<u8> {
let mut v = vec![];
for _ in 0..len {
v.push(self.seed as u8);
self.seed = hash_u64(self.seed, self.hash_secret);
}
v
}
fn rand_kv_pair(&mut self, key_len: usize, value_len: usize) -> (Vec<u8>, Vec<u8>) {
let key_len = self.rand_range(key_len);
let value_len = self.rand_range(value_len);
let key = self.rand_bytes(key_len);
let value = self.rand_bytes(value_len);
(key, value)
}
}
#[test]
fn test_rand_vals() {
let mut r = Rand {
seed: 0u64,
hash_secret: 0xdeadbeefu64,
};
let mut kv_pairs: Vec<(Vec<u8>, Vec<u8>)> = vec![];
let mut table = LookupHtbl::default();
for _ in 0..10_000 {
loop {
let kv_pair = r.rand_kv_pair(100, 1000);
if !table.contains_key(&kv_pair.0) {
table.insert(&kv_pair.0, &kv_pair.1);
kv_pairs.push(kv_pair);
break;
}
}
}
for kv_pair in kv_pairs {
assert!(table.get(&kv_pair.0) == Some(&kv_pair.1));
}
}
#[test]
fn test_duplicate_key() {
let mut table = LookupHtbl::default();
table.insert("key".as_bytes(), "value1".as_bytes());
assert!(table.get("key".as_bytes()) == Some("value1".as_bytes()));
table.insert("key".as_bytes(), "value2".as_bytes());
assert!(table.get("key".as_bytes()) == Some("value2".as_bytes()));
}
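// A round-trip check of the compressed length encoding (an illustrative sketch; the boundary
// values below are assumptions, chosen to cross each encoded-width boundary).
#[test]
fn test_len_roundtrip() {
let mut buf = [0u8; 16];
for &len in &[0usize, 1, 63, 64, 255, 300, 65_535, 1 << 20, (1 << 30) - 1] {
let written = write_len(&mut buf, 0, len);
assert_eq!(read_len(&buf, 0), (len, written));
}
}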
// The RNG function should act like a random oracle, in which case the odds of seeing the same
// value as one that came before are determined by the Birthday Problem. Using the rule of
// thumb for how long the sequence needs to be to have a 0.5 probability of collision:
// sqrt(2 * ln(2) * 2^64) ~= 5.1e9. This test just checks that one seed leads to a sequence
// longer than 2^28 without collisions.
//
// This is a linear-time algorithm for finding the cycle length of an RNG.
#[test]
fn test_rng_sequence_len() {
let mut r = Rand {
seed: 0u64,
hash_secret: 1u64,
};
for loop_power in 0..28 {
let target = r.rand64();
for i in 0usize..(1usize << loop_power) {
if r.rand64() == target {
if i < 1 << 28 {
panic!("Sequence too short");
}
return;
}
}
}
}
// Test that the hash function yields the expected 2.54 average sequence length in a swiss
// table with 50% load factor.
#[test]
fn test_swiss_collisions() {
let table_len = 1usize << 28;
let mut table: Vec<bool> = Vec::default();
table.resize(table_len, false);
let hash_secret = 0x123;
for i in 0..table_len >> 1 {
let mut h = (hash_u64(i as u64, hash_secret) & (table_len as u64 - 1)) as usize;
while table[h] {
h += 1;
if h == table.len() {
h = 0;
}
}
table[h] = true;
}
let mut len = 0;
let mut total_len = 0;
let mut num_seq = 0;
#[allow(clippy::all)]
for i in 0..table_len {
if table[i] {
len += 1;
} else if len != 0 {
num_seq += 1;
total_len += len;
len = 0;
}
}
let ave_seq_len = total_len as f32 / num_seq as f32;
assert!((ave_seq_len - 2.5415f32).abs() < 0.01);
}
}