forked from decred/dcrd
/
filter.go
644 lines (584 loc) · 24.7 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
// Copyright (c) 2021-2022 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
//go:generate go run generateapbftable.go
// Package apbf implements an optimized Age-Partitioned Bloom Filter.
package apbf
import (
"encoding/binary"
"math"
"math/bits"
"sync"
"time"
"github.com/dchest/siphash"
)
// References:
// [APBF] Age-Partitioned Bloom Filters (Shtul, Baquero, Almeida)
// https://arxiv.org/pdf/2001.03147.pdf
//
// [LHSP] Less Hashing, Same Performance: Building a Better Bloom Filter
// (Kirsch, Mitzenmacher)
//
// [BFPV] Bloom Filters in Probabilistic Verification (Dillinger, Manolis)
// calcFPRateKey is used as the key when caching results for the false positive
// calculations.
type calcFPRateKey struct {
a, i uint16
}
// calcFPRateInternal calculates and returns the false positive rate for the
// provided parameters using the given results map to cache intermediate
// results.
func calcFPRateInternal(results map[calcFPRateKey]float64, k, l uint8, a, i uint16) float64 {
// The false positive rate is calculated according to the following
// recursively-defined function provided in [APBF]:
//
// {1 , if a = k
// F(k,l,a,i) = {0 , if i > l + a
// {(r_i)F(k,l,a+1,i+1) + (1-r_i)F(k,l,0,i+1), otherwise
if a == uint16(k) {
return 1
} else if i > uint16(l)+a {
return 0
}
// Return stored results to avoid a bunch of duplicate work.
resultKey := calcFPRateKey{a, i}
if result, ok := results[resultKey]; ok {
return result
}
// Calculate the fill ratio for the slice.
fillRatio := float64(0.5)
if i < uint16(k) {
fillRatio = float64(i+1) / float64(2*k)
}
firstTerm := fillRatio * calcFPRateInternal(results, k, l, a+1, i+1)
secondTerm := (1 - fillRatio) * calcFPRateInternal(results, k, l, 0, i+1)
result := firstTerm + secondTerm
results[resultKey] = result
return result
}
// CalcFPRate calculates and returns the false positive rate for an APBF
// created with the given parameters.
//
// NOTE: This involves allocations, so the result should be cached by the caller
// if it intends to use it multiple times.
//
// This function is safe for concurrent access.
func CalcFPRate(k, l uint8) float64 {
results := make(map[calcFPRateKey]float64, 2*uint16(k)*uint16(l))
return calcFPRateInternal(results, k, l, 0, 0)
}
// Filter implements an Age-Partitioned Bloom Filter (APBF) that is safe for
// concurrent access.
//
// An APBF is a probabilistic data structure suitable for use in processing
// unbounded data streams where more recent items are more significant than
// older ones and some false positives are acceptable. It has similar
// computational costs as traditional Bloom filters and provides space
// efficiency that is competitive with the current best-known, and more complex,
// Dictionary approaches.
//
// Similar to classic Bloom filters, APBFs have a non-zero probability of false
// positives that can be tuned via parameters and are free from false negatives
// up to the capacity of the filter. However, unlike classic Bloom filters,
// where the false positive rate climbs as items are added until all queries are
// a false positive, APBFs provide a configurable upper bound on the false
// positive rate for an unbounded number of additions.
//
// The unbounded property is achieved by adding and retiring disjoint segments
// over time where each segment is a slice of a partitioned Bloom filter. The
// slices are conceptually aged over time as new items are added by shifting
// them and discarding the oldest one.
//
// See the NewFilter documentation for details regarding tuning parameters and
// expected approximate total space usage.
type Filter struct {
// k is the number of slices which need consecutive matches for an item to
// be considered in the filter. This is similar to the k parameter of
// static bloom filters which use exactly k hash functions for k different
// slices, except here it is the number of consecutive matches in a larger
// sequence of distinct slices determined by this parameter along with the l
// parameter below to preserve the false positive rate.
k uint8
// l is the additional number of slices which comprise the region of items
// that are transitioning to expired.
//
// The sum of this parameter and k above determines the total number of
// slices, which in turn determines the false positive rate and also
// influences the filter capacity.
l uint8
// kPlusL is simply the sum of the k and l parameters and is stored to avoid
// calculating it repeatedly. It is the total number of slices.
kPlusL uint16
// bitsPerSlice is the number of physical bits occupied by each slice that
// comprise the overall filter.
bitsPerSlice uint64
// itemsPerGeneration is the number of items per generation. Conceptually,
// the slices are aged every time this number of items is inserted into the
// filter resulting in items inserted in older generations expiring. In
// other words, the items are effectively partitioned by age.
itemsPerGeneration uint32
// ****************************************************************
// The fields below this point are protected by the embedded mutex.
// ****************************************************************
mtx sync.Mutex
// key0 and key1 are used to seed the hash function in order to ensure
// attackers are not able to intentionally grind false positives that would
// otherwise affect the entire network if all nodes were calculating the
// same hash values.
key0, key1 uint64
// itemsInCurGeneration is the number of items that have been added to the
// current generation. The slices are aged once the number of items per
// generation is reached by conceptually shifting them to subsequent slices
// and throwing away the overflow from the final slice. In practice, the
// shifting is done via logical shifts in a ring buffer by keeping track of
// the index of the first logical slice (called the base index).
itemsInCurGeneration uint32
// baseIndex is the index of the position of the first slice within the ring
// buffer.
baseIndex uint16
// data is the actual filter data implemented as a packed ring buffer where
// the individual filter slices (partitions) perform logical shifts.
data []byte
}
// Capacity returns the max number of items that were most recently added which
// are guaranteed to return true. Adding more items than the returned value
// will cause the oldest items to be expired.
//
// This function is safe for concurrent access.
func (f *Filter) Capacity() uint32 {
return uint32(f.l+1) * f.itemsPerGeneration
}
// FPRate calculates and returns the actual false positive rate for the filter.
//
// NOTE: This involves allocations, so the result should be cached by the caller
// if it intends to use it multiple times.
//
// This function is safe for concurrent access.
func (f *Filter) FPRate() float64 {
return CalcFPRate(f.k, f.l)
}
// Size returns the total bytes occupied by the filter data plus overhead.
//
// This function is safe for concurrent access.
func (f *Filter) Size() int {
const overhead = 70
f.mtx.Lock()
result := len(f.data) + overhead
f.mtx.Unlock()
return result
}
// K returns the filter configuration parameter for the number of slices that
// need consecutive matches. It is one of the tunable parameters that determine
// the overall capacity and false positive rate of the filter.
//
// This function is safe for concurrent access.
func (f *Filter) K() uint8 {
return f.k
}
// L returns the filter configuration parameter for the number of additional
// slices. It is one of the tunable parameters that determine the overall
// capacity and false positive rate of the filter.
//
// This function is safe for concurrent access.
func (f *Filter) L() uint8 {
return f.l
}
// nextGeneration transitions the filter to the next generation which
// effectively ages all items and potentially expires the oldest generation of
// items.
//
// This function MUST be called with the filter mutex held.
func (f *Filter) nextGeneration() {
// Shift the position of the first slice within the ring buffer to the left
// by one.
if f.baseIndex == 0 {
f.baseIndex = f.kPlusL
}
f.baseIndex--
// Clear the bits associated with the new logical slice. Note that since
// the logical slice was just rotated once backwards around the ring buffer
// above, this clears what was previously the oldest slice so the new
// entries take its place.
startBit := uint64(f.baseIndex) * f.bitsPerSlice
endBit := startBit + f.bitsPerSlice
// Clear bits up to the next byte boundary.
byteIdx := startBit >> 3
if bit := startBit & 7; bit != 0 {
f.data[byteIdx] &= byte(1<<bit - 1)
byteIdx++
}
// Clear full bytes in one fell swoop when possible.
if endByteIdx := (endBit >> 3); endByteIdx > byteIdx {
fullBytes := endByteIdx - byteIdx
data := f.data[byteIdx : byteIdx+fullBytes]
for i := range data {
data[i] = 0
}
byteIdx += fullBytes
}
// Clear any remaining bits.
if byteIdx < uint64(len(f.data)) {
f.data[byteIdx] &^= byte(1<<(endBit&7) - 1)
}
f.itemsInCurGeneration = 0
}
// deriveIndex uses enhanced double hashing to calculate a unique index for the
// given logical slice number using the closed formula. In order to support
// more efficient interleaving of the calculation while looping over several
// slices, it also returns the intermediate accumulator which can be fed into
// subsequent iterations of the algorithm to potentially provide significant
// speed improvements.
//
// Note that each slice conceptually uses an independent hash function to
// determine which bit to index. However, rather than using the more
// traditional approach of a distinct hash function for each one, enhanced
// double hashing is used to effectively generate the necessary hash function
// because it is significantly faster while still coming quite close to the
// theoretical limit of two-index fingerprinting.
//
// It is defined as "f(i) = hash1 + i*hash2 + (i^3 - i)/6 (mod m)", where m is
// the number of bits to index, however, this function does not perform the
// modular reduction and instead leaves it up to the caller.
//
// It is also worth noting that enhanced double hashing is used in this
// implementation over the more common double hashing referenced in [APBF], and
// defined in [LHSP], which generates the necessary hash functions from a linear
// combination of the two independent hash functions as "f(i) = hash1 + i*hash2
// (mod m)" because, as pointed out in [BFPV], that construction results in
// imposing an observable accuracy limit since there are no restrictions placed
// on the second hash to ensure the resulting indices are (almost) all unique.
// Namely, when the second hash function produces 0, all indices are the same,
// and, similarly, when it produces a value that divides m, the number of
// indices probed will be the minimum of the number of slices and the produced
// value, which could be as small as 2.
//
// See section 5.2 of [BFPV] for further details, if desired.
func deriveIndex(slice uint16, hash1, hash2 uint64) (uint64, uint64) {
s := uint64(slice)
z := (s*s + s) / 2
derivedIdx := hash1 + s*hash2 + (z*(s-1))/3
return derivedIdx, hash2 + z
}
// setBit unconditionally sets the bit at the provided absolute index in the
// underlying filter data.
//
// This function MUST be called with the filter mutex held.
func (f *Filter) setBit(bit uint64) {
f.data[bit>>3] |= 1 << (bit & 7)
}
// fastReduce calculates a mapping that is more or less equivalent to x mod N.
// However, instead of using a mod operation that can lead to slowness on many
// processors when not using a power of two due to unnecessary division, this
// uses a "multiply-and-shift" trick that eliminates all divisions as described
// in a blog post by Daniel Lemire, located at the following site at the time
// of this writing:
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
//
// Since that link might disappear, the general idea is to multiply by N and
// shift right by log2(N). Since N is a 64-bit integer in this case, it
// becomes:
//
// (x * N) / 2^64 == (x * N) >> 64
//
// This is a fair map since it maps integers in the range [0,2^64) to multiples
// of N in [0, N*2^64) and then divides by 2^64 to map all multiples of N in
// [0,2^64) to 0, all multiples of N in [2^64, 2*2^64) to 1, etc. This results
// in either ceil(2^64/N) or floor(2^64/N) multiples of N.
func fastReduce(x, N uint64) uint64 {
// This uses math/bits to perform the 128-bit multiplication as the compiler
// will replace it with the relevant intrinsic on most architectures.
//
// The high 64 bits in a 128-bit product is the same as shifting the entire
// product right by 64 bits.
hi, _ := bits.Mul64(x, N)
return hi
}
// Add inserts the provided data into the filter.
//
// This function is safe for concurrent access.
func (f *Filter) Add(data []byte) {
// Transition the filter to the next generation when adding the new item
// will exceed the number of items per generation. This effectively ages
// all existing items and potentially expires the oldest generation of
// items.
f.mtx.Lock()
if f.itemsInCurGeneration == f.itemsPerGeneration {
f.nextGeneration()
}
f.itemsInCurGeneration++
// Set the relevant bits in the filter for 'k' slices, starting from the
// first slice (slice 0), as determined by the equivalent of a separate hash
// (index) function for each slice. See the comments in deriveIndex for
// more details about the enhanced double hashing technique used.
//
// Note that this implementation uses a ring buffer where the position index
// of the first slice is modified to simulate shifting, so the code below
// uses logical slices to map to the correct physical location within the
// filter data.
//
// As a further optimization, rather than fully calculating each independent
// bit index for each slice via the closed formula for enhanced double
// hashing, this interleaves the calculation with the loop over the slices
// which benchmarking shows is about 30% faster.
logicalSlice := f.baseIndex
sliceBitOffset := uint64(logicalSlice) * f.bitsPerSlice
hash1, hash2 := siphash.Hash128(f.key0, f.key1, data)
derivedIdx, acc := deriveIndex(logicalSlice, hash1, hash2)
for i := uint8(0); i < f.k; i++ {
f.setBit(sliceBitOffset + fastReduce(derivedIdx, f.bitsPerSlice))
// Move to the next logical slice while wrapping around the ring buffer
// if needed.
logicalSlice++
if logicalSlice == f.kPlusL {
logicalSlice = 0
sliceBitOffset = 0
// Reset the derived bit index using enhanced double hashing
// accordingly.
derivedIdx, acc = hash1, hash2
continue
}
sliceBitOffset += f.bitsPerSlice
// Derive the next bit index using enhanced double hashing.
derivedIdx += acc
acc += uint64(logicalSlice)
}
f.mtx.Unlock()
}
// isBitSet returns whether or not the bit at the provided absolute index in the
// underlying filter data is set.
//
// This function MUST be called with the filter mutex held.
func (f *Filter) isBitSet(bit uint64) bool {
return f.data[bit>>3]&(1<<(bit&7)) != 0
}
// Contains returns the result of a probabilistic membership test of the
// provided data such that there is a non-zero probability of false positives,
// per the false positive rate of the filter, and a zero probability of false
// negatives for the previous number of items supported by the max capacity of
// the filter.
//
// In other words, the most recent max capacity number of items added to the
// filter will always return true while items that were never added or have been
// expired will only report true with the false positive rate of the filter.
//
// This function is safe for concurrent access.
func (f *Filter) Contains(data []byte) bool {
// Attempt to find the required 'k' consecutive matches using an algorithm
// that reduces the average number of tests required.
//
// The algorithm works by choosing the starting position such that it leaves
// exactly 'k' consecutive slices to be tested, accumulates the matching
// sub sequences, and jumps backwards 'k' slices when a match fails.
//
// Recall that this implementation uses a ring buffer where the position
// index of the first slice is modified to simulate shifting, so the code
// below uses logical slices to map to the correct physical location within
// the filter data.
//
// As a further optimization, similar to when the items are added, rather
// than fully calculating each independent bit index for each slice via the
// closed formula for enhanced double hashing, this interleaves the
// calculation with the loop over the slices for consecutive matches which
// benchmarking shows is about 12% faster on average.
var result = false
f.mtx.Lock()
prevMatches, curMatches := uint8(0), uint8(0)
physicalSlice := uint16(f.l)
logicalSlice := (f.baseIndex + physicalSlice) % f.kPlusL
sliceBitOffset := uint64(logicalSlice) * f.bitsPerSlice
hash1, hash2 := siphash.Hash128(f.key0, f.key1, data)
derivedIdx, acc := deriveIndex(logicalSlice, hash1, hash2)
for {
if f.isBitSet(sliceBitOffset + fastReduce(derivedIdx, f.bitsPerSlice)) {
// Successful query when the required number of consecutive matches
// is achieved.
curMatches++
if prevMatches+curMatches == f.k {
result = true
break
}
// Move to the next logical slice while wrapping around the ring
// buffer if needed.
physicalSlice++
logicalSlice++
if logicalSlice == f.kPlusL {
logicalSlice = 0
sliceBitOffset = 0
// Reset the derived bit index using enhanced double hashing
// accordingly.
derivedIdx, acc = hash1, hash2
continue
}
sliceBitOffset += f.bitsPerSlice
// Derive the next bit index using enhanced double hashing.
derivedIdx += acc
acc += uint64(logicalSlice)
continue
}
// Nothing more to do when there are not enough slices left to achieve
// the required number of consecutive matches.
if uint16(f.k) > physicalSlice {
break
}
// Skip back the required number of matches while accumulating any
// matching sub sequence.
physicalSlice -= uint16(f.k)
prevMatches = curMatches
curMatches = 0
// Reset logical slice and derive the associated bit index using
// enhanced double hashing.
logicalSlice = (f.baseIndex + physicalSlice) % f.kPlusL
sliceBitOffset = uint64(logicalSlice) * f.bitsPerSlice
derivedIdx, acc = deriveIndex(logicalSlice, hash1, hash2)
}
f.mtx.Unlock()
return result
}
// Reset clears the filter and changes the key used in the internal hashing
// logic to ensure a unique set of false positives versus those prior to the
// reset.
//
// This function is safe for concurrent access.
func (f *Filter) Reset() {
f.mtx.Lock()
f.key0, f.key1 = siphash.Hash128(f.key0, f.key1, []byte("reset"))
f.baseIndex = 0
f.itemsInCurGeneration = 0
for i := range f.data {
f.data[i] = 0
}
f.mtx.Unlock()
}
// NewFilterKL returns an Age-Partitioned Bloom Filter (APBF) suitable for use
// in processing unbounded data streams where more recent items are more
// significant than older ones and some false positives are acceptable. The
// parameters specify the number of most-recently added items that must always
// return true, the number slices which need consecutive matches, k, and the
// number of additional slices, l, to reach a desired target false positive rate
// to maintain.
//
// Every new filter uses a unique key for the internal hashing logic so that
// each one will have a unique set of false positives. The key is also
// automatically changed by Reset.
//
// Applications might prefer using NewFilter to specify the max capacity and a
// target false positive rate unless they specifically require the additional
// fine tuning provided here.
//
// For convenience, the 'go generate' command may be used in the code directory
// to generate a table of k and l combinations along with the false positive
// rate they maintain and average expected number of accesses for false queries
// to help select appropriate parameters.
//
// Note that, due to rounding, the actual max number of items that can be added
// to the filter before old entries are expired might actually be slightly
// higher than the specified target and can be obtained via the Capacity method
// on the returned filter.
//
// The total space (in bytes) used by a filter that can hold 'n' items for the
// given 'k' and 'l' is approximately:
//
// ceil(ceil(1.44k * ceil(n / l+1)) * (k+l) / 8)
func NewFilterKL(minCapacity uint32, k, l uint8) *Filter {
// Calculate the number of items per generation such that the maximum
// capacity (aka sliding window size) is at least the specified number of
// items.
// w = g*(l+1)
// => g = w/(l+1)
// => g = ceil(w/(l+1))
g := uint32(math.Ceil(float64(minCapacity) / (float64(l) + 1)))
// Calculate the number of bits needed per slice based the number of items
// per generation and number of slices used for filter insertions.
//
// The fill ratio can be approximated by r = 1 - e^-(n/m), where n is the
// number of items and m is the bits per slice. It is well known that
// optimal filter usage for partitioned bloom filters is asymptotically when
// r = 1/2.
//
// Thus solving r = 1/2 yields:
// e^-(n/m) = 1/2
// => n/m = ln(2)
// => m = n / ln(2)
// => bitsPerSlice = k*g / ln(2)
bitsPerSlice := uint64(math.Ceil(float64(k) * float64(g) / math.Log(2)))
// The total filter size in bits is thus the total number of slices
// multiplied by the number of bits per slice.
kPlusL := uint16(k) + uint16(l)
filterBytes := ((bitsPerSlice * uint64(kPlusL)) + 7) / 8
// The key does not need to be cryptographically secure since its purpose is
// only to ensure filters created at different times (for example, as
// different nodes come online) produce a different set of false positives.
var seed [8]byte
s0 := uint64(time.Now().UnixNano())
s1 := s0 ^ 0xa5a5a5a5a5a5a5a5
binary.BigEndian.PutUint64(seed[:], s0^0x5a5a5a5a5a5a5a5a)
key0, key1 := siphash.Hash128(s0, s1, seed[:])
return &Filter{
key0: key0,
key1: key1,
k: k,
l: l,
kPlusL: kPlusL,
bitsPerSlice: bitsPerSlice,
itemsPerGeneration: g,
data: make([]byte, filterBytes),
}
}
// nearOptimalK calculates a near optimal number of hash functions to use based
// on the desired false positive rate and tradeoff versus the average number of
// queries. Since the actual false positive rate depends on both the number of
// hash functions and an additional number of slices, the calculated value here
// might be off by one from what is truly optimal, but the difference is minor.
func nearOptimalK(fpRate float64) uint8 {
return uint8(math.Ceil(-math.Log2(fpRate)))
}
// nearOptimalL calculates a near optimal number of additional slices to use for
// a given number of hash functions in order to maintain the desired false
// positive rate.
func nearOptimalL(k uint8, fpRate float64) uint8 {
// There is not currently a closed formula for calculating the false
// positive rate of an APBF, so just brute force it since it is only done
// once when the filter is created and it's fast enough anyway.
const maxL = 100
for l := uint8(1); l <= maxL; l++ {
result := CalcFPRate(k, l)
if result > fpRate {
return l - 1
}
}
return maxL
}
// NewFilter returns an Age-Partitioned Bloom Filter (APBF) suitable for use in
// processing unbounded data streams where more recent items are more
// significant than older ones and some false positives are acceptable. The
// parameters specify the number of most-recently added items that must always
// return true and the target false positive rate to maintain.
//
// Every new filter uses a unique key for the internal hashing logic so that
// each one will have a unique set of false positives. The key is also
// automatically changed by Reset.
//
// Note that, due to rounding, the actual max number of items that can be added
// to the filter before old entries are expired might actually be slightly
// higher than the specified target and can be obtained via the Capacity method
// on the returned filter.
//
// Similarly, the actual false positive rate might be slightly better than the
// target rate and can be obtained via the FPRate method on the returned filter.
//
// In other words, both parameters are treated as lower bounds so that the
// returned filter has at least the requested target values.
//
// For most applications, the values will be perfectly acceptable, however,
// applications that desire greater control over the tuning can make use of
// NewFilterKL instead.
//
// The total space (in bytes) used by a filter that can hold 'n' items with a
// false positive rate of 'fpRate' is approximately:
//
// 1.3n * -log(fpRate), where log is base 10
func NewFilter(minCapacity uint32, fpRate float64) *Filter {
k := nearOptimalK(fpRate)
l := nearOptimalL(k, fpRate)
return NewFilterKL(minCapacity, k, l)
}