Add compile time validation for block sizes--disallows Deserialized BloomFilters of wrong block size
tomtomwombat · Jun 25, 2024 · 97e51c2 · 97e51c2
1 parent a249556
commit 97e51c2
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 7 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,7 +17,7 @@ maintenance = { status = "actively-developed" }
 [features]
 default = ["rand"]
 rand = ["dep:rand"]
-serde = ["dep:serde"]
+serde = ["dep:serde", "siphasher/serde_std"]
 
 [target.'cfg(all(any(target_arch = "wasm32", target_arch = "wasm64"), target_os = "unknown"))'.dependencies]
 getrandom = { version = "0.2", features = ["js"] }
@@ -26,7 +26,7 @@ getrandom = { version = "0.2", features = ["js"] }
 getrandom = "0.2"
 rand = { version = "0.8.5", optional = true }
 serde = { version = "1.0.203", features = ["derive"], optional = true }
-siphasher = { version = "1.0.0", features = ["serde"] }
+siphasher = "1.0.0"
 wide = "0.7.15"
 
 [dev-dependencies]

diff --git a/README.md b/README.md
@@ -166,6 +166,8 @@ In reality, the Bloom filter may have more than 64 bits of storage. In that case
 
 - **`rand`** - Enabled by default, this has the `DefaultHasher` source its random state using `thread_rng()` instead of hardware sources. Getting entropy from a user-space source is considerably faster, but requires additional dependencies to achieve this. Disabling this feature by using `default-features = false` makes `DefaultHasher` source its entropy using `getrandom`, which will have a much simpler code footprint at the expense of speed.
 
+- **`serde`** - `BloomFilter`s implement `Serialize` and `Deserialize` when possible.
+
 ## References
 - [Bloom filter - Wikipedia](https://en.wikipedia.org/wiki/Bloom_filter)
 - [Bloom Filter - Brilliant](https://brilliant.org/wiki/bloom-filter/)

diff --git a/src/bit_vector.rs b/src/bit_vector.rs
@@ -1,6 +1,4 @@
 use std::ops::Range;
-#[cfg(feature = "serde")]
-use serde::{Deserialize, Serialize};
 
 /// The number of bits in the bit mask that is used to index a u64's bits.
 ///
@@ -22,8 +20,7 @@ const BIT_MASK: u64 = (1 << BIT_MASK_LEN) - 1;
 /// Indexing a block is also efficient, since it can be done with bit operators because
 /// the size of a block is a power of 2.
 #[derive(Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct BlockedBitVec<const BLOCK_SIZE_BITS: usize> {
     bits: Vec<u64>, // Bit storage, kept as a vector of 64-bit words.
 }

diff --git a/src/lib.rs b/src/lib.rs
@@ -144,9 +144,16 @@ impl BloomFilter {
     }
 }
 
+const fn validate_block_size(size: usize) -> usize { // Passes a supported block size through unchanged.
+    match size {
+        64 | 128 | 256 | 512 => size, // The four accepted sizes, returned as-is.
+        _ => panic!("The only BLOCK_SIZE's allowed are 64, 128, 256, and 512."), // Any other size panics; inside a const initializer this surfaces as a compile error.
+    }
+}
+
 impl<const BLOCK_SIZE_BITS: usize, S: BuildHasher> BloomFilter<BLOCK_SIZE_BITS, S> {
     /// Used to grab the last N bits from a hash.
-    const BIT_INDEX_MASK: u64 = (BLOCK_SIZE_BITS - 1) as u64;
+    const BIT_INDEX_MASK: u64 = (validate_block_size(BLOCK_SIZE_BITS) - 1) as u64;
 
     /// The optimal number of hashes to perform for an item given the expected number of items to be contained in one block.
     /// Proof under "False Positives Analysis": <https://brilliant.org/wiki/bloom-filter/>