In [None]:
import pandas as pd

# Number of slots in the toy Bloom filter. Both hash functions reduce modulo
# this value so every index they produce is a valid position in `bloom_filter`.
bit_array = 13

# The filter itself: one 0/1 flag per slot, all cleared initially.
bloom_filter = [0] * bit_array


def hash1(x):
  """Return the first slot index for integer ``x``: ``(x + 1) mod bit_array``."""
  return (x + 1) % bit_array


def hash2(x):
  """Return the second slot index for integer ``x``: ``(2x + 5) mod bit_array``."""
  return (2 * x + 5) % bit_array


# Values that will be inserted into the filter.
numbers = [8, 17, 25, 9, 20]

In [None]:
# Insert every number: set the flag at each of its two hash positions.
# Setting a flag that is already 1 is a no-op, so re-running this cell is safe.
for num in numbers:
  index1 = hash1(num)
  index2 = hash2(num)
  bloom_filter[index1] = 1
  bloom_filter[index2] = 1

# Report once, after all insertions. Building and printing the table inside
# the loop would repeat the same output for every number.
data = {'Number': numbers, 'Hash1': [hash1(x) for x in numbers], 'Hash2': [hash2(x) for x in numbers]}
print(pd.DataFrame(data))
print("\nBloom filter values:", bloom_filter)

   Number  Hash1  Hash2
0       8      9      8
1      17      5      0
2      25      0      3
3       9     10     10
4      20      8      6

Bloom filter values: [1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0]
   Number  Hash1  Hash2
0       8      9      8
1      17      5      0
2      25      0      3
3       9     10     10
4      20      8      6

Bloom filter values: [1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0]
   Number  Hash1  Hash2
0       8      9      8
1      17      5      0
2      25      0      3
3       9     10     10
4      20      8      6

Bloom filter values: [1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0]
   Number  Hash1  Hash2
0       8      9      8
1      17      5      0
2      25      0      3
3       9     10     10
4      20      8      6

Bloom filter values: [1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0]
   Number  Hash1  Hash2
0       8      9      8
1      17      5      0
2      25      0      3
3       9     10     10
4      20      8      6

Bloom filter values: [1, 0, 0, 

In [None]:
# Probe the list-based filter for a single value.
check_num = 17
check1, check2 = hash1(check_num), hash2(check_num)
print(f"\nCheck for new number {check_num}.")

# The value can only have been inserted if every probed slot is set;
# a single cleared slot proves it was never added.
probably_present = all(bloom_filter[slot] == 1 for slot in (check1, check2))
if not probably_present:
  print(f"Result: {check_num} is definitely not in the set")
  print(f"Hash values of new number are hash1 = {check1} and hash2 = {check2}.")
else:
  print(f"Result: {check_num} might be in the set (Possible False Positive)")


Check for new number 17.
Result: 17 might be in the set (Possible False Positive)


In [None]:
!pip install mmh3
!pip install bitarray

Collecting mmh3
  Downloading mmh3-5.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading mmh3-5.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (101 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/101.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.6/101.6 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mmh3
Successfully installed mmh3-5.1.0
Collecting bitarray
  Downloading bitarray-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)
Downloading bitarray-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (303 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.1/303.1 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitarray
Successfully insta

In [None]:
class BloomFilter:
    """Fixed-size Bloom filter backed by a plain list of 0/1 flags.

    An item is added by setting ``hash_count`` flags chosen by hashing the
    item; it is reported as possibly present only if all of those flags are
    set. A ``False`` answer from :meth:`check` is therefore definitive, while
    a ``True`` answer may be a false positive.

    All positions derive from Python's built-in ``hash``, so items must be
    hashable. Note that ``hash`` of ``str``/``bytes`` is salted per
    interpreter process, so the exact flags set for such items differ between
    kernel sessions.

    Args:
        size: Number of flags in the filter; must be positive.
        hash_count: Number of hash positions used per item; must be positive.

    Raises:
        ValueError: If ``size`` or ``hash_count`` is not positive.
    """

    def __init__(self, size, hash_count):
        # Fail early with a clear message instead of a later
        # ZeroDivisionError / IndexError (bad size) or a filter whose check()
        # accepts everything (no hash functions).
        if size <= 0:
            raise ValueError("size must be a positive integer")
        if hash_count <= 0:
            raise ValueError("hash_count must be a positive integer")
        self.size = size
        self.hash_count = hash_count
        self.bit_array = [0] * self.size

    def _hash1(self, item):
        """Return the first position: ``hash(item) mod size``."""
        return hash(item) % self.size

    def _hash2(self, item):
        """Return the second position: ``hash(item) * 31 mod size``."""
        return (hash(item) * 31) % self.size

    def _hash3(self, item):
        """Return the third position: ``hash(item) * 17 mod size``."""
        return (hash(item) * 17) % self.size

    def _hash(self, item, i):
        """Return the position in ``bit_array`` given by the ``i``-th hash of ``item``.

        Indices 0-2 dispatch to ``_hash1``/``_hash2``/``_hash3``. Any other
        index hashes the ``(item, i)`` pair, so filters configured with more
        than three hash functions get a valid position instead of ``None``.
        """
        if i == 0:
            return self._hash1(item)
        if i == 1:
            return self._hash2(item)
        if i == 2:
            return self._hash3(item)
        # Mixing the index into the hashed value yields a distinct function
        # for each additional i.
        return hash((item, i)) % self.size

    def add(self, item):
        """Record ``item`` by setting the flag at each of its hash positions."""
        for i in range(self.hash_count):
            self.bit_array[self._hash(item, i)] = 1

    def check(self, item):
        """Return ``False`` if ``item`` was never added, ``True`` if it may have been."""
        # One cleared flag is enough to rule the item out.
        return all(
            self.bit_array[self._hash(item, i)] != 0
            for i in range(self.hash_count)
        )

In [None]:
# NOTE: this rebinds `bloom_filter`, which the earlier cells use as a plain
# list of flags; re-running those cells after this one will fail until the
# list is recreated.
bloom_filter = BloomFilter(size=1000, hash_count=3)

# Populate the filter with the known members.
items_to_add = ["apple", "banana", "orange"]
for item in items_to_add:
  bloom_filter.add(item)

# Query a mix of inserted and never-inserted items and report each verdict.
test_items = ["apple", "banana", "cherry", "pineapple"]
for item in test_items:
  message = (
    f"{item} is possibly in the set"
    if bloom_filter.check(item)
    else f"{item} is definitely not in the set"
  )
  print(message)

apple is possibly in the set
banana is possibly in the set
cherry is definitely not in the set
pineapple is definitely not in the set
