In [11]:
import random
import math
import statistics

# TableEntry class to represent each entry in the hash table
class TableEntry:
    """One key/value slot of a hash table.

    An entry is either live (``in_table`` is True) or a removed marker. A removed
    marker keeps occupying its bucket with ``key`` and ``value`` cleared, so the
    bucket can be told apart from one that was never used.
    """

    def __init__(self, key, value):
        """Create a live entry holding ``key`` and ``value``."""
        self.key, self.value = key, value
        self.in_table = True

    # ----- accessors -----

    def get_key(self):
        """Return the stored key (None after the entry has been removed)."""
        return self.key

    def get_value(self):
        """Return the stored value (None after the entry has been removed)."""
        return self.value

    def set_value(self, value):
        """Replace the stored value with ``value``."""
        self.value = value

    # ----- state handling -----

    def is_in_table(self):
        """Return the ``in_table`` flag: True while the entry is live."""
        return self.in_table

    def set_to_removed(self):
        """Turn this entry into a removed marker by clearing key, value and flag."""
        self.in_table = False
        self.key = self.value = None

    def is_removed(self):
        """Return True only when the flag is off and both key and value are None.

        Removed entries are available to store values, but they indicate that a
        value being searched for may still exist further in the probing sequence.
        """
        if self.in_table:
            return False
        return self.key is None and self.value is None

In [12]:
# Parent class for Linear Probing class and Double Hashing class
# Instantiates a hash table with probe counting functionality
class HashTableWithCount:
    """Base hash table shared by the linear-probing and double-hashing subclasses.

    Holds the bucket list, the number of stored entries, and a running count of
    probes that subclasses increment while searching.
    """

    def __init__(self, initial_capacity=307):
        """Create an empty table with ``initial_capacity`` buckets (default 307)."""
        self.probe_count = 0
        self.size = 0
        self.table = [None] * initial_capacity

    def get_next_prime(self, n):
        """Return the smallest prime that is greater than or equal to ``n``."""
        if n <= 2:
            return 2                        # 2 is the first prime
        # Start from an odd candidate and step by 2 so only odd numbers are tested.
        candidate = n if n % 2 else n + 1
        while not self.is_prime(candidate):
            candidate += 2
        return candidate

    def is_prime(self, n):
        """Return True when ``n`` is prime, using trial division up to sqrt(n)."""
        if n <= 1:
            return False                    # nothing below 2 is prime
        limit = int(math.sqrt(n))
        return all(n % divisor != 0 for divisor in range(2, limit + 1))

    def check_size(self, size):
        """Placeholder; not needed for this assignment. Always returns None."""
        return None

    def get_hash_index(self, key):
        """Map ``key`` to a bucket number: its hash modulo the table length."""
        bucket_total = len(self.table)
        return hash(key) % bucket_total

    def is_hash_table_too_full(self):
        """Placeholder; not needed for this assignment. Always returns None."""
        return None

    def enlarge_hash_table(self):
        """Placeholder; not needed for this assignment. Always returns None."""
        return None

    def reset_probe_count(self):
        """Set the probe counter back to zero for a new operation."""
        self.probe_count = 0

    def get_probes(self):
        """Return the current probe count."""
        return self.probe_count

In [13]:
class LinearProbingWithCount(HashTableWithCount):
    """Hash table that resolves collisions with linear probing and counts probes.

    Buckets hold ``None`` (never used), a live ``TableEntry``, or a ``TableEntry``
    that has been marked removed. Removed entries do not end a search, but their
    bucket is reused by ``add``.
    """

    def __init__(self):
        """Create an empty table using the parent's default capacity."""
        super().__init__()

    def probe(self, index, key):
        """Search for ``key``, adding one to the probe count per bucket inspected.

        ``index`` is run through ``get_hash_index`` to obtain the starting bucket
        (the caller in this file passes the key itself).

        Returns the bucket index holding ``key``, or -1 when a never-used bucket
        is reached or the whole table has been scanned without a match.
        """
        capacity = len(self.table)
        start = self.get_hash_index(index)              # loop-invariant, so hash once
        for offset in range(capacity):
            bucket_index = (start + offset) % capacity  # linear probing: step of 1
            self.probe_count += 1
            entry = self.table[bucket_index]
            if entry is None:                           # never-used bucket ends the chain
                return -1
            if entry.is_in_table() and entry.get_key() == key:
                return bucket_index
        return -1

    def locate(self, index, key):
        """Search for ``key`` starting at bucket ``index`` without counting probes.

        Unlike ``probe``, ``index`` is used directly as the starting bucket.
        Returns the bucket index holding ``key``, or -1 if it is not found.
        """
        capacity = len(self.table)
        for offset in range(capacity):
            bucket_index = (index + offset) % capacity
            entry = self.table[bucket_index]
            if entry is None:                           # never-used bucket ends the chain
                return -1
            if entry.is_in_table() and entry.get_key() == key:
                return bucket_index
        return -1

    def add(self, key, value):
        """Insert ``key``/``value``, or update the value if ``key`` is already stored.

        The probe sequence is walked once. The first removed bucket seen is
        remembered and reused for a new key, but only after the rest of the
        sequence has been checked for an existing copy of ``key``.

        If the sequence contains neither a never-used nor a removed bucket (the
        table is full of live entries), the table is left unchanged.
        """
        capacity = len(self.table)
        start = self.get_hash_index(key)
        reusable_index = -1                             # first removed bucket on the path
        empty_index = -1                                # first never-used bucket on the path

        for offset in range(capacity):
            bucket_index = (start + offset) % capacity
            entry = self.table[bucket_index]
            if entry is None:                           # end of chain: key is not stored
                empty_index = bucket_index
                break
            if entry.is_in_table():
                if entry.get_key() == key:              # existing key: update in place
                    entry.set_value(value)
                    return
            elif reusable_index == -1:
                reusable_index = bucket_index

        # Prefer the earliest removed bucket; otherwise use the never-used one.
        # Inserting here even when the scan never met a never-used bucket fixes
        # the case where the new pair used to be silently dropped.
        target_index = reusable_index if reusable_index != -1 else empty_index
        if target_index == -1:
            return                                      # no free bucket anywhere
        self.table[target_index] = TableEntry(key, value)
        self.size += 1

    def remove(self, key):
        """Mark the entry for ``key`` as removed; do nothing if ``key`` is absent."""
        index = self.locate(self.get_hash_index(key), key)
        if index != -1:
            self.table[index].set_to_removed()
            self.size -= 1

    def get_value(self, key):
        """Return the value stored for ``key``, or None if ``key`` is absent."""
        index = self.locate(self.get_hash_index(key), key)
        if index != -1:
            return self.table[index].get_value()
        return None

    def contains(self, key):
        """Return True if ``key`` is stored in the table, else False."""
        return self.locate(self.get_hash_index(key), key) != -1

    def is_empty(self):
        """Return True when the table holds no live entries."""
        return self.size == 0

    def get_size(self):
        """Return the number of live entries."""
        return self.size

    def clear(self):
        """Empty every bucket and reset both the size and the probe count."""
        self.table = [None] * len(self.table)
        self.size = 0
        self.reset_probe_count()



In [14]:
class DoubleHashingWithCount(HashTableWithCount):
    """Hash table that resolves collisions with double hashing and counts probes.

    Buckets hold ``None`` (never used), a live ``TableEntry``, or a ``TableEntry``
    that has been marked removed. Removed entries do not end a search, but their
    bucket is reused by ``add``.
    """

    def __init__(self):
        """Create an empty table using the parent's default capacity."""
        super().__init__()

    def primary_hash(self, key):
        """First hash function: the key's hash modulo the table length."""
        return hash(key) % len(self.table)

    def secondary_hash(self, key):
        """Second hash function: the step size, always in the range 1..293.

        293 is the prime just below the default table size of 307.
        """
        return 293 - (hash(key) % 293)

    def probe(self, index, key):
        """Search for ``key``, adding one to the probe count per bucket inspected.

        ``index`` is run through ``primary_hash`` to obtain the starting bucket
        (the caller in this file passes the key itself); the step size comes
        from ``secondary_hash(key)``.

        Returns the bucket index holding ``key``, or -1 when a never-used bucket
        is reached or ``len(self.table)`` probes have been made without a match.
        """
        capacity = len(self.table)
        initial_index = self.primary_hash(index)
        step_size = self.secondary_hash(key)
        for i in range(capacity):
            bucket_index = (initial_index + i * step_size) % capacity
            self.probe_count += 1
            entry = self.table[bucket_index]
            if entry is None:                           # never-used bucket ends the chain
                return -1
            if entry.is_in_table() and entry.get_key() == key:
                return bucket_index
        return -1

    def locate(self, index, key):
        """Search for ``key`` like ``probe`` does, but without counting probes.

        ``index`` is run through ``primary_hash`` to obtain the starting bucket.
        Returns the bucket index holding ``key``, or -1 if it is not found.
        """
        capacity = len(self.table)
        # Start and step do not change during the search, so compute them once
        # rather than once per bucket.
        initial_index = self.primary_hash(index)
        step_size = self.secondary_hash(key)
        for i in range(capacity):
            bucket_index = (initial_index + i * step_size) % capacity
            entry = self.table[bucket_index]
            if entry is None:                           # never-used bucket ends the chain
                return -1
            if entry.is_in_table() and entry.get_key() == key:
                return bucket_index
        return -1

    def add(self, key, value):
        """Insert ``key``/``value``, or update the value if ``key`` is already stored.

        The probe sequence is walked once. The first removed bucket seen is
        remembered and reused for a new key, but only after the rest of the
        sequence has been checked for an existing copy of ``key``.

        If the sequence contains neither a never-used nor a removed bucket, the
        table is left unchanged.
        """
        capacity = len(self.table)
        initial_index = self.primary_hash(key)
        step_size = self.secondary_hash(key)
        reusable_index = -1                             # first removed bucket on the path
        empty_index = -1                                # first never-used bucket on the path

        for i in range(capacity):
            bucket_index = (initial_index + i * step_size) % capacity
            entry = self.table[bucket_index]
            if entry is None:                           # end of chain: key is not stored
                empty_index = bucket_index
                break
            if entry.is_in_table():
                if entry.get_key() == key:              # existing key: update in place
                    entry.set_value(value)
                    return
            elif reusable_index == -1:
                reusable_index = bucket_index

        # Prefer the earliest removed bucket; otherwise use the never-used one.
        # Inserting here even when the scan never met a never-used bucket fixes
        # the case where the new pair used to be silently dropped.
        target_index = reusable_index if reusable_index != -1 else empty_index
        if target_index == -1:
            return                                      # no free bucket on the sequence
        self.table[target_index] = TableEntry(key, value)
        self.size += 1

    def remove(self, key):
        """Mark the entry for ``key`` as removed; do nothing if ``key`` is absent."""
        index = self.locate(key, key)
        if index != -1:
            self.table[index].set_to_removed()
            self.size -= 1

    def get_value(self, key):
        """Return the value stored for ``key``, or None if ``key`` is absent."""
        index = self.locate(key, key)
        if index != -1:
            return self.table[index].get_value()
        return None

    def contains(self, key):
        """Return True if ``key`` is stored in the table, else False."""
        return self.locate(key, key) != -1

    def is_empty(self):
        """Return True when the table holds no live entries."""
        return self.size == 0

    def get_size(self):
        """Return the number of live entries."""
        return self.size

    def clear(self):
        """Empty every bucket and reset both the size and the probe count."""
        self.table = [None] * len(self.table)
        self.size = 0
        self.reset_probe_count()

In [15]:
class GetStatistics:
    """Experiment driver comparing probe counts of linear probing and double hashing.

    All methods are static; the class only groups the experiment's helpers.
    """

    @staticmethod
    def main(repetitions=1000):
        """Run the experiment ``repetitions`` times and print summary statistics.

        Each repetition clears both tables, inserts 100 distinct 'a' names into
        each, then searches both tables for the same 100 randomly chosen 'b'
        names (none of which are stored) and records the probes each table used.

        ``repetitions`` must be at least 2, because ``statistics.stdev`` needs
        two or more data points.
        """
        a_names = GetStatistics.get_1000_names()
        b_names = GetStatistics.get_10000_names()
        linear_ht = LinearProbingWithCount()
        double_ht = DoubleHashingWithCount()
        linear_totals, double_hashing_totals = [], []

        for _ in range(repetitions):
            linear_ht.clear()                           # clear() also resets the probe counters
            double_ht.clear()

            GetStatistics.choose_100_names_and_add(linear_ht, double_ht, a_names)

            for _search in range(100):
                # random.choice follows the list's real length instead of a fixed bound.
                random_b_name = random.choice(b_names)
                linear_ht.probe(random_b_name, random_b_name)
                double_ht.probe(random_b_name, random_b_name)
            linear_totals.append(linear_ht.get_probes())
            double_hashing_totals.append(double_ht.get_probes())

        print(f"Linear Probing totals per 100 trials over {repetitions} repetitions: {linear_totals}")
        # The value printed is a mean, so the label says "average" like the double-hashing line.
        print(f"Linear Probing average probes for {repetitions} trials: {sum(linear_totals) / len(linear_totals)}")
        print(f"Standard deviation for Linear Probing over {repetitions} trials: {round(statistics.stdev(linear_totals), 3)}")
        print('')
        print(f"Double Hashing totals per 100 trials over {repetitions} repetitions: {double_hashing_totals}")
        print(f"Double Hashing average probes for {repetitions} trials: {sum(double_hashing_totals) / len(double_hashing_totals)}")
        print(f"Standard deviation for Double Hashing over {repetitions} trials: {round(statistics.stdev(double_hashing_totals), 3)}")

    @staticmethod
    def choose_100_names_and_add(linear_ht, double_ht, names):
        """Add the same 100 distinct random names from ``names`` to both tables.

        Sampling is without replacement so that each table really receives 100
        different keys; drawing with replacement would repeat some names and
        leave the tables with fewer entries. If ``names`` has fewer than 100
        items, all of them are added.
        """
        for random_a_name in random.sample(names, min(100, len(names))):
            linear_ht.add(random_a_name, random_a_name)
            double_ht.add(random_a_name, random_a_name)

    @staticmethod
    def get_1000_names():
        """Return 1,000 unique names starting with 'a' (no overlap with 'b' names)."""
        return GetStatistics.fill_list([], 'a', 1000)

    @staticmethod
    def get_10000_names():
        """Return 10,000 unique names starting with 'b' (no overlap with 'a' names)."""
        return GetStatistics.fill_list([], 'b', 10000)

    @staticmethod
    def fill_list(name_list, starting_letter, target_number):
        """Return a list of ``target_number`` unique random names.

        Each name is ``starting_letter`` followed by 6 random digits. Names are
        collected in a set first to guarantee uniqueness. ``name_list`` is
        accepted for compatibility but its contents are not used.
        """
        name_set = set()
        while len(name_set) < target_number:
            digits = "".join(str(random.randint(0, 9)) for _ in range(6))
            name_set.add(starting_letter + digits)
        return list(name_set)

if __name__ == "__main__":
    # Table-size rationale for the experiment:
    # A hash table with 100 entries needs a size of approximately 342 in order to average 1.5 comparisons
    # for unsuccessful searches using linear probing. The next largest prime is 347.
    # Using double hashing, a size of 300 is needed. The next largest prime is 307.
    # For this experiment the default size of 307 set in the HashTableWithCount constructor is used for
    # both tables, so that both methods are compared on the same table size.
    GetStatistics.main()



Linear Probing totals per 100 trials over 1000 repetitions: [160, 152, 182, 172, 139, 128, 159, 148, 165, 143, 150, 152, 153, 136, 161, 150, 145, 151, 163, 160, 177, 152, 153, 180, 165, 154, 178, 159, 151, 157, 164, 166, 141, 141, 154, 145, 139, 173, 159, 141, 163, 164, 178, 162, 162, 148, 183, 163, 179, 153, 172, 148, 152, 156, 150, 133, 173, 155, 154, 157, 153, 179, 149, 140, 136, 134, 158, 162, 147, 132, 151, 151, 152, 155, 168, 146, 162, 144, 164, 145, 147, 144, 165, 184, 155, 147, 147, 156, 190, 169, 150, 147, 156, 184, 151, 140, 135, 160, 147, 145, 140, 186, 161, 169, 154, 136, 146, 158, 168, 171, 151, 143, 157, 171, 180, 140, 148, 175, 142, 132, 162, 171, 161, 149, 139, 190, 155, 161, 146, 155, 147, 144, 163, 173, 163, 157, 152, 171, 162, 150, 134, 155, 153, 161, 162, 143, 142, 143, 146, 158, 162, 165, 143, 164, 143, 164, 157, 166, 161, 158, 147, 163, 143, 146, 149, 152, 146, 163, 152, 149, 140, 149, 154, 157, 150, 180, 149, 146, 158, 146, 134, 143, 143, 140, 161, 148, 138, 168,

## Analysis
My initial trials showed similar results for Linear Probing and Double Hashing, but on further inspection I found that my double hashing algorithm was implemented incorrectly.
With the double hashing algorithm corrected, the results are more distinct: Double Hashing generally needs about ten fewer probes than Linear Probing per 100 unsuccessful searches, in the neighborhood of 144 (standard deviation around 8) for Double Hashing compared with roughly 154 (standard deviation around 12) for Linear Probing.

Double Hashing is noticeably more efficient than Linear Probing, though not to an extreme degree.