# Lab 10: Hashing

## <font color=DarkRed>Your Exercise: Rehashing using Quadratic Probing.</font>

Implement quadratic probing as a rehash technique. Use the `HashTable` implementation provided in my class notes.

## <font color=green>Your Solution</font>

*Use a variety of code and Markdown (text) cells below to create your solution. Good outputs include timing results and even plots. You will be graded not only on correctness but also on the clarity of your code, descriptive text, and other output. Keep it succinct!*

In [39]:
class HashTable:
    """Fixed-size open-addressing hash table that resolves collisions
    with quadratic probing.

    Keys must be ``str`` or ``int`` (ints are hashed through their decimal
    string form). ``None`` cannot be stored as a value because ``get`` uses
    ``None`` to signal "key not present".

    The i-th probe for a key whose home slot is ``h`` is
    ``(h + i**2) % size``. That sequence repeats with period ``size``, so at
    most ``size`` probes are ever needed to visit every slot it can reach.
    When ``size`` is not prime the sequence may not reach every slot, so an
    insert can fail even though the table still has empty slots.
    """

    def __init__(self, size):
        """Create an empty table with ``size`` slots.

        Raises:
            ValueError: if ``size`` is smaller than 1 (hashing takes a value
                modulo ``size``, so an empty table could never be used).
        """
        if size < 1:
            raise ValueError("Hashtable size must be at least 1")
        self.size = size
        self.slots = [None] * self.size  # keys, None marks an empty slot
        self.data = [None] * self.size   # values, parallel to self.slots

    def put(self, key, data):
        """Insert ``key`` with ``data``, or replace the data of an existing key.

        Prints a trace of the home slot and of every probe so the quadratic
        collision resolution can be observed.

        Raises:
            ValueError: if ``data`` is ``None``.
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
            IndexError: if no free slot can be reached for a new key.
        """
        if data is None:
            raise ValueError("None cannot be stored in this hashtable")

        hashvalue = self.hashfunction(key)
        print(f"First hashvalue = {hashvalue}")

        if self.slots[hashvalue] is None:
            print(f"No collision, hashvalue = {hashvalue}")
            self.slots[hashvalue] = key
            self.data[hashvalue] = data
            return

        if self.slots[hashvalue] == key:
            print(f"No collision, hashvalue = {hashvalue}, update hashtable")
            self.data[hashvalue] = data  # update/replace
            return

        # Collision: probe quadratically from the home slot. Probe 0 (the
        # home slot) was handled above; probes 1..size-1 complete one full
        # period of the sequence, so the loop always terminates.
        for collisionCount in range(1, self.size):
            newHashValue = self._probe(hashvalue, collisionCount)
            if collisionCount == 1:
                print(f"Collision, previous hashvalue = {hashvalue}, next hashvalue = {newHashValue}")
            else:
                print(f"Collision again, next hashvalue = {newHashValue}")

            occupant = self.slots[newHashValue]
            if occupant is None or occupant == key:
                self.slots[newHashValue] = key  # new insertion or update
                self.data[newHashValue] = data
                return

        if len(self) == self.size:
            raise IndexError("Hashtable is full")
        raise IndexError("No free slot reachable by quadratic probing")

    def get(self, key):
        """Return the data stored for ``key``, or ``None`` if it is absent.

        Follows the same probe sequence as ``put``: the search stops at the
        first empty slot or after one full period of the sequence.

        Raises:
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
        """
        startslot = self.hashfunction(key)

        for attempt in range(self.size):
            position = self._probe(startslot, attempt)
            occupant = self.slots[position]
            if occupant is None:
                return None  # an empty slot ends the probe chain
            if occupant == key:
                return self.data[position]

        return None  # every reachable slot holds a different key

    def hash(self, astring):
        """Return the position-weighted sum of character codes, modulo size."""
        weighted = sum(ord(c) * i for i, c in enumerate(astring, start=1))
        return weighted % self.size

    def hashfunction(self, key):
        """Return the home slot for ``key``.

        Raises:
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
        """
        if isinstance(key, int):
            return self.hash(str(key))  # ints are hashed as decimal text
        if isinstance(key, str):
            return self.hash(key)
        raise NotImplementedError("This data type isn't available for keys")

    def _probe(self, starthash, attempt):
        """Return the slot visited on probe number ``attempt`` from ``starthash``."""
        return (starthash + attempt ** 2) % self.size

    def rehash(self, oldhash, collisionCount=1):
        """Return the quadratic-probe slot ``(oldhash + collisionCount**2) % size``.

        Raises:
            IndexError: if every slot of the table is occupied.
        """
        if self.size == len(self):
            raise IndexError("Hashtable is full")
        return self._probe(oldhash, collisionCount)

    def __getitem__(self, key):
        """Return the data for ``key``.

        Raises:
            TypeError: if ``key`` is neither ``str`` nor ``int``.
            KeyError: if ``key`` is not in the table.
        """
        if not isinstance(key, (str, int)):
            raise TypeError("Key must be a string or int")

        val = self.get(key)

        # Compare against None explicitly so that falsy values such as 0 or
        # "" are returned instead of being reported as missing.
        if val is None:
            raise KeyError(key)

        return val

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; see ``put`` for the exceptions raised.

        Raises:
            TypeError: if ``key`` is neither ``str`` nor ``int``.
        """
        if not isinstance(key, (str, int)):
            raise TypeError("Key must be a string or int")

        self.put(key, value)

    def __len__(self):
        """Return the number of occupied slots."""
        return sum(1 for key in self.slots if key is not None)

    def __contains__(self, key):
        """Return True if ``key`` is stored in the table."""
        return self.get(key) is not None

    def __str__(self):
        """Return a dict-like rendering of the stored pairs in slot order."""
        pairs = ", ".join(
            f"{k!r}:{v!r}"
            for k, v in zip(self.slots, self.data)
            if k is not None
        )
        return "{" + pairs + "}"

    def __repr__(self):
        """Return the same rendering as ``__str__``."""
        return self.__str__()

## Testing

Show me that collision resolution is happening in a quadratic fashion. Perhaps instrument the `rehash` function to print some useful output when rehashing, or show the state of the `self.slots` list before or after a collision occurs. I'll leave it up to you to demonstrate.

In [40]:
# Create a 15-slot table for the demonstration below.
h=HashTable(15)

In [41]:
# Insert the demo entries in order. 'apple' is inserted twice to show an
# in-place update, and the printed trace shows each probe step on collisions.
demo_entries = [
    ('apple', "MacBook Pro"),
    ('apple', "MacBook Air"),
    ('google', "Pixel"),
    ('microsoft', "Surface Book Pro"),
    ('amazon', "Prime"),
    ('dell', 'Lemon'),
    ('ibm', 'Watson'),
    ('lenovo', 'Thinkpad'),
    ('hp', 'Alienware'),
    ('Acer', 'PC'),
]
for company, product in demo_entries:
    h.put(company, product)

First hashvalue = 4
No collision, hashvalue = 4
First hashvalue = 4
No collision, hashvalue = 4, update hashtable
First hashvalue = 11
No collision, hashvalue = 11
First hashvalue = 4
Collision, previous hashvalue = 4, next hashvalue = 5
First hashvalue = 14
No collision, hashvalue = 14
First hashvalue = 8
No collision, hashvalue = 8
First hashvalue = 13
No collision, hashvalue = 13
First hashvalue = 0
No collision, hashvalue = 0
First hashvalue = 13
Collision, previous hashvalue = 13, next hashvalue = 14
Collision again, next hashvalue = 2
First hashvalue = 2
Collision, previous hashvalue = 2, next hashvalue = 3


In [43]:
# Replace the value for an existing key through item assignment, then read it
# back; the bare expression on the last line is what the notebook displays.
h['apple'] = 'macbook'
h['apple']

First hashvalue = 4
No collision, hashvalue = 4, update hashtable


'macbook'

In [44]:
# Show every stored key/value pair in slot order.
print(h)

{'lenovo':'Thinkpad', 'hp':'Alienware', 'Acer':'PC', 'apple':'macbook', 'microsoft':'Surface Book Pro', 'dell':'Lemon', 'google':'Pixel', 'ibm':'Watson', 'amazon':'Prime'}
