## Hashing


In [1]:
import requests

In [2]:
# Download the word list. The timeout keeps the cell from hanging forever
# if the server never responds.
req = requests.get("http://t2.hhg.to/ospd.txt", timeout=30)
# Fail loudly on an HTTP error instead of splitting an error page into "words".
req.raise_for_status()
# One entry per newline-separated line of the response body.
words = req.text.split("\n")

In [5]:
# Number of entries produced by splitting the downloaded text on newlines.
len(words)

79340

In [7]:
def hash(astring, tablesize):
    """Return the sum of the character code points of ``astring``, modulo ``tablesize``.

    Character order does not affect the result, so anagrams always collide.

    NOTE: this name shadows the built-in ``hash()`` in the enclosing scope.
    """
    code_point_total = sum(map(ord, astring))
    return code_point_total % tablesize

In [8]:
# ord('f') + ord('o') + ord('o') = 102 + 111 + 111 = 324, and 324 % 11 == 5.
hash("foo",11)

5

## Slightly better hashing

In [4]:
def hash_v2(astring, tablesize):
    """Return a position-weighted character hash of ``astring``, modulo ``tablesize``.

    Each character's code point is multiplied by its 1-based position before
    summing, so reordering the characters generally changes the result.
    """
    weighted_total = sum(
        position * ord(char)
        for position, char in enumerate(astring, start=1)
    )
    return weighted_total % tablesize

## `HashTable` Class

In [30]:
class HashTable:
    """Fixed-size key/value map using open addressing with linear probing.

    Keys (``int`` or ``str``) are stored in ``self.slots`` and the value for
    each key is stored at the same index in ``self.data``. The table never
    grows: once every slot is occupied, inserting a new key raises
    ``RuntimeError``.
    """

    def __init__(self, size = 10):
        """Create an empty table with ``size`` slots.

        Raises:
            ValueError: if ``size`` is smaller than 1 (such a table could
                never store or look up anything).
        """
        if size < 1:
            raise ValueError("size must be a positive integer")
        self.size = size
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def length(self):
        """Return the number of slots (the capacity), not the number of stored keys."""
        return len(self.slots)

    def myhash(self, astring):
        """Return the position-weighted sum of code points of ``astring``, modulo the table size."""
        weighted_total = sum(
            ord(char) * position
            for position, char in enumerate(astring, start=1)
        )
        return weighted_total % self.size

    def hashfunction(self, key):
        """Return the home slot index for ``key``.

        Integer keys are hashed through their decimal string form, so ``1``
        and ``"1"`` share a home slot (they remain distinct keys because the
        stored keys are compared with ``==``).

        Raises:
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
        """
        if isinstance(key, int):
            return self.myhash(str(key))
        if isinstance(key, str):
            return self.myhash(key)
        raise NotImplementedError("This datatype isn't developed for key")

    def rehash(self, oldhash):
        """Return the next slot index to probe after ``oldhash`` (wraps around)."""
        return (oldhash + 1) % self.size

    def _probe(self, key):
        """Find the slot for ``key`` by linear probing from its home slot.

        Returns the index of the slot that already holds ``key`` or, failing
        that, the first empty slot on the probe path. Returns ``None`` when
        the probe wraps back to the home slot, i.e. the table is full and
        does not contain ``key``.
        """
        start = self.hashfunction(key)
        position = start
        while self.slots[position] is not None and self.slots[position] != key:
            position = self.rehash(position)
            if position == start:
                # Every slot was visited: stop instead of looping forever.
                return None
        return position

    def put(self, key, data):
        """Store ``data`` under ``key``, replacing any existing value for that key.

        Raises:
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
            RuntimeError: if the table is full and ``key`` is not already present.
        """
        position = self._probe(key)
        if position is None:
            raise RuntimeError("HashTable is full")
        if self.slots[position] is None:
            # New key: claim the empty slot.
            self.slots[position] = key
        self.data[position] = data

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if the key is absent.

        Raises:
            NotImplementedError: if ``key`` is neither ``int`` nor ``str``.
        """
        position = self._probe(key)
        if position is None or self.slots[position] is None:
            return None
        return self.data[position]

    def __getitem__(self, key):
        """Support ``table[key]``; returns ``None`` for a missing key (see ``get``)."""
        return self.get(key)

    def __setitem__(self, key, data):
        """Support ``table[key] = data`` (see ``put``)."""
        self.put(key, data)

In [38]:
# Create a table with 10 slots.
H = HashTable(10)

In [39]:
# length() reports the number of slots, not the number of keys stored.
H.length()

10

In [40]:
# Item assignment goes through __setitem__, which calls put().
H["apple"] = "MacBook Pro"
H["google"] = "Pixel"
H["Microsoft"] = "Surface Book Pro"

In [41]:
# put() can also be called directly.
H.put('amazon','kindle')

In [42]:
# Each key sits at the slot index it hashed (or probed) to; unused slots stay None.
print(H.slots)

[None, None, 'Microsoft', None, 'apple', None, 'google', None, None, 'amazon']


In [44]:
# Each value is stored at the same index as its key in H.slots.
print(H.data)

[None, None, 'Surface Book Pro', None, 'MacBook Pro', None, 'Pixel', None, None, 'kindle']
