# Problem Statement - Python Dictionaries and Hash Tables

In [1]:
# Contact names mapped to their phone numbers (the numbers are stored as strings).
phone_numbers = {
    'Aakash': '9489484949',
    'Hemanth': '9595949494',
    'Siddhant': '9231325312',
}
phone_numbers

{'Aakash': '9489484949', 'Hemanth': '9595949494', 'Siddhant': '9231325312'}

In [2]:
phone_numbers['Aakash']

'9489484949'

In [3]:
# Add a new value: assigning to a key that is not yet in the dictionary creates it
phone_numbers['Vishal'] = '8787878787'
# Update existing value: assigning to a key that is already present replaces its value
phone_numbers['Aakash'] = '7878787878'
# View the updated dictionary (the last expression in a cell is displayed)
phone_numbers

{'Aakash': '7878787878',
 'Hemanth': '9595949494',
 'Siddhant': '9231325312',
 'Vishal': '8787878787'}

In [4]:
# Walk the dictionary as (key, value) pairs and print one line per contact.
for name, number in phone_numbers.items():
    print('Name:', name, 'Phone Number:', number)

Name: Aakash Phone Number: 7878787878
Name: Hemanth Phone Number: 9595949494
Name: Siddhant Phone Number: 9231325312
Name: Vishal Phone Number: 8787878787


In [5]:
class HashTable:
    """Outline of the hash table interface to be implemented.

    Every method here is a placeholder: it has no body beyond its docstring
    and therefore returns ``None``.
    """

    def insert(self, key, value):
        """Add a new ``key``/``value`` pair to the table."""

    def find(self, key):
        """Look up the value stored under ``key``."""

    def update(self, key, value):
        """Replace the value stored under ``key`` with ``value``."""

    def list_all(self):
        """Return all the keys held in the table."""

## Data List

In [6]:
MAX_HASH_TABLE_SIZE = 4096

In [8]:
data_list = [None] * MAX_HASH_TABLE_SIZE

In [16]:
len(data_list) == 4096

True

In [17]:
# An untouched slot still holds the None placeholder the list was created with.
# Use `is` for the check: None is a singleton, and `==` can be overridden by
# the object being compared.
data_list[99] is None

True

## Hashing Function

In [18]:
def get_index(data_list, a_string):
    """Hash a string to a slot position inside ``data_list``.

    The hash is the sum of the Unicode code points of the characters in
    ``a_string``, reduced modulo ``len(data_list)``. Strings made of the same
    characters in a different order therefore produce the same index.

    Args:
        data_list: The list backing the table; only its length is used.
        a_string: The key to hash, iterated one character at a time.

    Returns:
        An integer index in ``range(len(data_list))``; ``0`` for an empty
        string.

    Raises:
        ZeroDivisionError: If ``data_list`` is empty.
    """
    code_point_total = sum(ord(symbol) for symbol in a_string)
    return code_point_total % len(data_list)

In [19]:
get_index(data_list, '') == 0

True

In [20]:
get_index(data_list, 'Aakash') == 585

True

In [28]:
ord('A') + ord('a') + ord('k') + ord('a') + ord('s') + ord('h')

585

In [30]:
585 % len(data_list)

585

In [32]:
get_index(data_list, 'Don O Leary') == 941

True

## Insert

In [33]:
key, value = 'Aakash', '7878787878'

In [34]:
idx = get_index(data_list, key)
idx

585

In [36]:
data_list[idx] = (key, value)

In [39]:
data_list[get_index(data_list, 'Hemanth')] = ('Hemanth', '9595949494')

## Find

In [40]:
idx = get_index(data_list, 'Aakash')
idx

585

In [41]:
key, value = data_list[idx]
value

'7878787878'

## List

In [42]:
# Collect the key (element 0) of every occupied slot, skipping empty (None) slots.
# Note: despite its name, `pairs` holds only the keys, not (key, value) tuples.
pairs = [kv[0] for kv in data_list if kv is not None]

In [43]:
pairs

['Aakash', 'Hemanth']

## Basic Hash Table Implementation

In [48]:
class BasicHashTable:
    """Fixed-size hash table with no collision handling.

    ``get_index`` maps each key to exactly one slot of ``data_list``, and a
    slot holds a single ``(key, value)`` tuple (or ``None`` when unused).
    Keys that hash to the same slot share it: a later write replaces the
    earlier pair, and ``find`` returns whatever value is in the slot without
    comparing keys.
    """

    def __init__(self, max_size=MAX_HASH_TABLE_SIZE):
        """Create the backing list with ``max_size`` empty slots."""
        self.data_list = [None] * max_size

    def insert(self, key, value):
        """Write ``(key, value)`` into the slot that ``key`` hashes to."""
        slot = get_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def find(self, key):
        """Return the value in the slot ``key`` hashes to, or ``None`` if it is empty."""
        entry = self.data_list[get_index(self.data_list, key)]
        if entry is None:
            return None

        # The stored key is not compared with the requested one.
        _, stored_value = entry
        return stored_value

    def update(self, key, value):
        """Overwrite the slot that ``key`` hashes to; same effect as ``insert``."""
        slot = get_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def list_all(self):
        """Return the keys of all occupied slots, in slot order."""
        keys = []
        for entry in self.data_list:
            if entry is not None:
                keys.append(entry[0])
        return keys

In [49]:
basic_table = BasicHashTable(max_size=1024)
len(basic_table.data_list) == 1024

True

In [50]:
# Insert some values
basic_table.insert('Aakash', '999999999')
basic_table.insert('Hemanth', '8888888888')

# Find a value
basic_table.find('Hemanth') == '8888888888'

True

In [51]:
# Update a value
basic_table.update('Aakash', '7777777777')

basic_table.find('Aakash') == '7777777777'

True

In [52]:
basic_table.list_all() == ['Aakash', 'Hemanth']

True

## Handling Collisions with Linear Probing

In [54]:
basic_table.insert('listen', 99)

In [55]:
basic_table.insert('silent', 200)

In [56]:
# 'listen' and 'silent' are made of the same characters, so get_index gives
# both the same slot. Inserting 'silent' replaced the pair stored for 'listen',
# and find() returns the slot's value without comparing keys.
basic_table.find('listen')

200

In [57]:
def get_valid_index(data_list, key):
    """Return the slot to use for ``key``, resolving collisions by linear probing.

    Probing starts at ``get_index(data_list, key)`` and moves forward one slot
    at a time, wrapping from the last slot back to index 0. It stops at the
    first slot that is either empty (``None``) or already holds ``key``.

    Args:
        data_list: List of slots; each is ``None`` or a ``(key, value)`` tuple.
        key: The string key to locate.

    Returns:
        The index of the slot holding ``key`` if it is reached before an
        empty slot, otherwise the index of the first empty slot on the probe
        path.

    Raises:
        RuntimeError: If every slot is occupied by a different key, so there
            is neither a match nor room for a new entry.
        ZeroDivisionError: If ``data_list`` is empty (raised by ``get_index``).
    """
    size = len(data_list)
    idx = get_index(data_list, key)

    # Visit each slot at most once. An unbounded loop would never terminate
    # when the table is full and does not contain `key`.
    for _ in range(size):
        kv = data_list[idx]

        if kv is None or key == kv[0]:
            return idx

        # Step to the next slot, wrapping around at the end of the list.
        idx = (idx + 1) % size

    raise RuntimeError('Hash table is full: no slot available for key {!r}'.format(key))

In [62]:
# Create an empty hash table
data_list2 = [None] * MAX_HASH_TABLE_SIZE

# New key 'listen' should return expected index
get_valid_index(data_list2, 'listen') == 655

True

In [63]:
# Insert a key-value pair for key 'listen'
data_list2[get_index(data_list2, 'listen')] = ('listen', 99)

# Colliding key 'silent' should return next index
get_valid_index(data_list2, 'silent') == 656

True

## Hash Table with Linear Probing

In [64]:
class ProbingHashTable:
    """Fixed-size hash table that resolves collisions with linear probing.

    Slot selection is delegated to ``get_valid_index``. Every occupied slot of
    ``data_list`` holds a ``(key, value)`` tuple; unused slots hold ``None``.
    """

    def __init__(self, max_size=MAX_HASH_TABLE_SIZE):
        """Create the backing list with ``max_size`` empty slots."""
        self.data_list = [None] * max_size

    def insert(self, key, value):
        """Store ``value`` under ``key``, replacing any value already stored for it."""
        slot = get_valid_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def find(self, key):
        """Return the value stored under ``key``, or ``None`` if the key is absent."""
        entry = self.data_list[get_valid_index(self.data_list, key)]
        if entry is None:
            return None
        return entry[1]

    def update(self, key, value):
        """Set the value for ``key``; same effect as ``insert`` (adds the key if missing)."""
        slot = get_valid_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def list_all(self):
        """Return the keys of all occupied slots, in slot order."""
        keys = []
        for entry in self.data_list:
            if entry is not None:
                keys.append(entry[0])
        return keys

In [65]:
# Create a new hash table
probing_table = ProbingHashTable()

# Insert a value
probing_table.insert('listen', 99)

# Check the value
probing_table.find('listen') == 99

True

In [66]:
# Insert a colliding key
probing_table.insert('silent', 200)

# Check the new and old keys
probing_table.find('listen') == 99 and probing_table.find('silent') == 200

True

In [69]:
# Update a key (use update() so that method is exercised too, not just insert())
probing_table.update('listen', 101)

# Check the value
probing_table.find('listen') == 101

True

In [70]:
probing_table.list_all() == ['listen', 'silent']

True

## Python Dictionaries using Hash Tables

In [75]:
from textwrap import indent

MAX_HASH_TABLE_SIZE = 4096

class HashTable:
    """Dictionary-like hash table using linear probing over a fixed-size list.

    Keys must be strings (more precisely, iterables of single characters): the
    hash is the sum of the characters' code points modulo the table size.
    Each occupied slot of ``data_list`` holds a ``(key, value)`` tuple and
    unused slots hold ``None``. The table never grows; once every slot is
    occupied, storing a new key raises ``RuntimeError``.

    Supported syntax: ``table[key] = value``, ``table[key]``, ``len(table)``,
    iteration, ``repr(table)`` and ``str(table)``.
    """

    def __init__(self, max_size=MAX_HASH_TABLE_SIZE):
        """Create the backing list with ``max_size`` empty slots."""
        self.data_list = [None] * max_size

    def _find_slot(self, key):
        """Return the slot holding ``key`` or the first free slot on its probe path.

        Returns ``None`` when no such slot exists, i.e. the table has zero
        capacity or every slot is occupied by a different key.
        """
        size = len(self.data_list)
        if size == 0:
            # A zero-capacity table has no slot to offer (and `% 0` would fail).
            return None

        # Hash: sum of the code points of the key's characters.
        idx = sum(ord(char) for char in key) % size

        # Probe each slot at most once so a full table cannot loop forever.
        for _ in range(size):
            kv = self.data_list[idx]

            if kv is None or key == kv[0]:
                return idx

            # Step to the next slot, wrapping around at the end of the list.
            idx = (idx + 1) % size

        return None

    def get_valid_index(self, key):
        """Return the index where ``key`` is stored or should be stored.

        Raises:
            RuntimeError: If the table is full and does not contain ``key``.
        """
        idx = self._find_slot(key)
        if idx is None:
            raise RuntimeError('Hash table is full: no slot available for key {!r}'.format(key))
        return idx

    def __getitem__(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent.

        Unlike ``dict``, a missing key does not raise ``KeyError``; a stored
        value of ``None`` is therefore indistinguishable from a missing key.
        """
        idx = self._find_slot(key)
        if idx is None:
            # Table is full and the key is not in it.
            return None

        kv = self.data_list[idx]
        return None if kv is None else kv[1]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, replacing any existing value.

        Raises:
            RuntimeError: If ``key`` is new and the table has no free slot.
        """
        idx = self.get_valid_index(key)
        self.data_list[idx] = (key, value)

    def __iter__(self):
        """Iterate over the stored ``(key, value)`` tuples in slot order."""
        return (x for x in self.data_list if x is not None)

    def __len__(self):
        """Return the number of occupied slots."""
        return sum(1 for _ in self)

    def __repr__(self):
        """Return a brace-delimited listing with one indented ``key : value`` line per entry."""
        pairs = [indent("{} : {}".format(repr(kv[0]), repr(kv[1])), '  ') for kv in self]
        return "{\n" + ',\n'.join(pairs) + "\n}"

    def __str__(self):
        """Return the same text as ``repr``."""
        return repr(self)

In [76]:
# Create a hash table
table = HashTable()

# Insert some key-value pairs
table['a'] = 1
table['b'] = 34

# Retrieve the inserted values
table['a'] == 1 and table['b'] == 34

True

In [78]:
# Update a value
table['a'] = 99

# Check the updated value
table['a'] == 99

True

In [79]:
# Get a list of key-value pairs
list(table) == [('a', 99), ('b', 34)]

True

In [80]:
table

{
  'a' : 99,
  'b' : 34
}