<a href="https://colab.research.google.com/github/sagsshakya/Data-Structures-and-Algorithms/blob/master/Data%20Structures/Hash_Maps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def get_hash(key, memory_size):
    '''Map a string key to a slot index.

    The index is the total of ord() over the characters of the key,
    reduced modulo memory_size.'''
    character_total = sum(ord(symbol) for symbol in key)
    return character_total % memory_size

In [None]:
# The character codes of 'march 20' add up to 653, so with 100 slots the hash is 653 % 100.
get_hash('march 20', 100)

53

# Creating a Hash Table class.

### Get a hash value using a hash function.

In [None]:
class HashTable:
    def __init__(self):
        # Fixed-size table: one slot per possible hash value, every slot starts empty (None).
        self.maximum = 100
        self.arr = [None] * self.maximum

    def get_hash(self, key):
        '''Return the slot index for key: the total of ord() over its
         characters, taken modulo the table size.'''
        return sum(ord(symbol) for symbol in key) % self.maximum


In [None]:
# Same key as before, now hashed through the method of a table object.
tt = HashTable()
tt.get_hash('march 20')

53

### Add an element to the array at the position given by the hash function.

In [None]:
class HashTable:
    def __init__(self, maximum = 100):
        # One slot per possible hash value; None marks a slot that holds nothing.
        self.maximum = maximum
        self.arr = [None] * maximum

    def get_hash(self, key):
        '''Return the slot index for key: the total of ord() over its
         characters, taken modulo the table size.'''
        character_total = 0
        for symbol in key:
            character_total = character_total + ord(symbol)
        return character_total % self.maximum

    def add(self, key, value):
        '''Store value in the slot selected by the hash function,
         replacing whatever that slot held before.'''
        slot = self.get_hash(key)
        self.arr[slot] = value

In [None]:
# With only 10 slots, 'march 20' (character total 653) is stored in slot 653 % 10 = 3.
tt = HashTable(maximum = 10)
tt.add('march 20', value = 310.0)
tt.arr

[None, None, None, 310.0, None, None, None, None, None, None]

### Get the VALUE part in the KEY - VALUE pair.

In [None]:
class HashTable:
    def __init__(self, maximum = 100):
        # One slot per possible hash value; None marks a slot that holds nothing.
        self.maximum = maximum
        self.arr = [None] * maximum

    def get_hash(self, key):
        '''Return the slot index for key: the total of ord() over its
         characters, taken modulo the table size.'''
        return sum(map(ord, key)) % self.maximum

    def add(self, key, value):
        '''Store value in the slot selected by the hash function,
         replacing whatever that slot held before.'''
        self.arr[self.get_hash(key)] = value

    def get(self, key):
        '''Return whatever is stored in the slot that key hashes to
         (None when the slot is empty).'''
        return self.arr[self.get_hash(key)]

In [None]:
# Four inserts into a 10-slot table, then one lookup.
# 'march 19' and 'june 09' both hash to slot 1, so the later add replaces 56.6 with 66.6.
tt = HashTable(maximum = 10)
tt.add('march 20', value = 310.0)
tt.add('march 19', value = 56.6)
tt.add('may 20', value = 320.0)
tt.add('june 09', value = 66.6)
print(tt.arr)
print()
print(tt.get('may 20'))

[None, 66.6, None, 310.0, None, None, None, 320.0, None, None]

320.0


### For easy insertion and access (dictionary-like, using ['....']).


In [None]:
class HashTable:
    def __init__(self, maximum = 100):
        # One slot per possible hash value; None marks a slot that holds nothing.
        self.maximum = maximum
        self.arr = [None] * maximum

    def get_hash(self, key):
        '''Return the slot index for key: the total of ord() over its
         characters, taken modulo the table size.'''
        return sum(ord(symbol) for symbol in key) % self.maximum

    def __setitem__(self, key, value):
        '''table[key] = value: store value in the slot selected by the
         hash function, replacing whatever that slot held before.'''
        slot = self.get_hash(key)
        self.arr[slot] = value

    def __getitem__(self, key):
        '''table[key]: return whatever is stored in the slot that key
         hashes to (None when the slot is empty).'''
        slot = self.get_hash(key)
        return self.arr[slot]

    def __delitem__(self, key):
        '''del table[key]: empty the slot that key hashes to by writing
         None into it.'''
        slot = self.get_hash(key)
        self.arr[slot] = None

In [None]:
# Dictionary-style usage: four keys that hash to four different slots (7, 8, 9 and 0),
# followed by a lookup and a deletion.
tt = HashTable(maximum = 10)
tt['march 15'] = 20.12
tt['march 16'] = 69.54
tt['march 17'] = 650.14
tt['march 18'] = 65.13

print(tt.arr)
print()
print(tt['march 18'])
print('\nAfter deletion:\n')
del tt['march 16']
print(tt.arr)

[65.13, None, None, None, None, None, None, 20.12, 69.54, 650.14]

65.13

After deletion:

[65.13, None, None, None, None, None, None, 20.12, None, 650.14]


# Overloading (collisions).

In [None]:
# Eight inserts into a 10-slot table without collision handling.
# 'march 9' and 'march 10' share slot 2, and 'march 1' and 'march 12' share slot 4;
# in each pair the later assignment overwrites the earlier value.
tt = HashTable(maximum = 10)
tt['march 1'] = 23.45
tt['march 9'] = 69.10       # hash_value = 2
tt['march 10'] = 23.78      # hash_value = 2
tt['march 12'] = 65.45
tt['march 15'] = 20.12
tt['march 16'] = 69.54
tt['march 17'] = 650.14
tt['march 18'] = 65.13

tt.arr

[65.13, None, 23.78, None, 65.45, None, None, 20.12, 69.54, 650.14]

### Chaining.
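- Each bucket holds a chain of ***(key, value)*** tuples, so keys that hash to the same bucket can coexist. A linked list is the classic choice for the chain; the implementation below uses a Python list for it.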



In [None]:
class HashTable:
    '''Hash table that resolves collisions by chaining.

    Every bucket in arr is a list of (key, value) tuples. The items, keys
    and values lists record the stored entries in insertion order and are
    kept index-aligned with one another, one record per distinct key.'''

    def __init__(self, maximum = 100, items = None, keys = None, values = None):
        '''Create a table with `maximum` buckets.

        items, keys, values: optional lists in which the entries are
        recorded. A list that is passed in is used as-is (it is not copied
        and its contents are not inserted into the buckets). A fresh list
        is created for each argument that is omitted; a literal [] default
        would be one object shared by every instance.'''
        self.maximum = maximum
        self.arr = [[] for _ in range(self.maximum)]         # One independent chain (list) per bucket.
        self.items = [] if items is None else items
        self.keys = [] if keys is None else keys
        self.values = [] if values is None else values

    def get_hash(self, key):
        '''Return the bucket index for key: the total of ord() over its
         characters, taken modulo the number of buckets.'''
        return sum(ord(symbol) for symbol in key) % self.maximum

    def _record(self, key, value):
        '''Mirror key -> value in items/keys/values, updating in place when
         the key is already recorded.'''
        try:
            index = self.keys.index(key)
        except ValueError:
            self.items.append((key, value))
            self.keys.append(key)
            self.values.append(value)
        else:
            self.items[index] = (key, value)
            self.values[index] = value

    def __setitem__(self, key, value):
        '''table[key] = value: insert the pair, or replace the value when the
         key is already present in its bucket.'''
        bucket = self.arr[self.get_hash(key)]

        for position, entry in enumerate(bucket):
            if entry[0] == key:
                bucket[position] = (key, value)
                break
        else:
            # The loop ended without finding the key: this is a new entry.
            bucket.append((key, value))

        self._record(key, value)

    def __getitem__(self, key):
        '''table[key]: return the value stored for key, or None when the key
         is absent (no exception is raised for a missing key).'''
        for stored_key, stored_value in self.arr[self.get_hash(key)]:
            if stored_key == key:
                return stored_value
        return None

    def __delitem__(self, key):
        '''del table[key]: remove the entry for key from its bucket and from
         items/keys/values.

        Raises Exception when the key is not in the table.'''
        bucket = self.arr[self.get_hash(key)]

        for position, entry in enumerate(bucket):
            if entry[0] == key:
                del bucket[position]
                break
        else:
            raise Exception('Value to be deleted not found.')

        try:
            index = self.keys.index(key)
        except ValueError:
            # Nothing was recorded for this key, so there is nothing to drop.
            return
        del self.items[index]
        del self.keys[index]
        del self.values[index]


In [None]:
# Eleven inserts into a 10-bucket chained table: colliding keys now share a bucket
# instead of overwriting each other (bucket 2 holds two entries, bucket 4 holds three).
tt = HashTable(maximum = 10)
tt['march 1'] = 23.45
tt['march 9'] = 69.10       # hash_value = 2
tt['march 10'] = 23.78      # hash_value = 2
tt['march 12'] = 65.45
tt['march 15'] = 20.12
tt['march 16'] = 69.54
tt['march 17'] = 650.14
tt['march 18'] = 65.13
tt['march 19'] = 6.54
tt['march 20'] = 60.14
tt['march 21'] = 6.3

print(tt.arr)
print()
# 'march 13' was never inserted, so this lookup prints None.
print(tt['march 13'])
# Each deletion removes a single entry from a shared bucket and leaves its neighbours in place.
del tt['march 21']
print(tt.arr)
del tt['march 10']
print(tt.arr)


[[('march 18', 65.13)], [('march 19', 6.54)], [('march 9', 69.1), ('march 10', 23.78)], [('march 20', 60.14)], [('march 1', 23.45), ('march 12', 65.45), ('march 21', 6.3)], [], [], [('march 15', 20.12)], [('march 16', 69.54)], [('march 17', 650.14)]]

None
[[('march 18', 65.13)], [('march 19', 6.54)], [('march 9', 69.1), ('march 10', 23.78)], [('march 20', 60.14)], [('march 1', 23.45), ('march 12', 65.45)], [], [], [('march 15', 20.12)], [('march 16', 69.54)], [('march 17', 650.14)]]
[[('march 18', 65.13)], [('march 19', 6.54)], [('march 9', 69.1)], [('march 20', 60.14)], [('march 1', 23.45), ('march 12', 65.45)], [], [], [('march 15', 20.12)], [('march 16', 69.54)], [('march 17', 650.14)]]


In [None]:
# items is a plain list attribute of the table (the recorded (key, value) pairs),
# so it is read, not called; calling it raises TypeError.
tt.items

[('march 18', 65.13),
 ('march 19', 6.54),
 ('march 9', 69.1),
 ('march 20', 60.14),
 ('march 1', 23.45),
 ('march 12', 65.45),
 ('march 15', 20.12),
 ('march 16', 69.54),
 ('march 17', 650.14)]

<hr>

# Exercises.

### nyc_weather.csv contains New York City weather for the first few days of January. Write a program that can answer the following:
- What was the average temperature in the first week of Jan?
- What was the maximum temperature in the first 10 days of Jan?
- Figure out the data structure that is best for this problem.

In [None]:
import pandas as pd

# Load the weather data with the 'date' column as the index.
df = pd.read_csv('https://raw.githubusercontent.com/codebasics/py/master/DataStructures/4_HashTable_2_Collisions/Solution/nyc_weather.csv', index_col = 'date')

# Pair every date with its temperature and collect the pairs into a built-in dict.
target = dict(zip(df.index, df['temperature(F)']))

target

{'Jan 1': 27,
 'Jan 10': 30,
 'Jan 2': 31,
 'Jan 3': 23,
 'Jan 4': 34,
 'Jan 5': 37,
 'Jan 6': 38,
 'Jan 7': 29,
 'Jan 8': 30,
 'Jan 9': 35}

In [None]:
# Load the weather data with the 'date' column as the index.
df = pd.read_csv('https://raw.githubusercontent.com/codebasics/py/master/DataStructures/4_HashTable_2_Collisions/Solution/nyc_weather.csv', index_col = 'date')

# Fill a 10-bucket HashTable with one date -> temperature entry per row.
tq = HashTable(10, [], [], [])
for key, value in df['temperature(F)'].items():
    tq[key] = value

tq.items

[('Jan 1', 27),
 ('Jan 2', 31),
 ('Jan 3', 23),
 ('Jan 4', 34),
 ('Jan 5', 37),
 ('Jan 6', 38),
 ('Jan 7', 29),
 ('Jan 8', 30),
 ('Jan 9', 35),
 ('Jan 10', 30)]

#### What was the average temperature in first week of Jan?

In [None]:
# Solution.
# The keys of the first seven days, built once so every membership test is a set lookup.
first_week = set('Jan ' + str(day) for day in range(1, 7 + 1))

# Temperatures of the recorded entries that fall in the first week.
first_week_temperatures = [value for key, value in tq.items if key in first_week]

# The running total is not kept in a variable called `sum`: that would rebind the
# builtin for the rest of the notebook.
count = len(first_week_temperatures)
mean_first_week = sum(first_week_temperatures) / count
print(mean_first_week)

31.285714285714285


#### What was the maximum temperature in first 10 days of Jan?

In [None]:
# The table holds only the first 10 days, so the answer is the largest recorded value.
# max() finds it in one pass; sorting the whole list first is unnecessary.
max_value = max(tq.values)
print('Max Value: ', max_value)

Max Value:  38


#### Figure out data structure that is best for this problem.
    - The best data structure is LIST because we only want to access the elements and do some computations.

<hr>


### ***poem.txt*** contains the famous poem "The Road Not Taken" by the poet Robert Frost. You have to read this file in Python and print every word and its count as shown below. Think about the best data structure that you can use to solve this problem and figure out why you selected that specific data structure.

In [None]:
from google.colab import files
# Upload poem.txt into the Colab runtime (only works inside Google Colab).
# NOTE(review): presumably `uploaded` holds the uploaded files keyed by name; confirm against the google.colab docs.
uploaded = files.upload()

Saving poem.txt to poem (1).txt


In [None]:
import re
# Read the whole poem; the with-block closes the file as soon as the text is in memory.
with open('poem.txt', 'r') as poem:
    file = poem.read()

# Replace every run of characters that are neither alphanumeric nor whitespace, and every
# newline, with a single space. The pattern is a raw string so that \s and \n reach the
# regex engine unchanged instead of being treated as string escapes.
file_new = re.sub(r'[^A-Za-z0-9\s]+|\n', ' ', file)
file_new

'Two roads diverged in a yellow wood  And sorry I could not travel both And be one traveler  long I stood And looked down one as far as I could To where it bent in the undergrowth   Then took the other  as just as fair  And having perhaps the better claim  Because it was grassy and wanted wear  Though as for that the passing there Had worn them really about the same   And both that morning equally lay In leaves no step had trodden black  Oh  I kept the first for another day  Yet knowing how way leads on to way  I doubted if I should ever come back   I shall be telling this with a sigh Somewhere ages and ages hence  Two roads diverged in a wood  and I  I took the one less traveled by  And that has made all the difference '

In [None]:
# Count how often each word occurs, using a 50-bucket HashTable as the counter.
abc = HashTable(50, [], [], [])
for word in file_new.split():
    # Ask the table itself whether the word is known (a lookup returns None for a
    # missing key) instead of scanning the abc.keys list for every word.
    occurrences = abc[word]
    abc[word] = 1 if occurrences is None else occurrences + 1

abc.items

[('Two', 1),
 ('roads', 1),
 ('diverged', 1),
 ('in', 1),
 ('a', 1),
 ('yellow', 1),
 ('wood', 1),
 ('And', 1),
 ('sorry', 1),
 ('I', 1),
 ('could', 1),
 ('not', 1),
 ('travel', 1),
 ('both', 1),
 ('And', 2),
 ('be', 1),
 ('one', 1),
 ('traveler', 1),
 ('long', 1),
 ('I', 2),
 ('stood', 1),
 ('And', 3),
 ('looked', 1),
 ('down', 1),
 ('one', 2),
 ('as', 1),
 ('far', 1),
 ('as', 2),
 ('I', 3),
 ('could', 2),
 ('To', 1),
 ('where', 1),
 ('it', 1),
 ('bent', 1),
 ('in', 2),
 ('the', 1),
 ('undergrowth', 1),
 ('Then', 1),
 ('took', 1),
 ('the', 2),
 ('other', 1),
 ('as', 3),
 ('just', 1),
 ('as', 4),
 ('fair', 1),
 ('And', 4),
 ('having', 1),
 ('perhaps', 1),
 ('the', 3),
 ('better', 1),
 ('claim', 1),
 ('Because', 1),
 ('it', 2),
 ('was', 1),
 ('grassy', 1),
 ('and', 1),
 ('wanted', 1),
 ('wear', 1),
 ('Though', 1),
 ('as', 5),
 ('for', 1),
 ('that', 1),
 ('the', 4),
 ('passing', 1),
 ('there', 1),
 ('Had', 1),
 ('worn', 1),
 ('them', 1),
 ('really', 1),
 ('about', 1),
 ('the', 5),
 ('sam