## Hashmap using list ❌

In [7]:
stock_prices = []

with open("stock_prices.csv","r") as f:
    for line in f:
        tokens = line.split(',')
        day = tokens[0]
        price = float(tokens[1])
        stock_prices.append([day, price])
        
print(stock_prices)
print(stock_prices[0])
print(stock_prices[0][1])

[['march 6', 310.0], ['march 7', 340.0], ['march 8', 380.0], ['march 9', 302.0], ['march 10', 297.0], ['march 11', 323.0]]
['march 6', 310.0]
310.0


In [8]:
for i in stock_prices:     # time complexity of using list is O(n)
    if i[0] == 'march 6':
        print(i[1])

310.0


## Hashmap using dictionary ✔️

In [76]:
stock_prices = {}

with open("stock_prices.csv","r") as f:
    for line in f:
        tokens = line.split(',')
        day = tokens[0]
        price = float(tokens[1])
        stock_prices[day] = price
        
print(stock_prices)
print(stock_prices['march 6'])   # so time complexity of using dict is O(1). better than list

{'march 6': 310.0, 'march 7': 340.0, 'march 8': 380.0, 'march 9': 302.0, 'march 10': 297.0, 'march 11': 323.0}
310.0


## Hashmap implementation 🥵:
- create an array
- calculate asci value of each key, sum all the character's value and return the mod value of it
- the mod value will be the idx of that array
- for setting: using idx, store in that array
- for getting: using idx, take the element
- for delete: using idx, make the idx None

In [43]:
class hashmap:
    def __init__(self):
        self.max_size = 100
        self.arr  = [None for i in range(self.max_size)] # creating an array which is full of None value
        
    def get_mod_asci_num(self, key):
        mod_value = 0
        for i in key:
            mod_value += ord(i)          # ord gives asci value of characters. mod_value is adding every string's asci value
        return mod_value % self.max_size # doing mod so that value will be between 0 to max size
    
    def __setitem__(self, key, value):   # it is python's standard operation. automatically calls
        idx = self.get_mod_asci_num(key) # taking which idx i will store the value
        self.arr[idx] = value            # setting the value in that particular idx
    
    def __getitem__(self, key):          # it is python's standard operation. automatically calls
        idx = self.get_mod_asci_num(key) # getting the asci num's mod. so that, we can access the idx of the list arr
        return self.arr[idx]
    
    def __delitem__(self, key):
        idx = self.get_mod_asci_num(key) # same thing like prev
        self.arr[idx] = None             # make the idx value None

In [46]:
a = hashmap()

a['feb 2']  = 233  # the way we add into the dict. automatically calls the __setitem__ method
a['apr 5']  = 45
a['apr 6']  = 563
a['apr 26'] = 43
a['apr 29'] = 94

print(a['apr 5']) # automatically calls __getitem__ method

del a['apr 5'] # auto automatically calls __delitem__ method

print(a['apr 5'])
print(a.arr)

45
None
[None, None, None, None, None, None, None, None, None, 563, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 43, None, None, 94, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 233, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]


### How to access and iterate through tuple:

In [5]:
l = [[('moin', '3')],[('raihan', '23'),('arnob', '3')],[('rifst','34')]]

for i,j in enumerate(l[1]):
    print(i,j[0])
    l[1][i] = ('arnob','23')
print(l)

0 raihan
1 arnob
[[('moin', '3')], [('arnob', '23'), ('arnob', '23')], [('rifst', '34')]]


In [35]:
l = [['raihan', '33'], ['miin', '89']]

for i, j in enumerate(l):
    print(i,j)

0 ['raihan', '33']
1 ['miin', '89']


## Collision Handling In Hash map 👹:

In [21]:
class hashmap:
    def __init__(self):
        self.max_size = 10
        self.arr  = [[] for i in range(self.max_size)] # in a list, creating another list for storing key, value pair
        
    def get_mod_asci_num(self, key):
        mod_value = 0
        for i in key:
            mod_value += ord(i)          # ord gives asci value of characters. mod_value is adding every string's asci value
        return mod_value % self.max_size # doing mod so that value will be between 0 to max size
    
    def __setitem__(self, key, value):   # it is python's standard operation. automatically calls
        idx = self.get_mod_asci_num(key) # taking which idx i will store the value
        found = False
        
        for idx_idx, elements in enumerate(self.arr[idx]): # go to main arr, idx_idx is the index value, elements for tuples
            if len(elements) == 2 and elements[0] == key:  # check if the key already exists. len 2 means it will be pair, elements[0] means first element of that pair
                self.arr[idx][idx_idx] = (key, value)      # updating the existing idx
                found = True
                break
            
        if not found:
            self.arr[idx].append((key, value))  # if the asci value is same for multiple keys, then we will add in the same list
    
    def __getitem__(self, key):          # it is python's standard operation. automatically calls
        idx = self.get_mod_asci_num(key) # getting the asci num's mod. so that, we can access the idx of the list arr
        for i in self.arr[idx]:          # if there is multiple key value pair in the list, we will traves through it
            if i[0] == key:              # when key is send for printing, first element in the pair is key
                return i[1]              # second element in the pair is value
    
    def __delitem__(self, key):
        idx = self.get_mod_asci_num(key)                    # same thing like prev
        for idx_idx, elements in enumerate(self.arr[idx]):  # iterate through that index
            if elements[0] == key:                          
                del self.arr[idx][idx_idx]                  # delete the array's idx

In [24]:
t = hashmap()
t["march 6"] = 310
t["march 7"] = 420
t["march 8"] = 67
t["march 17"] = 63457

print(t['march 6'])
print(t['march 17'])

print(t.arr)

del t['march 17'] # auto automatically calls __delitem__ method
print(t.arr)


310
63457
[[('march 7', 420)], [('march 8', 67)], [], [], [], [], [], [], [], [('march 6', 310), ('march 17', 63457)]]
[[('march 7', 420)], [('march 8', 67)], [], [], [], [], [], [], [], [('march 6', 310)]]


## Exercise: Hash Table: New York City Weather Analysis
(1) nyc_weather.csv contains new york city weather for first few days in the month of January. Write a program that can answer following,

- What was the average temperature in first week of Jan?

- What was the maximum temperature in first 10 days of Jan?

  Figure out data structure that is best for this problem

In [73]:
temp_list = []

with open("nyc_weather.csv","r") as f:
    a = f.readlines()
    for line in a[1:]:
        tokens = line.strip().split(',')
        temp = tokens[1]
        temp_list.append(temp)

avg_temp = 0
for i in temp_list[:7]:
    avg_temp += int(i)
    
print("average temperature in first week of Jan:", avg_temp/len(temp_list[:7]))

print("maximum temperature in first 10 days of Jan:", max(temp_list))

average temperature in first week of Jan: 31.285714285714285
maximum temperature in first 10 days of Jan: 38


(2) nyc_weather.csv contains new york city weather for first few days in the month of January. Write a program that can answer following,

- What was the temperature on Jan 9?

- What was the temperature on Jan 4?

  Figure out data structure that is best for this problem

In [75]:
temp_dic = {}

with open("nyc_weather.csv","r") as f:
    a = f.readlines()
    for line in a[1:]:
        tokens = line.strip().split(',')
        date = tokens[0]
        temp = tokens[1]
        temp_dic[date] = temp
        
print("temperature on Jan 9:", temp_dic['Jan 9'])
print("temperature on Jan 4:", temp_dic['Jan 4'])

temperature on Jan 9: 35
temperature on Jan 4: 34


(3) poem.txt Contains famous poem "Road not taken" by poet Robert Frost. 
You have to read this file in python and print every word and its count as show below. Think about the best data structure that you can use to solve this problem and figure out why you selected that specific data structure.

In [95]:
word = {}

with open("poem.txt","r") as f:
    for line in f:
        tokens = line.strip().split(" ")
        for i in tokens:
            if i in word:
                word[i] += 1
            else:
                word[i] = 1
print(word)

{'Two': 2, 'roads': 2, 'diverged': 2, 'in': 3, 'a': 3, 'yellow': 1, 'wood,': 2, 'And': 6, 'sorry': 1, 'I': 8, 'could': 2, 'not': 1, 'travel': 1, 'both': 2, 'be': 2, 'one': 3, 'traveler,': 1, 'long': 1, 'stood': 1, 'looked': 1, 'down': 1, 'as': 5, 'far': 1, 'To': 1, 'where': 1, 'it': 2, 'bent': 1, 'the': 8, 'undergrowth;': 1, '': 3, 'Then': 1, 'took': 2, 'other,': 1, 'just': 1, 'fair,': 1, 'having': 1, 'perhaps': 1, 'better': 1, 'claim,': 1, 'Because': 1, 'was': 1, 'grassy': 1, 'and': 3, 'wanted': 1, 'wear;': 1, 'Though': 1, 'for': 2, 'that': 3, 'passing': 1, 'there': 1, 'Had': 1, 'worn': 1, 'them': 1, 'really': 1, 'about': 1, 'same,': 1, 'morning': 1, 'equally': 1, 'lay': 1, 'In': 1, 'leaves': 1, 'no': 1, 'step': 1, 'had': 1, 'trodden': 1, 'black.': 1, 'Oh,': 1, 'kept': 1, 'first': 1, 'another': 1, 'day!': 1, 'Yet': 1, 'knowing': 1, 'how': 1, 'way': 1, 'leads': 1, 'on': 1, 'to': 1, 'way,': 1, 'doubted': 1, 'if': 1, 'should': 1, 'ever': 1, 'come': 1, 'back.': 1, 'shall': 1, 'telling': 1