# Hash Table

In [2]:
## Hash Function
- Arithmetic Modular
- Truncation
- Folding

### Arithmetic Modular

In [3]:
def hash_modular(key, size):
    return key % size


lst = [None] * 10  # List of size 10
key = 35
index = hash_modular(key, len(lst))  # Fit the key into the list size
print("The index for key " + str(key) + " is " + str(index))

The index for key 35 is 5


### Truncation

In [None]:
def hash_trunc(key):
    return key % 1000  # Will always give us a key of up to 3 digits


key = 123456
index = hash_trunc(key)  # Fit the key into the list size
print("The index for key " + str(key) + " is " + str(index))

### Folding

In [None]:
def hash_fold(key, chunk_size):  # Define the size of each divided portion
    str_key = str(key)  # Convert integer into string for slicing
    print("Key: " + str_key)
    hash_val = 0
    print("Chunks:")
    for i in range(0, len(str_key),  chunk_size):

        if(i + chunk_size < len(str_key)):
            # Slice the appropriate chunk from the string
            print(str_key[i:i+chunk_size])
            hash_val += int(str_key[i:i+chunk_size])  # convert into integer
        else:
            print(str_key[i:len(str_key)])
            hash_val += int(str_key[i:len(str_key)])
    return hash_val


key = 3456789
chunk_size = 2
print("Hash Key: " + str(hash_fold(key, chunk_size)))

## Strategies to Handle Collisions
- Linear Probing
- Chaining
- Resizing the list

## Hash Table implementation 

In [None]:
class HashEntry:
    def __init__(self, key, data):
        self.key = key
        # data to be stored
        self.value = data
        # reference to new entry
        self.nxt = None

In [8]:
class HashTable:
    # Constructor
    def __init__(self):
        # Size of the HashTable
        self.slots = 10
        # Current entries in the table
        # Used while resizing the table when half of the table gets filled
        self.size = 0
        # List of HashEntry objects (by default all None)
        self.bucket = [None] * self.slots
        self.threshold = 0.6

    # Helper Functions
    def get_size(self):
        return self.size

    def is_empty(self):
        return self.get_size() is 0

    # Hash Function
    def get_index(self, key):
        # hash is a built in function in Python
        hash_code = hash(key)
        index = hash_code % self.slots
        return index

    def resize(self):
        new_slots = self.slots * 2
        new_bucket = [None] * new_slots
        # rehash all items into new slots
        for i in range(0, len(self.bucket)):
            head = self.bucket[i]
            while head is not None:
                new_index = hash(head.key) % new_slots
                if new_bucket[new_index] is None:
                    new_bucket[new_index] = HashEntry(head.key, head.value)
                else:
                    node = new_bucket[new_index]
                    while node is not None:
                        if node.key is head.key:
                            node.value = head.value
                            node = None
                        elif node.nxt is None:
                            node.nxt = HashEntry(head.key, head.value)
                            node = None
                        else:
                            node = node.nxt
                head = head.nxt
        self.bucket = new_bucket
        self.slots = new_slots

    def insert(self, key, value):
        # Find the node with the given key
        b_index = self.get_index(key)
        if self.bucket[b_index] is None:
            self.bucket[b_index] = HashEntry(key, value)
            self.size += 1
        else:
            head = self.bucket[b_index]
            while head is not None:
                if head.key is key:
                    head.value = value
                    break
                elif head.nxt is None:
                    head.nxt = HashEntry(key, value)
                    self.size += 1
                    break
                head = head.nxt

        load_factor = float(self.size) / float(self.slots)
        # Checks if 60% of the entries in table are filled, threshold = 0.6
        if load_factor >= self.threshold:
            self.resize()
        
    # Return a value for a given key
    def search(self, key):
        # Find the node with the given key
        b_index = self.get_index(key)
        head = self.bucket[b_index]
        # Search key in the slots
        while head is not None:
            if head.key == key:
                return head.value
            head = head.nxt
        # If key not found
        return None

    # Remove a value based on a key
    def delete(self, key):
        # Find index
        b_index = self.get_index(key)
        head = self.bucket[b_index]
        # If key exists at first slot
        if head.key is key:
            self.bucket[b_index] = head.nxt
            # Decrease the size by one
            self.size -= 1
            return self
        # Find the key in slots
        prev = None
        while head is not None:
            # If key exists
            if head.key is key:
                prev.nxt = head.nxt
                # Decrease the size by one
                self.size -= 1
                return
            # Else keep moving in chain
            prev = head
            head = head.nxt

        # If key does not exist
        return

3, Educative


In [10]:
table = HashTable()  # Create a HashTable
print(table.is_empty())
table.insert("This", 1)
table.insert("is", 2)
table.insert("a", 3)
table.insert("Test", 4)
table.insert("Driver", 5)
print("Table Size: " + str(table.get_size()))
print("The value for 'is' key: " + str(table.search("is")))
table.delete("is")
table.delete("a")
print("Table Size: " + str(table.get_size()))

7

### Challenge 1: A List as a Subset of Another List

Implement the is_subset(list1,list2) function which will take two lists as input and check whether one list is the subset of the other. This method is already available in Python, but we’ll be implementing it using hash tables.

Note: The input arrays do not contain duplicate values.

Use the Python set as your hash table.

Input #
Two lists of integers.

Output #
True if list2 is a subset of list1.

### Solution: Lookup in a Hash Table

For a lookup list with m elements and a subset list with n elements, the time complexity is O(m+n).

In [None]:
def is_subset(list1, list2):
    s = set(list1)  # Create a set with list1 values
    # Traverse list 2 elements
    for elem in list2:
        # Return false if an element not in list1
        if elem not in s:
            return False
    # Return True if all elements in list1
    return True


list1 = [9, 4, 7, 1, -2, 6, 5]
list2 = [7, 1, -2]
list3 = [10, 12]
print(is_subset(list1, list2))
print(is_subset(list1, list3))

## Challenge 2: Check if Lists are Disjoint

You have to implement the is_disjoint() function which checks whether two given lists are disjoint or not. Two lists are disjoint if there are no common elements between them. The assumption is that there are no duplicate elements in each list.

Input # Two lists of integers.

Output # It returns True if the two are disjoint. Otherwise, it returns False.

### Solution: Use a Set

For a lookup list with m elements and a subset list with n elements, the time complexity is O(m+n).

In [None]:
def is_disjoint(list1, list2):
    s = set(list1)  # Create set of list1 elements
    # iterate list 2
    for elem in list2:
        # if element in list1 then return False
        if elem in s:
            return False
    # Return True if no common element
    return True


list1 = [9, 4, 3, 1, -2, 6, 5]
list2 = [7, 10, 8]
list3 = [1, 12]
print(is_disjoint(list1, list2))
print(is_disjoint(list1, list3))

## Challenge 3: Find Symmetric Pairs in a Lis

By definition, (a, b) and (c, d) are symmetric pairs iff, a = d and b = c. In this problem, you have to implement the find_symmetric(list) function which will find all the symmetric pairs in a given list.

Input # A list.

Output # A list containing all the symmetric pairs of elements in the input list.

### Solution: Using a Dictionary/Set
The hash table lookups work in constant time. Hence, our traversal of the input list makes the algorithm run in O(n) where n is the list size.

In [None]:
def find_symmetric(my_list):
    # Create an empty set
    pair_set = set()
    result = []
    # Traverse through the given list
    for pair in my_list:
        # Make a tuple and a reverse tuple out of the pair
        pair_tup = tuple(pair)
        pair.reverse()
        reverse_tup = tuple(pair)
        # Check if the reverse tuple exists in the set
        if(reverse_tup in pair_set):
            # Symmetric pair found
            result.append(list(pair_tup))
            result.append(list(reverse_tup))
        else:
            # Insert the current tuple into the set
            pair_set.add(pair_tup)
    return result


arr = [[1, 2], [4, 6], [4, 3], [6, 4], [5, 9], [3, 4], [9, 5]]
symmetric = find_symmetric(arr)
print(symmetric)

## Challenge 4: Trace the Complete Path of a Journey

You have to implement the trace_path() function which will take in a list of source-destination pairs and return the correct sequence of the whole journey from the first city to the last.

Input #
A Python dict containing string pairs of source-destination cities.

Output #
A list of source-destination pairs in the correct order.

```
# Sample Input
dict = {
  "NewYork": "Chicago",
  "Boston": "Texas",
  "Missouri": "NewYork",
  "Texas": "Missouri"
}

# Sample Output
[["Boston", "Texas"] , ["Texas", "Missouri"] , ["Missouri", "NewYork"] , ["NewYork", "Chicago"]]
```

### Solution Review: Trace the Complete Path of a Journey

Although a hash table is created and traversed, both take the same amount of time. The complexity for this algorithm is O(n) where n is the number of source-destination pairs.

In [None]:
def trace_path(my_dict):  # A Map object
    
    result = []
    
    for k, v in my_dict.items():
        if k not in my_dict.values():
            result.append([k, v])
            dest = v
    
    while dest is not None:
        try:
            tmp = my_dict[dest]
            result.append([dest, tmp])
            dest = tmp
        except:
            dest = None

    return result

In [25]:
def trace_path(my_dict):
    result = []
    # Create a reverse dict of the given dict i.e if the given dict has (N,C)
    # then reverse dict will have (C,N) as key-value pair
    # Traverse original dict and see if it's key exists in reverse dict
    # If it doesn't exist then we found our starting point.
    # After the starting point is found, simply trace the complete path
    # from the original dict.
    reverse_dict = dict()
    # To fill reverse dict, iterate through the given dict
    keys = my_dict.keys()
    for key in keys:
        reverse_dict[my_dict.get(key)] = key
    # Find the starting point of itinerary
    from_loc = None
    keys_rev = reverse_dict.keys()
    for key in keys:
        if key not in reverse_dict:
            from_loc = key
            break
            # Trace complete path
    to = my_dict.get(from_loc)
    while to is not None:
        result.append([from_loc, to])
        from_loc = to
        to = my_dict.get(to)
    return result


my_dict = dict()
my_dict["NewYork"] = "Chicago"
my_dict["Boston"] = "Texas"
my_dict["Missouri"] = "NewYork"
my_dict["Texas"] = "Missouri"
print(trace_path(my_dict))

[['Boston', 'Texas'], ['Texas', 'Missouri'], ['Missouri', 'NewYork'], ['NewYork', 'Chicago']]


## Challenge 5: Find Two Pairs in List such that a+b = c+d

In this problem, you have to implement the find_pair() function which will find two pairs, [a, b] and [c, d], in a list such that :

a+b = c+d

You only have to find the first two pairs in the list which satisfies the above condition.

Input #
A list of distinct integers.

Output #
A list containing two pairs, (a, b) and (c, d), which satisfy a + b = c + d

```
# Sample Input
my_list = [3, 4, 7, 1, 12, 9]
# Sample Output #
[[4,12],[7,9]]
```

### Solution: Sums Stored as Hash Keys


The time complexity of this algorithm is O(n2).

In [26]:
def find_pair(my_list):
    my_dict = {}

    for item1 in my_list:
        for item2 in my_list:
            if item1 != item2:
                if item1 + item2 in my_dict.keys() and my_dict[item1+item2] != [item2, item1]:
                    result = [[item1, item2], my_dict[item1+item2]]
                else:
                    my_dict[item1+item2] = [item1, item2]
    
    return result

In [28]:
my_list = [3, 4, 7, 1, 12, 9]
print(find_pair(my_list))

[[9, 1], [7, 3]]


In [None]:
def find_pair(my_list):
    result = []
    # Create Has my_dict with Key being added and value being a pair
    # i.e key = 3 , value = {1,2}
    # Traverse all possible pairs in my_list and store sums in map
    # If sum already exist then print out the two pairs.
    my_dict = dict()
    for i in range(len(my_list)):
        for j in range(i+1, len(my_list)):
            added = my_list[i] + my_list[j]  # calculate sum
            # the 'in' operator on dict() item has a. complexity of O(1)
            # This is because of hashing
            # On a list, the 'in' operator would have the complexity of O(n)
            if added not in my_dict:
                # If added is not present in dict then insert it with pair
                my_dict[added] = [my_list[i], my_list[j]]
            else:
                # added already present in Map
                prev_pair = my_dict.get(added)
                # Since list elements are distinct, we don't
                # need to check if any element is common among pairs
                second_pair = [my_list[i], my_list[j]]
                result.append(prev_pair)
                result.append(second_pair)
                return result
    return result


my_list = [3, 4, 7, 1, 12, 9, 0]
print(find_pair(my_list))

## Challenge 6: A Sublist with a Sum of 0

You must implement the find_sub_zero(my_list) function which will take in a list of positive and negative integers. It will tell us if there exists a sublist in which the sum of all elements is zero. The term sublist implies that the elements whose sum is 0 must occur consecutively.

A list with these contents would return True:

Input #
A list containing positive and negative integers.

Output #
Returns True if there exists a sublist with its sum equal to 0. Otherwise, the function returns False.

### Naive Solution
The naive solution would be to iterate the list in a nested loop, summing each element with all the elements succeeding it.

In [30]:
def find_sub_zero(my_list):
    for i in range(len(my_list)):
        cum = my_list[i]
        for j in range(i+1, len(my_list)):
            cum += my_list[j]
            if cum == 0:
                return True
    return False

my_list = [6, 4, -7, 3, 12, 9]
print(find_sub_zero(my_list))

True


### Solution: Iterative Hashing

A hash table makes things much simpler.

As always, a linear iteration over n elements means that the algorithm’s time complexity is O(n).

In [52]:
def find_sub_zero(my_list):
    # Use hash table to store the cumulative sum as key
    # and the element as value till which sum has been calculated
    # Traverse the list and return true if either
    # elem == 0 or sum == 0 or hash table already contains the sum
    # If you completely traverse the list
    # and haven't found any of the above three
    # conditions then simply return false
    ht = dict()
    total_sum = 0
    # Traverse through the given list
    for elem in my_list:
        
        total_sum += elem
        # これまでの合計が、過去にも同じ値が計算されている、つまり、過去の部分を削除してやれば sum=0 が達成される
        if elem is 0 or total_sum is 0 or ht.get(total_sum) is not None:
            return True
        ht[total_sum] = elem
    return False


my_list = [6, 4, -7, 3, 12, 9]
print(find_sub_zero(my_list))

True


## Challenge 7: Word Formation Using a Hash Table

Problem Statement #
You have to implement the is_formation_possible() function which will find whether a given word can be formed by combining two words from a dictionary. We assume that all words are in lower case.

Input #
A list and a query word containing lowercase characters.

Output #
Returns True if the given word can be generated by combining two words from the list.

### Solution: Iterative Word Matching
We perform the insert operation m times for a list of size m. After that, we linearly traverse the word of size n once. Furthermore, we slice strings of size n in each iteration. Hence the total time complexity is O(m + n^2).

The solution only works for two words and not more.

In [None]:
from HashTable import HashTable


def is_formation_possible(lst, word):

    if len(word) < 2 or len(lst) < 2:
        return False
    
    hash_table = HashTable()
    for elem in lst:
        hash_table.insert(elem, True)
        
    for i in range(1, len(word)):
        # Slice the word into two strings in each iteration
        first = word[0:i]
        second = word[i:len(word)]
        check1 = False
        check2 = False
    
        if hash_table.search(first) is not None:
            check1 = True
        if hash_table.search(second) is not None:
            check2 = True
        
        # Return True If both substrings are present in the trie
        if check1 and check2:
            return True

    return False

keys = ["the", "hello", "there", "answer",
        "any", "educative", "world", "their", "abc"]
print(is_formation_possible(keys, "helloworld"))  

### solution for more than two words.

In [79]:
def is_formation_possible(lst, word):
    set_lst = set(lst)
    start = 0
    
    while start <= len(word):
        subword = word[start]
        for i in range(start+1, len(word)):
            subword += word[i]
            print(subword)
            if subword in set_lst:
                if i + 1 == len(word):
                    return True
                else:
                    start = i+1
                    break
            else:
                start += 1 
    
    return False


keys = ["the", "hello", "there", "answer",
        "any", "educative", "world", "their", "abc"]
print(is_formation_possible(keys, "helloworld")) 

he
hel
hell
hello
here 4 10 3
wo
wor
worl
world
here 9 10 8
True


## Challenge 8: Find Two Numbers that Add up to "k"

In this problem, you have to implement the findSum(lst,k) function which will take a number k as input and return two numbers that add up to k.

You have already seen this challenge previously in chapter 2 of this course. Here you would use HashTables for a more efficient solution.

Input #
A list and a number k

Output #
A list with two integers a and b that add up to k

### Naive Solution
The time complexity is O(N^2)

In [107]:
def findSum(lst, k):
    for i in range(len(lst)):
        for j in range(i+1, len(lst)):
            if lst[i] + lst[j] == k:
                return [lst[i], lst[j]]
    return False

### Using the Python Set.

The element being searched should not be in the list. So the solution uses foundValues set() and added element after searched.

The time complexity of the solution above is O(n)O(n).

In [109]:
def findSum(lst, value):
    foundValues = set()
    for ele in lst:
        if value - ele in foundValues:
            return [value-ele, ele]
        foundValues.add(ele)
    return False

In [110]:
print(findSum([1, 3, 2, 4], 6))

[2, 4]


## Solution Review: First Non-Repeating Integer in a list


### Solution #1: Using a Python dictionary to keep count of repetitions
Since the list is only iterated over only once, therefore the time complexity of this solution is linear, i.e., O(n)O(n).

In [None]:
def findFirstUnique(lst):
    counts = {}  # Creating a dictionary
    # Initializing dictionary with pairs like (lst[i],count)
    counts = counts.fromkeys(lst, 0)
    for ele in lst:
        # counts[ele] += 1  # Incrementing for every repitition
        counts[ele] = counts[ele]+1
    answer_key = None
    # filter first non-repeating 
    for ele in lst:
        if (counts[ele] is 1):
            answer_key = ele
            break
    return answer_key


print(findFirstUnique([1, 1, 1, 2]))

### Solution #2: Using collections 

In [None]:
import collections


def findFirstUnique(lst):
    orderedCounts = collections.OrderedDict()  # Creating an ordered dictionary
    # Initializing dictionary with pairs like (lst[i],0)
    orderedCounts = orderedCounts.fromkeys(lst, 0)
    for ele in lst:
        orderedCounts[ele] += 1  # Incrementing for every repitition
    for ele in orderedCounts:
        if orderedCounts[ele] == 1:
            return ele
    return None


print(findFirstUnique([1, 1, 1, 2, 3, 2, 4]))