HashTable implementation using only arrays

https://leetcode.com/problems/design-hashmap/

Sources:
* https://algs4.cs.princeton.edu/34hash/
* https://pagekeysolutions.com/blog/dsa/hash-table-python/

### Simple HashMap (no hash function)

In [10]:
from typing import List, Optional, Any


class MyHashMap:
    """Direct-address map: the key itself is the index into one large list.

    A slot holding -1 means "no value stored for this key".
    """

    def __init__(self):
        # One slot per possible key in the range 0 .. 10**6 inclusive.
        self.size = 10**6 + 1
        self.keys = [-1] * self.size

    def put(self, key: int, value: int) -> None:
        """Store ``value`` in the slot addressed by ``key``."""
        self.keys[key] = value

    def get(self, key: int) -> int:
        """Return the content of slot ``key`` (-1 when nothing was stored)."""
        slot_content = self.keys[key]
        return slot_content

    def remove(self, key: int) -> None:
        """Reset slot ``key`` back to the -1 "empty" marker."""
        self.keys[key] = -1

### HashMap (hash function)
* collision resolution
  * chaining via a list of buckets to handle collision
  * the original unhashed key is stored alongside its value in each bucket,
    so a lookup can still find the correct entry when a collision has occurred

As one of the most intuitive implementations, we could adopt the modulo operator as the hash function, since the key value is of integer type. In addition, in order to minimize the potential collisions, it is advisable to use a prime number as the modulus, e.g. 2069.

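With a non-prime modulus, keys that share a factor with the modulus pile into the same slots:

1000 % 10 == 1000 % 100, which is 0 (and every other multiple of 10 also lands in slot 0 under % 10)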

2069 is a large prime number

Here, a bucket is a list of tuples.

#### Collisions
Collisions, like with keys 2070 and 1, cannot be avoided entirely,
so they have to be handled: we only update an existing tuple
in the bucket if its original key is a match, even though
the hashed keys may be the same. Otherwise the new tuple is appended.


```
key: 2070, value: 2
key: 1, value: 4

hash(2070) = 1
hash(1) = 1

hash_map[1] = Bucket([(2070, 2), (1, 4)])

```



In [11]:
class Bucket:
    """Chain of ``(key, value)`` tuples for keys that hash to the same slot."""

    def __init__(self):
        # Entries are (original_key, value) tuples; update() never adds a
        # second entry for a key that is already present.
        self.data = []

    def update(self, key, value):
        """Store ``value`` under ``key``.

        An existing entry with the same original key is overwritten in place;
        otherwise a new entry is appended to the chain.
        """
        for i, (stored_key, _) in enumerate(self.data):
            if stored_key == key:
                self.data[i] = (key, value)
                # The key was found, so there is nothing left to scan.
                return
        self.data.append((key, value))

    def get(self, key):
        """Return the value stored for ``key``, or -1 when the key is absent."""
        for stored_key, stored_value in self.data:
            if stored_key == key:
                return stored_value
        return -1

    def remove(self, key):
        """Delete the entry for ``key`` if present; do nothing otherwise."""
        for i, (stored_key, _) in enumerate(self.data):
            if stored_key == key:
                # Return immediately: continuing to iterate a list after
                # popping from it would silently skip the following entry.
                del self.data[i]
                return

class MyHashMap:
    """Integer-keyed hash map that chains colliding keys inside ``Bucket`` objects."""

    def __init__(self):
        # A prime table size is used to reduce the number of
        # collisions produced by the modulo hash.
        self.size = 2069
        self.hash_map = [Bucket() for _ in range(self.size)]

    def put(self, key: int, value: int) -> None:
        """Insert or overwrite ``key`` in the bucket its hash selects."""
        slot = self.hash(key)
        self.hash_map[slot].update(key, value)

    def get(self, key: int) -> int:
        """Return whatever the selected bucket's ``get`` reports for ``key``."""
        slot = self.hash(key)
        return self.hash_map[slot].get(key)

    def remove(self, key: int) -> None:
        """Ask the selected bucket to drop ``key``."""
        slot = self.hash(key)
        self.hash_map[slot].remove(key)

    def hash(self, key):
        """Map ``key`` to a table index by taking it modulo the table size."""
        table_index = key % self.size
        return table_index


#### Test Collision

In [12]:
# Keys 2070 and 1 both map to slot 1 (2070 % 2069 == 1), so they share a bucket.
hash_map = MyHashMap()
hash_map.put(2070, 2)
hash_map.put(1, 4)  # collision

In [13]:
# Look up the first of the two colliding keys.
hash_map.get(2070)

2

In [14]:
# Look up the second of the two colliding keys.
hash_map.get(1)



4

#### Hashmap (with hash function and a single class)

In [15]:
class MyHashMap:
    """Integer-keyed hash map with separate chaining, kept in a single class.

    Each table slot is a plain list of ``(key, value)`` tuples. The original
    key is stored so that colliding keys can be told apart.
    """

    def __init__(self):
        self.size = 2069  # large prime
        self.keys = [[] for _ in range(self.size)]

    def put(self, key: int, value: int) -> None:
        """Store ``value`` under ``key``, overwriting any existing entry."""
        chain = self.keys[self.hash(key)]
        for i, (stored_key, _) in enumerate(chain):
            if stored_key == key:
                chain[i] = (key, value)
                # The key was found, so there is nothing left to scan.
                return
        chain.append((key, value))

    def get(self, key: int) -> int:
        """Return the value stored for ``key``, or -1 when the key is absent."""
        for stored_key, stored_value in self.keys[self.hash(key)]:
            if stored_key == key:
                return stored_value
        return -1

    def remove(self, key: int) -> None:
        """Delete the entry for ``key`` if present; do nothing otherwise."""
        chain = self.keys[self.hash(key)]
        for i, (stored_key, _) in enumerate(chain):
            if stored_key == key:
                # Return immediately: continuing to iterate a list after
                # popping from it would silently skip the following entry.
                del chain[i]
                return

    def hash(self, key):
        """Map ``key`` to a table index by taking it modulo the table size."""
        return key % self.size



#### Test Collision

In [16]:
# Keys 2070 and 1 both map to slot 1 (2070 % 2069 == 1), so they share a chain.
hash_map = MyHashMap()
hash_map.put(2070, 2)
hash_map.put(1, 4)  # collision

In [17]:
# Look up the first of the two colliding keys.
hash_map.get(2070)

2

In [18]:
# Look up the second of the two colliding keys.
hash_map.get(1)

4

#### Hash functions (string -> int)

A good hash function should
1. Use all the data in the key
2. Uniformly distribute data in the table
3. Be deterministic. Gives the same output for the same input.

In [19]:
def hash(key: str, hash_table_size: int) -> int:
    """
    Hashes a string by adding up the code points of its characters
    :param key: The string to hash
    :param hash_table_size: modulus applied to the sum; preferably a large prime number to avoid collisions
    :return: the code point sum modulo hash_table_size
    """
    # ord maps a single character to its integer code point
    code_point_total = sum(map(ord, key))
    return code_point_total % hash_table_size

# ord("a") + ord("b") + ord("c") = 97 + 98 + 99 = 294, and 294 % 2069 == 294
hash("abc", 2069)


294

#### Implement a hashmap using only arrays

In [20]:
class Bucket:
    """One table slot: a list of entries for every key that hashed here."""

    def __init__(self):
        self.key = None
        # Layout: [(unhashed_key, value), (unhashed_key_2, value), ...]
        self.values = []

    def get(self, orig_key):
        """Return the value paired with ``orig_key``, or None if it is absent."""
        matches = (entry[1] for entry in self.values if entry[0] == orig_key)
        return next(matches, None)

    def put(self, orig_key, value):
        """Overwrite the entry for ``orig_key`` if one exists, else append one."""
        for position, entry in enumerate(self.values):
            if orig_key == entry[0]:
                self.values[position] = (orig_key, value)
                break
        else:
            # The loop finished without a match: this key is new to the bucket.
            self.values.append((orig_key, value))

    def remove(self, orig_key):
        """Drop the first entry whose key equals ``orig_key``, if any."""
        for position, entry in enumerate(self.values):
            if orig_key == entry[0]:
                del self.values[position]
                return

class HashTable:
    """String-keyed hash table that chains colliding keys inside ``Bucket`` objects."""

    def __init__(self):
        # A prime table size is chosen to cut down on collisions
        # (i.e. n % prime_number incurs fewer collisions than n % even_number for example)
        self.size = 2069
        self.table = [Bucket() for _ in range(self.size)]

    def get(self, key: str):
        """Return whatever the bucket selected by ``key``'s hash holds for it."""
        bucket = self.table[self.hash(key)]
        return bucket.get(key)

    def put(self, key: str, value):
        """Hand ``key`` and ``value`` to the bucket selected by ``key``'s hash."""
        bucket = self.table[self.hash(key)]
        bucket.put(key, value)

    def remove(self, key: str):
        """Ask the bucket selected by ``key``'s hash to drop ``key``."""
        bucket = self.table[self.hash(key)]
        bucket.remove(key)

    def hash(self, key: str):
        """
        str -> sum of character code points -> sum % table size
        """
        return sum(ord(ch) for ch in key) % self.size


# Putting the same key twice overwrites the first value instead of adding a second entry.
ht = HashTable()
ht.put("My Name", "Peter")
ht.put("My Name", "Peter Lucia")
ht.get("My Name")

'Peter Lucia'

In [21]:
# Reading the key again does not change what is stored.
ht.get("My Name")

'Peter Lucia'

In [22]:
# After removal the lookup returns None, so the notebook echoes no output for this cell.
ht.remove("My Name")
ht.get("My Name")

### Reconstruct original digits from english

[https://leetcode.com/problems/reconstruct-original-digits-from-english/](https://leetcode.com/problems/reconstruct-original-digits-from-english/)

In [2]:
from collections import Counter
class Solution:
    def originalDigits(self, s: str) -> str:
        # approach
        # O(n) time complexity: one pass to count letters, then constant work
        #
        # Five digit words contain a letter that no other digit word has:
        #   zero -> 'z', two -> 'w', four -> 'u', six -> 'x', eight -> 'g'
        # so their counts can be read directly from the letter counts.
        #
        # The rest are found by subtracting already-known digits from a shared letter:
        #   one   - 'o' minus zero, two, four
        #   three - 't' minus two, eight
        #   five  - 'f' minus four
        #   seven - 's' minus six
        #   nine  - 'i' minus eight, six, five

        # letter -> number of occurrences in s (a missing letter reads as 0)
        letters = Counter(s)

        zeros = letters['z']
        twos = letters['w']
        fours = letters['u']
        sixes = letters['x']
        eights = letters['g']

        ones = letters['o'] - zeros - twos - fours
        threes = letters['t'] - twos - eights
        fives = letters['f'] - fours
        sevens = letters['s'] - sixes
        nines = letters['i'] - eights - sixes - fives

        # Index in this tuple == the digit it counts, so the output is ascending.
        per_digit = (zeros, ones, twos, threes, fours,
                     fives, sixes, sevens, eights, nines)
        return "".join(str(digit) * count for digit, count in enumerate(per_digit))

# The input spells the digits one through nine once each, with no "zero".
Solution().originalDigits("onetwothreefourfivesixseveneightnine")

'123456789'