# Ways to House your Data

## What is this?
<font color=green>
    <p>
        This is a notebook containing implementations, short descriptions, and a few use cases of various data structures.
    </p>
</font>

<p> 
    Currently implemented:
</p>
<ul>
    <li><b>Stack</b></li>
    <li><b>Hash table</b></li>
    <li><b>Max heap</b></li>
    <li><b>Binary search tree</b></li>
    <li><b>Linked list</b></li>
    <li><b>Trie</b></li>
    <li><b>AVL tree</b></li>
</ul>

<p>
    On the way:
</p>
<ul>
    <li><b>Adjacency list</b></li>
    <li><b>Adjacency matrix</b></li>
    <li><b>Directed graph</b></li>
    <li><b>Directed acyclic graph</b></li>
</ul>

In [None]:
class Stack:
    '''
    Last-in, first-out (LIFO) container backed by a Python list.

    Two operations do the real work:
        - push places an item on top of the stack
        - pop takes the most recently pushed item back off the top

    Typical uses: function call frames and undo/redo histories.
    '''
    def __init__(self):
        # The end of the list is the top of the stack.
        self._stack = list()

    def size(self):
        '''Returns how many items are currently on the stack'''
        return len(self._stack)

    def top(self):
        '''Returns the newest item without removing it (IndexError if the stack is empty)'''
        newest = self._stack[-1]
        return newest

    def push(self, item):
        '''Places item on top of the stack'''
        # Time: O(1)
        self._stack += [item]

    def pop(self):
        '''Removes and returns the newest item (IndexError if the stack is empty)'''
        newest = self.top()
        del self._stack[-1]
        return newest

if __name__ == '__main__':
    # Demo: show the size before and after a single push, then peek at the top.
    s = Stack()
    size_when_empty = s.size()
    print(size_when_empty)

    s.push(5)
    size_after_push = s.size()
    print(size_after_push)
    print(s.top())


In [8]:
import random
import math

class HashTable:
    '''
    The hash table is a data structure that implements an associative array.
    It uses a hash function to compute an index into an array of buckets,
    and the key/value pair is stored in the bucket at that index.

    Ideally every key would hash to its own bucket, but real hash functions
    collide. Collisions are handled here by chaining: each bucket is its own
    list of HashElement objects. The hash function is a small
    Merkle-Damgard-style construction (an initial value mixed with the key one
    8-bit block at a time); it is for illustration only and is NOT
    cryptographically secure.

    Keys must be 'int' or 'str'. Putting a key that is already present
    replaces its value.

    A classic real-world hash table is the phonebook, where a name acts as the
    key to a listed phone number.
    '''
    class HashElement:
        '''A single key/value pair stored inside a bucket'''
        def __init__(self, key, value):
            self._key = key
            self._value = value

        def get_key(self):
            return self._key

        def get_value(self):
            return self._value

    def __init__(self, size):
        '''Creates an empty table with `size` buckets (ValueError if size < 1)'''
        if size < 1:
            # A table with no buckets cannot index anything (modulo by zero).
            raise ValueError('HashTable size must be at least 1')

        self._capacity = size
        self._load_factor = .75
        self._size = 0
        # Build each bucket separately: `[[...]] * n` would alias ONE list n times.
        self._hash_table = [[] for _ in range(self._capacity)]
        self._iv = random.getrandbits(8)

    def _mix(self, block, internal_state):
        '''Combines one 8-bit block with the running internal state'''
        return (internal_state * block) ^ ((internal_state << 2) + (block >> 1))

    def _hash(self, item):
        '''
        Hashes an int or str key, one 8-bit block at a time.
        Raises TypeError for any other key type.
        '''
        if isinstance(item, str):
            # One block per character code point
            blocks = [ord(char) for char in item]
        elif isinstance(item, int):
            # Least-significant byte first; zero and negative ints yield no blocks
            blocks = []
            remaining = item
            while remaining > 0:
                blocks.append(remaining & 0xff)
                remaining >>= 8
        else:
            raise TypeError(
                'invalid type -- key must be \'int\' or \'str\', got \'{}\''.format(type(item).__name__))

        # Mix blocks to produce the final output
        internal_state = self._iv
        for block in blocks:
            internal_state = self._mix(block, internal_state)

        return internal_state

    def _bucket_for(self, key):
        '''Returns the bucket (list of HashElement) that `key` hashes to'''
        return self._hash_table[self._hash(key) % self._capacity]

    def get(self, key):
        '''Returns the value of a given key, or None (with a printed message) if absent'''
        # Time: O(1) on average, O(bucket length) in the worst case
        # Space: O(1)
        for element in self._bucket_for(key):
            if element.get_key() == key:
                return element.get_value()

        print('err: element could not be found for key: {}'.format(key))
        return None

    def _resize(self):
        '''Grows the table by 1.5x and rehashes every stored element'''
        # Time: O(size(hash_table))
        # Space: O(new capacity)
        old_buckets = self._hash_table
        self._capacity = math.ceil(self._capacity*1.5)
        self._hash_table = [[] for _ in range(self._capacity)]

        for bucket in old_buckets:
            for element in bucket:
                # append() mutates only the target bucket
                self._bucket_for(element.get_key()).append(element)

    def put(self, key, value):
        '''Maps key to value, replacing the value if the key is already present'''
        # Time: amortized O(1)
        bucket = self._bucket_for(key)
        for position, element in enumerate(bucket):
            if element.get_key() == key:
                # Existing key: overwrite in place, the element count is unchanged
                bucket[position] = HashTable.HashElement(key, value)
                return

        bucket.append(HashTable.HashElement(key, value))
        self._size += 1

        # If size has met or exceeded the load capacity, grow the table
        if self._size / self._capacity >= self._load_factor:
            self._resize()

if __name__ == '__main__':
    # Roomy table: three insertions stay well below the load factor.
    ht = HashTable(100)
    roomy_entries = (
        (123456, 'dogs'),
        ('secret secret..', 'are'),
        ('yoyo', 'cute'),
    )
    for key, value in roomy_entries:
        ht.put(key, value)
    for key, value in roomy_entries:
        assert value == ht.get(key)

    # Tiny table: the second insertion crosses the load factor and forces a resize.
    ht = HashTable(2)
    tiny_entries = (
        (123, 'boop boop'),
        ('abc', 'beep beep'),
    )
    for key, value in tiny_entries:
        ht.put(key, value)
    for key, value in tiny_entries:
        assert value == ht.get(key)

[<__main__.HashTable.HashElement object at 0x0120EC50>]
[<__main__.HashTable.HashElement object at 0x0120E430>]
[<__main__.HashTable.HashElement object at 0x0120E730>]
[None, <__main__.HashTable.HashElement object at 0x0120E150>, <__main__.HashTable.HashElement object at 0x0120EC50>]
[None, <__main__.HashTable.HashElement object at 0x0120E150>, <__main__.HashTable.HashElement object at 0x0120EC50>]


In [8]:
import queue
import math

class MaxHeap:
    '''
    The heap is an almost complete tree-based data structure (every level except
    possibly the last is full) that satisfies the heap property:
        - in a MAX heap, for any node C with parent P, the value of P is >= the value of C
        - in a MIN heap, for any node C with parent P, the value of P is <= the value of C

    As the class name suggests, this is a max heap. The tree is stored in a flat
    list: the children of the node at index i live at 2*i + 1 and 2*i + 2, and
    its parent lives at (i - 1) // 2.

    A heap is useful whenever the highest (or lowest) value or priority must be
    removed repeatedly, e.g. a priority queue.
    '''
    def __init__(self):
        self._heap = []

    def add(self, item):
        '''Add an element to the heap, maintaining the heap structure'''
        # Time complexity: O(logn)
        # Space complexity: O(1)
        self._heap.append(item)
        self._heapify_up(len(self._heap) - 1)

    def _heapify_up(self, index):
        '''Bubbles the element at index up until its parent is not smaller, used for add'''
        while index > 0:
            # Integer division keeps the index exact (no float round-trip)
            parent_index = (index - 1) // 2
            if not self._heap[index] > self._heap[parent_index]:
                return

            self._heap[index], self._heap[parent_index] = self._heap[parent_index], self._heap[index]
            index = parent_index

    def _heapify_down(self, index):
        '''Sinks the element at index down until neither child is larger, used for pop'''
        heap_size = len(self._heap)
        while True:
            largest = index
            left = 2*index + 1
            right = 2*index + 2

            if left < heap_size and self._heap[largest] < self._heap[left]:
                largest = left

            if right < heap_size and self._heap[largest] < self._heap[right]:
                largest = right

            if largest == index:
                return

            self._heap[index], self._heap[largest] = self._heap[largest], self._heap[index]
            index = largest

    def pop(self):
        '''Removes and returns the top of the heap (the max element); IndexError if empty'''
        # Time: O(logn)
        # Space: O(1)
        if not self._heap:
            raise IndexError('pop from an empty heap')

        # Take the LAST leaf off (O(1)) and drop it into the root slot. Removing
        # index 0 directly would shift every element and scramble the
        # parent/child index relationships.
        last_leaf = self._heap.pop()
        if not self._heap:
            return last_leaf

        max_element = self._heap[0]
        self._heap[0] = last_leaf
        self._heapify_down(0)
        return max_element

    def print_heap(self):
        '''Prints the heap one tree level per line, in level-order'''
        # Time: O(n)
        # Space: O(# of leaves)
        # Level k occupies a contiguous run of 2**k slots in the backing list.
        level_start = 0
        level_width = 1
        while level_start < len(self._heap):
            for item in self._heap[level_start:level_start + level_width]:
                print(item, end=' ')
            print()

            level_start += level_width
            level_width *= 2

if __name__ == '__main__':
    # Demo: build a small heap, show its levels, then drain it.
    mh = MaxHeap()
    for value in (1, 3, 5, 7, 2):
        mh.add(value)
    mh.print_heap()

    # Draining the heap must hand the values back largest-first.
    for expected in (7, 5, 3, 2, 1):
        assert mh.pop() == expected


7 
5 3 
1 2 


In [24]:
class BinarySearchTree:
    '''
    The Binary Search Tree is a node-based binary tree data structure.
    What differentiates the BST from a regular binary tree:
        - the left subtree of a node contains only values less than the node's value
        - the right subtree of a node contains only values greater than or equal to the node's value
          (this implementation places duplicates in the right subtree)

    BSTs are useful wherever lookups can follow the binary search principle.
    Without self-balancing a BST can become degenerate, and lookups degrade
    from O(logn) to O(n) -- it only stays efficient if the data happens to be
    inserted in an order that keeps the tree balanced.
    '''
    class TreeNode:
        def __init__(self, val):
            self.val = val
            self.left = None
            self.right = None

    def __init__(self, val):
        '''Creates a tree whose root holds val'''
        self._root = BinarySearchTree.TreeNode(val)

    def _add_aux(self, item, node):
        '''Inserts item below node and returns the (possibly new) root of that subtree'''
        if node is None:
            return BinarySearchTree.TreeNode(item)

        if item < node.val:
            node.left = self._add_aux(item, node.left)
        else:
            node.right = self._add_aux(item, node.right)

        return node

    def add(self, item):
        '''Inserts item into the tree'''
        # Time: O(h) where h is tree's height
        # - will be O(n) if tree is degenerate
        # Keep the returned root: the tree may be empty after deletions.
        self._root = self._add_aux(item, self._root)

    def _del_aux(self, item, node):
        '''
        Auxiliary function for deleting one node holding item.
        Returns the root of the subtree after removal; a missing item is a no-op.
        '''
        if node is None:
            return None

        # Traverse the tree for the node with the value
        if item < node.val:
            node.left = self._del_aux(item, node.left)
        elif item > node.val:
            node.right = self._del_aux(item, node.right)
        elif node.left is None:
            # Leaf or right-only: the right child (possibly None) takes this spot
            return node.right
        elif node.right is None:
            # Left-only: the left child takes this spot
            return node.left
        else:
            # Two children: take over the value of the inorder successor (leftmost
            # node of the right subtree), then remove that successor node. The
            # rest of the right subtree stays attached.
            successor = node.right
            while successor.left is not None:
                successor = successor.left

            node.val = successor.val
            node.right = self._del_aux(successor.val, node.right)

        return node

    def delete(self, item):
        '''Removes one node holding item, if present'''
        # Time: O(h) where h is height
        # - will be O(n) if tree is degenerate
        # Keep the returned root: the root itself may be the node removed.
        self._root = self._del_aux(item, self._root)

    def print_bst(self):
        '''Prints the tree one level per line, tagging each node Root/Left/Right'''
        if self._root is None:
            return

        level = [('Root', self._root)]
        while len(level) > 0:
            for pos, node in level:
                print(pos, ' ', node.val, end=' ')
            print()

            next_level = []
            for pos, node in level:
                if node.left:
                    next_level.append(('Left ', node.left))

                if node.right:
                    next_level.append(('Right ', node.right))
            level = next_level
    
if __name__ == '__main__':
    # First tree: a left-leaning chain, printed before and after removing a leaf.
    bst = BinarySearchTree(6)
    for value in (5, 3, 1, 2):
        bst.add(value)
    bst.print_bst()

    bst.delete(2)
    print()
    bst.print_bst()

    # Second tree: remove an inner node that has two children.
    bst = BinarySearchTree(5)
    for value in (8, 2, 6, 10):
        bst.add(value)
    bst.delete(8)
    print()
    bst.print_bst()

Root   6 
Left    5 
Left    3 
Left    1 
Right    2 

Root   6 
Left    5 
Left    3 
Left    1 

Root   5 
Left    2 Right    10 
Left    6 


In [23]:
class AVLtree:
    '''
    The AVL tree is a variation of the BST in which the binary tree is self-balancing.
    A balanced BST is one where, for every node, the heights of the left and
    right subtrees differ by at most 1.

    BST operations such as search, max, min, insert and delete take O(h) time
    where h is the height of the tree. If the tree becomes degenerate, that
    cost degrades to O(n). Restoring the balance after every insertion AND
    deletion keeps the height at O(logn), which bounds these operations at O(logn).

    Each node stores its HEIGHT; after any structural change the heights along
    the search path are refreshed and unbalanced nodes are fixed by rotations.
    Duplicate values are ignored on insert.

    Self-balancing trees fit any scenario where dynamic data must stay sorted
    and be maintained efficiently. Compared to a sorted array (which also
    supports binary search), the nodes need not be stored contiguously in
    memory, so adding a node is just allocating it and linking it in.
    '''
    class TreeNode:
        def __init__(self, val):
            self.val = val
            self.left = None
            self.right = None
            self.height = 1  # a lone node has height 1; a missing child counts as 0

    def __init__(self, val):
        '''Creates a tree whose root holds val'''
        self._root = AVLtree.TreeNode(val)

    def _height(self, root):
        '''Returns the stored height of root, or 0 for a missing node'''
        return root.height if root else 0

    def _get_balance(self, root):
        '''Returns >+1 if left subtree-heavy, <-1 if right subtree-heavy'''
        if not root:
            return 0
        return self._height(root.left) - self._height(root.right)

    def _update_height(self, node):
        '''Recomputes node.height from the heights of its children'''
        node.height = 1 + max(self._height(node.left), self._height(node.right))

    def _left_rotation(self, root):
        '''
        Root takes right child's left child, and then root becomes right child's left child.
        Returns the new subtree root; the caller must link it into the tree.
        '''
        right_child = root.right

        root.right = right_child.left
        right_child.left = root

        # Update heights bottom-up: root now sits below right_child
        self._update_height(root)
        self._update_height(right_child)
        return right_child

    def _right_rotation(self, root):
        '''
        Root takes left child's right child, and then root becomes left child's right child.
        Returns the new subtree root; the caller must link it into the tree.
        '''
        left_child = root.left

        root.left = left_child.right
        left_child.right = root

        # Update heights bottom-up: root now sits below left_child
        self._update_height(root)
        self._update_height(left_child)
        return left_child

    def _rebalance(self, node):
        '''
        Refreshes node's height and rotates if it is out of balance.
        Returns the root of the (possibly rotated) subtree.
        The case is chosen from the heavy child's own balance, which is valid
        after both insertions and deletions.
        '''
        self._update_height(node)
        balance = self._get_balance(node)

        if balance > 1:
            # Left Right case: straighten the left child first
            if self._get_balance(node.left) < 0:
                node.left = self._left_rotation(node.left)
            # Left Left case
            return self._right_rotation(node)

        if balance < -1:
            # Right Left case: straighten the right child first
            if self._get_balance(node.right) > 0:
                node.right = self._right_rotation(node.right)
            # Right Right case
            return self._left_rotation(node)

        return node

    def _add_aux(self, item, node):
        '''Inserts item below node and returns the (possibly rotated) root of that subtree'''
        if node is None:
            return AVLtree.TreeNode(item)

        if item < node.val:
            node.left = self._add_aux(item, node.left)
        elif item > node.val:
            node.right = self._add_aux(item, node.right)
        else:
            # Duplicate: nothing changes
            return node

        return self._rebalance(node)

    def add(self, item):
        '''Inserts item into the tree (duplicates are ignored)'''
        # Time: O(logn)
        # Keep the returned root: a rotation at the top replaces the root,
        # and the tree may be empty after deletions.
        self._root = self._add_aux(item, self._root)

    def _del_aux(self, item, node):
        '''
        Auxiliary function for deleting the node holding item.
        Returns the rebalanced root of the subtree; a missing item is a no-op.
        '''
        if node is None:
            return None

        # Traverse the tree for the node with the value
        if item < node.val:
            node.left = self._del_aux(item, node.left)
        elif item > node.val:
            node.right = self._del_aux(item, node.right)
        elif node.left is None:
            # Leaf or right-only: the right child (possibly None) takes this spot
            return node.right
        elif node.right is None:
            # Left-only: the left child takes this spot
            return node.left
        else:
            # Two children: take over the value of the inorder successor (leftmost
            # node of the right subtree), then remove that successor node.
            successor = node.right
            while successor.left is not None:
                successor = successor.left

            node.val = successor.val
            node.right = self._del_aux(successor.val, node.right)

        # Every node on the way back up may have lost height
        return self._rebalance(node)

    def delete(self, item):
        '''Removes the node holding item, if present'''
        # Time: O(logn)
        # Keep the returned root: the root may be removed or rotated away.
        self._root = self._del_aux(item, self._root)

    def print_avl(self):
        '''Prints the tree one level per line, tagging each node Root/Left/Right'''
        if self._root is None:
            return

        level = [('Root', self._root)]
        while len(level) > 0:
            for pos, node in level:
                print(pos, ' ', node.val, end=' ')
            print()

            next_level = []
            for pos, node in level:
                if node.left:
                    next_level.append(('Left ', node.left))

                if node.right:
                    next_level.append(('Right ', node.right))
            level = next_level

if __name__ == '__main__':
    # Each batch of insertions ends by provoking the rotation case named beside it.
    avl = AVLtree(6)
    rotation_batches = (
        (5, 3),    # LEFT LEFT
        (1, 2),    # LEFT RIGHT
        (7, 8),    # RIGHT RIGHT
        (10, 9),   # RIGHT LEFT
    )
    for batch in rotation_batches:
        for value in batch:
            avl.add(value)
    avl.print_avl()

Root   5 
Left    2 Right    7 
Left    1 Right    3 Left    6 Right    9 
Left    8 Right    10 


In [9]:
class LinkedList:
    '''
    A linear collection of data elements whose order is not given by their physical
    placement in memory. Linked list elements (list nodes) are linked using pointers.
    Each node is made up of two items -- a data value and a reference to the next node.

    There are many variations of the linked list, such as the doubly-linked list,
    where each node references both the next node and the node behind it.
    This implementation is a singly-linked list with both a head and a tail pointer.

    The benefit of a linked list is cheap insertion and removal: nothing has to be
    reallocated or reorganized, because the data items do not need to be stored
    contiguously in memory or on disk.
    '''
    class ListNode:
        def __init__(self, val, next=None):
            self.val = val
            self.next = next

    def __init__(self):
        # Both pointers are None exactly when the list is empty.
        self._head = None
        self._tail = None

    def append(self, val):
        '''Appends a new node with the given value to the list'''
        # Time: O(1)
        # Space: O(1)
        new_node = LinkedList.ListNode(val)
        if self._head is None:
            self._head = new_node
        else:
            self._tail.next = new_node
        self._tail = new_node

    def contains(self, val):
        '''Iterates through the linked list and tries to find the given value'''
        # Time: O(n)
        # Space: O(1)
        current = self._head
        while current is not None:
            if val == current.val:
                return True
            current = current.next

        return False

    def delete(self, val):
        '''
        Deletes the first node holding val.
        Prints a message and leaves the list unchanged if val is absent
        (including when the list is empty).
        '''
        # Time: O(n)
        # Space: O(1)
        # Walk with a trailing pointer so the match can be unlinked
        previous = None
        current = self._head
        while current is not None and current.val != val:
            previous = current
            current = current.next

        if current is None:
            print('Could not find value {} to delete'.format(val))
            return

        if previous is None:
            # The match is the head
            self._head = current.next
        else:
            previous.next = current.next

        if current is self._tail:
            # maintain tail pointer upon deletion (becomes None when the list empties)
            self._tail = previous

        current.next = None

    def print_ll(self):
        '''Prints every value from head to tail, one per line'''
        current = self._head
        while current is not None:
            print(current.val)
            current = current.next

if __name__ == '__main__':
    # Demo: build a four-node list, then delete from the middle, the tail and the head.
    ll = LinkedList()
    for value in (5, 3, 2, 6):
        ll.append(value)
    ll.print_ll()
    print()

    # Middle node first, then the tail node.
    for doomed in (3, 6):
        ll.delete(doomed)
        ll.print_ll()
        print()

    # Grow the list again (with a duplicate), then remove the head.
    for value in (1, 1):
        ll.append(value)
    ll.delete(5)
    ll.print_ll()
    print()

5
3
2
6

5
2
6

5
2

2
1
1



In [19]:
class Trie:
    '''
    AKA prefix tree.
    Used to solve the problem of representing a set of words.
    The term "trie" comes from the word reTRIEval.

    A trie is a set of linked nodes, all connecting back to an empty root node.
    Each node contains an array of pointers, one for each possible alphabetic value,
    so the size of a trie is directly connected to the size of the represented alphabet.
    This implementation supports the lowercase English letters 'a'-'z' only.
    Each node also carries an end-of-word flag, so a stored word is told apart
    from a mere prefix of a longer stored word.

    Tries are used for autocomplete and longest prefix matching. Another example:
    given the first few letters of a license plate, say DOG, a trie of registered
    plates finds every registration that starts with that prefix.
    '''
    class TrieNode:
        # Shared by all nodes: lowercase letter -> slot index (a=0 ... z=25)
        _alphbt = {chr(code): code - 97 for code in range(97, 123)}
        _alphbt_size = 26 # size of english alphabet

        def __init__(self, value=''):
            self._value = value
            self._children = [None]*self._alphbt_size
            self._num_children = 0
            self.is_word = False  # True when the path from the root to here spells a stored word

        def get_value(self):
            return self._value

        def get_num_children(self):
            return self._num_children

        def get_children(self):
            '''Returns the existing child nodes in alphabetical order'''
            return [child for child in self._children if child is not None]

        def get_child(self, letter):
            '''Returns the child for letter, or None if absent or letter is unsupported'''
            index = self._alphbt.get(letter)
            if index is None:
                return None
            return self._children[index]

        def set_child(self, letter):
            '''Installs a fresh child node for letter (KeyError if letter is unsupported)'''
            index = self._alphbt[letter]
            if self._children[index] is None:
                # Count each occupied slot once, even if it is overwritten later
                self._num_children += 1
            self._children[index] = Trie.TrieNode(letter)

        def delete_child(self, letter):
            '''Removes the child node for letter (KeyError if letter is unsupported)'''
            index = self._alphbt[letter]
            if self._children[index] is not None:
                self._num_children -= 1
            self._children[index] = None

    def __init__(self):
        self._root = Trie.TrieNode()

    def _walk(self, word):
        '''
        Follows word from the root for as long as nodes exist.
        Returns the visited nodes, root first; the list has len(word) + 1
        entries exactly when the whole path exists.
        '''
        path = [self._root]
        for letter in word:
            child = path[-1].get_child(letter)
            if child is None:
                break
            path.append(child)
        return path

    def search(self, word):
        '''Returns True if word was inserted (a bare prefix of a stored word does not count)'''
        path = self._walk(word)
        return len(path) == len(word) + 1 and path[-1].is_word

    def insert(self, word):
        '''Stores word in the trie (KeyError if it contains a character outside a-z)'''
        # Validate first so a bad character cannot leave a half-inserted path behind
        for letter in word:
            if letter not in Trie.TrieNode._alphbt:
                raise KeyError(letter)

        node = self._root
        for letter in word:
            if node.get_child(letter) is None:
                node.set_child(letter)

            node = node.get_child(letter)
        node.is_word = True

    def delete(self, word):
        '''Removes word from the trie and prunes nodes that no longer lead to any word'''
        path = self._walk(word)
        if len(path) != len(word) + 1 or not path[-1].is_word:
            print('err: cannot delete word not found in trie')
            return

        path[-1].is_word = False

        # Prune bottom-up; stop at the first node still needed, i.e. one that
        # ends another word or still has children. path[0] is the root, so the
        # first-letter node is detached from the root as well.
        for depth in range(len(word), 0, -1):
            node = path[depth]
            if node.is_word or node.get_num_children() > 0:
                break
            path[depth - 1].delete_child(word[depth - 1])

    def print_words(self):
        '''Prints every stored word, one per line, in alphabetical order'''
        # Iterative pre-order walk; each entry pairs a node with the prefix spelled so far
        pending = [(self._root, '')]
        while pending:
            node, prefix = pending.pop()
            if node.is_word:
                print(prefix)

            # Push in reverse so the alphabetically first child is popped first
            for child in reversed(node.get_children()):
                pending.append((child, prefix + child.get_value()))
if __name__ == '__main__':
    # Demo: a word is absent, then present after insert, then absent again after delete.
    trie = Trie()
    word = 'cows'

    found_before_insert = trie.search(word)
    assert found_before_insert == False

    trie.insert(word)
    found_after_insert = trie.search(word)
    assert found_after_insert == True

    trie.delete(word)
    found_after_delete = trie.search(word)
    assert found_after_delete == False



i
0
h
1
