https://bradfieldcs.com/algos/trees/introduction/

### Trees

#### Introduction to Trees -

#### Representing a Tree 

#### Nodes and references representation

In [65]:
class Node:
    """Binary tree node that stores a value and references to two children."""

    def __init__(self, val):
        self.val = val
        self.right = None
        self.left = None

    def insert_left(self, child):
        """Make ``child`` the left child of this node.

        If a left child already exists it is pushed down one level and
        becomes the left child of ``child``.
        """
        displaced = self.left
        if displaced is not None:
            child.left = displaced
        self.left = child

    def insert_right(self, child):
        """Make ``child`` the right child of this node.

        If a right child already exists it is pushed down one level and
        becomes the right child of ``child``.
        """
        displaced = self.right
        if displaced is not None:
            child.right = displaced
        self.right = child

In [66]:
# Build a three-node tree: 'a' at the root with 'b' on the left and 'c' on the right.
root = Node('a')
root.insert_left(Node('b'))
print(root.left)  # Node defines no __repr__, so this shows the default object repr
root.insert_right(Node('c'))
print(root.right)
print(root.right.val)
# Values are plain attributes and can be reassigned in place.
root.right.val = 'hello'

<__main__.Node object at 0x7fd058df1370>
<__main__.Node object at 0x7fd058560730>
c


#### List of lists representation

In [67]:
# Each (sub)tree is a three-element list: [value, left_subtree, right_subtree].
# An empty list [] stands for a missing subtree.
tree = [
    'a',  # root
    
    
    [
        'b',  # left subtree
        ['d', [], []],
        ['e', [], []]
    ],
    
    
    [
        'c',  # right subtree
        ['f', [], []],
        []
    ]

]
# or, equivalently, written on a single line:
tree = ['a', ['b', ['d', [], []], ['e', [], []]], ['c', ['f', [], []], []]]

In [68]:
# Index 1 holds the left subtree, index 2 the right subtree, index 0 the root value.

# the left subtree
print(tree[1])  # => ['b', ['d', [], []], ['e', [], []]]

# the right subtree
print(tree[2])  # => ['c', ['f', [], []], []]

# the root
print(tree[0])  # => 'a'

['b', ['d', [], []], ['e', [], []]]
['c', ['f', [], []], []]
a


In [69]:

def insert_left(root, child_val):
    """Insert ``child_val`` as the new left child of the list-based tree ``root``.

    The previous left subtree, when it is non-empty, becomes the left
    subtree of the new child. ``root`` is modified in place and returned.
    """
    previous = root.pop(1)
    carried = previous if len(previous) > 1 else []
    root.insert(1, [child_val, carried, []])
    return root

def insert_right(root, child_val):
    """Insert ``child_val`` as the new right child of the list-based tree ``root``.

    The previous right subtree, when it is non-empty, becomes the right
    subtree of the new child. ``root`` is modified in place and returned.
    """
    previous = root.pop(2)
    carried = previous if len(previous) > 1 else []
    root.insert(2, [child_val, [], carried])
    return root
  
def get_root_val(root):
    """Return the value stored at the root of a list-based tree (element 0)."""
    value = root[0]
    return value

def set_root_val(root, new_val):
    """Overwrite, in place, the value stored at the root of a list-based tree."""
    root[0] = new_val

def get_left_child(root):
    """Return the left subtree of a list-based tree (element 1), not a copy."""
    left_subtree = root[1]
    return left_subtree

def get_right_child(root):
    """Return the right subtree of a list-based tree (element 2), not a copy."""
    right_subtree = root[2]
    return right_subtree

In [70]:
# Start from a single-node tree and insert twice on each side; each later
# insertion pushes the earlier child one level down.
root = [3, [], []]
insert_left(root, 4)
insert_left(root, 5)
insert_right(root, 6)
insert_right(root, 7)
left = get_left_child(root)

print(left)

[5, [4, [], []], []]


In [71]:
# `left` is the same list object stored inside `root`, so the change shows up in `root`.
set_root_val(left, 9)
print(root)

[3, [9, [4, [], []], []], [7, [], [6, [], []]]]


In [72]:
# insert_left returns the mutated subtree, which the notebook echoes as the cell output.
insert_left(left, 11)

[9, [11, [4, [], []], []], []]

In [73]:
# Show the whole tree, then walk two levels down the right side.
print(root)
print(get_right_child(get_right_child(root)))

[3, [9, [11, [4, [], []], []], []], [7, [], [6, [], []]]]
[6, [], []]


#### Map-based representation

In [74]:
# Binary tree as nested dicts: 'val' holds the payload and the optional
# 'left' / 'right' keys hold child dicts (a missing key means no child).
root_binary = {
    'val': 'A',
    'left': {
        'val': 'B',
        'left': {'val': 'D'},
        'right': {'val': 'E'}
    },
    'right': {
        'val': 'C',
        'right': {'val': 'F'}
    }
}


In [75]:
# General (non-binary) tree as nested dicts: each node keeps its children in
# a 'children' list, so a node may have any number of them; leaves omit the key.
root_non_binary = {
    'val': 'A',
    'children': [
        {
            'val': 'B',
            'children': [
                {'val': 'D'},
                {'val': 'E'},
            ]
        },
        {
            'val': 'C',
            'children': [
                {'val': 'F'},
                {'val': 'G'},
                {'val': 'H'}
            ]
        }
    ]
}


###  Parse Trees 

In [76]:
operators = ["+", "-", "*", "/"]

def build_parse_tree(expression, verbose=True):
    """Build a parse tree from a fully parenthesized arithmetic expression.

    Parameters:
        expression: whitespace-separated tokens, e.g. ``"( 3 + ( 4 * 5 ) )"``.
            Tokens are ``(``, ``)``, one of ``operators``, or an integer literal.
        verbose: when true (the default, matching the trace shown in this
            notebook) print the token list and, before each token is
            processed, the token and the current stack.

    Returns:
        The root ``Node`` of the parse tree. Operator nodes hold the operator
        string; operand nodes hold the operand converted with ``int``.

    Raises:
        ValueError: if an operand token cannot be converted by ``int``.
        IndexError: if the stack is popped while empty (unbalanced input).
    """
    tokens = expression.split()
    root = Node("")
    # The stack remembers the parent to return to once a subtree is complete.
    stack = [root]
    current_node = root
    if verbose:
        print(tokens)
    for token in tokens:
        if verbose:
            print(token, stack)
        if token == "(":
            # Start a new sub-expression: descend into a fresh left child.
            current_node.insert_left(Node(""))
            stack.append(current_node)
            current_node = current_node.left

        elif token in operators:
            # The current node becomes the operator; descend to its right operand.
            current_node.val = token
            current_node.insert_right(Node(""))
            stack.append(current_node)
            current_node = current_node.right

        elif token == ")":
            # Sub-expression finished: climb back to the parent.
            current_node = stack.pop()

        else:
            # Operand: store it in the current node and climb back to the parent.
            current_node.val = int(token)
            current_node = stack.pop()

    return root

In [77]:
# Parse a fully parenthesized expression; the function also prints the token
# list and, before each token is handled, that token and the stack.
parse_tree = build_parse_tree("( 3 + ( 4 * 5 ) )")

['(', '3', '+', '(', '4', '*', '5', ')', ')']
( [<__main__.Node object at 0x7fd058df1ac0>]
3 [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>]
+ [<__main__.Node object at 0x7fd058df1ac0>]
( [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>]
4 [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058560850>]
* [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>]
5 [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058560850>]
) [<__main__.Node object at 0x7fd058df1ac0>, <__main__.Node object at 0x7fd058df1ac0>]
) [<__main__.Node object at 0x7fd058df1ac0>]


In [78]:
# The root of the parse tree holds the outermost operator.
parse_tree.val

'+'

In [79]:
operators = ["*", "/", "+", "-"]

def perform_operation(a, b, operation):
    """Apply a binary arithmetic operator to two operands.

    Parameters:
        a: left operand.
        b: right operand.
        operation: one of ``"*"``, ``"/"``, ``"+"``, ``"-"``.

    Returns:
        ``a <operation> b``; ``"/"`` performs true division.

    Raises:
        ValueError: if ``operation`` is not a supported operator. Previously
            the function fell through and returned ``None`` in that case.
    """
    if operation == "*":
        return a * b
    elif operation == "/":
        return a / b
    elif operation == "+":
        return a + b
    elif operation == "-":
        return a - b
    raise ValueError("unsupported operation: {!r}".format(operation))

In [80]:
def evaluate(tree):
    """Recursively evaluate a parse tree and return its value.

    A node that lacks a left or a right child is treated as an operand and
    its ``val`` is returned unchanged. Any other node is treated as an
    operator: both subtrees are evaluated and combined with
    ``perform_operation`` using ``tree.val`` as the operator.

    Errors raised while computing (for example ``ZeroDivisionError``) now
    propagate; the previous bare ``except`` hid them and returned the
    operator string instead.
    """
    if tree.left is None or tree.right is None:
        return tree.val
    return perform_operation(evaluate(tree.left), evaluate(tree.right), tree.val)

In [81]:
# Evaluate the tree parsed from "( 3 + ( 4 * 5 ) )".
evaluate(parse_tree)

23

### Tree Traversals

preorder, inorder, and postorder

In [82]:
def preorder(node):
    """Print the values of the subtree rooted at ``node`` in preorder:
    the node itself, then its left subtree, then its right subtree."""
    if not node:
        return
    print(node.val)
    for child in (node.left, node.right):
        preorder(child)
        
def postorder(node):
    """Print the values of the subtree rooted at ``node`` in postorder:
    the left subtree, then the right subtree, then the node itself.

    The recursion must call ``postorder`` itself; recursing through
    ``preorder`` (as before) printed each subtree in preorder instead.
    """
    if node:
        postorder(node.left)
        postorder(node.right)
        print(node.val)
        


In [83]:
# Preorder walk of the parse tree: each operator is printed before its operands.
preorder(parse_tree)

+
3
*
4
5


In [84]:
def construct_expression(parse_tree):
    """Rebuild the fully parenthesized expression that a parse tree represents.

    Returns:
        ``""`` for ``None``; the node's raw ``val`` for a node that lacks a
        left or a right child (an operand); otherwise the string
        ``"( <left> <val> <right> )"`` built from both subtrees.

    Whether a node is an operator is decided by the presence of both child
    nodes rather than by the truthiness of their rendered text, so operands
    such as ``0`` are no longer mistaken for a missing child.
    """
    if parse_tree is None:
        return ""

    if parse_tree.left is None or parse_tree.right is None:
        return parse_tree.val

    left = construct_expression(parse_tree.left)
    right = construct_expression(parse_tree.right)
    return '( {} {} {} )'.format(left, parse_tree.val, right)

In [85]:
# Rebuild the expression text from the parse tree built earlier.
construct_expression(parse_tree)

'( 3 + ( 4 * 5 ) )'

### Priority Queues with Binary Heaps 

    the highest priority items are retrieved from the queue ahead of lower priority items.
    The classic way to implement a priority queue is using a data structure called a binary heap. A binary heap allows us to enqueue or dequeue items in $O(\log n)$ time.
    The binary heap has two common variations: the min heap, in which the smallest key is always at the front, and the max heap, in which the largest key value is always at the front.
    
    

    BinaryHeap() creates a new, empty, binary heap.
    insert(k) adds a new item to the heap.
    find_min() returns the item with the minimum key value, leaving item in the heap.
    delete_min() returns the item with the minimum key value, removing the item from the heap.
    is_empty() returns true if the heap is empty, false otherwise.
    size() returns the number of items in the heap.
    build_heap(list) builds a new heap from a list of keys.

    In order for our heap to work efficiently, we will take advantage of the logarithmic nature of the binary tree to represent our heap. In order to guarantee logarithmic performance, we must keep our tree balanced.
    
    A complete binary tree is a tree in which each level has all of its nodes, with the possible exception of the bottom level, which is filled in from left to right.
    
    Because the tree is complete, the left child of a parent (at position $p$) is the node found at position $2p$ in the list. Similarly, the right child of the parent is at position $2p + 1$ in the list.
        
    To find the parent of any node in the tree, we can simply use integer division (like normal mathematical division except we discard the remainder). Given that a node is at position $n$ in the list, its parent is at position $\lfloor n/2 \rfloor$ (`n // 2` in Python).
        
    

### The Heap Order Property

    The method that we will use to store items in a heap relies on maintaining the heap order property. The heap order property is as follows: In a heap, for every node x with parent p, the key in p is smaller than or equal to the key in x.

### Heap Operations

In [86]:
class BinaryHeap:
    """Min-heap of comparable keys stored in a 1-indexed list.

    ``items[0]`` is an unused placeholder so that the node at index ``i``
    has its children at ``2*i`` and ``2*i + 1`` and its parent at ``i // 2``.
    """

    def __init__(self):
        self.items = [0]

    def __len__(self):
        # Do not count the placeholder at index 0.
        return len(self.items) - 1

    def is_empty(self):
        """Return True when the heap holds no keys."""
        return len(self) == 0

    def size(self):
        """Return the number of keys in the heap (same as ``len(heap)``)."""
        return len(self)

    def find_min(self):
        """Return the smallest key without removing it.

        Raises:
            IndexError: if the heap is empty.
        """
        if self.is_empty():
            raise IndexError("find_min from an empty heap")
        return self.items[1]

    def percolate_up(self):
        """Move the last item up until its parent is no larger than it."""
        i = len(self)
        while i // 2 > 0:
            parent = i // 2
            if self.items[i] < self.items[parent]:
                self.items[i], self.items[parent] = self.items[parent], self.items[i]
            else:
                # Every ancestor already satisfies the heap order property.
                break
            i = parent

    def insert(self, k):
        """Add key ``k`` to the heap."""
        self.items.append(k)
        self.percolate_up()

    def percolate_down(self, i):
        """Move the item at index ``i`` down until neither child is smaller."""
        while 2 * i <= len(self):
            mc = self.min_child(i)
            if self.items[i] > self.items[mc]:
                self.items[i], self.items[mc] = self.items[mc], self.items[i]
            else:
                # The subtree below is already a heap, so nothing else moves.
                break
            i = mc

    def min_child(self, i):
        """Return the index of the smaller child of the node at index ``i``.

        The caller must ensure the node has at least a left child.
        """
        if (2 * i) + 1 > len(self):
            # Only a left child exists.
            return 2 * i
        elif self.items[2 * i] > self.items[2 * i + 1]:
            return 2 * i + 1
        else:
            return 2 * i

    def delete_min(self):
        """Remove and return the smallest key.

        Raises:
            IndexError: if the heap is empty.
        """
        if self.is_empty():
            raise IndexError("delete_min from an empty heap")
        min_value = self.items[1]
        # Move the last item to the root, shrink the list, then restore order.
        self.items[1] = self.items[-1]
        self.items.pop()
        self.percolate_down(1)
        return min_value

    def build_heap(self, input_list):
        """Replace the heap's contents with the keys of ``input_list``.

        Any iterable is accepted; the input itself is not modified.
        """
        self.items = [0] + list(input_list)
        # Indices above len // 2 are leaves, which are trivially heaps.
        i = len(self) // 2
        while i > 0:
            self.percolate_down(i)
            i -= 1

In [87]:
# Create an empty heap to experiment with.
bh = BinaryHeap()

In [88]:
# Heapify an unsorted list of keys in one pass.
bh.build_heap([2,3,4,5,753,20,21,30,56,80,8,876,54,35])

In [89]:
# Inspect the backing list; index 0 is the placeholder, the keys start at index 1.
bh.items

[0, 2, 3, 4, 5, 8, 20, 21, 30, 56, 80, 753, 876, 54, 35]

In [90]:
# Spot-check the heap order property: the key at index i next to its two children.
i = 6
bh.items[i], bh.items[2*i], bh.items[2*i+1]

(20, 876, 54)

### Binary Search Trees 


    Map() Create a new, empty map.
    
    put(key, val) Add a new key-value pair to the map. If the key is already in the map then replace the old value with the new value.
    
    get(key) Given a key, return the value stored in the map or None otherwise.
    
    del Delete the key-value pair from the map using a statement of the form del map[key].
    
    len() Return the number of key-value pairs stored in the map.
    
    in Return True for a statement of the form key in map, if the given key is in the map.


#### Implementation

    A binary search tree relies on the property that keys that are less than the parent are found in the left subtree, and keys that are greater than the parent are found in the right subtree. We will call this the BST property.

In [91]:
class TreeNode:
    """Node of a binary search tree.

    Holds a key/value pair plus references to the left child, the right
    child and the parent (``None`` when absent).
    """

    def __init__(self, key, val, left=None, right=None, parent=None):
        self.key = key
        self.val = val
        self.left = left
        self.right = right
        self.parent = parent

    def is_left_child(self):
        """Return True if this node is the left child of its parent."""
        return self.parent is not None and self.parent.left is self

    def is_right_child(self):
        """Return True if this node is the right child of its parent."""
        return self.parent is not None and self.parent.right is self

    def is_leaf(self):
        """Return True if this node has no children."""
        return self.left is None and self.right is None

    def has_any_children(self):
        """Return True if this node has at least one child."""
        return self.left is not None or self.right is not None

    def has_both_children(self):
        """Return True if this node has both a left and a right child."""
        return self.left is not None and self.right is not None

    def has_one_child(self):
        """Return True if this node has exactly one child."""
        return self.has_any_children() and not self.has_both_children()

    def replace_node_data(self, key, val, left, right):
        """Overwrite this node's key, value and children in place.

        The new children, when present, get their ``parent`` reference
        pointed at this node.
        """
        self.key = key
        self.val = val
        self.left = left
        self.right = right
        if self.left is not None:
            self.left.parent = self
        if self.right is not None:
            self.right.parent = self

    def __iter__(self):
        """Yield the keys of the subtree rooted here in sorted (in-order) order."""
        if self.left is not None:
            yield from self.left

        yield self.key

        if self.right is not None:
            yield from self.right

    def find_min(self):
        """Return the node with the smallest key in the subtree rooted here."""
        current = self
        while current.left is not None:
            current = current.left
        return current

    def find_successor(self):
        """Return the node with the next-largest key, or None if there is none.

        With a right subtree, the successor is that subtree's minimum.
        Otherwise it is the nearest ancestor reached from its left side.
        The tree is only read, never modified, during the search.
        """
        if self.right is not None:
            return self.right.find_min()

        node = self
        # Climb while we are a right child: those ancestors have smaller keys.
        while node.parent is not None and node.parent.right is node:
            node = node.parent
        return node.parent

    def splice_out(self):
        """Unlink this node from its parent, promoting its only child if any.

        Intended for a node that has a parent and at most one child (such as
        the successor found during deletion); with two children the right
        one would be dropped.
        """
        promoted_node = self.left if self.left is not None else self.right

        if self.is_left_child():
            self.parent.left = promoted_node
        else:
            self.parent.right = promoted_node

        if promoted_node is not None:
            promoted_node.parent = self.parent

    successor: the node that has the next-largest key in the tree
    

In [92]:
class BinarySearchTree:
    """Map from keys to values backed by an (unbalanced) binary search tree.

    Supports ``tree[key] = val``, ``tree[key]``, ``key in tree``,
    ``del tree[key]``, ``len(tree)`` and in-order iteration over the keys.
    """

    # Node type instantiated on insertion; subclasses may override it.
    TreeNodeClass = TreeNode

    def __init__(self):
        self.size = 0
        self.root = None

    def __len__(self):
        return self.size

    def __iter__(self):
        """Iterate over the keys in sorted order (nothing for an empty tree)."""
        if self.root is None:
            return iter(())
        return iter(self.root)

    def __setitem__(self, key, val):
        """Store ``val`` under ``key``, replacing the value if the key exists."""
        if self.root is None:
            self.root = self.TreeNodeClass(key, val)
            self.size += 1
            return

        # Look the key up first so that a replacement neither adds a
        # duplicate node nor inflates ``size``.
        existing = self._get(key, self.root)
        if existing is not None:
            existing.val = val
            return

        self._put(key, val, self.root)
        self.size += 1

    def put(self, key, val):
        """Same as ``tree[key] = val``."""
        self[key] = val

    def _put(self, key, val, node):
        """Insert a new node for ``key`` below ``node``.

        Keys smaller than a node's key go left; all others go right.
        """
        while True:
            if key < node.key:
                if node.left is None:
                    node.left = self.TreeNodeClass(key, val, parent=node)
                    return
                node = node.left
            else:
                if node.right is None:
                    node.right = self.TreeNodeClass(key, val, parent=node)
                    return
                node = node.right

    def __getitem__(self, key):
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` is not in the tree.
        """
        node = self._get(key, self.root)
        if node is None:
            raise KeyError(key)
        return node.val

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent."""
        node = self._get(key, self.root)
        return default if node is None else node.val

    def _get(self, key, node):
        """Return the node holding ``key`` in the subtree at ``node``, or None."""
        while node is not None:
            if key == node.key:
                return node
            node = node.left if key < node.key else node.right
        return None

    def __contains__(self, key):
        return self._get(key, self.root) is not None

    def delete(self, key):
        """Remove ``key`` and its value from the tree.

        Raises:
            KeyError: if ``key`` is not in the tree.
        """
        node_to_remove = self._get(key, self.root)
        if node_to_remove is None:
            raise KeyError("Key not found")
        self.remove(node_to_remove)
        self.size -= 1

    def __delitem__(self, key):
        self.delete(key)

    def remove(self, node):
        """Unlink ``node`` from the tree while keeping the BST property.

        Does not adjust ``size``; ``delete`` takes care of that.
        """
        if node.is_leaf():
            if node.parent is None:
                # The node is the root and the only node in the tree.
                self.root = None
            elif node.parent.left is node:
                node.parent.left = None
            else:
                node.parent.right = None

        elif node.has_one_child():
            promoted_node = node.left if node.left is not None else node.right

            if node.parent is None:
                # Removing the root: copy the child's data into the root node
                # so that ``self.root`` keeps pointing at a valid node.
                node.replace_node_data(
                    promoted_node.key,
                    promoted_node.val,
                    promoted_node.left,
                    promoted_node.right
                )
            else:
                promoted_node.parent = node.parent
                if node.parent.left is node:
                    node.parent.left = promoted_node
                else:
                    node.parent.right = promoted_node

        else:
            # Two children: the successor is the minimum of the right subtree,
            # so it has no left child. Unlink it and move its data into ``node``.
            successor = node.find_successor()
            successor.splice_out()
            node.key = successor.key
            node.val = successor.val

#### Analysis

    A perfectly balanced tree has the same number of nodes in the left subtree as it does in the right subtree. In a balanced binary tree, the worst-case performance of put is $O(\log_2 n)$, where $n$ is the number of nodes in the tree. This is the inverse of the node-count relationship: a perfectly balanced tree of height $h$ holds $2^{h+1} - 1$ nodes. So $\log_2 n$ gives us the height of the tree, and represents the maximum number of comparisons that put will need to do as it searches for the proper place to insert a new node.

    Unfortunately it is possible to construct a search tree that has height n simply by inserting the keys in sorted order! An example of such a tree is shown below. In this case the performance of the put method is O(n).

###  AVL Trees 

    balanceFactor=height(leftSubTree)−height(rightSubTree)

    $N_h = 1 + N_{h-1} + N_{h-2}$

    $h = 1.44 \log N_h$
    This derivation shows us that at any time the height of our AVL tree is at most a constant (1.44) times the log of the number of nodes in the tree. This is great news for searching our AVL tree because it limits the search to $O(\log N)$.

#### Implementation

In [93]:
class AVLTreeNode(TreeNode):
    """TreeNode that additionally carries an AVL balance factor, initially 0."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.balance_factor = 0

In [94]:
class AVLTree(BinarySearchTree):
    """Binary search tree that rebalances itself on insertion.

    Each node's ``balance_factor`` is
    ``height(left subtree) - height(right subtree)``; after an insertion the
    tree is rotated so that every factor ends up in ``{-1, 0, 1}``.
    This class does not override removal, so deleting a node does not update
    balance factors.
    """

    TreeNodeClass = AVLTreeNode

    def _put(self, key, val, node):
        """Insert a new node below ``node`` and update balance factors."""
        if key < node.key:
            if node.left:
                self._put(key, val, node.left)
            else:
                node.left = self.TreeNodeClass(key, val, parent=node)
                self.update_balance(node.left)

        else:
            if node.right:
                self._put(key, val, node.right)
            else:
                node.right = self.TreeNodeClass(key, val, parent=node)
                self.update_balance(node.right)

    def update_balance(self, node):
        """Propagate a height increase from ``node`` towards the root.

        Stops at the first ancestor whose balance factor becomes 0 (its
        height did not change) or after rebalancing a node whose factor has
        left the range [-1, 1].
        """
        if node.balance_factor > 1 or node.balance_factor < -1:
            self.rebalance(node)
            return
        if node.parent is not None:
            if node.is_left_child():
                node.parent.balance_factor += 1
            elif node.is_right_child():
                node.parent.balance_factor -= 1

            if node.parent.balance_factor != 0:
                self.update_balance(node.parent)

    def rotate_left(self, rotation_root):
        """Rotate left around ``rotation_root``; its right child takes its place."""
        new_root = rotation_root.right
        rotation_root.right = new_root.left
        if new_root.left is not None:
            new_root.left.parent = rotation_root
        new_root.parent = rotation_root.parent
        if rotation_root.parent is None:
            self.root = new_root
        else:
            if rotation_root.is_left_child():
                rotation_root.parent.left = new_root
            else:
                rotation_root.parent.right = new_root
        new_root.left = rotation_root
        rotation_root.parent = new_root
        # Order matters: the second update uses the already-updated factor
        # of ``rotation_root``.
        rotation_root.balance_factor += 1 - min(new_root.balance_factor, 0)
        new_root.balance_factor += 1 + max(rotation_root.balance_factor, 0)

    def rotate_right(self, rotation_root):
        """Rotate right around ``rotation_root``; its left child takes its place."""
        new_root = rotation_root.left
        rotation_root.left = new_root.right
        if new_root.right is not None:
            new_root.right.parent = rotation_root
        new_root.parent = rotation_root.parent
        if rotation_root.parent is None:
            self.root = new_root
        else:
            if rotation_root.is_left_child():
                rotation_root.parent.left = new_root
            else:
                rotation_root.parent.right = new_root
        new_root.right = rotation_root
        rotation_root.parent = new_root
        # Mirror image of the rotate_left updates (signs flipped, min/max
        # swapped); reusing the rotate_left formulas gives wrong factors.
        rotation_root.balance_factor += -1 - max(new_root.balance_factor, 0)
        new_root.balance_factor += -1 + min(rotation_root.balance_factor, 0)

    def rebalance(self, node):
        """Restore balance at ``node`` with a single or a double rotation."""
        if node.balance_factor < 0 and node.right:
            # Right-heavy; a left-heavy right child needs a right rotation first.
            if node.right.balance_factor > 0:
                self.rotate_right(node.right)
                self.rotate_left(node)
            else:
                self.rotate_left(node)
        elif node.balance_factor > 0 and node.left:
            # Left-heavy; a right-heavy left child needs a left rotation first.
            if node.left.balance_factor < 0:
                self.rotate_left(node.left)
                self.rotate_right(node)
            else:
                self.rotate_right(node)

In [95]:
# Future: duplicate key, delete a node

    operation 	 	Sorted List 	Hash Table 	Binary Search Tree 	AVL Tree
    put 	 	 	O(n) 	 	O(1) 	 	O(n)  	 		O(log2n)
    get 	 	 	O(log2n) 	O(1) 	 	O(n)  	 		O(log2n)
    in 	 	 	O(log2n) 	O(1) 	 	O(n)  		 	O(log2n)
    del  	 		O(n) 	 	O(1) 	 	O(n) 	 	 	O(log2n)