# Binary Search Trees

- Recursive `add()` method
- Recursive `height()` method

In [None]:
import math
import random

from graphviz import Graph, Digraph

## Binary Search Tree

- special case of *Binary trees*
- **BST rule**: any node's value is bigger than every value in its left subtree, and smaller than every value in its right subtree
- TODO: write an efficient search for a BST (better complexity than O(N))
- TODO: write a method to add values to a BST, while preserving the BST rule

In [None]:
class BSTNode:
    """
    One node of a binary search tree (BST); a tree is referred to by its root node.

    BST rule: every label in the left subtree is smaller than this node's
    label, and every label in the right subtree is bigger.
    """

    def __init__(self, label):
        self.label = label
        self.left = None   # root of the left subtree (None when empty)
        self.right = None  # root of the right subtree (None when empty)

    # Category 2: functions that do some action
    def dump(self, prefix="", suffix=""):
        """
        prints out name of every node in the tree with some basic formatting
        (one node per line; each level is indented by one more tab)
        """
        print(prefix, self.label, suffix)
        if self.left is not None:
            self.left.dump(prefix+"\t", "(LEFT)")
        if self.right is not None:
            self.right.dump(prefix+"\t", "(RIGHT)")

    # Category 1: functions that return some computation
    def search(self, target):
        """
        returns True/False, if target is somewhere in the tree

        Only one subtree is explored at each node, because the BST rule
        tells us on which side target would have to be.
        """
        if target == self.label:
            return True

        if target < self.label:
            subtree = self.left
        elif target > self.label:
            subtree = self.right
        else:
            # target is neither equal, smaller nor bigger: it cannot be here
            return False

        return subtree is not None and subtree.search(target)

    def add(self, label):
        """
        Finds the correct spot for label and adds a new node with it.
        Assumes that tree already contains at least one node: add is a
        method, so it needs an existing node (the root) to be called on.
        Raises ValueError if label is already on the tree.
        """
        if label == self.label:
            raise ValueError("label {!r} is already in the tree".format(label))

        if label < self.label:
            if self.left is None:
                # base case: found the free spot
                self.left = BSTNode(label)
            else:
                # recursive case: let the left subtree place the label
                self.left.add(label)
        else:
            if self.right is None:
                self.right = BSTNode(label)
            else:
                self.right.add(label)

    def height(self):
        """
        Calculates height of the BST.
        Height: the number of nodes on the longest root-to-leaf path (including the root)
        """
        # A missing subtree contributes a height of 0, so a leaf has height 1.
        left_height = 0 if self.left is None else self.left.height()
        right_height = 0 if self.right is None else self.right.height()
        return 1 + max(left_height, right_height)

### Recursive `add` method
- Manually creating a tree is cumbersome and subject to mistakes (violations of BST rule)

In [None]:
# Hand-built BST: every assignment below has to respect the BST rule manually
# (smaller labels on the left, bigger labels on the right).
root = BSTNode(10)
root.left = BSTNode(2)
root.left.left = BSTNode(1)
root.left.right = BSTNode(4)
root.left.right.right = BSTNode(8)
root.left.right.left = BSTNode(3)
root.right = BSTNode(15)
root.right.left = BSTNode(12)
root.right.right = BSTNode(19)
root.dump("", "(ROOT)")

In [None]:
# Same labels as the hand-built tree, listed in insertion order.
values = [10, 2, 1, 4, 8, 3, 15, 12, 19]

# The first value becomes the root (add needs an existing node to be called on);
# add() is meant to find the correct spot for each remaining value.
root = BSTNode(values[0])
for val in values[1:]:
    root.add(val)
    
root.dump("", "(ROOT)")

### Recursive `height` method

- **Height**: the number of nodes on the longest root-to-leaf path (including the root)
- left subtree has height 4, right subtree has height 6, my height = ?
- left subtree has height 4, right subtree has height 4, my height = ?
- left subtree has height 10, right subtree has height 0, my height = ?
- left subtree has height of l, right subtree has height of r, my height = ?
- What is the simplest case for height calculation?
    - What are the values of l and r in that case?

In [None]:
# TODO: Let's implement and invoke the height method


### Tree containing 100 values
- let's use range(...) to produce a sequence of 100 integers
- recall that range(...) returns a sequence in increasing order
- what will be the height of this tree? **100**

In [None]:
values = list(range(100))
# Q: Is this tree balanced?
# A: No. The values arrive in increasing order, so by the BST rule every new
#    value ends up to the right of the previous one: a chain of height 100.



#### Let's use `random` module `shuffle` function to randomly order the sequence of 100 numbers.
- in-place re-ordering of numbers (just like `sort` method)

In [None]:
values = list(range(100))
random.shuffle(values)  # re-orders `values` in place
# Q: Is this tree balanced?
# A: depends on the shuffling, you can check using math.log2(N)

# Build the tree from the shuffled order: first value is the root.
root = BSTNode(values[0])
for val in values[1:]:
    root.add(val)
    
print(root.height())
root.dump("", "(ROOT)")

In [None]:
# Reference value for N = 100: compare it with the height printed above
# to judge how balanced the shuffled tree is.
math.log2(100)

### Balanced BSTs / Self-balancing BSTs
- not a covered topic for the purpose of this course
- you can explore the below recursive function definition if you are interested
- you are **not required** to know how to do this

In [None]:
# Recursive function that orders sorted numbers so that adding them one by one
# to a BST produces a balanced tree
def sorted_array_to_bst(nums, bst_nums):
    """
    Produces the best ordering of nums (a list of sorted numbers),
    for the purpose of creating a balanced BST.
    Writes the new ordering of numbers into bst_nums (appended in place).
    Always returns None.
    """
    # base case: nothing left to place
    if len(nums) == 0:
        return

    # The middle value goes first, so that it becomes the root of this
    # (sub)tree, with about half of the remaining values on each side.
    mid_index = len(nums) // 2
    bst_nums.append(nums[mid_index])

    # recurse left: the values smaller than the middle one
    sorted_array_to_bst(nums[:mid_index], bst_nums)

    # recurse right: the values bigger than the middle one
    sorted_array_to_bst(nums[mid_index + 1:], bst_nums)

In [None]:
bst_nums = []  # filled in place by sorted_array_to_bst
sorted_array_to_bst(list(range(5)), bst_nums)
bst_nums  # -> [2, 1, 0, 4, 3]

In [None]:
# Re-order 0..99 for balanced insertion, then build the tree in that order.
bst_nums = []
sorted_array_to_bst(list(range(100)), bst_nums)

root = BSTNode(bst_nums[0])
for val in bst_nums[1:]:
    root.add(val)

print(root.height())

In [None]:
# Same experiment with only 5 values, small enough to inspect the dump by eye.
bst_nums = []
sorted_array_to_bst(list(range(5)), bst_nums)

root = BSTNode(bst_nums[0])
for val in bst_nums[1:]:
    root.add(val)

print(root.height())
root.dump("", "(ROOT)")

### Depth First Search (DFS) 
- Last lecture: BST search with complexity **O(logN)**
- Finds a path from one node to another -- works on any directed graph

In [None]:
def example(num):
    """
    Builds and returns one of the sample graphs used in this lecture.

    num selects the graph (1 to 5); any other value raises an Exception.
    Graph 1 additionally contains the isolated node "A"; graph 5 is made of
    4 layers in which every node is connected to every node of the next layer.
    """
    g = Graph()

    if num == 1:
        g.node("A")
        pairs = [("B", "C"), ("C", "D"), ("D", "B")]
    elif num == 2:
        pairs = [("A", "B"), ("B", "C"), ("C", "D"), ("D", "E"), ("A", "E")]
    elif num == 3:
        pairs = [
            ("A", "B"), ("A", "C"),
            ("B", "D"), ("B", "E"),
            ("C", "F"), ("C", "G"),
        ]
    elif num == 4:
        pairs = [
            ("A", "B"), ("A", "C"),
            ("B", "D"), ("B", "E"),
            ("C", "F"), ("C", "G"),
            ("E", "Z"), ("C", "Z"),
            ("B", "A"),
        ]
    elif num == 5:
        width = 8
        height = 4
        # Layer k holds width-(height-k-1) nodes named "k-0", "k-1", ...
        pairs = [
            ("{}-{}".format(upper, i), "{}-{}".format(upper + 1, j))
            for upper in range(height - 1)
            for i in range(width - (height - upper - 1))
            for j in range(width - (height - upper - 2))
        ]
    else:
        raise Exception("no such example")

    # Emit the edges in the order listed above.
    for src, dst in pairs:
        g.edge(src, dst)
    return g

### For a regular graph, you need a new class `Graph` to keep track of the whole graph.
- Why? Remember graphs need not have a "root" node, which means there is no one origin point

In [None]:
class Graph:
    """
    Directed graph that keeps track of all of its nodes by name.

    NOTE: this class re-uses the name `Graph`, which is also imported from
    graphviz at the top of the notebook; after this cell runs, `Graph`
    refers to this class.
    """

    def __init__(self):
        # name => Node
        self.nodes = {}
        # Nodes already reached by the current dfs_search (reset per search)
        self.visited = set()

    def node(self, name):
        """
        Adds a node called name to the graph.
        Does nothing when the node already exists, so that the edges
        already attached to it are not lost.
        """
        if name in self.nodes:
            return
        node = Node(name)
        node.graph = self
        self.nodes[name] = node

    def edge(self, src, dst):
        """
        Adds a directed edge from src to dst.
        Automatically adds missing nodes.
        """
        for name in (src, dst):
            self.node(name)
        self.nodes[src].children.append(self.nodes[dst])

    def dfs_search(self, src_name, dst_name):
        """
        Returns True/False, if there is a path from node src_name to node
        dst_name (a node always reaches itself).
        Raises KeyError if either name is not a node of this graph.
        """
        # Forget the nodes reached by any previous search.
        self.visited.clear()
        return self.nodes[src_name].dfs_search(self.nodes[dst_name])

    def _repr_svg_(self):
        """
        Draws the graph nodes and edges iteratively.
        """
        g = Digraph()
        for n in self.nodes:
            g.node(n)
            for child in self.nodes[n].children:
                g.edge(n, child.name)
        return g._repr_image_svg_xml()

class Node:
    """
    One node of a Graph: a name plus the list of nodes its edges point to.
    """

    def __init__(self, name):
        self.name = name
        self.children = []
        self.graph = None # back reference

    def __repr__(self):
        return self.name

    def dfs_search(self, dst):
        """
        Recursive depth first search: returns True/False, if dst (a Node)
        can be reached from this node.
        Meant to be called through Graph.dfs_search, which resets the set
        of visited nodes; requires self.graph to be set.
        """
        # Without this check a cycle would make the recursion endless.
        if self in self.graph.visited:
            return False
        self.graph.visited.add(self)

        if self is dst:
            return True
        # Explore each child fully before moving on to the next one.
        return any(child.dfs_search(dst) for child in self.children)

    
        

# Build sample graph 1; leaving `g` as the last expression of the cell asks
# the notebook to display it (presumably through Graph._repr_svg_).
g = example(1)
g

### Testcases for DFS

In [None]:
# g comes from example(1): A is an isolated node, while B -> C -> D -> B
# form a cycle, so the search must not loop forever.
print(g.dfs_search("B", "A")) # should return False
print(g.dfs_search("B", "D")) # should return True

In [None]:
# DFS search
# TODO: give the actual path, not just True/False
# TODO: use a different algorithm to find the shortest path