# MIT 6.006 Introduction To Algorithms Course

## Finding A Two-Dimensional Peak

In [53]:
import sys

from math import floor

"""
Returns the global maximum for the given column.
"""
def find_global_maximum(matrix, num_rows, num_cols,j):
    maximum = -sys.maxsize
    maxIndex = 0
    
    # Iterate of the jth column
    for i in range(num_rows):
        if (maximum < matrix[i][j]):
            maxIndex = i
            maximum = matrix[i][j]
        
    return maxIndex  


"""
Given a global maxima for a column 
the function returns the coordinates of the peak.
"""
def find_peak(matrix, num_rows, start_col, end_col):
    num_cols = (end_col - start_col) + 1
    middle = floor(start_col + num_cols / 2)
    
    maxRow = find_global_maximum(matrix, num_rows, num_cols, middle)

    if (num_rows == 1):
        return (maxRow, middle)
    
    # Determine the peak in the matrix
    if (middle != 0):
        if (matrix[maxRow][middle-1] > matrix[maxRow][middle]):
            return find_peak(matrix, num_rows, start_col, middle - 1)    
    
    if (middle != end_col):
        if (matrix[maxRow][middle + 1] > matrix[maxRow][middle]):
            return find_peak(matrix, num_rows, middle + 1, end_col)
        
    return (maxRow, middle)      
        
    

matrix = [[1,2,2,6], [7, 8, 50000, 10], [1, 5, 9000, 10000], [18, 20, 4333333, 19]]
print(find_peak(matrix, 4, 0, 3)) 

(3, 2)


## Computing Document Distance

In [54]:
from collections import defaultdict
from math import acos
from math import sqrt
import re

"""
Returns a dictionary of the frequency
of words in a dictionary
"""
def count_words(document):
    document = document.lower()
    
    # Remove anything that is not in the alphabet
    document = re.sub(r'[^a-z ]', '', document)
    
    # Split the document into constituent words
    words = document.split()
    
    """ 
    Create a default dictionary where each element
    has a default value of 0.
    """
    wordDict = defaultdict(lambda: 0)
    
    for word in words: 
        wordDict[word]+=1
        
    
    return wordDict


"""
Calculates the magnitude of a dictionary.
"""
def vector_magnitude(doc):
    sum_doc = 0
    for value in doc.values():
        sum_doc += value ** 2
    
    return sum_doc
        
"""
Returns the dot cosine simmilarity 
of two strings.
"""
def dot_product(s1, s2):
    dict1 = count_words(s1)
    dict2 = count_words(s2)
    
    
    # Compute the dot product of both dictionaries.
    set_dict = set(dict1.keys()).union(set(dict2.keys()))
    
    sum_dict = 0
    
    for element in set_dict:
        sum_dict += dict1[element] * dict2[element]
        
    # Compute the pythagorean magnitude of each document vector
    return acos(sum_dict / (sqrt(vector_magnitude(dict1)) * sqrt(vector_magnitude(dict2))))

print(dot_product("Hello, my name is tim bruh what is yours?", 
                    "Hello my name is tim bruh what is Yours"))          

0.0


## Insertion Sort

In [55]:
"""
Implementation of insertion sort.
Insertion sort has O(n^2) complexity.
Does not require the creation of a new list.
"""
def insertion_sort(unsorted_list):
    for i in range(len(unsorted_list)):
        element = unsorted_list[i]
        for j in range(i - 1, -1, -1):
            if unsorted_list[j] > element:
                # Swap
                unsorted_list[j + 1] = unsorted_list[j]
            else:
                unsorted_list[j + 1] = element
                break
            
            if j == 0:
                unsorted_list[j] = element

    return unsorted_list

print("Sorted list", insertion_sort([1, -100, 50, 2044, 2020, 3 ,2]))
        

Sorted list [-100, 1, 2, 3, 50, 2020, 2044]


## Merge Sort

In [56]:
def merge(list1, list2, result_list):
    count_l1 = 0
    count_l2 = 0
    
    counter = 0
    
    while count_l1 < len(list1) and count_l2 < len(list2):
        if list1[count_l1] <= list2[count_l2]:
            result_list[counter] = list1[count_l1]
            count_l1 += 1
        else:
            result_list[counter] = list2[count_l2]
            count_l2 += 1
        
        counter += 1
        
    while count_l1 < len(list1):
        result_list[counter] = list1[count_l1]
        counter += 1
        count_l1 += 1
        
    while count_l2 < len(list2):
        result_list[counter] = list2[count_l2]
        counter += 1
        count_l2 += 1
        
    return result_list

"""
Carries out merge sort on the 
given list
"""
def merge_sort(unsorted_list):
    if len(unsorted_list) == 1:
        return unsorted_list
    
    middle = floor(len(unsorted_list) / 2)
    
    split_left = merge_sort(unsorted_list[:middle])
    split_right = merge_sort(unsorted_list[floor(len(unsorted_list) / 2):])
    
    return merge(split_left, split_right, unsorted_list) 

print(merge_sort([3,5,2,134,9, -1000]))

[-1000, 2, 3, 5, 9, 134]


## The Heap Data Structure & Heap Sort

In [188]:
class Heap:
    def __init__(self, *starting_elements):
        self.heap = list(starting_elements)
        self.heap_sort(len(self.heap) - 1)
      
    def build_max_heap(self, max_element):
        for i in range(floor (max_element / 2), -1, -1):
            self.max_heapify(i, max_element)
        
    """
    Presumes only one element causes
    the max heap condition to fail.
    """
    def max_heapify(self, index, max_element):
        # Get the left and right children
        lChild = index * 2 
        
        lChild = index * 2 if index * 2 <= max_element else None
        rChild = index * 2 + 1 if index * 2 + 1 <= max_element else None

        if lChild != None and self.heap[lChild] > self.heap[index]:
            temp = self.heap[lChild]
            self.heap[lChild] = self.heap[index]
            self.heap[index] = temp
            self.max_heapify(floor (index * 2), max_element)
        elif rChild != None and self.heap[rChild] > self.heap[index]:
            temp = self.heap[rChild]
            self.heap[rChild] = self.heap[index]
            self.heap[index] = temp
            self.max_heapify(floor (index * 2 + 1), max_element)
            
    def heap_sort(self, max_element):
        if max_element > 0:
            self.build_max_heap(max_element)
        
        
            # Extract the root element
            temp = self.heap[0]
            self.heap[0] = self.heap[max_element]
            self.heap[max_element] = temp
            
            self.heap_sort(max_element - 1)
            
        

    def get_heap_size(self):
        return len(self.heap)
    
    def get_heap(self):
        return self.heap 
    
heap = Heap(4, 15, 20, 9, 5, 3, 2, 1, 3, 1)
print(heap.get_heap())

[1, 1, 2, 3, 3, 4, 5, 9, 15, 20]


## Binary Search Trees & BST Sort

In [186]:
class Node:
    def __init__(self, element):
        self.lNode = -1
        self.rNode = -1
        self.parent = -1
        self.element = element
        self.depth = 0
        
    def get_data(self):
        return self.element
    
    def get_lnode(self):
        return self.lNode
    
    def get_rnode(self):
        return self.rNode
    
class BinaryTree:
    def __init__(self):
        self.starting_element = None
        
    def add_node(self, element):
        if self.starting_element == None:
            self.starting_element = Node(element)
        else:
            self.add_node_traverse(self.starting_element, element)

            

    def is_balanced(self, current_node):
    
        left_depth = -1 if current_node.lNode == -1 else current_node.lNode.depth
        right_depth = -1 if current_node.rNode == -1 else current_node.rNode.depth
        
        # If negative then we know it is right heavy
        # If positive then we know it i left heavy.
        return (1 +left_depth) - (1 + right_depth)
    
    def left_rotate(self, current_node):
        p = current_node
        q = current_node.rNode
        
        # Change Q to be the current node
        q.parent = p.parent
        p.parent = q
        if p == self.starting_element:
            # Don't change the starting element
            self.starting_element = q
        else: 
            if q.parent.lNode == p:
                q.parent.lNode = q
            else:
                q.parent.rNode = q
            
      
            
        
        # Now we need to change the left and right pointers
        p.rNode = q.lNode
        q.lNode = p
        
        # Now we need to change the depths. 
        p_lchild_depth = -1 if p.lNode == -1 else p.lNode.depth
        p_rchild_depth = -1 if p.rNode == -1 else p.rNode.depth
        p.depth = max(1 + p_lchild_depth, 1 + p_rchild_depth)
        
        q_rchild_depth = -1 if q.rNode == -1 else q.rNode.depth
        q.depth = max(1 + q.lNode.depth, 1 + q_rchild_depth)
        
    def right_rotate(self, current_node):
        q = current_node
        p = current_node.lNode
        
        p.parent = q.parent
        q.parent = p
        
        if q == self.starting_element:
            self.starting_element = p
        else:
            if p.parent.lNode == q:
                p.parent.lNode = p
            else:
                p.parent.rNode = p
        
            
        q.lNode = p.rNode
        p.rNode = q
        
        # Update the depths
        q_lchild_depth = -1 if q.lNode == -1 else q.lNode.depth
        q_rchild_depth = -1 if q.rNode == -1 else q.rNode.depth
            
        q.depth = max(1 + q_lchild_depth, 1 + q_rchild_depth)
        
        p_lchild_depth = -1 if p.lNode == -1 else p.lNode.depth
        p.depth = max(1 + p_lchild_depth, 1 + p.rNode.depth)                      
        
    
    def add_node_traverse(self, current_node, element):
            if element <= current_node.get_data():
                if current_node.lNode != -1:
                    self.add_node_traverse(current_node.lNode, element)
                    current_node.depth = max(current_node.depth, current_node.lNode.depth + 1)
                    
                    
                else: # Found the point to add the node
                    current_node.lNode = Node(element)
                    current_node.lNode.parent = current_node
                    current_node.depth = max(current_node.depth, 1 + current_node.lNode.depth)
                    
                    
            else:
                if current_node.rNode != -1:
                    self.add_node_traverse(current_node.rNode, element)
                    current_node.depth = max(current_node.depth, current_node.rNode.depth + 1)
                       
                else:
                    current_node.rNode = Node(element)
                    current_node.rNode.parent = current_node
                    current_node.depth = max(current_node.depth, 1 + current_node.rNode.depth)
                    
            # Then we need to rebalance the tree
            if current_node.depth >= 2:
                if self.is_balanced(current_node) > 1:
                    # Left heavy
                    # Check the left child 
                    if self.is_balanced(current_node.lNode) > 0:
                        # Left left case
                        self.right_rotate(current_node)
                    else:
                        # Left right case
                        self.left_rotate(current_node.lNode)
                        self.right_rotate(current_node)
                elif self.is_balanced(current_node) < -1: # Right heavy
                    if self.is_balanced(current_node.rNode) < 0:
                        # Right right case
                        self.left_rotate(current_node)
                    else:
                        # Right left case
                        self.right_rotate(current_node.rNode)
                        self.left_rotate(current_node)
          
                
                    
                    
    def in_order_traverse(self, node, ordered_list=[]):
        if node.lNode != -1:
            ordered_list = self.in_order_traverse(node.lNode)
            
        ordered_list.append(node)
        
        if node.rNode != -1:
            ordered_list = self.in_order_traverse(node.rNode)
            
        return ordered_list
           
elements = [1, 2, 3, 4, 5, 6, 7]

binTree = BinaryTree()

for element in elements:
    binTree.add_node(element)
            
nodes = binTree.in_order_traverse(binTree.starting_element) 

for node in nodes:
    print("Element:", node.element, "Depth", node.depth, "LPointer", node.lNode.element if (type(node.lNode) != int) else -1, 
                          "RPointer", node.rNode.element if (type(node.rNode) != int) else -1,
                            "Parent", node.parent.element if (type(node.parent) != int) else -1)
    
    

Element: 1 Depth 0 LPointer -1 RPointer -1 Parent 2
Element: 2 Depth 1 LPointer 1 RPointer 3 Parent 4
Element: 3 Depth 0 LPointer -1 RPointer -1 Parent 4
Element: 4 Depth 2 LPointer 2 RPointer 6 Parent -1
Element: 5 Depth 0 LPointer -1 RPointer -1 Parent 6
Element: 6 Depth 1 LPointer 5 RPointer 7 Parent 4
Element: 7 Depth 0 LPointer -1 RPointer -1 Parent 6


### The Problem With A Binary Tree

The binary tree use a Divide and Conquer strategy to locate an element so you would expect the time complexity to be O(log n) like that of binary search. This is correct a binary tree can have a time complexity of O(log n) if the binary tree is balanced, that is each subtree roughly has the same number of left nodes and right nodes. 

![balanced binary tree](https://www.baeldung.com/wp-content/uploads/2019/11/Zrzut-ekranu-2019-10-31-o-15.31.40.png)

Above is an example of a binary tree. Each subtree on the left and right hand side of a node has the same depth.

In an extreme case searching with a binary tree is O(n) time complexity.

![unbalanced binary tree](https://www.eecs.umich.edu/courses/eecs380/ALG/niemann/s_fig33.gif)

This binary tree would take O(n) to search (when the nodes are already in order). This is because when you are searching for an element you are always taking the path down the right node.

Therefore we need to ensure we are building a balanced binary tree. This is where an AVL tree comes in.

The time complexity for searching fir an element is O(h) where h is the height of the binary tree.

## AVL Tree & AVL Sort

The above example of binary tree is actually an implementation of an AVL Tree. An implementation is balances itself after each insertion. This ensures that the time compleity is similar to O(log n) when finding an element.

AVL ensures that no node can have left and right subtrees that have a height difference greater than one. There are otherwise to ensure a balanced tree but this is by far the easiest.

![AVL tree](https://www.cs.auckland.ac.nz/software/AlgAnim/fig/AVL_bal.gif)

## The Comparison Model

In the comparison model we assume that memory is random access. That is we can access any element in a list under O(1) time complexity.

In the comparison model we also assume that comparing two elements takes O(1) time complexity.

Using these assumptions we can derive the minimum time complexity for finding an element and sorting a list.

### Decision Trees

Any comparison algorithm can be viewed as a tree of all possible comparisons, their outcomes and the resulting answer of any particular n.

![an example of a decision tree](https://bcs.whfreeman.com/webpub/mathematics/gersting7e/chapter%206/section6-3/problem1/images/page5.gif)

#### Searching Lower Bound

We can prove that for n sorted items finding a given item among them in the comparison model requires O(lg n) time complexity.

#### Proof

- A decision tree is binary and the decision tree must have greater or equal to n leaves. Where n is the size of the list.

- This implies that the height is greater or equal to lg n. Which concludes the proof.

#### Sorting Lower Bound

We can prove that a comparison model the time complexity of sorting a list is at least O(nlogn)

#### Proof

![comparison sort time complexity](https://tim-beatham.github.io/Week7/comparison_sort_time.jpg)

Above is a proof of the time complexity for sorting elements using the comparison sort model. 

Please attempt to ignore my dodgey handwriting.

I have also in the proof forgotten to add the end of proof symbol but oh well.

## Linear-Time Sort (Integer Sorting)

Using different assumptions we can however achieve sorting which has a time complexity of at best O(n) time complexity which is much better than O(nlgn) time complexity.

- Assume we are sorting n keys of integers {0, 1, ..., k - 1} and each element fits into one word.
- By using the above assumptions we can add, subtract and compare numbers in O(1) time complexity. 

Using this assumptions we can achieve at best O(n) time complexity in reality however it is quite a bit worse than this but we can set it up to be better than O(nlgn) time complexity.

### Counting Sort

Counting sort assumes the key we use for sorting are between a specified range.

Counting sort works by: 
- Instantiating a list (L) of size k, where k is the maximum range of the list. 
- Iterating over each element list (l) and adding one to the corresponding list (L). That is L[l[i]] += 1.

From this we can sort the list.

The time complexity of counting sort is O(n + k). Where k is the range of the input.

This could be a problem if the range of the input is n^2 as it means the time complexity is O(n^2) which is worse than the comparison model version.

### Radix Sort

Radix sort is in essense an extension of counting sort. It uses counting sort as a subroutine.

In Radix sort we sort elements digit by digit from the least significant digit to the most significant digit.

This has time complexity O(d * (n + b)) where b is the base representing the numbers and d is the maximum number of digits.

In the decimal system d is O(logb k). Meaning the overall time complexity is O((n + b) * logb k) which is worse than a comparison model version.

If we assume k <= n^c then the time complexity becomes O( (n + b) * clogb n) which is equal to O(nlogb n).

This is still worse than a comparsion model.

If we also assume that the base we are using is that of b = n we can achieve O(n) time complexity:

O ( (n + n) * clogn n) = O(2n * c) = O(n) time complexity.

In [190]:
# Implementing Counting Sort

In [189]:
# Implementing Radix Sort