# Divide and conquer, Sorting, Searching, and Randomized Algorithm

In [None]:
import math
import random
import collections

## Grade 3 multiplication algorithm
- $O(n^2)$

## Karatsuba multiplication:
- Given x: 5678, y: 1234
- Define a=56, b=78, c=12, d=34
- compute 
$a*c \tag{1}$ 
$b*d \tag{2}$
$(a+b)*(c+d) \tag{3}$
$(3)-(2)-(1) = ad+bc \tag{4}$
- result
$10^{n}*(1) + (2) + 10^{n/2}*(4) = 10^{n}*ac + bd + 10^{n/2}*(ad+bc) \tag{5}$
- recursively compute (4 multiplications)
$ac, bd, ad, bc \tag{6}$

## Gauss' trick:
- recursively compute (3 multiplications)
$ac, bd, (a+b)(c+d) \tag{7}$


In [None]:
def karatsuba(operand1, operand2):
    """
    Multiplies two non-negative integers given as decimal strings using Karatsuba multiplication

    The operands may have any number of digits and need not have the same length.

    Args:
    operand1 -- string of decimal digits representing the first operand of multiplication
    operand2 -- string of decimal digits representing the second operand of multiplication

    Returns:
    result -- integer representing the result of multiplication

    Raises:
    ValueError -- if either operand is empty or contains a character that is not a decimal digit
    """

    # validate once here so the recursive helper can assume clean digit strings
    if not (operand1.isdecimal() and operand2.isdecimal()):
        raise ValueError("operands must be non-empty strings of decimal digits")
    return _karatsuba_digits(operand1, operand2)


def _karatsuba_digits(digits1, digits2):
    """
    Recursive core of karatsuba(); expects two non-empty strings of decimal digits
    """

    # base case: a single-digit operand is multiplied directly
    if len(digits1) < 2 or len(digits2) < 2:
        return int(digits1) * int(digits2)

    # left-pad the shorter operand with zeros so both are split at the same position
    width = max(len(digits1), len(digits2))
    digits1 = digits1.zfill(width)
    digits2 = digits2.zfill(width)

    # x = a * 10^low_width + b and y = c * 10^low_width + d
    low_width = width // 2
    a, b = digits1[:-low_width], digits1[-low_width:]
    c, d = digits2[:-low_width], digits2[-low_width:]

    ac = _karatsuba_digits(a, c)
    bd = _karatsuba_digits(b, d)
    # Gauss' trick: (a+b)(c+d) - ac - bd = ad + bc, so only 3 recursive multiplications are needed
    sum_ab = str(int(a) + int(b))
    sum_cd = str(int(c) + int(d))
    ad_plus_bc = _karatsuba_digits(sum_ab, sum_cd) - ac - bd

    return ac * 10 ** (2 * low_width) + ad_plus_bc * 10 ** low_width + bd

In [None]:
# Regression checks: karatsuba() must reproduce these known products
assert karatsuba("5678", "1234") == 7006652
assert karatsuba("12345678", "12345678") == 152415765279684
assert karatsuba("74639573", "94756283") == 7072568502187159
assert karatsuba("8475637284756461", "7483726374837363") == 63429350291486860416277938452343
assert karatsuba(
    "3141592653589793238462643383279502884197169399375105820974944592",
    "2718281828459045235360287471352662497757247093699959574966967627",
) == 8539734222673567065463550869546574495034888535765114961879601127067743044893204848617875072216249073013374895871952806582723184

## Master's method

$T(n) \le aT\left(\dfrac{n}{b}\right) + O(n^{d})$

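$a$ = number of recursive calls (subproblems) made by each invocation

$b$ = factor by which the input size shrinks in each recursive call

$d$ = exponent of the running time of the "combine step", i.e. the work done outside the recursive calls is $O(n^{d})$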

$T(n) = O(n^{d}\log{n})$ if $a = b^{d}$

$T(n) = O(n^{d})$ if $a \lt b^{d}$

$T(n) = O(n^{\log_{b}{a}})$ if $a \gt b^{d}$


## Mergesort

- $O(n\log{n})$

```
C = output[length=n]
A = 1st sorted array[n/2]
B = 2nd sorted array[n/2]
i = 1
j = 1

for k=1 to n
    if A(i) < B(j)
        C(k) = A(i)
        i++
    else (B(j) < A(i))
        C(k) = B(j)
        j++
```

In [None]:
def mergesort(integer_array, filepath=""):
    """
    Implements merge sort and computes # of inversion

    Elements are compared by their int() value but returned unchanged, so a list of
    numeric strings (e.g. read from a file) comes back as sorted numeric strings.
    The input list is not modified.

    Args:
    integer_array -- list of integers (or numeric strings); ignored when filepath is given
    filepath -- path of a text file holding one number per line; "" to sort integer_array

    Returns:
    Tuple of list representing sorted array and an integer representing the number of inversion
    """

    if filepath != "":
        with open(filepath, 'r') as handle:
            # skip blank lines so a trailing newline does not produce an unparsable '' entry
            integer_array = [entry for entry in handle.read().split("\n") if entry.strip()]

    return _sort_and_count_inversions(list(integer_array))


def _sort_and_count_inversions(items):
    """
    Recursively sorts items and counts pairs (p, q) with p before q and int(p) > int(q)
    """

    # base case: zero or one element is already sorted and has no inversion
    if len(items) <= 1:
        return (items, 0)

    middle = len(items) // 2
    left, left_inversions = _sort_and_count_inversions(items[:middle])
    right, right_inversions = _sort_and_count_inversions(items[middle:])

    merged = []
    num_inversion = left_inversions + right_inversions
    i = 0
    j = 0
    while i < len(left) and j < len(right):
        # "<=" keeps equal elements in order and does not count them as inversions
        if int(left[i]) <= int(right[j]):
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
            # every element still waiting in the left half is larger than the one just taken
            num_inversion += len(left) - i

    # one half is exhausted; the rest of the other half is already in order
    merged.extend(left[i:])
    merged.extend(right[j:])
    return (merged, num_inversion)

In [None]:
# Print the inversion count (second tuple element) for the numbers in data/mergesort.txt.
# The empty list is only a placeholder: mergesort() replaces it with the file contents
# whenever a non-empty file path is given.
print(mergesort([], "data/mergesort.txt")[1])

## Quicksort

- $O(n\log{n})$ on average
- sorts in place (no auxiliary array required)

```
Partition(A,l,r) # input = A[l ... r]
P = A[l] # for example, pick first element as pivot
i = l+1
for j = l+1 to r
    if A[j] < P
        swap A[j] and A[i]
        i++
swap A[l] and A[i-1]
```

```
quicksort(array A, length n)
if n=1
    return
p = choosepivot(A, n)
Partition A around P
recursively sort 1st part
recursively sort 2nd part
```

In [None]:
def quicksort(integer_array, start_index, end_index, comparison, pivot_strategy):
    """
    Implements quicksort and computes # of comparison in partition subroutine

    The list is sorted in place between start_index and end_index (both inclusive).

    Args:
    integer_array -- list containing intergers to be sorted
    start_index -- beginning index of list to apply sorting
    end_index -- ending index of list to apply sorting
    comparison -- list to store the number of comparions in all subroutines
    pivot_strategy -- flag to specify how to pick a pivot element:
                      "first_item", "last_item" or "median"

    Returns:
    total_comparison -- total number of comparions in all subroutines

    Raises:
    ValueError -- if pivot_strategy is not one of the supported flags
    """

    if pivot_strategy not in ("first_item", "last_item", "median"):
        raise ValueError("unknown pivot_strategy: " + repr(pivot_strategy))

    _quicksort_range(integer_array, start_index, end_index, comparison, pivot_strategy)

    # summed once here instead of in every recursive call
    total_comparison = sum(comparison)
    return total_comparison


def _quicksort_range(integer_array, start_index, end_index, comparison, pivot_strategy):
    """
    Recursive core of quicksort(); sorts integer_array[start_index..end_index] in place
    """

    # base case: there is at most 1 element in the range to sort
    if end_index <= start_index:
        return

    if pivot_strategy == "first_item":
        pivot = integer_array[start_index]
        partition_around_first_item(integer_array, start_index, end_index, pivot, comparison)
    elif pivot_strategy == "last_item":
        pivot = integer_array[end_index]
        partition_around_last_item(integer_array, start_index, end_index, pivot, comparison)
    else:
        pivot = choose_median_item_as_pivot(integer_array, start_index, end_index)
        partition_around_specific_item(integer_array, start_index, end_index, pivot, comparison)

    # After partitioning, everything left of the pivot inside the range is strictly smaller,
    # so the first match *within the range* is the pivot's final position. Searching the
    # whole list instead would pick up duplicates that live outside the range.
    partition_index = integer_array.index(pivot, start_index, end_index + 1)

    _quicksort_range(integer_array, start_index, partition_index - 1, comparison, pivot_strategy)
    _quicksort_range(integer_array, partition_index + 1, end_index, comparison, pivot_strategy)
    
    
def partition_around_first_item(integer_array, start_index, end_index, pivot, comparison):
    """
    Partitions integer_array[start_index..end_index] in place around its first item

    Items smaller than pivot end up to the left of the pivot's final position, all other
    items to its right.

    Args:
    integer_array -- list containing integers to be partitioned
    start_index -- beginning index of list to apply partitioning
    end_index -- ending index of list to apply partitioning
    pivot -- pivot element (the item stored at start_index)
    comparison -- list that receives the number of comparisons made by this call

    Returns:
    None
    """

    # boundary is the index of the last item known to be smaller than the pivot
    boundary = start_index
    for cursor in range(start_index + 1, end_index + 1):
        if integer_array[cursor] < pivot:
            boundary += 1
            integer_array[boundary], integer_array[cursor] = integer_array[cursor], integer_array[boundary]

    # move the pivot between the "smaller" and the "not smaller" items
    integer_array[start_index], integer_array[boundary] = integer_array[boundary], integer_array[start_index]

    # every item except the pivot was compared against it once
    comparison.append(end_index - start_index)


def partition_around_last_item(integer_array, start_index, end_index, pivot, comparison):
    """
    Partitions integer_array[start_index..end_index] in place around its last item

    The last item is first swapped to the front of the range; after that the range is
    partitioned exactly as for a first-item pivot.

    Args:
    integer_array -- list containing integers to be partitioned
    start_index -- beginning index of list to apply partitioning
    end_index -- ending index of list to apply partitioning
    pivot -- pivot element (the item stored at end_index)
    comparison -- list that receives the number of comparisons made by this call

    Returns:
    None
    """

    # bring the pivot to the front of the range
    integer_array[start_index], integer_array[end_index] = integer_array[end_index], integer_array[start_index]

    # boundary is the index of the last item known to be smaller than the pivot
    boundary = start_index
    for cursor in range(start_index + 1, end_index + 1):
        if integer_array[cursor] < pivot:
            boundary += 1
            integer_array[boundary], integer_array[cursor] = integer_array[cursor], integer_array[boundary]

    # move the pivot between the "smaller" and the "not smaller" items
    integer_array[start_index], integer_array[boundary] = integer_array[boundary], integer_array[start_index]

    # every item except the pivot was compared against it once
    comparison.append(end_index - start_index)


def partition_around_specific_item(integer_array, start_index, end_index, pivot, comparison):
    """
    Performs partition with the specific item in the list

    Only integer_array[start_index..end_index] is touched. Items smaller than pivot end up
    to the left of the pivot's final position, all other items to its right.

    Args:
    integer_array -- list containing intergers to be partitioned
    start_index -- beginning index of list to apply partitioning
    end_index -- ending index of list to apply partitioning
    pivot -- pivot element; must occur between start_index and end_index
    comparison -- list to store the number of comparions in all subroutines

    Returns:
    None

    Raises:
    ValueError -- if pivot does not occur between start_index and end_index
    """

    # Look for the pivot inside the range only: with duplicate values an unrestricted
    # search could return a position outside the range and corrupt already placed items.
    pivot_index = integer_array.index(pivot, start_index, end_index + 1)

    # bring the pivot to the front of the range
    integer_array[start_index], integer_array[pivot_index] = integer_array[pivot_index], integer_array[start_index]

    i = start_index + 1
    for j in range(start_index + 1, end_index + 1):
        if integer_array[j] < pivot:
            integer_array[i], integer_array[j] = integer_array[j], integer_array[i]
            i += 1

    # move the pivot between the "smaller" and the "not smaller" items
    integer_array[start_index], integer_array[i - 1] = integer_array[i - 1], integer_array[start_index]

    # every item except the pivot was compared against it once
    comparison.append(end_index - start_index)


def choose_median_item_as_pivot(integer_array, start_index, end_index):
    """
    Returns the median of the first, middle, and last elements of a range of the list

    For a range with an even number of elements the lower of the two middle positions
    is used as the "middle" element.

    Args:
    integer_array -- list containing integers
    start_index -- beginning index of the range
    end_index -- ending index of the range

    Returns:
    median -- median element of the first, middle, and last elements of the range
    """

    # floor division picks the exact middle for odd lengths and the lower middle otherwise
    middle_offset = (end_index - start_index) // 2

    first = integer_array[start_index]
    last = integer_array[end_index]
    middle = integer_array[start_index + middle_offset]

    # order the two end items, then clamp the middle item between them
    if first > last:
        smaller, larger = last, first
    else:
        smaller, larger = first, last

    if middle < smaller:
        return smaller
    if middle > larger:
        return larger
    return middle


def openfile(file_path):
    """
    Read in a file and store its non-blank lines into a list

    Args:
    file_path -- path of file to be read

    Returns:
    integer_array -- list of strings, one per non-blank line of the file
                     (use list_of_string_to_integer to convert them to integers)
    """

    with open(file_path, 'r') as handle:
        # drop blank lines so a trailing newline does not leave an unparsable '' entry
        integer_array = [entry for entry in handle.read().split("\n") if entry.strip()]
    return integer_array


def list_of_string_to_integer(input_list):
    """
    Replaces every element of the list with its int() value, in place

    Args:
    input_list -- list of integers (in type string)

    Returns:
    None
    """

    # assigning by position keeps the caller's list object and converts one slot at a time
    for position, text in enumerate(input_list):
        input_list[position] = int(text)


# Load the quicksort input (one number per line) and convert the entries to integers in place.
array = openfile("data/quicksort.txt")
# array = openfile("data/quicksort-test1.txt")
list_of_string_to_integer(array)
# Uncomment exactly one of the calls below: quicksort sorts `array` in place, so a second
# call on the same list would run on already sorted data.
# NOTE(review): the number under each call is presumably the comparison count the author
# observed for data/quicksort.txt -- confirm.
# print(quicksort(array, 0, len(array)-1, [], "first_item"))
# 162085
# print(quicksort(array, 0, len(array)-1, [], "last_item"))
# 164123
# print(quicksort(array, 0, len(array)-1, [], "median"))
# 138382
# print(array)

## Randomized Selection

- Input: Array $A$ with $n$ distinct numbers and an order statistic $i \in \{1, 2, \dots, n\}$
- Output: $i$th smallest number

### Solutions

1. Do mergesort and return $i$th element of sorted array: $O(n\log{n})$
2. Randomized Selection: $O(n)$ on average
```
RSelect(array A, length n, order statistic i)
    if n = 1
        return A[1]
    choose pivot p from A uniformly at random
    partition A around p and let j = new index of p
    if j = i
        return p
    if j > i
        return RSelect(1st part of A, j-1, i)
    if j < i
        return RSelect(2nd part of A, n-j, i-j)
```

# Graph

- node (V) and edge (E)
- let n = number of vertices and m = number of edges

## Min-cut problem

- Compute a cut on graph G=(V,E) with fewest number of crossing edges

## Adjacency lists

- array of vertices
- array of edges
- each edge points to its endpoints
- each vertex points to its edges
- space requirement: $O(n+m)$

## Contraction algorithm

while there are more than 2 vertices
- pick a remaining edge (u,v) uniformly at random
- merge u and v into a single vertex
- remove self-loops (but parallel edges are allowed)

return cut represented by the final 2 vertices

In [None]:
def openfile(file_path, split_index):
    """
    Read in a file and produces a list of lists of tokens (adjacency representation)

    Each non-blank line becomes one inner list: the line is split on split_index and
    empty tokens (such as the one produced by a trailing separator) are dropped.
    Blank lines are skipped.

    Args:
    file_path -- path of file to be read
    split_index -- character to base the "split"

    Returns:
    array -- a list of lists of strings
    """

    array = []
    with open(file_path, 'r') as handle:
        for text in handle.read().split("\n"):
            # filter empty tokens instead of blindly dropping the last one, so a line
            # without a trailing separator keeps its final vertex
            subarray = [token for token in text.split(split_index) if token.strip()]
            if subarray:
                array.append(subarray)
    return array


def convert_to_pair_representation(array):
    """
    Converts a graph from adjacency representation to pair representation

    Every inner list is read as [vertex, neighbour, neighbour, ...] and contributes one
    (vertex, neighbour) tuple per neighbour, in order.

    Args:
    array -- a list of lists (adjacency representation)

    Returns:
    edges -- a list of tuples (pair representation)
    """

    return [(row[0], neighbour) for row in array for neighbour in row[1:]]


def get_new_adjacent_vertices(array, vertex1, vertex2):
    """
    Collects the neighbours the merged vertex will have after contracting (vertex1, vertex2)

    The rows of vertex1 and vertex2 are concatenated in list order. Entries equal to
    vertex1 or vertex2 are left out, which drops the contracted edge and any self-loop.

    Args:
    array -- a list of lists (adjacency representation)
    vertex1 -- first vertex of edge to apply contraction
    vertex2 -- second vertex of edge to apply contraction

    Returns:
    new_adjacent_vertices -- new sets of vertices that vertex1 will be connected to
    """

    new_adjacent_vertices = []
    for row in array:
        head = row[0]
        if not (head == vertex1 or head == vertex2):
            continue
        # keep parallel edges, skip anything that would point back at the merged vertex
        new_adjacent_vertices.extend(
            vertex for vertex in row if vertex != vertex1 and vertex != vertex2
        )
    return new_adjacent_vertices


def remove_vertex2(array, vertex2):
    """
    Deletes the row of vertex2 from the graph, in place

    Only the first row whose leading entry equals vertex2 is deleted; occurrences of
    vertex2 inside other rows are left untouched.

    Args:
    array -- a list of lists (adjacency representation)
    vertex2 -- second vertex of edge to apply contraction

    Returns:
    None
    """

    for position, row in enumerate(array):
        if row[0] == vertex2:
            # returning right after the deletion keeps the iteration safe
            del array[position]
            return


def update_vertex1(array, vertex1, new_adjacent_vertices):
    """
    Replaces the row of vertex1 with a row holding its new neighbours, in place

    The first row whose leading entry equals vertex1 is deleted and a fresh row
    [vertex, *new_adjacent_vertices] is appended at the end of the graph.

    Args:
    array -- a list of lists (adjacency representation)
    vertex1 -- first vertex of edge to apply contraction
    new_adjacent_vertices -- new sets of vertices that vertex1 will be connected to

    Returns:
    None
    """

    for position, row in enumerate(array):
        if row[0] == vertex1:
            replacement = [row[0]] + new_adjacent_vertices
            # returning right after the edit keeps the iteration safe
            del array[position]
            array.append(replacement)
            return
            

def replace_vertex2_with_vertex1(array, vertex1, vertex2):
    """
    Redirects every reference to vertex2 so that it points at vertex1, in place

    In each row every occurrence of vertex2 is removed and one vertex1 is appended to the
    end of the row per removed occurrence.

    Args:
    array -- a list of lists (adjacency representation)
    vertex1 -- first vertex of edge to apply contraction
    vertex2 -- second vertex of edge to apply contraction

    Returns:
    None
    """

    for row in array:
        while True:
            try:
                row.remove(vertex2)
            except ValueError:
                # no occurrence of vertex2 left in this row
                break
            row.append(vertex1)


def mincut(adjacency_representation_array):
    """
    Runs one pass of the random contraction algorithm on the graph, in place

    While more than two vertices remain, an edge is picked uniformly at random and its
    endpoints are merged into one vertex (self-loops removed, parallel edges kept). When
    the function returns, the graph holds at most two rows; the entries after the first
    element of a row are the edges crossing the cut that was found.

    random.choice raises IndexError if no edge is left while more than two vertices
    remain (e.g. a graph with isolated vertices).

    Args:
    adjacency_representation_array -- a list of lists (adjacency representation)

    Returns:
    None
    """

    # The loop alone contracts down to two vertices; no recursive call is needed, which
    # also keeps the call stack flat for large graphs.
    while len(adjacency_representation_array) > 2:
        pair_representation_array = convert_to_pair_representation(adjacency_representation_array)
        pick = random.choice(pair_representation_array)
        # compute the merged neighbour list before either row is modified
        new_adjacent_vertices = get_new_adjacent_vertices(adjacency_representation_array, pick[0], pick[1])
        remove_vertex2(adjacency_representation_array, pick[1])
        update_vertex1(adjacency_representation_array, pick[0], new_adjacent_vertices)
        replace_vertex2_with_vertex1(adjacency_representation_array, pick[0], pick[1])
        

def do_trial(file_path, split_index, num_trial):
    """
    Execute mincut many times to find the optimum answer

    The graph is re-read from the file for every trial because mincut() contracts it in
    place. Progress is printed after each trial and the best result at the end.

    Args:
    file_path -- path of the file holding the graph in adjacency representation
    split_index -- character separating the vertices on each line of the file
    num_trial -- how many times to try (at least 1)

    Returns:
    smallest number of crossing edges found over all trials

    Raises:
    ValueError -- if num_trial is smaller than 1
    """

    # smallest row length seen so far; None until the first trial has run, so no
    # arbitrary upper bound can cap the result
    mincut_num = None
    for _ in range(num_trial):
        adjacency_representation_array = openfile(file_path, split_index)
        mincut(adjacency_representation_array)
        row_length = len(adjacency_representation_array[0])
        if mincut_num is None or row_length < mincut_num:
            mincut_num = row_length
        print(str(row_length) + " ? mincut is: " + str(mincut_num))

    if mincut_num is None:
        raise ValueError("num_trial must be at least 1")

    # a row is [vertex, neighbour, ...], so the edge count is its length minus one
    final_answer = mincut_num - 1
    print("final answer: " + str(final_answer))
    return final_answer


if __name__ == "__main__":
    # Run 10 contraction trials on the tab-separated graph in data/mincut.txt
    do_trial("data/mincut.txt", "\t", 10)
    # 17
    # NOTE(review): 17 is presumably the answer the author observed for data/mincut.txt -- confirm.
    # The calls below run the same experiment on the space-separated test graphs.
#     do_trial("data/mincut-test1.txt", " ", 10)
#     do_trial("data/mincut-test2.txt", " ", 10)
#     do_trial("data/mincut-test3.txt", " ", 10)
#     do_trial("data/mincut-test4.txt", " ", 10)
#     do_trial("data/mincut-test5.txt", " ", 10)
#     do_trial("data/mincut-test6.txt", " ", 10)