# Algorithms Week 3, MergeSort and QuickSort

## MergeSort

### General Approach

1. Divide array into two halves
2. Recursively sort each half
3. Merge the two halves together

In [56]:
def merge(L1, L2, compare = lambda x, y: x <= y):
    """Merge two sorted sequences into a single new sorted list.

    Args:
        L1: First sequence, already ordered according to ``compare``.
        L2: Second sequence, already ordered according to ``compare``.
        compare: Two-argument predicate; when ``compare(a, b)`` is true the
            item from ``L1`` is emitted before the item from ``L2``.
            Defaults to ``<=`` (ascending order, ties taken from ``L1``).

    Returns:
        A new list holding every item of ``L1`` and ``L2``. The inputs are
        only read, never modified.
    """
    merged = []
    left, right = 0, 0
    end1, end2 = len(L1), len(L2)
    while left < end1 and right < end2:
        head1, head2 = L1[left], L2[right]
        if not compare(head1, head2):
            merged.append(head2)
            right += 1
            continue
        merged.append(head1)
        left += 1
    # Exactly one side can still have items; copy that tail over unchanged.
    leftover = L2[right:] if left == end1 else L1[left:]
    merged.extend(leftover)
    return merged

# Two ascending lists with interleaved values (odds and evens).
test1 = [1,3,5,7,9]
test2 = [2,4,6,8,10]
# Uses the default `<=` comparator; as the last expression in the cell,
# the merged list is shown as the cell's output.
merge(test1,test2)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [83]:
def short_merge(L1, L2, compare=lambda x, y: x <= y):
    """Merge two sorted lists into a new sorted list.

    The inputs are only read: items are walked with two indices instead of
    being removed with ``pop(0)``, so the caller's lists are left intact and
    the merge takes linear rather than quadratic time.

    Args:
        L1: First list, already ordered according to ``compare``.
        L2: Second list, already ordered according to ``compare``.
        compare: Two-argument predicate; when ``compare(a, b)`` is true the
            item from ``L1`` is emitted before the item from ``L2``.
            Defaults to ``<=`` (ascending order, ties taken from ``L1``).

    Returns:
        A new list containing every item of ``L1`` and ``L2`` in order.
    """
    L = []
    i = j = 0
    while i < len(L1) and j < len(L2):
        if compare(L1[i], L2[j]):
            L.append(L1[i])
            i += 1
        else:
            L.append(L2[j])
            j += 1
    # At most one of these slices is non-empty once the loop has finished.
    L.extend(L1[i:])
    L.extend(L2[j:])
    return L

# Ascending inputs, merged with a `<=` comparator.
test1 = [1,3,5,7,9]
test2 = [2,4,6,8,10]
# Descending inputs, merged with a `>=` comparator.
test3 = [9,7,5,3,1]
test4 = [10,8,6,4,2]
print(short_merge(test1, test2, lambda x, y: x <= y))
print(short_merge(test3, test4, lambda x, y: x >= y))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


In [95]:
def merge_sort(L, compare = lambda x, y: x <= y):
    """Sort ``L`` with top-down merge sort.

    Args:
        L: List to sort.
        compare: Two-argument ordering predicate handed on to
            ``short_merge``. Defaults to ``<=`` (ascending).

    Returns:
        ``L`` itself when it has at most one element; otherwise the list
        produced by ``short_merge`` from the two recursively sorted halves.
    """
    size = len(L)
    if size <= 1:
        # Nothing to split: the input object is handed back as-is.
        return L
    middle = size // 2
    left_sorted = merge_sort(L[:middle], compare)
    right_sorted = merge_sort(L[middle:], compare)
    return short_merge(left_sorted, right_sorted, compare)

# Unsorted sample with duplicates (7, 3, 2) and a negative value.
test = [6,7,2,3,4,7,18,23,3,2,1,11,0,-1]
# Descending order via a `>=` comparator, then ascending via the default.
print(merge_sort(test, lambda x, y: x >= y))
print(merge_sort(test))
# IPython line magic (not plain Python): times repeated calls to merge_sort.
%timeit merge_sort(test)

[23, 18, 11, 7, 7, 6, 4, 3, 3, 2, 2, 1, 0, -1]
[-1, 0, 1, 2, 2, 3, 3, 4, 6, 7, 7, 11, 18, 23]
39.9 µs ± 1.38 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Time and Space Complexity

Mergesort runs in $O(N \log N)$ time and uses additional space proportional to $N$.

### Issues and Improvements

MergeSort has too much overhead for small subarrays.  Switch to insertion sort when subarray length is 7 or less.

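Stop if already sorted: if the biggest item in the first half is no larger than the smallest item in the second half, the two halves are already in order and can simply be concatenated without merging.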

In [99]:
def insertion_sort(L, compare=None):
    """Sort ``L`` in place with insertion sort and return it.

    Args:
        L: Mutable sequence to sort. It is reordered in place.
        compare: Optional two-argument ordering predicate in the same style
            as the merge functions: ``compare(a, b)`` is true when ``a`` may
            stay in front of ``b``. When omitted, items are ordered
            ascending using ``<``.

    Returns:
        The same object ``L``, now sorted.
    """
    if compare is None:
        def misplaced(prev, cur):
            # Default ordering: only `<` is required of the items.
            return cur < prev
    else:
        def misplaced(prev, cur):
            return not compare(prev, cur)

    for i in range(1, len(L)):
        j = i
        # Bubble the new item left until its predecessor may stay in front.
        while j > 0 and misplaced(L[j - 1], L[j]):
            L[j], L[j - 1] = L[j - 1], L[j]
            j -= 1
    return L

def improved_short_merge(L1, L2, compare=lambda x, y: x <= y):
    """Merge two sorted lists, skipping the merge when they do not overlap.

    If every item of one list belongs before every item of the other, the
    result is a plain concatenation. Otherwise a regular two-index merge is
    performed. The inputs are never modified.

    Args:
        L1: First list, already ordered according to ``compare``.
        L2: Second list, already ordered according to ``compare``.
        compare: Two-argument predicate; when ``compare(a, b)`` is true the
            item from ``L1`` is emitted before the item from ``L2``.
            Defaults to ``<=`` (ascending order, ties taken from ``L1``).

    Returns:
        A new list containing every item of ``L1`` and ``L2`` in order.
    """
    # An empty side has no first/last item to inspect; the other side is
    # already the merged result.
    if not L1 or not L2:
        return L1 + L2
    if compare(L1[-1], L2[0]):
        return L1 + L2
    # Require L1's first item to belong strictly after L2's last one. On a
    # tie, fall through to the merge so equal items keep L1-before-L2 order,
    # matching what the merge loop below does.
    if not compare(L1[0], L2[-1]):
        return L2 + L1
    L = []
    i = j = 0
    while i < len(L1) and j < len(L2):
        if compare(L1[i], L2[j]):
            L.append(L1[i])
            i += 1
        else:
            L.append(L2[j])
            j += 1
    # At most one of these slices is non-empty once the loop has finished.
    L.extend(L1[i:])
    L.extend(L2[j:])
    return L

def _insertion_sorted(L, compare):
    """Return a new list with the items of ``L`` insertion-sorted by ``compare``."""
    out = list(L)
    for i in range(1, len(out)):
        item = out[i]
        j = i
        # Shift items right until the predecessor may stay in front of `item`.
        while j > 0 and not compare(out[j - 1], item):
            out[j] = out[j - 1]
            j -= 1
        out[j] = item
    return out


def improved_merge_sort(L, compare=lambda x, y: x <= y, cutoff=7):
    """Merge sort that hands short sublists to insertion sort.

    Sublists of at most ``cutoff`` items are insertion-sorted using the same
    ``compare`` predicate as the merge step, so custom orderings also apply
    to short inputs. Longer lists are split in half, each half is sorted by
    this same function, and the halves are combined with
    ``improved_short_merge``.

    Args:
        L: List to sort. It is not modified.
        compare: Two-argument ordering predicate; ``compare(a, b)`` is true
            when ``a`` may come before ``b``. Defaults to ``<=`` (ascending).
        cutoff: Largest sublist length sorted by insertion sort. Values
            below 1 are treated as 1.

    Returns:
        A new sorted list.
    """
    N = len(L)
    # Clamp to 1 so a list of length 1 is never split into an empty half
    # and itself, which would recurse forever.
    if N <= max(cutoff, 1):
        return _insertion_sorted(L, compare)
    mid = N // 2
    return improved_short_merge(
        improved_merge_sort(L[:mid], compare, cutoff),
        improved_merge_sort(L[mid:], compare, cutoff),
        compare,
    )

# Fixtures chosen to reach each path of improved_short_merge:
test5 = [1,3,5]
test6 = [6,8,10]   # entirely above test5
test7 = [-5,-1,0]  # entirely below test5
test8 = [-6,0,8]   # overlaps test5
print(improved_short_merge(test5,test6, lambda x,y: x<=y))  # returns L1 + L2
print(improved_short_merge(test5,test7, lambda x,y: x<=y))  # returns L2 + L1
print(improved_short_merge(test5,test8, lambda x,y: x<=y))  # item-by-item merge

# len(test) is 14, above the cutoff of 7, so the top-level call takes the
# merge path rather than sorting the whole list with insertion sort.
test = [6,7,2,3,4,7,18,23,3,2,1,11,0,-1]
print(improved_merge_sort(test, lambda x, y: x >= y))
print(improved_merge_sort(test))
# IPython line magic (not plain Python): times repeated calls.
%timeit improved_merge_sort(test)


[1, 3, 5, 6, 8, 10]
[-5, -1, 0, 1, 3, 5]
[-6, 0, 1, 3, 5, 8]
[23, 18, 11, 7, 7, 6, 4, 3, 3, 2, 2, 1, 0, -1]
[-1, 0, 1, 2, 2, 3, 3, 4, 6, 7, 7, 11, 18, 23]
42.3 µs ± 3.43 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
