In [1]:
import numpy as np

# Sorting Algorithms
Notes and Implementations

In [2]:
def get_rand_ints(num=50, sort=False):
    int_list = np.random.randint(low=-10, high=100, size=num)
    if sort:
        int_list = np.sort(int_list)
    print(f"Unsorted List:\n{int_list}")
    return int_list.tolist()


## Selection Sort
Walk array and for each place $i$ in the array of size $n$, find the smallest value amongst the other $n-1$ values. If that value is smaller than the $i$, swap. Continue until array is walked. Will sort in place.

Time: $O(n^2)$ - Avg and Worst case  
Space: $O(1)$

In [11]:
num_list = get_rand_ints(15)

for i in range(len(num_list)):
    for j in range(i+1, len(num_list)):
        if num_list[j] < num_list[i]:
            i_val = num_list[i]
            num_list[i] = num_list[j]
            num_list[j] = i_val
            
print(f"Sorted list:\n{np.array(num_list)}")

Unsorted List:
[49 53 33 45 49 -6 -7 -6 65 49 18 75 66 29 98]
Sorted list:
[-7 -6 -6 18 29 33 45 49 49 49 53 65 66 75 98]


## Insertion Sort
Walks array, separating the array into 2 parts: sorted and unsorted. Takes elements from the unsorted portion of the array and moves them to the correct sequential order in the sorted portion of the array. Stops when the unsorted portion is empty. Sorts elements in-place.   

Time: $O(n^2)$ - Avg and Worst case  
Space: $O(1)$

In [10]:
int_list = get_rand_ints(15)

for i in range(1,len(int_list)):
    val = int_list[i]
    ptr = i-1
    while ptr >= 0 and int_list[ptr] > val:
        #need to move value at ptr up to make room for val. Will have value duplicated at ptr and ptr+1 until next iteration
        int_list[ptr+1] = int_list[ptr]
        ptr-=1
    
    #insert val into correct place
    int_list[ptr+1] = val
    
print(f"Sorted List:\n{np.array(int_list)}")

Unsorted List:
[25  4 11 11 88 26 56 75 70 47 93 58 41 27 89]
Sorted List:
[ 4 11 11 25 26 27 41 47 56 58 70 75 88 89 93]


## Bubble Sort
Sweep through array starting at the beginning of the array and swap elements if $n_1 > n_0$. Will need to do $n \times n-1$ passes 

Time: $O(n^2)$ - Avg and Worst case  
Space: $O(1)$

In [5]:
num_list = get_rand_ints(15)

for i in range(1, len(num_list)):
    for j in range(0, len(num_list) - i):
        if num_list[j] > num_list[j+1]:
            jp1_val = num_list[j+1]
            num_list[j+1] = num_list[j]
            num_list[j] = jp1_val
            
print(f"Sorted List:\n{np.array(num_list)}")

Unsorted List:
[81 50 22 39 92 44 32 70 76 79 -6 77 29 87 89]
Sorted List:
[-6 22 29 32 39 44 50 70 76 77 79 81 87 89 92]


## Merge Sort
A sorting method that recursively splits the list in-half until lists of size 1 are produced. Then a merge method is called to combine two sorted lists, initially consisting of two single element lists, by looping over the two sublists and inserting into the main list the correct value by order. Sorts elements in-place.  

Time: $O(nlog(n))$ - Average and Worst case   
Space: $O(n)$

In [15]:
def merge(l, a, b, c):
    '''Combines sorted sub-arrays l[a:b+1] (endpoint exclusive) and l[b+1:c+1] into sorted sub-array l[a:c+1]'''
    #create two lists left, right from main list
    left = l[a:b+1]
    right = l[b+1:c+1]
    n_left = b-a+1
    n_right = c-b
    
    ileft = 0
    iright = 0
    # loop for full range of elements and insert correct value from sublists into main lists in ascending order
    for i in range(a, c+1):
        if left[ileft] <= right[iright]:
            l[i] = left[ileft]
            ileft+=1
        else:
            l[i] = right[iright]
            iright+=1
        
        # if either left or right index is greater than respective list length, 
        #  append remaining values in the other list to main list
        if ileft >= n_left and iright < n_right:
            l[i+1:c+1] = right[iright:]
            break
        elif iright >= n_right and ileft < n_left:
            l[i+1:c+1] = left[ileft:]
            break
    return l

def merge_sort(l, a, c):
    # do nothing when single element list, i.e. a=c
    if a < c:
        mid_pt = (a + c) // 2
        merge_sort(l, a, mid_pt)
        merge_sort(l, mid_pt+1, c)
        merge(l, a, mid_pt, c)
        
int_list = get_rand_ints()
merge_sort(int_list, 0, len(int_list)-1)

print(f"\n Sorted List:\n{np.array(int_list)}")

Unsorted List:
[82 -3 58 -7 75 65 51 24  4 28 63 28 85 19 56 -4 60 49 99 15  0 86 78 19
  4 27 37 86 56 32  7 96 33 19 44  3 59 25 79 30 60 52 81 18 72 22 78 14
 -8  2]

 Sorted List:
[-8 -7 -4 -3  0  2  3  4  4  7 14 15 18 19 19 19 22 24 25 27 28 28 30 32
 33 37 44 49 51 52 56 56 58 59 60 60 63 65 72 75 78 78 79 81 82 85 86 86
 96 99]


## QuickSort 
In a given array, choose a random partition point (here the midpoint). Move all values less than this partition value/point to the left and all values greater to the right. Recursively perform this partitioning again on the left half of the list $[a, part\_ind]$ and on the right half of the list $[part\_ind+1, b]$. While do this recursive process until left and right list each consist of one value. 

Time: $O(nlog(n))$ - Average, $O(n^2)$ - Worst   
Space: $O(1)$

In [27]:
def get_partition(arr, left, right):
    p_val = arr[left + (right - left) // 2]
    
    while left <= right:
        while arr[left] < p_val: 
            left+=1
        while arr[right] > p_val: 
            right-=1
        
        if left <= right:
            l_val = arr[left]
            arr[left] = arr[right]
            arr[right] = l_val
            left+=1
            right-=1
    
    return left-1

def quick_sort(arr, left, right):
    p_ind = get_partition(arr, left, right)
    if left < p_ind: 
        quick_sort(arr, left, p_ind)
    if right > p_ind+1:
        quick_sort(arr, p_ind+1, right)
    

In [28]:
nums = get_rand_ints(10)
quick_sort(nums, 0, 9)
print(f"\n Sorted List:\n{np.array(nums)}")

Unsorted List:
[77 37 85 55 10 98 17 76 70 35]

 Sorted List:
[10 17 35 37 55 70 76 77 85 98]


## Counting Sort

Can be used only for integer values. Creates count array that is of sufficient size where the values of the original array are indices into the count array and keep a total of the number of their occurrences in the original array. The count array is converted to a cumulative sum. For each value in the original array, perform a look up in the count array, and that value minus 1 is the correct position of that value in a new output array. Count array values should be decremented after insertion. 

Time: $O(n+k)$ where $k$ is the range of the input   
Space: $O(n+k)$

In [91]:
def count_sort(arr):
    max_arg = max(arr)
    min_arg = min(arr)
    # To account for negative numbers, shift to the right by min value and plus 1 since max value must be an index
    arg_range = max_arg - min_arg + 1
    num_els = len(arr)

    # Initialize counts array to be 0s of size rull range of values    
    counts = [0]*arg_range
    output = [0]*num_els

    for arg in arr:
        # Use value in original array as index in count array and increment 
        counts[arg-min_arg] += 1

    for i in range(1, arg_range):
        # Convert into a cumulative sum
        counts[i] += counts[i-1]

    # For each value in array, look up count value, using minimum shift to the right 
    #  Going in reverse to preserve order of equal valued elements
    for i in range(num_els-1, -1, -1):
        val = counts[arr[i] - min_arg]
        # count val minus 1 is correct index of array value
        output[val-1] = arr[i]
        # decrement count value
        counts[arr[i] - min_arg] -=1

    for i in range(num_els):
        arr[i] = output[i]


In [96]:
arr = get_rand_ints(10)
count_sort(arr)
print(f"\n Sorted List:\n{np.array(arr)}")

Unsorted List:
[54 27 29 35 68 33  9 67 -3 41]

 Sorted List:
[-3  9 27 29 33 35 41 54 67 68]


## Radix Sort

Counting sort is good for when values are in the range of $[-k, k]$ but not for the range of $[1, n^2]$ or greater. Radix sort can be used to augment Counting Sort. Sort values in array by their significant using counting sort. Sort first by the ones place, tens place, etc.

Time: $O(nk)$   
Space: $O(max(array))$

In [116]:
def count_sort(arr, pwr=1):
    # Convert array to array filled digit of interest
    proc_arr = []
    for v in arr:
        v = v // pwr
        if v >= 0:
            proc_arr.append(v % 10)
        else:
            v = abs(v) % 10
            proc_arr.append(-1*v)
    
    max_arg = max(proc_arr)
    min_arg = min(proc_arr)
    # To account for negative numbers, shift to the right by min value and plus 1 since max value must be an index
    arg_range = max_arg - min_arg + 1
    num_els = len(proc_arr)

    # Initialize counts array to be 0s of size rull range of values    
    counts = [0]*arg_range
    output = [0]*num_els

    for arg in proc_arr:
        # Use value in original array as index in count array and increment 
        counts[arg-min_arg] += 1

    for i in range(1, arg_range):
        # Convert into a cumulative sum
        counts[i] += counts[i-1]

    # For each value in array, look up count value, using minimum shift to the right 
    #  Going in reverse to preserve order of equal valued elements
    for i in range(num_els-1, -1, -1):
        val = counts[proc_arr[i] - min_arg]
        # count val minus 1 is correct index of original array value
        output[val-1] = arr[i]
        # decrement count value of processed array
        counts[proc_arr[i] - min_arg] -=1

    for i in range(num_els):
        arr[i] = output[i]
    
    
def radix_sort(arr):
    pwr = 1
    max_val_length = len(str(max(arr)))
    
    for i in range(max_val_length):
        count_sort(arr, pwr)
        pwr*=10


In [134]:
arr = get_rand_ints(10)
radix_sort(arr)
print(f"\n Sorted List:\n{np.array(arr)}")

Unsorted List:
[ 43  47   7  58   3   0  62  -4 -10  57]

 Sorted List:
[ -4 -10   0   3   7  43  47  57  58  62]


## Binary Search Insert
Given a sorted list, performs a recursive binary search to find the index to insert a new value and still maintain the ordering

In [2]:
def get_ordered_place(ordered_values, val, left, right) -> int:
    '''Returns index where new val should be inserted in ordered_values to maintain order'''
    # if left crosses right, return valid insert position
    if left > right:
        return left

    mid_pt = (left+right) // 2
    # if midpoint equals value to be inserted midpoint
    if ordered_values[mid_pt] == val:
        return mid_pt

    # value should be inserted in upper half
    if ordered_values[mid_pt] < val:
        return get_ordered_place(ordered_values, val, mid_pt+1, right)
    else:
        # value should be inserted in lower half
        return get_ordered_place(ordered_values, val, left, mid_pt-1)

In [14]:
ordered = get_rand_ints(10, sort=True)
val = 13
ind = get_ordered_place(ordered, val, 0, len(ordered)-1)
ordered.insert(ind, val)
print(f"\n Sorted List With New Value {val}:\n{np.array(ordered)}")

Unsorted List:
[ 1  4 19 27 33 37 41 47 65 85]

 Sorted List With New Value 13:
[ 1  4 13 19 27 33 37 41 47 65 85]
