Choose a random pivot; the average running time is O(n log n).
<br>
Partitioning takes O(n) time and requires no extra memory.

In [2]:
import numpy as np
import random

In [14]:
def quicksort(arr):
    """Return the elements of ``arr`` in ascending order (randomized quicksort).

    A list of length 0 or 1 is returned unchanged. Longer lists are split
    around a pivot chosen by ``partition`` and both sides are sorted
    recursively on slices.

    Note: ``partition`` rearranges the list it is given in place, so the
    list passed in by the caller is reordered as a side effect.
    """
    if len(arr) <= 1:
        return arr

    partitioned, split, pivot_value = partition(arr, 0, len(arr) - 1)
    smaller = quicksort(partitioned[:split])
    larger = quicksort(partitioned[split + 1:])
    return smaller + [pivot_value] + larger
    
def partition(arr, l, r):
    """Partition ``arr[l..r]`` in place around a randomly chosen pivot.

    Elements strictly smaller than the pivot end up to its left; everything
    else (including duplicates of the pivot) ends up to its right.

    Returns a tuple ``(arr[l:r+1], index, pivot_value)``: a copy of the
    partitioned range, the pivot's final position in ``arr`` (this matches
    its position in the copy only when ``l == 0``), and the pivot itself.
    """
    chosen = random.randint(l, r)
    pivot_value = arr[chosen]

    # Park the pivot at the left edge of the range before scanning.
    if chosen != l:
        arr[l], arr[chosen] = pivot_value, arr[l]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = l + 1
    for scan in range(l + 1, r + 1):
        if arr[scan] < pivot_value:
            arr[boundary], arr[scan] = arr[scan], arr[boundary]
            boundary += 1

    # Drop the pivot between the "smaller" group and the rest.
    final = boundary - 1
    arr[l], arr[final] = arr[final], pivot_value

    return (arr[l:r + 1], final, pivot_value)
    

In [15]:
# Sanity check on a small list that contains a duplicate value (4).
quicksort([2,5,4,6,7,3,1,4,12,11])

[1, 2, 3, 4, 4, 5, 6, 7, 11, 12]

Assignment
<br>
<br>
Your task is to compute the total number of comparisons used to sort the given input file by QuickSort. As you know, the number of comparisons depends on which elements are chosen as pivots, so we'll ask you to explore three different pivoting rules.
<br>
You should not count comparisons one-by-one. Rather, when there is a recursive call on a subarray of length m, you should simply add m−1 to your running total of comparisons. (This is because the pivot element is compared to each of the other m−1 elements in the subarray in this recursive call.)

In [154]:
# Open the assignment's input file (path is relative to the working directory).
f = open('datasets/QuickSort.txt', 'r')

In [155]:
# Parse one integer per line; the ``with`` closes the handle once the read
# is finished so the open file is not leaked.
with f:
    array = [int(line.strip()) for line in f]

In [104]:
# the first element of the array as the pivot element.

def partition_a(arr, l, r):
    """Partition ``arr[l..r]`` in place, using the first element as pivot.

    Elements strictly smaller than the pivot end up to its left; everything
    else (including duplicates of the pivot) ends up to its right.

    Returns a tuple ``(arr[l:r+1], index, pivot_value)``: a copy of the
    partitioned range, the pivot's final position in ``arr`` (this matches
    its position in the copy only when ``l == 0``), and the pivot itself.
    """
    pivot_value = arr[l]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = l + 1
    for scan in range(l + 1, r + 1):
        if arr[scan] < pivot_value:
            arr[boundary], arr[scan] = arr[scan], arr[boundary]
            boundary += 1

    # Drop the pivot between the "smaller" group and the rest.
    final = boundary - 1
    arr[l], arr[final] = arr[final], pivot_value

    return (arr[l:r + 1], final, pivot_value)


def quicksort_a(arr):
    """Sort ``arr`` with first-element pivots and count the comparisons.

    Every call on a list of length m >= 2 contributes m - 1 to the count
    (the pivot is compared with each other element once); lists of length
    0 or 1 contribute nothing and are returned as-is.

    The caller's list is left untouched, so the same input can be reused
    to evaluate other pivot rules.

    Returns:
        A tuple ``(sorted_list, comparison_count)``.
    """
    size = len(arr)
    if size <= 1:
        return (arr, 0)

    # partition_a rearranges its argument in place, so give it a copy
    # rather than the caller's list.
    parts, p, p_value = partition_a(list(arr), 0, size - 1)
    left, c_l = quicksort_a(parts[:p])
    right, c_r = quicksort_a(parts[p + 1:])
    return (left + [p_value] + right, size - 1 + c_l + c_r)
    

In [105]:
# Comparison count with the first element as pivot; the sorted list is discarded.
_, c_a = quicksort_a(array)

In [106]:
c_a

162085

In [110]:
# always using the final element of the given array as the pivot element

def partition_b(arr, l, r):
    """Partition ``arr[l..r]`` in place, using the last element as pivot.

    The pivot is first exchanged with ``arr[l]`` and the range is then
    partitioned exactly as for a first-element pivot: elements strictly
    smaller than the pivot go left, everything else goes right.

    Returns a tuple ``(arr[l:r+1], index, pivot_value)``: a copy of the
    partitioned range, the pivot's final position in ``arr`` (this matches
    its position in the copy only when ``l == 0``), and the pivot itself.
    """
    pivot_value = arr[r]

    # Exchange first and last so the pivot sits at the left edge.
    arr[l], arr[r] = pivot_value, arr[l]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = l + 1
    for scan in range(l + 1, r + 1):
        if arr[scan] < pivot_value:
            arr[boundary], arr[scan] = arr[scan], arr[boundary]
            boundary += 1

    # Drop the pivot between the "smaller" group and the rest.
    final = boundary - 1
    arr[l], arr[final] = arr[final], pivot_value

    return (arr[l:r + 1], final, pivot_value)

def quicksort_b(arr):
    """Sort ``arr`` with last-element pivots and count the comparisons.

    Every call on a list of length m >= 2 contributes m - 1 to the count
    (the pivot is compared with each other element once); lists of length
    0 or 1 contribute nothing and are returned as-is.

    The caller's list is left untouched, so the same input can be reused
    to evaluate other pivot rules.

    Returns:
        A tuple ``(sorted_list, comparison_count)``.
    """
    size = len(arr)
    if size <= 1:
        return (arr, 0)

    # partition_b rearranges its argument in place, so give it a copy
    # rather than the caller's list.
    parts, p, p_value = partition_b(list(arr), 0, size - 1)
    left, c_l = quicksort_b(parts[:p])
    right, c_r = quicksort_b(parts[p + 1:])
    return (left + [p_value] + right, size - 1 + c_l + c_r)

In [114]:
# Comparison count with the last element as pivot; the sorted list is discarded.
_, c_b = quicksort_b(array)
c_b

164123

In [156]:
# using the "median-of-three" pivot rule

def partition_c(arr, l, r):
    """Partition ``arr[l..r]`` in place around the "median-of-three" pivot.

    The pivot is the median of the first, middle and last elements of the
    range. For an even-length range the middle is the last element of the
    first half. Elements strictly smaller than the pivot end up to its left;
    everything else ends up to its right.

    Elements only need to support ``<``; they do not have to be hashable.

    Returns a tuple ``(arr[l:r+1], index, pivot_value)``: a copy of the
    partitioned range, the pivot's final position in ``arr`` (this matches
    its position in the copy only when ``l == 0``), and the pivot itself.
    """
    # Floor division yields the required middle for odd and even lengths.
    mid = l + (r - l) // 2

    # Rank the three candidate positions by value and keep the middle one.
    pivot = sorted((l, mid, r), key=lambda idx: arr[idx])[1]
    pivot_value = arr[pivot]

    # Park the pivot at the left edge of the range before scanning.
    if pivot != l:
        arr[l], arr[pivot] = pivot_value, arr[l]

    # ``i`` is the first slot not yet known to hold a smaller element.
    i = l + 1
    for j in range(l + 1, r + 1):
        if arr[j] < pivot_value:
            arr[i], arr[j] = arr[j], arr[i]
            i += 1

    # Drop the pivot between the "smaller" group and the rest.
    arr[l], arr[i - 1] = arr[i - 1], pivot_value

    return (arr[l:r + 1], i - 1, pivot_value)

def quicksort_c(arr):
    """Sort ``arr`` with median-of-three pivots and count the comparisons.

    Every call on a list of length m >= 2 contributes m - 1 to the count
    (the pivot is compared with each other element once); lists of length
    0 or 1 contribute nothing and are returned as-is.

    The caller's list is left untouched, so the same input can be reused
    to evaluate other pivot rules.

    Returns:
        A tuple ``(sorted_list, comparison_count)``.
    """
    size = len(arr)
    if size <= 1:
        return (arr, 0)

    # partition_c rearranges its argument in place, so give it a copy
    # rather than the caller's list.
    parts, p, p_value = partition_c(list(arr), 0, size - 1)
    left, c_l = quicksort_c(parts[:p])
    right, c_r = quicksort_c(parts[p + 1:])
    return (left + [p_value] + right, size - 1 + c_l + c_r)

In [157]:
# Comparison count with the median-of-three pivot; the sorted list is discarded.
_, c_c = quicksort_c(array)
c_c

138382

In [24]:
# 2018-10-07

# partition with O(n) extra memory, the easy way out
# assumes the pivot is the first element of the list

def partition(arr):
    """Return a new list holding ``arr`` partitioned around its first element.

    Elements for which ``v >= pivot`` is false come first, in their original
    order, followed by the pivot, followed by the remaining elements in
    reverse order of appearance. ``arr`` itself is not modified.

    An empty input yields an empty list.
    """
    if not arr:
        return []

    pv = arr[0]  # pivot_value
    smaller, not_smaller = [], []
    for v in arr[1:]:
        (not_smaller if v >= pv else smaller).append(v)

    # The right-hand group is filled from the far end inward, which is the
    # same as appending in order and reversing at the end.
    not_smaller.reverse()
    return smaller + [pv] + not_smaller

In [26]:
# Example: 3 is the pivot, so it should land between the smaller and larger values.
partition([3, 8, 2, 5, 1, 4, 7, 6])

[2, 1, 3, 6, 7, 4, 5, 8]

In [57]:
# In-place partition
# assumes the pivot is the first element of the list

def partition(arr):
    """Partition ``arr`` in place around its first element; returns None.

    Afterwards every element left of the pivot compared as smaller than it
    and every element right of it compared as ``>=`` to it.
    """
    pivot_value = arr[0]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = 1
    for pos in range(1, len(arr)):
        if arr[pos] >= pivot_value:
            continue
        arr[boundary], arr[pos] = arr[pos], arr[boundary]
        boundary += 1

    # Move the pivot from the front to its final slot.
    last_smaller = boundary - 1
    arr[0], arr[last_smaller] = arr[last_smaller], arr[0]
    
# partition works in place and returns nothing, so inspect the list afterwards.
my_array = [3, 8, 2, 5, 1, 4, 7, 6]
partition(my_array)
my_array

[1, 2, 3, 5, 8, 4, 7, 6]

In [97]:
# always choose the first element as the pivot for quick sort

def partition(arr):
    """Partition ``arr`` in place around its first element.

    Returns the index at which the pivot ends up: everything before it
    compared as smaller than the pivot, everything after it as ``>=``.
    """
    pivot_value = arr[0]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = 1
    for pos in range(1, len(arr)):
        if arr[pos] >= pivot_value:
            continue
        arr[boundary], arr[pos] = arr[pos], arr[boundary]
        boundary += 1

    split_point = boundary - 1
    arr[0], arr[split_point] = arr[split_point], arr[0]
    return split_point

def quick_sort(arr):
    """Return the elements of ``arr`` in ascending order as a list.

    Lists of length 0 or 1 are returned unchanged. Longer lists are
    partitioned with ``partition`` (which reorders ``arr`` in place) and the
    two sides are sorted recursively on slices.
    """
    if len(arr) <= 1:
        return arr

    split_point = partition(arr)
    split_value = arr[split_point]
    smaller = quick_sort(arr[:split_point])
    larger = quick_sort(arr[split_point + 1:])
    return smaller + [split_value] + larger
    
# Try quick_sort on a small sample; the sorted list is the call's return value.
my_array = [3, 8, 2, 5, 1, 4, 7, 6]
quick_sort(my_array)

[1, 2, 3, 4, 5, 6, 7, 8]

In [8]:
# pick random pivot

import random

def partition(arr):
    """Partition ``arr`` in place around a randomly chosen pivot.

    Returns the index at which the pivot ends up: everything before it
    compared as smaller than the pivot, everything after it as ``>=``.
    """
    chosen = random.randint(0, len(arr) - 1)
    pivot_value = arr[chosen]

    # Park the pivot at the front before scanning the rest.
    arr[0], arr[chosen] = arr[chosen], arr[0]

    # ``boundary`` is the first slot not yet known to hold a smaller element.
    boundary = 1
    for pos in range(1, len(arr)):
        if arr[pos] >= pivot_value:
            continue
        arr[boundary], arr[pos] = arr[pos], arr[boundary]
        boundary += 1

    split_point = boundary - 1
    arr[0], arr[split_point] = arr[split_point], arr[0]
    return split_point

def quick_sort(arr):
    """Return the elements of ``arr`` in ascending order as a list.

    Lists of length 0 or 1 are returned unchanged. Otherwise ``partition``
    reorders ``arr`` in place around a pivot, and the slices on either side
    of the pivot are sorted recursively and joined back together.
    """
    if not len(arr) > 1:
        return arr

    pivot_index = partition(arr)
    pivot_item = arr[pivot_index]
    lower_part = quick_sort(arr[:pivot_index])
    upper_part = quick_sort(arr[pivot_index + 1:])
    return lower_part + [pivot_item] + upper_part

# Same sample as before; the result must be sorted whichever pivots are drawn.
my_array = [3, 8, 2, 5, 1, 4, 7, 6]
quick_sort(my_array)

[1, 2, 3, 4, 5, 6, 7, 8]

In [15]:
# 2018-10-07 final in place solution

import random

def quick_sort_(arr, first, last):
    """Sort ``arr[first..last]`` (inclusive bounds) in place.

    Ranges with fewer than two elements (``first >= last``) are left alone.
    """
    if first < last:
        split_point = partition(arr, first, last)
        quick_sort_(arr, first, split_point - 1)
        quick_sort_(arr, split_point + 1, last)


def partition(arr, first, last):
    """Partition ``arr[first..last]`` in place around a random pivot.

    Returns the index at which the pivot ends up: within the range,
    everything before it compared as smaller than the pivot and everything
    after it as ``>=``.
    """
    p = random.randint(first, last)
    pv = arr[p]

    # Park the pivot at the left edge of the range before scanning.
    arr[first], arr[p] = arr[p], arr[first]

    # ``i`` is the first slot not yet known to hold a smaller element.
    i = first + 1
    for j in range(first + 1, last + 1):
        if arr[j] >= pv:
            continue
        arr[i], arr[j] = arr[j], arr[i]
        i += 1

    split_point = i - 1
    arr[first], arr[split_point] = arr[split_point], arr[first]
    return split_point


def quick_sort(arr):
    """Sort the list ``arr`` in place in ascending order; returns None."""
    if len(arr) > 1:
        # The recursive helper defined in this cell is ``quick_sort_``; the
        # previous call to ``quick_sort_helper`` named a function that is
        # not defined here.
        quick_sort_(arr, 0, len(arr) - 1)

In [16]:
# The in-place variant returns nothing, so display the list after the call.
my_array = [3, 8, 2, 5, 1, 4, 7, 6]
quick_sort(my_array)
my_array

[1, 2, 3, 4, 5, 6, 7, 8]