Instructions

The file contains all of the integers between 1 and 10,000 (inclusive, with no repeats)
in unsorted order. The integer in the ith row of the file gives you the ith entry of an
input array.

Your task is to compute the total number of comparisons used to sort the given input
file by QuickSort. As you know, the number of comparisons depends on which elements are
chosen as pivots, so we'll ask you to explore three different pivoting rules.

You should not count comparisons one-by-one. Rather, when there is a recursive call on
a subarray of length m, you should simply add m−1 to your running total of comparisons.
(This is because the pivot element is compared to each of the other m−1 elements in the
subarray in this recursive call.)

WARNING: The Partition subroutine can be implemented in several different ways, and
different implementations can give you differing numbers of comparisons. For this problem,
you should implement the Partition subroutine exactly as it is described in the video
lectures (otherwise you might get the wrong answer).

In [1]:
import random

# Running total of comparisons: quick_sort adds to it on every call that
# partitions a range, and qs_comparison_count resets it to 0 before a run.
total_comparision = 0

def quick_sort(A, Blower, Bupper, pivot_choice):
    """
    Sort A[Blower..Bupper] in place with quick sort, counting comparisons.

    A: input array of integers (modified in place)
    Blower: lower bound index for sorting (inclusive)
    Bupper: upper bound index for sorting (inclusive)
    pivot_choice: string on how to choose pivot element
    return: None; the comparison count is accumulated in the global
            total_comparision
    """
    global total_comparision

    # A range with fewer than two elements needs no work and costs nothing.
    if not Blower < Bupper:
        return

    # A range of m elements is charged m - 1 comparisons up front.
    total_comparision += Bupper - Blower

    pivot_index = partition(A, Blower, Bupper, pivot_choice)
    left_end = pivot_index - 1
    right_start = pivot_index + 1

    # Recurse only into sides that still hold at least two elements.
    if left_end > Blower:
        quick_sort(A, Blower, left_end, pivot_choice)
    if right_start < Bupper:
        quick_sort(A, right_start, Bupper, pivot_choice)


def partition(A, Blower, Bupper, pivot_choice):
    """
    Partition A[Blower..Bupper] in place around a pivot.

    The pivot picked by choose_pivot is first swapped to the front of the
    range. The rest of the range is then scanned once, moving every element
    smaller than the pivot into the left part. Finally the pivot is swapped
    into the slot between the two parts.

    A: input array of integers (modified in place)
    Blower: lower bound index for sorting (inclusive)
    Bupper: upper bound index for sorting (inclusive)
    pivot_choice: methods on choosing pivot
    return: the final index of the pivot element
    """
    p = choose_pivot(A, Blower, Bupper, pivot_choice)

    # If the pivot is not the first element, swap it to the front.
    # Indices are compared by value: identity ("is not") is unreliable for
    # integers outside the interpreter's small-int cache.
    if p != Blower:
        A[p], A[Blower] = A[Blower], A[p]

    # The pivot stays at A[Blower] for the whole scan, so read it once.
    pivot_value = A[Blower]

    # A[Blower+1 .. i-1] holds the elements seen so far that are < pivot.
    i = Blower + 1
    for j in range(Blower + 1, Bupper + 1):
        if A[j] < pivot_value:
            A[i], A[j] = A[j], A[i]
            i += 1

    # Put the pivot between the "smaller" part and the rest.
    A[Blower], A[i - 1] = A[i - 1], A[Blower]

    return i - 1


def choose_pivot(A, Blower, Bupper, choice):
    """
    Subroutine for pivot selection within A[Blower..Bupper].

    A: input array of integers
    Blower: lower bound index for sorting (inclusive)
    Bupper: upper bound index for sorting (inclusive)
    choice: methods on choosing pivot
      first: always use the lower bound index
      last: always use the upper bound index
      median: use the index holding the median of
              {A[Blower], A[middle], A[Bupper]}, where middle is the
              midpoint of the range rounded down
      default (any other value): random index in [Blower, Bupper]
    return: the index of pivot element
    """
    if choice == 'first':
        return Blower
    if choice == 'last':
        return Bupper
    if choice == 'median':
        # Floor division keeps the index an int under true division as well.
        middle = (Bupper - Blower) // 2 + Blower
        l = A[Blower]
        r = A[Bupper]
        m = A[middle]
        # A non-positive product means the value lies between the other two
        # (inclusive), i.e. it is a median. Using <= rather than < makes ties
        # resolve to a true median instead of falling through to Bupper; in a
        # two-element range (middle == Blower) this selects Blower.
        if (l - m) * (l - r) <= 0:
            return Blower
        if (m - l) * (m - r) <= 0:
            return middle
        return Bupper
    return random.randint(Blower, Bupper)

In [2]:
# Sample input for the test cases; reset_a() rebinds it to a fresh list.
a = [3,8,2,5,1,4,7,6,0,12,20,30,21,16,13]

def reset_a(arrange='fixed'):
    """
    Rebind the global list a to a fresh copy of one of the sample inputs.

    arrange: which arrangement of the sample values to load
      sorted: ascending order
      reversed: descending order
      semi: the upper half of the sorted values followed by the lower half
      default (any other value): the fixed unsorted arrangement
    return: None
    """
    global a

    # Built on every call so each reset hands out brand-new list objects.
    presets = (
        ('sorted', [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30]),
        ('reversed', [30, 21, 20, 16, 13, 12, 8, 7, 6, 5, 4, 3, 2, 1, 0]),
        ('semi', [8, 12, 13, 16, 20, 21, 30, 0, 1, 2, 3, 4, 5, 6, 7]),
    )
    for label, values in presets:
        if arrange == label:
            a = values
            return

    # Unrecognised names fall back to the fixed unsorted sample.
    a = [3, 8, 2, 5, 1, 4, 7, 6, 0, 12, 20, 30, 21, 16, 13]

def qs_comparison_count(data, choice, Print=False):
    """
    Sort data with quick_sort and print how many comparisons were counted.

    data: list to sort (modified in place)
    choice: pivot rule, passed through to quick_sort
    Print: when true, print the sorted list followed by the count;
           otherwise print only the count
    return: None
    """
    global total_comparision

    # Start from zero so earlier runs do not leak into this count.
    total_comparision = 0
    quick_sort(data, 0, len(data) - 1, choice)

    # print is called with a single parenthesised argument so the output is
    # the same whether print is a statement (Python 2) or a function (3).
    if Print:
        print("{0} {1}".format(data, total_comparision))
    else:
        print(total_comparision)

# test cases: run every pivot rule against every arrangement of the sample
for pivot in ('first', 'last', 'median', 'random'):
    for arrangement in ('fixed', 'sorted', 'reversed', 'semi'):
        # Single parenthesised argument: same output on Python 2 and 3.
        print("pivot choice: {0:>6}, data arrangement: {1:>8}".format(pivot, arrangement))
        # quick_sort sorts in place, so reload the sample before each run.
        reset_a(arrangement)
        qs_comparison_count(a, pivot, True)
        # A bare `print` is a no-op expression on Python 3; emit the blank
        # separator line explicitly.
        print("")

pivot choice:  first, data arrangement:    fixed
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 42

pivot choice:  first, data arrangement:   sorted
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 105

pivot choice:  first, data arrangement: reversed
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 105

pivot choice:  first, data arrangement:     semi
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 51

pivot choice:   last, data arrangement:    fixed
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 51

pivot choice:   last, data arrangement:   sorted
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 105

pivot choice:   last, data arrangement: reversed
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 105

pivot choice:   last, data arrangement:     semi
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 42

pivot choice: median, data arrangement:    fixed
[0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 16, 20, 21, 30] 35

pivot choice: median, data arrangement:   sorted
[0

In [None]:
# Count the comparisons on the assignment input once per pivoting rule.
for pivot_rule in ('first', 'last', 'median'):
    # quick_sort sorts in place, so the file is re-read for every rule.
    # The with-block closes the file handle as soon as the list is built.
    with open('array.txt') as source:
        content = [int(line.rstrip('\n')) for line in source]
    qs_comparison_count(content, pivot_rule)