# QuickSort

#### We start by importing the modules we might use for the benchmarking

In [2]:
import random
import numpy as np
import timeit
import copy
import pandas
import matplotlib
import matplotlib.pyplot as plt
import math
from tqdm import tqdm

#### Then we implement the QuickSort code

In [3]:
def partition(arr,low,high):
    """
    Partition arr[low..high] (inclusive) in place around its last element.

    The value at arr[high] is used as the pivot. After the call every
    element that compared <= pivot sits to the left of the pivot and
    the remaining elements sit to its right.

    Returns the index at which the pivot ends up.
    """
    pivot_value = arr[high]
    boundary = low        # next slot for an element that is <= pivot

    for scan in range(low, high):
        if arr[scan] <= pivot_value:
            # move the small element into the left region and grow it
            arr[boundary], arr[scan] = arr[scan], arr[boundary]
            boundary += 1

    # drop the pivot between the two regions
    arr[boundary], arr[high] = arr[high], arr[boundary]
    return boundary

# Function to do Quick sort 
def quickSort(arr, low, high):
    """
    Sort arr[low..high] (both bounds inclusive) in place with quicksort.

    arr: mutable sequence to sort
    low: first index of the range to sort
    high: last index of the range to sort

    Returns arr itself, after sorting.
    """
    while low < high:

        # pi is partitioning index, arr[pi] is now
        # at right place
        pi = partition(arr, low, high)

        # Recurse only into the smaller side and keep looping on the
        # larger one. This bounds the recursion depth by about log2(n),
        # so already sorted input no longer risks a RecursionError.
        if pi - low < high - pi:
            quickSort(arr, low, pi - 1)
            low = pi + 1
        else:
            quickSort(arr, pi + 1, high)
            high = pi - 1
    return arr

def quicksort(A):
    """
    Sort the whole sequence A in place with quickSort and return it.
    """
    last_index = len(A) - 1
    return quickSort(A, 0, last_index)

#### Then we make a function that tests whether our implementation of the code is working as it should

In [4]:
def test_sorting_algorithm(algorithm): 
    """
    Function to test the correctness of a sorting algorithm
    Generating numpy array with random integers to be tested on
    Tests it 10 times and then i assume that it is correct
    """
    for i in range(1000):
        A = np.random.randint(1000, size=100)
        A_copy = A.copy()
        algorithm(A_copy)      
        assert A_copy.tolist() == sorted(A), 'The implementation of %s is wrong'% (algorithm.__name__)

In [5]:
test_sorting_algorithm(quicksort)

#### In the benchmarking we will need randomly generated data, so we will make some functions that give us different kinds of randomly generated data. This will test the sort function for the best, worst and average cases

In [6]:
def ascending_list_int(n):
    """
    Returns an ascending list of length n with the values 0, 1, ..., n-1
    """
    return list(range(n))

def descending_list_int(n):
    """
    Returns a descending list of length n with the values n-1, n-2, ..., 0
    """
    return list(reversed(range(n)))

def random_list_int(n):
    """
    Returns a list of length n filled with random integers

    Every value is drawn with random.randint from -n to n, both included.
    """
    values = []
    for _ in range(n):
        values.append(random.randint(-n, n))
    return values
  
def random_list_float(n):
    """
    Returns a list of length n filled with random floats

    Every value is drawn with random.uniform between -n and n.
    """
    values = []
    for _ in range(n):
        values.append(random.uniform(-n, n))
    return values


def random_charlist(n):
    """
    Returns a list of length n with random lowercase characters

    Each entry is drawn independently with random.choice from the
    26 letters a-z, so every letter is equally likely.
    """
    # Exactly one copy of each letter: a letter listed twice in the pool
    # would be drawn twice as often as the others.
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [random.choice(alphabet) for _ in range(n)]

# All test data generators: ascending ints, descending ints, random ints,
# random floats and random characters. Each one takes the list length n.
# NOTE(review): not referenced by the code visible in this notebook - confirm it is still needed.
test_data_list = [ascending_list_int, descending_list_int, random_list_int, random_list_float, random_charlist]

#### Making the function that will time the sort function for the benchmarking

In [7]:
def time_function(sort_function, test_data, repeat=5, number=10):
    """
    Actual function which does the timing

    The sort function is always called on a fresh shallow copy of
    test_data, so test_data itself is never modified and every call
    sorts the same input.

    sort_function: callable taking the data to sort as its only argument
    test_data: the data that is copied and sorted
    repeat: how many timing runs are made (default 5)
    number: how many times the sort is executed in each run (default 10)

    Returns the smallest run time in seconds. One run time is the total
    for `number` executions, not the time of a single sort.
    """
    clock = timeit.Timer('func(copy(data))',
                         globals={'func': sort_function, 'data': test_data,
                                  'copy': copy.copy})

    timings = clock.repeat(repeat=repeat, number=number)

    # NOTE: this file is overwritten on every call, so it only ever
    # holds the run times of the most recent call.
    pandas.DataFrame(timings).to_pickle("quick_sort_times")

    return np.min(timings)

#### Function that does the benchmarking

In [9]:
# List lengths that are timed for the random integer and random float data.
test_sizes = [10, 100, 1000, 10000, 100000]
# Shorter list lengths that are timed for the ascending, descending and
# random character data.
# NOTE(review): presumably kept small because these inputs push a
# last-element-pivot quicksort towards quadratic running time - confirm.
test_size2 = [10, 100, 1000]

def benchmark_function(sort_function):
    """
    Times sort_function on every kind of test data and collects the results

    Random integer and random float lists are timed for every length in
    test_sizes. Ascending, descending and random character lists are
    timed for every length in test_size2.

    Each series has the form [lengths, times]. The five series are
    returned in the order ascending, descending, random int, random
    float, random characters. The same structure is also stored as a
    pandas DataFrame in the pickle file "quick_sort".
    """
    ascending, descending = [[], []], [[], []]
    random_ints, random_floats = [[], []], [[], []]
    characters = [[], []]

    # (series, generator) pairs, listed in the order they are timed
    large_runs = ((random_ints, random_list_int),
                  (random_floats, random_list_float))
    small_runs = ((ascending, ascending_list_int),
                  (descending, descending_list_int),
                  (characters, random_charlist))

    for n in tqdm(test_sizes):
        for series, make_data in large_runs:
            series[0].append(n)
            series[1].append(time_function(sort_function, make_data(n)))

    for n in tqdm(test_size2):
        for series, make_data in small_runs:
            series[0].append(n)
            series[1].append(time_function(sort_function, make_data(n)))

    results = [ascending, descending, random_ints, random_floats, characters]

    pandas.DataFrame(results).to_pickle("quick_sort")

    return results

# Run the whole benchmark for quicksort and keep the five [lengths, times]
# series; the call also writes them to the pickle file "quick_sort".
benchmark_quick = benchmark_function(quicksort)

100%|██████████| 5/5 [01:09<00:00, 20.22s/it]
100%|██████████| 3/3 [00:15<00:00,  4.68s/it]
