### Importing the modules needed to run a benchmark analysis of the HeapSort algorithm

In [28]:
import random
import numpy as np
import timeit
import copy
import pandas
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm

### Implementing the HeapSort algorithm

In [29]:
def heapSort(data):
    """
    Sort ``data`` in place in ascending order using heap sort.

    The sequence is first rearranged into a max heap; the current maximum
    is then repeatedly swapped to the end of the shrinking unsorted prefix
    and the heap property is restored on what remains.

    Parameters
    ----------
    data : mutable sequence
        Items to sort. Must support ``len``, integer indexing with
        assignment, and ``<`` between its elements.

    Returns
    -------
    None
        ``data`` itself is reordered; nothing is returned.
    """
    n = len(data)

    # Build phase: only indices 0 .. n//2 - 1 can have a child inside the
    # heap, so sifting starts at the last parent. Index n is past the end
    # of the sequence and leaves have nothing to sift.
    for i in range(n // 2 - 1, -1, -1):
        heapify(data, n, i)

    # Extraction phase: move the current maximum to position i, then
    # restore the heap on the remaining prefix data[0:i].
    for i in range(n - 1, 0, -1):
        data[i], data[0] = data[0], data[i]
        heapify(data, i, 0)


def heapify(data, n, i):
    """
    Sift the item at index ``i`` down until the subtree rooted there
    satisfies the max-heap property.

    Only the first ``n`` positions of ``data`` are treated as part of the
    heap; the children of index ``k`` live at ``2*k + 1`` and ``2*k + 2``.
    ``data`` is modified in place and nothing is returned.
    """
    parent = i
    while True:
        left = 2 * parent + 1
        right = left + 1

        # Pick the biggest of parent / left child / right child.
        biggest = parent
        if left < n and data[parent] < data[left]:
            biggest = left
        if right < n and data[biggest] < data[right]:
            biggest = right

        # Parent already dominates its children: subtree is a heap.
        if biggest == parent:
            return

        data[parent], data[biggest] = data[biggest], data[parent]
        # Continue sifting from the child slot the old parent moved into.
        parent = biggest

### Testing the HeapSort algorithm

In [30]:
def test_sorting_algorithm(algorithm): 
    """
    Function to test the correctness of a sorting algorithm
    Generating numpy array with random integers to be tested on
    Tests it 10 times and then i assume that it is correct
    """
    for i in range(1000):
        A = np.random.randint(1000, size=100)
        A_copy = A.copy()
        algorithm(A_copy)      
        assert A_copy.tolist() == sorted(A), 'The implementation of %s is wrong'% (algorithm.__name__)

In [31]:
# Validate heapSort on random integer arrays before it is benchmarked;
# an AssertionError here means a result differed from Python's sorted().
test_sorting_algorithm(heapSort)

### Generating test data

In [32]:
def ascending_list_int(n):
    """
    Returns the already-sorted list [0, 1, ..., n - 1] of length n
    (an empty list when n <= 0)
    """
    return list(range(n))

def descending_list_int(n):
    """
    Returns the reverse-sorted list [n - 1, n - 2, ..., 0] of length n
    (an empty list when n <= 0)
    """
    return list(reversed(range(n)))

def random_list_int(n):
    """
    Returns a list of n integers, each drawn with random.randint from the
    inclusive range -n .. n
    """
    values = []
    for _ in range(n):
        values.append(random.randint(-n, n))
    return values
  
def random_list_float(n):
    """
    Returns a list of n floats, each drawn with random.uniform between
    -n and n
    """
    values = []
    for _ in range(n):
        values.append(random.uniform(-n, n))
    return values


def random_charlist(n):
    """
    Returns a list of length n of single lowercase ASCII letters, each
    drawn independently with random.choice from 'a'..'z'
    (an empty list when n <= 0)
    """
    # Every letter appears exactly once, so all 26 are equally likely;
    # a duplicated letter would be drawn twice as often as the others.
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [random.choice(alphabet) for _ in range(n)]

# The input generators defined above, collected in one list; each takes a
# length n and returns a list of n items.
test_data_list = [ascending_list_int, descending_list_int, random_list_int, random_list_float, random_charlist]

### Determining the running time per iteration for the algorithm

In [33]:
def time_function(sort_function, test_data, pickle_path="heap_sort_times"):
    """
    Time ``sort_function`` on ``test_data`` and return the fastest
    per-call time observed.

    The timed statement is ``sort_function(copy.copy(test_data))``, so
    ``test_data`` itself is never modified, and the cost of the shallow
    copy is included in the measurement.

    Parameters
    ----------
    sort_function : callable
        Called with a shallow copy of ``test_data``.
    test_data : object
        Input handed (as a copy) to ``sort_function`` on every execution.
    pickle_path : str or None, optional
        Where the seven per-call timings are pickled as a one-column
        DataFrame. The file is overwritten on every call. Pass ``None``
        to skip writing. Defaults to ``"heap_sort_times"``.

    Returns
    -------
    numpy.float64
        Minimum over the seven repeats of (total time / loop count).
    """
    # Copy inside the timed statement so that in-place sorting during one
    # execution cannot affect the input seen by the next execution.
    clock = timeit.Timer('func(copy(data))',
                         globals={'func': sort_function, 'data': test_data,
                                  'copy': copy.copy})

    # Let timeit choose how many executions make up one timing run.
    n_ar, _ = clock.autorange()

    # Seven timing runs, each normalised to time per single execution.
    per_call = np.array(clock.repeat(repeat=7, number=n_ar)) / n_ar

    if pickle_path is not None:
        pandas.DataFrame(per_call).to_pickle(pickle_path)

    return np.min(per_call)

### Performing benchmark analysis on the algorithm

In [34]:
# Input lengths to benchmark, growing by a factor of ten at each step.
test_sizes = [10, 100, 1000, 10000, 100000]

def benchmark_function(sort_function, sizes=None, generators=None):
    """
    Time ``sort_function`` on every combination of input size and input
    generator, and pickle the collected measurements to ``"heap_sort"``.

    Parameters
    ----------
    sort_function : callable
        Sorting function handed to ``time_function``.
    sizes : iterable of int, optional
        Input lengths to benchmark. Defaults to the module-level
        ``test_sizes``.
    generators : iterable of callable, optional
        Functions mapping a length ``n`` to a list of test data. Defaults
        to the module-level ``test_data_list``.

    Returns
    -------
    list
        One ``[sizes, times]`` pair per generator, in generator order,
        where ``times[k]`` is the value ``time_function`` returned for
        ``sizes[k]``.
    """
    sizes = list(test_sizes if sizes is None else sizes)
    generators = list(test_data_list if generators is None else generators)

    # One [sizes, times] accumulator per generator.
    all_data = [[[], []] for _ in generators]

    # Outer loop over sizes so the progress bar advances once per size.
    for size in tqdm(sizes):
        for series, make_data in zip(all_data, generators):
            series[0].append(size)
            series[1].append(time_function(sort_function, make_data(size)))

    heap_sort = pandas.DataFrame(all_data)
    heap_sort.to_pickle("heap_sort")

    return all_data

# Run the benchmark for heapSort. benchmark_heap holds one [sizes, times]
# pair per kind of input data; the same data is pickled to "heap_sort".
benchmark_heap = benchmark_function(heapSort)

100%|██████████| 5/5 [02:03<00:00, 29.81s/it]
