### Importing the necessary modules to perform a benchmark analysis on the HeapSort algorithm

In [14]:
import random
import numpy as np
import timeit
import copy
import pandas
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm

### Heapifying the input array


In [15]:
def heapify(arr, n, i):
    """
    Sift the element at index i down inside the heap stored in arr[0:n]

    arr : mutable sequence holding an implicit binary tree
          (children of index k live at 2*k + 1 and 2*k + 2)
    n   : number of leading elements of arr that belong to the heap
    i   : index of the subtree root to repair

    Works in place and returns None. The subtree rooted at i ends up as a
    max-heap provided both of its child subtrees already were max-heaps.
    """
    root = i
    while True:
        left = 2 * root + 1
        right = left + 1
        top = root

        # Pick the left child when it exists and beats the current root
        if left < n and arr[root] < arr[left]:
            top = left

        # The right child wins only if it beats the best candidate so far
        if right < n and arr[top] < arr[right]:
            top = right

        # Root already dominates both children: nothing left to repair
        if top == root:
            return

        # Push the small value one level down and keep repairing from there
        arr[root], arr[top] = arr[top], arr[root]
        root = top

### Implementing the Heapsort algorithm

In [16]:
def heapSort(arr):
    """
    Sort arr in ascending order with heapsort

    arr : mutable sequence of mutually comparable items (list, numpy array, ...)

    The sequence is sorted in place and the very same object is returned,
    so callers may use either the return value or the argument afterwards.
    Empty and single-element inputs are returned unchanged.
    """
    n = len(arr)

    # Build a max-heap bottom-up. Only indices 0 .. n//2 - 1 have children,
    # so starting at the last parent skips the leaves (where heapify is a
    # no-op) and avoids passing the out-of-range index n.
    for i in range(n // 2 - 1, -1, -1):
        heapify(arr, n, i)

    # Repeatedly move the current maximum to the end of the unsorted prefix,
    # then repair the heap on the prefix that remains.
    for end in range(n - 1, 0, -1):
        arr[end], arr[0] = arr[0], arr[end]
        heapify(arr, end, 0)

    return arr

### Testing the HeapSort algorithm

In [17]:
def test_sorting_algorithm(algorithm): 
    """
    Function to test the correctness of a sorting algorithm
    Generating numpy array with random integers to be tested on
    Tests it 10 times and then i assume that it is correct
    """
    for i in range(1000):
        A = np.random.randint(1000, size=100)
        A_copy = A.copy()
        algorithm(A_copy)      
        assert A_copy.tolist() == sorted(A), 'The implementation of %s is wrong'% (algorithm.__name__)

In [18]:
# Sanity check before benchmarking: raises AssertionError if heapSort
# mis-sorts any of the random arrays, and produces no output on success.
test_sorting_algorithm(heapSort)

### Generating test data

In [19]:
def ascending_list_int(n):
    """
    Returns the already-sorted list [0, 1, ..., n - 1] (length n)
    """
    return list(range(n))

def descending_list_int(n):
    """
    Returns the reverse-sorted list [n - 1, n - 2, ..., 0] (length n)
    """
    return list(reversed(range(n)))

def random_list_int(n):
    """
    Returns n random integers, each drawn from the closed range [-n, n]
    """
    values = []
    for _ in range(n):
        values.append(random.randint(-n, n))
    return values
  
def random_list_float(n):
    """
    Returns n random floats, each drawn uniformly between -n and n
    """
    low, high = -n, n
    values = []
    for _ in range(n):
        values.append(random.uniform(low, high))
    return values


def random_charlist(n):
    """
    Returns a list of length n with random lowercase ASCII letters

    Every letter 'a'..'z' is equally likely. The alphabet is spelled out
    once, with each letter appearing exactly once, so that no character is
    over-represented in the generated data.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [random.choice(alphabet) for _ in range(n)]

# Input generators used for benchmarking, one per kind of test data.
# Each takes the desired length n and returns a list of that length.
test_data_list = [
    ascending_list_int,
    descending_list_int,
    random_list_int,
    random_list_float,
    random_charlist,
]

### Determining the running time per iteration for the algorithm

In [20]:
def time_function(sort_function, test_data, repeat=5, number=10,
                  pickle_path="heap_sort_times"):
    """
    Actual function which does the timing

    sort_function : callable that receives a sequence and sorts it
    test_data     : input sequence; a fresh shallow copy is handed to
                    sort_function on every call, so test_data itself is
                    never modified (the copy is part of the measured time)
    repeat        : number of independent measurements (default 5)
    number        : calls to sort_function per measurement (default 10)
    pickle_path   : file the raw measurements are pickled to as a
                    one-column DataFrame; pass None to skip writing.
                    The file is overwritten on every call, so with the
                    default path only the most recent call survives.

    Returns the smallest of the `repeat` measurements. Each measurement is
    the total time of `number` consecutive calls, not a per-call average.
    """
    clock = timeit.Timer('func(copy(data))',
                         globals={'func': sort_function, 'data': test_data,
                                  'copy': copy.copy})

    timings = clock.repeat(repeat=repeat, number=number)

    if pickle_path is not None:
        pandas.DataFrame(timings).to_pickle(pickle_path)

    # The minimum is the measurement least disturbed by other system activity
    return np.min(timings)

### Performing benchmark analysis on the algorithm

In [21]:
# Input lengths to benchmark: 10, 100, 1000, 10000, 100000
test_sizes = [10 ** exponent for exponent in range(1, 6)]

def benchmark_function(sort_function):
    """
    Time sort_function on every kind of test data for every test size

    sort_function : callable that receives a sequence and sorts it

    For each size in test_sizes (outer loop) and each generator in
    test_data_list (inner loop, in list order) a fresh input is generated
    and timed with time_function.

    Returns a list with one [sizes, times] pair per generator, in the order
    of test_data_list; sizes[k] and times[k] belong together. The same
    structure is also pickled, wrapped in a DataFrame, to the file
    "heap_sort" in the current working directory.
    """
    # One [sizes, times] series per input generator
    all_data = [[[], []] for _ in test_data_list]

    for size in tqdm(test_sizes):
        for series, make_data in zip(all_data, test_data_list):
            series[0].append(size)
            series[1].append(time_function(sort_function, make_data(size)))

    frame = pandas.DataFrame(all_data)
    frame.to_pickle("heap_sort")

    return all_data

# Full benchmark run for heapSort. This is slow: the progress output recorded
# for this cell shows roughly 7.5 minutes for the five test sizes. It also
# writes the pickle files "heap_sort" and "heap_sort_times" as a side effect.
benchmark_heap = benchmark_function(heapSort)

100%|██████████| 5/5 [07:34<00:00, 132.92s/it]
