### importing the necessary modules to perform a benchmark analysis on the Merge Sort algorithm

In [31]:
import random
import numpy as np
import timeit
import copy
import pandas
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

### Implementing the Merge Sort algorithm

In [32]:
def mergeSort(m):

    length = len(m)

    if length == 1:
        return m

    mid = int(math.floor(length / 2))

    left = mergeSort(m[0:mid])
    right = mergeSort(m[mid:length])

    return merge(left, right)

def merge(left, right):

    merged = []

    # while at least one list has elements
    while left or right:

        if left and right:
            if left[0] <= right[0]:
                key = left.pop(0)
            else:
                key = right.pop(0)
        elif left:
            key = left.pop(0)
        else:
            key = right.pop(0)

        merged.append(key)

    return merged

### Testing the sorting algorithm

In [33]:
def test_sorting_algorithm(algorithm): 
    """
    Function to test the correctness of a sorting algorithm
    Generating numpy array with random integers to be tested on
    Tests it 10 times and then i assume that it is correct
    """
    for i in range(1000):
        L = [random.randint(0, 1000) for _ in range(100)]
        L_copy = L.copy()
        L_sorted = algorithm(L_copy)      
        assert L_sorted == sorted(L), 'The implementation of %s is wrong'% (algorithm.__name__)

In [34]:
test_sorting_algorithm(mergeSort)

### Generating test data

In [35]:
def ascending_list_int(n):
    """
    Returns a ascending list with values from 0 to n with length n
    """
    List = [i for i in range(n)]
    return List

def descending_list_int(n):
    """
    Returns a descending list with values from n to 0 with length n
    """
    List = [i for i in range(n - 1, -1, -1)]
    return List

def random_list_int(n):
    """
    Returns a list of random integers from -n to n with length n
    """
    List = [random.randint(-n, n) for _ in range(n)]
    return List
  
def random_list_float(n):
    """
    Returns a list of length n with random float values from -n to n
    """
    List = [random.uniform(-n, n) for _ in range(n)]
    return List


def random_charlist(n):
    """
    Returns a list of length n with random characters
    """
    List = [random.choice('abcdefghisjklmnopqrstuvwxyz') 
                 for _ in range(n)]
    
    return List

test_data_list = [ascending_list_int, descending_list_int, random_list_int, random_list_float, random_charlist]

### Determining the running time per iteration for the algorithm

In [36]:
def time_function(sort_function, test_data):
    """
    Actual function which does the timing
    """
    clock = timeit.Timer('func(copy(data))',
                       globals={'func': sort_function, 'data': test_data, 
                                'copy': copy.copy})

    n_ar, t_ar = clock.autorange()
    
    data = np.array(clock.repeat(repeat=7, number=n_ar)) / n_ar
    
    sort = pandas.DataFrame(data)
    sort.to_pickle("merge_sort_times")
    
    return np.min(data)

### Performing benchmark analysis on the algorithm

In [37]:
test_sizes = [10, 100, 1000, 10000, 100000]

def benchmark_function(sort_function):
    data1 = [[], []]
    data2 = [[], []]
    data3 = [[], []]
    data4 = [[], []]
    data5 = [[], []]
    
    for size in tqdm(test_sizes):
        data1[0].append(size) 
        data2[0].append(size)
        data3[0].append(size) 
        data4[0].append(size), 
        data5[0].append(size) 
        data1[1].append(time_function(sort_function,ascending_list_int(size)))
        data2[1].append(time_function(sort_function,descending_list_int(size)))
        data3[1].append(time_function(sort_function,random_list_int(size)))
        data4[1].append(time_function(sort_function,random_list_float(size)))
        data5[1].append(time_function(sort_function,random_charlist(size)))
      
    
    all_data = [data1, data2, data3, data4, data5]
    
    merge_sort = pandas.DataFrame(all_data)
    
    merge_sort.to_pickle("merge_sort")
    
    return all_data

benchmark_merge = benchmark_function(mergeSort)

100%|██████████| 5/5 [02:53<00:00, 45.28s/it]
