In [1]:
import matplotlib
import matplotlib.pyplot as plt
import copy

import math
import random
import time
import timeit
import statistics

import numpy as np
import pandas
from tqdm import tqdm

In [2]:
def time_function(sort_function, test_data, repeat=5, number=10,
                  pickle_path="numpy_sort_times"):
    """
    Time ``sort_function`` on shallow copies of ``test_data``.

    Parameters
    ----------
    sort_function : callable
        Invoked as ``sort_function(copy.copy(test_data))``.
    test_data : object
        Input for the function. A fresh shallow copy is made for every
        execution, so ``test_data`` itself is never handed to the function.
    repeat : int, optional
        Number of timing rounds (default 5). Must be at least 1.
    number : int, optional
        Executions per timing round (default 10).
    pickle_path : str or None, optional
        File the raw per-round timings are pickled to as a one-column
        pandas DataFrame. The file is overwritten on every call, so pass a
        distinct path per measurement to keep them all, or ``None`` to skip
        writing altogether.

    Returns
    -------
    numpy.float64
        The smallest round time in seconds. A round covers ``number``
        executions (including the copy), so this is not a per-call time.

    Raises
    ------
    ValueError
        If ``repeat`` is smaller than 1 (there would be nothing to take the
        minimum of).
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")

    timer = timeit.Timer('func(copy(data))',
                         globals={'func': sort_function, 'data': test_data,
                                  'copy': copy.copy})

    round_times = timer.repeat(repeat=repeat, number=number)

    if pickle_path is not None:
        pandas.DataFrame(round_times).to_pickle(pickle_path)

    return np.min(round_times)

In [3]:
def time_function2(sort_function, test_data, repeat=5, number=10,
                   pickle_path="python_sorted_times"):
    """
    Time ``sort_function`` on shallow copies of ``test_data``.

    Parameters
    ----------
    sort_function : callable
        Invoked as ``sort_function(copy.copy(test_data))``.
    test_data : object
        Input for the function. A fresh shallow copy is made for every
        execution, so ``test_data`` itself is never handed to the function.
    repeat : int, optional
        Number of timing rounds (default 5). Must be at least 1.
    number : int, optional
        Executions per timing round (default 10).
    pickle_path : str or None, optional
        File the raw per-round timings are pickled to as a one-column
        pandas DataFrame. The file is overwritten on every call, so pass a
        distinct path per measurement to keep them all, or ``None`` to skip
        writing altogether.

    Returns
    -------
    numpy.float64
        The smallest round time in seconds. A round covers ``number``
        executions (including the copy), so this is not a per-call time.

    Raises
    ------
    ValueError
        If ``repeat`` is smaller than 1 (there would be nothing to take the
        minimum of).
    """
    if repeat < 1:
        raise ValueError("repeat must be at least 1")

    timer = timeit.Timer('func(copy(data))',
                         globals={'func': sort_function, 'data': test_data,
                                  'copy': copy.copy})

    round_times = timer.repeat(repeat=repeat, number=number)

    if pickle_path is not None:
        pandas.DataFrame(round_times).to_pickle(pickle_path)

    return np.min(round_times)

In [4]:
def ascending_list_int(n):
    """
    Build the list 0, 1, ..., n - 1 (n integers in increasing order).
    """
    return list(range(n))

def descending_list_int(n):
    """
    Build the list n - 1, n - 2, ..., 0 (n integers in decreasing order).
    """
    return list(reversed(range(n)))

def random_list_int(n):
    """
    Draw n random integers, each between -n and n (both ends included),
    and return them as a list.
    """
    values = []
    for _ in range(n):
        values.append(random.randint(-n, n))
    return values
  
def random_list_float(n):
    """
    Draw n random floats with random.uniform(-n, n) and return them as a list.
    """
    values = []
    for _ in range(n):
        values.append(random.uniform(-n, n))
    return values


def random_charlist(n):
    """
    Returns a list of length n with random lowercase letters (a-z).

    Every letter is equally likely. The alphabet used previously contained
    a stray extra 's', which made 's' twice as likely as any other letter.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [random.choice(alphabet) for _ in range(n)]

# Test-data generators, each called with the desired list length.
test_data_list = [
    ascending_list_int,
    descending_list_int,
    random_list_int,
    random_list_float,
    random_charlist,
]

In [8]:
# Input lengths to benchmark: 10, 100, 1000, 10000, 100000.
test_sizes = [10 ** exponent for exponent in range(1, 6)]

def benchmark_function(sort_function):
    """
    Time ``sort_function`` with time_function for every size in test_sizes
    on each kind of test data.

    Returns a list of five ``[sizes, timings]`` pairs, ordered: ascending
    ints, descending ints, random ints, random floats, random letters.
    The same structure is also pickled (as a pandas DataFrame) to the file
    "numpy_sort".
    """
    generators = (
        ascending_list_int,
        descending_list_int,
        random_list_int,
        random_list_float,
        random_charlist,
    )
    # One independent [sizes, timings] pair per generator.
    results = [[[], []] for _ in generators]

    for size in tqdm(test_sizes):
        for (sizes, timings), make_data in zip(results, generators):
            sizes.append(size)
            timings.append(time_function(sort_function, make_data(size)))

    pandas.DataFrame(results).to_pickle("numpy_sort")

    return results



In [9]:
# Input lengths iterated by benchmark_function2: 10, 100, 1000, 10000, 100000.
test_sizes = [10 ** exponent for exponent in range(1, 6)]

def benchmark_function2(sort_function):
    """
    Time ``sort_function`` with time_function2 for every size in test_sizes
    on each kind of test data.

    Returns a list of five ``[sizes, timings]`` pairs, ordered: ascending
    ints, descending ints, random ints, random floats, random letters.
    The same structure is also pickled (as a pandas DataFrame) to the file
    "python_sorted".
    """
    generators = (
        ascending_list_int,
        descending_list_int,
        random_list_int,
        random_list_float,
        random_charlist,
    )
    # One independent [sizes, timings] pair per generator.
    results = [[[], []] for _ in generators]

    for size in tqdm(test_sizes):
        for (sizes, timings), make_data in zip(results, generators):
            sizes.append(size)
            timings.append(time_function2(sort_function, make_data(size)))

    pandas.DataFrame(results).to_pickle("python_sorted")

    return results

In [23]:
def plot_data(function_sort, title='HeapSort'):
    """
    Plot benchmark timings against input size, one line per kind of test data.
    Uses the pyplot interface of matplotlib.

    Parameters
    ----------
    function_sort : sequence
        Benchmark result laid out as returned by benchmark_function /
        benchmark_function2: five ``[sizes, timings]`` pairs ordered
        ascending ints, descending ints, random ints, random floats,
        random letters.
    title : str, optional
        Title of the figure. Defaults to 'HeapSort', the title that used to
        be hard-coded.

    Notes
    -----
    The x values are the sizes recorded in ``function_sort`` itself, so the
    plot stays correct even if the global ``test_sizes`` has changed since
    the benchmark was run.
    """
    # (index into function_sort, legend label), in drawing order.
    # Index 0 holds the ascending data and index 1 the descending data;
    # the two labels used to be swapped.
    series = [
        (0, 'Ascending List'),
        (1, 'Descending List'),
        (4, 'Random Letters'),
        (3, 'Random Floats'),
        (2, 'Random Integers'),
    ]

    for index, label in series:
        sizes = function_sort[index][0]
        timings = function_sort[index][1]
        plt.plot(sizes, timings, label=label)

    plt.xlabel("input size")
    plt.ylabel("seconds")
    plt.legend()
    plt.title(title)
    # plt.show was previously referenced without being called.
    plt.show()

In [10]:
# Benchmark NumPy's np.sort on every test-data kind and size
# (benchmark_function also pickles the result to "numpy_sort").
benchmark_Numpy_sort = benchmark_function(np.sort)

# Same benchmark for Python's built-in sorted
# (benchmark_function2 pickles the result to "python_sorted").
benchmark_Python_sort = benchmark_function2(sorted)

100%|██████████| 5/5 [00:05<00:00,  1.66s/it]
100%|██████████| 5/5 [00:05<00:00,  1.57s/it]
