# Empirical Analysis of Sorting Algorithms

## **0. Importing Required Libraries**

In [None]:
from src.utils import generate_test_inputs, create_directory, export_results, sort
from src.timing import measure_execution_time
from src.performance import test_algorithm_performance
from src.plotting import plot_performance_graphs

In [None]:
# Root folder that receives the exported results
directory_path = "result"

# Plots live in a dedicated sub-folder of the results folder
plots_path = directory_path + "/plots"

# Create the results folder first, then the plots folder inside it
create_directory(directory_path)
create_directory(plots_path)

## **1. Introduction**

## **2. Objective**

## **3. Implementing Sorting Algorithms**

In [None]:
# Small hand-picked inputs used to eyeball each sorting implementation
test_arrays = [
    [],  # empty list
    [1],  # single element
    [5, 3, 1, 4, 2],  # unordered, distinct values
    [1, 2, 3, 4, 5],  # already ascending
    [5, 4, 3, 2, 1],  # strictly descending
    [2, 2, 2, 2, 2],  # every element equal
    [3, 1, 4, 1, 5, 9, 2],  # unordered with a repeated value
]

In [None]:
def test_sorting(arrays, func, copy=False):
    """
    Print every input array next to the result of sorting it.

    Each array is passed to the ``sort`` helper together with ``func`` and
    ``copy``; one line of the form ``O: <array> \\t S: <result>`` is printed
    per array, after the helper has returned.

    :param arrays: Arrays to run through the sorting function
    :param func: Sorting function handed to the ``sort`` helper
    :param copy: Forwarded unchanged as the third argument of ``sort``
    """
    for original in arrays:
        outcome = sort(original, func, copy)
        print(f"O: {original} \t S: {outcome}")

### **3.1 Quick Sort**

In [None]:
def quick_sort(arr):
    """
    Return the elements of ``arr`` in ascending order using quick sort.

    The element at the middle index is the pivot. The remaining work is done
    on freshly built lists, so ``arr`` itself is never modified. An input with
    fewer than two elements is returned as-is (the same object).

    :param arr: List of mutually comparable items
    :return: Sorted list (``arr`` itself when it has fewer than two items)
    """
    if len(arr) < 2:
        return arr

    pivot = arr[len(arr) // 2]

    smaller, equal, larger = [], [], []
    for value in arr:
        # Three independent tests mirror three separate filters over arr
        if value < pivot:
            smaller.append(value)
        if value == pivot:
            equal.append(value)
        if value > pivot:
            larger.append(value)

    return quick_sort(smaller) + equal + quick_sort(larger)

In [None]:
# Visual check: print each sample array next to what quick_sort produces for it
print("Testing Quick Sort")
test_sorting(test_arrays, quick_sort)

### **3.2 Merge Sort**

In [None]:
def merge_sort(arr):
    """
    Return the elements of ``arr`` in ascending order using merge sort.

    The list is split at its midpoint, both halves are sorted recursively and
    the two sorted halves are combined with ``merge``. An input with fewer
    than two elements is returned as-is (the same object).

    :param arr: List of mutually comparable items
    :return: Sorted list (``arr`` itself when it has fewer than two items)
    """
    if len(arr) < 2:
        return arr
    half = len(arr) // 2
    sorted_front = merge_sort(arr[:half])
    sorted_back = merge_sort(arr[half:])
    return merge(sorted_front, sorted_back)

def merge(left, right):
    """
    Combine two ascending sequences into one ascending list.

    On ties the item from ``left`` is emitted first, so equal items keep
    their left-before-right order.

    :param left: First ascending sequence
    :param right: Second ascending sequence
    :return: New list holding every item of both inputs in ascending order
    """
    merged = []
    li, ri = 0, 0
    left_len, right_len = len(left), len(right)

    # Repeatedly take the smaller head until one side runs out
    while li < left_len and ri < right_len:
        if left[li] <= right[ri]:
            merged.append(left[li])
            li += 1
        else:
            merged.append(right[ri])
            ri += 1

    # At most one of these tails is non-empty
    merged += left[li:]
    merged += right[ri:]
    return merged

In [None]:
# Visual check: print each sample array next to what merge_sort produces for it
print("Testing Merge Sort")
test_sorting(test_arrays, merge_sort)

### **3.3 Heap Sort**

In [None]:
def heapify(arr, n, i):
    """
    Sift ``arr[i]`` down until it is no smaller than its children.

    Only the first ``n`` items of ``arr`` are treated as part of the heap.
    If the subtrees below ``i`` already satisfy the max-heap property, the
    subtree rooted at ``i`` satisfies it afterwards. Works in place.

    :param arr: List holding the heap in its first ``n`` positions
    :param n: Number of leading items that belong to the heap
    :param i: Index of the node to sift down
    """
    parent = i
    while True:
        biggest = parent
        left_child = 2 * parent + 1
        right_child = 2 * parent + 2

        if left_child < n and arr[left_child] > arr[biggest]:
            biggest = left_child
        if right_child < n and arr[right_child] > arr[biggest]:
            biggest = right_child

        # Parent already dominates both children: nothing left to fix
        if biggest == parent:
            return

        arr[parent], arr[biggest] = arr[biggest], arr[parent]
        parent = biggest

def heap_sort(arr):
    """
    Sort ``arr`` in place in ascending order using heap sort.

    Nothing is returned; the caller's list is reordered.

    :param arr: List of mutually comparable items
    """
    size = len(arr)

    # Phase 1: build a max-heap, sifting down from the last parent to the root
    for root in reversed(range(size // 2)):
        heapify(arr, size, root)

    # Phase 2: move the current maximum behind the shrinking heap, then
    # repair the heap that remains in front of it
    for end in reversed(range(1, size)):
        arr[end], arr[0] = arr[0], arr[end]
        heapify(arr, end, 0)

In [None]:
# Visual check for heap_sort, which reorders its argument and returns nothing.
# True is passed as test_sorting's copy argument, which forwards it to sort
# (NOTE(review): presumably sort then works on a copy and returns it — confirm in src.utils)
print("Testing Heap Sort")
test_sorting(test_arrays, heap_sort, True)

### **3.4 Insertion Sort**

In [None]:
def insertion_sort(arr, max_size=1000):
    """
    Sort ``arr`` in place in ascending order using insertion sort.

    Inputs longer than ``max_size`` are skipped: the function returns an
    empty list immediately and leaves ``arr`` untouched.
    NOTE(review): the cutoff presumably keeps the quadratic algorithm from
    dominating benchmark runs on large inputs — timings recorded for skipped
    sizes do not reflect real sorting work.

    :param arr: List of mutually comparable items
    :param max_size: Largest input length that is actually sorted;
        ``None`` disables the cutoff
    :return: ``None`` after sorting in place, or ``[]`` when the input was
        skipped because it is longer than ``max_size``
    """
    if max_size is not None and len(arr) > max_size:
        return []

    for i in range(1, len(arr)):
        key = arr[i]
        j = i - 1
        # Shift every larger item of the sorted prefix one slot to the right
        while j >= 0 and arr[j] > key:
            arr[j + 1] = arr[j]
            j -= 1
        # j + 1 is now the gap where key belongs
        arr[j + 1] = key

In [None]:
# Visual check for insertion_sort, which reorders its argument in place.
# True is passed as test_sorting's copy argument, which forwards it to sort
# (NOTE(review): presumably sort then works on a copy and returns it — confirm in src.utils)
print("Testing Insertion Sort")
test_sorting(test_arrays, insertion_sort, True)

## **4. Experimental Setup**

In [None]:
# Input lengths to benchmark: 10..990 in steps of 10, then 1000..20000 in
# steps of 100
small_sizes = [*range(10, 999, 10)]
large_sizes = [*range(1000, 20001, 100)]
sizes = [*small_sizes, *large_sizes]
print(sizes)

In [None]:
# Names of the input orderings to benchmark; each one is handed to
# generate_test_inputs and later used as the first key into dataset
input_types = ["sorted", "reversed", "random", "duplicates"]
# Build the benchmark inputs from the sizes and input types above
# NOTE(review): the exact structure comes from src.utils — the wrappers in
# create_sorting_test_functions index it as dataset[input_type][size]
dataset = generate_test_inputs(sizes, input_types)

In [None]:
# Registry of the algorithms under test, keyed by display name.
#   func    - the sorting callable
#   copy    - forwarded as the copy argument of the sort helper
#   is_slow - marks algorithms left out of fast_sorting_methods
sorting_methods = {
    "Quick Sort": {"func": quick_sort, "copy": False, "is_slow": False},
    "Merge Sort": {"func": merge_sort, "copy": False, "is_slow": False},
    "Heap Sort": {"func": heap_sort, "copy": True, "is_slow": False},
    "Python Sort": {"func": sorted, "copy": False, "is_slow": False},
    "Insertion Sort": {"func": insertion_sort, "copy": True, "is_slow": True},
}

# Display names of every algorithm not flagged as slow, in registry order
fast_sorting_methods = [
    name for name, settings in sorting_methods.items() if not settings["is_slow"]
]

In [None]:
# Adapter layer: test_algorithm_performance receives one callable per
# algorithm, each of which only needs the input size to run

def create_sorting_test_functions(input_type, dataset):
    """
    Build one size-driven test callable per registered sorting algorithm.

    Each callable looks up ``dataset[input_type][n]`` for the requested size
    ``n`` and runs it through the ``sort`` helper using the function and copy
    setting registered for that algorithm in ``sorting_methods``.

    :param input_type: Type of input data ("sorted", "reversed", "random", "duplicates")
    :type input_type: str
    :param dataset: Arrays to sort, indexed first by input type and then by size
    :type dataset: Dict[str, Dict[int, List[int]]]

    :return: Mapping from algorithm name to its test callable
    :rtype: Dict[str, Callable]
    """
    wrappers = {}

    for name in sorting_methods:
        # Defaults freeze the current name and input type for this wrapper,
        # so later loop iterations cannot change what it refers to
        def test_func(n, algo_name=name, data_type=input_type):
            # Pick the array prepared for this input type and size
            values = dataset[data_type][n]
            # Registry entry is read at call time
            settings = sorting_methods[algo_name]
            return sort(values, settings["func"], copy=settings["copy"])

        wrappers[f"{name}"] = test_func

    return wrappers

# Run performance tests for each input type.
# Results are collected here, keyed by the input-type name.
all_results = {}

for input_type in input_types:
    print(f"Testing with {input_type} arrays...")

    # Create test functions for this input type (one callable per algorithm)
    sorting_tests = create_sorting_test_functions(input_type, dataset)

    # Run the performance test over every entry of sizes
    # NOTE(review): trials=5 is presumably the number of timed repetitions
    # per size — confirm in src.performance
    # NOTE(review): insertion_sort returns early for inputs longer than 1000
    # items, so its timings beyond that size do not reflect real sorting work
    df_results = test_algorithm_performance(
        algorithms=sorting_tests,
        test_values=sizes,
        param_name="Size",
        trials=5
    )

    # Keep the results in memory and export them under directory_path
    all_results[input_type] = df_results
    export_results(df_results, directory_path, f"sorting_{input_type}")

    # Plot results, targeting a per-input-type path below plots_path
    # NOTE(review): that sub-folder is not created in this notebook — confirm
    # plot_performance_graphs creates it when needed
    plot_performance_graphs(
        df=df_results,
        path=f"{plots_path}/{input_type}",
        param_name="Size",
        title_prefix=f"Sorting Algorithms ({input_type} arrays)",
        filename_prefix=f"sorting_{input_type}",
        all_algorithms=list(sorting_methods.keys()),
        display_plots=True,
        plot_types=["log", "linear"]
    )

print("Performance testing complete!")

## **5. Results and Analysis**

## **6. Conclusion**

### **6.1 Key Findings**

### **6.2 Choosing the Optimal Sorting Algorithm**

### **6.3 Final Remarks**