<a href="https://colab.research.google.com/github/maddieiyengar/cs315/blob/main/Counting_Sort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import random
import numpy as np
import matplotlib.pyplot as plt

def quickselect(A, i):
    """Return the i-th order statistic of A (i-th smallest element, 1-indexed).

    Uses randomized three-way partitioning: a random pivot splits the
    candidates into elements smaller than, equal to, and larger than the
    pivot, and the search continues in whichever group contains rank i.
    A is not modified.

    Args:
        A: Non-empty sequence of mutually comparable elements.
        i: Rank to select, where 1 is the minimum and len(A) the maximum.

    Returns:
        The element that would sit at position i - 1 if A were sorted.

    Raises:
        IndexError: If A is empty or i is outside 1..len(A).
    """
    # Validate once up front; otherwise a bad rank either returns an
    # arbitrary element or fails deep inside random.choice on an empty list.
    if not 1 <= i <= len(A):
        raise IndexError(
            f"order statistic {i} is out of range for a sequence of length {len(A)}"
        )

    while len(A) > 1:
        pivot = random.choice(A)
        lows = [x for x in A if x < pivot]
        pivots = [x for x in A if x == pivot]

        if i <= len(lows):
            A = lows
        elif i <= len(lows) + len(pivots):
            return pivot
        else:
            # Only build the upper partition when the answer lies in it, and
            # re-base the rank relative to that partition.
            i -= len(lows) + len(pivots)
            A = [x for x in A if x > pivot]

    return A[0]

def quickselect_instrumented(A, i, stats):
    """Quickselect that also tallies element-vs-pivot comparisons.

    Behaves like a randomized three-way-partition quickselect, returning the
    i-th smallest element of A (1-indexed), and adds the number of
    ``<`` / ``>`` comparisons against the pivot to ``stats['comparisons']``.
    A is not modified.

    Args:
        A: Non-empty sequence of mutually comparable elements.
        i: Rank to select, where 1 is the minimum and len(A) the maximum.
        stats: Mutable mapping with an existing numeric 'comparisons' entry;
            it is incremented in place.

    Returns:
        The element that would sit at position i - 1 if A were sorted.

    Raises:
        IndexError: If A is empty or i is outside 1..len(A).
    """
    if not 1 <= i <= len(A):
        raise IndexError(
            f"order statistic {i} is out of range for a sequence of length {len(A)}"
        )

    while len(A) > 1:
        pivot = random.choice(A)
        lows, highs, pivots = [], [], []

        for x in A:
            stats['comparisons'] += 1
            if x < pivot:
                lows.append(x)
                continue
            # The second comparison is performed for every element that is not
            # below the pivot, so count it before branching; counting it only
            # in the "greater" branch under-reports elements equal to the pivot.
            stats['comparisons'] += 1
            if x > pivot:
                highs.append(x)
            else:
                pivots.append(x)

        if i <= len(lows):
            A = lows
        elif i <= len(lows) + len(pivots):
            return pivot
        else:
            # Re-base the rank relative to the upper partition.
            i -= len(lows) + len(pivots)
            A = highs

    return A[0]

# Sanity checks: select a few known ranks from a small shuffled permutation
# of 1..9, where the k-th smallest element is simply k.
print("CORRECTNESS TESTS:")
test = [3, 7, 1, 9, 2, 8, 4, 6, 5]
print(f"Array: {test}")
print(f"3rd smallest: {quickselect(list(test), 3)} (expected: 3)")
print(f"5th smallest (median): {quickselect(list(test), 5)} (expected: 5)")
print(f"9th smallest: {quickselect(list(test), 9)} (expected: 9)")


def _comparisons_for(values, rank):
    """Run one instrumented selection and return its comparison tally."""
    tally = {'comparisons': 0}
    quickselect_instrumented(values, rank, tally)
    return tally['comparisons']


# Comparison counts for median selection on random, ascending, and descending
# inputs, averaged over repeated trials for each input size.
print("\nPERFORMANCE ANALYSIS (averaged over 20 trials):")
sizes = [100, 500, 1000, 2000, 5000]

for n in sizes:
    median_rank = n // 2

    # Uniformly random integers (duplicates possible)
    random_comps = [
        _comparisons_for(
            [random.randint(1, 10000) for _item in range(n)], median_rank
        )
        for _trial in range(20)
    ]

    # Already ascending: 0 .. n-1
    sorted_comps = [
        _comparisons_for(list(range(n)), median_rank) for _trial in range(20)
    ]

    # Descending: n .. 1
    reverse_comps = [
        _comparisons_for(list(range(n, 0, -1)), median_rank)
        for _trial in range(20)
    ]

    avg_random = np.mean(random_comps)
    avg_sorted = np.mean(sorted_comps)
    avg_reverse = np.mean(reverse_comps)
    # Comparisons per element on random input.
    ratio = avg_random / n

    print(f"n={n:5d}: Random={avg_random:6.0f}, Sorted={avg_sorted:6.0f}, "
          f"Reverse={avg_reverse:6.0f}, Ratio={ratio:.2f}")

CORRECTNESS TESTS:
Array: [3, 7, 1, 9, 2, 8, 4, 6, 5]
3rd smallest: 3 (expected: 3)
5th smallest (median): 5 (expected: 5)
9th smallest: 9 (expected: 9)

PERFORMANCE ANALYSIS (averaged over 20 trials):
n=  100: Random=   453, Sorted=   504, Reverse=   490, Ratio=4.53
n=  500: Random=  2518, Sorted=  2376, Reverse=  2488, Ratio=5.04
n= 1000: Random=  5192, Sorted=  5123, Reverse=  4787, Ratio=5.19
n= 2000: Random= 10215, Sorted=  9398, Reverse=  9768, Ratio=5.11
n= 5000: Random= 27686, Sorted= 28041, Reverse= 24800, Ratio=5.54
