In [6]:
def counting_sort(a, n, k):
    B = [0] * n
    C = [0] * k

    for j in range(n):
        C[a[j]] += 1

    for i in range(1, k):
        C[i] += C[i - 1]

    for j in range(n - 1, -1, -1):
        B[C[a[j]] - 1] = a[j]
        C[a[j]] -= 1

    return B

In [8]:
a = [6, 0, 2, 0, 1, 3, 4, 6, 1, 3, 2]
print(counting_sort(a, len(a) - 1, max(a) + 1))

[0, 0, 1, 1, 2, 3, 3, 4, 6, 6]


In [9]:
"""
no satellite data
we can sort in place instead of creating a new array (B)
O(n + k)
"""
def counting_sort_inplace(a, k):
    C = [0] * (k + 1)
    for x in a:
        C[x] += 1

    index = 0
    for i in range(k+1):
        for _ in range(C[i]):
            a[index] = i
            index += 1

In [10]:
a = [6, 0, 2, 0, 1, 3, 4, 6, 1, 3, 2]
counting_sort_inplace(a, max(a))
a

[0, 0, 1, 1, 2, 2, 3, 3, 4, 6, 6]

In [11]:
"""
3.14 → 314
0.05 → 5
6.00 → 600
we transform decimals (10 base) so sort can still work
"""
def counting_sort_fractional(a, n, k, d):
    scale = 10 ** d
    for i in range(n):
        a[i] = int(a[i] * scale)

    C = [0] * (k * scale + 1)

    for x in a:
        C[x] += 1

    index = 0
    for i in range(len(C)):
        for _ in range(C[i]):
            a[index] = i
            index += 1

    for i in range(n):
        a[i] /= scale

In [13]:
def radix_sort(a, d):
    n = len(a)
    b = [0] * n

    for i in range(d):
        counting_sort_stable(a, b, i)
        a, b = b, a

    return a

def counting_sort_stable(a, b, digit):
    n = len(a)
    c = [0] * 10

    for j in range(n):
        d = (a[j] // (10 ** digit)) % 10
        c[d] += 1

    for i in range(1, 10):
        c[i] += c[i - 1]

    for j in range(n - 1, -1, -1):
        d = (a[j] // (10 ** digit)) % 10
        b[c[d] - 1] = a[j]
        c[d] -= 1

In [14]:
import math

def insertion_sort(arr):
    for i in range(1, len(arr)):
        key = arr[i]
        j = i - 1
        while j >= 0 and arr[j] > key:
            arr[j + 1] = arr[j]
            j -= 1
        arr[j + 1] = key

def bucket_sort(a, n):
    B = [[] for _ in range(n)]
    
    for i in range(n):
        index = math.floor(n * a[i])
        if index == n:
            index = n - 1
        B[index].append(a[i])

    for i in range(n):
        insertion_sort(B[i])

    concat_lists = []
    for i in range(n):
        concat_lists.extend(B[i])

    return concat_lists

In [17]:
a = [.79, .13, .16, .64, .39, .20, .89, .53, .71, .42]
print(bucket_sort(a, len(a)))

[0.13, 0.16, 0.2, 0.39, 0.42, 0.53, 0.64, 0.71, 0.79, 0.89]


In [18]:
import math

def insertion_sort_distance(points):
    for i in range(1, len(points)):
        key = points[i]
        key_dist = math.sqrt(key[0]**2 + key[1]**2)
        j = i - 1
        while j >= 0 and math.sqrt(points[j][0]**2 + points[j][1]**2) > key_dist:
            points[j + 1] = points[j]
            j -= 1
        points[j + 1] = key

"""
we have to use: min(n - 1, int(n * d2))
because a distance may fall outside of our array range
so we must put the distance on the edge.
"""
def sort_points_by_distance(points):
    n = len(points)
    buckets = [[] for _ in range(n)]
    
    for (x, y) in points:
        d2 = x**2 + y**2
        k = min(n - 1, int(n * d2))
        buckets[k].append((x, y))
    
    for b in buckets:
        insertion_sort_distance(b)
    
    sorted_points = []
    for b in buckets:
        sorted_points.extend(b)
    
    return sorted_points

use this for CDFs to uniformly spread data between 0-1 so that running time will not get to n^2

X = [0.1, 0.5, 1.2, 2.0, 3.5]

Y = [0.095, 0.393, 0.699, 0.865, 0.970]  # uniform-ish

Rule:
Use Y=P(X) whenever X comes from a continuous distribution and you want Θ(n) expected-time bucket sort.

It uniformly spreads the data in [0,1]
Preserves order (monotone)
Ensures linear expected sorting time

$Y = 1 - e^{-\lambda X_i}$

In [19]:
def radix_sort_strings(arr):
    max_len = max(len(s) for s in arr)

    for i in range(max_len - 1, -1, -1):
        # 256 ASCII characters + 1 sentinel
        buckets = [[] for _ in range(257)]

        for s in arr:
            if i < len(s):
                idx = ord(s[i]) + 1
            else:
                idx = 0
            buckets[idx].append(s)

        arr = [s for bucket in buckets for s in bucket]

    return arr

In [20]:
def radix_sort_integers(arr):
    max_digits = max(len(str(x)) for x in arr)

    exp = 1  # 10^i
    for _ in range(max_digits):
        buckets = [[] for _ in range(10)]

        for num in arr:
            digit = (num // exp) % 10   # missing digits automatically give 0
            buckets[digit].append(num)

        arr = [num for bucket in buckets for num in bucket]
        exp *= 10

    return arr