# Median and Order Statistics

- Problem: Find i-th smallest number in a given array.

<img src="../asset/order_statistics/fig1.jpg" alt="img" style="width: 600px;"/>

In [51]:
import random
import time

MIN = 0
MAX = 40000
ARR_SIZE = 10000
FIND_KTH_SMALL = 2000

arr_rand = list(range(MIN, MAX + 1))
arr_rand = random.sample(arr_rand, ARR_SIZE)

print(arr_rand)

[379, 16609, 25295, 39353, 5945, 9558, 16398, 13843, 28934, 7221, 29291, 644, 29087, 17293, 1096, 30809, 18616, 26259, 25778, 1907, 5075, 8998, 35726, 7164, 22182, 29910, 36423, 8081, 36560, 6646, 25761, 10306, 27709, 31672, 23501, 11892, 30684, 39024, 28551, 36686, 9838, 20713, 6984, 15492, 346, 35504, 32577, 29014, 33329, 37675, 17542, 28470, 25186, 6069, 16482, 2090, 9528, 16655, 27186, 945, 33968, 29195, 18395, 5246, 31714, 232, 36380, 11253, 8730, 24965, 6417, 12294, 39249, 11003, 21700, 19377, 18202, 22016, 8793, 19258, 32989, 17715, 11108, 21671, 7391, 11603, 5082, 26054, 921, 3438, 17832, 13756, 11954, 35023, 31244, 22996, 26829, 32967, 38838, 8593, 10727, 29720, 22851, 31984, 36979, 28629, 7603, 12409, 17065, 34656, 23792, 15465, 38653, 13892, 32254, 6270, 33764, 6670, 18700, 29744, 10578, 21280, 30967, 34802, 8599, 9288, 16271, 26079, 30594, 27230, 30958, 10460, 39781, 8484, 10599, 15424, 33566, 38810, 31837, 35280, 4308, 13115, 90, 33159, 39262, 8979, 10697, 11979, 31839, 32

## 1. Insertion sorting
- O(n^2)

In [52]:
timestamp = time.time()
arr = arr_rand.copy()

for i in range(1, ARR_SIZE):
    val_cur = arr[i]
    j = min(i, FIND_KTH_SMALL)
    
    while j > 0:
        if val_cur < arr[j-1]:
            arr[j] = arr[j-1]
            j -= 1
        else:
            break
    idx_sorted = j
    arr[idx_sorted] = val_cur

arr_sorted = arr[:FIND_KTH_SMALL]
print('Total time: ', time.time() - timestamp)
print('K-th smallest element:', arr_sorted[-1])

Total time:  1.2197375297546387
K-th smallest element: 7873


## 2. Heap
- O(nlogn)

In [58]:
timestamp = time.time()
arr = arr_rand.copy()

def exchange_element(arr, i, j):
    temp = arr[i]
    arr[i] = arr[j]
    arr[j] = temp

def max_heapify(arr, idx, heap_size):
    child_left = (idx + 1) * 2 - 1
    child_right = (idx + 1) * 2
    
    idx_high = idx
    
    if child_left < heap_size and arr[child_left] > arr[idx]:
        idx_high = child_left
    
    if child_right < heap_size and arr[child_right] > arr[idx_high]:
        idx_high = child_right
        
    if idx_high != idx:
        exchange_element(arr, idx, idx_high)
        max_heapify(arr, idx_high, heap_size)
        
    
def build_max_heap(arr, heap_size):
    last_parent = heap_size // 2
    
    for i in reversed(range(last_parent)):        
        max_heapify(arr, i, heap_size)

        
def find_kth_smallest_element(arr, k):
    heap_size = k
    build_max_heap(arr, heap_size)
    
    for i in range(k, len(arr)):
        if arr[i] < arr[0]:
            arr[0] = arr[i]
            max_heapify(arr, 0, heap_size)
        
    return arr[0]
        
ans = find_kth_smallest_element(arr, FIND_KTH_SMALL)
        
print('Total time: ', time.time() - timestamp)
print('K-th smallest element:', ans)

Total time:  0.019918203353881836
K-th smallest element: 7873


## 3. Quick selection

- If pivot is randomly selected, average computational cost is O(nlogn)
- Worst case: O(n^2)

<img src="../asset/order_statistics/fig2.jpg" alt="img" style="width: 600px;"/>

In [63]:
timestamp = time.time()
arr = arr_rand.copy()

def partition(arr, p, r):
    pivot = arr[r]
    
    i = p-1
    
    for j in range(p, r):
        if arr[j] < pivot:
            i += 1
            exchange_element(arr, i, j)
    
    q = i+1
    exchange_element(arr, q, r)
    
    return q

def quick_select(arr, p, r, i):
    if p == r:
        return arr[p]
    
    pivot_rand = random.choice(list(range(p,r+1)))
    exchange_element(arr, pivot_rand, r)
    q = partition(arr, p, r)
    k = q - p + 1
    
    if i == k:
        return arr[q]
    elif i < k:
        return quick_select(arr, p, q-1, i)
    else:
        return quick_select(arr, q+1, r, i-k)
        
ans = quick_select(arr, 0, len(arr)-1, FIND_KTH_SMALL)
        
print('Total time: ', time.time() - timestamp)
print('K-th smallest element:', ans)

Total time:  0.003989219665527344
K-th smallest element: 7873
