In [9]:
# K Selection

In [6]:
import functools
import time
def timeit(func):
    """
    A decorator that times the function func with the arguments that
    are passed to it
    :param func: Function being timed
    :return: a wrapper around func; calling the wrapper returns the tuple
             (func's result, elapsed time in seconds)
    """
    # functools.wraps copies func's __name__/__doc__ onto the wrapper, so a
    # decorated function does not show up as "clocked" in help() or tracebacks
    @functools.wraps(func)
    def clocked(*args, **kwargs):
        # perf_counter is a high-resolution clock meant for measuring intervals
        t0 = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        return result, elapsed
    return clocked


Partition an array around a pivot element (the leftmost element). The original
array is modified in place, and the final index of the pivot is returned.

In [7]:
#some modules we need

from typing import List, TypeVar
T = TypeVar('T')

from random import randrange as rr

In [8]:
def partition(arr: List[T], left: int, right: int) -> int:
    """
    Partition arr[left..right] (both ends inclusive) in place around a
    randomly chosen pivot taken from that sub-range.

    On return, with p the returned index:
      * arr[left..p-1] holds the elements that are <= pivot,
      * arr[p] is the pivot,
      * arr[p+1..right] holds the elements that are > pivot.
    Elements outside arr[left..right] are never touched.

    :param arr: list to partition; reordered in place
    :param left: first index of the sub-range
    :param right: last index of the sub-range (inclusive)
    :return: final index of the pivot
    :raises ValueError: from randrange when left > right (empty range)
    """
    # if we don't pick a random pivot then smaller values
    # will migrate left, and subsequent pivot choices
    # will be less good
    r = rr(left, right + 1)  # r is the pivot index

    # Park the pivot at the start of the sub-range. This must be arr[left],
    # not arr[0]: swapping with arr[0] would pull in an element from outside
    # the sub-range and the random pivot would no longer be the one used.
    arr[left], arr[r] = arr[r], arr[left]
    pivot = arr[left]

    r = left  # last index of the "<= pivot" region built so far
    for i in range(left + 1, right + 1):
        if arr[i] <= pivot:
            r += 1
            arr[i], arr[r] = arr[r], arr[i]

    # Drop the pivot between the "<= pivot" and "> pivot" regions
    arr[left], arr[r] = arr[r], arr[left]
    return r

In [9]:
@timeit
def select(lst: List[T], k:int) -> T:
    """
    Return the element that would sit at index k if lst were sorted,
    found by repeatedly partitioning the part of lst that still contains
    index k.

    lst is reordered in place. Because the function is wrapped by timeit,
    callers receive (value, elapsed seconds) rather than the bare value.

    :param lst: list to select from; modified in place
    :param k: index into the sorted order; a negative k counts from the
              end, as with ordinary list indexing
    :return: the k-th smallest element (0-based)
    :raises IndexError: if lst is empty or k is outside -len(lst)..len(lst)-1
    """
    n = len(lst)
    # Reject indices that lst[k] would reject; without this check an invalid
    # k either returns an unrelated element or fails deep inside partition.
    if not -n <= k < n:
        raise IndexError("select index out of range")
    if k < 0:
        k += n

    # Invariant: left <= k <= right, so the window always contains the answer.
    # A loop instead of recursion keeps a run of bad pivots from hitting the
    # interpreter's recursion limit.
    left, right = 0, n - 1
    while left < right:
        r = partition(lst, left, right)
        if r == k:
            return lst[r]
        if k < r:
            right = r - 1
        else:
            left = r + 1

    # Window shrank to a single index, which must be k
    return lst[left]

In [10]:
@timeit
def timsort_select(lst: List[T], k: int) -> T:
    """
    Baseline k-select: sort the whole list, then read off position k.

    lst is sorted in place. Because the function is wrapped by timeit,
    callers receive (value, elapsed seconds) rather than the bare value.

    :param lst: list to select from; sorted in place
    :param k: index into the sorted list
    :return: lst[k] after sorting
    """
    # list.sort() reorders lst itself, whereas the builtin sorted() would
    # build a second list
    lst.sort()
    kth = lst[k]
    return kth


### Timing $O(n)$ K-Select
Running the $O(n)$ select on 100 million items takes roughly 20–25 sec (the run recorded below took about 25 sec).

In [11]:
# Benchmark select: pick the middle order statistic of n+1 random integers.
n = int(1e8)
# n+1 values, each drawn by randrange from [0, 10*n)
lst = [rr(n*10) for _ in range(n+1)]
# printed once the list is built, right before the timed call
print("start")
# select is wrapped by timeit, so it returns (value, elapsed seconds);
# note that the call reorders lst in place
(rslt, t) = select(lst,len(lst)//2)
print(rslt, t)

start
500021922 25.0001491


### Timing $O(n \log n)$ K-Select implemented with Timsort
Running the $O(n \log n)$ select on 100 million items takes about 50 sec.

In [6]:
# Benchmark timsort_select: sort n+1 random integers and take the middle one.
n = int(1e8)
# n+1 values, each drawn by randrange from [0, 10*n); a fresh list is built
# here, so this run does not reuse data from any other cell
lst = [rr(n*10) for _ in range(n+1)]
# timsort_select is wrapped by timeit, so it returns (value, elapsed seconds);
# note that the call sorts lst in place
(rslt, t) = timsort_select(lst,len(lst)//2)
print(rslt, t)

499931079 49.26352240000001


In [None]:
import numpy as np
from numpy.polynomial.polynomial import polyfit
import matplotlib.pyplot as plt
# NOTE(review): xs, times_ks and times_ts are not defined in the visible part
# of this notebook; they must come from an earlier cell (presumably the x
# values and the matching select / timsort_select timings — confirm).
xs = np.asarray(xs)
times_ks = np.asarray(times_ks)
times_ts = np.asarray(times_ts)

# Degree-1 least-squares fits. polyfit returns coefficients lowest degree
# first, i.e. (intercept, slope).
(b_ks, m_ks) = polyfit(xs, times_ks, 1)
(b_ts, m_ts) = polyfit(xs, times_ts, 1)
fig = plt.figure("K Select", figsize=(10, 10))  # top-level plot object

ax = fig.add_subplot(111)  # 111 = 1x1 grid of subplots, first (only) cell
# Plot the measurements against xs so they share an x-axis with the fit
# lines; without xs, matplotlib would plot them against 0..len-1 instead.
ksplot = ax.plot(xs, times_ks, label="K Select")
ks_linregressplot = ax.plot(xs, b_ks + m_ks * xs, '-', label="K Select (linear fit)")
tsplot = ax.plot(xs, times_ts, label="Tim Sort")
ts_linregressplot = ax.plot(xs, b_ts + m_ts * xs, '-', label="Tim Sort (linear fit)")
# The label= arguments only become visible once a legend is drawn
ax.legend();
