In [None]:
import random

def testSelect(selMethod, numTCs=None, size=None, data1=None, pos1=None):
    result = True
    try:
        if any((x is not None) for x in (data1, pos1)):
            print ("Warning: Specifying optional data to test is not generic")
        
        numTCs = 1000 if numTCs is None else numTCs
        size = 100 if size is None else size

        for testCase in range(numTCs):
            if data1 is None:
                data = list(range(size))
                random.shuffle(data)
            else:
                data = data1
                
            if pos1 is None:
                pos = random.randrange(0, size)
            else:
                pos = pos1

            res1 = selMethod(data[:], pos)
            res2 = sorted(data)[pos]

            if testCase < 5:
                print ("res1 = {}, res2 = {}".format(res1, res2))

            if res1 != res2:
                print ("Error in verification")
                print ("data = {}, pos = {}".format(data, pos))
                result = False
                break
    except:
        from traceback import print_exc
        print ("Exception in test_rSelect")
        print_exc()
        result = False
        
    if result:
        print("{}: Test Passed".format(selMethod.__name__))
    else:
        print("{}: ***  FAILURE  ***".format(selMethod.__name__))
        


# Randomized Selection


In [5]:

# partition(data, start, end):
#   choose a random pivot position in [start, end]
#   exchange the pivot with the element at start
#   i = start + 1
#   for j from i to end (inclusive):
#       if data[j] < pivot:
#           exchange elements j and i
#           i += 1
#   exchange elements at start and i-1
#   return (i-1)

def partition(data, start, end):
    """Partition data[start..end] (inclusive) in place around a random pivot.

    A pivot position is drawn uniformly from start..end. On return every
    element strictly smaller than the pivot sits to its left within the
    range and all remaining elements sit to its right.

    Returns the index at which the pivot ends up.
    """
    chosen = random.randrange(start, end+1)
    pivotValue = data[chosen]
    # Park the pivot at the front of the range while the rest is scanned.
    data[chosen], data[start] = data[start], data[chosen]

    # `boundary` is the last slot known to hold an element < pivotValue
    # (it equals `start` while no such element has been seen).
    boundary = start
    for scan in range(start + 1, end + 1):
        if data[scan] < pivotValue:
            boundary += 1
            data[boundary], data[scan] = data[scan], data[boundary]

    # Drop the pivot between the "smaller" and "not smaller" regions.
    data[boundary], data[start] = data[start], data[boundary]
    return boundary

def rSelectInner(data, start, end, elpos):
    """Return the element that belongs at sorted index ``elpos`` of data[start..end].

    Repeatedly partitions the active range around a random pivot and keeps
    only the side that contains ``elpos``. ``data`` is reordered in place.

    Args:
        data: mutable sequence being searched.
        start: first index of the active range (inclusive).
        end: last index of the active range (inclusive).
        elpos: absolute index, within start..end, of the wanted element.

    Raises:
        IndexError: if ``elpos`` is outside start..end (this also covers an
            empty range, where start > end).
    """
    # Explicit check rather than assert: asserts vanish under ``python -O``.
    if not start <= elpos <= end:
        raise IndexError(
            "position {} is outside the range {}..{}".format(elpos, start, end))

    # A loop replaces the original tail recursion. partition() only moves
    # elements strictly smaller than the pivot, so with many equal keys the
    # range can shrink by a single element per round; recursing once per
    # round would then exceed Python's recursion limit on large inputs.
    while start < end:
        ppos = partition(data, start, end)
        if ppos == elpos:
            return data[ppos]
        if ppos > elpos:
            end = ppos - 1
        else:
            start = ppos + 1

    # start <= elpos <= end held throughout, so here start == end == elpos.
    return data[elpos]


def rSelect(data, pos):
    """Return the element that would sit at index ``pos`` if ``data`` were sorted.

    Randomized selection over the whole of ``data``; the list is reordered
    in place as a side effect.

    Raises:
        IndexError: if ``pos`` is not in ``range(len(data))`` (which includes
            every ``pos`` when ``data`` is empty).
    """
    # Reject bad requests here so they fail with a clear error instead of an
    # incidental ValueError/AssertionError from deep inside the search.
    if not 0 <= pos < len(data):
        raise IndexError(
            "position {} is out of range for {} element(s)".format(pos, len(data)))
    return rSelectInner(data, 0, len(data)-1, pos)



# Exercise rSelect with the harness defaults (1000 shuffled cases of 100 elements each).
testSelect(rSelect)


res1 = 29, res2 = 29
res1 = 86, res2 = 86
res1 = 89, res2 = 89
res1 = 93, res2 = 93
res1 = 69, res2 = 69
rSelect: Test Passed


# Deterministic Selection

In [8]:

# Set to True to make getMediansOf5/dSelectInner print their intermediate state.
DEBUG_DSEL = False

def dPartition(data, start, end, pIndex):
    """Partition data[start..end] (inclusive) in place around data[pIndex].

    On return every element strictly smaller than the pivot sits to its
    left within the range and all remaining elements sit to its right.

    Returns the index at which the pivot ends up.
    """
    pivotValue = data[pIndex]
    # Park the pivot at the front of the range while the rest is scanned.
    data[pIndex], data[start] = data[start], data[pIndex]

    # `boundary` is the last slot known to hold an element < pivotValue
    # (it equals `start` while no such element has been seen).
    boundary = start
    for scan in range(start + 1, end + 1):
        if data[scan] < pivotValue:
            boundary += 1
            data[boundary], data[scan] = data[scan], data[boundary]

    # Drop the pivot between the "smaller" and "not smaller" regions.
    data[boundary], data[start] = data[start], data[boundary]
    return boundary

def median(arr):
    """Return the median of the list ``arr``.

    Lists of three or more elements are sorted in place and the middle
    element is returned (the upper of the two middle elements when the
    length is even). Shorter lists are left untouched and their first
    element is returned as is.

    Raises:
        IndexError: if ``arr`` is empty.
    """
    if len(arr) < 3:
        return arr[0]
    # The built-in sort replaces the hand-written insertion sort; it still
    # sorts in place, so callers observe the same reordered list.
    arr.sort()
    return arr[len(arr)//2]


def getMediansOf5(data, start, end):
    """Return one median per consecutive run of up to five elements of data[start..end].

    The inclusive range is cut into runs of five, the last run holding
    whatever is left over. Each run is copied out before being handed to
    median(), so ``data`` itself is not modified.
    """
    if DEBUG_DSEL:
        print(" 2  ", data, start, end)
    stop = end + 1
    return [
        median(data[runStart:min(runStart + 5, stop)])
        for runStart in range(start, stop, 5)
    ]

def dSelectInner(data, start, end, pos):
    """Return the element that belongs at sorted index ``pos`` of data[start..end].

    Deterministic selection: the pivot for each round is the median of the
    medians of runs of five elements, found by a recursive call on the list
    of those medians. ``data`` is reordered in place.

    Args:
        data: list being searched.
        start: first index of the active range (inclusive).
        end: last index of the active range (inclusive).
        pos: absolute index, within start..end, of the wanted element.

    Raises:
        IndexError: if ``pos`` is outside start..end (this also covers an
            empty range, which previously recursed without end).
    """
    # Explicit check rather than assert: asserts vanish under ``python -O``.
    if not start <= pos <= end:
        raise IndexError(
            "position {} is outside the range {}..{}".format(pos, start, end))

    # A loop replaces the original tail recursion. dPartition() only moves
    # elements strictly smaller than the pivot, so with many equal keys the
    # range can shrink by a single element per round; recursing once per
    # round would then exceed Python's recursion limit on large inputs.
    while True:
        if DEBUG_DSEL:
            print(" 1  ", data, start, end, pos)
        # base case: start <= pos <= end always holds, so this is the answer
        if start == end:
            return data[start]

        # subdivide data into runs of 5 elements each
        # and collect their medians in a new list
        mediansOf5 = getMediansOf5(data, start, end)
        if DEBUG_DSEL:
            print(" 3  ", mediansOf5)
        # upper middle when the number of medians is even
        mid = len(mediansOf5) // 2
        pivot = dSelectInner(mediansOf5, 0, len(mediansOf5) - 1, mid)
        # The pivot value came from data[start..end]; search only that range.
        pIndex = data.index(pivot, start, end + 1)
        if DEBUG_DSEL:
            print(" 4  ", pivot, pIndex)

        newPIndex = dPartition(data, start, end, pIndex)
        if DEBUG_DSEL:
            print(" 5  ", data, pivot, pIndex, newPIndex)

        if newPIndex == pos:
            return data[pos]
        if newPIndex > pos:
            end = newPIndex - 1
        else:
            start = newPIndex + 1

def dSelect(data, pos):
    """Return the element that would sit at index ``pos`` if ``data`` were sorted.

    Wrapper that runs deterministic (median-of-medians) selection over the
    whole of ``data``; the list is reordered in place as a side effect.

    Raises:
        IndexError: if ``pos`` is not in ``range(len(data))`` (which includes
            every ``pos`` when ``data`` is empty).
    """
    # Reject bad requests here: an empty list would otherwise recurse until
    # RecursionError, and an out-of-range pos would fail deep in the search.
    if not 0 <= pos < len(data):
        raise IndexError(
            "position {} is out of range for {} element(s)".format(pos, len(data)))
    return dSelectInner(data, 0, len(data) - 1, pos)


# Exercise dSelect on 100 shuffled cases of 100 elements each.
testSelect(dSelect, 100, 100)


res1 = 85, res2 = 85
res1 = 66, res2 = 66
res1 = 13, res2 = 13
res1 = 32, res2 = 32
res1 = 6, res2 = 6
dSelect: Test Passed
