In [34]:
import random

class QuickSelect:
    def __init__(self, nums):
        self.nums = nums
        self.first_index = 0
        self.last_index = len(nums) - 1

    def run(self, k):
        return self.select(self.first_index, self.last_index, k-1)

    # PARTITION PHASE
    def partition(self, first_index, last_index):
        # generate a random value within the range [first, last]
        pivot_index = random.randint(first_index, last_index)

        self.swap(pivot_index, last_index)

        for i in range(first_index, last_index):
            #if self.nums[i] > self.nums[last_index]:  # k-th max
            if self.nums[i] < self.nums[last_index]:  # k-th min
                if i != first_index:
                    print(i, first_index)                    
                    self.swap(i, first_index)
                first_index += 1

        self.swap(first_index, last_index)

        # it is the index of the pivot
        return first_index

    def swap(self, i, j):
        self.nums[i], self.nums[j] = self.nums[j], self.nums[i]

    # THIS IS THE SELECTION PHASE
    def select(self, first_index, last_index, k):  # k is k-1 since index starts at 0
        pivot_index = self.partition(first_index, last_index)

        # selection phase when we compare the pivot_index with k
        if pivot_index < k:
            # we have to discard the left sub-array and keep
            # considering the items on the right
            return self.select(pivot_index + 1, last_index, k)
        elif pivot_index > k:
            # we have to discard the right sub-array
            return self.select(first_index, pivot_index - 1, k)

        # we have found the item we are looking for
        return self.nums[pivot_index]
    
    # this is how we can do sorting
    def sort(self):
 
        # the result will be another list (sorted order)
        sorted_list = []
 
        # because we decrement the k value (k'=k-1) this is why
        # we have to use range() like that
        for i in range(1, len(self.nums)+1):
            sorted_list.append(self.run(i))
 
        return sorted_list

In [22]:
x = [1, 2, -5, 10, 100, -7, 3, 4]
select = QuickSelect(x)
print(select.run(2))  # k-th max or min

5 4
6 5
1 0
-5


In [47]:
x = [1, -2, 5, 8, 7, 6]
select = QuickSelect(x)
print(select.run(2))  # k-th min
#print(select.sort())
print(x)
print()
print(select.sort())
print(x)

1
[-2, 1, 5, 8, 7, 6]

[-2, 1, 5, 6, 7, 8]
[-2, 1, 5, 6, 7, 8]


In [48]:
def median_algorithm(nums, k):

    # we have to split the list into chunks of 5 items
    chunks = [nums[i:i+5] for i in range(0, len(nums), 5)]
    # the median is the middle item in the sorted order
    # NOTE: median of the medians is just approximately the median of the original data structure
    medians = [sorted(chunk)[len(chunk)//2] for chunk in chunks]
    pivot_value = sorted(medians)[len(medians)//2]

    # PARTITION PHASE
    left_array = [n for n in nums if n < pivot_value]
    right_array = [m for m in nums if m > pivot_value]

    # selection phase
    pivot_index = len(left_array)

    if k < pivot_index:
        # we have to consider the left array because we are looking for
        # smaller items
        return median_algorithm(left_array, k)
    elif k > pivot_index:
        # we have to consider the right array BUT we have to update k value
        # because we have created a new array
        return median_algorithm(right_array, k-len(left_array)-1)
    else:
        return pivot_value


def select(nums, k):
    return median_algorithm(nums, k-1)

In [49]:
x = [1, -5, 0, 10, 15, 20, 3, -1, 21, 22, 23, 24, 25, 26, 27, 28, 29]
print(select(x, 1))
print(select(x, 2))
print(select(x, 3))
print(select(x, 4))

-5
-1
0
1


## Secretary Problem: Online Algorithm

- secretary problem is a very important problem of optimal stopping theory
- also known as the „best choice problem”
- partioning based algorithm cannot solve this problem
- we want to hire the best secretary out of N applicants
- applicants are interviewed one by one and after rejecting, the applicants can not be recalled
- we can rank the applicant among all applicants interviewed so far but we are unaware of the quality of yet unseen applicants
- what is the optimal strategy? Of course we want to maximize the probability of selecting the best applicant
- if we can consider every applicant and we can make the final decision after that: it is the simple selection problem
- it can be done in O(N) linear running time complexity with quickselect algorithm
- but now WE HAVE TO MAKE THE DECISION IMMEDIATELY (when considering the actual applicant)

### The best strategy is to reject the fist n/e    applicants and then select the one that is better than all the applicants interviewed so far: 1/e 37%



In [51]:
# online average, cumulative moving average

#list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
list = [1, 2, 3, 7, 9]

cum_sum = 0
avgs = []
for i, data in enumerate(list):
    cum_sum += data

    avg =  cum_sum/ (i+1)
    avgs.append(avg)

print(avgs)

[1.0, 1.5, 2.0, 3.25, 4.4]
