# Median Heap

In [90]:
import heapq
    
def add_to_median_heap(minh,maxh,elem):
    # if empty maxh (to prevent out of bounds error) 
    if maxh == [] or elem > -maxh[0]:
        # insert in maxh and move to right pos
        heapq.heappush(maxh,elem)
    else:
        # insert in minh and move to right pos
        heapq.heappush(minh,-elem) # negative because module doesn't hanxle maxh
    
    # ensure heap sizes remain balanced
    if len(minh)-len(maxh) > 1:
        heapq.heappush(maxh,-heapq.heappop(minh))
    elif len(minh)-len(maxh) < -1:
        heapq.heappush(minh,-heapq.heappop(maxh))
    return(minh,maxh)

def median(minh,maxh):
    # handle even/ odd total numbers
    if len(minh) > len(maxh):
        return(minh[0])
    elif len(maxh) > len(minh):
        return(maxh[0])
    else:
        return((-minh[0]+maxh[0])/2)

In [93]:
minh = []
maxh = []
for a in range(1,100,2):
    add_to_median_heap(minh, maxh, a)
print(median(minh, maxh))

50.0


### Worst case complexity of median:
Have to bubble through entire heap, have to swap everytime.

In other words, inputs come in sorted in order.

insertion would take heappush (nlgn), rebalancing would take heappush and heappop would take 2*O(nlgn). Overall still bounded by O(nlgn).

### Expanding to other percentiles
Just set the length of minh vs maxh to maintain.

ex: for 75%, maintain minh to be 1/4 the size of maxh.

_______

# Quick Select

In [138]:
lst = [1,2,3]
print(lst[1:])

[2, 3]


In [224]:
from random import randint

def qselect(lst,k):    
    # partition
    p_ind = randint(0,len(lst)-1)
    lst[0],lst[p_ind] = lst[p_ind],lst[0]
    partition = lst[0]
    
    lower = [a for a in lst[1:] if a <= partition]
    upper = [a for a in lst[1:] if a > partition]
    
    ind = len(lower)
    
    # push into right place
    lst[:ind] = lower
    lst[ind] = partition
    lst[ind+1:] = upper
    
    # find which partition k is in
    if k == ind+1:
        val = lst[ind]
        print(val)
        return(val)
    if k < ind+1:
        qselect(lower,k)
    else:
        qselect(upper,k-ind-1)

In [225]:
print(qselect([0,1,2],2))

1
None


In [214]:
import random
random.seed(123)
lst = [x for x in range(1000)]
random.shuffle(lst)
for a in range(1000):
    print(a)
    print(qselect(lst, a))

0


ValueError: empty range for randrange() (0,0, 0)

______


In [95]:
#OOPS: abandoned because I realized we should use the heapq module
#_____IGNORE ME
'''

def sift_down(heap, minHeap = True):
    i = 0
    if minHeap:
        # while not leaf node
        while i < (len(heap)//2):
            # get largest child
            if (i*2+2 > (len(heap)-1)) or (heap[i*2+1]< heap[i*2+2]):
                largest_child = i*2+1
            else:
                largest_child = i*2+2
            # swap with largest child if larger
            if heap[i] > heap[largest_child]:
                heap[i],heap[largest_child] = heap[largest_child],heap[i]
                i = largest_child
            else:
                return(heap)
    else:
        # while not leaf node
        while i < (len(heap)//2):
            # get smallest child
            if (i*2+2 > (len(heap)-1)) or (heap[i*2+1] > heap[i*2+2]):
                smallest_child = i*2+1
            else:
                smallest_child = i*2+2
            # swap with smallest child if smaller
            if heap[i] < heap[smallest_child]:
                heap[i],heap[smallest_child] = heap[smallest_child],heap[i]
                i = smallest_child
            else:
                return(heap)
    return(heap)
    
def add_to_median_heap(minh,maxh,elem):
    # if empty minh (to prevent out of bounds error) 
    # or elem smaller than curr med
    if minh == [] or elem <= minh[0]:
        # insert in minh and move to right pos
        minh.insert(0,elem)
        sift_down(minh)
    else:
        # insert in maxh and move to right pos
        maxh.insert(0,elem)
        sift_down(maxh,False)
    
    # ensure heap sizes remain balanced
    if len(minh)-len(maxh) > 1:
        move_me = minh[0]
        minh[0] = minh.pop()
        sift_down(minh)
        maxh.insert(0,move_me)
        sift_down(maxh, False)
    elif len(minh)-len(maxh) < -1:
        move_me = maxh[0]
        maxh[0] = maxh.pop()
        sift_down(maxh,False)
        minh.insert(0,move_me)
        sift_down(minh)
    return(minh,maxh)

def median(minh,maxh):
    if len(minh) > len(maxh):
        return(minh[0])
    elif len(maxh) > len(minh):
        return(maxh[0])
    else:
        return((minh[0]+maxh[0])/2)
        
'''

'\n\ndef sift_down(heap, minHeap = True):\n    i = 0\n    if minHeap:\n        # while not leaf node\n        while i < (len(heap)//2):\n            # get largest child\n            if (i*2+2 > (len(heap)-1)) or (heap[i*2+1]< heap[i*2+2]):\n                largest_child = i*2+1\n            else:\n                largest_child = i*2+2\n            # swap with largest child if larger\n            if heap[i] > heap[largest_child]:\n                heap[i],heap[largest_child] = heap[largest_child],heap[i]\n                i = largest_child\n            else:\n                return(heap)\n    else:\n        # while not leaf node\n        while i < (len(heap)//2):\n            # get smallest child\n            if (i*2+2 > (len(heap)-1)) or (heap[i*2+1] > heap[i*2+2]):\n                smallest_child = i*2+1\n            else:\n                smallest_child = i*2+2\n            # swap with smallest child if smaller\n            if heap[i] < heap[smallest_child]:\n                heap[i],he