<img src='images/14_heaps_application1.png' width=500>

<img src='images/14_heaps_application2.png' width=500>

### Note: keep the Low heap and the High heap balanced, so that every time the median is extracted it is either the max of the Low heap or the min of the High heap

<img src='images/14_heaps_application3.png' width=500>

<img src='images/14_heaps_application4.png' width=500>

In [1]:
from heapq import heappush, heappop, heapify

# heapify takes O(n) linear time
# insert or extractmin takes O(logn) time

class MinHeap:
    """Binary min-heap stored in a flat list (``self.heap``).

    Insertion and extraction are delegated to ``heapq``; the index helpers
    and the key-update methods operate directly on the same list, using the
    usual layout where the children of index ``i`` sit at ``2*i + 1`` and
    ``2*i + 2``.
    """

    def __init__(self):
        self.heap = []

    @property
    def length(self):
        """Number of keys currently stored."""
        return len(self.heap)

    def parent(self, i):
        """Return the index of the parent of index ``i``."""
        return (i - 1) // 2

    def left_child(self, i):
        """Return the index where the left child of ``i`` would be."""
        return i * 2 + 1

    def right_child(self, i):
        """Return the index where the right child of ``i`` would be."""
        return (i + 1) * 2

    def has_left_child(self, i):
        """Return True if index ``i`` has a left child inside the heap."""
        return self.left_child(i) < self.length

    def has_right_child(self, i):
        """Return True if index ``i`` has a right child inside the heap."""
        return self.right_child(i) < self.length

    def extract_min(self):
        """Remove and return the smallest key (IndexError if empty)."""
        return heappop(self.heap)

    def get_min(self):
        """Return the smallest key without removing it (IndexError if empty)."""
        return self.heap[0]

    def insert_key(self, k):
        """Add key ``k`` to the heap."""
        heappush(self.heap, k)

    def decrease_key(self, i, new_val):
        """Overwrite the key at index ``i`` with ``new_val`` and sift it up.

        ``new_val`` is expected to be no larger than the key it replaces.
        """
        self.heap[i] = new_val
        while i != 0 and self.heap[i] < self.heap[self.parent(i)]:
            up = self.parent(i)
            self.heap[i], self.heap[up] = self.heap[up], self.heap[i]
            # Follow the key to its new position; without this step the key
            # would rise at most one level and the heap order could break.
            i = up

    def increase_key(self, i, new_val):
        """Overwrite the key at index ``i`` with ``new_val`` and sift it down.

        ``new_val`` is expected to be no smaller than the key it replaces.
        """
        self.heap[i] = new_val
        while self.has_left_child(i):
            # Pick the smaller child; on a tie the left child is used.
            min_child = self.left_child(i)
            if self.has_right_child(i):
                right = self.right_child(i)
                if self.heap[right] < self.heap[min_child]:
                    min_child = right

            if self.heap[i] > self.heap[min_child]:
                self.heap[i], self.heap[min_child] = self.heap[min_child], self.heap[i]
                i = min_child
            else:
                return

    def replace_key(self, i, new_val):
        """Replace the key at index ``i`` with ``new_val``, restoring heap order."""
        if self.heap[i] > new_val:
            self.decrease_key(i, new_val)
        elif self.heap[i] < new_val:
            self.increase_key(i, new_val)
        # Equal value: nothing to do.

    def delete_key(self, i):
        """Remove the key stored at index ``i``.

        The key is lowered to ``-inf`` so it rises to the root, then popped.
        This assumes keys are comparable with floats.
        """
        self.decrease_key(i, float('-inf'))
        self.extract_min()

In [2]:
class MaxHeap:
    """Hand-written binary max-heap kept in a flat list (``self.heap``).

    The children of index ``i`` live at ``2*i + 1`` and ``2*i + 2``; the
    largest key is always at index 0.
    """

    def __init__(self):
        self.heap = []

    @property
    def length(self):
        """Number of keys currently stored."""
        return len(self.heap)

    def parent(self, i):
        """Index of the parent of index ``i``."""
        return (i - 1) // 2

    def left_child(self, i):
        """Index where the left child of ``i`` would be."""
        return 2 * i + 1

    def right_child(self, i):
        """Index where the right child of ``i`` would be."""
        return 2 * i + 2

    def has_left_child(self, i):
        """True when the left child of ``i`` lies inside the heap."""
        return self.left_child(i) < self.length

    def has_right_child(self, i):
        """True when the right child of ``i`` lies inside the heap."""
        return self.right_child(i) < self.length

    def bub_up(self, i):
        """Sift the key at index ``i`` up while it is larger than its parent."""
        items = self.heap
        while i != 0:
            up = self.parent(i)
            if not items[i] > items[up]:
                break
            items[i], items[up] = items[up], items[i]
            i = up

    def insert_key(self, key):
        """Append ``key`` and sift it up to its place."""
        self.heap.append(key)
        self.bub_up(self.length - 1)

    def bub_down(self, i):
        """Sift the key at index ``i`` down while a child is larger."""
        items = self.heap
        while self.has_left_child(i):
            # Larger of the two children; the left one wins a tie.
            biggest = self.left_child(i)
            if self.has_right_child(i):
                right = self.right_child(i)
                if items[right] > items[biggest]:
                    biggest = right
            if not items[i] < items[biggest]:
                return
            items[i], items[biggest] = items[biggest], items[i]
            i = biggest

    def extract_max(self):
        """Remove and return the largest key (IndexError if empty)."""
        top = self.heap[0]
        tail = self.heap.pop()
        if self.heap:
            # Move the former last key to the root and restore heap order.
            self.heap[0] = tail
            self.bub_down(0)
        return top

    def get_max(self):
        """Return the largest key without removing it (IndexError if empty)."""
        return self.heap[0]

In [3]:
# Build a max-heap by inserting the sample keys one at a time, in this order.
heapObj = MaxHeap()
heapObj.insert_key(3)
heapObj.insert_key(2)
heapObj.insert_key(15)
heapObj.insert_key(5)
heapObj.insert_key(4)
heapObj.insert_key(45)
heapObj.insert_key(27)
heapObj.insert_key(6)
heapObj.insert_key(13)

In [4]:
# Show the heap's backing list after the insertions.
heapObj.heap

[45, 13, 27, 6, 4, 3, 15, 2, 5]

In [5]:
# Remove the largest key, then show the backing list that remains.
heapObj.extract_max()
heapObj.heap

[27, 13, 15, 6, 4, 3, 5, 2]

In [6]:
def compute_median(low, high, key):
    """Insert ``key`` into the two-heap structure and return the new median.

    Args:
        low: max-heap holding the smaller half of the values seen so far.
            Must provide ``length``, ``get_max``, ``extract_max`` and
            ``insert_key``.
        high: min-heap holding the larger half. Must provide ``length``,
            ``get_min``, ``extract_min`` and ``insert_key``.
        key: the new value to add.

    Returns:
        The median after ``key`` has been added. With an even number of
        values this is the lower of the two middle values.

    Both heaps must already be non-empty, and their sizes must differ by at
    most one; the function keeps that size invariant.
    """
    max_low = low.get_max()
    min_high = high.get_min()

    if low.length == high.length:
        # One heap ends up with a single extra key; its root is the median.
        if key < max_low:
            low.insert_key(key)
            return max_low
        if key > min_high:
            high.insert_key(key)
            return min_high
        # max_low <= key <= min_high. The bounds are inclusive so that a key
        # equal to either root is still stored instead of being dropped.
        low.insert_key(key)
        return key

    if low.length > high.length:
        if key < max_low:
            # key belongs in the low half: move the old maximum across to
            # make room so the halves end up the same size.
            high.insert_key(low.extract_max())
            low.insert_key(key)
            return low.get_max()
        high.insert_key(key)
        return max_low

    # high holds one more key than low.
    if key < min_high:
        low.insert_key(key)
        return low.get_max()
    # key belongs in the high half: move the old minimum across first.
    low.insert_key(high.extract_min())
    high.insert_key(key)
    return low.get_max()

In [7]:
def compute_medians(data): # Assume unique values in list of data
    """Return the running medians of ``data``.

    The i-th entry of the result is the median of the first ``i + 1`` values
    of ``data``; for an even count this is the lower of the two middle
    values.

    Args:
        data: iterable of mutually comparable values. It is only read, never
            modified.

    Returns:
        A list with one median per input value (empty for empty input).
    """
    stream = iter(data)

    # Inputs with fewer than two values cannot seed both heaps.
    try:
        first = next(stream)
    except StopIteration:
        return []
    try:
        second = next(stream)
    except StopIteration:
        return [first]

    heapL = MaxHeap()  # smaller half of the values seen so far
    heapH = MinHeap()  # larger half of the values seen so far

    # Seed each heap with one of the first two values.
    if first < second:
        smaller, larger = first, second
    else:
        smaller, larger = second, first
    heapL.insert_key(smaller)
    heapH.insert_key(larger)

    # Median of one value is itself; of two values, the smaller one.
    medians = [first, smaller]

    # Iterating avoids list.pop(0), which shifts the whole list each time.
    for n in stream:
        medians.append(compute_median(heapL, heapH, n))

    return medians

In [8]:
# Load the small sample input: one integer per line.
with open('datasets/Median_test3.txt') as f:
    line_test = list(map(int, map(str.strip, f)))

In [9]:
# Show the integers read from the sample file.
line_test

[6331, 2793, 1640, 9290, 225, 625, 6195, 2303, 5685, 1354]

In [10]:
# Compute the running medians of the sample input, then display them.
medians_test = compute_medians(line_test)
medians_test

[6331, 2793, 2793, 2793, 2793, 1640, 2793, 2303, 2793, 2303]

In [11]:
import numpy as np
# Sum of the running medians for the sample input.
np.sum(np.array(medians_test))

29335

In [12]:

# Load the full input: one integer per line.
with open('datasets/Median.txt') as f:
    data = list(map(int, map(str.strip, f)))

# Running median after each value of the stream.
medians = compute_medians(data)

# Sum of all running medians.
np.array(medians).sum()

46831213