<a href="https://colab.research.google.com/github/shivavsrivastava/Algorithms/blob/main/Course2_W3_MedianMaintenanceAlgo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Median Maintenance Algorithm


In [None]:
import sys
import numpy as np
import heapq
import time
# Record which Python interpreter version this notebook was run with.
print(sys.version)

## Heap based implementation
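Maintain two heaps: a max-heap holding the smaller half of the numbers seen so far and a min-heap holding the larger half. After every insertion, rebalance so that the max-heap has either the same number of elements as the min-heap or exactly one more; the median is then the root of the max-heap.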

In [None]:
class MedianMaintenance:
  '''
  Running median of a stream of numbers, maintained with two heaps.

  maxHeap holds the smaller half of the elements, stored negated so that
  heapq (a min-heap) keeps the largest of them at index 0. minHeap holds
  the larger half. After every insertion maxHeap has either the same
  number of elements as minHeap or exactly one more, so -maxHeap[0] is the
  ((k+1)//2)-th smallest of the k elements seen so far.

  size is the number of elements added so far.
  '''
  def __init__(self):
    self.maxHeap = []
    self.minHeap = []
    self.size = 0

  def __repr__(self):
    # Raw heap arrays (not sorted): the lower half is un-negated and listed
    # back to front, followed by the upper half.
    return str([-e for e in reversed(self.maxHeap)] + list(self.minHeap))

  def addElement(self, element):
    '''
    Insert element into the half it belongs to, then rebalance the sizes.
    '''
    if self.minHeap:
      # Normal case: anything strictly above the smallest element of the
      # upper half belongs to the upper half.
      goesHigh = element > self.minHeap[0]
    else:
      # Zero or one element stored so far: the only thing to compare with
      # is the root of the lower half. Deciding here keeps the halves
      # ordered without ever having to patch heap roots afterwards.
      goesHigh = bool(self.maxHeap) and element > -self.maxHeap[0]

    if goesHigh:
      heapq.heappush(self.minHeap, element)
    else:
      heapq.heappush(self.maxHeap, -element)

    self.size += 1
    self.maintainHeaps()

  def getMedian(self):
    '''
    Return the current median (the lower one when the count is even).

    Raises IndexError if no element has been added yet.
    '''
    if not self.maxHeap:
      raise IndexError("getMedian() called before any element was added")
    return -self.maxHeap[0]

  def maintainHeaps(self):
    '''
    Restore len(minHeap) <= len(maxHeap) <= len(minHeap) + 1.

    Only roots are moved (smallest of the upper half, or largest of the
    lower half), so every element of maxHeap stays <= every element of
    minHeap.
    '''
    while len(self.minHeap) > len(self.maxHeap):
      heapq.heappush(self.maxHeap, -heapq.heappop(self.minHeap))

    while len(self.maxHeap) > len(self.minHeap) + 1:
      heapq.heappush(self.minHeap, -heapq.heappop(self.maxHeap))



In [None]:
class MedianMaintenanceAVLTree:
  '''
  Running median of a stream of numbers.

  NOTE: despite the name, this class does not use an AVL tree; it keeps
  the running median with two heapq heaps.

  maxHeap holds the smaller half of the elements, stored negated so that
  heapq (a min-heap) keeps the largest of them at index 0. minHeap holds
  the larger half. After every insertion maxHeap has either the same
  number of elements as minHeap or exactly one more, so -maxHeap[0] is the
  ((k+1)//2)-th smallest of the k elements seen so far.

  size is the number of elements added so far.
  '''
  def __init__(self):
    self.maxHeap = []
    self.minHeap = []
    self.size = 0

  def __repr__(self):
    # Raw heap arrays (not sorted): the lower half is un-negated and listed
    # back to front, followed by the upper half.
    return str([-e for e in reversed(self.maxHeap)] + list(self.minHeap))

  def addElement(self, element):
    '''
    Insert element into the half it belongs to, then rebalance the sizes.
    '''
    if self.minHeap:
      # Normal case: anything strictly above the smallest element of the
      # upper half belongs to the upper half.
      goesHigh = element > self.minHeap[0]
    else:
      # Zero or one element stored so far: the only thing to compare with
      # is the root of the lower half. Deciding here keeps the halves
      # ordered without ever having to patch heap roots afterwards.
      goesHigh = bool(self.maxHeap) and element > -self.maxHeap[0]

    if goesHigh:
      heapq.heappush(self.minHeap, element)
    else:
      heapq.heappush(self.maxHeap, -element)

    self.size += 1
    self.maintainHeaps()

  def getMedian(self):
    '''
    Return the current median (the lower one when the count is even).

    Raises IndexError if no element has been added yet.
    '''
    if not self.maxHeap:
      raise IndexError("getMedian() called before any element was added")
    return -self.maxHeap[0]

  def maintainHeaps(self):
    '''
    Restore len(minHeap) <= len(maxHeap) <= len(minHeap) + 1.

    Only roots are moved (smallest of the upper half, or largest of the
    lower half), so every element of maxHeap stays <= every element of
    minHeap.
    '''
    while len(self.minHeap) > len(self.maxHeap):
      heapq.heappush(self.maxHeap, -heapq.heappop(self.minHeap))

    while len(self.maxHeap) > len(self.minHeap) + 1:
      heapq.heappush(self.minHeap, -heapq.heappop(self.maxHeap))



#### Test the MedianMaintenance



In [None]:
# Smoke test on a small stream: after each insertion print the heap
# contents (via __repr__) together with the value returned by getMedian().
li = [3, 8, 2, 5, 1, 4, 7, 6]
medianMaintenance = MedianMaintenance()
for element in li:
  medianMaintenance.addElement(element)
  print("Heap {} has median {}".format(medianMaintenance, medianMaintenance.getMedian()))

In [None]:
# Second smoke test with a longer stream; a fresh MedianMaintenance is
# created so nothing carries over from the previous cell.
li = [9, 10, 6, 2, 7, 1, 5, 8, 3, 4, 15, 17, 13]
medianMaintenance = MedianMaintenance()
for element in li:
  medianMaintenance.addElement(element)
  print("Heap {} has median {}".format(medianMaintenance, medianMaintenance.getMedian()))

## ASSIGNMENT
Download the following text file:
**Median.txt**

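The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 3 lecture on heap applications).  The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one. Let $x_i$ denote the $i$-th number of the file; the $k$-th median $m_k$ is defined as the median of the numbers $x_1, \ldots, x_k$. If $k$ is odd, then $m_k$ is the $((k+1)/2)$-th smallest number among $x_1, \ldots, x_k$; if $k$ is even, then $m_k$ is the $(k/2)$-th smallest number among $x_1, \ldots, x_k$.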

In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits).

OPTIONAL EXERCISE: Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.



In [None]:
import pandas as pd
# Load the assignment data; header=None makes pandas label the columns 0, 1, ...
# instead of consuming the first row as a header.
# NOTE(review): the URL carries Expires/Signature query parameters, so it is
# presumably a time-limited signed link - confirm it still resolves, or point
# this at a local copy of Median.txt.
df = pd.read_table('https://d3c33hcgiwev3.cloudfront.net/_6ec67df2804ff4b58ab21c12edcb21f8_Median.txt?Expires=1712361600&Signature=jq5Td7uUCjy~YxJ1uD~pbYlobfTkwEfr7pD-JVFo4DJUdsorV1gHg6TK-Zdw0zQ3EdEMIlkjz0SgBorLEQV1-mlbI8mxZMR9mHsEz93vQU6v0-q26e0AK8zjkoPU-jS~cuabzzOvinlwQjPbmpWUkokfzOYjylMuWAY4ZO6aozs_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A', header=None)

In [None]:
# Summary statistics of the loaded frame, as a quick sanity check of the data.
df.describe()

In [None]:
# Column 0 of the frame as a plain Python list; this is the stream that the
# timing cells iterate over.
array = df[0].tolist()

In [None]:
# Feed the whole stream through the heap-based implementation, summing the
# median after every insertion, and time the run.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
t1 = time.perf_counter()
medianMaintenance = MedianMaintenance()
medTotal = 0
for element in array:
  medianMaintenance.addElement(element)
  medTotal += medianMaintenance.getMedian()

t2 = time.perf_counter()
# Only the last four digits of the sum are reported (sum modulo 10000).
print("Total median is {} and it took {} seconds using Heap".format(medTotal%10000, (t2-t1)))


Answer was 1213

## AVL Tree
picked up from: https://gist.github.com/girish3/a8e3931154af4da89995

In [None]:
#import random, math
outputdebug = False

def debug(msg):
  '''
  Print msg only when the module-level outputdebug flag is true.
  '''
  if outputdebug:
    print(msg)

class Node():
  '''
  One key stored in an AVLTree.

  left and right start out as None; AVLTree.insert replaces them with
  (initially empty) AVLTree wrappers as soon as the node is attached.
  '''
  def __init__(self, key):
    self.key = key
    self.left = None
    self.right = None




class AVLTree():
  '''
  AVL tree (self-balancing binary search tree) of unique keys.

  Every AVLTree object wraps one subtree: node is its root Node, or None
  when the subtree is empty, and each Node keeps its two children as
  AVLTree wrappers. The wrapper caches two values for its subtree:

    height  : -1 for an empty subtree, 0 for a single node (an attribute,
              not a method)
    balance : height of the left subtree minus height of the right one

  insert and delete keep both values up to date on every wrapper.
  '''
  def __init__(self, *args):
    '''
    Create an empty tree. When exactly one positional argument is given
    it is iterated and every item is inserted.
    '''
    self.node = None
    self.height = -1
    self.balance = 0

    if len(args) == 1:
      for i in args[0]:
        self.insert(i)

  def is_leaf(self):
    '''
    Return True when this subtree consists of exactly one node.
    '''
    return (self.height == 0)

  def insert(self, key):
    '''
    Insert key into this subtree and rebalance on the way back up.
    A key that is already present is left alone.
    '''
    if self.node is None:
      self.node = Node(key)
      self.node.left = AVLTree()
      self.node.right = AVLTree()
      debug("Inserted key [" + str(key) + "]")
    elif key < self.node.key:
      self.node.left.insert(key)
    elif key > self.node.key:
      self.node.right.insert(key)
    else:
      debug("Key [" + str(key) + "] already in tree.")
    self.rebalance()

  def rebalance(self):
    '''
    Rebalance a particular (sub)tree.

    Assumes the child wrappers already carry correct height/balance
    values, which insert and delete guarantee because they rebalance
    bottom-up.
    '''
    self._refresh()
    while self.balance < -1 or self.balance > 1:
      if self.balance > 1:
        # Left-heavy. A right-heavy left child needs a left rotation
        # first (left-right case).
        if self.node.left.balance < 0:
          self.node.left._lrotate_and_refresh()
        self._rrotate_and_refresh()

      if self.balance < -1:
        # Right-heavy. A left-heavy right child needs a right rotation
        # first (right-left case).
        if self.node.right.balance > 0:
          self.node.right._rrotate_and_refresh()
        self._lrotate_and_refresh()

  def _refresh(self):
    # Recompute this wrapper's cached height and balance from its children.
    self.update_heights(False)
    self.update_balances(False)

  def _rrotate_and_refresh(self):
    # Rotate right, then refresh only the three wrappers whose subtrees
    # changed, bottom-up: the one that received the transferred subtree,
    # the one now holding the old root, and this one.
    self.rrotate()
    demoted = self.node.right
    demoted.node.left._refresh()
    demoted._refresh()
    self._refresh()

  def _lrotate_and_refresh(self):
    # Mirror image of _rrotate_and_refresh.
    self.lrotate()
    demoted = self.node.left
    demoted.node.right._refresh()
    demoted._refresh()
    self._refresh()

  def rrotate(self):
    '''
    Rotate right pivoting on self: the left child becomes the root of
    this subtree. Only pointers are changed; cached heights and balances
    are not updated here.
    '''
    debug ('Rotating ' + str(self.node.key) + ' right')
    A = self.node
    B = self.node.left.node
    T = B.right.node
    self.node = B
    B.right.node = A
    A.left.node = T


  def lrotate(self):
    '''
    Rotate left pivoting on self: the right child becomes the root of
    this subtree. Only pointers are changed; cached heights and balances
    are not updated here.
    '''
    debug ('Rotating ' + str(self.node.key) + ' left')
    A = self.node
    B = self.node.right.node
    T = B.left.node
    self.node = B
    B.left.node = A
    A.right.node = T


  def update_heights(self, recurse=True):
    '''
    Recompute the cached height of this subtree. With recurse=True the
    heights of all descendants are recomputed first; with recurse=False
    the children's cached heights are trusted.
    '''
    if self.node is not None:
      if recurse:
        if self.node.left is not None:
          self.node.left.update_heights()
        if self.node.right is not None:
          self.node.right.update_heights()

      self.height = max(self.node.left.height,
                        self.node.right.height) + 1
    else:
      self.height = -1

  def update_balances(self, recurse=True):
    '''
    Recompute the cached balance factor from the children's cached
    heights. With recurse=True all descendants are recomputed first.
    '''
    if self.node is not None:
      if recurse:
        if self.node.left is not None:
          self.node.left.update_balances()
        if self.node.right is not None:
          self.node.right.update_balances()

      self.balance = self.node.left.height - self.node.right.height
    else:
      self.balance = 0

  def delete(self, key):
    '''
    Remove key from this subtree if present and rebalance on the way
    back up. Deleting a missing key is a no-op.
    '''
    if self.node is None:
      return

    if self.node.key == key:
      debug("Deleting ... " + str(key))
      if self.node.left.node is None and self.node.right.node is None:
        self.node = None # leaves can be killed at will
      # if only one subtree, take that
      elif self.node.left.node is None:
        self.node = self.node.right.node
      elif self.node.right.node is None:
        self.node = self.node.left.node
      # worst-case: both children present. Find logical successor
      else:
        replacement = self.logical_successor(self.node)
        if replacement is not None: # sanity check
          debug("Found replacement for " + str(key) + " -> " + str(replacement.key))
          self.node.key = replacement.key
          # replaced. Now delete the key from right child
          self.node.right.delete(replacement.key)
      self.rebalance()
      return
    elif key < self.node.key:
      self.node.left.delete(key)
    elif key > self.node.key:
      self.node.right.delete(key)
    self.rebalance()

  def logical_predecessor(self, node):
    '''
    Find the biggest valued node in LEFT child.
    Returns None when the left child is empty.
    '''
    node = node.left.node
    if node is not None:
      while node.right is not None and node.right.node is not None:
        node = node.right.node
    return node

  def logical_successor(self, node):
    '''
    Find the smallest valued node in RIGHT child.
    Returns None when the right child is empty.
    '''
    node = node.right.node
    if node is not None: # just a sanity check
      while node.left is not None and node.left.node is not None:
        debug("LS: traversing: " + str(node.key))
        node = node.left.node
    return node

  def check_balanced(self):
    '''
    Recompute every height and balance from scratch and return True when
    no subtree has a balance factor outside [-1, 1].
    '''
    if self.node is None:
      return True

    # We always need to make sure we are balanced
    self.update_heights()
    self.update_balances()
    return ((abs(self.balance) < 2) and self.node.left.check_balanced() and self.node.right.check_balanced())

  def inorder_traverse(self):
    '''
    Return the keys of this subtree as a list in ascending order.
    '''
    keys = []
    self._collect_inorder(keys)
    return keys

  def _collect_inorder(self, out):
    # Append this subtree's keys to out in ascending order. Using one
    # shared list avoids copying partial results at every level.
    if self.node is None:
      return
    self.node.left._collect_inorder(out)
    out.append(self.node.key)
    self.node.right._collect_inorder(out)

  def display(self, level=0, pref=''):
    '''
    Print the keys of the whole tree in ascending order, one per line.
    level and pref are accepted for compatibility and are not used.
    TODO: create a better display using breadth-first search
    '''
    self.update_heights()  # Must update heights before balances
    self.update_balances()
    for key in self.inorder_traverse():
      print(str(key))




# Usage example
# Demo: build a tree from a fixed list of keys, print it, delete two keys
# and print it again together with the final in-order traversal.
if __name__ == "__main__":
  a = AVLTree()
  print ("----- Inserting -------")
  #inlist = [5, 2, 12, -4, 3, 21, 19, 25]
  inlist = [7, 5, 2, 6, 3, 4, 1, 8, 9, 0]
  for i in inlist:
    a.insert(i)

  # display() prints the keys one per line
  a.display()

  print ("----- Deleting -------")
  a.delete(3)
  a.delete(4)
  # a.delete(5)
  a.display()

  # Summary: the original input, the two deleted keys, and what is left
  print ("\n")
  print ("Input            :", inlist )
  print ("deleting ...       ", 3)
  print ("deleting ...       ", 4)
  print ("Inorder traversal:", a.inorder_traverse())

In [None]:
# Same experiment as the heap run, using the AVL tree: after every insertion
# take the median from a full in-order traversal and add it to the total.
# NOTE(review): the traversal visits every key, so each step is linear in the
# number of keys inserted so far and the whole loop is quadratic.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
t1 = time.perf_counter()
B = AVLTree()
medTotal = 0
for element in array:
  B.insert(element)
  listB = B.inorder_traverse()
  # 0-based index of the median: (n-1)//2 equals n/2 - 1 for even n and
  # (n+1)/2 - 1 for odd n, so one expression covers both cases.
  median = listB[(len(listB)-1)//2]
  medTotal += median
t2 = time.perf_counter()
# Only the last four digits of the sum are reported (sum modulo 10000).
print("Total median is {} and it took {} seconds using AVL".format(medTotal%10000, (t2-t1)))


AVL Tree did not work out