# Data Structures

* Heap
* Median Maintenance with 2 Heaps
* Binary Search Tree

In [1]:
import random
import numpy as np

### Heap



In [131]:
class Heap:
    """Array-backed binary min-heap.

    The element at index ``i`` has its parent at ``(i - 1) // 2`` and its
    children at ``2 * i + 1`` and ``2 * i + 2``.  Every parent is <= its
    children, so the minimum is always at index 0.
    """

    # Constructors

    def __init__(self, arr=None):
        """Build a heap from the items of ``arr`` (empty heap if omitted).

        The items are copied into a new list, so instances never share
        storage through a default argument and the caller's list is not
        reordered.
        """
        self.__heap = [] if arr is None else list(arr)
        self.__len = len(self.__heap)
        self._to_heap()

    # Private Methods

    def _is_empty(self):
        """Return True when the heap holds no elements."""
        return self.__len == 0

    def _swap(self, i, j):
        """Exchange the elements stored at indices ``i`` and ``j``."""
        self.__heap[i], self.__heap[j] = self.__heap[j], self.__heap[i]

    # The three lookups below return False for "no such element".  False is
    # indistinguishable from a stored 0 in a truth test, so the sift routines
    # never rely on them and compare by index instead.

    def _current(self, i):
        """Return the element at index ``i``, or False if out of range."""
        if i < 0 or i >= self.__len:
            return False
        return self.__heap[i]

    def _parent(self, i):
        """Return the parent element of index ``i``, or False if none."""
        if i <= 0 or i >= self.__len:
            return False
        return self.__heap[(i - 1) // 2]

    def _child(self, i, side):
        """Return the left ('l') or right ('r') child element of index ``i``.

        Returns False when ``i`` is negative or the child does not exist.
        """
        if i < 0:
            return False
        assert side in {'l', 'r'}
        i = 2 * i + 1 if side == 'l' else 2 * i + 2
        if i >= self.__len:
            return False
        return self.__heap[i]

    def _bubble_up(self, i):
        """Move the element at index ``i`` up until its parent is not larger."""
        if i >= self.__len:
            return
        heap = self.__heap
        while i > 0:
            parent = (i - 1) // 2
            if heap[parent] > heap[i]:
                self._swap(i, parent)
                i = parent
            else:
                break

    def _bubble_down(self, i):
        """Move the element at index ``i`` down until no child is smaller."""
        size = self.__len
        if i < 0 or i >= size:
            return
        heap = self.__heap
        while True:
            smallest = i
            left, right = 2 * i + 1, 2 * i + 2
            # Bounds are checked by index, so falsy values such as 0 are
            # compared like any other element.
            if left < size and heap[left] < heap[smallest]:
                smallest = left
            if right < size and heap[right] < heap[smallest]:
                smallest = right
            if smallest == i:
                break
            self._swap(i, smallest)
            i = smallest

    def _to_heap(self):
        """Heapify in place: sift down every internal node, bottom-up."""
        # Index len//2 - 1 is the last node that has a child; walk to the root.
        for i in range(self.__len // 2 - 1, -1, -1):
            self._bubble_down(i)

    def _remove_last(self):
        """Drop the last array slot and keep the size counter in sync."""
        self.__heap.pop()
        self.__len -= 1

    # Public Methods

    def get_heap(self):
        """Return the underlying list (the live object, not a copy)."""
        return self.__heap

    def get_len(self):
        """Return the number of stored elements."""
        return self.__len

    def insert(self, val):
        """Add ``val`` to the heap."""
        self.__heap.append(val)
        self.__len += 1
        self._bubble_up(self.__len - 1)

    def delete(self, i):
        """Remove the element stored at array index ``i``.

        Raises IndexError (an Exception subclass) when ``i`` is out of range.
        """
        if i < 0 or i >= self.__len:
            raise IndexError('Index Out of Range Error!')
        last = self.__len - 1
        if i == last:
            self._remove_last()
            return
        self._swap(i, last)
        self._remove_last()
        # The element moved into slot i came from another subtree, so it can
        # be smaller than its new parent as well as larger than its children.
        # At most one of these two calls actually moves anything.
        self._bubble_up(i)
        self._bubble_down(i)

    def view_min(self):
        """Return the smallest element without removing it (False if empty)."""
        return self._current(0)

    def extract_min(self):
        """Remove and return the smallest element.

        Raises IndexError when the heap is empty.
        """
        minimum = self._current(0)
        self.delete(0)
        return minimum

    def sort(self):
        """Drain the heap and return its elements in ascending order."""
        ret = []
        while self.__len > 0:
            ret.append(self.extract_min())
        return ret

In [89]:
# Demo: exercise the min-heap on a small sample list, printing the backing
# array after each operation.
l = [14, 4, 9, 12, 7, 18, 17]
print 'original:', l
hp = Heap(l)  # the constructor heapifies the items
print 'init:', hp.get_heap()
hp.insert(0)
print 'inserted 0:', hp.get_heap()
hp.delete(1)  # removes whatever sits at array index 1, not the value 1
print 'deleted 1th:', hp.get_heap()
# The trailing comma keeps the next print on the same output line.
print 'extract min:', hp.extract_min(), ', got:',
print hp.get_heap()
print 'sorted:', hp.sort()  # sort() drains the heap ...
print 'final state:', hp.get_heap()  # ... so nothing is left here

original: [14, 4, 9, 12, 7, 18, 17]
init: [4, 7, 9, 12, 14, 18, 17]
inserted 0: [0, 4, 9, 7, 14, 18, 17, 12]
deleted 1th: [0, 7, 9, 12, 14, 18, 17]
extract min: 0 , got: [7, 12, 9, 17, 14, 18]
sorted: [7, 9, 12, 14, 17, 18]
final state: []


### Median Maintenance

In [163]:
class HeapRev:
    """Array-backed binary max-heap (the mirror image of a min-heap).

    The element at index ``i`` has its parent at ``(i - 1) // 2`` and its
    children at ``2 * i + 1`` and ``2 * i + 2``.  Every parent is >= its
    children, so the maximum is always at index 0.
    """

    # Constructors

    def __init__(self, arr=None):
        """Build a heap from the items of ``arr`` (empty heap if omitted).

        The items are copied into a new list, so instances never share
        storage through a default argument and the caller's list is not
        reordered.
        """
        self.__heap = [] if arr is None else list(arr)
        self.__len = len(self.__heap)
        self._to_heap()

    # Private Methods

    def _is_empty(self):
        """Return True when the heap holds no elements."""
        return self.__len == 0

    def _swap(self, i, j):
        """Exchange the elements stored at indices ``i`` and ``j``."""
        self.__heap[i], self.__heap[j] = self.__heap[j], self.__heap[i]

    # The three lookups below return False for "no such element".  False is
    # indistinguishable from a stored 0 in a truth test, so the sift routines
    # never rely on them and compare by index instead.

    def _current(self, i):
        """Return the element at index ``i``, or False if out of range."""
        if i < 0 or i >= self.__len:
            return False
        return self.__heap[i]

    def _parent(self, i):
        """Return the parent element of index ``i``, or False if none."""
        if i <= 0 or i >= self.__len:
            return False
        return self.__heap[(i - 1) // 2]

    def _child(self, i, side):
        """Return the left ('l') or right ('r') child element of index ``i``.

        Returns False when ``i`` is negative or the child does not exist.
        """
        if i < 0:
            return False
        assert side in {'l', 'r'}
        i = 2 * i + 1 if side == 'l' else 2 * i + 2
        if i >= self.__len:
            return False
        return self.__heap[i]

    def _bubble_up(self, i):
        """Move the element at index ``i`` up until its parent is not smaller."""
        if i >= self.__len:
            return
        heap = self.__heap
        while i > 0:
            parent = (i - 1) // 2
            if heap[parent] < heap[i]:
                self._swap(i, parent)
                i = parent
            else:
                break

    def _bubble_down(self, i):
        """Move the element at index ``i`` down until no child is larger."""
        size = self.__len
        if i < 0 or i >= size:
            return
        heap = self.__heap
        while True:
            largest = i
            left, right = 2 * i + 1, 2 * i + 2
            # Bounds are checked by index, so falsy values such as 0 are
            # compared like any other element.
            if left < size and heap[left] > heap[largest]:
                largest = left
            if right < size and heap[right] > heap[largest]:
                largest = right
            if largest == i:
                break
            self._swap(i, largest)
            i = largest

    def _to_heap(self):
        """Heapify in place: sift down every internal node, bottom-up."""
        # Index len//2 - 1 is the last node that has a child; walk to the root.
        for i in range(self.__len // 2 - 1, -1, -1):
            self._bubble_down(i)

    def _remove_last(self):
        """Drop the last array slot and keep the size counter in sync."""
        self.__heap.pop()
        self.__len -= 1

    # Public Methods

    def get_heap(self):
        """Return the underlying list (the live object, not a copy)."""
        return self.__heap

    def get_len(self):
        """Return the number of stored elements."""
        return self.__len

    def insert(self, val):
        """Add ``val`` to the heap."""
        self.__heap.append(val)
        self.__len += 1
        self._bubble_up(self.__len - 1)

    def delete(self, i):
        """Remove the element stored at array index ``i``.

        Raises IndexError (an Exception subclass) when ``i`` is out of range.
        """
        if i < 0 or i >= self.__len:
            raise IndexError('Index Out of Range Error!')
        last = self.__len - 1
        if i == last:
            self._remove_last()
            return
        self._swap(i, last)
        self._remove_last()
        # The element moved into slot i came from another subtree, so it can
        # be larger than its new parent as well as smaller than its children.
        # At most one of these two calls actually moves anything.
        self._bubble_up(i)
        self._bubble_down(i)

    def view_max(self):
        """Return the largest element without removing it (False if empty)."""
        return self._current(0)

    def extract_max(self):
        """Remove and return the largest element.

        Raises IndexError when the heap is empty.
        """
        maximum = self._current(0)
        self.delete(0)
        return maximum

    def sort(self):
        """Drain the heap and return its elements in descending order."""
        ret = []
        while self.__len > 0:
            ret.append(self.extract_max())
        return ret

In [164]:
# Demo: the same walkthrough as for Heap, this time on the max-heap.
l = [14, 4, 9, 12, 7, 18, 17]
print 'original:', l
hp = HeapRev(l)  # the constructor heapifies the items
print 'init:', hp.get_heap()
hp.insert(0)
print 'inserted 0:', hp.get_heap()
hp.delete(1)  # removes whatever sits at array index 1, not the value 1
print 'deleted 1th:', hp.get_heap()
# NOTE(review): the label below says 'extract min' but the call is
# extract_max(), so the value printed is the maximum.
# The trailing comma keeps the next print on the same output line.
print 'extract min:', hp.extract_max(), ', got:',
print hp.get_heap()
print 'sorted:', hp.sort()  # descending order; sort() drains the heap ...
print 'final state:', hp.get_heap()  # ... so nothing is left here

original: [14, 4, 9, 12, 7, 18, 17]
init: [18, 12, 17, 4, 7, 9, 14]
inserted 0: [18, 12, 17, 4, 7, 9, 14, 0]
deleted 1th: [18, 7, 17, 4, 0, 9, 14]
extract min: 18 , got: [17, 7, 14, 4, 0, 9]
sorted: [17, 14, 9, 7, 4, 0]
final state: []


In [183]:
class MedianMaintenance:
    """Running median of a stream of values, maintained with two heaps.

    The lower half of the values lives in a max-heap and the upper half in a
    min-heap, with sizes kept within one of each other.  For an even count
    the median reported is the lower of the two middle values.

    Naming caveat: ``__min_heap`` is the HeapRev (max-heap) holding the
    LOWER half and ``__max_heap`` is the Heap (min-heap) holding the UPPER
    half; ``get_minheap`` / ``get_maxheap`` follow the same naming.
    """

    def __init__(self):
        # A fresh list is passed explicitly so each heap gets its own storage.
        self.__min_heap = HeapRev([])
        self.__max_heap = Heap([])
        self.__cur_med = False

    def get_med(self):
        """Return the current median, or False before the first insert."""
        return self.__cur_med

    def insert(self, i):
        """Add ``i`` to the stream and refresh the stored median."""
        low, high = self.__min_heap, self.__max_heap

        # Decide "is this the first value?" from the heap sizes rather than
        # from the truthiness of the stored median: a median of 0 is falsy
        # and would otherwise send the next value to the wrong half.
        if low.get_len() == 0 and high.get_len() == 0:
            low.insert(i)
        elif i < self.__cur_med:
            low.insert(i)
        else:
            high.insert(i)

        low_size, high_size = low.get_len(), high.get_len()
        if low_size - high_size > 1:
            # Lower half too big: hand its largest value to the upper half.
            high.insert(low.extract_max())
        elif high_size - low_size > 1:
            # Upper half too big: hand its smallest value to the lower half.
            low.insert(high.extract_min())
        elif high_size - low_size == 1:
            # Odd count with the extra element in the upper half.
            self.__cur_med = high.view_min()
            return
        # Equal sizes, or one extra element in the lower half.
        self.__cur_med = low.view_max()

    def clear(self):
        """Discard all values and return to the initial empty state."""
        self.__min_heap = HeapRev([])
        self.__max_heap = Heap([])
        self.__cur_med = False

    def get_minheap(self):
        """Return the backing list of the lower-half (max-)heap."""
        return self.__min_heap.get_heap()

    def get_maxheap(self):
        """Return the backing list of the upper-half (min-)heap."""
        return self.__max_heap.get_heap()

In [188]:
def median(l):
    """Reference median: the middle item of the sorted values.

    For an even number of items the lower of the two middle items is
    returned.  The argument itself is left unsorted.
    """
    ordered = sorted(l)
    middle = (len(ordered) - 1) // 2
    return ordered[middle]

# Compare the brute-force median ("gold") with MedianMaintenance on the same
# stream of values, one prefix at a time.
l = [14, 4, 9, 12, 7, 18, 17]

print 'Gold test:\n'

arr = []
sum_med = 0  # accumulated below but only referenced in the disabled print tail
for l_ in l:
    arr.append(l_)
    med = median(arr)
    sum_med += med
    print 'med:', med, '| arr:', arr, '| sorted:', sorted(arr) # '| sum_med:', sum_med 
print

print 'Prog test:\n' 

# Columns printed per step: median | lower-half heap | upper-half heap.
l = [14, 4, 9, 12, 7, 18, 17]
med_main = MedianMaintenance()
for l_ in l:
    med_main.insert(l_)
    print med_main.get_med(), '|', med_main.get_minheap(), '|', med_main.get_maxheap()

Gold test:

med: 14 | arr: [14] | sorted: [14]
med: 4 | arr: [14, 4] | sorted: [4, 14]
med: 9 | arr: [14, 4, 9] | sorted: [4, 9, 14]
med: 9 | arr: [14, 4, 9, 12] | sorted: [4, 9, 12, 14]
med: 9 | arr: [14, 4, 9, 12, 7] | sorted: [4, 7, 9, 12, 14]
med: 9 | arr: [14, 4, 9, 12, 7, 18] | sorted: [4, 7, 9, 12, 14, 18]
med: 12 | arr: [14, 4, 9, 12, 7, 18, 17] | sorted: [4, 7, 9, 12, 14, 17, 18]

Prog test:

14 | [14] | []
4 | [4] | [14]
9 | [4] | [9, 14]
9 | [9, 4] | [12, 14]
9 | [9, 4, 7] | [12, 14]
9 | [9, 4, 7] | [12, 14, 18]
12 | [9, 4, 7] | [12, 14, 18, 17]


In [203]:
%%time

# Task: insert 10000 numbers one-by-one, and compute (med1 + med2 + ... + med10000) modulo 10000
 
# NOTE(review): absolute path on the author's machine; adjust before re-running.
data_path = '/home/jacobsuwang/Documents/CS TRAINING/ALGORITHMS/DATA/Median.txt'

med_sum = 0
med_main = MedianMaintenance()
with open(data_path, 'r') as source:
    # Presumably one integer per line -- verify against the data file.
    for line in source:
        val = int(line)
        med_main.insert(val)
        # Add the running median as it stands after this insert.
        med_sum += med_main.get_med()
        
print med_sum%10000

1213
CPU times: user 84 ms, sys: 4 ms, total: 88 ms
Wall time: 74.3 ms


### Binary Search Tree

In [271]:
class Node:
    """One vertex of a binary tree.

    Holds a key, links to the parent and to the left/right children, and a
    ``size`` field that defaults to 0.
    """

    def __init__(self, key=None, parent=None, left=None, right=None, size=0):
        self.key, self.parent = key, parent
        self.left, self.right, self.size = left, right, size
        

class BST:
    
    def __init__(self):
        self.root = None
        
    def insert(self, val):
        z = Node(key=val)
        y = None
        x = self.root
        while x!=None:
            y = x
            if z.key<x.key:
                x = x.left
            else:
                x = x.right
        z.parent = y
        if y==None:
            self.root = z
        elif z.key<y.key:
            y.left = z
        else:
            y.right = z
            
    def _transplant(self, u, v):
        if u.parent==None:
            self.root = v
        elif u==u.parent.left:
            u.parent.left = v
        else:
            u.parent.right = v
        if v!=None:
            v.parent = u.parent
            
    def delete(self, val):
        z = self.search(self.root, val)
        if z==None: return
        if z.left==None:
            self._transplant(z, z.right)
        elif z.right==None:
            self._transplant(z, z.left)
        else:
            y = self.minimum(z.right)
            if y.parent!=z:
                self._transplant(y, y.right)
                y.right = z.right
                y.right.parent = y
            self._transplant(z, y)
            y.left = z.left
            y.left.parent = y
            
    def inorder_walk(self, x):
        if x!=None:
            self.inorder_walk(x.left)
            print x.key,
            self.inorder_walk(x.right)
            
    def search(self, x, k):
        if x==None or k==x.key:
            return x
        if k<x.key:
            return self.search(x.left, k)
        return self.search(x.right, k)
    
    def minimum(self, x):
        while x.left!=None:
            x = x.left
        return x
    
    def maximum(self, x):
        while x.right!=None:
            x = x.right
        return x
    
    def predecessor(self, x):
        if x.left!=None:
            return self.maximum(x.left)
        y = x.parent
        while y!=None and x==y.left:
            x = y
            y = y.parent
        return y
    
    def successor(self, x):
        if x.right!=None:
            return self.minimum(x.right)
        y = x.parent
        while y!=None and x==y.right:
            x = y
            y = y.parent
        return y
    

In [287]:
# Build the sample tree used by the cells below; 15 is inserted first and
# therefore becomes the root.
l = [15,6,18,3,7,17,20,2,4,13,9]
bst = BST()
for l_ in l:
    bst.insert(l_)

In [288]:
# An in-order walk prints the keys in ascending order.
bst.inorder_walk(bst.root)

2 3 4 6 7 9 13 15 17 18 20


In [289]:
# search() returns the matching node, or None when the key is absent.
print bst.search(bst.root, 15).key
print bst.search(bst.root, 30)

15
None


In [290]:
# Smallest and largest keys of the whole tree.
print bst.minimum(bst.root).key
print bst.maximum(bst.root).key

2
20


In [291]:
# Successor queries; 20 is the largest key, so it has no successor (None).
print bst.successor(bst.search(bst.root, 20))
print '[15] ->', bst.successor(bst.search(bst.root, 15)).key
print '[13] ->', bst.successor(bst.search(bst.root, 13)).key
print '[9] ->', bst.successor(bst.search(bst.root, 9)).key

None
[15] -> 17
[13] -> 15
[9] -> 13


In [292]:
# Predecessor queries.
# NOTE(review): the first line calls successor(), not predecessor(); it looks
# like a leftover from the previous cell -- confirm what was intended.
print bst.successor(bst.search(bst.root, 20))
print '[15] ->', bst.predecessor(bst.search(bst.root, 15)).key
print '[13] ->', bst.predecessor(bst.search(bst.root, 13)).key
print '[17] ->', bst.predecessor(bst.search(bst.root, 17)).key

None
[15] -> 13
[13] -> 9
[17] -> 15


In [293]:
# Delete two keys and re-print the sorted keys after each removal.
# The bare `print` ends the line started by inorder_walk's comma-prints.
bst.inorder_walk(bst.root)
print
bst.delete(7)
bst.inorder_walk(bst.root)
print
bst.delete(15)  # 15 is the root of the sample tree and has two children
bst.inorder_walk(bst.root)

2 3 4 6 7 9 13 15 17 18 20
2 3 4 6 9 13 15 17 18 20
2 3 4 6 9 13 17 18 20
