## Heap Data Structure

- A heap DS is an array that can be viewed as an almost complete binary tree: every level is filled except possibly the last one.
- a heap can be minHeap or maxHeap. 
- in a minHeap each node (parent) is smaller than or equal to its children, i.e. $H[i] \le H[2i+1]$ and $H[i] \le H[2i+2]$ for every index $i \ge 0$ whose child index is smaller than $n$, with $n$ the number of elements 
- heap property: every parent is smaller than or equal to its children; as a consequence the root of the minheap is the smallest element
- the elements that do not have children are called leaves
- the height of a node is the number of edges of the longest path from the node to a leaf
- the height of the tree is called the Height of the Heap
- $\text{Height}(H)=\mathcal{O}(\log(n))$

#### Operations on a Heap
- get max/min element: $\mathcal{O}(1)$
- insert an element into the heap
- delete the root of the heap
- heapify the heap, i.e. perform operations to restore the heap-property
- get the length of the heap

In [1]:
class MinHeap:
    '''Array implementation of a binary MinHeap.

    The children of the element stored at position i are stored at
    positions 2*i+1 and 2*i+2, and its parent at position (i-1)//2.
    Elements only need to support the ``<`` comparison, so numbers,
    strings, tuples, ... can all be stored (but not mixed).

    Attributes:
        heap: the list holding the elements in heap order.
        size: the number of leading elements of ``heap`` that are treated
            as part of the heap. It equals ``len(heap)`` except while
            ``heapsort`` is running, where the sorted tail is excluded.
    '''

    def __init__(self, arr : list = None):
        '''Initialize to an empty heap, or heapify arr if it is given.

        Note that arr is not copied: the heap takes ownership of the list
        and reorders it in place.
        '''
        self.heap = []
        self.size = 0
        # if an array is given then heapify that array
        if arr is not None:
            self.heap = arr
            self.size = len(self)
            self.heapify()

    def buble_up(self, index: int):
        '''Move the element at position index up the tree, exchanging it
        with its parent for as long as it is smaller than that parent.
        '''
        while index > 0:
            parent_index = (index - 1) // 2
            # stop as soon as the min-heap property holds for this pair
            if not self.heap[index] < self.heap[parent_index]:
                return
            self.heap[index], self.heap[parent_index] = self.heap[parent_index], self.heap[index]
            index = parent_index

    def delete_min(self):
        '''Delete and return the root (the smallest element) of the heap.

        The last element of the heap is moved to the root and then bubbled
        down to its correct position to restore the min-heap property.

        If the heap is empty returns None.
        '''
        if not self.heap:
            return None

        # remove the last element; it will replace the root
        last_elt = self.heap.pop()
        self.size -= 1

        # the heap had exactly one element: that element was the minimum
        if not self.heap:
            return last_elt

        min_elt = self.heap[0]
        self.heap[0] = last_elt

        # buble down the new root to restore the min-heap property
        self.buble_down(0)

        return min_elt

    def buble_down(self, index):
        '''Move the element at position index down the tree, exchanging it
        with its smaller child for as long as that child is smaller than it.

        Only the first ``size`` elements of the array are considered part
        of the heap. On a tie between the children the left one is chosen.
        '''
        while True:
            left_child_index = 2 * index + 1
            # no left child inside the heap means no child at all
            # (this also covers an index that is itself outside the heap)
            if left_child_index >= self.size:
                return

            # find the smaller of the existing children; comparing indices
            # instead of using an infinity sentinel keeps this working for
            # non-numeric elements
            smaller_child_index = left_child_index
            right_child_index = left_child_index + 1
            if (right_child_index < self.size
                    and self.heap[right_child_index] < self.heap[left_child_index]):
                smaller_child_index = right_child_index

            # the parent is already smaller or equal than both children
            if not self.heap[smaller_child_index] < self.heap[index]:
                return

            self.heap[index], self.heap[smaller_child_index] = self.heap[smaller_child_index], self.heap[index]
            index = smaller_child_index

    def get_min(self):
        '''Return the root element of the heap without removing it.
        If the heap is empty returns None.
        '''
        return self.heap[0] if self.heap else None

    def insert(self, elt):
        '''Insert an element into the heap.
        The element is appended at the end of the heap and then bubbled up
        to its correct position.
        '''
        self.heap.append(elt)
        self.size += 1
        self.buble_up(len(self) - 1)

    def satisfies_assertions(self):
        '''Check that the min-heap property is satisfied,
        i.e. that no child in the heap is smaller than its parent.

        Returns a tuple (ok, message) where ok is a bool and message
        describes the first violation found, if any.
        '''
        for i in range(1, len(self)):
            if self.heap[i] < self.heap[(i-1)//2]:
                mes = f'Heap property is violated between child at position {i} and father at {(i-1)//2}'
                return False, mes

        return True, 'MinHeap property holds'

    def heapify(self):
        '''Rearrange the underlying array in place so that it satisfies
        the min-heap property.
        '''
        # make sure the whole array is considered part of the heap
        self.size = len(self.heap)
        # leaves are trivially heaps, so start from the last parent
        for i in range(len(self)//2 - 1, -1, -1):
            self.buble_down(i)

    def heapsort(self):
        '''Sort the heap array in place in ascending order.

        Algorithm:
        Exchange the root of the heap (the minimum) with the last element
        of the heap; the minimum is now in its final position.
        Shrink the heap by one to exclude that element and bubble down the
        new root. Repeating this for every element leaves the array sorted
        in descending order, so it is finally reversed.
        The size of the heap is restored at the end; an ascending array is
        itself a valid min-heap.
        '''
        for last in range(len(self) - 1, 0, -1):
            # exchange the root (min element) with the last heap element
            self.heap[0], self.heap[last] = self.heap[last], self.heap[0]
            # exclude the element that was just placed from the heap
            self.size = last
            self.buble_down(0)

        # restore the size of the heap
        self.size = len(self)

        # reverse the array to have it in ascending order
        self.heap.reverse()

    def __len__(self):
        '''Return the number of elements stored in the heap array.
        '''
        return len(self.heap)

    def __repr__(self):
        '''Return a string representation of the heap array.
        '''
        return str(self.heap)

In [2]:
h = MinHeap()
print('Inserting: 5, 2, 4, -1 and 7 in that order.')
# each pair is (value to insert, minimum expected right after the insertion)
for value, expected_min in [(5, 5), (2, 2), (4, 2), (-1, -1), (7, -1)]:
    h.insert(value)
    print(f'\t Heap = {h}')
    assert h.get_min() == expected_min
print(h.satisfies_assertions())

print('Deleting minimum element')
# minimum expected after each successive deletion
for expected_min in (2, 4, 5, 7):
    h.delete_min()
    print(f'\t Heap = {h}')
    assert h.get_min() == expected_min
# delete_min on a heap of size 1 should leave the heap empty
h.delete_min()
print(f'\t Heap = {h}')
print('All tests passed: 10 points!')

Inserting: 5, 2, 4, -1 and 7 in that order.
	 Heap = [5]
	 Heap = [2, 5]
	 Heap = [2, 5, 4]
	 Heap = [-1, 2, 4, 5]
	 Heap = [-1, 2, 4, 5, 7]
(True, 'MinHeap property holds')
Deleting minimum element
	 Heap = [2, 5, 4, 7]
	 Heap = [4, 5, 7]
	 Heap = [5, 7]
	 Heap = [7]
	 Heap = []
All tests passed: 10 points!


In [3]:
# Build a min-heap from an unsorted list, verify it, then sort it in place.
h = MinHeap([5, 3, 4, 5, 2, 5, 6, 4, 6, 11, 33, 3])
print(h)
# satisfies_assertions() returns (ok, message); assert on it so that a
# broken heap fails loudly instead of the result being silently dropped
is_heap, message = h.satisfies_assertions()
assert is_heap, message
h.heapsort()
print(h)

[2, 3, 3, 4, 5, 4, 6, 5, 6, 11, 33, 5]
[2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 11, 33]


In [4]:
class MaxHeap:
    '''Array implementation of a binary MaxHeap.

    The children of the element stored at position i are stored at
    positions 2*i+1 and 2*i+2, and its parent at position (i-1)//2.
    Elements only need to support the ``<`` comparison, so numbers,
    strings, tuples, ... can all be stored (but not mixed).

    Attributes:
        heap: the list holding the elements in heap order.
        size: the number of leading elements of ``heap`` that are treated
            as part of the heap. It equals ``len(heap)`` except while
            ``heapsort`` is running, where the sorted tail is excluded.
    '''

    def __init__(self, arr : list = None):
        '''Initialize to an empty heap, or heapify arr if it is given.

        Note that arr is not copied: the heap takes ownership of the list
        and reorders it in place.
        '''
        self.heap = []
        self.size = 0
        # if an array is given then heapify that array
        if arr is not None:
            self.heap = arr
            self.size = len(self)
            self.heapify()

    def buble_up(self, index: int):
        '''Move the element at position index up the tree, exchanging it
        with its parent for as long as it is bigger than that parent.
        '''
        while index > 0:
            parent_index = (index - 1) // 2
            # stop as soon as the max-heap property holds for this pair
            if not self.heap[parent_index] < self.heap[index]:
                return
            self.heap[index], self.heap[parent_index] = self.heap[parent_index], self.heap[index]
            index = parent_index

    def delete_max(self):
        '''Delete and return the root (the biggest element) of the heap.

        The last element of the heap is moved to the root and then bubbled
        down to its correct position to restore the max-heap property.

        If the heap is empty returns None.
        '''
        if not self.heap:
            return None

        # remove the last element; it will replace the root
        last_elt = self.heap.pop()
        self.size -= 1

        # the heap had exactly one element: that element was the maximum
        if not self.heap:
            return last_elt

        max_elt = self.heap[0]
        self.heap[0] = last_elt

        # buble down the new root to restore the max-heap property
        self.buble_down(0)

        return max_elt

    def buble_down(self, index):
        '''Move the element at position index down the tree, exchanging it
        with its bigger child for as long as that child is bigger than it.

        Only the first ``size`` elements of the array are considered part
        of the heap. On a tie between the children the right one is chosen.
        '''
        while True:
            left_child_index = 2 * index + 1
            # no left child inside the heap means no child at all
            # (this also covers an index that is itself outside the heap)
            if left_child_index >= self.size:
                return

            # find the bigger of the existing children; comparing indices
            # instead of using a -infinity sentinel keeps this working for
            # non-numeric elements
            bigger_child_index = left_child_index
            right_child_index = left_child_index + 1
            if (right_child_index < self.size
                    and not self.heap[right_child_index] < self.heap[left_child_index]):
                bigger_child_index = right_child_index

            # the parent is already bigger or equal than both children
            if not self.heap[index] < self.heap[bigger_child_index]:
                return

            self.heap[index], self.heap[bigger_child_index] = self.heap[bigger_child_index], self.heap[index]
            index = bigger_child_index

    def get_max(self):
        '''Return the root element of the heap without removing it.
        If the heap is empty returns None.
        '''
        return self.heap[0] if self.heap else None

    def insert(self, elt):
        '''Insert an element into the heap.
        The element is appended at the end of the heap and then bubbled up
        to its correct position.
        '''
        self.heap.append(elt)
        self.size += 1
        self.buble_up(len(self) - 1)

    def satisfies_assertions(self):
        '''Check that the max-heap property is satisfied,
        i.e. that no child in the heap is bigger than its parent.

        Returns a tuple (ok, message) where ok is a bool and message
        describes the first violation found, if any.
        '''
        for i in range(1, len(self)):
            if self.heap[(i-1)//2] < self.heap[i]:
                mes = f'Heap property is violated between child at position {i} and father at {(i-1)//2}'
                return False, mes

        return True, 'MaxHeap property holds'

    def heapify(self):
        '''Rearrange the underlying array in place so that it satisfies
        the max-heap property.
        '''
        # make sure the whole array is considered part of the heap
        self.size = len(self.heap)
        # leaves are trivially heaps, so start from the last parent
        for i in range(len(self)//2 - 1, -1, -1):
            self.buble_down(i)

    def heapsort(self):
        '''Sort the heap array in place in descending order.

        Algorithm:
        Exchange the root of the heap (the maximum) with the last element
        of the heap; the maximum is now in its final position.
        Shrink the heap by one to exclude that element and bubble down the
        new root. Repeating this for every element leaves the array sorted
        in ascending order, so it is finally reversed.
        The size of the heap is restored at the end; a descending array is
        itself a valid max-heap.
        '''
        for last in range(len(self) - 1, 0, -1):
            # exchange the root (max element) with the last heap element
            self.heap[0], self.heap[last] = self.heap[last], self.heap[0]
            # exclude the element that was just placed from the heap
            self.size = last
            self.buble_down(0)

        # restore the size of the heap
        self.size = len(self)

        # reverse the array to have it in descending order
        self.heap.reverse()

    def __len__(self):
        '''Return the number of elements stored in the heap array.
        '''
        return len(self.heap)

    def __repr__(self):
        '''Return a string representation of the heap array.
        '''
        return str(self.heap)

In [5]:
h = MaxHeap()
print('Inserting: 5, 2, 4, -1 and 7 in that order.')
# each pair is (value to insert, maximum expected right after the insertion)
for value, expected_max in [(5, 5), (2, 5), (4, 5), (-1, 5), (7, 7)]:
    h.insert(value)
    print(f'\t Heap = {h}')
    assert h.get_max() == expected_max
# satisfies_assertions() returns (ok, message); assert on it so that a
# broken heap fails loudly instead of the result being silently dropped
is_heap, message = h.satisfies_assertions()
assert is_heap, message

print('Deleting maximum element')
# maximum expected after each successive deletion
for expected_max in (5, 4, 2, -1):
    h.delete_max()
    print(f'\t Heap = {h}')
    assert h.get_max() == expected_max
# Test delete_max on heap of size 1, should result in empty heap.
h.delete_max()
print(f'\t Heap = {h}')
print('All tests passed: 5 points!')

Inserting: 5, 2, 4, -1 and 7 in that order.
	 Heap = [5]
	 Heap = [5, 2]
	 Heap = [5, 2, 4]
	 Heap = [5, 2, 4, -1]
	 Heap = [7, 5, 4, -1, 2]
Deleting maximum element
	 Heap = [5, 2, 4, -1]
	 Heap = [4, 2, -1]
	 Heap = [2, -1]
	 Heap = [-1]
	 Heap = []
All tests passed: 5 points!


In [7]:
# Build a max-heap from an unsorted list, verify it, then sort it in place.
h = MaxHeap([5, 3, 4, 5, 2, 5, 6, 4, 6, 11, 33, 3])
print(h)
# satisfies_assertions() returns (ok, message); assert on it so that a
# broken heap fails loudly instead of the result being silently dropped
is_heap, message = h.satisfies_assertions()
assert is_heap, message
h.heapsort()
print(h)

[33, 11, 6, 6, 5, 5, 4, 4, 5, 3, 2, 3]
[33, 11, 6, 6, 5, 5, 5, 4, 4, 3, 3, 2]


### Operations on a Heap
|operations | time | space |
|-----------|------|-------|
|insertion  | $\mathcal{O}(\log_2n)$|$\mathcal{O}(1)$ |
|deletion  | $\mathcal{O}(\log_2n)$|$\mathcal{O}(1)$ |
|min/max access| $\mathcal{O}(1)$|$\mathcal{O}(1)$ |
|heapify array  | $\mathcal{O}(n)$|$\mathcal{O}(1)$ |
|heapsort  | $\mathcal{O}(n\log_2n)$|$\mathcal{O}(1)$ |

### Heapq module of python
To implement a min heap, python has a built-in module called heapq  
Note that there is no built-in max heap, but to use a max-heap we could simply negate all the values in the array and use a min heap  
There are the following methods:
- heapq.heappush(heap, item): inserts the item into the heap
- heapq.heappop(heap): pop and return the smallest item in the heap
- heapq.heappushpop(heap, item): push item into the heap, then pop and return the smallest item
- heapq.heapify(l : list): transforms the list l into a min-heap, in place
- heapq.heapreplace(heap, item): pop and return the smallest item of the heap, then push the new item into the heap
- heapq.merge(*iterables, key=None, reverse=False): merges several sorted inputs into a single sorted output and returns an iterator over it
- heapq.nlargest(n, iterable, key=None): returns a list of the n largest elements in iterable
- heapq.nsmallest(n, iterable, key=None): returns a list of the n smallest elements in iterable

In [1]:
import heapq

# Turn a plain list into a min-heap in place, then push and pop on it.
heap = [3, 2, 4, 1, 6, 8, 1, 0]
print(f'heapifying the array {heap}')
heapq.heapify(heap)
print(f'heap after heapifying is {heap}')
# push two new items, showing the heap after each push
for new_item in (5, -1):
    heapq.heappush(heap, new_item)
    print(f'after inserting {new_item}: {heap}')
# remove the smallest item (the root of the heap)
heapq.heappop(heap)
print(f'poping the smallest item: {heap}')

heapifying the array [3, 2, 4, 1, 6, 8, 1, 0]
heap after heapifying is [0, 1, 1, 2, 6, 8, 4, 3]
after inserting 5: [0, 1, 1, 2, 6, 8, 4, 3, 5]
after inserting -1: [-1, 0, 1, 2, 1, 8, 4, 3, 5, 6]
poping the smallest item: [0, 1, 1, 2, 6, 8, 4, 3, 5]


In [10]:
# Tuples are compared element by element, so a heap of tuples is ordered
# by the first component and ties are broken by the second one.
arr = [(0, 10), (1, 5), (2, 3), (5, 3), (4, 15)]
# flip every pair so that the former second component becomes the heap key
arr = [pair[::-1] for pair in arr]
heapq.heapify(arr)
arr

[(3, 2), (3, 5), (10, 0), (5, 1), (15, 4)]

In [15]:
# Heapify a list, then pop the smallest item and show it with what is left.
h = [1, 0, -1, 4, 5, 6]
heapq.heapify(h)
smallest = heapq.heappop(h)
smallest, h

(-1, [0, 4, 1, 6, 5])

### Data Structure to hold the median of a stream of numbers

Given a sorted array of numbers, the median is:
- the middle element if the array has odd length
- the average of the two middle elements if the array has even length

To hold the median of a stream of number efficiently, we can design a data structure as follows:
- construct two heaps: one minheap (min_heap), one maxheap (max_heap)
- each element in the min_heap is bigger than or equal to every element in the max_heap, i.e. $H_{min}[0]\ge H_{max}[0]$
- the sizes of both heaps are equal or differ by one
- when inserting a new element compare it to the root of the min_heap: if it is bigger or equal insert it in min_heap, otherwise insert it in max_heap
- balance the sizes of both heaps
- at any moment access the median in $\mathcal{O}(1)$ time

In [13]:
class MedianHeaps:
    '''Data structure holding the median of a stream of numbers.

    The upper half of the numbers is kept in a MinHeap (min_heap) and the
    lower half in a MaxHeap (max_heap), such that:
    - every element of min_heap is bigger or equal than every element of
      max_heap,
    - both heaps have the same size, or min_heap has one more element.
    The median can then be read from the roots of the two heaps.
    '''

    def __init__(self):
        '''Initialize with two empty heaps.
        '''
        self.min_heap = MinHeap()
        self.max_heap = MaxHeap()

    def satisfies_assertions(self):
        '''Test if the invariants of the data structure hold.

        Returns True if the sizes of the heaps are equal or min_heap has
        exactly one more element, and the root of min_heap is not smaller
        than the root of max_heap; returns False otherwise.
        '''
        # size invariant (also covers the empty and one-element cases)
        if self.min_heap.size - self.max_heap.size not in (0, 1):
            return False

        # with an empty max_heap there is nothing to compare
        if self.max_heap.size == 0:
            return True

        # order invariant: the smallest of the upper half must not be
        # smaller than the biggest of the lower half
        return not self.min_heap.get_min() < self.max_heap.get_max()

    def get_median(self):
        '''Returns the median of the numbers.
        If no number has been inserted returns None.
        '''
        # no elements
        if len(self) == 0:
            return None

        # even number of elements: average of the two middle elements
        if self.min_heap.size == self.max_heap.size:
            return (self.min_heap.get_min() + self.max_heap.get_max()) / 2

        # odd number of elements: min_heap holds the extra, middle element
        return self.min_heap.get_min()

    def insert(self, elt):
        '''Insert element into the data structure.

        The element goes to min_heap when it is not smaller than the root
        of min_heap (or when min_heap is empty) and to max_heap otherwise;
        the sizes of the heaps are rebalanced afterwards.
        '''
        if self.min_heap.size == 0 or not elt < self.min_heap.get_min():
            self.min_heap.insert(elt)
        else:
            self.max_heap.insert(elt)

        self.balance_heap_size()

    def balance_heap_size(self):
        '''Balance the sizes of the two heaps to differ by at most one.
        When they differ by one the min_heap has one more element.
        '''
        # min_heap is too big: move its smallest elements to max_heap
        while self.min_heap.size - self.max_heap.size > 1:
            self.max_heap.insert(self.min_heap.delete_min())

        # max_heap is bigger: move its biggest elements to min_heap
        while self.max_heap.size > self.min_heap.size:
            self.min_heap.insert(self.max_heap.delete_max())

    def __repr__(self):
        '''Return a string showing the arrays of both heaps.
        '''
        return f'Minheap: {self.min_heap} + Maxheap: {self.max_heap}'

    def __len__(self):
        '''Return the total number of elements stored in both heaps.
        '''
        return self.min_heap.size + self.max_heap.size

In [15]:
m = MedianHeaps()
print('Inserting 1, 5, 2, 4, 18, -4, 7, 9')

# each step is (value to insert, expected median afterwards,
#               expected median as spelled in the failure message)
steps = [
    (1, 1, '1'),
    (5, 3, '3.0'),
    (2, 2, '2'),
    (4, 3, '3'),
    (18, 4, '4'),
    (-4, 3, '3'),
    (7, 4, '4'),
    (9, 4.5, '4.5'),
]

for value, expected, shown in steps:
    m.insert(value)
    print(m)
    print(m.get_median())
    print(f'assertions are satisfied? {m.satisfies_assertions()}')
    assert m.get_median() == expected, f'expected median = {shown}, your code returned {m.get_median()}'

print('All tests passed: 15 points')


Inserting 1, 5, 2, 4, 18, -4, 7, 9
Minheap: [1] + Maxheap: []
1
assertions are satisfied? True
Minheap: [5] + Maxheap: [1]
3.0
assertions are satisfied? True
Minheap: [2, 5] + Maxheap: [1]
2
assertions are satisfied? True
Minheap: [4, 5] + Maxheap: [2, 1]
3.0
assertions are satisfied? True
Minheap: [4, 5, 18] + Maxheap: [2, 1]
4
assertions are satisfied? True
Minheap: [4, 5, 18] + Maxheap: [2, 1, -4]
3.0
assertions are satisfied? True
Minheap: [4, 5, 18, 7] + Maxheap: [2, 1, -4]
4
assertions are satisfied? True
Minheap: [5, 7, 18, 9] + Maxheap: [4, 2, -4, 1]
4.5
assertions are satisfied? True
All tests passed: 15 points
