In [2]:
import heapq as hq

    #################################################################
    #   Implements the Median Maintenance algorithm using two heaps #
    #################################################################

def load(filename):
    """
        Read a text file holding one integer per line into a list of ints.

        filename: path of the input file
        returns: list of the integers, in the order they appear in the file

        Whitespace-only lines are skipped, so a blank or trailing empty line does not abort the load.
        Any other line that is not an integer still raises ValueError from int().
    """
    output = []
    with open(filename) as file:
        for line in file:
            text = line.strip()
            # int("") raises ValueError, so blank lines have to be filtered out first
            if not text:
                continue
            output.append(int(text))
    return output


def find_median(max_heap, min_heap):
    """
        Return the median of all numbers currently held in the two heaps.

        max_heap keeps the smaller numbers, stored negated, so the negation of its root is the largest of them.
        min_heap keeps the larger numbers, so its root is the smallest of them.
        With k numbers in total the median is the ((k+1)/2)th smallest when k is odd and the (k/2)th smallest
        when k is even. The two heaps are expected to hold a 50/50 split of the numbers.
    """
    total = len(max_heap) + len(min_heap)
    # one integer division covers both parities: for even totals (total + 1) // 2 == total // 2
    median_rank = (total + 1) // 2

    # max heap is too short to reach the median rank, so the median is the root of min heap
    if len(max_heap) < median_rank:
        return min_heap[0]

    # otherwise the median is the largest of the smaller numbers; undo the stored negation
    return max_heap[0] * -1
    

def insert(number, max_heap, min_heap):

    """
        Insert number into either max_heap or min_heap, then rebalance so their sizes differ by at most one.

        number: the value to add
        max_heap: list holding the smaller numbers; implemented as a heapq heap of elements * -1, so
                  max_heap[0] * -1 is the largest of the smaller numbers
        min_heap: list holding the larger numbers as a plain heapq heap
        Both lists are modified in place; nothing is returned.

        If max_heap is empty while min_heap is not (e.g. heaps seeded by the caller), the number is compared
        against the root of min_heap instead of indexing the empty max_heap.
    """
    # decide which half the number belongs to
    if max_heap:
        goes_to_max_heap = number <= max_heap[0] * -1
    elif min_heap:
        # nothing in max heap to compare with: use the smallest of the larger numbers instead
        goes_to_max_heap = number <= min_heap[0]
    else:
        # when both heaps are empty, the element goes to max heap
        goes_to_max_heap = True

    if goes_to_max_heap:
        hq.heappush(max_heap, number * -1)
    else:
        hq.heappush(min_heap, number)

    # rebalance when one heap is ahead by more than one element:
    # move the root of the longer heap across, flipping its sign on the way
    size_gap = len(max_heap) - len(min_heap)
    if size_gap > 1:
        hq.heappush(min_heap, hq.heappop(max_heap) * -1)
    elif size_gap < -1:
        hq.heappush(max_heap, hq.heappop(min_heap) * -1)
            


if __name__ == "__main__":

    # ---- test case: run the algorithm on a small sample file ----

    filename = "test.txt"
    data = load(filename)
    max_heap, min_heap, medians = [], [], []

    # feed the numbers one at a time and record the median after each arrival
    for number in data:
        insert(number, max_heap, min_heap)
        medians += [find_median(max_heap, min_heap)]

    modulo = sum(medians) % 10000

    # show the final heaps, every running median and the checksum
    print("Max heap is: ", max_heap)
    print("Min heap is: ", min_heap)
    print("Medians are: ", medians)
    print("\nOutput (sum of medians modulo 10000): \n", modulo)
        


Max heap is:  [-2303, -1640, -225, -625, -1354]
Min heap is:  [2793, 5685, 6331, 9290, 6195]
Medians are:  [6331, 2793, 2793, 2793, 2793, 1640, 2793, 2303, 2793, 2303]

Output (sum of medians modulo 10000): 
 9335


In [3]:
def HW3(filename="Median.txt"):

    """
        The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 3 lecture on
        heap applications).  The text file contains a list of the integers from 1 to 10000 in unsorted order;
        it is treated as a stream of numbers, arriving one by one.
        The answer is the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits).
        That is, (m1 + m2 + ... + m10000) mod 10000

        filename: path of the input file, one integer per line (defaults to "Median.txt")
        Prints the elapsed time and the sum of medians modulo 10000; returns nothing.
    """
    import time

    data = load(filename)
    max_heap = []
    min_heap = []
    medians = []

    #################################################################
    #   Median Maintenance algorithm using heaps (timed section)    #
    #################################################################

    # perf_counter is a monotonic, high-resolution clock, so the interval is not
    # affected by system clock adjustments the way time.time() differences are
    start = time.perf_counter()
    for number in data:
        insert(number, max_heap, min_heap)
        medians.append(find_median(max_heap, min_heap))
    modulo = sum(medians) % 10000
    end = time.perf_counter()
    print(f"Time spent on Median Maintenance algorithm using heap is {end - start} second(s).\n")
    print("Output (sum of medians modulo 10000): ", modulo)
    
    
# Run the assignment only when executed as the main module, not when imported.
if __name__ == "__main__":
    HW3()

Time spent on Median Maintenance algorithm using heap is 0.029376506805419922 second(s).

Output (sum of medians modulo 10000):  1213
