In [2]:
import heapq
import random
import time

In [3]:
class MaxHeap:
    def __init__(self,k):
        self.heap = []
        self.k = k
        
    def add(self,value):
        if len(self.heap) < self.k:
            #add if the heap has less then k elements
            heapq.heappush(self.heap,-value)
        elif value < -self.heap[0]:
            #replace the larger if the new value is smaller
            heapq.heapreplace(self.heap,-value)
            
    def get_kth_smallest(self):
        # The largest number in the heap is the k'th smallest value
        return -self.heap[0]
    

In [4]:
def generate_random_stream(size, max_value):
    for _ in range(size):
        yield random.randint(0, max_value)

In [5]:
def generate_fibonacci_stream(size, modulo):
    a, b = 0, 1
    for _ in range(size):
        yield a % modulo
        a, b = b, a + b

In [6]:
def run_experiment(data_type, size, modulo, k):
    heap = MaxHeap(k)
    stream_generator = generate_random_stream if data_type == 'random' else generate_fibonacci_stream
    start_time = time.perf_counter()
    
    for value in stream_generator(size, modulo):
        heap.add(value)
    
    end_time = time.perf_counter()
    kth_smallest = heap.get_kth_smallest()
    total_time = end_time - start_time
    
    return kth_smallest, total_time

In [7]:
results = []
stream_sizes = [1000000, 100000000]
modulos = [1000, 1000000, 100000000]
ks = [100, 10000, 1000000]

for data_type in ['random']:
    for modulo in modulos:
        for k in ks:
            for size in stream_sizes:
                kth_smallest, total_time = run_experiment(data_type, size, modulo, k)
                results.append((data_type, size, modulo, k, kth_smallest, total_time))

# Now you can print or save the results
for result in results:
    print(f"Data: {result[0]}, Size: {result[1]}, Modulo: {result[2]}, K: {result[3]}, K'th smallest: {result[4]}, Time: {result[5]:.4f} seconds")


Data: random, Size: 1000000, Modulo: 1000, K: 100, K'th smallest: 0, Time: 0.3189 seconds
Data: random, Size: 100000000, Modulo: 1000, K: 100, K'th smallest: 0, Time: 34.0634 seconds
Data: random, Size: 1000000, Modulo: 1000, K: 10000, K'th smallest: 9, Time: 0.3433 seconds
Data: random, Size: 100000000, Modulo: 1000, K: 10000, K'th smallest: 0, Time: 31.6866 seconds
Data: random, Size: 1000000, Modulo: 1000, K: 1000000, K'th smallest: 1000, Time: 0.3708 seconds
Data: random, Size: 100000000, Modulo: 1000, K: 1000000, K'th smallest: 10, Time: 36.6616 seconds
Data: random, Size: 1000000, Modulo: 1000000, K: 100, K'th smallest: 104, Time: 0.3050 seconds
Data: random, Size: 100000000, Modulo: 1000000, K: 100, K'th smallest: 0, Time: 30.4776 seconds
Data: random, Size: 1000000, Modulo: 1000000, K: 10000, K'th smallest: 9966, Time: 0.3336 seconds
Data: random, Size: 100000000, Modulo: 1000000, K: 10000, K'th smallest: 100, Time: 36.2652 seconds
Data: random, Size: 1000000, Modulo: 1000000, 

In [9]:
results = []
stream_sizes = [1000000]
modulos = [1000, 1000000, 100000000]
ks = [100, 10000, 1000000]

for data_type in ['fibonacci']:
    for modulo in modulos:
        for k in ks:
            for size in stream_sizes:
                kth_smallest, total_time = run_experiment(data_type, size, modulo, k)
                results.append((data_type, size, modulo, k, kth_smallest, total_time))

# Now you can print or save the results
for result in results:
    print(f"Data: {result[0]}, Size: {result[1]}, Modulo: {result[2]}, K: {result[3]}, K'th smallest: {result[4]}, Time: {result[5]:.4f} seconds")


Data: fibonacci, Size: 1000000, Modulo: 1000, K: 100, K'th smallest: 0, Time: 72.6925 seconds
Data: fibonacci, Size: 1000000, Modulo: 1000, K: 10000, K'th smallest: 9, Time: 107.1915 seconds
Data: fibonacci, Size: 1000000, Modulo: 1000, K: 1000000, K'th smallest: 999, Time: 99.1714 seconds
Data: fibonacci, Size: 1000000, Modulo: 1000000, K: 100, K'th smallest: 93, Time: 105.5524 seconds
Data: fibonacci, Size: 1000000, Modulo: 1000000, K: 10000, K'th smallest: 9954, Time: 111.3390 seconds
Data: fibonacci, Size: 1000000, Modulo: 1000000, K: 1000000, K'th smallest: 999997, Time: 79.1165 seconds
Data: fibonacci, Size: 1000000, Modulo: 100000000, K: 100, K'th smallest: 6464, Time: 57.5088 seconds
Data: fibonacci, Size: 1000000, Modulo: 100000000, K: 10000, K'th smallest: 977369, Time: 57.2106 seconds
Data: fibonacci, Size: 1000000, Modulo: 100000000, K: 1000000, K'th smallest: 99999848, Time: 57.1575 seconds
