# Median - intro


- https://stackoverflow.com/questions/1387497/find-median-value-from-a-growing-set

- https://en.wikipedia.org/wiki/Selection_algorithm#Online_selection_algorithm

- https://www.geeksforgeeks.org/median-of-stream-of-integers-running-integers/

- https://www.geeksforgeeks.org/median-of-stream-of-running-integers-using-stl/


# Median - 2 sorted arrays same size 

- https://www.geeksforgeeks.org/median-of-two-sorted-arrays/ 

method 1: takes O(n) time and O(1) memory

In [2]:
def median_same_size(a, b):
    n = len(a)
    i, j = 0, 0
    
    for _ in range(n-1):
        if a[i] > b[j]:
            j += 1
        else:
            i += 1
    
    return (a[i] + b[j])/2

In [3]:
#try it out
#a, b = [1,2,3,4,5], [2,3,4,5,6]
a, b = [1,2,3,4,5], [6,7,8,9,10]
print(median_same_size(a,b))

5.5


method 2: uses recursion, so it will require memory. In time it is O(log n) as the array is divided by 2 every time

In [13]:
# with recursion
def median_sorted_array(a):
    if len(a)%2 == 1:
        return a[int(len(a)/2)]
    else:
        return (a[len(a)/2-1]+ a[len(a)/2])/2
    
def median_same_size_v2(a, b):
    
    # end recursion:
    if len(a) == 2:
        if a[1] > b[0]:
            return (a[0] + b[1])/2
        else:
            return (a[1] + b[0])/2
    
    m1 = median_sorted_array(a)
    m2 = median_sorted_array(b)
    
    # recursion
    ind = int(len(a)/2)+1
    if m1 < m2:
        return median_same_size_v2(a[ind:], b[:ind])
    return median_same_size_v2(b[ind:], a[:ind])

In [14]:
#try it out
#a, b = [1,2,3,4,5], [2,3,4,5,6]
a, b = [1,2,3,4,5], [6,7,8,9,10]

print(median_same_size_v2(a,b))

5.5


# Median - 2 sorted arrays different size 

method 1: is linear with time O(n+m)

In [32]:
def median(a, b):
    
    length = len(a) + len(b)
    n = len(a) - 1
    m = len(b) - 1
    
    m1, m2 = 0, 0
    
    # total lenght == odd -> no mean computed
    if length%2 == 1:
        
        i, j = 0, 0
        for _ in range(int(length/2)):
            if i != n and j != m:
                if a[i] < b[j]:
                    i += 1
                    m1 = a[i]
                else:
                    j += 1
                    m1 = b[j]
            elif i == n:
                m1 = b[j]
                j += 1
            else:
                m1 = a[i]
                i += 1
        return m1

    # total lenght == even -> mean computed
    else:
        i, j = 0, 0
        for _ in range(int(length/2)):
            m2 = m1 
            if i != n and j != m:
                if a[i] < b[j]:
                    i += 1
                    m1 = a[i]
                else:
                    j += 1
                    m1 = b[j]
            elif i == n:
                m1 = b[j]
                j += 1
            else:
                m1 = a[i]
                i += 1
        return (m1 + m2)/2        

In [36]:
# try it out
a = [1,2,3,4,5]
b = [6,7,8,9,10]
print(median(a,b))

6.0


method 2: recursively. It has Time Complexity of O(LogM + LogN). Takes a lot of time to code: lot of test cases.

https://www.geeksforgeeks.org/median-of-two-sorted-arrays-of-different-sizes/

# Median - repeated elements
It is easy for an static list. The problem emerges when an increasing list is considered. Take for instance the median of the scores of basketball matches:

In [None]:
class Median():
    def __init__(self, ini_list=None):
        
        max_score = 200
        self.counter = [0]*max_score
        for i in ini_list:
            self.counter[i] += 1
        
        self.d = len(ini_list)            
        self.median = self.median_val()

    def median_val(self):
        
        counter = 0
        
        if self.d%2 == 1:
            for ind, val in enumerate(self.counter):
                counter += val
                if counter > self.d/2:
                    return ind
        else:
            m1, m2 = None, None
            for ind, val in enumerate(self.counter):
                counter += val
                if counter >= self.d/2 and m1 is None:
                    m1 = ind
                if counter >= self.d/2 + 1:
                    m2 = ind
                    break
            return (m1 + m2)/2

    def add_items_v1(self, alist):
        self.d += len(alist)
        for i in alist:
            self.counter[i] += 1
        self.median = self.median_val()

    def add_items_v2(self, alist):
        pass

In [29]:
# try it out
b = Median([2,2,5,5,5])
print(b.median)
b.add_items_v1([1,1,1,1,1,1,1,1,1])
print(b.median)

5
1.0


# Median - stream of integers 

- https://www.geeksforgeeks.org/median-of-stream-of-integers-running-integers/
- https://leetcode.com/problems/find-median-from-data-stream/solution/

In [37]:
# method 1 - using sort insertion
# time = O(n) = O(n) + O(log n) = shifting elements when inserting + finding index to insert
# space = O(n)

In [59]:
# method 2 - using two heaps
# time = O(log n) inserting and removing from a heap + finding index to insert
# space = O(n)

# class MinHeap
# class MaxHeap
# class MedianDs
import heapq as hp

class MinHeap():
    def __init__(self):

        self.length = 0
        self.heap = []

    def insert(self, val):
        hp.heappush(self.heap, val)
        self.length += 1
        
    def pop_root(self):
        self.length -= 1
        return hp.heappop(self.heap)

class MaxHeap():
    def __init__(self):

        self.length = 0
        self.heap = []

    def insert(self, val):
        hp.heappush(self.heap, -val)
        self.length += 1
        
    def pop_root(self):
        self.length -= 1
        return -hp.heappop(self.heap)

class MedianDs():
    def __init__(self):
        self.minheap = MinHeap()
        self.maxheap = MaxHeap()
        self.len_minheap = self.minheap.length
        self.len_maxheap = self.maxheap.length

    def median(self):
        if (self.minheap.length + self.maxheap.length)%2 == 0:
            return (self.minheap.heap[0] + -self.maxheap.heap[0])/2
        elif self.minheap.length > self.maxheap.length:
            return self.minheap.heap[0]
        return -self.maxheap.heap[0]
    
    def insert(self, val):
        if self.minheap.length == 0:
            self.minheap.insert(val)
        elif self.maxheap.length == 0:
            self.maxheap.insert(val)
        else:

            med_min = -self.maxheap.heap[0]
            med_max = self.minheap.heap[0]

            if val <= med_min:
                if self.maxheap.length <= self.minheap.length:
                    self.maxheap.insert(val)
                else:
                    root_maxheap = self.maxheap.pop_root()
                    self.minheap.insert(root_maxheap)
                    self.maxheap.insert(val)
            else:
                if self.minheap.length <= self.maxheap.length:
                    self.minheap.insert(val)
                else:
                    root_minheap = self.minheap.pop_root()
                    self.maxheap.insert(root_minheap)
                    self.minheap.insert(val)

In [60]:
# try it out

a = [10, 1, 5, 15, 20, 2, 3, 4]
hq = MedianDs()

for ind, i in enumerate(a):
    hq.insert(i)
    print(sorted(a[:ind+1]))
    print(hq.maxheap.heap, hq.minheap.heap)
    print(hq.median())

[10]
[] [10]
10
[1, 10]
[-1] [10]
5.5
[1, 5, 10]
[-1] [5, 10]
5
[1, 5, 10, 15]
[-5, -1] [10, 15]
7.5
[1, 5, 10, 15, 20]
[-5, -1] [10, 15, 20]
10
[1, 2, 5, 10, 15, 20]
[-5, -1, -2] [10, 15, 20]
7.5
[1, 2, 3, 5, 10, 15, 20]
[-5, -3, -2, -1] [10, 15, 20]
5
[1, 2, 3, 4, 5, 10, 15, 20]
[-4, -3, -2, -1] [5, 10, 20, 15]
4.5


# Median - fraudalent activity notifications

https://www.hackerrank.com/challenges/fraudulent-activity-notifications/problem

In [3]:
# hackerrank problem

from collections import deque

def median(v, d):
    count = 0
    if d%2==0:
        m1 = None
        m2 = None
        for i in range(len(v)):
            count += v[i]
            if count >= d/2 and m1 is None:
                m1 = i
            if count >= d/2 + 1:
                m2 = i
                break
        return (m1 + m2)/2
    else:
        for i in range(len(v)):
            count += v[i]
            if count > d/2:
                return i
    return -1

def activityNotifications(expenditure, d):
    dq = deque(expenditure[:d])
    v = [0]*201
    for n in dq:
        v[n] += 1
    count = 0
    for current in expenditure[d:]:
        if current >= median(v, d)*2:
            count += 1
        v[current] += 1
        dq.append(current)
        v[dq.popleft()] -= 1
    return count