In [1]:
from collections import deque
import numpy as np

from running_stats import RunningMean, RunningVariance, RunningCovariance


In [2]:
# Sanity check: RunningMean must agree with np.mean computed over the same
# window after every push / pop / pushpop.  The values are still printed for
# inspection, and each pair is also asserted so that a mismatch fails loudly
# instead of depending on someone comparing the printed numbers by eye.

rm = RunningMean([1,2,3])
rm.push(3)
rm.push(10)
rm.push(-10)
expected_mean = np.mean([1,2,3,3,10,-10])
print(rm.mean)
print(expected_mean)
np.testing.assert_allclose(rm.mean, expected_mean)

# pop() hands back the oldest values first.
assert rm.pop() == 1
assert rm.pop() == 2
expected_mean = np.mean([3,3,10,-10])
print(rm.mean)
print(expected_mean)
np.testing.assert_allclose(rm.mean, expected_mean)

# pushpop(12) appends 12 and drops the oldest remaining value (the first 3).
rm.pushpop(12)
expected_mean = np.mean([3,10,-10,12])
print(rm.mean)
print(expected_mean)
np.testing.assert_allclose(rm.mean, expected_mean)


1.5
1.5
1.4999999999999996
1.5
3.7499999999999996
3.75


In [3]:
# Benchmark: shows that a single combined pushpop() call is more efficient
# than a separate push() followed by pop() when the workload needs both.

import timeit
M = 10  # number of push/pop rounds performed by each benchmark function call

def test_push_pop():
    """Benchmark body: M rounds of a separate push() followed by a pop()."""
    window = RunningMean([1])
    for value in range(M):
        window.push(value)
        window.pop()

def test_pushpop():
    """Benchmark body: M rounds of the combined pushpop() call."""
    window = RunningMean([1])
    for value in range(M):
        window.pushpop(value)

# Each statement is run with timeit's default repetition count
# (number=1000000); the printed figure is the total elapsed time in seconds.
# Plain string literals are used: the statements contain no placeholders, so
# an f-string prefix would be misleading.
print(timeit.timeit('test_push_pop()',
                    globals=globals()))

print(timeit.timeit('test_pushpop()',
                    globals=globals()))


6.406019415997434
4.220219249997172


In [4]:
# Sanity check: RunningVariance must agree with np.var over the same window.
# Values are printed for inspection and also asserted, so a mismatch raises
# instead of depending on someone comparing the printed numbers by eye.

rv = RunningVariance([1,1,1])
assert rv.variance == 0  # three identical values have no spread
for i in range(4):
    rv.push(i)
assert rv.pop() == 1
rv.pop()

# rv._x_rm.q is private state holding the current window of values; it is
# read here only because no public accessor is visible from this notebook.
window = list(rv._x_rm.q)
print(window)
print(np.var(window))
print(rv.variance)
assert window == [1,0,1,2,3]
np.testing.assert_allclose(rv.variance, np.var(window))

# pushpop(10) appends 10 and drops the oldest remaining value.
rv.pushpop(10)
print(np.var([0,1,2,3,10]))
print(rv.variance)
print(rv._x_rm.q)
assert list(rv._x_rm.q) == [0,1,2,3,10]
np.testing.assert_allclose(rv.variance, np.var([0,1,2,3,10]))


[1, 0, 1, 2, 3]
1.04
1.040000000000001
12.559999999999999
12.560000000000002
deque([0, 1, 2, 3, 10])


In [5]:
def naive_covariance(data1, data2):
    """Return the population covariance of two equal-length sequences.

    Uses the textbook single-pass formula
    ``(sum(x*y) - sum(x) * sum(y) / n) / n``, i.e. it divides by ``n`` rather
    than ``n - 1``.  It is meant as a simple reference implementation; the
    subtraction can lose precision when the sums are large compared with the
    covariance itself.

    Args:
        data1: Sized sequence of numbers.
        data2: Sized sequence of numbers with the same length as ``data1``.

    Returns:
        The covariance of ``data1`` and ``data2`` as a number.

    Raises:
        ValueError: If the two sequences differ in length.
        ZeroDivisionError: If the sequences are empty.
    """
    n = len(data1)
    # zip() would silently truncate to the shorter input while n and the
    # individual sums still cover the full inputs, giving a meaningless
    # result, so mismatched lengths are rejected up front.
    if len(data2) != n:
        raise ValueError(
            "data1 and data2 must have the same length "
            f"(got {n} and {len(data2)})"
        )
    sum1 = sum(data1)
    sum2 = sum(data2)
    sum12 = sum(i1 * i2 for i1, i2 in zip(data1, data2))
    return (sum12 - sum1 * sum2 / n) / n

# Sanity check: RunningCovariance must agree with naive_covariance over the
# same window.  Values are printed for inspection and also asserted, so a
# mismatch raises instead of depending on someone comparing numbers by eye.
rc = RunningCovariance([1,2,3], [1,2,3])
rc.covariance  # value is discarded; only exercises the property on the initial window
for i in range(4):
    rc.push(i,i)

nums = [1,2,3,0,1,2,3]
print(naive_covariance(nums, nums))
print(rc.covariance)
np.testing.assert_allclose(rc.covariance, naive_covariance(nums, nums))

# Each pop() returns the oldest (x, y) pair.
print(rc.pop())
print(rc.pop())
nums = [3,0,1,2,3]
print(naive_covariance(nums,nums))
print(rc.covariance)
np.testing.assert_allclose(rc.covariance, naive_covariance(nums, nums))

# Two pushpop() calls drop the two oldest pairs and append (4, 5) and (6, 7),
# so the two series now differ from each other.
rc.pushpop(4,5)
rc.pushpop(6,7)
print(rc.covariance)
print(naive_covariance([1,2,3,4,6],[1,2,3,5,7]))
np.testing.assert_allclose(
    rc.covariance, naive_covariance([1,2,3,4,6],[1,2,3,5,7]))


1.0612244897959182
1.0612244897959187
(1, 1)
(2, 2)
1.36
1.3599999999999985
3.679999999999998
3.6799999999999997


In [6]:
l1 = [1, 2, 3, 5]
l2 = [1, 2, 3, 5]

# ddof=0 ("delta degrees of freedom") makes np.cov divide by N, matching the
# population formula used by naive_covariance.  ``bias=True`` can be used
# instead.
cov_matrix = np.cov(m=l1, y=l2, ddof=0)
print(cov_matrix)
print(naive_covariance(l1, l2))


[[2.1875 2.1875]
 [2.1875 2.1875]]
2.1875
