[blog](http://rebcabin.github.io/blog/2013/01/22/covariance-matrices/)

In [1]:
%matplotlib inline
import numpy as np

### Online Mean

$\bar{x}_{N + 1} = \frac{N * \bar{x}_N + x_{N + 1}}{N + 1}$

### Online Variance

Note that:

$S^2_N = \frac{1}{N} (\vec{x} - \bar{x} \vec{1})^T (\vec{x} - \bar{x} \vec{1})$

$= \frac{1}{N} \vec{x}^T \vec{x} - \bar{x}^2_N$

Therefore, with $\vec{x}$ holding the first $N$ samples, the online update formula is:

$S^2_{N+1} = \frac{\vec{x}^T \vec{x} + x^2_{N+1}}{N + 1} - \bar{x}^2_{N + 1}$


In [38]:
class OnlineVariance:
    """Streaming mean and population variance of a sequence of samples.

    Samples are fed one at a time through ``process``; only a handful of
    running totals are stored, never the samples themselves.

    The variance is accumulated with Welford's recurrence (a running sum of
    squared deviations from the current mean) instead of evaluating
    ``sum_of_squares / N - mean ** 2``. That textbook expression subtracts
    two nearly equal quantities whenever the mean is large compared with the
    spread, which destroys most significant digits and can even yield a
    negative variance.

    Attributes:
        N: number of samples processed so far (stored as a float).
        mean: running mean of the samples.
        sum_of_squares: running sum of ``x * x``. Still maintained for
            callers that read it, but not used to compute ``var``.
        var: running population variance (divides by ``N``, not ``N - 1``).
    """

    def __init__(self):
        self.N = 0.0
        self.mean = 0.0
        self.sum_of_squares = 0.0
        self.var = 0.0
        # Running sum of squared deviations from the current mean.
        self._m2 = 0.0

    def process(self, X_N_plus_1):
        """Fold one new sample into the running statistics.

        Args:
            X_N_plus_1: the next sample of the sequence.

        Returns:
            Tuple ``(mean, var)`` of the updated running mean and running
            population variance.
        """
        self.N += 1
        # Deviation from the mean *before* this sample is included.
        delta = X_N_plus_1 - self.mean
        self.mean += delta / self.N
        # Multiply by the deviation from the mean *after* the update; the
        # product of the two is the exact increment of the squared-deviation
        # sum and is never negative.
        self._m2 += delta * (X_N_plus_1 - self.mean)
        self.sum_of_squares += X_N_plus_1 * X_N_plus_1
        self.var = self._m2 / self.N
        return self.mean, self.var


In [39]:
# Stream the samples 1, 2, 2, 2 through a fresh estimator. The last call is
# left as a bare expression so that its (mean, variance) tuple is what the
# cell displays.
t = OnlineVariance()
for sample in (1, 2, 2):
    t.process(sample)
t.process(2)

(1.75, 0.1875)

In [40]:
# Cross-check the streaming estimates against NumPy's batch results.
num_lists = np.array([1, 3, 5, 7, 9, 14])
desired_mean = np.mean(num_lists)
desired_variance = np.var(num_lists)  # np.var defaults to ddof=0 (divide by N)

t = OnlineVariance()
for i in num_lists:
    mu, var = t.process(i)

# Single-argument print(...) prints the same text on Python 2 and Python 3;
# the bare `print expr` statement is a SyntaxError on Python 3.
print(np.isclose(mu, desired_mean))
print(np.isclose(var, desired_variance))

True
True


### Online Covariance

$\sigma_{x,y}^N = \frac{1}{N} \vec{x}^T \vec{y} - \bar{x}_N \bar{y}_N$ 

$\sigma_{x,y}^{N+1} = \frac{1}{N + 1} (\vec{x}^T \vec{y} + x_{N+1} y_{N+1}) - \bar{x}_{N+1} \bar{y}_{N+1}$

In [68]:
class OnlineCovariance:
    """Streaming population covariance of two paired sequences.

    Pairs ``(x, y)`` are fed one at a time through ``process``; only running
    totals are stored, never the samples themselves.

    The covariance is accumulated as a running co-moment (sum of products of
    deviations from the current means) instead of evaluating
    ``sum(x * y) / N - mean_x * mean_y``. That expression subtracts two
    nearly equal quantities whenever the means are large compared with the
    spread, which destroys most significant digits of the result.

    Attributes:
        N: number of pairs processed so far (stored as a float).
        mean_x: running mean of the x samples.
        mean_y: running mean of the y samples.
        sum_of_squares: running sum of the cross-products ``x * y`` (despite
            the name). Still maintained for callers that read it, but not
            used to compute ``cov``.
        cov: running population covariance (divides by ``N``, not ``N - 1``).
    """

    def __init__(self):
        self.N = 0.0
        self.mean_x = 0.0
        self.mean_y = 0.0
        self.sum_of_squares = 0.0
        self.cov = 0.0
        # Running sum of (x - mean_x) * (y - mean_y).
        self._comoment = 0.0

    def process(self, X_N_plus_1, Y_N_plus_1):
        """Fold one new ``(x, y)`` pair into the running statistics.

        Args:
            X_N_plus_1: the next sample of the x sequence.
            Y_N_plus_1: the next sample of the y sequence.

        Returns:
            The updated running population covariance.
        """
        self.N += 1
        # x deviation from its mean *before* this pair is included.
        dx = X_N_plus_1 - self.mean_x
        self.mean_x += dx / self.N
        self.mean_y += (Y_N_plus_1 - self.mean_y) / self.N
        # Pair the pre-update x deviation with the post-update y deviation;
        # their product is the exact increment of the co-moment.
        self._comoment += dx * (Y_N_plus_1 - self.mean_y)
        self.sum_of_squares += X_N_plus_1 * Y_N_plus_1
        self.cov = self._comoment / self.N
        return self.cov

In [70]:
# Cross-check the streaming covariance against a batch computation.
x = np.array([1, 3, 5, 7, 9, 14])
y = np.array([9, 4, 5, 7, 11, 24])

# Population covariance: mean-centred dot product divided by N.
desired_cov = np.dot(x - np.mean(x), y - np.mean(y)) / x.shape[0]

t = OnlineCovariance()
for my_x, my_y in zip(x, y):
    cov = t.process(my_x, my_y)

# One pre-formatted string keeps the space-separated layout and runs on both
# Python 2 and Python 3; the multi-value `print a, b, c` statement is a
# SyntaxError on Python 3.
print("%s %s %s" % (np.isclose(cov, desired_cov), cov, desired_cov))

True 23.3333333333 23.3333333333
