In [1]:
import numpy as np

In [2]:
def rnorm(n, mean=0, stdev=1):
    return np.random.normal(loc=mean, scale=stdev, size=n)

In [3]:
def correlated_normal(cor, n=100000):
    x = rnorm(n)
    y = rnorm(n)
    z = rnorm(n)
    
    mlt = np.sqrt((1 - cor) / cor)
    
    X = x + mlt*y
    Y = x + mlt*z
    
    X /= np.std(X)
    Y /= np.std(Y)
    
    return X, Y

a, b = correlated_normal(.8)
np.corrcoef(a, b)

array([[1.        , 0.79950507],
       [0.79950507, 1.        ]])

### Variance of sum and difference independent of random variables

Variances always add up.

In [4]:
def test_1(n=10000):
    x = rnorm(n)
    y = rnorm(n)
    
    return np.cov(x+y), np.cov(x-y)

test_1(10000000)

(array(2.00122836), array(1.99850096))

### Covariance of sum and difference of correlated random variables

Unlike with variance, here the sign in front of the variable is important: covariance depends on the sign. 

In [5]:
def test_2(n=10000):
    X, Y = correlated_normal(.8, n)
    
    return np.cov(X, Y), np.cov(-X, Y)

test_2(100000)

(array([[1.00001   , 0.80112374],
        [0.80112374, 1.00001   ]]),
 array([[ 1.00001   , -0.80112374],
        [-0.80112374,  1.00001   ]]))

### Covariance with multipliers

Covariance of $aX$ and $bY$ equals to $ab\cdot\mathcal{cov}(X, Y)$.

In [6]:
def test_3(a, b, n=10000):
    X, Y = correlated_normal(.8, n)
    
    actual = np.cov(a*X, b*Y)[0, 1]
    predicted = a*b*np.cov(X, Y)[0, 1]
    
    return np.corrcoef(X, Y), actual, predicted

test_3(1, 2, 1000000)

(array([[1.        , 0.80049875],
        [0.80049875, 1.        ]]),
 1.600999108885091,
 1.600999108885091)

### Variance of a sum and difference of correlated variables

Covariance needs to be added or removed, respectively.

In [7]:
def test_4(n=10000):
    X, Y = correlated_normal(.8, n)

    predicted_sum = 1 + 1 + 2 * .8
    predicted_diff = 1 + 1 - 2 * .8
    return np.cov(X + Y), predicted_sum, np.cov(X-Y), predicted_diff

test_4(10000)

(array(3.59762729), 3.6, array(0.40277275), 0.3999999999999999)

### Variance of a sum and difference of correlated variables with multipliers 

In [8]:
def test_5(a, b, n=10000):
    X, Y = correlated_normal(.8, n)

    predicted_sum = a**2 * 1 + b**2 * 1 + 2 * a*b* .8
    predicted_diff = a**2 * 1 + b**2 * 1 - 2 * a*b* .8
    return np.cov(a*X + b*Y), predicted_sum, np.cov(a*X-b*Y), predicted_diff
    
test_5(1, 2)

(array(8.21188223), 8.2, array(1.78911787), 1.7999999999999998)

### Covariance of two sums of random variables with multipliers

One can pull out multipliers in front of the variances of each random variable. Also, if $X$ and $Y$ are independent, only $ac$ and $bd$ multiplications remain.

In [9]:
def test_6(a, b, c, d, n=10000):
    X = rnorm(n)
    Y = rnorm(n)
    
    S1 = a*X + b*Y
    S2 = c*X + d*Y
    
    predicted_algebraic = a*c + b*d
    predicted_numerical = (a*c*np.var(X) + b*d*np.var(Y))
    actual = np.cov(S1, S2)[0,1]
    
    return predicted_algebraic, predicted_numerical, actual

test_6(1, 2, 3, 4, n=10000000)

(11, 10.995344821354673, 10.998303507467918)

### Covariance of two sums with negative multipliers

Negative multipliers can be pulled out in the same way as before.

In [10]:
test_6(-1, 2, 3, 4, n=1000000)

(5, 4.998832559021045, 4.9985754877998945)

### Covariance of two sums of correlated variables

This time covariances between components of sums need to be taken into account.

In [11]:
def test_7(a, b, c, d, n=10000):
    X, Y = correlated_normal(.8, n)
    
    S1 = a*X + b*Y
    S2 = c*X + d*Y
    
    predicted_algebraic = a*c + .8 *(a*d + b*c) + b*d
    predicted_numerical = a*c*np.var(X) + np.cov(X, Y)[1, 0] * (a*d + b*c) + b*d*np.var(Y)
    actual = np.cov(S1, S2)[0,1]
    
    return predicted_algebraic, predicted_numerical, actual

test_7(1, 2, 3, 4, n=10000000)

(19.0, 19.000005345646443, 19.00000644564659)