# Gaussian operations

## Diabetes data

In [1]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset as pandas objects: X holds the feature columns,
# y holds the regression target.
X, y = load_diabetes(as_frame=True, return_X_y=True)

# Sanity check: number of rows and columns that were loaded.
(X.shape, y.shape)

((442, 10), (442,))

In [2]:
# Preview the first five rows of the feature table.
X.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


## Mean

In [3]:
# Per-feature sample mean: _M keeps the column labels (for display),
# M is the same data as a bare NumPy vector (for the linear algebra below).
_M = X.mean(axis=0)
M = _M.to_numpy()

_M

age   -2.511817e-19
sex    1.230790e-17
bmi   -2.245564e-16
bp    -4.797570e-17
s1    -1.381499e-17
s2     3.918434e-17
s3    -5.777179e-18
s4    -9.042540e-18
s5     9.293722e-17
s6     1.130318e-17
dtype: float64

## Covariance

In [4]:
# Sample covariance between features: _S is the labelled DataFrame (for display),
# S is the same matrix as a bare NumPy array (for the linear algebra below).
_S = X.cov()
S = _S.to_numpy()

_S

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
age,0.002268,0.000394,0.00042,0.000761,0.00059,0.000497,-0.00017,0.000462,0.000614,0.000684
sex,0.000394,0.002268,0.0002,0.000547,8e-05,0.000323,-0.00086,0.000753,0.00034,0.000472
bmi,0.00042,0.0002,0.002268,0.000897,0.000566,0.000592,-0.000832,0.000938,0.001012,0.000881
bp,0.000761,0.000547,0.000897,0.002268,0.00055,0.000421,-0.000405,0.000584,0.000892,0.000885
s1,0.00059,8e-05,0.000566,0.00055,0.002268,0.002033,0.000117,0.001229,0.001169,0.000739
s2,0.000497,0.000323,0.000592,0.000421,0.002033,0.002268,-0.000445,0.001496,0.000722,0.000659
s3,-0.00017,-0.00086,-0.000832,-0.000405,0.000117,-0.000445,0.002268,-0.001675,-0.000904,-0.000621
s4,0.000462,0.000753,0.000938,0.000584,0.001229,0.001496,-0.001675,0.002268,0.001401,0.000946
s5,0.000614,0.00034,0.001012,0.000892,0.001169,0.000722,-0.000904,0.001401,0.002268,0.001054
s6,0.000684,0.000472,0.000881,0.000885,0.000739,0.000659,-0.000621,0.000946,0.001054,0.002268


## Precision

In [5]:
import numpy as np
import pandas as pd

# Precision matrix = inverse of the covariance matrix.
# _Q carries the feature names on both axes (for display); Q is the bare array.
_Q = pd.DataFrame(
    data=np.linalg.inv(X.cov().to_numpy()),
    index=X.columns,
    columns=X.columns,
)
Q = _Q.to_numpy()

_Q

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
age,536.832173,-54.447763,3.470961,-111.775881,69.042506,-126.972735,-71.144999,21.129371,-95.076456,-78.05349
sex,-54.447763,563.629318,87.464143,-113.524667,186.544784,-132.023942,90.40872,-119.35337,11.968834,-43.969333
bmi,3.470961,87.464143,665.661882,-157.879197,286.256448,-318.186761,75.976064,37.097912,-241.571359,-99.546066
bp,-111.775881,-113.524667,-157.879197,643.60765,-152.305237,77.749907,69.118081,144.150256,-95.749236,-102.28266
s1,69.042506,186.544784,286.256448,-152.305237,26108.306969,-20434.474598,-11589.267224,-3513.743721,-9539.100851,49.44524
s2,-126.972735,-132.023942,-318.186761,77.749907,-20434.474598,17284.276158,8208.325928,1120.88351,7801.137224,-53.854275
s3,-71.144999,90.40872,75.976064,69.118081,-11589.267224,8208.325928,6792.350799,3414.762936,3929.080002,-55.744113
s4,21.129371,-119.35337,37.097912,144.150256,-3513.743721,1120.88351,3414.762936,3920.924985,384.577779,-113.425076
s5,-95.076456,11.968834,-241.571359,-95.749236,-9539.100851,7801.137224,3929.080002,384.577779,4443.501505,-152.32335
s6,-78.05349,-43.969333,-99.546066,-102.28266,49.44524,-53.854275,-55.744113,-113.425076,-152.32335,654.71857


## Marginalize

In [6]:
def marginalize(M, S, Q, v):
    """Marginalize a set of variables out of a multivariate Gaussian.

    Parameters
    ----------
    M : numpy.ndarray
        1-D mean vector of the full distribution.
    S : numpy.ndarray
        2-D covariance matrix of the full distribution.
    Q : numpy.ndarray
        Precision matrix of the full distribution. Kept in the signature for
        backward compatibility; it is not used, because the precision of a
        marginal is NOT the corresponding sub-block of the full precision
        matrix (that sub-block is the precision of the conditional instead).
    v : collection of int
        Indices of the variables to integrate out.

    Returns
    -------
    tuple
        ``(mean, covariance, precision)`` of the remaining variables, kept in
        their original order. The precision is the inverse of the returned
        covariance block.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the retained covariance block is singular.
    """
    indices = [i for i in range(M.shape[0]) if i not in v]

    # Marginal mean / covariance are simply the retained entries.
    S_kept = S[np.ix_(indices, indices)]

    # Marginal precision must be recomputed from the retained covariance.
    # When every variable is dropped the block is 0x0 and is returned as-is.
    Q_kept = np.linalg.inv(S_kept) if indices else S_kept.copy()

    return M[indices], S_kept, Q_kept

In [7]:
# Integrate out the first three features (age, sex, bmi), keeping the other seven.
_m, _s, _q = marginalize(M, S, Q, v={0, 1, 2})

In [8]:
# Shapes of the marginal mean, covariance and precision.
tuple(arr.shape for arr in (_m, _s, _q))

((7,), (7, 7), (7, 7))

## Conditional

In [9]:
def conditional(M, S, Q, obs):
    """Condition a multivariate Gaussian on observed values of some variables.

    With the variables split into unobserved (1) and observed (2) groups, the
    conditional distribution of group 1 given ``x_2 = a`` has

        mean = m_1 + S_12 S_22^{-1} (a - m_2)
        cov  = S_11 - S_12 S_22^{-1} S_21

    where every ``S_ij`` is a block of the covariance matrix.

    Parameters
    ----------
    M : numpy.ndarray
        1-D mean vector of the full distribution.
    S : numpy.ndarray
        2-D covariance matrix of the full distribution.
    Q : numpy.ndarray
        Precision matrix of the full distribution. Kept in the signature for
        backward compatibility; it is not used, because a sub-block of the
        full precision matrix is not the inverse of the matching covariance
        sub-block.
    obs : dict
        Maps variable index -> observed value.

    Returns
    -------
    tuple
        ``(mean, covariance)`` of the unobserved variables, in their original
        order.

    Raises
    ------
    ValueError
        If ``obs`` contains a key that is not a valid variable index.
    numpy.linalg.LinAlgError
        If the covariance block of the observed variables is singular.
    """
    n = M.shape[0]
    i_1 = [i for i in range(n) if i not in obs]
    i_2 = [i for i in range(n) if i in obs]

    if len(i_2) != len(obs):
        raise ValueError("obs contains keys that are not valid variable indices")

    m_1, m_2 = M[i_1], M[i_2]
    S_11 = S[np.ix_(i_1, i_1)]

    # Nothing observed: the conditional is the original distribution.
    if not i_2:
        return m_1, S_11

    # Build the observation vector in the same (ascending index) order as i_2,
    # not in dict-iteration order, so values line up with m_2 and S_22.
    a = np.array([obs[i] for i in i_2], dtype=float)

    S_12 = S[np.ix_(i_1, i_2)]
    S_21 = S[np.ix_(i_2, i_1)]
    S_22 = S[np.ix_(i_2, i_2)]

    # gain = S_12 @ inv(S_22), computed with a linear solve instead of an
    # explicit inverse: gain @ S_22 = S_12  <=>  S_22.T @ gain.T = S_12.T
    gain = np.linalg.solve(S_22.T, S_12.T).T

    _m = m_1 + gain.dot(a - m_2)
    _s = S_11 - gain.dot(S_21)

    return _m, _s

# Condition on age (index 0) = 0.1 and sex (index 1) = 0.2.
_m, _s = conditional(M, S, Q, obs={0: 0.1, 1: 0.2})

# Shapes of the conditional mean and covariance of the remaining features.
(_m.shape, _s.shape)

((8,), (8, 8))

## References

- [Multivariate normal distribution (Wikipedia)](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
- [Product of two multivariate Gaussian distributions (Mathematics Stack Exchange)](https://math.stackexchange.com/questions/157172/product-of-two-multivariate-gaussians-distributions)
- [Multivariate Gaussian distributions (Princeton COS 424 scribe notes)](https://www.cs.princeton.edu/courses/archive/spr07/cos424/scribe_notes/0419.pdf)