# Gaussian DAGs

## Serial

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import zscore


# Fix the legacy global seed so the simulated sample is reproducible.
np.random.seed(37)

n = 1_000

# Serial (chain) structure x1 -> x2 -> x3. The three draws below are made in
# this exact order so the seeded random stream is consumed deterministically.
x1 = np.random.normal(loc=1, scale=1, size=n)
noise_2 = np.random.normal(loc=0, scale=0.1, size=n)
noise_3 = np.random.normal(loc=0, scale=0.1, size=n)

# Each child is a linear function of its parent plus small Gaussian noise.
x2 = 4 + 2 * x1 + noise_2
x3 = 1 - 3 * x2 + noise_3

X = pd.DataFrame(dict(x1=x1, x2=x2, x3=x3))

# Optional: standardise every column before estimating the moments.
# X = X.apply(zscore)

X

Unnamed: 0,x1,x2,x3
0,0.945536,5.825077,-16.469780
1,1.674308,7.341377,-21.136027
2,1.346647,6.594466,-18.805228
3,-0.300346,3.450351,-9.545815
4,2.518512,9.081274,-26.187220
...,...,...,...
995,0.401151,4.779951,-13.307402
996,0.748119,5.396740,-15.316848
997,1.371860,6.777024,-19.377241
998,2.022343,7.942227,-22.863868


In [2]:
# Empirical moments of the sample: column means and the covariance matrix,
# both as plain NumPy arrays.
M = X.mean().to_numpy()
S = X.cov().to_numpy()

In [3]:
# Mean vector of X's columns, in the order (x1, x2, x3).
M

array([  1.01277839,   6.0239483 , -17.07285473])

In [4]:
# Covariance matrix of X's columns; rows/columns are ordered (x1, x2, x3).
S

array([[  0.9634615 ,   1.92655164,  -5.78078338],
       [  1.92655164,   3.86214714, -11.58922942],
       [ -5.78078338, -11.58922942,  34.7857574 ]])

In [5]:
def conditional(M, S, obs):
    """Condition a multivariate Gaussian on a set of observed components.

    With the variables split into unobserved (1) and observed (2) groups,
    the conditional distribution of group 1 given group 2 = a is Gaussian
    with

        mean = m_1 + S_12 S_22^{-1} (a - m_2)
        cov  = S_11 - S_12 S_22^{-1} S_21

    Parameters
    ----------
    M : array-like, shape (n,)
        Mean vector of the joint distribution.
    S : array-like, shape (n, n)
        Covariance matrix of the joint distribution.
    obs : dict
        Maps the integer index of each observed variable to its observed
        value, e.g. ``{2: 0}``. The order of the keys does not matter.

    Returns
    -------
    (_m, _s) : tuple of numpy.ndarray
        Conditional mean and covariance of the unobserved variables, listed
        in ascending index order.

    Raises
    ------
    ValueError
        If ``obs`` contains a key that is not a valid index in ``range(n)``.
    """
    M = np.asarray(M)
    S = np.asarray(S)
    n = M.shape[0]

    # A key outside range(n) (including negative indices) would never match
    # any variable and would be silently dropped from the conditioning set.
    unknown = [k for k in obs if k not in range(n)]
    if unknown:
        raise ValueError(
            f'observed indices {unknown} are not valid for {n} variables'
        )

    i_1 = [i for i in range(n) if i not in obs]
    i_2 = [i for i in range(n) if i in obs]

    # Read the observed values in the same ascending index order as i_2 so
    # they line up with m_2 and S_22 regardless of the dict's insertion order.
    a = np.array([obs[i] for i in i_2])

    m_1, m_2 = M[i_1], M[i_2]

    S_11 = S[np.ix_(i_1, i_1)]
    S_12 = S[np.ix_(i_1, i_2)]
    S_21 = S[np.ix_(i_2, i_1)]
    S_22 = S[np.ix_(i_2, i_2)]

    # _r = S_12 @ inv(S_22), computed via a linear solve rather than an
    # explicit inverse: solve(S_22^T, S_12^T)^T == S_12 @ inv(S_22).
    _r = np.linalg.solve(S_22.T, S_12.T).T
    _m = m_1 + _r.dot(a - m_2)
    _s = S_11 - _r.dot(S_21)

    return _m, _s

In [21]:
# Condition the fitted Gaussian on the variable at index 2 (x3) being 0;
# the result describes the remaining variables (x1, x2).
_m, _s = conditional(M, S, obs={2: 0})
(_m.shape, _s.shape)

((2,), (2, 2))

In [22]:
# Conditional mean of the unobserved variables (x1, x2) given x3 = 0.
_m

array([-1.82443093,  0.33595283])

In [23]:
# Conditional covariance of the unobserved variables (x1, x2) given x3 = 0.
_s

array([[0.00279659, 0.00062477],
       [0.00062477, 0.0010773 ]])