# Guassian DAGs

## Serial

In [1]:
import numpy as np
import pandas as pd

np.random.seed(37)

n = 1_000
x1 = np.random.normal(1, 1, n)
x2 = 4 + 2 * x1 + np.random.normal(0, 0.1, n)
x3 = 1 - 3 * x2 + np.random.normal(0, 0.1, n)

X = pd.DataFrame({
    'x1': x1,
    'x2': x2,
    'x3': x3
})

X

Unnamed: 0,x1,x2,x3
0,0.945536,5.825077,-16.469780
1,1.674308,7.341377,-21.136027
2,1.346647,6.594466,-18.805228
3,-0.300346,3.450351,-9.545815
4,2.518512,9.081274,-26.187220
...,...,...,...
995,0.401151,4.779951,-13.307402
996,0.748119,5.396740,-15.316848
997,1.371860,6.777024,-19.377241
998,2.022343,7.942227,-22.863868


In [2]:
M, S = X.mean().values, X.cov().values
Q = np.linalg.inv(S)

In [3]:
M

array([  1.01277839,   6.0239483 , -17.07285473])

In [4]:
S

array([[  0.9634615 ,   1.92655164,  -5.78078338],
       [  1.92655164,   3.86214714, -11.58922942],
       [ -5.78078338, -11.58922942,  34.7857574 ]])

In [5]:
Q

array([[ 410.8029118 , -238.24121043,  -11.10424004],
       [-238.24121043, 1066.4135891 ,  315.69503545],
       [ -11.10424004,  315.69503545,  103.36043417]])

In [35]:
def conditional(M, S, Q, obs):
    a = np.array([v for _, v in obs.items()])
    print(f'{a=}')
    print('-' * 15)
    
    i_1 = [i for i in range(M.shape[0]) if i not in obs]
    i_2 = [i for i in range(M.shape[0]) if i in obs]
    print(f'{i_1=}')
    print(f'{i_2=}')
    print('-' * 15)

    m_1, m_2 = M[i_1], M[i_2]
    print(f'{m_1=}')
    print(f'{m_2=}')
    print('-' * 15)
    
    S_11 = S[i_1][:,i_1]
    S_12 = S[i_1][:,i_2]
    S_21 = S[i_2][:,i_1]
    S_22 = Q[i_2][:,i_2]
    print(f'{S_11=}')
    print(f'{S_12=}')
    print(f'{S_21=}')
    print(f'{S_22=}')
    print('-' * 15)
    
    _r = S_12.dot(S_22)
    _m = m_1 + _r.dot(a - m_2)
    _s = S_11 - _r.dot(S_21)
    print(f'{_r=}')
    
    return _m, _s

In [40]:
_m, _s = conditional(M, S, Q, {0: 0.0})
_m.shape, _s.shape

a=array([0.])
---------------
i_1=[1, 2]
i_2=[0]
---------------
m_1=array([  6.0239483 , -17.07285473])
m_2=array([1.01277839])
---------------
S_11=array([[  3.86214714, -11.58922942],
       [-11.58922942,  34.7857574 ]])
S_12=array([[ 1.92655164],
       [-5.78078338]])
S_21=array([[ 1.92655164, -5.78078338]])
S_22=array([[410.8029118]])
---------------
_r=array([[  791.43302298],
       [-2374.76264584]])


((2,), (2, 2))

In [41]:
_m

array([-795.52231165, 2388.03542577])

In [42]:
_s

array([[ -1520.87444037,   4563.5136379 ],
       [  4563.5136379 , -13693.20268219]])

In [43]:
from sklearn.linear_model import LinearRegression

m = LinearRegression()
m.fit(X[['x1']], X['x2'])
m.intercept_, m.coef_

(3.9987818919061273, array([1.99961456]))

In [45]:
m.predict([[0]])



array([3.99878189])