# Gaussian DAGs

## Diverging

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import zscore


# Fix the global NumPy seed so the simulated sample is identical on every run.
np.random.seed(37)

# Sample size.
n = 1_000

# Diverging structure x1 <- x2 -> x3: x2 is the common parent, and each child is
# a linear function of x2 plus its own Gaussian noise draw (std 0.1).
# The order of the random draws below determines the generated values.
x2 = np.random.normal(loc=1, scale=1, size=n) + np.random.normal(loc=0, scale=0.1, size=n)
x1 = 4 + 2 * x2 + np.random.normal(loc=0, scale=0.1, size=n)
x3 = 1 - 3 * x2 + np.random.normal(loc=0, scale=0.1, size=n)

# Column order (x1, x2, x3) fixes the variable indices used downstream.
X = pd.DataFrame(dict(x1=x1, x2=x2, x3=x3))

# Optional standardization step, currently disabled.
# X = X.apply(zscore)

X

Unnamed: 0,x1,x2,x3
0,5.764534,0.879541,-1.754962
1,7.222243,1.667069,-4.009537
2,6.473810,1.247819,-2.837089
3,3.306634,-0.249303,1.897069
4,9.182125,2.562762,-6.792578
...,...,...,...
995,4.790049,0.378799,-0.056629
996,5.170615,0.648621,-0.878558
997,6.764159,1.405164,-3.264707
998,7.802582,1.919884,-4.784809


In [2]:
# Sample mean vector and sample covariance matrix of X, as plain NumPy arrays.
M = X.mean().to_numpy()
S = X.cov().to_numpy()

In [3]:
M

array([ 6.02133   ,  1.01116992, -2.03342587])

In [4]:
S

array([[ 3.89308828,  1.94335116, -5.84709602],
       [ 1.94335116,  0.97250535, -2.92585223],
       [-5.84709602, -2.92585223,  8.81260585]])

In [5]:
def conditional(M, S, obs):
    """
    Condition a multivariate Gaussian on exact observations of some variables.

    With block 1 the unobserved variables and block 2 the observed ones, the
    result follows the partitioned-Gaussian identities:

        mean = m_1 + S_12 S_22^{-1} (a - m_2)
        cov  = S_11 - S_12 S_22^{-1} S_21

    :param M: 1-D array of means, one entry per variable.
    :param S: Square covariance matrix indexed consistently with ``M``.
    :param obs: Mapping from variable index (position in ``M``) to the observed value.
    :return: Tuple ``(mean, cov)`` of the unobserved variables, in ascending index order.
    :raises ValueError: If ``obs`` has a key that is not a valid index into ``M``.
    """
    n = M.shape[0]

    # Keys outside 0..n-1 cannot be matched to any variable; fail loudly
    # instead of silently dropping the observation.
    unknown = [k for k in obs if k not in range(n)]
    if unknown:
        raise ValueError(f'obs contains indices outside 0..{n - 1}: {unknown}')

    i_1 = [i for i in range(n) if i not in obs]
    i_2 = [i for i in range(n) if i in obs]

    # Read the observed values in the same ascending-index order as i_2, so
    # each value lines up with its mean and covariance entries regardless of
    # the insertion order of the obs dict.
    a = np.array([obs[i] for i in i_2])

    m_1, m_2 = M[i_1], M[i_2]

    S_11 = S[np.ix_(i_1, i_1)]
    S_12 = S[np.ix_(i_1, i_2)]
    S_21 = S[np.ix_(i_2, i_1)]
    S_22_inv = np.linalg.inv(S[np.ix_(i_2, i_2)])

    # Regression coefficients of the unobserved block on the observed block.
    _r = S_12.dot(S_22_inv)
    _m = m_1 + _r.dot(a - m_2)
    _s = S_11 - _r.dot(S_21)

    return _m, _s

In [6]:
# Condition on x1 = 10 (index 0) and x2 = 1 (index 1); what is returned is the
# conditional mean and covariance of the remaining variable, x3.
_m, _s = conditional(M, S, obs={0: 10, 1: 1})

_m

array([-2.15835515])

In [7]:
_s

array([[0.00995397]])