In [1]:
import json
import networkx as nx

def get_data():
    """Read ``./huang.json`` and return its parsed JSON content.

    The path is relative to the current working directory of the kernel.
    """
    with open('./huang.json', 'r') as handle:
        payload = json.loads(handle.read())
    return payload
        
def get_dag():
    """Build the directed graph over the nodes A..H used by this notebook.

    Returns:
        nx.DiGraph: eight nodes and nine directed (source, target) edges.
    """
    nodes = list('ABCDEFGH')
    # Each two-letter token is one directed edge: first letter -> second letter.
    arcs = [tuple(pair) for pair in 'AB AC BD CE DF EF CG EH GH'.split()]

    dag = nx.DiGraph()
    dag.add_nodes_from(nodes)
    dag.add_edges_from(arcs)
    return dag

def get_moralized_graph():
    """Build the undirected, moralized version of the graph from ``get_dag``.

    Returns:
        nx.Graph: the nine DAG edges with direction dropped, plus D-E and E-G.
    """
    # Same node pairs as the directed edges in get_dag, now undirected.
    skeleton = [
        ('A', 'B'), ('A', 'C'), ('B', 'D'),
        ('C', 'E'), ('D', 'F'), ('E', 'F'),
        ('C', 'G'), ('E', 'H'), ('G', 'H'),
    ]
    # Links between nodes that share a child in get_dag:
    # D and E both point to F; E and G both point to H.
    married_parents = [('D', 'E'), ('E', 'G')]

    moral = nx.Graph()
    moral.add_nodes_from(list('ABCDEFGH'))
    moral.add_edges_from(skeleton + married_parents)
    return moral

def get_triangulated_graph():
    """Build the triangulated undirected graph over the nodes A..H.

    Returns:
        nx.Graph: the edges of ``get_moralized_graph`` plus A-D and A-E.
    """
    # Same node pairs as the directed edges in get_dag, now undirected.
    skeleton = [
        ('A', 'B'), ('A', 'C'), ('B', 'D'),
        ('C', 'E'), ('D', 'F'), ('E', 'F'),
        ('C', 'G'), ('E', 'H'), ('G', 'H'),
    ]
    # Extra links already present in the moralized graph.
    married_parents = [('D', 'E'), ('E', 'G')]
    # Edges that appear only in the triangulated graph.
    fill_ins = [('A', 'D'), ('A', 'E')]

    chordal = nx.Graph()
    chordal.add_nodes_from(list('ABCDEFGH'))
    chordal.add_edges_from(skeleton + married_parents + fill_ins)
    return chordal

def get_junction_tree():
    """Build the junction tree as an undirected graph of cliques and sepsets.

    Every graph node is named by concatenating its member variables and
    carries two attributes: ``nodes`` (list of member variable names) and
    ``type`` (``'clique'`` or ``'sepset'``). Each sepset node sits between
    the two cliques it connects.

    Returns:
        nx.Graph: six clique nodes, five sepset nodes and ten edges.
    """
    clique_names = ('ABD', 'ADE', 'ACE', 'CEG', 'DEF', 'EGH')
    sepset_names = ('AD', 'AE', 'DE', 'CE', 'EG')
    # (clique, sepset, clique): the sepset is linked to both cliques.
    links = (
        ('ABD', 'AD', 'ADE'),
        ('ADE', 'DE', 'DEF'),
        ('ADE', 'AE', 'ACE'),
        ('ACE', 'CE', 'CEG'),
        ('CEG', 'EG', 'EGH'),
    )

    tree = nx.Graph()
    for name in clique_names:
        tree.add_node(name, nodes=list(name), type='clique')
    for name in sepset_names:
        tree.add_node(name, nodes=list(name), type='sepset')

    edges = []
    for left, sepset, right in links:
        edges.append((left, sepset))
        edges.append((right, sepset))
    tree.add_edges_from(edges)

    return tree

In [2]:
# Parsed contents of ./huang.json. get_cpt reads D['cpt'] and get_pot reads
# D['domain'] from this module-level name. Note that divide() has a parameter
# also called D, which shadows this global inside that function only.
D = get_data()

In [3]:
import itertools
import pandas as pd

def get_cpt(node):
    """Return the table stored under ``D['cpt'][node]`` as a DataFrame.

    Args:
        node: key into ``D['cpt']``.

    Returns:
        pd.DataFrame built directly from the stored entry. The helpers that
        consume it (e.g. ``multiply``) require a ``'__p__'`` column.
    """
    return pd.DataFrame(D['cpt'][node])

def get_pot(nodes):
    """Create an all-ones potential table over the given variables.

    Args:
        nodes: list of variable names; each must be a key of ``D['domain']``.

    Returns:
        pd.DataFrame with one column per variable (in the given order), one
        row per combination of their domain values, and a ``'__p__'`` column
        set to 1 on every row.
    """
    domains = [D['domain'][name] for name in nodes]
    combinations = itertools.product(*domains)
    table = pd.DataFrame(combinations, columns=nodes)
    table['__p__'] = 1
    return table

def multiply(L, R):
    """Multiply two potential tables row-wise on their shared variables.

    Args:
        L: DataFrame with a ``'__p__'`` column; the left side of the join.
        R: DataFrame with a ``'__p__'`` column; its column layout is the
            layout of the result.

    Returns:
        pd.DataFrame with the columns of ``R`` where ``'__p__'`` is the
        product of the matching ``'__p__'`` values of ``L`` and ``R``.
    """
    shared = list(set(L.columns.drop('__p__')).intersection(R.columns.drop('__p__')))
    left = L.set_index(shared)
    right = R.set_index(shared)
    # Both sides carry '__p__', so the join renames them with the suffixes.
    joined = left.join(right, lsuffix='lhs', rsuffix='rhs')
    joined['__p__'] = joined['__p__lhs'] * joined['__p__rhs']
    return joined.reset_index()[R.columns]

def marginalize(P, nodes):
    """Sum a potential table down to the given variables.

    Args:
        P: DataFrame with a ``'__p__'`` column.
        nodes: list of column names to keep.

    Returns:
        pd.DataFrame with the columns in ``nodes`` followed by ``'__p__'``,
        holding the sum of ``'__p__'`` for each combination of kept values.
    """
    totals = P.groupby(nodes)['__p__'].sum()
    # Turning the group keys back into columns yields a frame again.
    return totals.reset_index()

def divide(N, D):
    """Divide one potential table by another on their shared variables.

    Entries where both numerator and denominator are zero are defined as 0,
    the convention used in junction-tree propagation. Plain floating-point
    division would give NaN there, and that NaN would spread into every
    potential later multiplied by the result.

    Args:
        N: numerator DataFrame with a ``'__p__'`` column.
        D: denominator DataFrame with a ``'__p__'`` column; its column
            layout is the layout of the result. (This parameter shadows the
            notebook-level ``D`` inside this function.)

    Returns:
        pd.DataFrame with the columns of ``D`` where ``'__p__'`` is
        ``N['__p__'] / D['__p__']`` for matching rows, and 0 where both are 0.
    """
    # Follow the denominator's column order so the join keys are deterministic.
    COLUMNS = [c for c in D.columns if c != '__p__' and c in N.columns]
    joined = N.set_index(COLUMNS) \
        .join(D.set_index(COLUMNS), lsuffix='lhs', rsuffix='rhs')
    numerator = joined['__p__lhs']
    denominator = joined['__p__rhs']
    both_zero = (numerator == 0) & (denominator == 0)
    quotient = (numerator / denominator).mask(both_zero, 0.0)
    return joined \
        .assign(**{'__p__': quotient}) \
        .reset_index()[D.columns]

def message(X, s, Y):
    """Pass one message from clique X through sepset s into clique Y.

    Projection: the new sepset potential is X summed down to the sepset's
    variables. Absorption: Y is multiplied by (new sepset / old sepset).

    The second return value is the new sepset potential itself, not the
    new/old ratio. The ratio is only the update factor for Y; storing it as
    the sepset would make any later pass through this sepset divide by the
    wrong table.

    Args:
        X: sending clique potential (DataFrame with a ``'__p__'`` column).
        s: current sepset potential between X and Y.
        Y: receiving clique potential.

    Returns:
        tuple ``(X, new_sepset, updated_Y)``; X is returned unchanged.
    """
    # Use the sepset's own column order so the refreshed sepset has the same
    # layout as the table it replaces.
    COLUMNS = [c for c in s.columns if c != '__p__' and c in X.columns]
    projected = marginalize(X, COLUMNS)
    ratio = divide(projected, s)
    return X, projected, multiply(ratio, Y)

In [4]:
# Conditional probability tables, one per network variable.
_A, _B, _C, _D, _E, _F, _G, _H = (get_cpt(name) for name in 'ABCDEFGH')

# All-ones potentials for the six cliques of the junction tree.
_ABD, _ADE, _ACE, _DEF, _CEG, _EGH = (
    get_pot(list(members))
    for members in ('ABD', 'ADE', 'ACE', 'DEF', 'CEG', 'EGH')
)

# All-ones potentials for the five sepsets.
_AD, _DE, _AE, _CE, _EG = (
    get_pot(list(members))
    for members in ('AD', 'DE', 'AE', 'CE', 'EG')
)

In [5]:
# Multiply each CPT into one clique potential:
#   A, B, D -> ABD;  C, E -> ACE;  F -> DEF;  G -> CEG;  H -> EGH.
# Every product starts from a fresh all-ones table instead of the current
# value of the clique variable, so re-running this cell does not multiply
# the same CPTs into a clique a second time.
_ABD = multiply(_D, multiply(_B, multiply(_A, get_pot(['A', 'B', 'D']))))
_ACE = multiply(_E, multiply(_C, get_pot(['A', 'C', 'E'])))
_DEF = multiply(_F, get_pot(['D', 'E', 'F']))
_CEG = multiply(_G, get_pot(['C', 'E', 'G']))
_EGH = multiply(_H, get_pot(['E', 'G', 'H']))

In [6]:
# First pass: messages flow inward towards the clique ACE.
# Order matters: ADE must receive from ABD and DEF before it sends to ACE,
# and CEG must receive from EGH before it sends to ACE.
_ABD, _AD, _ADE = message(_ABD, _AD, _ADE)
_DEF, _DE, _ADE = message(_DEF, _DE, _ADE)
_ADE, _AE, _ACE = message(_ADE, _AE, _ACE)
_EGH, _EG, _CEG = message(_EGH, _EG, _CEG)
_CEG, _CE, _ACE = message(_CEG, _CE, _ACE)

# Second pass: messages flow back outward from ACE along the same sepsets.
# Each call rebinds the sender, the sepset and the receiver, so this cell is
# not idempotent: it is meant to run once after the initialization cell.
_ACE, _AE, _ADE = message(_ACE, _AE, _ADE)
_ADE, _DE, _DEF = message(_ADE, _DE, _DEF)
_ADE, _AD, _ABD = message(_ADE, _AD, _ABD)
_ACE, _CE, _CEG = message(_ACE, _CE, _CEG)
_CEG, _EG, _EGH = message(_CEG, _EG, _EGH)

In [7]:
# Per-variable totals, each read from one clique potential that contains the
# variable. A notebook cell renders only its last bare expression, so the
# first seven are shown explicitly with display(); the H totals remain the
# cell's value.
display(
    _ABD.groupby(['A'])['__p__'].sum(),
    _ABD.groupby(['B'])['__p__'].sum(),
    _ABD.groupby(['D'])['__p__'].sum(),
    _ACE.groupby(['C'])['__p__'].sum(),
    _ADE.groupby(['E'])['__p__'].sum(),
    _DEF.groupby(['F'])['__p__'].sum(),
    _CEG.groupby(['G'])['__p__'].sum(),
)
_EGH.groupby(['H'])['__p__'].sum()

H
off    0.73422
on     0.26578
Name: __p__, dtype: float64