In [1]:
# Requires numpy, pandas, wrapt, scipy, networkx 1.11
import math
import itertools
import numpy as np
import copy

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor
from pgmpy.inference.ExactInference import BeliefPropagation

In [2]:
cancer_model = BayesianModel([('Pollution', 'Cancer'), 
                              ('Smoker', 'Cancer'),
                              ('Cancer', 'Xray'),
                              ('Cancer', 'Dyspnoea')])

cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]])
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, 
                      cpd_xray, cpd_dysp)

In [3]:
cpd_poll = cpd_poll.to_factor()
cpd_smoke = cpd_smoke.to_factor()
cpd_cancer = cpd_cancer.to_factor()
cpd_xray = cpd_xray.to_factor()
cpd_dysp = cpd_dysp.to_factor()

In [4]:
def init_Q(factors):
    ''' Input: list of factors
        Initialize a uniform distribution over the variables 
        defined by the factors '''
        
    Q = {}
    for f in factors:
        factor_cardinality = f.get_cardinality(f.scope())
        for var,card in factor_cardinality.items():
            if var not in Q:
                # cpd_values = [1/card for i in range(card)]
                # Q[var] = TabularCPD(var, card, [cpd_values])   
                Q[var] = DiscreteFactor(variables=var, cardinality=[card],
                                        values=[np.random.uniform(1,100,size=(card))])    
    return Q

In [16]:
fcard = cpd_poll.get_cardinality(cpd_poll.scope())
for var, card in fcard.items():
    new_Q = DiscreteFactor(variables=['A'], cardinality=[2],
                           values=np.random.uniform(size=(2)))

In [5]:
Q_0 = init_Q([cpd_poll, cpd_smoke])

TypeError: Variables: Expected type list or array like, got string

In [24]:
bp = BeliefPropagation(cancer_model)
bp.calibrate()

query = bp.query(variables=["Cancer", "Xray", "Pollution", "Smoker", "Dyspnoea"])

for i in query:
    print(query[i])

╒═════════════╤══════════════════╕
│ Pollution   │   phi(Pollution) │
╞═════════════╪══════════════════╡
│ Pollution_0 │           0.9000 │
├─────────────┼──────────────────┤
│ Pollution_1 │           0.1000 │
╘═════════════╧══════════════════╛
╒════════════╤═════════════════╕
│ Dyspnoea   │   phi(Dyspnoea) │
╞════════════╪═════════════════╡
│ Dyspnoea_0 │          0.3041 │
├────────────┼─────────────────┤
│ Dyspnoea_1 │          0.6959 │
╘════════════╧═════════════════╛
╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0116 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9884 │
╘══════════╧═══════════════╛
╒══════════╤═══════════════╕
│ Smoker   │   phi(Smoker) │
╞══════════╪═══════════════╡
│ Smoker_0 │        0.3000 │
├──────────┼───────────────┤
│ Smoker_1 │        0.7000 │
╘══════════╧═══════════════╛
╒════════╤═════════════╕
│ Xray   │   phi(Xray) │
╞════════╪═════════════╡
│ Xray_0 │      0.2081 │
├────────┼────────

In [25]:
def get_gibbs_scope(factors):
    variables = set()
    for f in factors:
        fscope = set(f.scope())
        variables = variables.union(fscope)
    return variables

In [26]:
get_gibbs_scope([cpd_xray])

{'Cancer', 'Xray'}

In [27]:
def get_assignment_index(factor, assignment):
    # Assignment should be fully defined
    indices = []
    for var in factor.scope():
        if var in assignment:
            indices.append(assignment[var])
        else:
            indices.append(slice(factor.get_cardinality([var])[var]))
            
    indices = tuple(indices)
    return indices

In [28]:
indices = get_assignment_index(cpd_cancer, {'Pollution': 1})
print(indices)
print(cpd_cancer.values[indices])

(slice(None, 2, None), slice(None, 2, None), 1)
[[0.05 0.02]
 [0.95 0.98]]


In [29]:
print(cpd_cancer)
print(cpd_cancer.get_cardinality(cpd_cancer.scope()))
print(cpd_cancer.assignment([1]))
print(cpd_cancer.values[0,0,1])

╒══════════╤══════════╤═════════════╤════════════════════════════════╕
│ Cancer   │ Smoker   │ Pollution   │   phi(Cancer,Smoker,Pollution) │
╞══════════╪══════════╪═════════════╪════════════════════════════════╡
│ Cancer_0 │ Smoker_0 │ Pollution_0 │                         0.0300 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_0 │ Pollution_1 │                         0.0500 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_1 │ Pollution_0 │                         0.0010 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_1 │ Pollution_1 │                         0.0200 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_1 │ Smoker_0 │ Pollution_0 │                         0.9700 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_1 │ Smoker_0 │ Pollution_1 │                         0.9500 │
├─────

In [30]:
def get_numeric_cardinality(cpd,exclude=None):
    # Get the numeric cardinality
    if exclude != None:
        fscope = list(cpd.scope())
        fscope.remove(exclude)
        card = cpd.get_cardinality(fscope)
    else:
        card = cpd.get_cardinality(cpd.scope())
    
    num_cardinality = 1
    for i in card:
        num_cardinality *= card[i]
    return num_cardinality

In [31]:
print(get_numeric_cardinality(cpd_cancer))

8


In [32]:
def get_factors_with_variable(factors, variable):
    # Get all factors with variable in its scope
    factors_with_variable = set()
    for f in factors:
        if variable in f.scope():
            factors_with_variable.add(f)
    return factors_with_variable

In [33]:
vars = ['apple', 'orange', 'pear']
domain = [range(2), range(3), range(3)]
for i in itertools.product(*domain):
    my_dict = {}
    for j, k in zip(vars,i):
        my_dict[j] = k
    print(my_dict)

{'pear': 0, 'orange': 0, 'apple': 0}
{'pear': 1, 'orange': 0, 'apple': 0}
{'pear': 2, 'orange': 0, 'apple': 0}
{'pear': 0, 'orange': 1, 'apple': 0}
{'pear': 1, 'orange': 1, 'apple': 0}
{'pear': 2, 'orange': 1, 'apple': 0}
{'pear': 0, 'orange': 2, 'apple': 0}
{'pear': 1, 'orange': 2, 'apple': 0}
{'pear': 2, 'orange': 2, 'apple': 0}
{'pear': 0, 'orange': 0, 'apple': 1}
{'pear': 1, 'orange': 0, 'apple': 1}
{'pear': 2, 'orange': 0, 'apple': 1}
{'pear': 0, 'orange': 1, 'apple': 1}
{'pear': 1, 'orange': 1, 'apple': 1}
{'pear': 2, 'orange': 1, 'apple': 1}
{'pear': 0, 'orange': 2, 'apple': 1}
{'pear': 1, 'orange': 2, 'apple': 1}
{'pear': 2, 'orange': 2, 'apple': 1}


In [34]:
def get_all_assignments(cardinality):
    variables = []
    domains = []
    all_assignments = []
    
    for var in cardinality:
        domains.append(range(cardinality[var]))
        variables.append(var)
    
    for vals in itertools.product(*domains):
        assignment = {}
        for var, val in zip(variables, vals):
            assignment[var] = val
        all_assignments.append(assignment)
    
    return all_assignments

In [35]:
all_assn = get_all_assignments(cpd_cancer.get_cardinality(['Smoker', 'Cancer']))

print(cpd_cancer)
for assn in all_assn:
    index = get_assignment_index(cpd_cancer, assn)
    print(assn, ":", cpd_cancer.values[index])

╒══════════╤══════════╤═════════════╤════════════════════════════════╕
│ Cancer   │ Smoker   │ Pollution   │   phi(Cancer,Smoker,Pollution) │
╞══════════╪══════════╪═════════════╪════════════════════════════════╡
│ Cancer_0 │ Smoker_0 │ Pollution_0 │                         0.0300 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_0 │ Pollution_1 │                         0.0500 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_1 │ Pollution_0 │                         0.0010 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_0 │ Smoker_1 │ Pollution_1 │                         0.0200 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_1 │ Smoker_0 │ Pollution_0 │                         0.9700 │
├──────────┼──────────┼─────────────┼────────────────────────────────┤
│ Cancer_1 │ Smoker_0 │ Pollution_1 │                         0.9500 │
├─────

In [36]:
def get_marg_products(assignments, Q):
    product = 1
    for var, assn in assignments.items():
        product *= Q[var].values[assn]
    return product

In [37]:
def fixed_point_optimize(x_i, factors, Q):
    new_x_i = 0
    for f in factors:
        U_phi_cardinality = dict(f.get_cardinality(f.scope()))
        del U_phi_cardinality[x_i[0]]
        U_phi_assignments = get_all_assignments(U_phi_cardinality)
        
        for u_phi in U_phi_assignments:
            marg_product = get_marg_products(u_phi, Q)
            u_phi[x_i[0]] = x_i[1]
            index = get_assignment_index(f, u_phi)
            new_x_i += marg_product * f.values[index]
            
    return math.exp(new_x_i)
            

In [38]:
def mean_field_approximation(factors, init_Q):
    # Q is a set of marginals
    # make function that assigns uniform prob to all assignments
    Q = copy.deepcopy(init_Q)
    unprocessed = get_gibbs_scope(factors)
    while len(unprocessed) != 0:
        variable_to_optimize = unprocessed.pop()
        old_X_i = Q[variable_to_optimize].copy()
        num_cardinality = get_numeric_cardinality(Q[variable_to_optimize])
        relevant_factors = get_factors_with_variable(factors,variable_to_optimize)

        for i in range(num_cardinality):
            variable_assignment = (variable_to_optimize, i)    
            index = get_assignment_index(Q[variable_to_optimize], {variable_to_optimize: i})
            Q[variable_to_optimize].values[index] = fixed_point_optimize(variable_assignment,
                                                                  relevant_factors, Q)
        Q[variable_to_optimize].normalize(inplace=True)
        if not np.allclose(Q[variable_to_optimize].values, old_X_i.values):
            vars_to_process = get_gibbs_scope(relevant_factors)
            unprocessed = unprocessed.union(vars_to_process)
            
        if variable_to_optimize in unprocessed:
            unprocessed.remove(variable_to_optimize)
        
    return Q

In [39]:
def init_Q(factors):
    Q = {}
    for f in factors:
        factor_cardinality = f.get_cardinality(f.scope())
        for var,card in factor_cardinality.items():
            if var not in Q:
                cpd_values = [1/card for i in range(card)]
                Q[var] = TabularCPD(var, card, [cpd_values])
                
    return Q 

In [40]:
my_factors = [cpd_poll, cpd_smoke, cpd_cancer, 
              cpd_xray, cpd_dysp]
my_Q = init_Q(my_factors)

In [41]:
new_Q = mean_field_approximation(my_factors, my_Q)

In [42]:
for i in new_Q:
    print(my_Q[i])
    print(new_Q[i])

╒═════════════╤═════╕
│ Pollution_0 │ 0.5 │
├─────────────┼─────┤
│ Pollution_1 │ 0.5 │
╘═════════════╧═════╛
╒═════════════╤══════════╕
│ Pollution_0 │ 0.692029 │
├─────────────┼──────────┤
│ Pollution_1 │ 0.307971 │
╘═════════════╧══════════╛
╒════════════╤═════╕
│ Dyspnoea_0 │ 0.5 │
├────────────┼─────┤
│ Dyspnoea_1 │ 0.5 │
╘════════════╧═════╛
╒════════════╤═════════╕
│ Dyspnoea_0 │ 0.44433 │
├────────────┼─────────┤
│ Dyspnoea_1 │ 0.55567 │
╘════════════╧═════════╛
╒══════════╤═════╕
│ Cancer_0 │ 0.5 │
├──────────┼─────┤
│ Cancer_1 │ 0.5 │
╘══════════╧═════╛
╒══════════╤═════════╕
│ Cancer_0 │ 0.25199 │
├──────────┼─────────┤
│ Cancer_1 │ 0.74801 │
╘══════════╧═════════╛
╒══════════╤═════╕
│ Smoker_0 │ 0.5 │
├──────────┼─────┤
│ Smoker_1 │ 0.5 │
╘══════════╧═════╛
╒══════════╤══════════╕
│ Smoker_0 │ 0.397825 │
├──────────┼──────────┤
│ Smoker_1 │ 0.602175 │
╘══════════╧══════════╛
╒════════╤═════╕
│ Xray_0 │ 0.5 │
├────────┼─────┤
│ Xray_1 │ 0.5 │
╘════════╧═════╛
╒════════╤═════

In [43]:
bp = BeliefPropagation(cancer_model)
bp.calibrate()
query = bp.query(variables=["Cancer", "Xray", "Pollution", "Smoker", "Dyspnoea"])

for i in query:
    print(query[i])

╒═════════════╤══════════════════╕
│ Pollution   │   phi(Pollution) │
╞═════════════╪══════════════════╡
│ Pollution_0 │           0.9000 │
├─────────────┼──────────────────┤
│ Pollution_1 │           0.1000 │
╘═════════════╧══════════════════╛
╒════════════╤═════════════════╕
│ Dyspnoea   │   phi(Dyspnoea) │
╞════════════╪═════════════════╡
│ Dyspnoea_0 │          0.3041 │
├────────────┼─────────────────┤
│ Dyspnoea_1 │          0.6959 │
╘════════════╧═════════════════╛
╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0116 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9884 │
╘══════════╧═══════════════╛
╒══════════╤═══════════════╕
│ Smoker   │   phi(Smoker) │
╞══════════╪═══════════════╡
│ Smoker_0 │        0.3000 │
├──────────┼───────────────┤
│ Smoker_1 │        0.7000 │
╘══════════╧═══════════════╛
╒════════╤═════════════╕
│ Xray   │   phi(Xray) │
╞════════╪═════════════╡
│ Xray_0 │      0.2081 │
├────────┼────────