### Sampling in Bayesian Networks </br>
1. Prior Sampling </br>
2. Likelihood Weighted </br>

<div style="text-align: right"> <B>Supanat Jintawatsakoon : 60199130181</B></div>

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from pgmpy.models.BayesianModel import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.factors.discrete import State
import random as random


In [2]:
model = BayesianModel([('A', 'C'), ('B', 'C')])

cpd_a = TabularCPD(variable='A', 
                           variable_card=2, 
                           values=[[0.2, 0.8]])
cpd_b = TabularCPD(variable='B', 
                           variable_card=2, 
                           values=[[0.7, 0.3]])
cpd_c = TabularCPD(variable='C', 
                           variable_card=3, 
                           values=[[0.5, 0.8,0.8,0.9],
                                  [0.3, 0.15,0.1,0.08],
                                  [0.2, 0.05,0.1,0.02]],
                      evidence = ['B','A'],
                      evidence_card = [2,2])

model.add_cpds(cpd_a, cpd_b, cpd_c)

### Define Helper Method

In [3]:
def get_value_from_cpd(cpd_values,random_value):
    cpd_value_line = []
    current = 0;
    for i,cpd_value in enumerate(cpd_values):
        current += cpd_value
        if random_value<= current:
            return i

In [4]:
def get_value_from_cpd_evidence(cpd_values,evidence,random_value):
    k_length = len(cpd_values)
    cpd_value_line = []
    current = 0;
    for k in range(k_length):
        v = cpd_values[k][evidence[0]][evidence[1]]
        current += v
        if random_value<= current:
            return k
        #print(v)

### Prior Sampling

In [5]:
def prior_sampling(model,sample_size):  
    sorted_order = list(nx.topological_sort(model))
    variable_count = len(model.nodes)
    size = sample_size * variable_count
    #print(size)
    r = [random.random() for _ in range(size)]
    k = 0
    temp = dict()
    sampled = []
    for i in range(sample_size):
        for node in sorted_order:
            cpd = model.get_cpds(node)
            evidence = cpd.variables[:0:-1]
            v = None
            if evidence:
                #
                evidence_value = []
                for ev in evidence:
                    evidence_value.append(temp[ev])   
                v = get_value_from_cpd_evidence(cpd.values,evidence_value,r[k])  
            else:
                #no evidence case
                v = get_value_from_cpd(cpd.values,r[k])
            temp[cpd.variables[0]] = v
            k+=1
            
        sampled.append(temp.copy())
        print(temp)
    return sampled

In [10]:
samples = prior_sampling(model,sample_size=10)

{'B': 0, 'A': 1, 'C': 0}
{'B': 0, 'A': 1, 'C': 0}
{'B': 0, 'A': 1, 'C': 0}
{'B': 0, 'A': 1, 'C': 1}
{'B': 0, 'A': 0, 'C': 2}
{'B': 0, 'A': 0, 'C': 0}
{'B': 0, 'A': 1, 'C': 0}
{'B': 1, 'A': 0, 'C': 0}
{'B': 1, 'A': 0, 'C': 0}
{'B': 1, 'A': 1, 'C': 0}


In [11]:
samples_df = pd.DataFrame(samples,columns=['A','B','C'])
samples_df

Unnamed: 0,A,B,C
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,1
4,0,0,2
5,0,0,0
6,1,0,0
7,0,1,0
8,0,1,0
9,1,1,0


### Likelihood Weighted

In [12]:
def get_weighted_from_cpd(cpd_values,random_value):
    cpd_value_line = []
    current = 0;
    for i,cpd_value in enumerate(cpd_values):
        current += cpd_value
        #print('cpdv',cpd_value)
        if random_value<= current:
            return cpd_value

In [24]:
def likelihood_weighted_sampling(model,given_evidence,sample_size):
    
    sorted_order = list(nx.topological_sort(model))
    variable_count = len(model.nodes)
    size = sample_size * variable_count
    #print(size)
    r = [random.random() for _ in range(size)]
    k = 0
    temp = dict()
    sampled = []
    
    for i in range(sample_size):
        w_list = []
        for node in sorted_order:
            cpd = model.get_cpds(node)
            evidence = cpd.variables[:0:-1]
            #print(evidence)  
            v = None
            w = 1.0
            if cpd.variable in given_evidences:
                v = given_evidences[cpd.variable]
                w_list.append(get_weighted_from_cpd(cpd.values,r[k]))
            else:
                if evidence:
                #
                    evidence_value = []
                    for ev in evidence:
                        evidence_value.append(temp[ev])   
                        #print(evidence_value)
                        #v = get_value_from_cpd_evidence(cpd_g.values,evidence_value,r[k])
                    v = get_value_from_cpd_evidence(cpd.values,evidence_value,r[k])
                else:
                #no evidence case
                    v = get_value_from_cpd(cpd.values,r[k])
            temp[cpd.variables[0]] = v
            k+=1
        w = 1.0
        for wi in w_list:
            w *= wi
        temp['weighted'] = w   
        sampled.append(temp.copy())

    return sampled

In [29]:
given_evidences = dict()
#given_evidences['A'] = 0
given_evidences['B'] = 1

samples = likelihood_weighted_sampling(model,given_evidences,sample_size=10)
samples_df = pd.DataFrame(samples,columns=['A','B','C','weighted'])
samples_df

Unnamed: 0,A,B,C,weighted
0,0,1,0,0.3
1,0,1,2,0.7
2,1,1,0,0.7
3,1,1,0,0.7
4,1,1,0,0.7
5,1,1,0,0.7
6,0,1,0,0.7
7,1,1,0,0.7
8,0,1,0,0.3
9,1,1,0,0.7
