In [1]:
import pandas as pd
import numpy as np
from pgmpy.models import BayesianModel
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.estimators import BayesianEstimator
from pgmpy.estimators import HillClimbSearch
from pgmpy.estimators import BDeuScore, K2Score, BicScore
from pgmpy.metrics import structure_score
from pgmpy.utils import get_example_model
from pgmpy.estimators import ScoreCache
from pgmpy.inference.CausalInference import CausalInference
import networkx as nx
import bnlearn as bn

In [2]:
asia = get_example_model("asia")

In [3]:
asia.edges()

OutEdgeView([('asia', 'tub'), ('tub', 'either'), ('smoke', 'lung'), ('smoke', 'bronc'), ('lung', 'either'), ('bronc', 'dysp'), ('either', 'xray'), ('either', 'dysp')])

In [4]:
data = pd.read_csv("../data/asia10K.csv")
data = data.replace({'yes': 1, 'no': 0})
data.rename(columns={
    'Smoker': 'S',
    'LungCancer': 'L',
    'VisitToAsia': 'A',
    'Tuberculosis': 'T',
    'TuberculosisOrCancer': 'E',
    'X-ray': 'X',
    'Bronchitis': 'B',
    'Dyspnea': 'D'
}, inplace=True)

In [5]:
data.head()

Unnamed: 0,S,L,A,T,E,X,B,D
0,1,1,0,0,1,1,1,1
1,0,0,0,0,0,1,1,1
2,0,0,0,0,0,0,1,1
3,0,0,0,0,0,0,1,1
4,1,0,0,0,0,0,1,0


In [6]:
est = HillClimbSearch(data)
model = est.estimate(scoring_method="bicscore")

  0%|          | 0/1000000 [00:00<?, ?it/s]

In [58]:
model.edges()

OutEdgeView([('L', 'S'), ('T', 'L'), ('T', 'A'), ('E', 'L'), ('E', 'T'), ('E', 'D'), ('E', 'B'), ('D', 'B'), ('B', 'S'), ('X', 'E')])

In [8]:
model = BayesianNetwork(model.edges())

In [9]:
estimator = MaximumLikelihoodEstimator(model, data)
estimator.get_parameters()
model.fit(data, estimator=MaximumLikelihoodEstimator)

In [34]:
# compute P(tub=True)
inference = VariableElimination(model)
prob_tub = inference.query(variables=["T"], evidence={'A': 1, 'X': 1, 'L': 0})
print(prob_tub)

+------+----------+
| T    |   phi(T) |
| T(0) |   0.3980 |
+------+----------+
| T(1) |   0.6020 |
+------+----------+


In [36]:
prob_tub.values[0]

0.39801363612054974

In [None]:
def compute_delta_prob(inference, var, evidence):
    prob_var = inference.query(variables=[var])
    prob_var_new = inference.query(variables=[var], evidence=evidence)
    delta_prob = prob_var_new[var].values[1] - prob_var[var].values[1]
    return delta_prob

def compute_KL_divergence(inference, var, evidence):
    prob_var = inference.query(variables=[var])
    prob_var_new = inference.query(variables=[var], evidence=evidence)
    kl_div = np.sum(np.where(prob_var.values != 0, prob_var.values * np.log(prob_var.values / prob_var_new.values), 0))
    return kl_div

In [55]:
# function that computes the expected reward of observing a new variable
def expected_reward(inference, var, target_var, current_evidence):
    # compute the current probability of the target variable
    prob_target = inference.query(variables=[target_var], evidence=current_evidence)
    expected_reward = 0
    # iterate over the possible values of the variable to be observed
    for value in [0, 1]: # BINARY
        new_evidence = current_evidence.copy()
        new_evidence[var] = value
        prob_var = inference.query(variables=[var], evidence=current_evidence)

        prob_value = prob_var.values[value]
        prob_target_new = inference.query(variables=[target_var], evidence=new_evidence)

        #reward = prob_target_new.values[1] - prob_target.values[1]
        reward = compute_KL_divergence(inference, target_var, new_evidence)
        print(f"Value: {value}, Prob: {prob_value}, Reward: {reward}")
        expected_reward += prob_value * reward
    return expected_reward

importante: lembrar que KL não é simétrica

In [56]:
random_patient = {'A': 1, 'X': 1}

latent_variables = set(data.columns.to_list()) - set(list(random_patient.keys()))
latent_variables.remove('T')
latent_variables = list(latent_variables)   

In [57]:
for v in latent_variables:
    print(f"Computing expected reward for variable {v}...")
    er = expected_reward(inference, v, 'T', random_patient)
    print(f"Expected reward of observing {v}: {er}")

Computing expected reward for variable S...
Value: 0, Prob: 0.3652287540951489, Reward: 0.7391068999671903
Value: 1, Prob: 0.6347712459048511, Reward: 0.410519306148356
Expected reward of observing S: 0.5305289436499316
Computing expected reward for variable E...
Value: 0, Prob: 0.26353505007879646, Reward: inf
Value: 1, Prob: 0.7364649499212035, Reward: 0.8331388144248139
Expected reward of observing E: inf
Computing expected reward for variable B...
Value: 0, Prob: 0.48403735281011495, Reward: 0.4756421380318725
Value: 1, Prob: 0.5159626471898849, Reward: 0.5592136853930176
Expected reward of observing B: 0.5187619348380837
Computing expected reward for variable L...
Value: 0, Prob: 0.6621256815406632, Reward: 0.8626372446971315
Value: 1, Prob: 0.33787431845933685, Reward: 0.07815527070732561
Expected reward of observing L: 0.5975809323916907
Computing expected reward for variable D...
Value: 0, Prob: 0.30783743265775765, Reward: 0.29425792458140965
Value: 1, Prob: 0.6921625673422424