In [33]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer
import matplotlib.pyplot as plt

In [34]:
# Read the heart-disease CSV. Any cell holding the literal '?' (presumably the
# file's missing-value marker -- confirm against the dataset notes) is replaced
# with NaN so pandas treats it as missing.
heartdisease = pd.read_csv('./datasets/P9_dataset.csv').replace('?', np.nan)

# Preview the first rows as a quick sanity check on the load.
print('Few examples from the dataset are given below')
print(heartdisease.head())

Few examples from the dataset are given below
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   1       145   233    1        2      150      0      2.3      3   
1   67    1   4       160   286    0        2      108      1      1.5      2   
2   67    1   4       120   229    0        2      129      1      2.6      2   
3   37    1   3       130   250    0        0      187      0      3.5      3   
4   41    0   2       130   204    0        2      172      0      1.4      1   

  ca thal  heartdisease  
0  0    6             0  
1  3    3             2  
2  2    7             1  
3  0    3             0  
4  0    3             0  


In [29]:
# Columns holding continuous measurements; each is binned into 5 ordinal,
# equal-width ('uniform') bins.
continuous_vars = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')

# NOTE: the binned values overwrite the original columns of `heartdisease`,
# so this cell is not idempotent -- re-running it without reloading the data
# would re-fit the discretizer on already-binned values.
binned_values = discretizer.fit_transform(heartdisease[continuous_vars])
heartdisease[continuous_vars] = binned_values

In [30]:
# Hand-specified network structure as (parent, child) edges.
network_edges = [
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('chol', 'heartdisease'),
]
model = BayesianNetwork(network_edges)

# Estimate the conditional probability distributions from the data frame
# using maximum-likelihood estimation.
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartdisease, estimator=MaximumLikelihoodEstimator)

# Build the variable-elimination engine that later cells query.
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)


Learning CPD using Maximum likelihood estimators

Inferencing with Bayesian Network:


In [31]:
# Discretizing specific evidence values for age and cholesterol
def discretize_value(column, value):
    """Return the bin index the fitted `discretizer` assigns to `value`.

    Parameters
    ----------
    column : str
        Name of one of the columns listed in `continuous_vars`.
    value : int or float
        Raw (un-binned) measurement for that column.

    Returns
    -------
    int
        Ordinal bin index of `value` within `column`.

    Raises
    ------
    ValueError
        If `column` is not one of `continuous_vars`.
    """
    # Look the column position up by name instead of hard-coding it, so the
    # result stays correct if `continuous_vars` is ever reordered.
    position = continuous_vars.index(column)

    # The discretizer transforms all of its fitted columns at once, so a full
    # row is required; the other columns get a filler 0 whose bins are simply
    # discarded. Passing a DataFrame with the fitted column names (rather than
    # a bare ndarray) avoids scikit-learn's feature-name mismatch warning.
    row_values = dict.fromkeys(continuous_vars, 0)
    row_values[column] = value
    row = pd.DataFrame([row_values], columns=continuous_vars)

    binned_row = np.asarray(discretizer.transform(row))
    return int(binned_row[0, position])


evidence_age = discretize_value('age', 30)

evidence_chol = discretize_value('chol', 254)



In [35]:
# Two separate single-evidence queries on the `heartdisease` node: the first
# conditions only on the age bin, the second only on the cholesterol bin
# (they are not combined into one joint-evidence query).
evidence_queries = (
    ('\n1. Probability of HeartDisease given Age=30',
     {'age': int(evidence_age)}),
    ('\n2. Probability of HeartDisease given cholesterol=254',
     {'chol': int(evidence_chol)}),
)

for heading, observed in evidence_queries:
    print(heading)
    q = HeartDisease_infer.query(variables=['heartdisease'], evidence=observed)
    print(q)


1. Probability of HeartDisease given Age=30
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.5399 |
+-----------------+---------------------+
| heartdisease(1) |              0.1946 |
+-----------------+---------------------+
| heartdisease(2) |              0.1121 |
+-----------------+---------------------+
| heartdisease(3) |              0.1151 |
+-----------------+---------------------+
| heartdisease(4) |              0.0383 |
+-----------------+---------------------+

2. Probability of HeartDisease given cholesterol=254
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.5203 |
+-----------------+---------------------+
| heartdisease(1) |              0.2275 |
+-----------------+---------------------+
| heartdisease(2) |              0.1121 |
+-----------------+---------------------+
| heartdisease(3) |              0.1085 |
+-----------------+