In [3]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer
import matplotlib.pyplot as plt


heartdisease = pd.read_csv('hea.csv')
heartdisease = heartdisease.replace('?', np.nan)

print('Few examples from the dataset are given below')
print(heartdisease.head())


discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
continuous_vars = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']


heartdisease[continuous_vars] = discretizer.fit_transform(heartdisease[continuous_vars])


model = BayesianNetwork([('age', 'trestbps'), ('age', 'fbs'),
                         ('sex', 'trestbps'), ('exang', 'trestbps'),
                         ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                         ('heartdisease', 'restecg'), ('heartdisease', 'thalach'),
                         ('chol', 'heartdisease')])

print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartdisease, estimator=MaximumLikelihoodEstimator)

print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)


evidence_age = discretizer.transform(np.array([[30, 0, 0, 0, 0]]))[0][0]

evidence_chol = discretizer.transform(np.array([[0, 0, 254, 0, 0]]))[0][2]

print('\n1. Probability of HeartDisease given Age=30')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': int(evidence_age)})
print(q)

print('\n2. Probability of HeartDisease given cholesterol=254')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': int(evidence_chol)})
print(q)

Few examples from the dataset are given below
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  

Learning CPD using Maximum likelihood estimators




KeyError: 'heartdisease'

In [1]:
pip install numpy

Note: you may need to restart the kernel to use updated packages.
