In [1]:
#Diagnosis of heart patients using standard Heart Disease Data Set
import numpy as np
from urllib.request import urlopen
import pandas as pd

In [2]:
#Importing Heart Disease Data Set and Customizing
Cleveland_data_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
np.set_printoptions(threshold=np.nan)
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']
heartDisease = pd.read_csv(urlopen(Cleveland_data_URL), names = names)
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [3]:
#Modeling Heart Disease Data
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'), ('sex', 'trestbps'), 
                       ('exang', 'trestbps'),('trestbps','heartdisease'),('fbs','heartdisease'),
                      ('heartdisease','restecg'),('heartdisease','thalach'),('heartdisease','chol')])

# Learing CPDs using Maximum Likelihood Estimators
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print("Nodes of the Model:\n",model.nodes())
print("Edges of the model:\n",model.edges())
print("CPDS for Cholestrol:\n",model.get_cpds('chol'))#Cpds for other attributes can be printed in a similar way
print("Models independencies:\n",model.get_independencies())

Nodes of the Model:
 ['age', 'trestbps', 'fbs', 'sex', 'exang', 'heartdisease', 'restecg', 'thalach', 'chol']
Edges of the model:
 [('age', 'trestbps'), ('age', 'fbs'), ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'), ('sex', 'trestbps'), ('exang', 'trestbps'), ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol')]
CPDS for Cholestrol:
 ╒══════════════╤══════════════════════╤══════════════════════╕
│ heartdisease │ heartdisease(0)      │ heartdisease(1)      │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(100)    │ 0.006493506493506494 │ 0.006493506493506494 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(117)    │ 0.006493506493506494 │ 0.006493506493506494 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(129)    │ 0.006493506493506494 │ 0.006493506493506494 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(132)    │ 0.006493506493506494 │ 0.00649350649350649

In [4]:
#Inferencing with Bayesian Network
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)

# Computing the probability of bronc given smoke.
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print("Probability of gettting heart disease with the evidence age 28:\n ")
print(q['heartdisease'])

q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print("Probability of gettting heart disease with the evidence Cholestrol 100:\n ")
print(q['heartdisease'])

Probability of gettting heart disease with the evidence age 28:
 
╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.5897 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.4103 │
╘════════════════╧═════════════════════╛
Probability of gettting heart disease with the evidence Cholestrol 100:
 
╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.6013 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.3987 │
╘════════════════╧═════════════════════╛
