# 7. Problem: Write a program to construct a Bayesian network from medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You may use a Python ML library API.

References:  http://pgmpy.org/  and https://www.inertia7.com/projects/55

# 7.1. Constructing  a Bayesian Network considering Medical Data

# 7.1.1 Defining a Structure with nodes and edges 

In [None]:
# Author : Dr.Thyagaraju G S , Context Innovations Lab , Dept of CSE , SDMIT - Ujire
# Date : July 11 2018
# The network structure is given as directed (parent, child) edges:
# Pollution and Smoker both point into Cancer, and Cancer points into
# Xray and Dyspnoea.
from pgmpy.models import BayesianModel

cancer_edges = [
    ('Pollution', 'Cancer'),
    ('Smoker', 'Cancer'),
    ('Cancer', 'Xray'),
    ('Cancer', 'Dyspnoea'),
]
cancer_model = BayesianModel(cancer_edges)

In [None]:
# Display the model object that was built from the edge list.
print(cancer_model)

In [3]:
# List the variables (nodes) of the network.
cancer_model.nodes()

['Pollution', 'Cancer', 'Smoker', 'Xray', 'Dyspnoea']

In [4]:
# List the directed (parent, child) edges of the network.
cancer_model.edges()

[('Pollution', 'Cancer'),
 ('Cancer', 'Xray'),
 ('Cancer', 'Dyspnoea'),
 ('Smoker', 'Cancer')]

In [5]:
# No CPDs have been attached to the model at this point, so the list is empty.
cancer_model.get_cpds()

[]

# 7.1.2 Creation of Conditional Probability Tables

In [6]:
# Now defining the parameters: one conditional probability table (CPD) per node.
# Every variable is binary (variable_card=2). Each column of `values` is a
# distribution over the variable's two states, so each column sums to 1.
# NOTE(review): the numbers look like the textbook "Cancer" network with
# state 0 = low pollution / smoker / cancer present / positive finding --
# confirm the intended meaning of states 0 and 1 before interpreting results.
from pgmpy.factors.discrete import TabularCPD

# Root nodes (no parents): a single column of prior probabilities.
cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]])
# Cancer has two parents. The columns follow the order of `evidence`, with the
# first evidence variable (Smoker) varying slowest:
#   (Smoker_0, Pollution_0), (Smoker_0, Pollution_1),
#   (Smoker_1, Pollution_0), (Smoker_1, Pollution_1)
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
# Xray and Dyspnoea each depend only on Cancer: one column per Cancer state.
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2])
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

# 7.1.3 Associating Conditional probabilities with the Bayesian Structure 

In [7]:
# Associating the parameters with the model structure: one CPD for each of
# the five nodes.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model; True means the attached CPDs
# are consistent with the network structure.
cancer_model.check_model()

True

In [8]:
# Doing some simple queries on the network.
# Pollution and Smoker are connected only through their common child Cancer
# (Pollution -> Cancer <- Smoker). With nothing observed, that trail is not
# active, so the call returns False.
cancer_model.is_active_trail('Pollution', 'Smoker')

False

In [9]:
# Observing the common child Cancer activates the trail between its two
# parents, so the same query now returns True.
cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer'])

True

In [10]:
# List every CPD now attached to the model (one per node).
cancer_model.get_cpds()

[<TabularCPD representing P(Pollution:2) at 0x84ca390>,
 <TabularCPD representing P(Smoker:2) at 0x84ca3b0>,
 <TabularCPD representing P(Cancer:2 | Smoker:2, Pollution:2) at 0x84ca3d0>,
 <TabularCPD representing P(Xray:2 | Cancer:2) at 0x84ca430>,
 <TabularCPD representing P(Dyspnoea:2 | Cancer:2) at 0x84ca410>]

In [11]:
# Show the prior distribution of the root node Pollution.
print(cancer_model.get_cpds('Pollution'))

╒═════════════╤═════╕
│ Pollution_0 │ 0.9 │
├─────────────┼─────┤
│ Pollution_1 │ 0.1 │
╘═════════════╧═════╛


In [12]:
# Show the prior distribution of the root node Smoker.
print(cancer_model.get_cpds('Smoker'))

╒══════════╤═════╕
│ Smoker_0 │ 0.3 │
├──────────┼─────┤
│ Smoker_1 │ 0.7 │
╘══════════╧═════╛


In [13]:
# Show P(Xray | Cancer): one column per state of the parent Cancer.
print(cancer_model.get_cpds('Xray'))

╒════════╤══════════╤══════════╕
│ Cancer │ Cancer_0 │ Cancer_1 │
├────────┼──────────┼──────────┤
│ Xray_0 │ 0.9      │ 0.2      │
├────────┼──────────┼──────────┤
│ Xray_1 │ 0.1      │ 0.8      │
╘════════╧══════════╧══════════╛


In [14]:
# Show P(Dyspnoea | Cancer): one column per state of the parent Cancer.
print(cancer_model.get_cpds('Dyspnoea'))

╒════════════╤══════════╤══════════╕
│ Cancer     │ Cancer_0 │ Cancer_1 │
├────────────┼──────────┼──────────┤
│ Dyspnoea_0 │ 0.65     │ 0.3      │
├────────────┼──────────┼──────────┤
│ Dyspnoea_1 │ 0.35     │ 0.7      │
╘════════════╧══════════╧══════════╛


In [15]:
# Show P(Cancer | Smoker, Pollution): one column per combination of the two
# parent states.
print(cancer_model.get_cpds('Cancer'))

╒═══════════╤═════════════╤═════════════╤═════════════╤═════════════╕
│ Smoker    │ Smoker_0    │ Smoker_0    │ Smoker_1    │ Smoker_1    │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Pollution │ Pollution_0 │ Pollution_1 │ Pollution_0 │ Pollution_1 │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_0  │ 0.03        │ 0.05        │ 0.001       │ 0.02        │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_1  │ 0.97        │ 0.95        │ 0.999       │ 0.98        │
╘═══════════╧═════════════╧═════════════╧═════════════╧═════════════╛


# 7.1.4 Determining the Local independencies

In [16]:
# Xray's only parent is Cancer; given Cancer it is independent of all the
# other variables.
cancer_model.local_independencies('Xray')

(Xray _|_ Dyspnoea, Pollution, Smoker | Cancer)

In [17]:
# Pollution has no parents; it is (unconditionally) independent of the other
# root node, Smoker.
cancer_model.local_independencies('Pollution')

(Pollution _|_ Smoker)

In [18]:
# Smoker has no parents; it is (unconditionally) independent of the other
# root node, Pollution.
cancer_model.local_independencies('Smoker')

(Smoker _|_ Pollution)

In [19]:
# Dyspnoea's only parent is Cancer; given Cancer it is independent of all the
# other variables.
cancer_model.local_independencies('Dyspnoea')

(Dyspnoea _|_ Pollution, Xray, Smoker | Cancer)

In [20]:
# Every other node is either a parent (Pollution, Smoker) or a child
# (Xray, Dyspnoea) of Cancer, so no local independence statement is shown.
cancer_model.local_independencies('Cancer')



In [21]:
# Enumerate all (conditional) independence statements implied by the
# network structure.
cancer_model.get_independencies()

(Pollution _|_ Smoker)
(Pollution _|_ Dyspnoea, Xray | Cancer)
(Pollution _|_ Xray | Dyspnoea, Cancer)
(Pollution _|_ Dyspnoea, Xray | Cancer, Smoker)
(Pollution _|_ Dyspnoea | Cancer, Xray)
(Pollution _|_ Xray | Dyspnoea, Cancer, Smoker)
(Pollution _|_ Dyspnoea | Cancer, Xray, Smoker)
(Smoker _|_ Pollution)
(Smoker _|_ Dyspnoea, Xray | Cancer)
(Smoker _|_ Xray | Dyspnoea, Cancer)
(Smoker _|_ Dyspnoea, Xray | Cancer, Pollution)
(Smoker _|_ Dyspnoea | Cancer, Xray)
(Smoker _|_ Xray | Dyspnoea, Cancer, Pollution)
(Smoker _|_ Dyspnoea | Cancer, Pollution, Xray)
(Xray _|_ Dyspnoea, Pollution, Smoker | Cancer)
(Xray _|_ Pollution, Smoker | Dyspnoea, Cancer)
(Xray _|_ Dyspnoea, Smoker | Cancer, Pollution)
(Xray _|_ Dyspnoea, Pollution | Cancer, Smoker)
(Xray _|_ Smoker | Dyspnoea, Cancer, Pollution)
(Xray _|_ Pollution | Dyspnoea, Cancer, Smoker)
(Xray _|_ Dyspnoea | Cancer, Pollution, Smoker)
(Dyspnoea _|_ Pollution, Smoker, Xray | Cancer)
(Dyspnoea _|_ Smoker, Xray | Cancer, Pollution)
(Dy

# 7.1.5.Inferencing with Bayesian Network

In [22]:
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
cancer_infer = VariableElimination(cancer_model)

# Computing the distribution of Cancer given the evidence Smoker = 1.
# NOTE(review): q['Cancer'] assumes query() returns a mapping keyed by
# variable name -- confirm this against the installed pgmpy version.
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


In [23]:
# Computing the distribution of Cancer given the evidence Smoker = 1.
# This is the same query as in the preceding cell and gives the same result.
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


In [24]:
# Computing the distribution of Cancer given Smoker = 1 and Pollution = 1.
# With both parents observed, the result is the matching column of the
# Cancer CPD.
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1,'Pollution': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0200 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9800 │
╘══════════╧═══════════════╛


# 7.2 Diagnosis of heart patients using standard Heart Disease Data Set

In [26]:
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt # Visuals
import seaborn as sns 
import sklearn as skl
import pandas as pd

# 7.2.1 Importing Heart Disease Data Set and Customizing

In [27]:
import sys

# NOTE: despite its name, this URL points at processed.hungarian.data, i.e.
# the Hungarian subset of the UCI heart-disease data, not the Cleveland one.
# The variable name is kept unchanged so existing references keep working.
Cleveland_data_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
#Hungarian_data_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'
#Switzerland_data_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.switzerland.data'
# See a whole array when we output it. Current NumPy rejects NaN as a
# threshold; sys.maxsize is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)

# The file has no header row, so the column names are supplied explicitly.
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']
# Download and parse the data; the HTTP response is closed as soon as the
# CSV has been read.
with urlopen(Cleveland_data_URL) as response:
    heartDisease = pd.read_csv(response, names=names)  # Hungarian data (see note above)
#HungarianHeartDisease = pd.read_csv(urlopen(Hungarian_data_URL), names = names) #gets Hungary data
#SwitzerlandHeartDisease = pd.read_csv(urlopen(Switzerland_data_URL), names = names) #gets Switzerland data
#datatemp = [ClevelandHeartDisease, HungarianHeartDisease, SwitzerlandHeartDisease] #combines all arrays into a list
#heartDisease = pd.concat(datatemp)#combines list into one array
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [28]:
# Discard the four columns that the model below does not use, in a single
# in-place call.
heartDisease.drop(columns=['ca', 'slope', 'thal', 'oldpeak'], inplace=True)

# The raw data marks missing values with '?'; turn them into NaN.
heartDisease = heartDisease.replace(to_replace='?', value=np.nan)
heartDisease.dtypes

age              int64
sex              int64
cp               int64
trestbps        object
chol            object
fbs             object
restecg         object
thalach         object
exang           object
heartdisease     int64
dtype: object

In [29]:
# Show the columns that remain in the data frame.
heartDisease.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

# 7.2.2 Modeling Heart Disease Data 

In [36]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

# Hand-specified structure as directed (parent, child) edges:
#   age, sex and exang influence trestbps; age also influences fbs;
#   trestbps and fbs influence heartdisease;
#   heartdisease influences restecg, thalach and chol.
# The edge ('sex', 'trestbps') was previously listed twice; the redundant
# copy is removed, which leaves the graph itself unchanged.
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

# Learning CPDs using Maximum Likelihood Estimators
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
# Uncomment to print every learned CPD:
#for cpd in model.get_cpds():
#    print("CPD of {variable}:".format(variable=cpd.variable))
#    print(cpd)

In [37]:
# Show the learned distribution of age (a node without parents in this model).
print(model.get_cpds('age'))

╒═════════╤════════════╕
│ age(28) │ 0.00383142 │
├─────────┼────────────┤
│ age(29) │ 0.00383142 │
├─────────┼────────────┤
│ age(30) │ 0.00383142 │
├─────────┼────────────┤
│ age(31) │ 0.00766284 │
├─────────┼────────────┤
│ age(32) │ 0.0153257  │
├─────────┼────────────┤
│ age(33) │ 0.00766284 │
├─────────┼────────────┤
│ age(34) │ 0.0153257  │
├─────────┼────────────┤
│ age(35) │ 0.0191571  │
├─────────┼────────────┤
│ age(36) │ 0.0191571  │
├─────────┼────────────┤
│ age(37) │ 0.0306513  │
├─────────┼────────────┤
│ age(38) │ 0.0191571  │
├─────────┼────────────┤
│ age(39) │ 0.0344828  │
├─────────┼────────────┤
│ age(40) │ 0.0191571  │
├─────────┼────────────┤
│ age(41) │ 0.0383142  │
├─────────┼────────────┤
│ age(42) │ 0.0268199  │
├─────────┼────────────┤
│ age(43) │ 0.0421456  │
├─────────┼────────────┤
│ age(44) │ 0.0268199  │
├─────────┼────────────┤
│ age(45) │ 0.0229885  │
├─────────┼────────────┤
│ age(46) │ 0.045977   │
├─────────┼────────────┤
│ age(47) │ 0.0344828  │


In [31]:
# Show the learned P(chol | heartdisease): one column per heartdisease state.
print(model.get_cpds('chol'))

╒══════════════╤══════════════════════╤══════════════════════╕
│ heartdisease │ heartdisease(0)      │ heartdisease(1)      │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(100)    │ 0.006134969325153374 │ 0.0                  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(117)    │ 0.0                  │ 0.01020408163265306  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(129)    │ 0.006134969325153374 │ 0.0                  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(132)    │ 0.006134969325153374 │ 0.0                  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(147)    │ 0.012269938650306749 │ 0.0                  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(156)    │ 0.0                  │ 0.01020408163265306  │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(160)    │ 0.012269938650306749 │ 0.0102040816326

In [40]:
# Show the learned distribution of sex (a node without parents in this model).
print(model.get_cpds('sex'))

╒════════╤══════════╕
│ sex(0) │ 0.264368 │
├────────┼──────────┤
│ sex(1) │ 0.735632 │
╘════════╧══════════╛


In [28]:
# Enumerate all (conditional) independence statements implied by the
# heart-disease network structure.
model.get_independencies()

(age _|_ exang, sex)
(age _|_ sex | exang)
(age _|_ exang, sex | fbs)
(age _|_ thalach, chol, restecg | heartdisease)
(age _|_ exang | sex)
(age _|_ thalach, restecg | chol, heartdisease)
(age _|_ thalach, chol, heartdisease, restecg | trestbps, fbs)
(age _|_ thalach, chol, restecg | trestbps, heartdisease)
(age _|_ chol, restecg | thalach, heartdisease)
(age _|_ sex | exang, fbs)
(age _|_ thalach, chol, restecg | exang, heartdisease)
(age _|_ thalach, chol, restecg | fbs, heartdisease)
(age _|_ exang | fbs, sex)
(age _|_ thalach, chol, restecg | heartdisease, sex)
(age _|_ thalach, chol | heartdisease, restecg)
(age _|_ thalach, heartdisease, restecg | trestbps, chol, fbs)
(age _|_ thalach, restecg | trestbps, chol, heartdisease)
(age _|_ restecg | thalach, chol, heartdisease)
(age _|_ thalach, restecg | exang, chol, heartdisease)
(age _|_ thalach, restecg | fbs, chol, heartdisease)
(age _|_ thalach, restecg | chol, heartdisease, sex)
(age _|_ thalach | chol, heartdisease, restecg)
(a

# 7.2.3.Inferencing with Bayesian Network

In [33]:
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)

# Computing the distribution of heartdisease given the evidence age = 28.
# NOTE(review): depending on the pgmpy version, evidence values are read
# either as state names or as zero-based state indices -- confirm that 28 is
# interpreted as "age == 28".
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.6333 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.3667 │
╘════════════════╧═════════════════════╛


In [35]:
# Computing the distribution of heartdisease given the evidence chol = 100.
# NOTE(review): depending on the pgmpy version, evidence values are read
# either as state names or as zero-based state indices -- confirm that 100 is
# interpreted as "chol == 100".
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              1.0000 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.0000 │
╘════════════════╧═════════════════════╛
