# Bayesian Network

In [2]:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

In [13]:
# Read the heart-disease records from the CSV file in the working directory.
heartDisease = pd.read_csv(filepath_or_buffer='heart.csv')
# Preview the first ten rows of the loaded frame.
heartDisease.iloc[:10]

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,63,1,1,145,233,1,2,150,0,2.3,3,0,6,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0
5,56,1,2,120,236,0,0,178,0,0.8,1,0,3,0
6,62,0,4,140,268,0,2,160,0,3.6,3,2,3,3
7,57,0,4,120,354,0,0,163,1,0.6,1,0,3,0
8,63,1,4,130,254,0,2,147,0,1.4,2,1,7,2
9,53,1,4,140,203,1,2,155,1,3.1,3,0,7,1


In [5]:
# Print a heading, then display the dtype of each column of the loaded frame.
print('Attributes and datatypes')
heartDisease.dtypes

Attributes and datatypes


age               int64
sex               int64
cp                int64
trestbps          int64
chol              int64
fbs               int64
restecg           int64
thalach           int64
exang             int64
oldpeak         float64
slope             int64
ca                int64
thal              int64
heartdisease      int64
dtype: object

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
heartDisease.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.438944,0.679868,3.158416,131.689769,246.693069,0.148515,0.990099,149.607261,0.326733,1.039604,1.60066,0.683168,4.729373,0.937294
std,9.038662,0.467299,0.960126,17.599748,51.776918,0.356198,0.994971,22.875003,0.469794,1.161075,0.616226,0.944808,1.941025,1.228536
min,29.0,0.0,1.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,2.0,0.0
25%,48.0,0.0,3.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,3.0,0.0
50%,56.0,1.0,3.0,130.0,241.0,0.0,1.0,153.0,0.0,0.8,2.0,0.0,3.0,0.0
75%,61.0,1.0,4.0,140.0,275.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0,7.0,2.0
max,77.0,1.0,4.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0,7.0,4.0


In [7]:
# Network structure as directed (parent, child) edges:
#   age, sex, exang, cp  ->  heartdisease  ->  restecg, chol
# The edge order is the same as if the six tuples were written out by hand.
model = BayesianModel(
    [(parent, 'heartdisease') for parent in ('age', 'sex', 'exang', 'cp')]
    + [('heartdisease', child) for child in ('restecg', 'chol')]
)
print('Learning CPD using Maximum likelihood estimators')

Learning CPD using Maximum likelihood estimators


In [8]:
# Learn the model's CPDs from the loaded data using the maximum-likelihood estimator.
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

In [9]:
print('Inferencing with Bayesian Network:')
# Build a variable-elimination inference object over the fitted model;
# the query cells that follow call `.query(...)` on it.
HeartDiseasetest_infer = VariableElimination(model)

Inferencing with Bayesian Network:


In [24]:
# Query 1: distribution of `heartdisease` given the observation restecg = 1.
# NOTE: the cell after this one lists only four rows with restecg == 1, so
# this result rests on very little data and should be read with caution.
print('1. Probability of HeartDisease given evidence = restecg : 1')
# show_progress=False keeps pgmpy's tqdm elimination progress bars out of the
# cell output, so only the heading and the result table are shown.
q1 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'restecg': 1},
    show_progress=False,
)
print(q1)

Finding Elimination Order: :   0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: chol:   0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: age:   0%|          | 0/5 [00:00<?, ?it/s] [A
Eliminating: cp:   0%|          | 0/5 [00:00<?, ?it/s] [A
Eliminating: exang:   0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: sex: 100%|██████████| 5/5 [00:00<00:00, 155.84it/s]

1. Probability of HeartDisease given evidence = restecg : 1
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.1016 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+
| heartdisease(2) |              0.2361 |
+-----------------+---------------------+
| heartdisease(3) |              0.2017 |
+-----------------+---------------------+
| heartdisease(4) |              0.4605 |
+-----------------+---------------------+





In [18]:
# Show every record whose `restecg` value equals 1.
print("Tuples with 'restecg = 1' in the database are:")
heartDisease.loc[heartDisease['restecg'] == 1]

Tuples with 'restecg = 1' in the database are:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
231,55,0,4,180,327,0,1,117,1,3.4,2,0,3,2
257,76,0,3,140,197,0,1,116,0,1.1,2,0,3,0
282,55,0,4,128,205,0,1,130,1,2.0,2,1,7,3
285,58,1,4,114,318,0,1,140,0,4.4,3,3,6,4


In [20]:
# Query 2: distribution of `heartdisease` given the observation cp = 2.
print('2. Probability of HeartDisease given evidence = cp : 2')
# show_progress=False keeps pgmpy's tqdm elimination progress bars out of the
# cell output, so only the heading and the result table are shown.
q2 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'cp': 2},
    show_progress=False,
)
print(q2)

Finding Elimination Order: : 100%|██████████| 5/5 [00:37<00:00,  7.56s/it]
Finding Elimination Order: :   0%|          | 0/5 [00:00<?, ?it/s]
  0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: chol:   0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: restecg:   0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: age:   0%|          | 0/5 [00:00<?, ?it/s]    [A
Eliminating: exang:   0%|          | 0/5 [00:00<?, ?it/s][A
Eliminating: sex: 100%|██████████| 5/5 [00:00<00:00, 142.78it/s]

2. Probability of HeartDisease given evidence = cp : 2
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.3742 |
+-----------------+---------------------+
| heartdisease(1) |              0.2018 |
+-----------------+---------------------+
| heartdisease(2) |              0.1375 |
+-----------------+---------------------+
| heartdisease(3) |              0.1541 |
+-----------------+---------------------+
| heartdisease(4) |              0.1323 |
+-----------------+---------------------+





In [23]:
# Show the first ten records whose `cp` value equals 2.
print("Tuples with 'cp = 2' in the database are:")
heartDisease.loc[heartDisease['cp'] == 2].head(10)

Tuples with 'cp = 2' in the database are:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0
5,56,1,2,120,236,0,0,178,0,0.8,1,0,3,0
11,56,0,2,140,294,0,2,153,0,1.3,2,0,3,0
13,44,1,2,120,263,0,0,173,0,0.0,1,0,7,0
16,48,1,2,110,229,0,0,168,0,1.0,3,0,7,1
19,49,1,2,130,266,0,0,171,0,0.6,1,0,3,0
22,58,1,2,120,284,0,2,160,0,1.8,2,0,3,1
42,71,0,2,160,302,0,0,162,0,0.4,1,2,3,0
50,41,0,2,105,198,0,0,168,0,0.0,1,1,3,0
53,44,1,2,130,219,0,2,188,0,0.0,1,0,3,0
