# IR Practical No.-3
                            -By Aditya Nikam(BE AI & DS)

Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set.

In [1]:
import pandas as pd
import numpy as np
try:
    from pgmpy.models import DiscreteBayesianNetwork as BN   # for pgmpy >= 1.0.0
except ImportError:
    from pgmpy.models import BayesianModel as BN             # fallback for older versions
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination


In [2]:
# Load the dataset (keep heart.csv in the same folder).
# '?' cells are converted to NaN here; the rows are dropped further down,
# once the frame has been narrowed to the columns the model actually uses.
df = pd.read_csv("heart.csv").replace('?', np.nan)

# Rename target column if needed: the first of 'target' / 'num' that is
# present becomes 'heartdisease'; an existing 'heartdisease' column is kept.
for t in ['target', 'num']:
    if t in df.columns and 'heartdisease' not in df.columns:
        df = df.rename(columns={t: 'heartdisease'})

# Every later cell conditions on this column, so fail here with a clear message
# instead of an obscure error when the network is built.
if 'heartdisease' not in df.columns:
    raise KeyError("heart.csv has no 'heartdisease', 'target' or 'num' column")

# Keep only important columns (those from the list that exist in the file)
cols = ['age','sex','trestbps','fbs','exang','restecg','thalach','chol','heartdisease']

# Drop incomplete rows only AFTER selecting columns, so a missing value in a
# column that is not used by the model no longer discards the whole row.
df = df[[c for c in cols if c in df.columns]].dropna().copy()

df.head()


Unnamed: 0,age,sex,trestbps,fbs,exang,restecg,thalach,chol,heartdisease
0,52,1,125,0,0,1,168,212,0
1,53,1,140,1,1,0,155,203,0
2,70,1,145,0,1,1,125,174,0
3,61,1,148,0,0,1,161,203,0
4,62,0,138,1,0,1,106,294,0


In [3]:
# Collapse each continuous measurement into a 0/1 indicator.
# Every rule receives the column as floats and returns a boolean Series
# that is True for the rows that should be encoded as 1.
binarizers = {
    'age':      lambda values: values > 50,
    'trestbps': lambda values: values > 130,
    'thalach':  lambda values: values < 150,   # 1 = low
    'chol':     lambda values: values > 200,
}

for column, is_one in binarizers.items():
    # Columns missing from the frame are skipped.
    if column in df:
        df[column] = is_one(df[column].astype(float)).astype(int)

# Cast every remaining column to int as well.
df = df.dropna().astype(int)

df.head()


Unnamed: 0,age,sex,trestbps,fbs,exang,restecg,thalach,chol,heartdisease
0,1,1,0,0,0,1,0,1,0
1,1,1,1,1,1,0,0,1,0
2,1,1,1,0,1,1,1,0,0
3,1,1,1,0,0,1,0,1,0
4,1,0,1,1,0,1,1,1,0


In [4]:
# Every column except the diagnosis is treated as a feature node.
features = [name for name in df.columns if name != 'heartdisease']

# Network structure: 'heartdisease' is the single parent of each feature node.
edges = [('heartdisease', child) for child in features]
model = BN(edges)

# Estimate the CPDs from the data with maximum likelihood, then wrap the
# fitted model in a variable-elimination engine used for the queries.
model.fit(df, estimator=MaximumLikelihoodEstimator)
infer = VariableElimination(model)

# Status messages (printed once fitting has finished).
print("Learning CPDs using Maximum Likelihood Estimators...\n")
print("Inferencing with Bayesian Network:")


INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'N', 'sex': 'N', 'trestbps': 'N', 'fbs': 'N', 'exang': 'N', 'restecg': 'N', 'thalach': 'N', 'chol': 'N', 'heartdisease': 'N'}


Learning CPDs using Maximum Likelihood Estimators...

Inferencing with Bayesian Network:


In [5]:
def bin_evidence(evidence):
    """Convert raw evidence values into integer states.

    Args:
        evidence: Mapping of variable name to a raw value (number or numeric
            string).

    Returns:
        A new dict with the same keys in the same order. 'age', 'trestbps',
        'chol' and 'thalach' are read as floats and mapped to 1 when
        age > 50, trestbps > 130, chol > 200 or thalach < 150, else 0.
        Any other key is passed through ``int()`` unchanged.
    """
    # Predicate per thresholded variable: True means the value is encoded as 1.
    rules = {
        'age': lambda reading: reading > 50,
        'trestbps': lambda reading: reading > 130,
        'chol': lambda reading: reading > 200,
        'thalach': lambda reading: reading < 150,
    }

    binned = {}
    for name, raw in evidence.items():
        rule = rules.get(name)
        if rule is None:
            # No threshold for this variable: only coerce to int.
            binned[name] = int(raw)
        else:
            binned[name] = int(rule(float(raw)))
    return binned


In [6]:
# Each entry pairs the banner to print with the raw evidence for that query.
# The raw values go through bin_evidence before they reach the inference
# engine, so the query is conditioned on the binned state, not the raw number.
queries = [
    ("\n1. Probability of HeartDisease given Age = 28", {'age': 28}),
    ("\n2. Probability of HeartDisease given Cholesterol = 100", {'chol': 100}),
]

for banner, raw_evidence in queries:
    print(banner)
    print(infer.query(['heartdisease'], evidence=bin_evidence(raw_evidence)))



1. Probability of HeartDisease given Age = 28
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.3302 |
+-----------------+---------------------+
| heartdisease(1) |              0.6698 |
+-----------------+---------------------+

2. Probability of HeartDisease given Cholesterol = 100
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.4244 |
+-----------------+---------------------+
| heartdisease(1) |              0.5756 |
+-----------------+---------------------+
