# Name : Navale Prathamesh Rajendra
# Year & Div : TY-ML-B
# Roll No: 30

In [2]:
# -------------------------------------------------------
# Bayesian Network for Heart Disease Diagnosis
# Using Cleveland Heart Disease Dataset
# -------------------------------------------------------

import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import KBinsDiscretizer

# Step 1: Load Dataset
df = pd.read_csv("Cleveland_hd.csv")  # Cleveland dataset
print("Dataset loaded successfully!")
print(df.head())

# Step 2: Preprocess Data
df['target'] = df['target'].apply(lambda x: 1 if x > 0 else 0)

# Select attributes
data = df[['age', 'sex', 'cp', 'chol', 'trestbps', 'fbs', 'target']]

# Discretize continuous attributes
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
data.loc[:, ['age', 'chol', 'trestbps']] = discretizer.fit_transform(
    data[['age', 'chol', 'trestbps']]
)

# Rename target
data.rename(columns={'target': 'heart_disease'}, inplace=True)

print("\nPreprocessed Data:")
print(data.head())

# Step 3: Define Bayesian Network Structure
model = BayesianModel([
    ('age', 'trestbps'),
    ('age', 'chol'),
    ('sex', 'heart_disease'),
    ('cp', 'heart_disease'),
    ('trestbps', 'heart_disease'),
    ('chol', 'heart_disease'),
    ('fbs', 'heart_disease')
])

# Step 4: Train Model
model.fit(data, estimator=MaximumLikelihoodEstimator)
print("\nModel learned successfully!")

# Step 5: Inference
inference = VariableElimination(model)

# Example queries
q1 = inference.query(variables=['heart_disease'], evidence={'chol': 2})
print("\nP(Heart Disease | High Cholesterol):")
print(q1)

q2 = inference.query(variables=['heart_disease'], evidence={'cp': 2, 'fbs': 1})
print("\nP(Heart Disease | Chest Pain & High FBS):")
print(q2)


  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'target': 'heart_disease'}, inplace=True)


Dataset loaded successfully!
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   0       145   233    1        2      150      0      2.3      2   
1   67    1   3       160   286    0        2      108      1      1.5      1   
2   67    1   3       120   229    0        2      129      1      2.6      1   
3   37    1   2       130   250    0        0      187      0      3.5      2   
4   41    0   1       130   204    0        2      172      0      1.4      0   

   ca  thal  target  
0   0     2       0  
1   3     1       1  
2   2     3       1  
3   0     1       0  
4   0     1       0  

Preprocessed Data:
   age  sex  cp  chol  trestbps  fbs  heart_disease
0    2    1   0     0         1    1              0
1    2    1   3     1         1    0              1
2    2    1   3     0         0    0              1
3    0    1   2     0         1    0              0
4    0    0   1     0         1    0              0

Model learned succe