In [1]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the Heart Disease dataset.
# '?' is declared as a missing-value marker at parse time (na_values) instead of
# being replaced afterwards: a post-hoc replace('?', np.nan) leaves every column
# that contained a '?' as a text column, so its remaining values stay strings
# and integer evidence such as 130 would not match them later on.
# When the file contains no '?' at all, the resulting frame is unchanged.
heart_data = pd.read_csv('heart.csv', na_values='?')

# Display the first few rows of the dataset
print("Sample data from the Heart Disease dataset:\n")
print(heart_data.head())

# Expose the label column under a self-explanatory name; the network structure
# below refers to it as 'heart_disease' instead of 'target'.
heart_data = heart_data.rename(columns={'target': 'heart_disease'})

# Directed (parent, child) edges of the Bayesian Network:
#   - 'age' is a parent of 'trestbps' and 'chol'
#   - 'sex', 'exang', 'cp' (chest pain type), 'trestbps' and 'chol' are the
#     parents of 'heart_disease'
#   - 'heart_disease' is the parent of 'thalach'
network_edges = [
    ('age', 'trestbps'),
    ('age', 'chol'),
    ('sex', 'heart_disease'),
    ('exang', 'heart_disease'),
    ('cp', 'heart_disease'),
    ('trestbps', 'heart_disease'),
    ('chol', 'heart_disease'),
    ('heart_disease', 'thalach'),
]
model = BayesianNetwork(network_edges)

# Estimate the CPDs (Conditional Probability Distributions) of the network
# from the data with Maximum Likelihood Estimation.
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)

# Variable-elimination inference engine built on the fitted model.
heart_disease_infer = VariableElimination(model)

# Diagnosis Examples:
# Every query asks the inference engine for the distribution of
# 'heart_disease' given the evidence dict passed to query(). Each printed
# heading must state exactly that evidence; keep the two in sync when editing.
# NOTE(review): evidence values presumably have to be states that occur in the
# training data -- confirm before changing any of them.

# 1. Probability of Heart Disease given Age=38
#    (the heading now describes the evidence actually used by the query)
print("\n1. Probability of Heart Disease given Age=38:")
q1 = heart_disease_infer.query(variables=['heart_disease'], evidence={'age': 38})
print(q1)

# 2. Probability of Heart Disease given Sex=Male (1), Chest Pain Type=2, and Exercise Induced Angina=No (0)
print("\n2. Probability of Heart Disease given Sex=Male, Chest Pain Type=2, and No Exercise Induced Angina:")
q2 = heart_disease_infer.query(variables=['heart_disease'], evidence={'sex': 1, 'cp': 2, 'exang': 0})
print(q2)

# 3. Probability of Heart Disease given Resting Blood Pressure=130 and Maximum Heart Rate Achieved=150
print("\n3. Probability of Heart Disease given Resting Blood Pressure=130 and Maximum Heart Rate=150:")
q3 = heart_disease_infer.query(variables=['heart_disease'], evidence={'trestbps': 130, 'thalach': 150})
print(q3)

  from .autonotebook import tqdm as notebook_tqdm


Sample data from the Heart Disease dataset:

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

1. Probability of Heart Disease given Age=50 and Cholesterol=200:
+------------------+----------------------+
| heart_disease    |   phi(heart_disease) |
| heart_disease(0) |               0.4716 |
+------------------+----------------------+
| heart_disease(1) |               0.5284 |
+------------------+----------------------+

2. P