<a href="https://colab.research.google.com/github/ms-starryvoid/BS-3-4/blob/main/Programs/Exp4_HeartDiseaseDiagnosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas pgmpy scikit-learn

Collecting pgmpy
  Using cached pgmpy-1.0.0-py3-none-any.whl.metadata (9.4 kB)
Collecting pyro-ppl (from pgmpy)
  Downloading pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)
Collecting pyro-api>=0.1.1 (from pyro-ppl->pgmpy)
  Downloading pyro_api-0.1.2-py3-none-any.whl.metadata (2.5 kB)
Downloading pgmpy-1.0.0-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyro_ppl-1.9.1-py3-none-any.whl (755 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyro_api-0.1.2-py3-none-any.whl (11 kB)
Installing collected packages: pyro-api, pyro-ppl, pgmpy
Successfully installed pgmpy-1.0.0 pyro-api-0.1.2 pyro-ppl-1.9.1


In [None]:
import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import HillClimbSearch, BIC, BayesianEstimator
from pgmpy.inference import VariableElimination

# Load the heart-disease dataset from the lab's GitHub repository.
data = pd.read_csv('https://raw.githubusercontent.com/ms-starryvoid/ML_Lab_DataSet/refs/heads/main/Lab_data/heart.csv')

# Print the label on its own line so the column header of the preview stays
# aligned with the rows beneath it.
print("Sample data set info ")
print(data.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
data.info()
# Bin a continuous column into labelled categories via pandas.cut().
def discretize(series, bins, labels):
    """Return ``series`` cut into the labelled intervals defined by ``bins``.

    Args:
        series: Values to bin; forwarded to ``pd.cut``.
        bins: Bin edges; forwarded to ``pd.cut``.
        labels: Labels for the resulting intervals; forwarded to ``pd.cut``.

    Returns:
        The result of ``pd.cut`` called with ``include_lowest=True``, so the
        first interval also contains its left edge.
    """
    cut_options = {'bins': bins, 'labels': labels, 'include_lowest': True}
    return pd.cut(series, **cut_options)

# Discretization bins and labels.
# The outermost edges are open-ended (-inf / +inf) instead of hand-picked
# limits: pd.cut() yields NaN for any value outside the edges, and the later
# dropna() would then silently discard that row (for example an oldpeak
# reading above 6). The interior cut points are unchanged.
_LOW, _HIGH = float('-inf'), float('inf')
data['age_cat'] = discretize(data['age'], bins=[_LOW, 40, 55, _HIGH], labels=['young', 'middle', 'old'])
data['trestbps_cat'] = discretize(data['trestbps'], bins=[_LOW, 120, 140, _HIGH], labels=['low', 'normal', 'high'])
data['chol_cat'] = discretize(data['chol'], bins=[_LOW, 200, 300, _HIGH], labels=['normal', 'high', 'very_high'])
data['thalach_cat'] = discretize(data['thalach'], bins=[_LOW, 120, 160, _HIGH], labels=['low', 'medium', 'high'])
data['oldpeak_cat'] = discretize(data['oldpeak'], bins=[_LOW, 1, 3, _HIGH], labels=['low', 'medium', 'high'])

# Columns used by the model: the binned (*_cat) columns plus the columns that
# are taken from the dataset as they are.
model_columns = [
    'age_cat', 'sex', 'cp', 'trestbps_cat', 'chol_cat', 'fbs', 'restecg',
    'thalach_cat', 'exang', 'oldpeak_cat', 'slope', 'ca', 'thal', 'target',
]
# Discard any row that still has a missing value in one of those columns.
model_data = data[model_columns].dropna()

# Structure learning: hill-climbing search over the data, scored with BIC.
bic_score = BIC(model_data)
hc = HillClimbSearch(model_data)
best_model = hc.estimate(scoring_method=bic_score)

# Show the edges of the learned graph under a heading.
print("Learned network edges:", best_model.edges(), sep="\n")

# Create the Bayesian network with the learned structure.
model = DiscreteBayesianNetwork(best_model.edges())
# An edge list carries no information about variables the search left without
# any edge, so add the learned graph's nodes explicitly; otherwise such a
# variable would be missing from the network and could not be used as
# evidence or as a query variable.
model.add_nodes_from(best_model.nodes())

# Parameter estimation with the Bayesian estimator and a BDeu prior.
model.fit(model_data, estimator=BayesianEstimator, prior_type='BDeu')

# Inference object used by the queries.
inference = VariableElimination(model)

# Example query 1: distribution of `target` given a partial set of observations.
evidence = dict(
    age_cat='old',
    sex=1,
    cp=3,
    trestbps_cat='high',
    chol_cat='high',
    fbs=0,
    exang=1,
)
query_result = inference.query(variables=['target'], evidence=evidence)
print("\nProbability of Heart Disease given evidence:")
print(query_result)

# Example query 2: same question with every other model column observed.
evidence = dict(
    age_cat='old',
    sex=1,
    cp=3,
    trestbps_cat='high',
    chol_cat='high',
    fbs=0,
    restecg=1,
    thalach_cat='medium',
    exang=1,
    oldpeak_cat='medium',
    slope=2,
    ca=0,
    thal=3,
)
query_result = inference.query(variables=['target'], evidence=evidence)
print(query_result)

Sample data set info     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      102

  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned network edges:
[('age_cat', 'trestbps_cat'), ('age_cat', 'chol_cat'), ('sex', 'chol_cat'), ('cp', 'exang'), ('trestbps_cat', 'fbs'), ('thalach_cat', 'restecg'), ('exang', 'thalach_cat'), ('exang', 'oldpeak_cat'), ('oldpeak_cat', 'slope'), ('slope', 'thalach_cat'), ('ca', 'target'), ('ca', 'age_cat'), ('thal', 'sex'), ('target', 'cp'), ('target', 'thal'), ('target', 'oldpeak_cat'), ('target', 'slope'), ('target', 'exang'), ('target', 'sex'), ('target', 'trestbps_cat')]

Probability of Heart Disease given evidence:
+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.4969 |
+-----------+---------------+
| target(1) |        0.5031 |
+-----------+---------------+
+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.2069 |
+-----------+---------------+
| target(1) |        0.7931 |
+-----------+---------------+
