<a href="https://colab.research.google.com/github/shreyasbkgit/ailab/blob/main/NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install pgmpy


Collecting pgmpy
  Downloading pgmpy-0.1.26-py3-none-any.whl.metadata (9.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->pgmpy)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->pgmpy)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->pgmpy)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->pgmpy)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x8

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load Heart Disease Dataset
dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"]
data = pd.read_csv(dataset_url, names=columns, na_values='?')
data.dropna(inplace=True)

data["target"] = (data["target"] > 0).astype(int)  # Convert to binary classification

# Split dataset into features and labels
X = data.drop(columns=["target"])
y = data["target"]

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Naive Bayes Model
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Predictions
y_pred = nb_model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Bayesian Network Model
model = BayesianModel([("age", "target"), ("sex", "target"), ("cp", "target"), ("chol", "target"), ("thal", "target")])
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Inference
infer = VariableElimination(model)
print("Probability of Heart Disease given cp=3:")
print(infer.query(variables=["target"], evidence={"cp": 3}))




Accuracy: 0.9166666666666666
              precision    recall  f1-score   support

           0       0.90      0.97      0.93        36
           1       0.95      0.83      0.89        24

    accuracy                           0.92        60
   macro avg       0.92      0.90      0.91        60
weighted avg       0.92      0.92      0.92        60





Probability of Heart Disease given cp=3:
+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.5017 |
+-----------+---------------+
| target(1) |        0.4983 |
+-----------+---------------+
