In [6]:
import pandas as pd # for data manipulation
import numpy as np

from sklearn.model_selection import train_test_split # for splitting the data into train and test samples
from sklearn.metrics import classification_report, confusion_matrix # for model evaluation metrics
from sklearn.preprocessing import OrdinalEncoder # for encoding categorical features from strings to number arrays
from sklearn.preprocessing import LabelEncoder

import plotly.express as px  # for data visualization
import plotly.graph_objects as go # for data visualization

import matplotlib.pyplot as plt

# Different types of Naive Bayes Classifiers
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [7]:
# Load the heart-disease dataset from the CSV in the working directory.
df = pd.read_csv('heart_2020_cleaned.csv')

# Bin each numeric column into (at most) N_BINS quantile buckets so that every
# feature is categorical before ordinal encoding. duplicates='drop' merges
# buckets whose quantile edges coincide, so a column may end up with fewer bins.
N_BINS = 5
for col in ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']:
    df[col] = pd.qcut(df[col], N_BINS, duplicates='drop')

# Separate the target column from the feature columns.
y = df['HeartDisease']
x = df.drop('HeartDisease', axis=1)

# Encode the target labels as integers. The fitted encoder is kept under its
# own name (instead of being overwritten by the feature encoder) so that
# predictions can still be mapped back with label_enc.inverse_transform.
label_enc = LabelEncoder()
y = label_enc.fit_transform(y)

# Encode every feature column as ordinal category codes.
enc = OrdinalEncoder()
x = enc.fit_transform(x)

# Fixed seed so the split (and every metric computed from it) is reproducible
# on Restart & Run All; stratify=y keeps the class ratio of the imbalanced
# target the same in the train and test partitions.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=RANDOM_STATE, stratify=y
)

In [8]:
# Build and train the categorical Naive Bayes classifier in one step;
# fit() returns the fitted estimator itself.
model = CategoricalNB().fit(x_train, y_train)

# Predict class labels for the held-out test rows.
y_pred = model.predict(x_test)

In [9]:
# Summarise test-set performance: per-class precision/recall/F1 and the raw
# confusion matrix (rows = true class, columns = predicted class).
report = classification_report(y_test, y_pred, target_names=['No', 'Yes'])
matrix = confusion_matrix(y_test, y_pred)
divider = '--------------------------------------------------------'

print("Model: ", model)
print(divider)
print('Classification Report:\n\n', report)
print(divider)
print('Confusion Matrix:\n\n', matrix)

Model:  CategoricalNB()
--------------------------------------------------------
Classification Report:

               precision    recall  f1-score   support

          No       0.94      0.92      0.93     43876
         Yes       0.33      0.41      0.37      4094

    accuracy                           0.88     47970
   macro avg       0.64      0.67      0.65     47970
weighted avg       0.89      0.88      0.88     47970

--------------------------------------------------------
Confusion Matrix:

 [[40418  3458]
 [ 2408  1686]]
