In [1]:
# Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

In [2]:
# Read the heart-disease CSV into a DataFrame.
# The path is relative, so the file must be in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="cleanheart_dataset.csv")

In [3]:
# Separate the label column from the predictors.
# y: the 'HeartDiseaseorAttack' column, used as the prediction target.
y = df['HeartDiseaseorAttack']
# X: every other column of df (drop returns a new frame; df itself is unchanged).
X = df.drop(columns='HeartDiseaseorAttack')

In [4]:
# Restrict the predictors to a hand-picked subset of twelve columns.
# A missing column name raises KeyError, so this also acts as a schema check.
X_new = X.loc[
    :,
    [
        "GenHlth",
        "Age",
        "DiffWalk",
        "HighBP",
        "Stroke",
        "HighChol",
        "Diabetes",
        "PhysHlth",
        "Smoker",
        "Sex",
        "MentHlth",
        "CholCheck",
    ],
]

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified. The target looks imbalanced (the
# training set is oversampled further down), so consider stratify=y -- confirm
# before changing, since it would alter every reported metric.
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Oversample the training split with SMOTE (seeded for repeatability).
# Only the training data is resampled; X_test / y_test are not touched here.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
# X_train / y_train are rebound to the resampled data, so the original
# (pre-SMOTE) training split is no longer reachable under these names.
X_train, y_train = resampled

In [7]:
# Fit a Gaussian Naive Bayes classifier on the current X_train / y_train
# (i.e. the SMOTE-resampled training data when the cells are run in order).
nb = GaussianNB()
# Left as a bare last expression so the notebook echoes the fitted estimator.
nb.fit(X=X_train, y=y_train)

GaussianNB()

In [8]:
# Predict class labels for the held-out test features with the fitted model.
y_pred = nb.predict(X=X_test)

In [9]:
# Score the test-set predictions against the true labels.
# confusion_matrix rows are true classes and columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
# precision / recall / F1 use scikit-learn's defaults (binary averaging).
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# NOTE(review): assuming 0/1 labels, MSE here is just the misclassification
# rate (1 - accuracy), so it adds no information beyond accuracy -- confirm.
mse = mean_squared_error(y_test, y_pred)

# Report the metrics: the matrix goes on its own lines, then one scalar per line.
print("Confusion Matrix:\n", cm)
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("Mean Squared Error:", mse),
):
    print(label, value)

Confusion Matrix:
 [[30080 11189]
 [ 1176  3512]]
Accuracy: 0.7309441434384316
Precision: 0.23889531324399702
Recall: 0.7491467576791809
F1 Score: 0.36226726494404043
Mean Squared Error: 0.26905585656156844
