In [None]:
import pandas as pd

# Load the heart-disease survey data from the CSV file next to the notebook.
dataset = pd.read_csv('heart_2020_cleaned.csv')

# number of data in the dataset
print("Number of data in the dataset: ", len(dataset.index))
print(dataset.shape[0])

# Preview the first rows. A notebook only renders the value of a cell's LAST
# expression, so the preview has to come after the prints to be shown at all.
dataset.head()

In [None]:
# Remove fully duplicated rows (in place) when any exist and report the
# remaining row count; otherwise just say that nothing had to be removed.
if not dataset.duplicated().any():
    print("No duplicates")
else:
    dataset.drop_duplicates(inplace=True)
    print("Number of data after removing duplicates: ", len(dataset.index))

In [None]:
# Report missing values per column and drop every row that contains one.
if dataset.isnull().values.any():    # checking for null data
    print(dataset.isnull().sum())
    # dropna() returns a NEW frame and leaves the original untouched, so the
    # result must be assigned back; otherwise the null rows are never removed.
    dataset = dataset.dropna()
    print("Number of data after removing null values: ", dataset.shape[0])
else:
    print("No null values")

In [None]:
# Class balance of the label: how many rows carry each distinct 'HeartDisease' value.
no_of_target_data = dataset.loc[:, 'HeartDisease'].value_counts()
print(no_of_target_data)

In [None]:
# BMI, SleepTime, MentalHealth and PhysicalHealth are left untouched:
# they are already numeric (integers and floats).

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

# Columns that get replaced by integer codes, processed in this exact order.
# The single encoder is re-fitted for every column, so once the loop finishes
# it only holds the mapping of the last column ('SkinCancer').
categorical_columns = [
    'HeartDisease',
    'Smoking',
    'AlcoholDrinking',
    'Stroke',
    'DiffWalking',
    'Sex',
    'AgeCategory',
    'Race',
    'Diabetic',
    'PhysicalActivity',
    'GenHealth',
    'Asthma',
    'KidneyDisease',
    'SkinCancer',
]

for column in categorical_columns:
    # fit + transform in one call: learn the column's classes, then map them to codes
    dataset[column] = label_encoder.fit_transform(dataset[column])

In [None]:
# Show the first five rows now that the categorical columns hold integer codes.
print("Dataset after encoding the labels")
dataset.head(n=5)

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Correlation matrix: pairwise correlations between all columns of the encoded
# frame, drawn as an annotated heatmap on a 12x12 inch figure.
plt.figure(figsize=(12, 12))
sns.heatmap(data=dataset.corr(), cmap='Reds', annot=True)

In [None]:
# Split the frame into model inputs and target, both as plain NumPy arrays.
x = dataset.drop(columns=["HeartDisease"]).values     # features: every column except the label
y = dataset.loc[:, "HeartDisease"].values             # label

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.20,
)

print("Total number of data: ", len(x))
print("Total number of train data: ", len(x_train))
print("Total number of test data: ", len(x_test))

In [None]:
# data scaling
from sklearn.preprocessing import RobustScaler

robust_scaler = RobustScaler()

# Learn the centring/scaling statistics from the training split ONLY, then
# apply those same statistics to the test split. Calling fit_transform on the
# test data would re-fit the scaler on it, so train and test would be scaled
# differently and test-set information would leak into preprocessing.
x_train = robust_scaler.fit_transform(x_train)
x_test = robust_scaler.transform(x_test)

Decision Tree

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Baseline decision tree with library-default settings (fit() returns the estimator).
d_tree = tree.DecisionTreeClassifier().fit(x_train, y_train)
d_tree_pred = d_tree.predict(x_test)

# Share of test rows classified correctly, followed by the per-class report.
accuracy = accuracy_score(y_true=y_test, y_pred=d_tree_pred)

print("Accuracy: ", accuracy*100, "%")
print(classification_report(y_test, d_tree_pred))

In [None]:
# d_tree = tree.DecisionTreeClassifier()
# d_tree_param = {"criterion": ['gini', 'entropy'],
#                 "max_depth": range(3,10),
#                 "min_samples_split": range(2,8),   # an integer min_samples_split must be >= 2; scikit-learn rejects 1
#                 "min_samples_leaf": range(1,5)
#                 }
# d_tree_grid_search = GridSearchCV(estimator=d_tree, param_grid=d_tree_param, cv = 5)
# d_tree_grid_search.fit(x_train, y_train)
# best_params = d_tree_grid_search.best_params_
# print(best_params)

In [None]:
# Decision tree with explicit hyperparameters (presumably the winners of the
# commented-out grid search above - TODO confirm), refitted on the training split.
d_tree = tree.DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_split=2,
    min_samples_leaf=1,
).fit(x_train, y_train)
d_tree_pred = d_tree.predict(x_test)

accuracy = accuracy_score(y_true=y_test, y_pred=d_tree_pred)
print("Accuracy: ", accuracy*100, "%")

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import roc_auc_score, roc_curve

# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, d_tree_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, d_tree_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, d_tree_pred)
print("Precision: ", precision)
recall = recall_score(y_test, d_tree_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
d_tree_scores = d_tree.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, d_tree_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, d_tree_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Baseline random forest with library-default settings.
classifier = RandomForestClassifier().fit(x_train, y_train)
pred = classifier.predict(x_test)

accuracy = accuracy_score(y_true=y_test, y_pred=pred)

print("Accuracy: ", accuracy*100, "%")

In [None]:
# classifier = RandomForestClassifier()

# param_grid = {'n_estimators': [50, 75, 100, 125], 
#               'max_depth': [5, 6, 7, 8, 9, 10], 
#               'max_features': [5, 6, 7, 8]}

# grid_search = GridSearchCV(estimator = classifier, param_grid = param_grid, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Random forest with explicit hyperparameters (presumably chosen by the
# commented-out grid search above - TODO confirm).
classifier = RandomForestClassifier(
    max_features=5,
    max_depth=10,
    n_estimators=100,
).fit(x_train, y_train)
pred = classifier.predict(x_test)

accuracy1 = accuracy_score(y_true=y_test, y_pred=pred)
print("Random Forest results after hyperparameter finetuning:")
print("Accuracy: ", accuracy1*100, "%")



In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, pred)
print("Precision: ", precision)
recall = recall_score(y_test, pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
pred_scores = classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, pred_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, pred_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

XGBoost

In [None]:
import sys
# Notebook shell escape: run pip through the kernel's own interpreter
# (sys.executable) to install the xgboost package.
!{sys.executable} -m pip install xgboost

import xgboost as xgb
# Baseline XGBoost classifier: logistic objective and a fixed seed; every other
# setting is left at the library default.
xgb_classifier = xgb.XGBClassifier(objective="binary:logistic", random_state=42)
xgb_classifier.fit(x_train, y_train)

xgb_pred = xgb_classifier.predict(x_test)

# Share of test rows classified correctly, printed as a percentage.
accuracy2 = accuracy_score(y_test, xgb_pred)
print("Accuracy: ", accuracy2*100, "%")

In [None]:
# xgb_classifier = xgb.XGBClassifier(objective="binary:logistic", random_state=42)

# param_xgb = {
#     'max_depth': range(2, 10, 1),
#     'n_estimators': range(60, 220, 40),
#     'learning_rate': [0.1, 0.01, 0.05]}

# grid_search = GridSearchCV(estimator = xgb_classifier, param_grid = param_xgb, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# XGBoost with explicit hyperparameters (presumably chosen by the commented-out
# grid search above - TODO confirm); objective and seed match the baseline cell.
xgb_classifier = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=42,
    learning_rate=0.1,
    max_depth=4,
    n_estimators=140,
)

xgb_classifier.fit(x_train, y_train)
xgb_pred = xgb_classifier.predict(x_test)

accuracy2 = accuracy_score(y_true=y_test, y_pred=xgb_pred)
print("XGBoost results after hyperparameter finetuning:")
print("Accuracy: ", accuracy2*100, "%")


In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, xgb_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, xgb_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, xgb_pred)
print("Precision: ", precision)
recall = recall_score(y_test, xgb_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
xgb_scores = xgb_classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, xgb_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, xgb_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Baseline gradient boosting with library-default settings.
gb_classifier = GradientBoostingClassifier().fit(x_train, y_train)
gb_pred = gb_classifier.predict(x_test)

accuracy3 = accuracy_score(y_true=y_test, y_pred=gb_pred)
print("Accuracy: ", accuracy3*100, "%")

In [None]:
# gb_classifier = GradientBoostingClassifier()

# param_gb = {
#     "n_estimators":[5,50,250,500],
#     "max_depth":[1,3,5,7,9],
#     "learning_rate":[0.01,0.1,1]}

# grid_search = GridSearchCV(estimator = gb_classifier, param_grid = param_gb, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# Gradient boosting with explicit hyperparameters (presumably chosen by the
# commented-out grid search above - TODO confirm).
gb_classifier = GradientBoostingClassifier(
    learning_rate=0.1,
    max_depth=5,
    n_estimators=50,
).fit(x_train, y_train)
gb_pred = gb_classifier.predict(x_test)

accuracy3 = accuracy_score(y_true=y_test, y_pred=gb_pred)
print("Gradient Boost results after hyperparameter finetuning:")
print("Accuracy: ", accuracy3*100, "%")

In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, gb_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, gb_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, gb_pred)
print("Precision: ", precision)
recall = recall_score(y_test, gb_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
gb_scores = gb_classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, gb_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, gb_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

Ada Boost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Baseline AdaBoost with library-default settings.
ada_classifier = AdaBoostClassifier().fit(x_train, y_train)
ada_pred = ada_classifier.predict(x_test)

accuracy4 = accuracy_score(y_true=y_test, y_pred=ada_pred)
print("Accuracy: ", accuracy4*100, "%")


In [None]:
# ada_classifier = AdaBoostClassifier()

# param_ada = {
#     "n_estimators":[5,50,100,250],
#     "learning_rate":[0.01,0.1,1]}

# grid_search = GridSearchCV(estimator = ada_classifier, param_grid = param_ada, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# AdaBoost with explicit hyperparameters (presumably chosen by the
# commented-out grid search above - TODO confirm).
ada_classifier = AdaBoostClassifier(
    learning_rate=1,
    n_estimators=100,
).fit(x_train, y_train)
ada_pred = ada_classifier.predict(x_test)

accuracy5 = accuracy_score(y_true=y_test, y_pred=ada_pred)
print("AdaBoost results after hyperparameter finetuning:")
print("Accuracy: ", accuracy5*100, "%")


In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, ada_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, ada_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, ada_pred)
print("Precision: ", precision)
recall = recall_score(y_test, ada_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
ada_scores = ada_classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, ada_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, ada_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

SVM

In [None]:
from sklearn import svm

# Baseline support vector classifier with library-default settings.
svm_classifier = svm.SVC().fit(x_train, y_train)
svm_pred = svm_classifier.predict(x_test)

# NOTE(review): 'accuracy5' is also the name the tuned AdaBoost cell assigns,
# so running this cell overwrites that value - confirm which one later cells expect.
accuracy5 = accuracy_score(y_true=y_test, y_pred=svm_pred)
print("Accuracy: ", accuracy5*100, "%")

In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, svm_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, svm_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, svm_pred)
print("Precision: ", precision)
recall = recall_score(y_test, svm_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point. The SVC above is
# created without probability=True, so it has no predict_proba; its signed
# distance to the decision boundary serves as the score instead.
svm_scores = svm_classifier.decision_function(x_test)
roc_auc = roc_auc_score(y_test, svm_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, svm_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

In [None]:
# svm_classifier = svm.SVC()

# param_svm = {
#     "kernel":['linear', 'rbf'],
#     "C":[1, 3, 5, 8], 
#     "gamma":[0.01, 0.05, 0.1]}

# grid_search = GridSearchCV(estimator = svm_classifier, param_grid = param_svm, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# svm_classifier = svm.SVC(kernel = best_parameters['kernel'], 
#                         C = best_parameters['C'],
#                         gamma = best_parameters['gamma'])

# svm_classifier.fit(x_train, y_train)
# svm_pred = svm_classifier.predict(x_test)

# accuracy6 = accuracy_score(y_test, svm_pred)
# print("SVM results after hyperparameter finetuning:")
# print("Accuracy: ", accuracy6*100, "%")

# print(classification_report(y_test, svm_pred))

Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Baseline logistic regression with library-default settings.
lr_classifier = LogisticRegression().fit(x_train, y_train)
lr_pred = lr_classifier.predict(x_test)

accuracy7 = accuracy_score(y_true=y_test, y_pred=lr_pred)
print("Accuracy: ", accuracy7*100, "%")

In [None]:
# lr_classifier = LogisticRegression()

# param_lr = {"penalty":['l1', 'l2'],}   # note: 'l1' needs solver='liblinear' or 'saga'; the default 'lbfgs' solver only supports 'l2'

# grid_search = GridSearchCV(estimator = lr_classifier, param_grid = param_lr, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# Logistic regression with the penalty spelled out explicitly (presumably the
# outcome of the commented-out grid search above - TODO confirm).
lr_classifier = LogisticRegression(penalty='l2').fit(x_train, y_train)
lr_pred = lr_classifier.predict(x_test)

accuracy7 = accuracy_score(y_true=y_test, y_pred=lr_pred)
print("LR results after hyperparameter finetuning:")
print("Accuracy: ", accuracy7*100, "%")


In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, lr_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, lr_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, lr_pred)
print("Precision: ", precision)
recall = recall_score(y_test, lr_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
lr_scores = lr_classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, lr_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, lr_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()

Nearest Neighbor

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline k-nearest-neighbours classifier with library-default settings.
nn_classifier = KNeighborsClassifier().fit(x_train, y_train)
nn_pred = nn_classifier.predict(x_test)

accuracy8 = accuracy_score(y_true=y_test, y_pred=nn_pred)
print("Accuracy: ", accuracy8*100, "%")

In [None]:
# nn_classifier = KNeighborsClassifier()

# param_nn = {"n_neighbors":[3, 5, 7],
#             "weights": ['uniform', 'distance'],
#             "metric": ['euclidean', 'l1', 'manhattan']}

# grid_search = GridSearchCV(estimator = nn_classifier, param_grid = param_nn, cv = 5) 
# grid_search.fit(x_train, y_train)

# best_parameters = grid_search.best_params_
# print("Best Hyperparameters: ", best_parameters)

In [None]:
# k-nearest-neighbours with explicit hyperparameters (presumably chosen by the
# commented-out grid search above - TODO confirm).
nn_classifier = KNeighborsClassifier(
    metric='l1',
    weights='uniform',
    n_neighbors=7,
).fit(x_train, y_train)
nn_pred = nn_classifier.predict(x_test)

accuracy9 = accuracy_score(y_true=y_test, y_pred=nn_pred)
print("NN results after hyperparameter finetuning:")
print("Accuracy: ", accuracy9*100, "%")


In [None]:
# Confusion Matrix
# scikit-learn lays the matrix out with the ACTUAL classes on the rows and the
# PREDICTED classes on the columns, ordered by label value. The label was
# integer-coded with LabelEncoder (sorted order, so the negative class is
# expected to be 0 and the positive class 1): index 0 = no heart disease.
conf_matrix = confusion_matrix(y_test, nn_pred)

plt.figure(figsize= (4,3))
sns.heatmap(conf_matrix, 
            annot=True,
            fmt='g', 
            xticklabels=['Not Heart Disease','Heart Disease'],
            yticklabels=['Not Heart Disease','Heart Disease'])
plt.ylabel('Actual',fontsize=8)        # heatmap rows
plt.xlabel('Prediction',fontsize=8)    # heatmap columns
plt.title('Confusion Matrix',fontsize=12)
plt.show()

# Accuracy
accuracy = accuracy_score(y_test, nn_pred)
print("Accuracy: ", accuracy*100, "%")

# precision_score and recall_score (for the positive class, label 1)
precision = precision_score(y_test, nn_pred)
print("Precision: ", precision)
recall = recall_score(y_test, nn_pred)
print("Recall: ", recall)

# roc_auc score
# ROC analysis ranks samples by a continuous score. Feeding it hard 0/1
# predictions collapses the curve to a single operating point, so use the
# predicted probability of the positive class instead.
nn_scores = nn_classifier.predict_proba(x_test)[:, 1]
roc_auc = roc_auc_score(y_test, nn_scores)
print("AUC (Area under the curve) score: ", roc_auc)

# Plot ROC curve
fpr, tpr, thresholds = roc_curve(y_test, nn_scores)
plt.figure(figsize=(4, 3))
plt.plot(fpr, tpr, label='ROC curve (area = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')    # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.show()