# Module 5 : Performance measurements of NB and KNN

In [1]:
#import library packages
import pandas as p
import matplotlib.pyplot as plt
import seaborn as s
import numpy as n
#read the given dataset
df = p.read_csv("dataset.csv")

In [2]:
from sklearn.preprocessing import LabelEncoder
var_mod = ['target']
le = LabelEncoder()
for i in var_mod:
    df[i] = le.fit_transform(df[i]).astype(str)



In [3]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [4]:
#According to the cross-validated MCC scores, the random forest is the best-performing model, so now let's evaluate its performance on the test set.
from sklearn.metrics import confusion_matrix, classification_report, matthews_corrcoef, cohen_kappa_score, accuracy_score, average_precision_score, roc_auc_score

# Prediction of Heart Disease

In [5]:
X = df.drop(labels='target', axis=1)
#Response variable
y = df.loc[:,'target']    

In [6]:
#We'll use a test size of 30%. We also stratify the split on the response variable, which is very important to do because there are so few fraudulent transactions.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)

 K-Neighbors Classifier:

In [7]:
from sklearn.neighbors import KNeighborsClassifier
rfc = KNeighborsClassifier()

rfc.fit(X_train,y_train)

predictR = rfc.predict(X_test)

print("")
print('Classification report of KNeighborsClassifier Results:')
print("")

print(classification_report(y_test,predictR))
x = (accuracy_score(y_test,predictR)*100)

print('Accuracy result of KNeighborsClassifier is:', x)
print("")
cm1=confusion_matrix(y_test,predictR)
print('Confusion Matrix result of KNeighborsClassifier is:\n',cm1)
print("")
sensitivity1 = cm1[0,0]/(cm1[0,0]+cm1[0,1])
print('Sensitivity : ', sensitivity1 )
print("")
specificity1 = cm1[1,1]/(cm1[1,0]+cm1[1,1])
print('Specificity : ', specificity1)
print("")



Classification report of KNeighborsClassifier Results:

              precision    recall  f1-score   support

           0       0.64      0.61      0.62        41
           1       0.69      0.72      0.71        50

    accuracy                           0.67        91
   macro avg       0.67      0.66      0.67        91
weighted avg       0.67      0.67      0.67        91

Accuracy result of KNeighborsClassifier is: 67.03296703296702

Confusion Matrix result of KNeighborsClassifier is:
 [[25 16]
 [14 36]]

Sensitivity :  0.6097560975609756

Specificity :  0.72



In [8]:
TN = cm1[0][0]
FN = cm1[1][0]
TP = cm1[1][1]
FP = cm1[0][1]
print("True Positive :",TP)
print("True Negative :",TN)
print("False Positive :",FP)
print("False Negative :",FN)
print("")
TPR = TP/(TP+FN)
TNR = TN/(TN+FP)
FPR = FP/(FP+TN)
FNR = FN/(TP+FN)
print("True Positive Rate :",TPR)
print("True Negative Rate :",TNR)
print("False Positive Rate :",FPR)
print("False Negative Rate :",FNR)
print("")
PPV = TP/(TP+FP)
NPV = TN/(TN+FN)
print("Positive Predictive Value :",PPV)
print("Negative predictive value :",NPV)

True Positive : 36
True Negative : 25
False Positive : 16
False Negative : 14

True Positive Rate : 0.72
True Negative Rate : 0.6097560975609756
False Positive Rate : 0.3902439024390244
False Negative Rate : 0.28

Positive Predictive Value : 0.6923076923076923
Negative predictive value : 0.6410256410256411


Naive Bayes:

In [9]:
from sklearn.naive_bayes import GaussianNB
s = GaussianNB()

s.fit(X_train,y_train)

predicts = s.predict(X_test)

print("")
print('Classification report of Naive Bayes Results:')
print("")

print(classification_report(y_test,predicts))
x = (accuracy_score(y_test,predicts)*100)

print('Accuracy result of Naive Bayes is:', x)
print("")
cm2=confusion_matrix(y_test,predicts)
print('Confusion Matrix result of Naive Bayes is:\n',cm2)
print("")
sensitivity1 = cm2[0,0]/(cm2[0,0]+cm2[0,1])
print('Sensitivity : ', sensitivity1 )
print("")
specificity1 = cm2[1,1]/(cm2[1,0]+cm2[1,1])
print('Specificity : ', specificity1)
print("")



Classification report of Naive Bayes Results:

              precision    recall  f1-score   support

           0       0.80      0.78      0.79        41
           1       0.82      0.84      0.83        50

    accuracy                           0.81        91
   macro avg       0.81      0.81      0.81        91
weighted avg       0.81      0.81      0.81        91

Accuracy result of Naive Bayes is: 81.31868131868131

Confusion Matrix result of Naive Bayes is:
 [[32  9]
 [ 8 42]]

Sensitivity :  0.7804878048780488

Specificity :  0.84



In [10]:
TN = cm2[0][0]
FN = cm2[1][0]
TP = cm2[1][1]
FP = cm2[0][1]
print("True Positive :",TP)
print("True Negative :",TN)
print("False Positive :",FP)
print("False Negative :",FN)
print("")
TPR = TP/(TP+FN)
TNR = TN/(TN+FP)
FPR = FP/(FP+TN)
FNR = FN/(TP+FN)
print("True Positive Rate :",TPR)
print("True Negative Rate :",TNR)
print("False Positive Rate :",FPR)
print("False Negative Rate :",FNR)
print("")
PPV = TP/(TP+FP)
NPV = TN/(TN+FN)
print("Positive Predictive Value :",PPV)
print("Negative predictive value :",NPV)

True Positive : 42
True Negative : 32
False Positive : 9
False Negative : 8

True Positive Rate : 0.84
True Negative Rate : 0.7804878048780488
False Positive Rate : 0.21951219512195122
False Negative Rate : 0.16

Positive Predictive Value : 0.8235294117647058
Negative predictive value : 0.8
