# Import Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load csv file

In [None]:
# Read the heart-disease CSV into a DataFrame (path is relative to the notebook's working directory).
dataset = pd.read_csv("../input/heart-disease-uci/heart.csv")
# Show the first rows as a quick sanity check of the load.
dataset.head()

# Looking at the basic properties of the dataset

In [None]:
# Print a concise summary of the DataFrame: column names, dtypes and non-null counts.
dataset.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the columns.
dataset.describe()

# Dividing into input and output variables for our models

In [None]:
# Features: the first 13 columns, as a 2-D NumPy array.
X = dataset.iloc[:, :13].values
# Labels: the 'target' column, kept 2-D (n_samples, 1); callers flatten it with .ravel().
y = dataset[['target']].values

In [None]:
# Confirm the array shapes; expected (n_samples, 13) for X and (n_samples, 1) for y.
print(X.shape)
print(y.shape)

# Feature Scaling 
Note: we do not fit and transform separate X_train and X_test sets here, because we won't split the data ourselves; the cross-validation methods will do it for us.

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every feature column to zero mean and unit variance.
sc = StandardScaler()
# NOTE(review): the scaler is fitted on ALL rows before any cross-validation
# split, so each held-out fold has already influenced the mean/std used to
# scale the training folds (mild data leakage). Wrapping each classifier in a
# Pipeline([StandardScaler(), clf]) would refit the scaler per training fold.
X = sc.fit_transform(X)

# Heatmap for correlation

In [None]:
# Plot styling: default font scale on a dark grid background.
sns.set(font_scale=1)
sns.set_style("darkgrid")

# Pairwise column correlations, drawn as an annotated heatmap on a wide figure.
corr_matrix = dataset.corr()
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(corr_matrix, ax=ax, annot=True, cmap="Blues")

# Training different models and using cross validation as the evaluation method
# Importing required libraries for K-fold and stratified K-fold

In [None]:
from sklearn.model_selection import cross_val_score # For K-Fold 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn import metrics # required for precision_score, recall_score, f1_score in K-fold (macros)
from sklearn.model_selection import StratifiedKFold # For stratified K-Fold
from sklearn.metrics import roc_curve,auc
from numpy import interp

# User-Defined Function to Find Accuracy, Precision, Recall and F1 Score Using K-Fold
We will use this function for the later models.

In [None]:
def k_fold(classifer):
    """Evaluate ``classifer`` with 10-fold cross-validation and report mean scores.

    Uses the notebook-level feature matrix ``X`` and label array ``y``
    (flattened with ``ravel``). Accuracy, macro precision, macro recall and
    macro F1 are computed on the same folds in a single cross-validation
    pass, so the model is fitted 10 times rather than once per metric per
    fold. The four mean scores are drawn as a bar plot and printed, one per
    line, in the order accuracy, precision, recall, F1.

    Parameters
    ----------
    classifer : scikit-learn estimator
        Unfitted classifier; it is cloned and refitted for every fold.

    Returns
    -------
    None
    """
    # Imported locally because the notebook's import cell only pulls in
    # cross_val_score from sklearn.model_selection.
    from sklearn.model_selection import cross_validate

    # (plot label, cross_validate result key, scorer name)
    metrics_spec = [
        ('accuracy', 'accuracy', 'accuracy'),
        ('precision', 'precision', 'precision_macro'),
        ('recall', 'recall', 'recall_macro'),
        ('f1 score', 'f1', 'f1_macro'),
    ]
    scoring = {key: scorer for _, key, scorer in metrics_spec}

    results = cross_validate(classifer, X, y.ravel(), cv=10, scoring=scoring)

    xplot = [label for label, _, _ in metrics_spec]
    # cross_validate stores per-fold scores under 'test_<key>'.
    yplot = [results['test_' + key].mean() for _, key, _ in metrics_spec]

    sns.barplot(x=xplot, y=yplot)
    for mean_score in yplot:
        print(mean_score)

# User-Defined Class for Stratified K-Fold and Its ROC/AUC
We will use this class for the later models.

In [None]:
class k:
    """Stratified K-fold evaluation and ROC plotting for one classifier.

    Both methods read the notebook-level feature matrix ``X`` and label array
    ``y`` and refit ``classifier`` on every training fold, so the stored
    estimator is left fitted on the last fold's training data.

    Parameters
    ----------
    classifier : estimator
        Must provide ``fit`` and ``predict``; ``roc`` additionally needs
        ``predict_proba``.
    skf : cross-validation splitter
        Object exposing ``split(X, y)`` (e.g. ``StratifiedKFold``). This
        instance's splitter is the one used by both methods.
    """

    def __init__(self,classifier,skf):
        self.classifier = classifier
        self.skf = skf

    def strat_k(self):
        """Fit and score the classifier on each fold, then plot and print the means.

        Accuracy, precision, recall and F1 are computed per fold with the
        scikit-learn metric functions called with their default arguments.
        The four fold-averaged values are shown as a bar plot and printed,
        one per line, in the order accuracy, precision, recall, F1.
        """
        accuracy = []
        precision = []
        recall = []
        f1_value = []

        # Iterate the splitter stored on this instance, not a same-named global.
        for train_index, test_index in self.skf.split(X, y.ravel()):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index].ravel(), y[test_index].ravel()

            self.classifier.fit(X_train, y_train)
            y_pred = self.classifier.predict(X_test)

            accuracy.append(accuracy_score(y_test, y_pred))
            precision.append(precision_score(y_test, y_pred))
            recall.append(recall_score(y_test, y_pred))
            f1_value.append(f1_score(y_test, y_pred))

        xplot = ['accuracy', 'precision' ,'recall', 'f1 score']
        yplot = [
            sum(scores) / len(scores)
            for scores in (accuracy, precision, recall, f1_value)
        ]
        sns.barplot(x=xplot, y=yplot)
        for mean_score in yplot:
            print(mean_score)

    def roc(self):
        """Plot one ROC curve per fold plus the mean ROC curve, with AUC values.

        For each fold the classifier is refitted and the predicted
        probability of the second class (column 1 of ``predict_proba``) is
        used as the score. Each fold's TPR is interpolated onto a common
        100-point FPR grid so the curves can be averaged point-wise.
        """
        tprs = []
        aucs = []
        mean_fpr = np.linspace(0,1,100)

        plt.figure(figsize=(3,2), dpi=300)
        sns.set(font_scale=.4)

        # Use this instance's splitter and a 1-D label vector, matching strat_k.
        for i, (train, test) in enumerate(self.skf.split(X, y.ravel()), start=1):
            fitted = self.classifier.fit(X[train], y[train].ravel())
            prediction = fitted.predict_proba(X[test])
            fpr, tpr, t = roc_curve(y[test].ravel(), prediction[:, 1])
            tprs.append(interp(mean_fpr, fpr, tpr))
            roc_auc = auc(fpr, tpr)
            aucs.append(roc_auc)
            plt.plot(fpr, tpr, lw=.5, alpha=0.3, label='ROC fold %d (AUC = %0.4f)' % (i, roc_auc))

        # Diagonal reference line (TPR == FPR).
        plt.plot([0,1],[0,1],linestyle = '--',lw = .7,color = 'black')
        sns.set_style("darkgrid")
        mean_tpr = np.mean(tprs, axis=0)
        mean_auc = auc(mean_fpr, mean_tpr)
        plt.plot(mean_fpr, mean_tpr, color='blue',
                 label='Mean ROC (AUC = %0.4f )' % (mean_auc),lw=.7, alpha=1)

        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend(loc="lower right")
        plt.grid(True)
        plt.show()

# Logistic Regression
We will see that we don't have to fit the data and predict separately; the cross-validation library functions will do it for us.

K -Fold Results

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with a fixed seed for reproducible results.
classifier_lr = LogisticRegression(random_state  = 0)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_lr)

# Stratified K-Fold Results

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap logistic regression with the splitter, then score it fold by fold.
c = k(classifier_lr,skf)
c.strat_k()

# ROC for Logistic Regression

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# Naive Bayes
k-fold

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings.
classifier_nb = GaussianNB()
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_nb)

# Stratified K-Fold results for Naive Bayes

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap naive Bayes with the splitter, then score it fold by fold.
c = k(classifier_nb,skf)
c.strat_k()

# ROC Curve for Naive Bayes

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# Gradient Boosting (scikit-learn's GradientBoostingClassifier, not the XGBoost library)
k-fold results

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Despite the `_xg` suffix, this is scikit-learn's gradient boosting, not the XGBoost package.
# 100 boosting stages of depth-4 trees with learning rate 1.0; fixed seed for reproducibility.
classifier_xg = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth = 4, random_state = 0)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_xg)

# Stratified K-Fold Results for Gradient Boosting

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap the gradient boosting classifier with the splitter, then score it fold by fold.
c = k(classifier_xg,skf)
c.strat_k()

# ROC Curve for Gradient Boosting

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# 5 nearest neighbours; Minkowski metric with p=2 is the Euclidean distance.
classifier_knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_knn)

# Stratified K-fold results for KNN

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap KNN with the splitter, then score it fold by fold.
c = k(classifier_knn,skf)
c.strat_k()

# ROC CURVE FOR KNN

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# SVM

In [None]:
from sklearn.svm import SVC
# RBF-kernel SVM; probability=True enables predict_proba, which the ROC helper calls.
classifier_svm = SVC(kernel = 'rbf',probability=True, random_state = 0)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_svm)

# Stratified K- fold for SVM

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap the SVM with the splitter, then score it fold by fold.
c = k(classifier_svm,skf)
c.strat_k()

# ROC CURVE FOR SVM

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree splitting on entropy; fixed seed for reproducible results.
classifier_dt = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_dt)

# Stratified K-fold results for Decision Trees

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap the decision tree with the splitter, then score it fold by fold.
c = k(classifier_dt,skf)
c.strat_k()

# Roc curve for decision Trees

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()

# ADA Boost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with 100 estimators; fixed seed for reproducible results.
classifier_abc = AdaBoostClassifier(n_estimators=100, random_state=0)
# Print and plot the mean 10-fold accuracy, macro precision, macro recall and macro F1.
k_fold(classifier_abc)

# Stratified K -Fold results for AdaBoost

In [None]:
# 10 stratified folds taken in dataset order (no shuffling).
skf = StratifiedKFold(n_splits=10,shuffle=False)
# Wrap AdaBoost with the splitter, then score it fold by fold.
c = k(classifier_abc,skf)
c.strat_k()

# Roc Curve for AdaBoost

In [None]:
# Plot the per-fold and mean ROC curves for the classifier held by `c`.
c.roc()