**Import Dataset To OS**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file under it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Import Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB,BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score,confusion_matrix,roc_auc_score
from sklearn.metrics import classification_report

**Read CSV**

In [None]:
# Load the heart-disease table from the Kaggle input mount.
HEART_CSV = '/kaggle/input/heart-disease-uci/heart.csv'
dt = pd.read_csv(HEART_CSV)

# Preview the first rows as the cell output.
dt.head()

**Prepare Data For Modeling**

**Assign** 
* the 13 features to X,  
* the last column (**target**) to the class label `y`.

In [None]:
# Every column except the last is a feature; the last column is the label.
feature_frame = dt.iloc[:, :-1]
label_series = dt.iloc[:, -1]

# Downstream cells work on plain NumPy arrays.
X = feature_frame.values
y = label_series.values

**Split**  
* The data set into the Training and Test Set.

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

**Normalize**
*  Standardizing the data will transform the data so that its distribution will have a mean of 0 and a standard deviation of 1.

In [None]:
# Learn the per-feature mean and standard deviation from the training rows only,
# then apply that same transformation to both splits so no test-set statistics
# leak into the scaler.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

**Create Model List**

In [None]:
# Candidate classifiers as [display name, estimator] pairs.
# The display name is what appears in the printed reports, the results table
# and the comparison chart; the order of this list is also the order of the
# confusion matrices collected during training.
models = [
    ['Logistic Regression', LogisticRegression(random_state=0)],
    ['SVM', SVC(random_state=0)],
    ['KNeighbors', KNeighborsClassifier()],
    ['GaussianNB', GaussianNB()],
    ['BernoulliNB', BernoulliNB()],
    ['DecisionTree', DecisionTreeClassifier(random_state=0)],
    ['RandomForest', RandomForestClassifier(random_state=0)],
    ['MLPClassifier', MLPClassifier(random_state=42, max_iter=1000)],
]

**Training and Test**
* Train various Classification Models on the Training set.
* See which model has highest accuracy.
* Compare accuracy of Classification Models.

In [None]:
# Fit every candidate on the training split and collect its hold-out metrics.
lst_1 = []   # one [name, accuracy %, k-fold mean accuracy %, ROC AUC] row per model
cmlist = []  # confusion matrices, in the same order as `models`
for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    cmlist.append(cm)

    test_accuracy = accuracy_score(y_test, y_pred)

    # 10-fold cross-validated accuracy on the training split.
    # cross_val_score works on clones, so `model` stays fitted on X_train.
    accuracies = cross_val_score(estimator=model, X=X_train, y=y_train, cv=10)
    kfold_accuracy = accuracies.mean() * 100

    # ROC AUC ranks samples by a continuous score; feeding it hard 0/1
    # predictions collapses the curve to a single operating point. Use the
    # positive-class probability when the estimator exposes one, otherwise
    # fall back to the decision function (e.g. SVC without probability=True).
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X_test)[:, 1]
    else:
        y_score = model.decision_function(X_test)
    roc = roc_auc_score(y_test, y_score)

    print(name, ':')
    print(cm)
    print('Accuracy Score: ', test_accuracy)
    print('')
    print('K-Fold Validation Mean Accuracy: {:.2f} %'.format(kfold_accuracy))
    print('')
    print('ROC AUC Score: {:.2f}'.format(roc))
    print('-'*40)
    print('')
    print(classification_report(y_test, y_pred))

    # Keep every metric numeric so the results table can be sorted and plotted.
    lst_1.append([name, test_accuracy * 100, kfold_accuracy, roc])

# **Creating Confusion Matrix Plots**

In [None]:
# Name each confusion matrix after its classifier. cmlist follows the order of
# `models`: logistic regression, SVM, KNN, Gaussian NB, Bernoulli NB,
# decision tree, random forest, MLP.
cm_lr, cm_svm, cm_knn, cm_gnb, cm_bnb, cm_dtc, cm_rf, cm_mlp = (
    cmlist[position] for position in range(8)
)

In [None]:
# Draw one annotated heatmap per classifier on a shared 2x4 grid.
# Each entry pairs a panel title with the matrix shown in that panel; the list
# order is the left-to-right, top-to-bottom panel order.
confusion_panels = [
    ("Logistic Regression Confusion Matrix", cm_lr),
    ("K Nearest Neighbors Confusion Matrix", cm_knn),
    ("Support Vector Machine Confusion Matrix", cm_svm),
    ("Bernoulli Naive Bayes Confusion Matrix", cm_bnb),
    ("Gaussian Naive Bayes Confusion Matrix", cm_gnb),
    ("Decision Tree Classifier Confusion Matrix", cm_dtc),
    ("Random Forest Confusion Matrix", cm_rf),
    ("Multilayer Perceptron Confusion Matrix", cm_mlp),
]

plt.figure(figsize=(25, 8))
plt.suptitle("Confusion Matrixes", fontsize=24)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

for position, (panel_title, matrix) in enumerate(confusion_panels, start=1):
    plt.subplot(2, 4, position)
    plt.title(panel_title)
    # fmt="d" prints the integer counts; the colour bar is hidden because the
    # annotations already carry the values.
    sns.heatmap(matrix, annot=True, cmap="Blues", fmt="d", cbar=False, annot_kws={"size": 16})

plt.show()

In [None]:
# Tabulate the per-model metrics, highest hold-out ROC AUC first.
summary_columns = ['Model', 'Accuracy', 'K-Fold Mean Accuracy', 'ROC_AUC']
df2 = pd.DataFrame(lst_1, columns=summary_columns).sort_values(by=['ROC_AUC'], ascending=False)

# Show the sorted table as the cell output.
df2

In [None]:
# Horizontal bars: one per model, bar length = hold-out ROC AUC.
fig = plt.figure(figsize=(12, 12))
ax = sns.barplot(data=df2, y='Model', x='ROC_AUC', color='r')
ax.set_title('Model Comparison');

In [None]:
# Re-draw the 2x4 grid of confusion-matrix heatmaps, one panel per classifier.
# Each entry pairs a panel title with the matrix shown in that panel; the list
# order is the left-to-right, top-to-bottom panel order.
confusion_panels = [
    ("Logistic Regression Confusion Matrix", cm_lr),
    ("K Nearest Neighbors Confusion Matrix", cm_knn),
    ("Support Vector Machine Confusion Matrix", cm_svm),
    ("Bernoulli Naive Bayes Confusion Matrix", cm_bnb),
    ("Gaussian Naive Bayes Confusion Matrix", cm_gnb),
    ("Decision Tree Classifier Confusion Matrix", cm_dtc),
    ("Random Forest Confusion Matrix", cm_rf),
    ("Multilayer Perceptron Confusion Matrix", cm_mlp),
]

plt.figure(figsize=(25, 8))
plt.suptitle("Confusion Matrixes", fontsize=24)
plt.subplots_adjust(wspace=0.4, hspace=0.4)

for position, (panel_title, matrix) in enumerate(confusion_panels, start=1):
    plt.subplot(2, 4, position)
    plt.title(panel_title)
    # fmt="d" prints the integer counts; the colour bar is hidden because the
    # annotations already carry the values.
    sns.heatmap(matrix, annot=True, cmap="Blues", fmt="d", cbar=False, annot_kws={"size": 16})

plt.show()

# Seven log-spaced values from 1e-3 to 1e3, printed for reference.
print(np.logspace(-3,3,7))

In [None]:
# (estimator, parameter grid) pairs for the exhaustive hyper-parameter search.
# Every estimator with internal randomness is seeded, using the same seeds as
# the baseline `models` list, so the search gives the same winner on each run.
grid_models = [
    # 'saga' shuffles the data internally, hence the seed.
    (LogisticRegression(random_state=0),
     [{'C': [0.001, 0.01, 0.1, 1, 10, 100],
       'penalty': ['l1', 'l2'],
       'solver': ['liblinear', 'saga']}]),
    (SVC(random_state=0),
     [{'C': [0.1, 1, 10, 100, 1000]}]),
    (KNeighborsClassifier(),
     [{'n_neighbors': np.arange(1, 100),
       'metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski']}]),
    (GaussianNB(),
     [{'var_smoothing': np.logspace(0, -9, num=100)}]),
    (DecisionTreeClassifier(random_state=0),
     [{'criterion': ['gini', 'entropy'],
       'max_depth': np.arange(1, 50),
       'min_samples_leaf': [1, 2, 4]}]),
    (RandomForestClassifier(random_state=0),
     [{'n_estimators': [100, 150, 200],
       'criterion': ['gini', 'entropy'],
       'min_samples_leaf': [2, 10, 30]}]),
    (MLPClassifier(random_state=42, max_iter=1000),
     [{'solver': ['lbfgs', 'sgd', 'adam'],
       'learning_rate': ['constant', 'invscaling', 'adaptive']}]),
]

In [None]:
# Tune each estimator with 5-fold grid search (scored on ROC AUC), then report
# how the refitted best configuration does on the hold-out split.
for estimator, param_grid in grid_models:
    grid = GridSearchCV(estimator=estimator, param_grid=param_grid, scoring='roc_auc', cv=5)
    grid.fit(X_train, y_train)
    best_score = grid.best_score_    # mean cross-validated ROC AUC of the winner
    best_param = grid.best_params_

    y_pred = grid.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)

    # Score the hold-out ROC AUC the same way the search does: from continuous
    # scores rather than hard 0/1 predictions. Use the positive-class
    # probability when available, otherwise the decision function
    # (e.g. SVC without probability=True).
    if hasattr(grid, 'predict_proba'):
        y_score = grid.predict_proba(X_test)[:, 1]
    else:
        y_score = grid.decision_function(X_test)
    roc = roc_auc_score(y_test, y_score)

    print(' {}: \n Best score: {:.1f} %'.format(estimator, best_score*100))
    print('')
    print(classification_report(y_test, y_pred))
    print('')
    print(cm)
    print('')
    print(best_param)
    print('')
    print('ROC AUC Score: {:.2f}'.format(roc))
    print('-'*25)
    print('')

**Predictions**  
Scenario: 
* A patient has cardiac symptoms
* You use his vitals as input into the Machine Learning Algorithm.   
1. He is a 20-year-old male with a chest pain value of 2 (atypical angina) and a resting blood pressure of 110.
2. In addition, he has a serum cholesterol of 230 mg/dl.
3. His fasting blood sugar is > 120 mg/dl.
4. He has a resting electrocardiographic result of 1.
5. The patient's maximum heart rate achieved is 140.
6. Also, he has exercise-induced angina.
7. His ST depression induced by exercise relative to rest is 2.2.
8. The slope of the peak exercise ST segment is flat.
9. He has no major vessels colored by fluoroscopy, and in addition his thal result is a reversible defect.
10. Based on this information, can you classify this patient as having Heart Disease?

In [None]:
# The scenario patient as a single row of 13 feature values
# (presumably in the dataset's column order — verify against the CSV header),
# scaled with the scaler fitted on the training split.
patient = sc.transform([[20,1,2,110,230,1,1,140,1,2.2,2,0,2]])

# Ask every fitted baseline model for its predicted class.
for label, model in models:
    print(label, ':')
    print(model.predict(patient))

# **Conclusions**
1. 13 features are examined, but the top 4 significant features help us classify between positive and negative diagnoses.  
   These features are chest pain type (cp), maximum heart rate achieved (thalach), number of major vessels (ca) and ST depression.  
2. Machine learning models can classify patients with Heart Disease. Patients can be diagnosed and 