In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for input_dir, _, file_names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(input_dir, file_name) for file_name in file_names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# **Importing Dataset**

In [None]:
# Load the heart-disease CSV from the Kaggle input directory into a DataFrame.
heart_csv_path = '/kaggle/input/heart-disease-uci/heart.csv'
dataset = pd.read_csv(heart_csv_path)

In [None]:
# Display the loaded DataFrame as the cell's output for a quick visual check.
dataset

In [None]:
# Print pandas' summary of the DataFrame (output of DataFrame.info()).
dataset.info()

**Checking if there are any NULL values.**

In [None]:
# Count the null entries in each column; a non-zero count would mean missing data.
dataset.isnull().sum()

# **Correlation Heat Map**

In [None]:
# Annotated heat map of the pairwise correlations between the dataset's columns.
correlations = dataset.corr()
plt.figure(figsize=(10, 10))
sns.heatmap(data=correlations, annot=True)

In [None]:
# Positional split of the frame: every column except the last is a feature,
# the last column is the target. Both are converted to NumPy arrays.
features, target = dataset.iloc[:, :-1], dataset.iloc[:, -1]
x, y = features.values, target.values

In [None]:
# Display the feature array (all columns of `dataset` except the last).
x

In [None]:
# Display the target array (the last column of `dataset`).
y

# **Splitting dataset into Train and Test set**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; random_state=0 fixes the shuffle
# so the same split is produced on every run.
split = train_test_split(x, y, test_size=0.2, random_state=0)
x_train, x_test, y_train, y_test = split

# **Feature Scaling**

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both the training and the test split.
sc = StandardScaler().fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

# **Selection of Models**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score

In [None]:
# Candidate classifiers, each paired with the label used in the printed report
# and in the results collected in `lst`. Every estimator that accepts a seed
# gets random_state=0 so repeated runs give the same numbers.
models = [
    ['Logistic Regression', LogisticRegression(random_state=0)],
    ['SVM', SVC(random_state=0)],
    ['KNeighbors', KNeighborsClassifier()],
    ['Naive Bayes', GaussianNB()],
    ['Decision Tree', DecisionTreeClassifier(random_state=0)],
    ['Random Forest', RandomForestClassifier(random_state=0)],
    ['XGBoost', XGBClassifier(random_state=0)],
]

# One row per model: [label, test accuracy %, mean CV accuracy %, CV std %].
lst = []

for name, model in models:
    # Hold-out evaluation: fit on the training split, score on the test split.
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    cm = confusion_matrix(y_test, y_pred)
    test_accuracy = accuracy_score(y_test, y_pred)

    # 10-fold cross-validation on the training split.
    # NOTE: x_train was standardised on the whole training split before this
    # loop, so each fold's validation rows contributed to the scaling
    # statistics; the CV scores are therefore slightly optimistic.
    accuracies = cross_val_score(estimator=model, X=x_train, y=y_train, cv=10)
    cv_mean = accuracies.mean() * 100
    cv_std = accuracies.std() * 100

    print(name)
    print(cm)
    print('Accuracy Score', test_accuracy)
    print('')
    print("Mean Accuracy: {:.2f} %".format(cv_mean))
    print('')
    print("Standard Deviation: {:.2f} %".format(cv_std))
    print('')
    print('-----------------------------------')
    print('')

    lst.append([name, test_accuracy * 100, cv_mean, cv_std])

In [None]:
# Display the collected rows: [model label, test accuracy %, mean CV accuracy %, CV std %].
lst

**Building a DataFrame from the results.**

In [None]:
# Tabulate the per-model results collected in `lst`, one row per model.
summary_columns = ['Model', 'Accuracy', 'Mean Accuracy', 'Std. Deviation']
df = pd.DataFrame(data=lst, columns=summary_columns)

In [None]:
# Rank the models best-first by test-set accuracy, breaking ties on the mean
# cross-validation accuracy. `df` is rebound to the sorted copy rather than
# sorted with inplace=True, so the cell is a plain assignment.
df = df.sort_values(by=['Accuracy', 'Mean Accuracy'], ascending=False)

**The table below shows the models' scores in descending order.**

In [None]:
# Display the results table.
df

**Applying Grid Search to the top three models above to find their best parameters and select the final model.**
1. Random Forest
2. SVM
3. Logistic Regression

In [None]:
# Fresh, unfitted estimators with default settings, used as the base
# estimators of the grid search.
rf, svm, lr = RandomForestClassifier(), SVC(), LogisticRegression()

In [None]:
# Hyper-parameter grids, one per estimator; random_state is pinned to 0 in each.
rf_param_grid = [{'n_estimators': [50, 100, 200, 300, 500],
                  'criterion': ['gini', 'entropy'],
                  'random_state': [0]}]
svm_param_grid = [{'C': [0.1, 0.5, 1.0],
                   'kernel': ['linear', 'rbf'],
                   'random_state': [0]}]
lr_param_grid = [{'C': [0.1, 0.5, 1.0],
                  'random_state': [0]}]

# (estimator, parameter grid) pairs consumed by the grid-search loop.
data = [(rf, rf_param_grid), (svm, svm_param_grid), (lr, lr_param_grid)]

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Tune every (estimator, grid) pair using 10-fold cross-validated accuracy on
# the training split, then report the best score and the parameters behind it.
for estimator, param_grid in data:
    grid = GridSearchCV(estimator=estimator, param_grid=param_grid, scoring='accuracy', cv=10)
    grid.fit(x_train, y_train)
    best_accuracy, best_parameters = grid.best_score_, grid.best_params_
    print('{} BestAccuracy : {:.2f}%'.format(estimator, best_accuracy*100))
    print('BestParameters : ', best_parameters)

**Therefore, after applying grid search, we can conclude that Random Forest is the best-suited model for this dataset, with a best cross-validated accuracy of 84.75%.**