In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import xgboost as xgb
# Load the heart-disease CSV into a DataFrame.
dataset = pd.read_csv(
    '../input/heart-disease-cleveland-uci/heart_cleveland_upload.csv'
)
# Every column except the last is used as a feature; the last column is the
# label the classifiers below are asked to predict.
feature_frame = dataset.iloc[:, :-1]
label_series = dataset.iloc[:, -1]
X = feature_frame.values
y = label_series.values

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode the five categorical feature columns in a single pass.
#
# Positions index the raw feature matrix X (before any encoding), so nothing
# has to be re-derived after an encoder widens the matrix.  They correspond to
# the chained indices 2 -> 9 -> 15 -> 18 -> 22 used previously, which were only
# correct when cp/restecg/slope/ca expand to 4/3/3/4 indicator columns.
# NOTE(review): assumes the 13-feature column order with cp, restecg, slope,
# ca, thal at positions 2, 6, 10, 11, 12 -- confirm against the CSV header.
#
# ColumnTransformer emits its transformers' outputs in list order, followed by
# the passthrough columns in their original order.  Listing "thal" first and
# "cp" last reproduces the layout obtained by encoding cp, restecg, slope, ca
# and thal one after another with each result prepended.
#
# No LabelEncoder pass is needed: OneHotEncoder derives its categories from
# the sorted unique values of each column directly.
categorical_columns = [
    ("thal", 12),
    ("ca", 11),
    ("slope", 10),
    ("restecg", 6),
    ("cp", 2),
]
ct = ColumnTransformer(
    [(name, OneHotEncoder(), [position]) for name, position in categorical_columns],
    remainder='passthrough',
    sparse_threshold=0,  # always hand a dense array to the later steps
)
X = ct.fit_transform(X)

from sklearn.preprocessing import StandardScaler
# Standardize every column of X (indicator columns included) to zero mean and
# unit variance.
# NOTE(review): the scaler is fitted on all rows before the split below, so
# the held-out rows contribute to the scaling statistics.
scalerX = StandardScaler()
scalerX.fit(X)
X = scalerX.transform(X)

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows; random_state fixes which rows are held out.
XTrain, XTest, yTrain, yTest = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Seed kept for reproducibility; nothing in this section reads it.
seed = 7
# prepare models
# Each entry is (short label, unfitted estimator); the label is what appears
# in the printed scores and on the comparison plot.
models = [
    ('LR', LogisticRegression()),
    ('KNN', KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')),
    ('SVM', SVC(kernel='linear', random_state=0)),
    ('NB', GaussianNB()),
    ('DTC', DecisionTreeClassifier(criterion='entropy', random_state=0)),
    ('RFC', RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)),
    ('PER', Perceptron(tol=1e-3, random_state=0)),
    # random_state pins the weight initialisation so repeated runs give the
    # same scores, matching the other seeded estimators in this list.
    ('MLPER', MLPClassifier(activation='logistic', max_iter=200, hidden_layer_sizes=(20,),
                            alpha=0.001, solver='lbfgs', random_state=0, verbose=True)),
    # 'binary:logistic' is XGBoost's objective for two-class classification
    # (the XGBClassifier default); 'reg:logistic' is its regression-named twin.
    # NOTE(review): assumes the label column is 0/1 -- confirm.
    ('XGB', xgb.XGBClassifier(max_depth=5, learning_rate=0.1, objective='binary:logistic',
                              n_jobs=-1, use_label_encoder=False)),
    ('XGB2', xgb.XGBClassifier(objective='binary:logistic', use_label_encoder=False)),
]

# evaluate models
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

results = []  # one array of per-fold accuracies per model
names = []    # model labels, in the same order as `results`
scoring = 'accuracy'
# A single shuffled, stratified splitter shared by every model: each model is
# scored on exactly the same folds, class proportions are preserved in every
# fold, and `seed` makes the partition reproducible.
kfold = model_selection.StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    # Fit the scaler inside each training fold so validation rows never
    # influence the scaling statistics.  Standardization is unaffected by a
    # prior per-column shift/rescale, so this gives the same result whether or
    # not X was already standardized on the full data.
    fold_safe_model = make_pipeline(StandardScaler(), model)
    cv_results = model_selection.cross_val_score(fold_safe_model, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report mean accuracy with the across-fold standard deviation in brackets.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    
# Horizontal box plot of the cross-validation scores, one box per model,
# labelled with the model's short name.
fig, ax = plt.subplots()
fig.suptitle('Algorithm Comparison')
ax.boxplot(results, vert=0)
ax.set_yticklabels(names)
plt.show()