In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [17]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
%matplotlib inline

seed = 1142

In [5]:
df=pd.read_csv("data/diabetes.csv")
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
X = df.drop("class", axis = 1)
y = df["class"]

In [8]:
sc = StandardScaler()
X = sc.fit_transform(X)
X.shape

(768, 8)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state=seed)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((576, 8), (576,), (192, 8), (192,))

In [10]:
log_clf = LogisticRegression()
log_clf.fit(X_train, y_train)
print("log_clf.score:",log_clf.score(X_test, y_test))

log_clf.score: 0.7708333333333334


In [13]:
svc_clf = SVC()
svc_clf.fit(X_train, y_train)
print("svc_clf.score:",svc_clf.score(X_test, y_test))

svc_clf.score: 0.7604166666666666


In [16]:
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train, y_train)
print("dt_clf.score:",dt_clf.score(X_test, y_test))

dt_clf.score: 0.6614583333333334


In [18]:
y_log_predict = log_clf.predict(X_test)
y_svc_predict = svc_clf.predict(X_test)
y_dt_predict = dt_clf.predict(X_test)

y_predict = np.array((y_log_predict + y_svc_predict + y_dt_predict) >= 2, dtype='int')
print("accuracy_score:",accuracy_score(y_test, y_predict))

accuracy_score: 0.7708333333333334


In [20]:
hard_voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svc_clf', SVC()),
    ('dt_clf', DecisionTreeClassifier(random_state=666))], voting='hard')

hard_voting_clf.fit(X_train, y_train)
print("hard_voting_clf.score:",hard_voting_clf.score(X_test, y_test))

hard_voting_clf.score: 0.7708333333333334


In [21]:
soft_voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svc_clf', SVC(probability=True)),
    ('dt_clf', DecisionTreeClassifier(random_state=666))], voting='soft')

soft_voting_clf.fit(X_train, y_train)
print("soft_voting_clf.score:",soft_voting_clf.score(X_test, y_test))

soft_voting_clf.score: 0.7447916666666666
