In [1]:
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_auc_score, accuracy_score


from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

In [2]:
data = np.loadtxt('Dane/pima-indians-diabetes.data', delimiter=',')

In [4]:
data

array([[   6.   ,  148.   ,   72.   , ...,    0.627,   50.   ,    1.   ],
       [   1.   ,   85.   ,   66.   , ...,    0.351,   31.   ,    0.   ],
       [   8.   ,  183.   ,   64.   , ...,    0.672,   32.   ,    1.   ],
       ..., 
       [   5.   ,  121.   ,   72.   , ...,    0.245,   30.   ,    0.   ],
       [   1.   ,  126.   ,   60.   , ...,    0.349,   47.   ,    1.   ],
       [   1.   ,   93.   ,   70.   , ...,    0.315,   23.   ,    0.   ]])

In [5]:
X = data[:,:-1]
y = data[:,-1]

In [8]:
X.shape, y.shape

((768, 8), (768,))

In [9]:
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

In [11]:
models = [LogisticRegression(), \
         DecisionTreeClassifier(), \
         SVC(probability=True), \
         LinearDiscriminantAnalysis(), \
         QuadraticDiscriminantAnalysis(), \
         RandomForestClassifier()]

for model in models:
    model.fit(X_train, y_train)
    pred = model.predict_proba(X_test)[:, 1]
    y_pred = np.round(pred)
    print(accuracy_score(y_test, y_pred), roc_auc_score(y_test, pred))

0.755905511811 0.824074074074
0.704724409449 0.674584004294
0.641732283465 0.611111111111
0.771653543307 0.830716586151
0.736220472441 0.785963499732
0.724409448819 0.786030595813


In [12]:
from sklearn.ensemble import VotingClassifier

In [13]:
?VotingClassifier

In [14]:
modele = list(zip(["L","D","S","LDA","QDA","RF"], models))
modele

[('L',
  LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
            intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
            penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
            verbose=0, warm_start=False)),
 ('D',
  DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
              max_features=None, max_leaf_nodes=None,
              min_impurity_split=1e-07, min_samples_leaf=1,
              min_samples_split=2, min_weight_fraction_leaf=0.0,
              presort=False, random_state=None, splitter='best')),
 ('S', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True,
    tol=0.001, verbose=False)),
 ('LDA',
  LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                solver='svd', store_covariance=False, tol=0.000

In [31]:
model = VotingClassifier(estimators=modele, voting='hard')
model.fit(X_train, y_train)
pred = model.predict(X_test)
accuracy_score(y_test, pred)

0.76377952755905509

In [19]:
model = VotingClassifier(estimators=modele, voting='soft')
model.fit(X_train, y_train)
pred = model.predict(X_test)
accuracy_score(y_test, pred)

0.76377952755905509

In [38]:
models = [LogisticRegression(), \
         DecisionTreeClassifier(), \
         SVC(probability=True), \
         LinearDiscriminantAnalysis(), \
         QuadraticDiscriminantAnalysis()]

modele = list(zip(["L","D","S","LDA","QDA"], models))
modele

model = VotingClassifier(estimators=modele, voting='soft')
model.fit(X_train, y_train)
pred = model.predict(X_test)
accuracy_score(y_test, pred)

0.76377952755905509

In [39]:
from sklearn.ensemble import ExtraTreesClassifier

In [40]:
?ExtraTreesClassifier

### XGBoost

In [41]:
from xgboost.sklearn import XGBClassifier

In [45]:
model = XGBClassifier()
model.fit(X_train, y_train)
pred = model.predict_proba(X_test)[:,1]

accuracy_score(y_test, np.round(pred)), roc_auc_score(y_test, pred)

(0.77952755905511806, 0.82118894256575414)

In [46]:
?XGBClassifier