# Voting Classifier
### Nathan Kirse

In [1]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import scale
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score

In [33]:
df = pd.read_csv("No_Errors_Reduced.csv").drop('Unnamed: 0', axis=1)
pd.set_option('display.max_columns', None)
print(df.shape)
print(df['Exoplanet Archive Disposition'].value_counts())
df.head()

(5217, 24)
FALSE POSITIVE    3115
CONFIRMED         2102
Name: Exoplanet Archive Disposition, dtype: int64


Unnamed: 0,PRF Δθ<sub>SQ</sub>(KIC) [arcsec],Number of Planets,Planet-Star Radius Ratio,Planetary Radius [Earth radii],PRF Δθ<sub>SQ</sub>(OOT) [arcsec],Maximum Multiple Event Statistic,FW Offset Significance [percent],Planet-Star Distance over Star Radius,Transit Depth [ppm],Transit Signal-to-Noise,Inclination [deg],Equilibrium Temperature [K],Impact Parameter,Number of Transits,Stellar Metallicity [dex],FW Source Δα(OOT) [sec],Transit Duration [hrs],FW Source Δδ(OOT) [arcsec],Maximum Single Event Statistic,Fitted Stellar Density [g/cm**3],Orbit Semi-Major Axis [AU],Orbital Period [days],Insolation Flux [Earth flux],Exoplanet Archive Disposition
0,0.32,2,0.022344,2.26,0.2,28.47082,0.002,24.81,615.8,35.8,89.66,793.0,0.146,142.0,0.14,0.43,2.9575,0.94,5.135849,3.20796,0.0853,9.488036,93.59,CONFIRMED
1,0.5,2,0.027954,2.83,0.39,20.109507,0.003,77.9,874.8,25.8,89.57,443.0,0.586,25.0,0.14,-0.63,4.507,1.23,7.027669,3.02368,0.2734,54.418383,9.11,CONFIRMED
2,0.276,1,0.387394,33.46,0.289,541.8951,0.0,3.278,8079.2,505.6,67.09,1395.0,1.276,621.0,-0.52,-0.111,2.40641,0.002,39.06655,0.2208,0.0267,1.736952,891.96,FALSE POSITIVE
3,0.07,1,0.024064,2.75,0.1,33.1919,0.733,8.75,603.3,40.9,85.41,1406.0,0.701,515.0,0.07,-0.01,1.6545,0.23,4.749945,1.98635,0.0374,2.525592,926.16,CONFIRMED
4,8.948,1,0.183387,39.21,8.93,46.15308,0.0,2.4,233.7,47.7,60.92,1342.0,1.169,185.0,0.0,-13.45,5.022,24.09,10.964684,0.00485,0.082,7.36179,767.22,FALSE POSITIVE


# Scale and Split the Data

In [34]:
cols = list(df.columns)
X = pd.DataFrame(scale(df.drop('Exoplanet Archive Disposition',axis=1)), columns=cols[:-1])
y = df['Exoplanet Archive Disposition']
X.head()

  


Unnamed: 0,PRF Δθ<sub>SQ</sub>(KIC) [arcsec],Number of Planets,Planet-Star Radius Ratio,Planetary Radius [Earth radii],PRF Δθ<sub>SQ</sub>(OOT) [arcsec],Maximum Multiple Event Statistic,FW Offset Significance [percent],Planet-Star Distance over Star Radius,Transit Depth [ppm],Transit Signal-to-Noise,Inclination [deg],Equilibrium Temperature [K],Impact Parameter,Number of Transits,Stellar Metallicity [dex],FW Source Δα(OOT) [sec],Transit Duration [hrs],FW Source Δδ(OOT) [arcsec],Maximum Single Event Statistic,Fitted Stellar Density [g/cm**3],Orbit Semi-Major Axis [AU],Orbital Period [days],Insolation Flux [Earth flux]
0,-0.512749,0.574665,-0.082209,-0.029689,-0.552872,-0.251865,-0.504768,-0.135335,-0.3089,-0.313184,0.520745,-0.450817,-0.217554,-0.479365,0.929629,0.048426,-0.38023,0.078659,-0.224601,-0.008,-0.300312,-0.310893,-0.091127
1,-0.45606,0.574665,-0.080036,-0.029535,-0.493124,-0.254227,-0.500411,0.502853,-0.305029,-0.325034,0.515233,-0.897733,-0.049885,-0.686469,0.929629,-0.019383,-0.157404,0.095238,-0.222009,-0.015344,0.478554,0.197945,-0.093133
2,-0.526606,-0.482468,0.059159,-0.021243,-0.524885,-0.106805,-0.513481,-0.394169,-0.197334,0.24354,-0.861401,0.317877,0.21305,0.368523,-1.370972,0.013818,-0.459479,0.025032,-0.178113,-0.127042,-0.542957,-0.398674,-0.072172
3,-0.591483,-0.482468,-0.081543,-0.029556,-0.584318,-0.250531,2.679901,-0.328391,-0.309087,-0.30714,0.260482,0.331923,-0.006062,0.18089,0.685626,0.020279,-0.567608,0.038067,-0.225129,-0.056683,-0.498652,-0.389743,-0.07136
4,2.204522,-0.482468,-0.019844,-0.019687,2.192392,-0.246869,-0.513481,-0.404723,-0.314612,-0.299082,-1.23924,0.250202,0.172276,-0.40325,0.441623,-0.839492,-0.083345,1.402179,-0.216615,-0.135648,-0.313976,-0.334973,-0.075134


In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Voting Classifier

In [25]:
from sklearn.ensemble import VotingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

In [26]:
knn_clf = KNeighborsClassifier(algorithm='auto',n_neighbors=5,p=1,leaf_size=2)
dt_clf = DecisionTreeClassifier(criterion='entropy',max_depth=10,min_samples_leaf=5,min_samples_split=8, 
                                random_state=42)
mlp = MLPClassifier(activation='relu', alpha=0.25, hidden_layer_sizes=50, learning_rate='constant', 
                    max_iter=40, solver='lbfgs', tol=0.00001, random_state=42)
svc = SVC(C=30, decision_function_shape='ovo', kernel='rbf', probability=True, shrinking=False, tol=0.001,
         random_state=42)
gpc = GaussianProcessClassifier(random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=160, max_features=3, criterion='entropy',
                                 min_samples_split=3, random_state=3)
ada = AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy',max_depth=10,min_samples_leaf=5,min_samples_split=8),
    n_estimators=150, algorithm="SAMME.R", learning_rate=0.5, random_state=3)

# Hard Voting

In [8]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='hard')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


37.57203793525696
Validation Accuracy: 95.8850 (+/- 0.75)
Train Accuracy: 97.70 (+/- 0.40)


### AUC

In [19]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='hard')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


AttributeError: predict_proba is not available when voting='hard'

# Soft Voting

In [9]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


45.99282264709473
Validation Accuracy: 96.2865 (+/- 0.82)
Train Accuracy: 98.34 (+/- 0.29)


### AUC

In [16]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


38.894959926605225
Validation Accuracy: 99.2875 (+/- 0.32)
Train Accuracy: 99.93 (+/- 0.02)


# Adding Weights

In [17]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


38.35266327857971
Validation Accuracy: 96.7153 (+/- 1.11)
Train Accuracy: 99.02 (+/- 0.25)


### AUC

In [14]:
from sklearn.model_selection import ShuffleSplit
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,2,1,4])

start=time.time()
#ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
ss = ShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


45.24237632751465
Validation Accuracy: 99.5044 (+/- 0.26)
Train Accuracy: 99.97 (+/- 0.01)


In [15]:
from sklearn.model_selection import ShuffleSplit
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


41.1366913318634
Validation Accuracy: 99.4186 (+/- 0.31)
Train Accuracy: 99.97 (+/- 0.01)


In [37]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp), ('ada', ada),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,4,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))

63.206697940826416
Validation Accuracy: 97.3266 (+/- 0.97)
Train Accuracy: 99.81 (+/- 0.10)


In [38]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp), ('ada', ada),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,4,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


54.44930839538574
Validation Accuracy: 99.4774 (+/- 0.31)
Train Accuracy: 100.00 (+/- 0.00)


# With Errors

In [22]:
df = pd.read_csv("Errors_Reduced.csv").drop('Unnamed: 0', axis=1)
cols = list(df.columns)
X = pd.DataFrame(scale(df.drop('koi_disposition',axis=1)), columns=cols[:-1])
y = df.koi_disposition
print(X.shape)
X.head()

(5145, 34)


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,koi_dikco_msky,koi_dicco_msky,koi_steff_err2,koi_fwm_stat_sig,koi_prad_err1,koi_steff_err1,koi_ror_err1,koi_smet_err2,koi_smet_err1,koi_count,koi_max_mult_ev,koi_ror,koi_depth,koi_prad,koi_impact,koi_prad_err2,koi_dor,koi_time0bk_err1,koi_duration_err1,koi_model_snr,koi_srho_err1,koi_dikco_msky_err,koi_ror_err2,koi_duration_err2,koi_time0bk_err2,koi_srho_err2,koi_insol_err1,koi_fwm_sdeco_err,koi_depth_err1,koi_period,koi_fwm_srao_err,koi_insol,koi_dor_err2,koi_dicco_msky_err
0,-0.515076,-0.556351,1.026059,-0.503632,-0.047243,-1.307297,-0.207968,1.203666,-1.070064,0.571733,-0.253258,-0.081331,-0.310178,-0.029753,-0.216598,0.026552,-0.13299,-0.283404,-0.288016,-0.315063,-0.176109,-0.441786,0.074506,0.288016,0.283404,0.030339,-0.095829,-0.126831,-0.152076,-0.310349,-0.069964,-0.087819,0.236,-0.437587
1,-0.457467,-0.495659,1.026059,-0.499274,-0.047017,-1.307297,-0.207151,1.203666,-1.070064,0.571733,-0.255609,-0.079158,-0.306323,-0.029599,-0.048871,0.026525,0.512402,-0.196448,-0.226758,-0.326844,-0.147941,0.236536,0.073847,0.226758,0.196448,-0.090122,-0.0969,0.233257,-0.086841,0.200957,0.3075,-0.089831,-0.670372,0.001918
2,-0.529158,-0.527922,-0.187011,-0.512346,-0.016279,0.29864,-0.197229,-0.581401,0.934023,-0.48297,-0.108919,0.060116,-0.199073,-0.021365,0.214157,0.024769,-0.394746,-0.414157,-0.425495,0.238405,-0.180959,-0.638265,0.006559,0.425495,0.414157,0.123627,-0.070056,-0.94243,-0.179394,-0.398556,-0.930941,-0.068804,0.322562,-0.648087
3,-0.595088,-0.588295,-0.669631,2.681612,-0.044913,0.552209,-0.207679,-0.581401,0.265994,-0.48297,-0.251931,-0.080665,-0.310364,-0.029621,-0.005033,0.026419,-0.328225,-0.34926,-0.359692,-0.309055,-0.140324,-0.348224,0.073706,0.359692,0.34926,-0.025345,-0.061779,-0.324879,-0.162677,-0.389581,-0.357556,-0.067989,0.186817,-0.506983
4,2.246297,2.232292,0.465177,-0.512346,-0.023982,-0.673375,0.498124,1.203666,-1.070064,-0.48297,-0.248287,-0.018931,-0.315867,-0.019819,0.173368,0.020221,-0.405419,-0.259747,-0.19083,-0.301044,-0.176907,-0.635926,0.040217,0.19083,0.259747,0.125165,-0.082939,-0.594945,-0.207935,-0.334545,-0.5373,-0.071775,0.285183,-0.659653


In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [24]:
from sklearn.model_selection import ShuffleSplit
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


43.67449951171875
Validation Accuracy: 99.4556 (+/- 0.28)
Train Accuracy: 99.99 (+/- 0.01)


In [29]:

import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp), ('ada', ada),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,4,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))


64.68505525588989
Validation Accuracy: 99.4750 (+/- 0.26)
Train Accuracy: 100.00 (+/- 0.00)


In [32]:
import time
eclf1 = VotingClassifier(estimators=[('knn', knn_clf), ('dt', dt_clf), ('mlp', mlp), ('ada', ada),
                                     ('svc', svc), ('gpc', gpc), ('rnd', rnd_clf)], voting='soft',
                                      weights=[1,2,2,4,2,1,4])

start=time.time()
ss = StratifiedShuffleSplit(n_splits=10, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))

63.6869056224823
Validation Accuracy: 97.4746 (+/- 0.72)
Train Accuracy: 99.91 (+/- 0.06)


# Only AdaBoost and RandomForest

In [41]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy',max_depth=10,min_samples_leaf=5,min_samples_split=8),
   n_estimators=160, algorithm="SAMME.R", learning_rate=0.8, random_state=3)

import time
eclf1 = VotingClassifier(estimators=[('ada', ada), ('rnd', rnd_clf)], voting='soft')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=100, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='accuracy', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))

180.4611780643463
Validation Accuracy: 97.0027 (+/- 1.11)
Train Accuracy: 100.00 (+/- 0.00)


In [42]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy',max_depth=10,min_samples_leaf=5,min_samples_split=8),
   n_estimators=160, algorithm="SAMME.R", learning_rate=0.8, random_state=3)

import time
eclf1 = VotingClassifier(estimators=[('ada', ada), ('rnd', rnd_clf)], voting='soft')

start=time.time()
ss = StratifiedShuffleSplit(n_splits=100, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
end=time.time()

print(end-start)
#print(scores)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))

179.99813961982727
Validation Accuracy: 99.5072 (+/- 0.29)
Train Accuracy: 100.00 (+/- 0.00)


In [43]:
eclf1 = VotingClassifier(estimators=[('ada', ada), ('rnd', rnd_clf)], voting='soft')
ss = StratifiedShuffleSplit(n_splits=1000, test_size=0.3, random_state=3)
scores=cross_validate(eclf1, X_train, y_train, cv=ss, return_train_score=True, scoring='roc_auc', n_jobs=-1)
print("Validation Accuracy: %0.4f (+/- %0.2f)" % (scores['test_score'].mean()*100, scores['test_score'].std()*2*100))
print("Train Accuracy: %0.2f (+/- %0.2f)" % (scores['train_score'].mean()*100, scores['train_score'].std()*2*100))

Validation Accuracy: 99.5072 (+/- 0.29)
Train Accuracy: 100.00 (+/- 0.00)
