In [243]:
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml
import seaborn as sns
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout, ZeroPadding1D, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import SGD, Adam, RMSprop

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import VarianceThreshold

from sklearn.linear_model import LogisticRegression

%matplotlib inline

## Data Import

In [260]:
X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)
# X,Y = ml.shuffleData(X,Y)

Xdf = pd.DataFrame(X)
Ydf = pd.DataFrame(Y)

In [261]:
Xdf.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,242.0,227.0,240.92,232.44,1195.0,253.0,0.0,1.2671,6.4128,1.9869,3.9756,2.3392,6.3537,0.0
1,249.0,230.0,242.31,233.68,1579.0,243.0,0.0,9.6831,6.0824,1.1964,3.4577,2.0416,7.6746,0.0
2,223.0,195.0,227.64,204.42,1034.0,603.0,318.0,1.5286,17.869,13.23,5.712,4.7216,6.603,0.0
3,234.0,221.0,236.27,229.73,7716.0,3907.0,0.0,0.60465,8.0497,3.4476,2.4845,1.5741,1.4205,0.0
4,234.0,233.0,245.51,234.1,545.0,21.0,0.0,6.7473,5.2649,0.86766,5.9626,4.245,3.1429,24.2


### Try Reducing Dimension

In [262]:
X_kBest = SelectKBest(f_classif, k=10).fit_transform(X,Y)

In [263]:
X_kBestdf = pd.DataFrame(X_kBest)
X_kBestdf.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,242.0,227.0,240.92,232.44,253.0,6.4128,1.9869,2.3392,6.3537,0.0
1,249.0,230.0,242.31,233.68,243.0,6.0824,1.1964,2.0416,7.6746,0.0
2,223.0,195.0,227.64,204.42,603.0,17.869,13.23,4.7216,6.603,0.0
3,234.0,221.0,236.27,229.73,3907.0,8.0497,3.4476,1.5741,1.4205,0.0
4,234.0,233.0,245.51,234.1,21.0,5.2649,0.86766,4.245,3.1429,24.2


### Try Increasing Dimension

In [267]:
X_poly = PolynomialFeatures(2).fit_transform(X_kBest)
print(X_poly.shape)
print(X_poly[0])

(200000, 66)
[  1.00000000e+00   2.42000000e+02   2.27000000e+02   2.40920000e+02
   2.32440000e+02   2.53000000e+02   6.41280000e+00   1.98690000e+00
   2.33920000e+00   6.35370000e+00   0.00000000e+00   5.85640000e+04
   5.49340000e+04   5.83026400e+04   5.62504800e+04   6.12260000e+04
   1.55189760e+03   4.80829800e+02   5.66086400e+02   1.53759540e+03
   0.00000000e+00   5.15290000e+04   5.46888400e+04   5.27638800e+04
   5.74310000e+04   1.45570560e+03   4.51026300e+02   5.30998400e+02
   1.44228990e+03   0.00000000e+00   5.80424464e+04   5.59994448e+04
   6.09527600e+04   1.54497178e+03   4.78683948e+02   5.63560064e+02
   1.53073340e+03   0.00000000e+00   5.40283536e+04   5.88073200e+04
   1.49059123e+03   4.61835036e+02   5.43723648e+02   1.47685403e+03
   0.00000000e+00   6.40090000e+04   1.62243840e+03   5.02685700e+02
   5.91817600e+02   1.60748610e+03   0.00000000e+00   4.11240038e+01
   1.27415923e+01   1.50008218e+01   4.07450074e+01   0.00000000e+00
   3.94777161e+00   4

In [268]:
X_poly_train = X_poly[:180000,:]
Y_poly_train = Y[:180000]
X_poly_val = X_poly[180000:,:]
Y_poly_val = Y[180000:]

In [296]:
X_train, X_val = X[:180000,:], X[180000:,:]
Y_train, Y_val = Y[:180000], Y[180000:]
print(X_train.shape)
print(Y_train.shape)

(180000, 14)
(180000,)


### Log Regression

In [297]:
logReg = LogisticRegression()
logReg.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [298]:
logReg.score(X_val, Y_val)

0.68884999999999996

In [301]:
preds = logReg.predict(X_val)
wrong = Y_val[preds != Y_val]
right = Y_val[preds == Y_val]
wrong.shape

(6223,)

In [290]:
logReg = LogisticRegression()
logReg.fit(X_poly_train, Y_poly_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [291]:
logReg.score(X_poly_val, Y_poly_val)

0.68774999999999997

In [292]:
logReg3 = LogisticRegression()
logReg3.fit(X_poly_3rd_train, Y_poly_3rd_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [293]:
logReg3.score(X_poly_3rd_val, Y_poly_3rd_val)

0.68540000000000001

## Keras - Neural Networks

In [250]:
nnModel = Sequential()
nnModel.add(BatchNormalization(input_shape=(66,)))
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(Dense(1, activation='sigmoid'))
adam = Adam(lr=.01)
nnModel.compile(loss="binary_crossentropy", optimizer=adam, metrics=["accuracy"])

In [253]:
nnModel.optimizer = Adam(lr=0.001)
nnModel.fit(X_poly_train, Y_poly_train, epochs=1, batch_size=64, validation_data=(X_poly_val, Y_poly_val))

Train on 180000 samples, validate on 20000 samples
Epoch 1/1


<keras.callbacks.History at 0x1a38d5f550>

In [252]:
nnModel.summaryy()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_37 (Batc (None, 66)                264       
_________________________________________________________________
dense_130 (Dense)            (None, 1024)              68608     
_________________________________________________________________
dense_131 (Dense)            (None, 512)               524800    
_________________________________________________________________
dense_132 (Dense)            (None, 512)               262656    
_________________________________________________________________
dense_133 (Dense)            (None, 512)               262656    
_________________________________________________________________
dense_134 (Dense)            (None, 512)               262656    
_________________________________________________________________
dense_135 (Dense)            (None, 512)               262656    
__________

### Dense Network with Dropout

In [None]:
dropout = .2

In [272]:
nnModel = Sequential()
nnModel.add(BatchNormalization(input_shape=(66,)))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(256, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dropout(dropout))
nnModel.add(Dense(512, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))

nnModel.add(Dense(1, activation='sigmoid'))
nnModel.compile(loss="binary_crossentropy", optimizer=adam, metrics=["accuracy"])

In [273]:
nnModel.optimizer = Adam(lr=0.01)
nnModel.fit(X_poly_train, Y_poly_train, epochs=1, batch_size=16, validation_data=(X_poly_val, Y_poly_val))

Train on 180000 samples, validate on 20000 samples
Epoch 1/1


<keras.callbacks.History at 0x1a55cb0350>

### Deep Network with 3rd degree polynomial features

In [276]:
X_poly_3rd = PolynomialFeatures(3).fit_transform(X_kBest)
print(X_poly_3rd.shape)
X_poly_3rd_train = X_poly_3rd[:180000,:]
Y_poly_3rd_train = Y[:180000]
X_poly_3rd_val = X_poly_3rd[180000:,:]
Y_poly_3rd_val = Y[180000:]

(200000, 286)


In [277]:
nnModel = Sequential()
nnModel.add(BatchNormalization(input_shape=(286,)))
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(Dropout(dropout))
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(Dropout(dropout))
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(1024, activation="relu"))
nnModel.add(Dropout(dropout))
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())
nnModel.add(Dense(2048, activation="relu"))
nnModel.add(BatchNormalization())

nnModel.add(Dense(512, activation="relu"))
nnModel.add(Dense(1, activation='sigmoid'))
nnModel.compile(loss="binary_crossentropy", optimizer=adam, metrics=["accuracy"])

In [278]:
nnModel.optimizer = Adam(lr=0.01)
nnModel.fit(X_poly_3rd_train, Y_poly_3rd_train, epochs=1, batch_size=32, validation_data=(X_poly_3rd_val, Y_poly_3rd_val))

Train on 180000 samples, validate on 20000 samples
Epoch 1/1


<keras.callbacks.History at 0x1a42a061d0>

In [279]:
nnModel.save_weights("deep_dense_weights.hdf5")

### Conv Net

In [282]:
Conv1D?

In [283]:
model = Sequential()
model.add(Conv1D(256, 3, padding="same", activation='relu', input_shape=(286,1)))
model.add(BatchNormalization())
model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, padding="same", strides=2))

model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, strides=2))

model.add(Conv1D(512, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(512, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, padding="same", strides=2))

model.add(Conv1D(512, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(512, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, padding="same", strides=2))

model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, strides=2))

model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(256, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, strides=2))

model.add(Conv1D(128, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(128, 3, padding="same", activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2, padding="same", strides=2))

model.add(Flatten())
model.add(Dense(516, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [284]:
X_train_expanded = np.expand_dims(X_poly_3rd_train, axis=2)
X_val_expanded = np.expand_dims(X_poly_3rd_val, axis=2)

In [287]:
model.compile(loss="binary_crossentropy", optimizer=adam, metrics=["accuracy"])

In [289]:
model.optimizer = Adam(lr=0.01)
model.fit(X_train_expanded, Y_poly_3rd_train, epochs=1, 
          validation_data=(X_val_expanded, Y_poly_3rd_val), batch_size=32)

Train on 180000 samples, validate on 20000 samples
Epoch 1/1
 40192/180000 [=====>........................] - ETA: 3:22:10 - loss: 0.7106 - acc: 0.6496

KeyboardInterrupt: 

# Scikit-Learn Models

In [74]:
from sklearn.preprocessing import StandardScaler

from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

from sklearn.metrics import accuracy_score

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

In [272]:
print(X.shape)
lsvc = LinearSVC(C=0.5, penalty="l1", dual=False).fit(X, Y)
model = SelectFromModel(lsvc, prefit=True)
X_new = model.transform(X)
print(X_new.shape)

(200000, 14)
(200000, 13)


## Decision Tree

## Gradient Boosting

In [40]:
xBoostClf = Pipeline([('scaling', StandardScaler()),
  ('classification', GradientBoostingClassifier())
])


In [78]:
param_grid = [
  {'max_depth': [1, 5, 9, 13, 17, 21, 23], 'n_estimators': [3, 7, 11, 15, 19, 23]},
]
xBoostClf = GradientBoostingClassifier()
grid = GridSearchCV(xBoostClf, param_grid)

In [79]:
grid.fit(X_train, Y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_estimators': [3, 7, 11, 15, 19, 23], 'max_depth': [1, 5, 9, 13, 17, 21, 23]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [82]:
xBoostCLF = grid.best_estimator_

In [83]:
xBoostCLF.score(X_val, Y_val)

0.69084999999999996

## Random Forest

In [84]:
param_grid = [
  {'max_depth': [1, 5, 9, 13, 17, 21, 23], 'n_estimators': [3, 7, 11, 15, 19, 23]},
]
randomForestClf = GradientBoostingClassifier()
grid = GridSearchCV(randomForestClf, param_grid)

In [85]:
grid.fit(X_train, Y_train)

GridSearchCV(cv=None, error_score='raise',
       estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'n_estimators': [3, 7, 11, 15, 19, 23], 'max_depth': [1, 5, 9, 13, 17, 21, 23]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [86]:
grid.best_estimator_

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=5,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=19,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [277]:
depths = [1, 5, 9, 13, 17, 21, 23]
scores = []
for depth in depths:
    rfClf = RandomForestClassifier(n_estimators=10, max_depth=depth)
    xValScores = cross_val_score(rfClf, X_new, Y)
    scores.append(xValScores.mean())

In [278]:
print(scores)
print("best depth: %d" % depths[np.argmax(scores)])
best_depth = depths[np.argmax(scores)]

[0.6571050000328554, 0.6904250109595761, 0.7052950045602876, 0.7182350000859121, 0.7242300058112406, 0.7234000077112085]
best depth: 17


In [279]:
nEstimators = [3, 7, 11, 15, 19, 23]
n_est_scores = []
for n in nEstimators:
    rfClf = RandomForestClassifier(n_estimators=n, max_depth=best_depth)
    xValScores = cross_val_score(rfClf, X_new, Y)
    n_est_scores.append(xValScores.mean())

In [280]:
print(n_est_scores)
print("best n estimators: %d" % nEstimators[np.argmax(n_est_scores)])
best_n_est = nEstimators[np.argmax(n_est_scores)]

[0.6571050000328554, 0.6904250109595761, 0.7052950045602876, 0.7182350000859121, 0.7242300058112406, 0.7234000077112085]
best n estimators: 19


In [281]:
rfClf = RandomForestClassifier(n_estimators=best_n_est, max_depth=best_depth)
cross_val_score(rfClf, X_new, Y).mean()

In [285]:
min_samples_sp = [2, 7, 12, 17, 22, 27]
min_samples_scores = []
for m in min_samples_sp:
    rfClf = RandomForestClassifier(n_estimators=n, max_depth=best_depth, min_samples_split=m)
    xValScores = cross_val_score(rfClf, X_new, Y)
    min_samples_scores.append(xValScores.mean())

In [287]:
print(min_samples_scores)
print("best min_samples: %d" % min_samples_sp[np.argmax(min_samples_scores)])
best_min_samples = min_samples_sp[np.argmax(min_samples_scores)]

[0.7301700072365448, 0.728255001061418, 0.73007000331152, 0.7290600059614828, 0.7277900076614278, 0.7275100031363912]
best min_samples: 2


In [342]:
rlClf = Pipeline([('scaling', StandardScaler()),
  ('classification', RandomForestClassifier(n_estimators=best_n_est, max_depth=best_depth, min_samples_split=best_min_samples))
])

## Ada Boosting

In [334]:
adaClf = Pipeline([('scaling', StandardScaler()),
  ('classification', AdaBoostClassifier())
])

In [329]:
cross_val_score(clf, X, Y)

array([0.68743156, 0.69256154, 0.6897669 ])

In [None]:
x_poly_train.shape

## Combinations

In [46]:
xBoostClf = Pipeline([('scaling', StandardScaler()),
  ('classification', GradientBoostingClassifier())
])

rfClf = Pipeline([('scaling', StandardScaler()),
  ('classification', RandomForestClassifier())
])

adaClf = Pipeline([('scaling', StandardScaler()),
  ('classification', AdaBoostClassifier())
])

In [235]:
adaClf.fit(X_poly_train, Y_poly_train);

In [238]:
adaClf.score(X_poly_train, Y_poly_train)

0.69758888888888892

In [49]:
rfClf.fit(X_train, Y_train);

Pipeline(memory=None,
     steps=[('scaling', StandardScaler(copy=True, with_mean=True, with_std=True)), ('classification', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

In [50]:
rfClf.score(X_val, Y_val)

0.67815000000000003

In [51]:
xBoostClf.fit(X_train, Y_train);

Pipeline(memory=None,
     steps=[('scaling', StandardScaler(copy=True, with_mean=True, with_std=True)), ('classification', GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impu...      presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False))])

In [52]:
xBoostClf.score(X_val, Y_val);

0.69494999999999996

In [89]:
rf = RandomForestClassifier(n_estimators=19, max_depth=5).fit(X_train, Y_train)
ada = AdaBoostClassifier(n_estimators=19).fit(X_train, Y_train)
xb = GradientBoostingClassifier(n_estimators=19, max_depth=5).fit(X_train, Y_train)
models = [rf, ada, xb]
predictions = []
for x in X_val:
    preds = [0,0]
    for model in models:
        pred = model.predict(x.reshape(1,14)).astype(int)
        preds[pred[0]] = preds[pred[0]] + 1
    predictions.append(np.argmax(preds))

In [90]:
predictions = np.array(predictions)

In [91]:
acc = (predictions == Y_val).sum().astype(float) / Y_val.shape[0]
print(acc)

0.68855


In [69]:
predictions[10:20]

array([3, 3, 3, 3, 3, 3, 3, 3, 2, 3])