In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

In [18]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [19]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [20]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = SVC(probability=True)
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [21]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [22]:
from sklearn.model_selection import train_test_split

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [24]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [25]:
base_estimators[0].predict_proba(X_test)

array([[0.9330668 , 0.06693321],
       [0.9906465 , 0.00935353],
       [0.9812113 , 0.01878867],
       [0.86443233, 0.13556768],
       [0.7007035 , 0.29929647],
       [0.9825856 , 0.01741437],
       [0.0232529 , 0.9767471 ],
       [0.9436043 , 0.05639573],
       [0.9920395 , 0.0079605 ],
       [0.04396135, 0.95603865],
       [0.00647581, 0.9935242 ],
       [0.25299114, 0.74700886],
       [0.63829863, 0.36170134],
       [0.00917792, 0.9908221 ],
       [0.04161227, 0.95838773],
       [0.4947309 , 0.5052691 ],
       [0.56215775, 0.43784228],
       [0.9898585 , 0.01014147],
       [0.8120281 , 0.18797188],
       [0.98292226, 0.01707776],
       [0.9931666 , 0.00683337],
       [0.03298986, 0.96701014],
       [0.92668885, 0.07331117],
       [0.53293306, 0.46706694],
       [0.00667095, 0.99332905],
       [0.967048  , 0.03295198],
       [0.5648044 , 0.43519562],
       [0.45252854, 0.54747146],
       [0.267713  , 0.732287  ],
       [0.3207528 , 0.6792472 ],
       [0.

In [26]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [35]:
meta_train_set

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       ...,
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [36]:
from sklearn.model_selection import cross_val_score

In [37]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8345752291526939
0.8149742901855579
0.8373921305611447
0.8346926000447127
0.8345752291526939


In [41]:
meta_train_set2 = np.array([estimator.predict_proba(X_test)[:,1] for estimator in base_estimators]).T

In [42]:
meta_train_set2

array([[6.69332147e-02, 1.55982311e-01, 1.48736806e-01, 1.65636656e-01,
        3.98538392e-02],
       [9.35353059e-03, 3.60357613e-03, 1.32761161e-01, 1.66641934e-01,
        6.10264406e-02],
       [1.87886711e-02, 6.73929862e-02, 1.44463489e-01, 1.65692979e-01,
        7.89698994e-02],
       ...,
       [9.96354222e-01, 9.95018604e-01, 8.59222255e-01, 7.97022539e-01,
        9.99995666e-01],
       [2.24937752e-01, 9.65824693e-02, 1.94254565e-01, 1.67647762e-01,
        8.08248040e-02],
       [9.39262882e-02, 3.16079522e-01, 3.17965482e-01, 1.68955215e-01,
        1.91184448e-04]])

In [43]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7923585960205679
0.7923977196512407
0.8401687905209032
0.8289403085177733
0.8121953945897606


In [44]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

(356, 32)

In [166]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8088240554437738
0.8228705566733735
0.8480695282807957
0.8537402190923318
0.8592957746478873
