In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

In [3]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [4]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [5]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = SVC(probability=True)
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [6]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [9]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [10]:
base_estimators[0].predict_proba(X_test)

array([[3.30471456e-01, 6.69528544e-01],
       [9.75249052e-01, 2.47509759e-02],
       [8.19614232e-01, 1.80385783e-01],
       [9.00255799e-01, 9.97442305e-02],
       [9.60513592e-01, 3.94864343e-02],
       [7.58224130e-01, 2.41775885e-01],
       [8.48788619e-01, 1.51211411e-01],
       [8.72853994e-01, 1.27145991e-01],
       [6.27666712e-03, 9.93723333e-01],
       [9.65894461e-01, 3.41055244e-02],
       [9.90168571e-01, 9.83141083e-03],
       [9.86099422e-01, 1.39005678e-02],
       [7.08112359e-01, 2.91887611e-01],
       [1.27879143e-01, 8.72120857e-01],
       [9.87531543e-01, 1.24684693e-02],
       [8.84289980e-01, 1.15710042e-01],
       [9.03456211e-01, 9.65438187e-02],
       [2.36702144e-01, 7.63297856e-01],
       [8.24391842e-04, 9.99175608e-01],
       [9.55871284e-01, 4.41287309e-02],
       [9.79409456e-01, 2.05905270e-02],
       [3.78582120e-01, 6.21417880e-01],
       [2.19927251e-01, 7.80072749e-01],
       [4.93745387e-01, 5.06254613e-01],
       [1.009506

In [11]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [12]:
meta_train_set

array([[1., 1., 1., 0., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 1.]])

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8230438184663535
0.8257824726134585
0.8258215962441314
0.8173708920187792
0.8173708920187792


In [15]:
meta_train_set2 = np.array([estimator.predict_proba(X_test)[:,1] for estimator in base_estimators]).T

In [16]:
meta_train_set2

array([[6.69528544e-01, 9.33306529e-01, 5.72066667e-01, 4.27268221e-01,
        9.99988680e-01],
       [2.47509759e-02, 2.86046926e-02, 1.60384383e-01, 1.38312017e-01,
        1.96973394e-02],
       [1.80385783e-01, 1.32164672e-01, 1.35003676e-01, 1.47783146e-01,
        7.97683423e-02],
       ...,
       [8.98052566e-03, 4.11686733e-02, 3.55473409e-01, 1.40728684e-01,
        5.65367816e-09],
       [2.07200777e-02, 4.57575269e-02, 1.52219927e-01, 1.40000122e-01,
        2.30276118e-02],
       [3.51976573e-01, 6.06881574e-01, 7.02783593e-01, 7.87008230e-01,
        9.40784651e-01]])

In [17]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7528169014084507
0.7696791862284821
0.8033255086071988
0.8033255086071988
0.8060641627543037


In [18]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

(356, 32)

In [19]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7557120500782473
0.7865414710485134
0.800508607198748
0.809037558685446
0.7696009389671361
