In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

In [3]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [4]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [5]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = SVC(probability=True)
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [6]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [9]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [10]:
base_estimators[0].predict_proba(X_test)

array([[9.68038619e-01, 3.19613591e-02],
       [9.10684049e-01, 8.93159583e-02],
       [1.68835104e-01, 8.31164896e-01],
       [1.22331381e-02, 9.87766862e-01],
       [8.85450482e-01, 1.14549547e-01],
       [8.73306394e-01, 1.26693591e-01],
       [4.34922993e-01, 5.65077007e-01],
       [6.33871555e-03, 9.93661284e-01],
       [8.17237318e-01, 1.82762697e-01],
       [9.43713784e-01, 5.62862121e-02],
       [9.70308959e-01, 2.96910536e-02],
       [9.45044816e-01, 5.49551845e-02],
       [7.95712471e-02, 9.20428753e-01],
       [7.34301209e-02, 9.26569879e-01],
       [5.25285602e-02, 9.47471440e-01],
       [1.53357387e-02, 9.84664261e-01],
       [7.97166169e-01, 2.02833816e-01],
       [3.63114178e-01, 6.36885822e-01],
       [1.38005316e-01, 8.61994684e-01],
       [9.22081649e-01, 7.79183358e-02],
       [9.16392446e-01, 8.36075246e-02],
       [6.12021089e-02, 9.38797891e-01],
       [7.39830136e-02, 9.26016986e-01],
       [6.21459484e-01, 3.78540486e-01],
       [9.492158

In [11]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [12]:
meta_train_set

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       ...,
       [1., 1., 1., 1., 1.],
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.]])

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.831377151799687
0.8341549295774648
0.8341158059467919
0.8425665101721439
0.8285211267605634


In [15]:
meta_train_set2 = np.array([estimator.predict_proba(X_test)[:,1] for estimator in base_estimators]).T

In [16]:
meta_train_set2

array([[0.03196136, 0.06581491, 0.1424873 , 0.15550208, 0.09441124],
       [0.08931596, 0.0677858 , 0.15481395, 0.15629556, 0.04196361],
       [0.8311649 , 0.9277563 , 0.65180658, 0.760383  , 0.97346206],
       ...,
       [0.98263055, 0.97659519, 0.67364077, 0.75389892, 0.99990755],
       [0.26977044, 0.17347012, 0.16387913, 0.15822681, 0.09158297],
       [0.98464334, 0.98374294, 0.85556138, 0.76620557, 1.        ]])

In [17]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8256651017214397
0.8201095461658843
0.8369327073552425
0.8341158059467919
0.8228090766823162


In [18]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

(356, 32)

In [19]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8032863849765258
0.8228482003129891
0.8369327073552426
0.8341158059467919
0.7834115805946791


In [None]:
x