In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

In [3]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

In [4]:
X = np.load("./tatanic_X_train.npy")
y = np.load("./tatanic_y_train.npy")

In [5]:
estimator1 = XGBClassifier(max_depth=3, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator2 = LGBMClassifier(max_depth=2, learning_rate=0.5, n_estimators=50, n_jobs=-1)
estimator3 = RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1)
estimator4 = LinearSVC()
estimator5 = MLPClassifier(hidden_layer_sizes=(512,256, 32))

In [6]:
base_estimators = [estimator1, estimator2, estimator3, estimator4, estimator5]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((533, 27), (356, 27), (533,), (356,))

In [9]:
for estimator in base_estimators:
            estimator.fit(X_train, y_train)

In [10]:
base_estimators[0].predict_proba(X_test)

array([[9.67741787e-01, 3.22582349e-02],
       [3.66913736e-01, 6.33086264e-01],
       [1.00064933e-01, 8.99935067e-01],
       [9.27623510e-01, 7.23764822e-02],
       [2.41828799e-01, 7.58171201e-01],
       [2.52092421e-01, 7.47907579e-01],
       [9.29115057e-01, 7.08849132e-02],
       [9.66407180e-01, 3.35928202e-02],
       [2.25713730e-01, 7.74286270e-01],
       [8.61730218e-01, 1.38269812e-01],
       [9.19967294e-01, 8.00327137e-02],
       [8.12281311e-01, 1.87718704e-01],
       [8.82106245e-01, 1.17893778e-01],
       [5.12654960e-01, 4.87345040e-01],
       [6.10115528e-02, 9.38988447e-01],
       [6.11549616e-03, 9.93884504e-01],
       [9.56017852e-01, 4.39821258e-02],
       [9.61536586e-01, 3.84633988e-02],
       [9.73941326e-01, 2.60586701e-02],
       [8.70166123e-01, 1.29833892e-01],
       [9.73118186e-01, 2.68818140e-02],
       [9.35865581e-01, 6.41344115e-02],
       [6.07093334e-01, 3.92906696e-01],
       [8.82106245e-01, 1.17893778e-01],
       [5.512006

In [11]:
meta_train_set = np.array([estimator.predict(X_test) for estimator in base_estimators]).T

In [12]:
meta_train_set

array([[0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       ...,
       [0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0.],
       [0., 0., 0., 0., 0.]])

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8425396825396826
0.8568253968253969
0.8426984126984127
0.8513492063492064
0.836984126984127


In [15]:
meta_train_set2 = np.array([base_estimators[i].predict_proba(X_test)[:,1] for i in [0,1,2,4]]).T

In [163]:
meta_train_set2.shape

(356, 4)

In [164]:
for estimator in base_estimators:
    result = cross_val_score(estimator, meta_train_set2, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.7976738207019898
0.8060473954840152
0.833943661971831
0.8339425441538116
0.8368388106416276


In [165]:
new_X_test = np.concatenate([X_test, meta_train_set2], axis = 1)
new_X_test.shape

(356, 31)

In [166]:
for estimator in base_estimators:
    result = cross_val_score(estimator, new_X_test, y_test, scoring="accuracy" , cv=5).mean()
    print(result)

0.8088240554437738
0.8228705566733735
0.8480695282807957
0.8537402190923318
0.8592957746478873
