In [17]:
import numpy as np
import pandas as pd
import statistics as stts

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

<img src="blending_scheme.png" alt="Drawing" style="width: 800px;"/>

### 1. Import data

In [18]:
# Load the cleaned dataset; the path is relative to the notebook's directory.
data = pd.read_csv('../data/data_cleaned.csv')

# Separate features and target: X is a copy of `data` without 'Survived';
# pop() then removes that column from `data` itself and returns it as y.
X, y = data.drop('Survived', axis=1), data.pop('Survived')

# 70/30 split, stratified on the target. The fixed random_state makes the
# split (and every score derived from it) repeat on Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

### 2. Create models

In [19]:
# Base learners that are fitted and scored individually as a baseline.
# The tree is seeded because scikit-learn permutes the candidate features at
# every split, so an unseeded tree can differ from one run to the next.
DT = DecisionTreeClassifier(random_state=42)
KNN = KNeighborsClassifier()
LR = LogisticRegression(solver='lbfgs', max_iter=1000)


### 3. Evaluate each model

In [20]:
# Fit every base learner on the training split.
for estimator in (DT, KNN, LR):
    estimator.fit(X_train, y_train)

# Test-set score of each model, in the order DT, KNN, LR (cell output).
tuple(estimator.score(X_test, y_test) for estimator in (DT, KNN, LR))

(0.7947761194029851, 0.7164179104477612, 0.8134328358208955)

### 4. Train models and create new datasets using their predictions


#### 4.1 First approach: feeding the entire dataset

In [21]:
def model_predictions_holdout(model, X_train, y_train, X_holdout, X_test):
    """Fit ``model`` and return its predictions for the hold-out and test sets.

    The model is fitted in place on ``(X_train, y_train)``. Each returned
    frame has a single column whose label is made of the upper-case
    characters of the model's class name (e.g. ``LogisticRegression`` -> ``LR``).

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : data the model is fitted on.
    X_holdout, X_test : objects with an ``.index`` attribute (e.g. DataFrames)
        for which predictions are produced.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        Predictions for ``X_holdout`` and for ``X_test``, each carrying the
        index of the frame it was predicted from.
    """
    model.fit(X_train, y_train)

    # Column label: keep only the capital letters of the estimator class name.
    column_label = "".join(filter(str.isupper, model.__class__.__name__))

    # Predict the hold-out rows first, then the test rows, and wrap each
    # result in a one-column frame aligned with its source index.
    holdout_frame, test_frame = [
        pd.DataFrame(model.predict(features), index=features.index, columns=[column_label])
        for features in (X_holdout, X_test)
    ]

    return holdout_frame, test_frame

#### a) Train models

In [22]:
# Fresh, unfitted base learners for the blending pipeline. The tree and the
# liblinear solver are seeded because both use random numbers while fitting.
DT = DecisionTreeClassifier(random_state=42)
KNN = KNeighborsClassifier()
LR = LogisticRegression(solver='liblinear', max_iter=1000, random_state=42)

# Carve a hold-out set (30%) out of the training data; the base models are
# fitted on the remainder and the meta-model is fitted on the hold-out rows.
# The fixed random_state keeps the split identical across runs.
# NOTE: X_train / y_train are rebound to the reduced training part, so
# re-running this cell on its own shrinks them again; re-run from the
# data-loading cell to start from the full training split.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)

# Each call fits one base model and returns its predictions for the hold-out
# rows (M*_train) and for the test rows (M*_test).
M1_train, M1_test = model_predictions_holdout(DT, X_train, y_train, X_holdout, X_test)
M2_train, M2_test = model_predictions_holdout(KNN, X_train, y_train, X_holdout, X_test)
M3_train, M3_test = model_predictions_holdout(LR, X_train, y_train, X_holdout, X_test)

#### b) Create the base models predictions dataset

In [23]:
# Training data for the meta-model: the hold-out features with the three
# base-model prediction columns appended side by side.
holdout_blocks = [X_holdout, M1_train, M2_train, M3_train]
X_train_meta = pd.concat(holdout_blocks, axis='columns')

# Same layout for the test rows.
test_blocks = [X_test, M1_test, M2_test, M3_test]
X_test_meta = pd.concat(test_blocks, axis='columns')

#### c) Make predictions

In [24]:
# Stacker model: a ridge classifier fitted on the hold-out rows (original
# features plus base-model predictions) against the hold-out labels.
meta_model = RidgeClassifier().fit(X_train_meta, y_holdout)

# Score of the blended model on the test set (displayed as the cell output).
meta_model.score(X_test_meta, y_test)


0.8134328358208955