In [1]:
import os
from typing import Union, List, Dict, Optional
import pickle

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
import tensorflow as tf
import tensorflow.keras.layers as layers
import tensorflow.keras.losses as losses
import tensorflow.keras.optimizers as optim
import tensorflow.keras.models as models
from tensorflow.keras.callbacks import EarlyStopping

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
class Stacking:
    """
    Multi-layer stacking ensemble for regression or classification.

    Every entry of ``base_models`` is one layer (a list of models). ``train``
    feeds the raw inputs to the first layer and builds the inputs of each
    following layer from the out-of-fold predictions of the previous one; the
    meta model is fitted on the out-of-fold predictions of the last layer.
    ``predict`` pushes new samples through the fitted layers and returns the
    meta model's output for every row.

    Models are dispatched on their type description: anything whose type
    mentions ``sklearn`` is driven through ``fit`` / ``predict`` /
    ``predict_proba``, anything mentioning ``tensorflow`` or ``keras`` is
    driven through the Keras ``fit`` / ``predict`` API.
    """

    # Seed shared by every KFold split so fold assignment is reproducible.
    _SEED = 6174

    def __init__(self, base_models: Union[Dict, List], meta_model: object,
                 task: str, n_classies: Optional[int] = None):
        """
        Constructor

        Parameters
        -----------
        base_models : Dict or List of layers of Sklearn APIs or Tensorflow models.
            Base models of stacking. Each element (or each dict value) is one
            layer: a list of models. A single model is treated as a layer of one.
        meta_model : Sklearn API or Tensorflow model
            Meta model of stacking.
        task : str
            Kind of task you want to solve. 'regression' or 'classification'.
        n_classies : int
            Number of classes. You must set this if task is 'classification'.

        Raises
        -----------
        ValueError
            If task is unknown, or task is 'classification' and n_classies is None.

        Returns
        -----------
        None
        """

        self.meta_model = meta_model
        self.base_models = dict()
        self.n_base = 0
        self.task = task

        if self.task not in ["regression", "classification"]:
            raise ValueError("Please set task is regression or classification.")

        self.n_classies = n_classies

        if self.n_classies is None and self.task == "classification":
            raise ValueError("If you set task is classification, please set n_classies(int).")

        # Scalers are fitted by train() and reused by predict().
        self._input_scaler = None
        self._meta_scaler = None

        # Iterating a dict yields its keys, so take the values explicitly.
        layer_specs = base_models.values() if isinstance(base_models, dict) else base_models
        for layer_models in layer_specs:
            if not isinstance(layer_models, (list, tuple)):
                layer_models = [layer_models]
            self.n_base += 1
            self.base_models[f"models_{self.n_base}"] = list(layer_models)

    @staticmethod
    def _model_kind(model) -> str:
        """Return 'sklearn' or 'keras' for a model, or raise TypeError if it is neither."""
        description = str(type(model))
        if "sklearn" in description:
            return "sklearn"
        if "tensorflow" in description or "keras" in description:
            return "keras"
        # Skipping an unknown model would leave its feature columns uninitialised.
        raise TypeError(f"Unsupported model type: {description}")

    def _n_outputs(self) -> int:
        """Number of feature columns one model contributes to the next layer."""
        return 1 if self.task == "regression" else self.n_classies

    def _fit_model(self, model, train_X, train_y, val_X, val_y,
                   batch_size: int, epochs: int) -> None:
        """Fit one model on a training split; Keras models also get the validation split."""
        if self._model_kind(model) == "sklearn":
            model.fit(train_X, train_y)
        else:
            early_stopping = EarlyStopping(monitor="val_loss", patience=5)
            model.fit(x=train_X, y=train_y, epochs=epochs,
                      batch_size=batch_size, validation_data=(val_X, val_y),
                      callbacks=[early_stopping])

    def _predict_model(self, model, X):
        """Return predictions (regression) or class probabilities (classification)."""
        kind = self._model_kind(model)
        if kind == "sklearn" and self.task == "classification":
            return model.predict_proba(X)
        return model.predict(X)

    def _store_predictions(self, target: np.ndarray, rows: np.ndarray,
                           position: int, pred) -> None:
        """Write one model's predictions for ``rows`` into its own column block of ``target``."""
        width = self._n_outputs()
        # Each model owns `width` consecutive columns, so blocks never overlap.
        start = position * width
        target[rows, start:start + width] = np.asarray(pred).reshape(len(rows), width)

    def train(self, X: np.ndarray, y: np.ndarray, cv: int = 8,
              batch_size: int = 16, epochs: int = 100):
        """
        Training base models and meta model. Trained models and the fitted
        scalers are saved on the instance.

        Parameters
        -----------
        X : np.ndarray
            X of training set, one row per sample.
        y : np.ndarray
            Target of training set.
        cv : int
            Number of folds for cross validation.
        batch_size : int
            Batch size for training neural network. You must set this if you use neural network.
        epochs : int
            Number of epochs for training neural network. You must set this if you use neural network.

        Raises
        ----------
        ValueError
            If X and y do not contain the same number of samples.
        TypeError
            If a model is neither a sklearn-style nor a Tensorflow/Keras model.

        Returns
        ----------
        None
        """

        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        if len(y) != n_samples:
            raise ValueError("X and y must contain the same number of samples.")
        width = self._n_outputs()

        layer_input = X  # inputs of the layer currently being trained

        # training base models
        print("Start training base models")
        for layer_no, layer_models in enumerate(tqdm(self.base_models.values()), start=1):
            # One row per sample; every row is filled exactly once because each
            # sample lands in exactly one validation fold.
            out_of_fold = np.empty((n_samples, len(layer_models) * width))
            kf = KFold(n_splits=cv, random_state=self._SEED, shuffle=True)

            for train_idx, val_idx in kf.split(layer_input):
                train_X, val_X = layer_input[train_idx], layer_input[val_idx]
                train_y, val_y = y[train_idx], y[val_idx]

                if layer_no == 1:
                    # Only the raw inputs are standardised; the scaler is fitted
                    # on the training fold so validation rows stay unseen.
                    fold_scaler = StandardScaler()
                    train_X = fold_scaler.fit_transform(train_X)
                    val_X = fold_scaler.transform(val_X)

                for position, model in enumerate(layer_models):
                    # NOTE: Keras models are not re-initialised between folds, so
                    # their out-of-fold predictions in later folds come from
                    # weights that have already seen those rows.
                    self._fit_model(model, train_X, train_y, val_X, val_y,
                                    batch_size, epochs)
                    self._store_predictions(out_of_fold, val_idx, position,
                                            self._predict_model(model, val_X))

            # Models used by predict() should know the whole training set.
            # sklearn estimators restart from scratch on fit(), so refit them;
            # Keras models keep the weights accumulated over the folds.
            if layer_no == 1:
                self._input_scaler = StandardScaler().fit(layer_input)
                full_X = self._input_scaler.transform(layer_input)
            else:
                full_X = layer_input
            for model in layer_models:
                if self._model_kind(model) == "sklearn":
                    model.fit(full_X, y)

            layer_input = out_of_fold

        # training meta model
        print("Start training meta model")
        self._meta_scaler = StandardScaler()
        meta_X = self._meta_scaler.fit_transform(layer_input)

        if self._model_kind(self.meta_model) == "sklearn":
            self.meta_model.fit(meta_X, y)
        else:
            # Keras needs held-out data for early stopping, so keep training
            # fold by fold with the shared, stored scaler.
            kf = KFold(n_splits=cv, random_state=self._SEED, shuffle=True)
            for train_idx, val_idx in tqdm(kf.split(meta_X), total=cv):
                self._fit_model(self.meta_model, meta_X[train_idx], y[train_idx],
                                meta_X[val_idx], y[val_idx], batch_size, epochs)

    def predict(self, X, cv=8):
        """
        Predict with the trained stack.

        Parameters
        -----------
        X : np.ndarray
            Samples to predict, with the same feature layout as the training X.
        cv : int
            Unused. Kept so existing calls that pass it keep working; prediction
            does not involve cross validation.

        Raises
        -----------
        RuntimeError
            If train() has not been called yet.

        Returns
        -----------
        Output of the meta model for every row of X: ``predict_proba`` for a
        sklearn classifier, ``predict`` otherwise.
        """
        if self._meta_scaler is None:
            raise RuntimeError("train() must be called before predict().")

        layer_input = np.asarray(X)
        n_samples = layer_input.shape[0]
        all_rows = np.arange(n_samples)
        width = self._n_outputs()

        for layer_no, layer_models in enumerate(tqdm(self.base_models.values()), start=1):
            if layer_no == 1:
                # Same standardisation the first layer was refitted with.
                layer_input = self._input_scaler.transform(layer_input)

            layer_output = np.empty((n_samples, len(layer_models) * width))
            for position, model in enumerate(layer_models):
                self._store_predictions(layer_output, all_rows, position,
                                        self._predict_model(model, layer_input))
            layer_input = layer_output

        return self._predict_model(self.meta_model,
                                   self._meta_scaler.transform(layer_input))

In [3]:
def create_nn(input_shape, outputs_shape):
    """
    Build and compile a small fully connected network with one hidden layer.

    Parameters
    -----------
    input_shape
        Accepted but not used: the input layer is fixed at 2 features.
    outputs_shape : int
        Number of units of the (linear) output layer.

    Returns
    -----------
    Keras model compiled with Adam (learning rate 0.001), mean squared error
    loss and an "mse" metric.
    """
    net_input = layers.Input(shape=(2,))
    hidden_layer = layers.Dense(units=8, activation="relu")
    hidden_out = hidden_layer(net_input)
    output_layer = layers.Dense(units=outputs_shape)
    net_output = output_layer(hidden_out)

    network = models.Model(inputs=[net_input], outputs=[net_output])
    network.compile(
        optimizer=optim.Adam(learning_rate=0.001),
        loss=losses.MeanSquaredError(),
        metrics=["mse"],
    )

    return network

In [4]:
# Synthetic regression data. A seeded generator makes the draw identical on
# every Restart & Run All.
SEED = 6174
N_SAMPLES, N_FEATURES = 16, 16
rng = np.random.default_rng(SEED)

X = rng.random((N_SAMPLES, N_FEATURES))  # uniform features in [0, 1)
y = rng.normal(size=(N_SAMPLES, 1))      # standard-normal target, one column

In [5]:
# First-layer regressors: they see the raw features.
knn = KNeighborsRegressor()
ridge = Ridge()

# Second-layer regressors: they see one column per first-layer model, i.e. 2
# features, which is why the network is requested with an input width of 2.
lasso = Lasso()
nn = create_nn(2, 1)

# Meta model that combines the second-layer predictions.
linear = LinearRegression()

In [6]:
# Two stacked layers of base models, combined by a linear meta model.
first_layer = [knn, ridge]
second_layer = [lasso, nn]
base_models = [first_layer, second_layer]
meta_model = linear

test = Stacking(base_models=base_models, meta_model=meta_model, task="regression")

In [7]:
# Smoke run: 2 folds and a single epoch keep the cell fast.
test.train(
    X=X,
    y=y,
    cv=2,
    batch_size=1,
    epochs=1,
)

Start training base models


  0%|          | 0/2 [00:00<?, ?it/s]



0it [00:00, ?it/s]