# Model Ensemble: Toy example

In [41]:
# load libraries

from sklearn.base import TransformerMixin
from sklearn.datasets import make_regression
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, Ridge
import sklearn.metrics as met
import numpy as np

I use three different regression models: ridge, random forest, and k-neighbors. 

In [225]:
# define model transformers
class RidgeTransformer(TransformerMixin, Ridge):
    """Ridge regressor whose predictions can be used as a feature column.

    Adds a ``transform`` method on top of ``Ridge`` so the fitted model can be
    placed inside a ``Pipeline`` / ``FeatureUnion`` as a transformer step.

    ``TransformerMixin`` is listed before ``Ridge`` on purpose: scikit-learn
    asks for mixins to sit to the left of the estimator base class so the
    method resolution order is the one the library expects.
    """

    def transform(self, X, *_):
        """Return ``self.predict(X)`` reshaped into a single column.

        Extra positional arguments are accepted and ignored.
        """
        return self.predict(X).reshape(-1, 1)

class RandomForestTransformer(TransformerMixin, RandomForestRegressor):
    """Random-forest regressor whose predictions can be used as a feature column.

    Adds a ``transform`` method on top of ``RandomForestRegressor`` so the
    fitted model can be placed inside a ``Pipeline`` / ``FeatureUnion`` as a
    transformer step.

    ``TransformerMixin`` is listed before ``RandomForestRegressor`` on purpose:
    scikit-learn asks for mixins to sit to the left of the estimator base class
    so the method resolution order is the one the library expects.
    """

    def transform(self, X, *_):
        """Return ``self.predict(X)`` reshaped into a single column.

        Extra positional arguments are accepted and ignored.
        """
        return self.predict(X).reshape(-1, 1)

class KNeighborsTransformer(TransformerMixin, KNeighborsRegressor):
    """k-nearest-neighbors regressor whose predictions can be used as a feature column.

    Adds a ``transform`` method on top of ``KNeighborsRegressor`` so the fitted
    model can be placed inside a ``Pipeline`` / ``FeatureUnion`` as a
    transformer step.

    ``TransformerMixin`` is listed before ``KNeighborsRegressor`` on purpose:
    scikit-learn asks for mixins to sit to the left of the estimator base class
    so the method resolution order is the one the library expects.
    """

    def transform(self, X, *_):
        """Return ``self.predict(X)`` reshaped into a single column.

        Extra positional arguments are accepted and ignored.
        """
        return self.predict(X).reshape(-1, 1)

I combine the predictions generated by these models and use them as features to **predict** the actual outcome. I weight those predictions using linear regression, although I could use any other model (e.g., random forest, xgboost). This final weighting model is the actual ensemble. 

In [217]:
# function to create model
def build_model(final_estimator=None, random_state=None):
    """Build the stacked ensemble as a single scikit-learn ``Pipeline``.

    Three base learners each emit one column of predictions:

    * ``'ridge'``: ``StandardScaler`` -> ``PolynomialFeatures`` -> ``RidgeTransformer``
    * ``'rand_forest'``: ``RandomForestTransformer``
    * ``'knn'``: ``KNeighborsTransformer``

    A ``FeatureUnion`` stacks those columns side by side and the final step
    combines them.

    Parameters
    ----------
    final_estimator : estimator, optional
        Model that combines the base-learner predictions. Defaults to a fresh
        ``LinearRegression()`` (a linear weighting of the three predictions).
    random_state : int, RandomState instance or None, optional
        Passed to the random forest so its fit can be made reproducible.
        ``None`` (the default) leaves the forest unseeded.

    Returns
    -------
    Pipeline
        Unfitted pipeline with steps ``'pred_union'`` and ``'lin_regr'``.

    Notes
    -----
    NOTE(review): during ``fit`` the final estimator is trained on predictions
    the base learners make for the very rows they were fitted on. Flexible
    learners such as the random forest therefore look better than they are
    out of sample. Out-of-fold predictions (e.g.
    ``sklearn.ensemble.StackingRegressor``) avoid this.
    """
    if final_estimator is None:
        final_estimator = LinearRegression()

    ridge_transformer = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('poly_feats', PolynomialFeatures()),
        ('ridge', RidgeTransformer())
    ])

    pred_union = FeatureUnion(
        transformer_list=[
            ('ridge', ridge_transformer),
            ('rand_forest', RandomForestTransformer(random_state=random_state)),
            ('knn', KNeighborsTransformer())
        ],
    )

    # The step keeps the name 'lin_regr' even for a custom final estimator so
    # existing references such as ``model.named_steps['lin_regr']`` keep working.
    model = Pipeline(steps=[
        ('pred_union', pred_union),
        ('lin_regr', final_estimator)
    ])

    return model

# Run an example

In [219]:
# Fixed seed so the synthetic data set and the train/test split are identical
# on every Restart & Run All.
SEED = 42

model = build_model()

# Synthetic regression problem: 10 features, a single target.
X, y = make_regression(n_features=10, n_targets=1, random_state=SEED)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

In [220]:
# Fit the whole pipeline (base learners and the final combiner) on the training split.
model.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('pred_union', FeatureUnion(n_jobs=1,
       transformer_list=[('ridge', Pipeline(memory=None,
     steps=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('poly_feats', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('ridge', RidgeTransformer(...=None)), ('lin_regr', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False))])

In [230]:
# Ensemble predictions for the held-out test rows.
pred = model.predict(X_test)

In [228]:
# Mean squared error of the ensemble's predictions against the test targets.
met.mean_squared_error(y_test, pred)

932.46628676976309

This is the test-set MSE obtained by ensembling and weighting the three models. 

Using this ensemble model, I can predict the counterfactual for different values of the variable of interest (e.g., GDP), and then estimate the **first difference**.

The keys are: 

- We can pretty easily combine any models we want
- The combination weighting can be very flexible (random forest, neural networks)
- The meaning of the counterfactual, though, is not that clear

# Uncertainty

The easiest way to quantify the uncertainty is to bootstrap the estimates of the first difference. 

I will do this once I have the data used in the paper. 