In [25]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston

A simple walkthrough of a stacking example. A meta model trained on the predictions of the base learners can improve on the results of the individual base learners.

In [58]:
 # Seed NumPy's global random state once, up front. No random_state is passed
 # to train_test_split or RandomForestRegressor in this notebook, so they draw
 # from this global state and the run becomes repeatable.
 SEED = 100
 np.random.seed(SEED)

In [59]:
# Load the Boston housing data as a feature matrix X and a target vector y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell requires an older scikit-learn release.
X, y = load_boston(return_X_y=True)

# First hold out 10% of the rows as the final test split ...
X_dev, X_test, y_dev, y_test = train_test_split(X, y, test_size=0.1)
# ... then cut the remainder in half: the base learners are fit on the train
# half, and the meta model is fit on predictions made for the valid half.
X_train, X_valid, y_train, y_valid = train_test_split(X_dev, y_dev, test_size=0.5)

In [60]:
# Sanity check: show the dimensions of the base-learner training split
# (features first, then targets).
for split in (X_train, y_train):
    print(split.shape)

(227, 13)
(227,)


In [61]:
# Specify the base learners, both with default settings:
# m1 is a random forest, m2 a linear regression.
m1, m2 = RandomForestRegressor(), LinearRegression()

In [62]:
# Fit both base learners on the training split only; the validation and test
# splits are not used here, so predictions on them can be used in later cells.
# The value of the last expression is what the cell displays as its output.
m1.fit(X_train, y_train)
m2.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [63]:
# Base-learner predictions on the validation split, in the order (m1, m2).
# These become the training inputs of the meta model.
pred_1, pred_2 = (model.predict(X_valid) for model in (m1, m2))

In [64]:
# Base-learner predictions on the held-out test split, in the order (m1, m2).
# They are scored on their own and also fed to the meta model.
test_pred_1, test_pred_2 = (model.predict(X_test) for model in (m1, m2))

In [65]:
# Build the meta model's input matrices: one row per sample, one column per
# base learner (column 0 = m1's prediction, column 1 = m2's prediction).
stacked_pred = np.stack((pred_1, pred_2), axis=1)
stacked_test_pred = np.stack((test_pred_1, test_pred_2), axis=1)

In [66]:
# The meta model is a linear regression fit on the stacked validation
# predictions (inputs) against the validation targets.
meta_m = LinearRegression().fit(stacked_pred, y_valid)
# Trailing expression so the cell still displays the fitted estimator.
meta_m

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [67]:
# Final stacked prediction: feed the base learners' test-split predictions
# through the fitted meta model.
final_pred = meta_m.predict(X=stacked_test_pred)

In [70]:
from sklearn.metrics import r2_score

# Score each base learner and the stacked meta model on the held-out test
# split using R2. In the recorded run the stacked model scores highest, but
# this is a single seed; repeat with other seeds before drawing conclusions.
score_report = (
    ("RandomForestRegressor R2: {}", test_pred_1),
    ("LinearRegression R2: {}", test_pred_2),
    ("Stacked: {}", final_pred),
)
for template, predictions in score_report:
    print(template.format(r2_score(y_test, predictions)))

RandomForestRegressor R2: 0.8333083153847172
LinearRegression R2: 0.8323001818623342
Stacked: 0.8641864863588599
