### Stacking

In this notebook we use another ensemble technique called stacking. The first-layer estimators pass their predictions as additional input features to a second-layer estimator, and this combiner model (the second-layer estimator) is itself a trainable model.

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import datetime
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.regressor import StackingRegressor
import utils

In [2]:
# Read the training and test tables from the local dataset directory.
df_train = pd.read_csv('dataset/df_train.csv')
df_test = pd.read_csv('dataset/df_test.csv')

# Separate the regression target from the remaining columns of the training table.
target = df_train['SalePrice']
df_train = df_train.drop(columns=['SalePrice'])

# Hold out 25% of the labelled rows; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_train,
    target,
    test_size=0.25,
    random_state=42,
)

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from mlxtend.regressor import StackingCVRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
# Single seed shared by every estimator below so repeated runs are comparable.
RANDOM_SEED = 42

# First-layer (base) regressors: two regularised linear models.
lasso = Lasso(random_state=RANDOM_SEED)
ridge = Ridge(random_state=RANDOM_SEED)

# Second-layer (meta) regressor that combines the base regressors' predictions.
rf = RandomForestRegressor(random_state=RANDOM_SEED)

# With use_features_in_secondary=True the meta-regressor is given the original
# features in addition to the base regressors' predictions (per the mlxtend docs).
stack = StackingCVRegressor(
    regressors=(lasso, ridge),
    meta_regressor=rf,
    use_features_in_secondary=True,
    random_state=RANDOM_SEED,
)

# Search grid: the same three alpha values for each base regressor.
# Keys follow the `<estimator name>__<parameter>` convention used by the stack.
params = {
    'ridge__alpha': [0.1, 1.0, 10.0],
    'lasso__alpha': [0.1, 1.0, 10.0],
}

# 5-fold grid search over the stack; scoring is left at its default and the
# best parameter combination is refit on all of X_train afterwards.
grid = GridSearchCV(estimator=stack, param_grid=params, cv=5, refit=True)
grid.fit(X_train, y_train)

# Report the best mean cross-validated score and the parameters that achieved it.
print("Best: %f using %s" % (grid.best_score_, grid.best_params_))







Best: 0.896329 using {'lasso__alpha': 10.0, 'ridge__alpha': 10.0}
