## Stacking
Aim: To implement Stacking (Ensemble Learning)

In [None]:
#Importing libraries
from numpy import mean  #To calc avg of cross val scores
from sklearn.datasets import make_regression #To create a synthetic regression dataset
from sklearn.model_selection import cross_val_score, RepeatedKFold #For cross validation and model evaluation
from sklearn.linear_model import LinearRegression #Meta model for stacking
from sklearn.neighbors import KNeighborsRegressor  #Base model 1
from sklearn.tree import DecisionTreeRegressor #Base model 2
from sklearn.svm import SVR # Base model 3
from sklearn.ensemble import StackingRegressor #To build the stacking ensemble model

In [None]:
#Generating a synthetic regression problem: 100 samples with 20 features each
#The fixed random_state makes the generated data identical on every run
x, y = make_regression(
  n_samples=100,
  n_features=20,
  random_state=47,
)

In [None]:
#Defining a function to create a stacking model
def get_stacking():
  """Build the stacking ensemble.

  Returns a StackingRegressor whose base (level-0) estimators are a KNN
  regressor, a decision tree and an SVR, all with default settings, and
  whose final (level-1) estimator is a LinearRegression.
  """
  #Level-0 estimators are passed as (name, estimator) pairs
  level0 = [
    ('knn', KNeighborsRegressor()),
    ('cart', DecisionTreeRegressor()),
    ('svm', SVR()),
  ]
  #The level-1 estimator combines the outputs of the level-0 estimators
  level1 = LinearRegression()
  return StackingRegressor(estimators=level0, final_estimator=level1)

#Level 0 consists of the base models; it is a list because there are several of them
#Level 1 is the meta-model, which learns how to combine the predictions of the base models

In [None]:
#Creating a function to retrieve the models for comparison
def get_models():
  """Return the models to compare, keyed by a short display name.

  The dict holds each standalone base model plus the stacking ensemble
  built by get_stacking(), in the order knn, cart, svm, stacking.
  """
  return {
    'knn': KNeighborsRegressor(),
    'cart': DecisionTreeRegressor(),
    'svm': SVR(),
    'stacking': get_stacking(),
  }

In [None]:
#Defining a function to evaluate a model using cross-validation
def evaluate_model(model, x, y):
  """Score a model on (x, y) with repeated k-fold cross-validation.

  Returns the array of per-fold scores produced by cross_val_score,
  expressed as negative mean absolute error.
  """
  #10 folds repeated 3 times, with a fixed seed so the splits are the same on every call
  splitter = RepeatedKFold(n_splits=10, n_repeats=3, random_state=47)

  #cross_val_score treats higher scores as better, so MAE is reported negated:
  #a score closer to zero means a smaller error
  return cross_val_score(
    model,
    x,
    y,
    scoring="neg_mean_absolute_error",
    cv=splitter,
  )

In [None]:
#Collect every candidate model, then score each one on the same dataset
models = get_models()

results = []  #Per-model arrays of cross-validation scores
names = []    #Model names, in the same order as results

for name, model in models.items():
  scores = evaluate_model(model, x, y)  #Cross-validated negative MAE scores for this model
  names.append(name)
  results.append(scores)
  print(f"{name}: {mean(scores)}")  #Report the model name with its average score

knn: -119.28079020365918
cart: -154.53105671320435
svm: -139.32647871139048
stacking: -115.45396795560663


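As the scores show, the stacking model has outperformed all the other models: the scores are negative mean absolute errors, so the value closest to zero (stacking, about -115.45) corresponds to the smallest error.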