# Importing libraries

Importing all the necessary libraries

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import os

from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler,StandardScaler,OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error,auc

import warnings
warnings.filterwarnings('ignore')

# Loading the Dataset

In [None]:
# Read the competition's training table, test table and sample-submission
# template, in that order, from the Kaggle input directory.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tabular-playground-series-jan-2021/train.csv',
        '/kaggle/input/tabular-playground-series-jan-2021/test.csv',
        '/kaggle/input/tabular-playground-series-jan-2021/sample_submission.csv',
    )
)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    The mean of the squared differences is square-rooted, so the result is
    expressed in the same units as the target (a plain mean squared error
    is not).

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``.

    Returns:
        The RMSE as a Python ``float``.

    Raises:
        ValueError: If the inputs are empty or differ in number of elements.
    """
    # Flatten both sides so a column vector and a flat vector compare
    # element by element instead of broadcasting against each other.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError(
            f"y_true and y_pred differ in size: {actual.size} != {predicted.size}"
        )
    if actual.size == 0:
        raise ValueError("rmse requires at least one sample")
    return float(np.sqrt(np.mean((actual - predicted) ** 2)))

In [None]:
# Work on copies of both tables with the `id` column removed.
train_df = train.drop(columns='id')
test_df = test.drop(columns='id')

In [None]:
# Split the training table into the `target` column (y) and every
# remaining column (X).
y = train_df['target']
X = train_df.drop(columns='target')

# Building Different Models

In [None]:
# Two L1-regularised linear models, both capped at 100 solver iterations.
lasso = Lasso(
    alpha=0.0005,
    max_iter=100,
    random_state=1,
)
ENet = ElasticNet(
    alpha=0.0005,
    l1_ratio=0.9,
    max_iter=100,
    random_state=3,
)

# Two tree ensembles of 100 estimators each.
GBoost = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.05,
    max_depth=10,
    random_state=5,
)
# NOTE(review): no random_state is set here, so this forest is not seeded.
model_rf = RandomForestRegressor(
    n_estimators=100,
    max_depth=17,
)

In [None]:
# Fit every candidate on the full training set and record the value that
# `rmse` returns for its predictions on those same rows (an in-sample score).
models = [lasso, ENet, GBoost, model_rf]
scores = {}
for model in models:
    print(model)
    # scikit-learn's `fit` returns the estimator itself, so fitting and
    # predicting can be chained.
    tr_pred = model.fit(X, y).predict(X)
    scores[model] = rmse(y, tr_pred)
    print(scores[model])

# Stacked regressor

In [None]:
class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Stacked regressor: base models feed out-of-fold predictions to a meta-model.

    During ``fit`` each base model is cloned and trained once per fold; its
    predictions on the held-out part of every fold become one column of
    meta-features on which a clone of ``meta_model`` is trained. During
    ``predict`` the fold clones of each base model are averaged to rebuild
    that column for new data.

    Args:
        base_models: Sequence of unfitted regressors; each is cloned, so the
            objects passed in are never fitted by this class.
        meta_model: Unfitted regressor trained on the out-of-fold predictions.
        n_folds: Number of K-fold splits used to build those predictions.

    Attributes:
        base_models_: One list per base model holding its fitted fold clones.
        meta_model_: The fitted clone of ``meta_model``.
    """

    def __init__(self, base_models, meta_model, n_folds=3):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    @staticmethod
    def _take_rows(data, positions):
        """Select rows by integer position from a pandas object or array-like."""
        # `Series[int_array]` is label-based and breaks on a non-default
        # index; `.iloc` is always positional. Non-pandas input is indexed
        # as a numpy array.
        if hasattr(data, "iloc"):
            return data.iloc[positions]
        return np.asarray(data)[positions]

    def fit(self, X, y):
        """Fit fold clones of every base model, then the meta-model.

        Args:
            X: Feature table (pandas DataFrame or array-like), one row per sample.
            y: Targets (pandas Series or array-like) aligned with ``X`` by position.

        Returns:
            self, to allow call chaining.
        """
        self.base_models_ = [[] for _ in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        # Fixed seed: every base model sees exactly the same fold assignment.
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # Column i holds base model i's prediction for each sample, made by
        # the fold clone that did not see that sample during training.
        out_of_fold_predictions = np.zeros((len(X), len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(
                    self._take_rows(X, train_index),
                    self._take_rows(y, train_index),
                )
                out_of_fold_predictions[holdout_index, i] = instance.predict(
                    self._take_rows(X, holdout_index)
                )

        # Train the cloned meta-model on the out-of-fold predictions.
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base-model predictions.

        Args:
            X: Feature table with the same columns used in ``fit``.

        Returns:
            The meta-model's predictions, one per row of ``X``.
        """
        per_model_means = []
        for fold_models in self.base_models_:
            # Average the clones of one base model across its folds.
            fold_predictions = np.column_stack(
                [fitted.predict(X) for fitted in fold_models]
            )
            per_model_means.append(fold_predictions.mean(axis=1))
        meta_features = np.column_stack(per_model_means)
        return self.meta_model_.predict(meta_features)

In [None]:
# Build the stack (ElasticNet, gradient boosting and random forest as base
# models, Lasso as the meta-model) and fit it; `fit` returns the estimator.
stacked_averaged_models = StackingAveragedModels(
    base_models=(ENet, GBoost, model_rf),
    meta_model=lasso,
).fit(X, y)

# Print the value `rmse` returns for the stack's predictions on the
# training rows themselves (an in-sample score).
stacked_train_pred = stacked_averaged_models.predict(X)
print(rmse(y, stacked_train_pred))

Predicting for the test data

In [None]:
# Predict on `test_df` (the test table with `id` dropped), not on `test`:
# the stack was fitted on `X`, which has no `id` column, so the raw `test`
# frame would carry one feature more than the model was trained on.
test_pred = stacked_averaged_models.predict(test_df)
test_pred

In [None]:
# Overwrite the template's `target` column with the test predictions and
# write the result to disk without the DataFrame index.
submission['target'] = test_pred
submission.to_csv(path_or_buf='submission.csv', index=False)