In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the input directory.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the competition's training and test tables (train.csv, then test.csv).
train, test = (
    pd.read_csv(f'/kaggle/input/tabular-playground-series-jul-2021/{split}.csv')
    for split in ('train', 'test')
)


In [None]:
# Show the column labels of the training frame.
train.columns

In [None]:
# Feature columns fed to the models: the two humidity readings followed by
# sensor_1 .. sensor_5.
col4train = [
    'relative_humidity',
    'absolute_humidity',
    *(f'sensor_{n}' for n in range(1, 6)),
]

# One target series per pollutant, pulled straight from the training frame.
y1, y2, y3 = (
    train[target]
    for target in (
        'target_carbon_monoxide',
        'target_benzene',
        'target_nitrogen_oxides',
    )
)


In [None]:
# Target transformation: model log(1 + y) instead of y.
#
# The transformed targets are computed from the raw columns of `train` rather
# than from the current values of y1/y2/y3, so re-running this cell cannot
# apply the logarithm a second time. Predictions are mapped back with
# np.expm1 in the prediction cells.
y1 = np.log1p(train['target_carbon_monoxide'])
y2 = np.log1p(train['target_benzene'])
y3 = np.log1p(train['target_nitrogen_oxides'])

# Disabled: Box-Cox transform of the feature columns (boxcox1p and lam are
# not defined in the visible part of this notebook).
#for col in col4train:
    #train[col] = boxcox1p(train[col], lam)
#for col in col4train:
    #test[col] = boxcox1p(test[col], lam)
    


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature columns.
#
# The mean and standard deviation are learned from the training features only
# and the very same statistics are then applied to the test features. Fitting
# the scaler again on the test set would put the two frames on different
# scales, so a model trained on `train` would receive shifted inputs at
# prediction time.
scaler = StandardScaler()
train[col4train] = scaler.fit_transform(train[col4train])
test[col4train] = scaler.transform(test[col4train])

In [None]:
# Feature matrices: X holds the training rows, x the test rows, both limited
# to the columns listed in col4train.
X, x = train[col4train], test[col4train]

In [None]:
# Display the training feature frame.
X

In [None]:
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import lightgbm as lgb

In [None]:
class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    """Stacked regressor: base models feed out-of-fold predictions to a meta-model.

    Parameters
    ----------
    base_models : sequence of estimators
        Unfitted regressors. Each one is cloned and refit once per fold.
    meta_model : estimator
        Unfitted regressor trained on the out-of-fold predictions of the
        base models (one feature column per base model).
    n_folds : int, default=5
        Number of KFold splits used to produce the out-of-fold predictions.

    Attributes
    ----------
    base_models_ : list of list of estimators
        For each base model, the clones fitted on each fold.
    meta_model_ : estimator
        The fitted clone of ``meta_model``.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        """Fit per-fold clones of the base models, then the meta-model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features. A pandas DataFrame is converted to an array.
        y : array-like of shape (n_samples,)
            Training targets. A pandas Series is converted to an array.

        Returns
        -------
        self
        """
        # The fold indices produced by KFold are row *positions*. Indexing a
        # pandas Series with `[]` looks rows up by *label*, which only matches
        # positions for a default 0..n-1 index, and a DataFrame cannot be
        # row-sliced with `[]` at all. Work on plain arrays instead.
        if hasattr(X, "iloc"):
            X = X.to_numpy()
        y = np.asarray(y)

        self.base_models_ = [[] for _ in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # Train cloned base models, then collect their predictions on the
        # held-out part of each fold; column i belongs to base model i.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(X[train_index], y[train_index])
                out_of_fold_predictions[holdout_index, i] = instance.predict(X[holdout_index])

        # The meta-model learns to combine the base models from predictions
        # made on rows those base models did not see during their own fit.
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base-model predictions.

        For every base model, the predictions of its per-fold clones are
        averaged; the averages form the meta-features passed to the
        meta-model.
        """
        # Same representation as in fit(), so the fitted clones receive the
        # kind of input they were trained on.
        if hasattr(X, "iloc"):
            X = X.to_numpy()

        meta_features = np.column_stack([
            np.column_stack([model.predict(X) for model in fold_models]).mean(axis=1)
            for fold_models in self.base_models_
        ])
        return self.meta_model_.predict(meta_features)

In [None]:
# Estimators used by the stacked ensemble.

# Elastic net on robustly scaled features.
ENet = make_pipeline(
    RobustScaler(),
    ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3),
)

# Kernel ridge regression with a degree-2 polynomial kernel.
KRR = KernelRidge(
    alpha=0.6,
    kernel='polynomial',
    degree=2,
    coef0=2.5,
)

# Gradient boosting with Huber loss.
GBoost = GradientBoostingRegressor(
    n_estimators=3000,
    learning_rate=0.05,
    max_depth=4,
    max_features='sqrt',
    min_samples_leaf=15,
    min_samples_split=10,
    loss='huber',
    random_state=5,
)

# Lasso on robustly scaled features.
lasso = make_pipeline(
    RobustScaler(),
    Lasso(alpha=0.0005, random_state=1),
)

In [None]:
# Stack the elastic net, gradient boosting and kernel ridge models, with the
# lasso pipeline as the meta-model.
stacked_averaged_models = StackingAveragedModels(
    base_models=(ENet, GBoost, KRR),
    meta_model=lasso,
)

In [None]:
# Fit on the first target, predict for the test rows, and undo the log1p
# transform of the target with expm1. fit() returns the fitted ensemble, so
# the calls can be chained.
pred1 = np.expm1(
    stacked_averaged_models.fit(X.values, y1).predict(x.values)
)

In [None]:
# Refit on the second target, predict for the test rows, and undo the log1p
# transform of the target with expm1. fit() returns the fitted ensemble, so
# the calls can be chained.
pred2 = np.expm1(
    stacked_averaged_models.fit(X.values, y2).predict(x.values)
)

In [None]:
# Refit on the third target, predict for the test rows, and undo the log1p
# transform of the target with expm1. fit() returns the fitted ensemble, so
# the calls can be chained.
pred3 = np.expm1(
    stacked_averaged_models.fit(X.values, y3).predict(x.values)
)

In [None]:
# Put the three prediction arrays next to the test timestamps, one column per
# target, and write the result without the index column.
# Any filename would do; 'submission.csv' is the one chosen here.
submission = pd.DataFrame(
    {
        'date_time': test['date_time'],
        'target_carbon_monoxide': pred1,
        'target_benzene': pred2,
        'target_nitrogen_oxides': pred3,
    }
)
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame that was written to submission.csv.
submission