In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
import xgboost as xgb
import lightgbm as lgb

In [4]:
from sklearn.base import BaseEstimator, RegressorMixin

In [5]:
from sklearn.multioutput import MultiOutputRegressor

In [6]:
from sklearn.tree import DecisionTreeRegressor

In [7]:
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, HistGradientBoostingRegressor

In [8]:
from sklearn.model_selection import KFold

In [9]:
from sklearn.linear_model import Ridge, Lasso

In [10]:
from sklearn.base import clone

In [11]:
# Load the raw hackathon dataset from a CSV file in the working directory.
DATASET_PATH = '2023_smartFarm_AI_hackathon_dataset.csv'
input_data = pd.read_csv(DATASET_PATH)

In [12]:
# The two columns the models are asked to predict.
TARGET_COLUMNS = ['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']

# Drop the unused 'frmDist' column and order the rows by date.
df = input_data.drop(columns=['frmDist']).sort_values(by='date')

# Features are every remaining column except the targets.
X = df.drop(columns=TARGET_COLUMNS)
Y = df[TARGET_COLUMNS]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# NumPy views of the training features and targets.
X_train_np, y_train_np = X_train.values, y_train.values

In [14]:
class SimpleEnsemble(BaseEstimator, RegressorMixin):
    """Average the predictions of several multi-output regressors.

    Parameters
    ----------
    models : list of estimators
        Unfitted single-output regressors. They are stored untouched; each is
        cloned and wrapped in ``MultiOutputRegressor`` when ``fit`` is called.

    Attributes
    ----------
    models_ : list of MultiOutputRegressor
        The fitted wrappers, one per entry of ``models``. Set by ``fit``.
    """

    def __init__(self, models):
        # Keep the argument exactly as passed: scikit-learn's get_params()/clone()
        # require that __init__ does not transform its parameters.
        self.models = models

    def fit(self, X, y):
        """Fit one multi-output wrapper per base model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples, n_targets)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``models`` is empty.
        """
        if len(self.models) == 0:
            raise ValueError("SimpleEnsemble requires at least one model.")
        # Clone so the caller's estimator objects are never shared or mutated.
        self.models_ = [
            MultiOutputRegressor(clone(model)).fit(X, y) for model in self.models
        ]
        return self

    def predict(self, X):
        """Return the element-wise mean of all fitted models' predictions.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_targets)
        """
        predictions = np.array([model.predict(X) for model in self.models_])
        return np.mean(predictions, axis=0)

# Seed every estimator so a fresh Restart & Run All reproduces the same scores.
# The value matches the seed already used for the train/test split.
SEED = 42

model_xgb = xgb.XGBRegressor(random_state=SEED)
GBoost = GradientBoostingRegressor(random_state=SEED)
LightGB = lgb.LGBMRegressor(random_state=SEED)
RF = RandomForestRegressor(random_state=SEED)
ETR = ExtraTreesRegressor(random_state=SEED)
DT = DecisionTreeRegressor(random_state=SEED)
HGBR = HistGradientBoostingRegressor(random_state=SEED)

# Base learners for the ensemble; the order fixes the column order of the stacked features.
models = [GBoost, LightGB, RF, model_xgb, ETR, DT, HGBR]

In [None]:
class StackingModel(BaseEstimator, RegressorMixin):
    """Stacked multi-output regressor trained on out-of-fold predictions.

    Every base model is fitted once per cross-validation fold. Its predictions
    on the held-out rows become the training features of the meta-model. At
    prediction time the fold models of each base model are averaged to build
    the same feature layout.

    Parameters
    ----------
    base_models : list of estimators
        Unfitted single-output regressors; each is cloned and wrapped in
        ``MultiOutputRegressor`` for every fold.
    meta_model : estimator
        Unfitted regressor trained on the stacked out-of-fold predictions
        (also wrapped in ``MultiOutputRegressor``).
    n_folds : int, default=5
        Number of ``KFold`` splits.
    random_state : int, RandomState instance or None, default=None
        Passed to ``KFold`` to control the fold shuffling. ``None`` keeps the
        unseeded shuffling used previously.

    Attributes
    ----------
    base_models_ : list of list of MultiOutputRegressor
        ``base_models_[i]`` holds the per-fold fitted wrappers of ``base_models[i]``.
    meta_model_ : MultiOutputRegressor
        The fitted meta-model.
    """

    def __init__(self, base_models, meta_model, n_folds=5, random_state=None):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the per-fold base models and then the meta-model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples, n_targets)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``base_models`` is empty or ``y`` is not two-dimensional.
        """
        # Positional fold indexing below needs arrays, not DataFrames.
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim != 2:
            raise ValueError("y must be 2-D with shape (n_samples, n_targets).")
        if len(self.base_models) == 0:
            raise ValueError("StackingModel requires at least one base model.")

        n_targets = y.shape[1]
        self.base_models_ = [[] for _ in self.base_models]
        kfold = KFold(
            n_splits=self.n_folds, shuffle=True, random_state=self.random_state
        )

        # Base model i owns the column block [i * n_targets, (i + 1) * n_targets).
        meta_train_data = np.zeros((X.shape[0], len(self.base_models) * n_targets))

        for i, model in enumerate(self.base_models):
            columns = slice(i * n_targets, (i + 1) * n_targets)
            for train_index, holdout_index in kfold.split(X, y):
                fold_model = MultiOutputRegressor(clone(model))
                fold_model.fit(X[train_index], y[train_index])
                self.base_models_[i].append(fold_model)
                # Only held-out rows are written, so the meta-model never sees
                # a prediction made by a model trained on that same row.
                meta_train_data[holdout_index, columns] = fold_model.predict(
                    X[holdout_index]
                )

        self.meta_model_ = MultiOutputRegressor(clone(self.meta_model))
        self.meta_model_.fit(meta_train_data, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on averaged base-model predictions.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_targets)
        """
        X = np.asarray(X)
        # Average over the fold models that were actually fitted, and take the
        # column layout from their outputs rather than from notebook globals.
        meta_features = np.column_stack([
            np.mean([fold_model.predict(X) for fold_model in fold_models], axis=0)
            for fold_models in self.base_models_
        ])
        return self.meta_model_.predict(meta_features)

In [None]:
# Add a Lasso base learner exactly once, so re-running this cell does not
# stack duplicate Lasso models onto the shared list.
if not any(isinstance(base_model, Lasso) for base_model in models):
    models.append(Lasso())

# Stack all base learners under a Ridge meta-model.
model = StackingModel(base_models=models, meta_model=Ridge())

In [None]:
# Train the stacked model on plain NumPy arrays, then predict the held-out rows.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred = model.fit(X_train.values, y_train.values).predict(X_test.values)

In [None]:
# Score the held-out predictions: root of the mean squared error, plus R^2.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2score = r2_score(y_true=y_test, y_pred=y_pred)

print('RMSE:', rmse)
print('R2_score:', r2score)