In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from tensorflow.keras.models import load_model

  from pandas.core import (


In [4]:
import pandas as pd

# Read the raw CSV, discard the three columns that are not modelled, and
# turn the remaining table into a float frame indexed by timestamp.
df = pd.read_csv("../../datasets/latestdataset - Copy.csv")
df = df.drop(
    columns=["rain (mm)", "precipitation (mm)", "soil_moisture_0_to_7cm (m³/m³)"]
)
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time').astype(float)

In [5]:
# ---------- Load Scaler ----------
from sklearn.preprocessing import MinMaxScaler

# The scaler is restored already fitted (only transform is called here).
# NOTE(review): presumably a MinMaxScaler fitted on the training data -- confirm.
scaler = joblib.load("scaler.pkl")

# Wrap the transformed array back into a frame with the original labels.
df_scaled = pd.DataFrame(
    data=scaler.transform(df),
    columns=df.columns,
    index=df.index,
)



In [6]:
# Create sequences

# Window geometry: `lookback` rows form one model input and the `horizon`
# rows that follow form its target.
lookback, horizon = 72, 6
n_features = len(df_scaled.columns)

def create_sequences(data, lookback=72, horizon=6):
    """Slice a series into overlapping (input window, target window) pairs.

    Window ``i`` takes rows ``i .. i+lookback-1`` as input and the following
    ``horizon`` rows as target; consecutive windows are shifted by one row.

    Parameters
    ----------
    data : array-like
        Sequence whose first axis is the one being windowed.
    lookback : int, default 72
        Number of rows in every input window.
    horizon : int, default 6
        Number of rows in every target window.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        ``X`` has shape ``(n_windows, lookback, *data.shape[1:])`` and ``y``
        has shape ``(n_windows, horizon, *data.shape[1:])``, where
        ``n_windows = len(data) - lookback - horizon + 1``. When ``data`` is
        too short for even one window, both arrays are empty but keep their
        trailing dimensions, so callers can still read ``.shape[1:]``.
    """
    data = np.asarray(data)
    n_windows = max(len(data) - lookback - horizon + 1, 0)

    if n_windows == 0:
        # np.array([]) would collapse to shape (0,) and break downstream code
        # that inspects the window/feature axes; keep the rank stable instead.
        trailing = data.shape[1:]
        return (
            np.empty((0, lookback) + trailing, dtype=data.dtype),
            np.empty((0, horizon) + trailing, dtype=data.dtype),
        )

    X = [data[start:start + lookback] for start in range(n_windows)]
    y = [
        data[start + lookback:start + lookback + horizon]
        for start in range(n_windows)
    ]
    return np.array(X), np.array(y)

# Window the scaled frame's values into model inputs (X) and targets (y).
X, y = create_sequences(df_scaled.values, lookback=lookback, horizon=horizon)

In [7]:
# train test val split

# Ordered (unshuffled) 70 / 15 / 15 partition of the windows.
n = len(X)
train_idx, val_idx = int(0.7 * n), int(0.85 * n)

# np.split cuts at the two boundaries: [:train_idx], [train_idx:val_idx], [val_idx:]
X_train, X_val, X_test = np.split(X, [train_idx, val_idx])
y_train, y_val, y_test = np.split(y, [train_idx, val_idx])

In [8]:
# Flatten for XGBoost
# Each sample is collapsed to a single row:
# (samples, steps, features) -> (samples, steps * features)
X_val_flat, X_test_flat = (arr.reshape(len(arr), -1) for arr in (X_val, X_test))
y_val_flat, y_test_flat = (arr.reshape(len(arr), -1) for arr in (y_val, y_test))

# -------------------------------
# 1. Load models
# -------------------------------

In [2]:
# Restore the two Keras base models from their .h5 files.
lstm_model, gru_model = (
    load_model(path) for path in ("best_lstm.h5", "best_gru.h5")
)
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files
# from a trusted source.
xgb_model = joblib.load("xgb_multi_model.pkl")

# -------------------------------
# 2. Prepare validation data
# -------------------------------

In [9]:
# Read the feature count and forecast length back from the validation arrays.
n_features, horizon = X_val.shape[2], y_val.shape[1]

# One row per validation window for the model that takes 2-D input.
X_val_flat = X_val.reshape(len(X_val), -1)

# -------------------------------
# 3. Base model predictions on validation set
# -------------------------------

In [10]:
# Every base model forecasts the validation windows; each output is reshaped
# to one row per window so the three blocks can sit side by side.
y_val_lstm = lstm_model.predict(X_val).reshape(len(X_val), -1)
y_val_gru = gru_model.predict(X_val).reshape(len(X_val), -1)
y_val_xgb = xgb_model.predict(X_val_flat)

# Stack base model predictions
# Column order is LSTM, GRU, XGB.
X_meta = np.concatenate((y_val_lstm, y_val_gru, y_val_xgb), axis=1)
# flatten target for meta-model
y_meta = y_val.reshape(len(X_val), -1)



# -------------------------------
# 4. Train meta-model
# -------------------------------

In [11]:
# Ridge regression maps the concatenated base predictions to the flattened
# validation targets; fit() returns the estimator itself.
meta_model = Ridge(alpha=1.0).fit(X_meta, y_meta)
print("✅ Stacking meta-model trained!")

✅ Stacking meta-model trained!


# -------------------------------
# 5. Prepare test set predictions
# -------------------------------

In [12]:
X_test_flat = X_test.reshape(len(X_test), -1)

# Base-model forecasts for the test windows, one row per window.
y_test_lstm = lstm_model.predict(X_test).reshape(len(X_test), -1)
y_test_gru = gru_model.predict(X_test).reshape(len(X_test), -1)
y_test_xgb = xgb_model.predict(X_test_flat)

# Same column order (LSTM, GRU, XGB) as the matrix the meta-model was fitted on.
X_test_meta = np.concatenate((y_test_lstm, y_test_gru, y_test_xgb), axis=1)

# Final stacked predictions
# The flat meta-model output is folded back to (samples, horizon, features).
y_pred_stacked = meta_model.predict(X_test_meta).reshape(
    len(X_test), horizon, n_features
)



# -------------------------------
# 6. Evaluate stacked predictions
# -------------------------------

In [15]:
features = list(df.columns)

# results[feature]["Horizon_k"] -> {"MAE", "RMSE", "R2"} for forecast step k
# avg_results[feature]          -> the same metrics averaged over all steps
# y_test is sliced from the scaler-transformed frame, so the errors below are
# in scaled units, not in the original units of each column.
results, avg_results = {}, {}

for f_idx, feature in enumerate(features):
    per_horizon = {}
    for h in range(horizon):
        yt, yp = y_test[:, h, f_idx], y_pred_stacked[:, h, f_idx]
        per_horizon[f"Horizon_{h+1}"] = {
            "MAE": mean_absolute_error(yt, yp),
            "RMSE": np.sqrt(mean_squared_error(yt, yp)),
            "R2": r2_score(yt, yp),
        }

    results[feature] = per_horizon
    avg_results[feature] = {
        metric: np.mean([scores[metric] for scores in per_horizon.values()])
        for metric in ("MAE", "RMSE", "R2")
    }

# -------------------------------
# 7. Show results in tabular form
# -------------------------------

In [16]:
# Per feature & horizon
# Flatten the nested results dict into one record per (feature, horizon) pair.
rows = [
    {"Feature": feat, "Horizon": h, **metrics}
    for feat, horizons in results.items()
    for h, metrics in horizons.items()
]

results_df = pd.DataFrame(rows)
print("\n===== Stacking Ensemble Metrics per Feature & Horizon =====")
print(results_df)

# Average per feature
# Features become rows after the transpose; the old index is exposed as a
# regular "Feature" column.
avg_df = (
    pd.DataFrame(avg_results)
    .T
    .reset_index()
    .rename(columns={"index":"Feature"})
)
print("\n===== Stacking Ensemble Average Metrics per Feature =====")
print(avg_df)


===== Stacking Ensemble Metrics per Feature & Horizon =====
                           Feature    Horizon       MAE      RMSE        R2
0              temperature_2m (°C)  Horizon_1  0.014273  0.021645  0.978819
1              temperature_2m (°C)  Horizon_2  0.018730  0.026951  0.967169
2              temperature_2m (°C)  Horizon_3  0.021287  0.030120  0.959003
3              temperature_2m (°C)  Horizon_4  0.022646  0.031929  0.953940
4              temperature_2m (°C)  Horizon_5  0.023891  0.033380  0.949672
5              temperature_2m (°C)  Horizon_6  0.025049  0.034691  0.945660
6         relative_humidity_2m (%)  Horizon_1  0.024400  0.037105  0.973911
7         relative_humidity_2m (%)  Horizon_2  0.034235  0.048821  0.954838
8         relative_humidity_2m (%)  Horizon_3  0.040561  0.056454  0.939617
9         relative_humidity_2m (%)  Horizon_4  0.044433  0.061346  0.928709
10        relative_humidity_2m (%)  Horizon_5  0.047162  0.064899  0.920219
11        relative_humidity

# -------------------------------
# 8. Save stacked model for later use
# -------------------------------

In [17]:
# Bundle the meta-model and the three base models into a single pickle.
# NOTE(review): this pickles the Keras models too; the "Assets written to:
# ram://..." log lines suggest TensorFlow serialises them through an in-memory
# SavedModel. Such pickles may not load under a different TF/Keras version --
# confirm the artifact round-trips in the target environment.
joblib.dump(
    value={
        "meta_model": meta_model,
        "lstm_model": lstm_model,
        "gru_model": gru_model,
        "xgb_model": xgb_model
    },
    filename="stacked_ensemble.pkl",
)
print("✅ Stacked ensemble saved as 'stacked_ensemble.pkl'")



INFO:tensorflow:Assets written to: ram://303bc03c-4907-4947-bef1-fef52d4d9a0a/assets


INFO:tensorflow:Assets written to: ram://303bc03c-4907-4947-bef1-fef52d4d9a0a/assets


INFO:tensorflow:Assets written to: ram://10ff1be2-7324-4fe5-9b39-7d64b43d5ae0/assets


INFO:tensorflow:Assets written to: ram://10ff1be2-7324-4fe5-9b39-7d64b43d5ae0/assets


✅ Stacked ensemble saved as 'stacked_ensemble.pkl'
