In [12]:
import joblib
import numpy as np
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [14]:
# Restore the preprocessed artifacts from ../data, grouped as train/test pairs.
# Feature matrices.
X_train, X_test = (
    joblib.load('../data/X_train.pkl'),
    joblib.load('../data/X_test.pkl'),
)
# Targets as stored in the y_* pickles.
y_train, y_test = (
    joblib.load('../data/y_train.pkl'),
    joblib.load('../data/y_test.pkl'),
)
# Targets as stored in the y_*_log pickles
# (presumably a log transform of the above -- confirm against the preprocessing step).
y_train_log, y_test_log = (
    joblib.load('../data/y_train_log.pkl'),
    joblib.load('../data/y_test_log.pkl'),
)

In [5]:
# Level-0 learners for the stack, as (name, estimator) pairs.
# A dict literal keeps insertion order, so .items() yields the pairs in the
# order written here: ridge, rf, xgb.
base_models = list({
    'ridge': Ridge(alpha=10),
    'rf': RandomForestRegressor(n_estimators=100, random_state=42),
    'xgb': XGBRegressor(n_estimators=100, random_state=42),
}.items())

In [6]:
# Final (level-1) estimator that combines the base learners' predictions.
meta_model = Lasso(alpha=0.001)

# Assemble the stack: base learners feed the Lasso meta-model, using
# 5-fold cross-validation (cv=5).
stacked_model = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)

In [9]:
# Train against y_train_log, then predict the test set on that same scale.
# fit() returns the fitted estimator, so predict() can be chained directly.
y_pred_stack_log = stacked_model.fit(X_train, y_train_log).predict(X_test)

# Map predictions back with expm1.
# NOTE(review): this assumes y_train_log was produced with np.log1p -- confirm
# against the preprocessing step.
y_pred_stack = np.expm1(y_pred_stack_log)

In [20]:
# Score the back-transformed predictions (y_pred_stack) against y_test.
# NOTE(review): y_test is presumably on the original, untransformed target scale -- confirm.
rmse = np.sqrt(mean_squared_error(y_test, y_pred_stack))
mae = mean_absolute_error(y_test, y_pred_stack)
r2 = r2_score(y_test, y_pred_stack)

# One print call, one metric per line.
print(
    f"Stacked Model RMSE: {rmse:.2f}",
    f"Stacked Model MAE: {mae:.2f}",
    f"Stacked Model R²: {r2:.4f}",
    sep="\n",
)

Stacked Model RMSE: 25080.49
Stacked Model MAE: 14222.66
Stacked Model R²: 0.9180


In [21]:
# Persist the stacking model to ../models; the call's return value
# (the list of written file names) is the cell's output.
joblib.dump(
    value=stacked_model,
    filename='../models/stacked_model.pkl',
)

['../models/stacked_model.pkl']