In [None]:
pip install optuna
pip install --upgrade xgboost

Setup + Load + Preprocess

In [None]:
# ==== Setup Paths and Imports ====
import os, sys
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna
import json
import matplotlib.pyplot as plt

# The project root is two directory levels above the current working directory.
# It is added to sys.path so project-local packages can be imported below.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
sys.path.append(project_root)

# Directory that receives the trained XGBoost artefacts (created if missing).
save_dir = os.path.join(project_root, "Models", "Weights", "XGB")
os.makedirs(save_dir, exist_ok=True)

# ==== Load Dataset ====
# Read the training CSV (parsing "observation_date" as datetimes) and rename
# the date / PCEPI columns to the shorter names used in the rest of the notebook.
train_file = os.path.join(project_root, "Data", "Train", "train1990s.csv")
df = pd.read_csv(train_file, parse_dates=["observation_date"]).rename(
    columns={"observation_date": "Date", "fred_PCEPI": "Inflation"}
)

# ==== Feature Engineering ====
# Imported here rather than at the top because the project root must already be
# on sys.path for this project-local package to resolve.
from Training.Helper.dataPreprocessing import add_lagged_features

# Lagged copies of "Inflation" for the listed lags.
# NOTE(review): exact column naming / NaN handling is defined by the helper - confirm there.
df = add_lagged_features(df, target_cols=["Inflation"], lags=[1, 2, 3, 5, 10])

# Target is the next row's "Inflation" (one-step-ahead forecast). Rows that
# contain any NaN - including the final row, which has no next value - are
# dropped and the index is rebuilt.
df = (
    df.assign(Target=df["Inflation"].shift(-1))
    .dropna()
    .reset_index(drop=True)
)

# ==== Scaling ====
# Every column except the date and the prediction target is a model feature.
feature_cols = [col for col in df.columns if col not in ["Date", "Target"]]

# Fit the scaler on the leading 80% of rows only - the same chronological
# portion that is used for training further down - and then apply it to all
# rows. Fitting on the full frame would let the mean/variance of the held-out
# validation rows leak into the training features.
# Keep this fraction in sync with the train/validation split ratio.
scaler_fit_rows = int(len(df) * 0.8)
scaler = StandardScaler()
scaler.fit(df.iloc[:scaler_fit_rows][feature_cols])
df[feature_cols] = scaler.transform(df[feature_cols])


Optuna Hyperparameter Tuning + Training

In [None]:
# ==== Train-Validation Split ====
# Ordered (non-shuffled) 80/20 split: the first 80% of rows train the model,
# the remaining rows are held out for validation.
split_idx = int(len(df) * 0.8)
X_train, X_val = df[feature_cols].iloc[:split_idx], df[feature_cols].iloc[split_idx:]
y_train, y_val = df["Target"].iloc[:split_idx], df["Target"].iloc[split_idx:]

# ==== Optuna Objective ====
def objective(trial):
    """Optuna objective: validation RMSE of an XGBRegressor for one trial.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Root-mean-squared error of the fitted model on the module-level
        ``X_val`` / ``y_val`` after training on ``X_train`` / ``y_train``.
    """
    # Keyword arguments are evaluated in order, so the suggest_* calls happen
    # in the same sequence as the keys are listed.
    search_space = dict(
        objective="reg:squarederror",
        max_depth=trial.suggest_int("max_depth", 3, 10),
        learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        n_estimators=trial.suggest_int("n_estimators", 100, 500),
        subsample=trial.suggest_float("subsample", 0.6, 1.0),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
    )

    candidate = XGBRegressor(**search_space)
    candidate.fit(X_train, y_train, verbose=False)

    squared_error = mean_squared_error(y_val, candidate.predict(X_val))
    return np.sqrt(squared_error)

# ==== Run Study ====
# Minimise the validation RMSE returned by `objective` over 20 trials.
# The TPE sampler (Optuna's default algorithm) is seeded explicitly so that a
# Restart & Run All explores the same hyperparameter sequence; without a seed
# the selected "best" parameters change from run to run.
# NOTE(review): bit-for-bit identical RMSE values additionally depend on the
# model fit itself being deterministic - confirm if exact reproduction matters.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)
best_params = study.best_params
print(f"Best Hyperparameters: {best_params}")


Final Training + Save + Logging

In [None]:
# ==== Train Final Model ====
# Refit on the training rows with the tuned hyperparameters; "eval_metric" is
# added to the same dict that is passed to the regressor.
best_params.update(eval_metric="rmse")
best_xgb_model = XGBRegressor(**best_params)
best_xgb_model.fit(X_train, y_train, verbose=True)

# ==== Evaluate ====
# Score the final model on the held-out validation rows (RMSE = sqrt of MSE).
y_pred = best_xgb_model.predict(X_val)
best_rmse = np.sqrt(
    mean_squared_error(y_val, y_pred)
)
print(f"\n Final Validation RMSE: {best_rmse:.4f}")

# ==== Save Predictions ====
# Persist the raw validation predictions next to the model weights.
preds_path = os.path.join(save_dir, "XGB_val_preds.npy")
np.save(file=preds_path, arr=y_pred)
print(f"Predictions saved to: {preds_path}")

# ==== Save Model & Log ====
# Both artefacts are written into `save_dir`.
# NOTE(review): which on-disk format XGBoost uses for a ".model" file name
# depends on the installed version - confirm if downstream loaders need a
# specific format.
model_path, log_path = (
    os.path.join(save_dir, file_name)
    for file_name in ("XGB_best_model.model", "XGB_training_log.json")
)
best_xgb_model.save_model(model_path)

# The JSON log records the tuned number of estimators (stored under the
# "best_iteration" key) and the validation RMSE of the final model.
with open(log_path, "w") as f:
    f.write(
        json.dumps(
            {
                "best_iteration": best_params["n_estimators"],
                "best_rmse": best_rmse,
            },
            indent=4,
        )
    )

print(f"Model saved to {model_path} | Log saved to {log_path}")


Save & Trim XGB Predictions for Compatibility

In [None]:
# ==== Save XGB Predictions (Same Format as GRU) ====
def save_predictions(predictions, project_root):
    """Save predictions to ``<project_root>/Predictions/XGB.npy``.

    The ``Predictions`` directory is created when it does not exist, so the
    function works without the caller having prepared it (``np.save`` would
    otherwise raise ``FileNotFoundError``).

    Args:
        predictions: Array-like of predicted values, written with ``np.save``.
        project_root: Directory that contains (or will contain) the
            ``Predictions`` folder.
    """
    predictions_dir = os.path.join(project_root, "Predictions")
    os.makedirs(predictions_dir, exist_ok=True)

    pred_save_path = os.path.join(predictions_dir, "XGB.npy")
    np.save(pred_save_path, predictions)
    print(f"Saved XGB predictions to: {pred_save_path}")

def trim_and_save_predictions(predictions_path, n_trim=48):
    """Keep only the last ``n_trim`` saved predictions, overwriting the file.

    Args:
        predictions_path: Path of the ``.npy`` file to load and overwrite.
        n_trim: Number of trailing values to keep. If the file holds fewer
            values, all of them are kept; ``0`` keeps none.

    Raises:
        ValueError: If ``n_trim`` is negative.
    """
    if n_trim < 0:
        raise ValueError(f"n_trim must be non-negative, got {n_trim}")

    xgb_preds = np.load(predictions_path)

    # Slice from an explicit start index: ``xgb_preds[-n_trim:]`` would return
    # the entire array for n_trim == 0 because ``-0 == 0``.
    start = max(len(xgb_preds) - n_trim, 0)
    xgb_preds_trimmed = xgb_preds[start:]

    np.save(predictions_path, xgb_preds_trimmed)
    print(f"Trimmed XGB.npy to {len(xgb_preds_trimmed)} values and saved.")

# Save and trim
# Make sure the shared Predictions folder exists, write the validation
# predictions there as XGB.npy, then cut the file down to its last 48 values.
predictions_dir = os.path.join(project_root, "Predictions")
os.makedirs(predictions_dir, exist_ok=True)

save_predictions(predictions=y_pred, project_root=project_root)
trim_and_save_predictions(
    predictions_path=os.path.join(predictions_dir, "XGB.npy"),
    n_trim=48,
)
