## VAR Model Training


In [6]:
import pandas as pd
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import pickle
import matplotlib.dates as mdates
from datetime import timedelta

# Set root and paths
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
sys.path.append(ROOT_PATH)

from Training.Helper.dataPreprocessing import make_stationary, granger_causes, rank_features_ccf
from Models.VAR import VARModel

In [7]:
# Load training data
train_file = os.path.join(ROOT_PATH, "Data", "Train", "train1990s.csv")
df = pd.read_csv(train_file, parse_dates=["observation_date"])
df.set_index("observation_date", inplace=True)

target_col = "fred_PCEPI"
HORIZONS = [1, 3, 6, 12]
max_lag = 1
max_features = 1000

  df = pd.read_csv(train_file, parse_dates=["observation_date"])


In [8]:
# Filter non-Granger-causing features
print("Filtering non-Granger-causing features...")
kept_cols = [target_col]
for col in df.columns:
    if col != target_col:
        try:
            stationary = make_stationary(df, target_col, col)
            if stationary is not np.nan and granger_causes(df, col, target_col):
                kept_cols.append(col)
        except Exception as e:
            print(f"Skipping {col}: {e}")

df_filtered = df[kept_cols]
print(f"Remaining after Granger filtering: {len(kept_cols) - 1} exogenous variables")

# Rank by cross-correlation
print("Ranking by cross-correlation...")
ranked_cols = rank_features_ccf(df_filtered, target_col)
ranked_exog = [col for col in ranked_cols if col != target_col]

# Standardize full data
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df_filtered), index=df_filtered.index, columns=df_filtered.columns)




Filtering non-Granger-causing features...




Remaining after Granger filtering: 345 exogenous variables
Ranking by cross-correlation...


In [11]:
# === VAR MODEL TRAINING LOOP ===
print("Training VAR models separately for each horizon (CCF)...")
best_models_by_horizon = {}
best_forecasts_by_horizon = {}

for H in HORIZONS:
    best_model = None
    best_subset = []
    best_forecast = None
    best_rmse = float("inf")

    for k in range(1, min(len(ranked_exog), max_features) + 1):
        selected_cols = [target_col] + [col for col in ranked_exog[:k] if col in df_scaled.columns]
        try:
            model = VARModel()
            model_fit = model.fit(df_scaled[selected_cols], maxlags=max_lag)
            forecast_input = df_scaled[selected_cols].values[-model_fit.k_ar:]
            forecast = model_fit.forecast(y=forecast_input, steps=12)

            forecast_idx = pd.date_range(start=df_scaled.index[-1] + timedelta(days=1), periods=12, freq="MS")
            forecast_scaled_df = pd.DataFrame(forecast, index=forecast_idx, columns=selected_cols)

            dummy = pd.DataFrame(0, index=forecast_idx, columns=df_scaled.columns)
            dummy.update(forecast_scaled_df[[target_col]])
            forecast_all = scaler.inverse_transform(dummy)
            forecast_target = forecast_all[:, df_scaled.columns.get_loc(target_col)]

            # Select forecast points depending on H
            if H == 1:
                forecast_horizon = forecast_target  # Every month
            else:
                forecast_horizon = forecast_target[::H]  # Every H months

            # Make sure we always have 12 points
            if len(forecast_horizon) < 12:
                forecast_horizon = np.interp(
                    np.linspace(0, len(forecast_horizon)-1, 12),
                    np.arange(len(forecast_horizon)),
                    forecast_horizon
                )

            rmse = np.std(forecast_horizon)

            if rmse < best_rmse:
                best_rmse = rmse
                best_model = model_fit
                best_subset = selected_cols
                best_forecast = forecast_horizon

        except Exception as e:
            continue

    best_models_by_horizon[H] = best_model
    best_forecasts_by_horizon[H] = best_forecast

# === SAVE FORECASTS PER HORIZON ===
for H in HORIZONS:
    pred_array = best_forecasts_by_horizon.get(H, None)
    if pred_array is not None:
        save_path = os.path.join(ROOT_PATH, "Predictions", f"Horizon{H}")
        os.makedirs(save_path, exist_ok=True)
        np.save(os.path.join(save_path, f"VARccf_horizon_{H}.npy"), pred_array)

# === SAVE MODELS PER HORIZON ===
for H in HORIZONS:
    model = best_models_by_horizon.get(H, None)
    if model is not None:
        weights_dir = os.path.join(ROOT_PATH, "Models", "Weights", "VARX", f"Horizon{H}")
        os.makedirs(weights_dir, exist_ok=True)
        model_path = os.path.join(weights_dir, "VAR_ccf_model.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(model, f)
        print(f"Saved Horizon {H} VAR CCF model to {model_path}")

# === PRINT SUMMARY ===
print("\nFinished training VAR CCF models for horizons:", HORIZONS)


Training VAR models separately for each horizon (CCF)...


  self._init_dates(dates, freq)
 2.37206397 2.3888084  2.40559829 2.42243382 2.43931516 2.45624249]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  dummy.update(forecast_scaled_df[[target_col]])
  self._init_dates(dates, freq)
 2.42716433 2.45239997 2.47784051 2.50348769 2.52934323 2.5554089 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  dummy.update(forecast_scaled_df[[target_col]])
  self._init_dates(dates, freq)
 2.44036483 2.46729097 2.49439455 2.52168317 2.54916403 2.57684398]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  dummy.update(forecast_scaled_df[[target_col]])
  self._init_dates(dates, freq)
 2.42336261 2.44831166 2.47350039 2.49892299 2.52457439 2.5504502 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  dummy.update(forecast_scaled_df[[target_col]])
  self._init_dates(dates, freq)
 2.42004288 2.445229

Saved Horizon 1 VAR CCF model to /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Models/Weights/VARX/Horizon1/VAR_ccf_model.pkl
Saved Horizon 3 VAR CCF model to /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Models/Weights/VARX/Horizon3/VAR_ccf_model.pkl
Saved Horizon 6 VAR CCF model to /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Models/Weights/VARX/Horizon6/VAR_ccf_model.pkl
Saved Horizon 12 VAR CCF model to /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Models/Weights/VARX/Horizon12/VAR_ccf_model.pkl

Finished training VAR CCF models for horizons: [1, 3, 6, 12]


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._