# Import Data

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from model.prediction_engine import get_final_data
# Read the dataset from the CSV file on disk.
# The alternative (currently disabled) is to build it in-process instead:
#     final_data = get_final_data(save_to_csv=False)
FINAL_DATA_CSV = 'data/final_data.csv'
final_data = pd.read_csv(FINAL_DATA_CSV)

## Process Data

In [2]:
import torch
import tensorly as tl
%load_ext autoreload
%autoreload 2

from model.prediction_engine import PredictionTestEngine

def _zscore(series):
    """Centre a Series on its own mean and scale it by its own standard deviation."""
    return (series - series.mean()) / series.std()


def _wide_panel(value_col):
    """Pivot one column of `final_data` into a year_month × GVKEY table of means."""
    return final_data.pivot_table(index='year_month', columns='GVKEY',
                                  values=value_col, aggfunc='mean')


# Order rows by year_month then GVKEY, and force the target column to numeric
# (entries that cannot be parsed become NaN).
final_data = final_data.sort_values(['year_month', 'GVKEY'])
final_data['trt1m'] = pd.to_numeric(final_data['trt1m'], errors='coerce')

# Every column other than the two keys and the target is treated as a feature
# and coerced to numeric in the same way.
non_feature_cols = {'GVKEY', 'year_month', 'trt1m'}
feature_cols = [name for name in final_data.columns
                if name not in non_feature_cols]
for name in feature_cols:
    final_data[name] = pd.to_numeric(final_data[name], errors='coerce')

# Target matrix Y (T × n_series), with both axes sorted and each series
# standardised column by column.
# NOTE(review): mean/std are taken over the full sample, so the scaling uses
# observations from every period, including any later used for evaluation —
# confirm this is intended.
Y_df = _wide_panel('trt1m').sort_index().sort_index(axis=1)
Y_df = Y_df.apply(_zscore, axis=0)
Y = Y_df.to_numpy()

# Feature tensor X (T × n_series × n_features): one standardised slab per
# feature, re-indexed onto Y's row/column labels so all slabs line up.
X_list = [
    _wide_panel(name)
    .reindex(index=Y_df.index, columns=Y_df.columns)
    .apply(_zscore, axis=0)
    .to_numpy()
    for name in feature_cols
]
X = np.stack(X_list, axis=-1)

# One-step-ahead alignment: X at period t is paired with Y at period t+1.
# np.nan_to_num replaces the remaining NaNs with 0 (and ±inf with the largest
# finite floats).
X_all = np.nan_to_num(X[:-1])
Y_all = np.nan_to_num(Y[1:])

# Timestamps matching the rows of Y_all; '%Y-%m' parses to the first day of
# each month.
time_index_all = pd.to_datetime(Y_df.index[1:], format='%Y-%m')

# Run tensorly on the PyTorch backend with float64 as torch's default dtype
# (the older torch.set_default_tensor_type(torch.DoubleTensor) call stays disabled).
tl.set_backend("pytorch")
torch.set_default_dtype(torch.float64)



## HOPLS RIDGE

In [3]:
# Set up the test engine on the shifted data.
# NOTE(review): what train_start / train_end delimit is decided inside
# PredictionTestEngine, which is not visible here — confirm their meaning.
window_size = 50
n_periods = X_all.shape[0]
train_start = int(0.85 * n_periods)
train_end = int(0.90 * n_periods)

engine = PredictionTestEngine(
    X_all,
    Y_all,
    window_size=window_size,
    time_index=time_index_all,
    train_start=train_start,
    train_end=train_end,
)

# Settings forwarded verbatim to engine.run_window for the "hopls" method.
kwargs = dict(
    method="hopls",
    R=60,
    Ln=(3, 3),
    epsilon=1e-6,
    verbose=False,
    n_jobs=7,
)

y_pred, y_true, times, metrics = engine.run_window(**kwargs)

# Report every returned metric to four decimal places.
print("Overall metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")
    


Overall metrics:
mse: 1.0951
r2: -0.1065
directional_accuracy: 0.4679


## HOPLS MILR

In [16]:
# Build a fresh engine with the same window and split settings as the ridge run.
# NOTE(review): what train_start / train_end delimit is decided inside
# PredictionTestEngine, which is not visible here — confirm their meaning.
window_size = 50
n_periods = X_all.shape[0]
train_start = int(0.85 * n_periods)
train_end = int(0.90 * n_periods)

engine = PredictionTestEngine(
    X_all,
    Y_all,
    window_size=window_size,
    time_index=time_index_all,
    train_start=train_start,
    train_end=train_end,
)

# Active "hopls_milr" configuration. Alternative settings kept for reference
# (currently disabled):
#   R=30, Ln=(3, 3), epsilon=1e-7, verbose=True, n_jobs=7
#   R=32, Ln=(3, 3), epsilon=1e-6, verbose=True, n_jobs=7,
#       lambda_X=0.001, lambda_Y=0.001, alpha=0.1
y_pred, y_true, times, metrics = engine.run_window(
    method="hopls_milr",
    R=60,
    Ln=(7, 7),
    epsilon=1e-5,
    verbose=True,
    n_jobs=7,
    lambda_X=1e-2,
    lambda_Y=1e-7,
    alpha=4.0,
)

# Report every returned metric to four decimal places.
print("Overall metrics:")
for metric_name, metric_value in metrics.items():
    print(f"{metric_name}: {metric_value:.4f}")


run_window: X_all shape (275, 127, 38), y_all shape (275, 127)
run_window: window_size=50, n_series=127, n_features=38
run_window: number of test windows=14


Parallel preds: 100%|██████████| 14/14 [04:31<00:00, 19.39s/it]


MILR components: 60
  P comp 0: [(127, 7), (38, 7)]
  P comp 1: [(127, 7), (38, 7)]
  P comp 2: [(127, 7), (38, 7)]
  P comp 3: [(127, 7), (38, 7)]
  P comp 4: [(127, 7), (38, 7)]
  P comp 5: [(127, 7), (38, 7)]
  P comp 6: [(127, 7), (38, 7)]
  P comp 7: [(127, 7), (38, 7)]
  P comp 8: [(127, 7), (38, 7)]
  P comp 9: [(127, 7), (38, 7)]
  P comp 10: [(127, 7), (38, 7)]
  P comp 11: [(127, 7), (38, 7)]
  P comp 12: [(127, 7), (38, 7)]
  P comp 13: [(127, 7), (38, 7)]
  P comp 14: [(127, 7), (38, 7)]
  P comp 15: [(127, 7), (38, 7)]
  P comp 16: [(127, 7), (38, 7)]
  P comp 17: [(127, 7), (38, 7)]
  P comp 18: [(127, 7), (38, 7)]
  P comp 19: [(127, 7), (38, 7)]
  P comp 20: [(127, 7), (38, 7)]
  P comp 21: [(127, 7), (38, 7)]
  P comp 22: [(127, 7), (38, 7)]
  P comp 23: [(127, 7), (38, 7)]
  P comp 24: [(127, 7), (38, 7)]
  P comp 25: [(127, 7), (38, 7)]
  P comp 26: [(127, 7), (38, 7)]
  P comp 27: [(127, 7), (38, 7)]
  P comp 28: [(127, 7), (38, 7)]
  P comp 29: [(127, 7), (38, 7)]
