# Generic transformer FM cross-validation
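This notebook trains and evaluates a Transformer FM time-series model using the chronological 10-fold cross-validation scheme. The model cell currently wraps a lightweight linear-regression placeholder until the real transformer is plugged in.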

In [None]:
from pathlib import Path
import pandas as pd
from sklearn.linear_model import LinearRegression

from utils import (
    TimeSeriesPreprocessor,
    default_catalog,
    evaluate_time_series_model,
    load_table,
)

# Resolve the data catalog and build the preprocessor shared by the later cells.
catalog = default_catalog()
preprocessor = TimeSeriesPreprocessor()
# The catalog is indexed by string keys. train_path is checked with .exists()
# before the raw table is loaded in the feature-building cell.
train_path = catalog['train_raw']
# NOTE(review): test_path is not referenced by any visible cell — confirm it is
# still needed.
test_path = catalog['test_raw']


## Build training features inline

In [None]:
# Load the raw training table directly so the notebook remains self-contained.
# When the file is missing, train_df is None and the feature step falls through
# to the "no data" branch instead of raising.
if train_path.exists():
    train_df = load_table(train_path)
else:
    train_df = None
    print(f"Raw training data not found at {train_path}.")

# Build lag, rolling, and time features inline, then split off the target.
if train_df is not None:
    # .ffill() replaces fillna(method='ffill'): the `method` keyword of fillna
    # has been deprecated since pandas 2.1. Forward-filling first and dropping
    # afterwards removes only the rows that still contain NaN (typically the
    # leading rows that have no earlier value to fill from).
    feature_df = (
        preprocessor.create_all_features(
            train_df,
            target_col='target',
            lags=[1, 2, 3, 5, 7, 14],
            windows=[7, 14, 30],
        )
        .ffill()
        .dropna()
    )
    target = feature_df['target']
    features = feature_df.drop(columns=['target'])
else:
    # Downstream cells check for None before evaluating.
    target = None
    features = None


## Define the model wrapper

In [None]:
class FoundationTransformerRegressor:
    """Stand-in for a Transformer FM regressor with a fit/predict interface.

    All work is delegated to the estimator stored in ``self.base``. Replace
    that estimator with the real transformer (e.g., a Hugging Face pipeline)
    once it is available; the fit/predict surface can stay as it is.
    """

    def __init__(self):
        # Placeholder backend: an ordinary least-squares model.
        self.base = LinearRegression()

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y`` and return ``self``."""
        backend = self.base
        backend.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.base.predict(X)
        return predictions


## Evaluate with chronological folds

In [None]:
# Without both the feature matrix and the target there is nothing to score.
if features is None or target is None:
    print('Provide raw training data at the catalog path to run evaluation.')
else:
    # The factory builds a new FoundationTransformerRegressor every time it is
    # invoked, so no fitted state is carried over between invocations.
    avg_score, fold_scores = evaluate_time_series_model(
        features,
        target,
        date_col='date',
        estimator_factory=lambda: FoundationTransformerRegressor(),
    )
    print(f'Average Sharpe-style score: {avg_score:.4f}')
    # Round only for display; fold_scores itself is left untouched.
    print('Fold scores:', list(map(lambda score: round(score, 4), fold_scores)))
