In [None]:
!pip install timesfm

In [None]:
import timesfm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given). Note that this also hides pandas warnings raised by the
# DataFrame manipulations further down.
warnings.filterwarnings('ignore')

In [None]:
# Global matplotlib styling: one font size everywhere, thicker lines and a
# custom five-colour cycle. Applied once so every later figure inherits it.
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 14,
    'lines.linewidth': 2,
    'axes.prop_cycle': plt.cycler(color=[
        "#000072", # blue
        "#80c21d", # green
        "#924eae", # purple
        "#ff0000", # red
        "#ff9100", # orange
    ]),
})

In [None]:
# Load the sales dataset straight from the companion GitHub repository.
DATA_URL = "https://raw.githubusercontent.com/marcopeix/FoundationModelsForTimeSeriesForecasting/main/data/walmart_sales_small.csv"

df = pd.read_csv(DATA_URL)
df.head()

In [None]:
# Model used for the 8-step forecasts and for the cross-validation section
# (which is called with h=8, matching horizon_len below).
tfm_hparams = timesfm.TimesFmHparams(
    backend="cpu", # "gpu" if CUDA is available
    per_core_batch_size=32,
    horizon_len=8,
    num_layers=50,
    use_positional_embedding=False,
    context_len=2048,
)
tfm_checkpoint = timesfm.TimesFmCheckpoint(
    huggingface_repo_id="google/timesfm-2.0-500m-pytorch",
)

tfm = timesfm.TimesFm(hparams=tfm_hparams, checkpoint=tfm_checkpoint)

In [None]:
# Rename to the `unique_id` / `ds` column names that the forecasting calls
# below rely on, and parse the date column into datetimes.
df = (
    df
    .rename(columns={"Store": "unique_id", "Date": "ds"})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)
df.head()

In [None]:
# Forecast every store in `df` in one call; the result is plotted below using
# its `timesfm`, `timesfm-q-0.1` and `timesfm-q-0.9` columns.
preds_df = tfm.forecast_on_df(
    inputs=df,
    value_name="Weekly_Sales",
    freq="W",
    num_jobs=-1,
)

In [None]:
# Preview the forecast frame returned by forecast_on_df.
preds_df.head()

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

# One panel per store: stores 1-4 fill the 2x2 grid in row-major order.
for store_id, ax in enumerate(axes.ravel(), start=1):
    history = df[df['unique_id'] == store_id]
    forecast = preds_df[preds_df['unique_id'] == store_id]

    # Observed sales, then the point forecast with its 10%-90% quantile band.
    ax.plot(history['ds'], history['Weekly_Sales'])
    ax.plot(forecast['ds'], forecast['timesfm'], label='TimesFM')
    ax.fill_between(
        forecast['ds'],
        forecast['timesfm-q-0.1'],
        forecast['timesfm-q-0.9'],
        color="#80c21d",
        alpha=0.2,
    )

    ax.set_title(f"Store {store_id}")
    ax.set_xlabel('Date')
    ax.set_ylabel('Sales volume ($)')
    ax.legend(loc=1)

fig.autofmt_xdate()
plt.tight_layout()

## Cross-validation with TimesFM

In [None]:
def cross_validation_timesfm(df, h, n_windows, target_col, freq, model=None, id_col="unique_id"):
  """Run rolling-origin cross-validation with a TimesFM model.

  For window ``i = n_windows, ..., 1`` the last ``h * i`` rows of *each series*
  are withheld, the model forecasts from the remaining rows, and the forecasts
  of all windows are stacked (oldest window first).

  Args:
    df: Long-format frame passed to ``forecast_on_df``. Rows are assumed to be
      in chronological order within each series (no sorting is done here).
    h: Number of rows withheld per window. The number of forecast steps
      returned is decided by the model, so ``h`` should match the model's
      configured horizon for the windows to tile the hold-out period.
    n_windows: Number of cross-validation windows.
    target_col: Name of the column holding the values to forecast.
    freq: Frequency string forwarded to ``forecast_on_df``.
    model: Object exposing ``forecast_on_df``. Defaults to the notebook-level
      ``tfm`` model.
    id_col: Column identifying each series. If it is absent, ``df`` is treated
      as a single series.

  Returns:
    A DataFrame with the concatenated forecasts of every window.

  Raises:
    ValueError: If ``h`` or ``n_windows`` is not positive, or if a window would
      leave no rows to forecast from.
  """
  if h <= 0 or n_windows <= 0:
    raise ValueError("h and n_windows must be positive integers")

  forecaster = tfm if model is None else model

  # Position of each row counted from the end of its own series (0 = last row).
  # Trimming on this keeps multi-series frames correct; for a single series it
  # is equivalent to df.iloc[:-(h * i)].
  if id_col in df.columns:
    rows_from_end = df.groupby(id_col, sort=False).cumcount(ascending=False).to_numpy()
  else:
    rows_from_end = np.arange(len(df) - 1, -1, -1)

  all_preds = []

  for i in range(n_windows, 0, -1):
    holdout = h * i
    input_df = df[rows_from_end >= holdout]

    if input_df.empty:
      raise ValueError(
          f"Not enough rows to hold out {holdout} observations per series"
      )

    preds_df = forecaster.forecast_on_df(
        inputs=input_df,
        freq=freq,
        value_name=target_col,
        num_jobs=-1,
    )

    all_preds.append(preds_df)

  return pd.concat(all_preds, axis=0, ignore_index=True)

In [None]:
# Restrict the cross-validation study to store 1.
cv_df = df[df["unique_id"] == 1]

In [None]:
# 4 windows of 8 rows each, i.e. the last 32 rows of cv_df are forecast.
# h=8 matches the horizon_len the `tfm` model was created with.
cv_preds = cross_validation_timesfm(
    cv_df,
    h=8,
    n_windows=4,
    freq="W",
    target_col="Weekly_Sales",
)

cv_preds.head()

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))

# Observed series for store 1.
ax.plot(cv_df['ds'], cv_df['Weekly_Sales'])

# Stacked cross-validation forecasts with their 10%-90% quantile band.
ax.plot(
    cv_preds['ds'],
    cv_preds['timesfm'],
    ls='--',
    color='green',
    label='Forecast',
)
ax.fill_between(
    cv_preds['ds'],
    cv_preds['timesfm-q-0.1'],
    cv_preds['timesfm-q-0.9'],
    color="#80c21d",
    alpha=0.2,
)

ax.set_title("Store 1")
ax.set_xlabel('Date')
ax.set_ylabel('Sales volume ($)')
ax.legend(loc=1)

fig.autofmt_xdate()
plt.tight_layout()

In [None]:
# Pair every cross-validation forecast with the observed value it targets.
# Take an explicit copy so the new column is written to an independent frame
# rather than to a slice of cv_preds.
eval_df = cv_preds[['unique_id', 'ds', 'timesfm']].copy()

# Actuals are matched by position: the forecasts cover the trailing rows of
# cv_df, so take exactly as many actuals as there are forecast rows instead of
# hard-coding the count.
# NOTE(review): positional matching assumes the forecast rows are in the same
# chronological order as cv_df; the forecast `ds` values are not compared
# against the dates in cv_df here - confirm they line up.
eval_df['Weekly_Sales'] = cv_df['Weekly_Sales'].iloc[-len(eval_df):].to_numpy()

In [None]:
from utilsforecast.losses import mae, smape
from utilsforecast.evaluation import evaluate

# Score the cross-validation forecasts with MAE and sMAPE.
evaluation = evaluate(
    eval_df,
    metrics=[mae, smape],
    id_col='unique_id',
    target_col='Weekly_Sales',
    models=['timesfm'],
)

evaluation

## Forecasting with covariates

In [None]:
# Hold out the last 32 rows of store 1 as the test set; everything before
# them is the training context.
train, test = cv_df.iloc[:-32], cv_df.iloc[-32:]

train.head()

In [None]:
from collections import defaultdict

# Data pipelining
def get_batched_data_fn(
    batch_size: int = 2,
    context_len: int = 64,
    horizon_len: int = 32,
    data=None,
):
    """Build a generator factory that yields batches of rolling windows.

    Windows of ``context_len + horizon_len`` rows are cut from ``data``,
    advancing by ``horizon_len`` rows each time. Only windows that fit
    completely inside ``data`` are produced, so every example has a full
    context, a full horizon and a covariate list spanning both.

    Args:
        batch_size: Maximum number of examples per yielded batch.
        context_len: Number of rows used as model input in each example.
        horizon_len: Number of rows to forecast in each example.
        data: Frame with ``Weekly_Sales`` and ``Holiday_Flag`` columns.
            Defaults to the notebook-level ``train`` frame.

    Returns:
        A zero-argument function; calling it returns a generator of dicts with
        keys ``"inputs"``, ``"Holiday_Flag"`` and ``"outputs"``, each mapping
        to a list of per-example lists.

    Raises:
        ValueError: If any of the three size arguments is not positive.
    """
    if batch_size <= 0 or context_len <= 0 or horizon_len <= 0:
        raise ValueError("batch_size, context_len and horizon_len must be positive")

    series = train if data is None else data
    sales = series["Weekly_Sales"]
    holidays = series["Holiday_Flag"]
    window_len = context_len + horizon_len

    examples = defaultdict(list)

    # The bound and the slices use the same frame, and the +1 keeps the window
    # that ends exactly on the last row.
    for start in range(0, len(series) - window_len + 1, horizon_len):
        context_end = start + context_len
        window_end = context_end + horizon_len
        examples["inputs"].append(sales.iloc[start:context_end].tolist())
        # The covariate covers the context and the horizon.
        examples["Holiday_Flag"].append(holidays.iloc[start:window_end].tolist())
        examples["outputs"].append(sales.iloc[context_end:window_end].tolist())

    num_examples = len(examples["inputs"])

    def data_fn():
        for first in range(0, num_examples, batch_size):
            yield {k: v[first:first + batch_size] for k, v in examples.items()}

    return data_fn

In [None]:
# Second model instance with a 32-step horizon, matching the 32-row test set
# used in the covariates section.
tfm_h32_hparams = timesfm.TimesFmHparams(
    backend="cpu", # "gpu" if CUDA is available
    per_core_batch_size=32,
    horizon_len=32,
    num_layers=50,
    use_positional_embedding=False,
    context_len=2048,
)
tfm_h32_checkpoint = timesfm.TimesFmCheckpoint(
    huggingface_repo_id="google/timesfm-2.0-500m-pytorch",
)

tfm_h32 = timesfm.TimesFm(hparams=tfm_h32_hparams, checkpoint=tfm_h32_checkpoint)

In [None]:
# Forecast the held-out test window using Holiday_Flag as a dynamic covariate.
#
# The cells below plot and score cov_forecast[0] against `test`, so the single
# example built here must end where `train` ends. The rolling windows from
# get_batched_data_fn start at the beginning of `train`, so their first
# forecast covers an interior slice of `train`, not the test rows.
context_values = train["Weekly_Sales"].tolist()

# The covariate list spans the context rows plus the rows to forecast:
# all of `train` followed by all of `test`, i.e. the whole of cv_df.
holiday_flags = cv_df["Holiday_Flag"].tolist()
assert len(holiday_flags) == len(context_values) + len(test)

cov_forecast, _ = tfm_h32.forecast_with_covariates(
    inputs=[context_values],
    dynamic_numerical_covariates={},
    dynamic_categorical_covariates={
        "Holiday_Flag": [holiday_flags],
    },
    static_numerical_covariates={},
    static_categorical_covariates={},
    freq=[1],
    xreg_mode="xreg + timesfm",
    ridge=0.0,
    force_on_cpu=False,
    normalize_xreg_target_per_input=True,
)

In [None]:
# Show the covariate-aware forecast for the first example in the batch.
cov_forecast[0]

In [None]:
# Baseline: forecast from `train` with the same 32-step model, no covariates.
no_cov_preds = tfm_h32.forecast_on_df(
    inputs=train,
    value_name="Weekly_Sales",
    freq="W",
    num_jobs=-1,
)

no_cov_preds.head()

In [None]:
fig, ax = plt.subplots(figsize=(10, 7))

# Observed values: training period first, then the held-out test period.
for split in (train, test):
    ax.plot(split['ds'], split['Weekly_Sales'])

# Both forecasts are drawn on the test dates.
test_dates = test['ds']
ax.plot(test_dates, cov_forecast[0], ls='--', label='Forecast (w/ covariates)')
ax.plot(test_dates, no_cov_preds['timesfm'], ls=':', label='Forecast w/o covariates')

ax.set_title("Store 1")
ax.set_xlabel('Date')
ax.set_ylabel('Sales volume ($)')
ax.legend(loc=1)

fig.autofmt_xdate()
plt.tight_layout()

In [None]:
from utilsforecast.losses import mae, smape
from utilsforecast.evaluation import evaluate

# Put the actuals and both forecasts side by side. `.assign` returns a new
# frame, so the forecast columns are not written onto a slice of cv_df.
# Forecast values are attached by position (plain arrays), not by index.
eval_df = test[['unique_id', 'ds', 'Weekly_Sales']].assign(
    timesfm_cov=cov_forecast[0],
    timesfm=no_cov_preds['timesfm'].to_numpy(),
)

# Score both models with MAE and sMAPE on the test rows.
evaluation = evaluate(
    eval_df,
    metrics=[mae, smape],
    models=['timesfm', 'timesfm_cov'],
    target_col='Weekly_Sales',
    id_col='unique_id'
)

evaluation

In [None]:
plt.rcParams.update({'font.size': 15})
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16,8))

x = ['TimesFM', 'TimesFM + features']
# NOTE(review): these metric values are hard-coded, presumably copied from an
# earlier run of the evaluation cell - re-check them whenever the forecasts change.
y_mae = [64579, 81961]
y_smape = [2.02, 2.57]

# (axis, values, legend label, y-axis label, vertical offset of the value text)
panels = [
    (ax1, y_mae, 'MAE', 'MAE ($)', 300),
    (ax2, y_smape, 'sMAPE', 'sMAPE (%)', .03),
]

for ax, values, legend_label, y_label, text_offset in panels:
    ax.bar(x, values, width=0.4, label=legend_label)
    ax.set_xlabel('Models')
    ax.set_ylabel(y_label)
    ax.legend()

    # Print each bar's value just above it.
    for position, value in enumerate(values):
        ax.text(x=position, y=value+text_offset, s=str(value), ha='center')

plt.tight_layout()