## Introduction

This notebook fine-tunes and evaluates the TimesFM model on the ambient dataset for benchmarking purposes.

GitHub: https://github.com/google-research/timesfm

arXiv: https://arxiv.org/abs/2310.10688

Frequency definitions (`freq_type` category: the pandas frequency aliases it covers)

0: T, MIN, H, D, B, U

1: W, M

2: Q, Y

In [1]:
from typing import Optional, Tuple
from os import path
import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader
from dataloader.dataloader import UnivariateMethaneHourly

from finetuning.finetuning_torch import FinetuningConfig, TimesFMFinetuner
from huggingface_hub import snapshot_download
import wandb

from timesfm import TimesFm, TimesFmCheckpoint, TimesFmHparams
from timesfm.pytorch_patched_decoder import PatchedTimeSeriesDecoder
import plotly.graph_objects as go
import argparse
# Authenticate with Weights & Biases up front; the fine-tuning configuration
# further down is created with use_wandb=True.
wandb.login()

# Last expression of the cell: displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

 See https://github.com/google-research/timesfm/blob/master/README.md for updated APIs.
Loaded PyTorch TimesFM, likely because python version is 3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0].


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ran/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mranluo87[0m ([33mranluo87-university-of-calgary-in-alberta[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    argparse's ``type=bool`` is a trap: it calls ``bool()`` on the raw string,
    and every non-empty string (including ``'False'``) is truthy.

    Args:
        value: The raw token from the command line, or an actual ``bool``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was under ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


# Default dataset location.
# NOTE(review): this is an absolute, machine-specific path; override it via
# --data_dir (or edit it here) when running on another machine.
data_dir = '/home/ran/Desktop/PycharmProjects/TimeSeries_Benchmarking/datasets/select'
data_file = 'Anzac.csv'

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default=data_dir)
parser.add_argument('--data_file', type=str, default=data_file)
# TimesFM configurations
parser.add_argument('--timesfm', type=_str2bool, default=True)
parser.add_argument('--freq_type', type=int, default=0)

parser.add_argument('--seq_len', type=int, default=512)
parser.add_argument('--pred_len', type=int, default=128)
# Optimization Hyperparams
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--learning_rate', type=float, default=1e-4)

# Parse an explicit empty argv so the kernel's own command-line arguments are
# ignored and every option takes its default value.
args = parser.parse_args([])

In [7]:
# Hugging Face repository that hosts the pretrained TimesFM checkpoint.
repo_id = "google/timesfm-2.0-500m-pytorch"

# Model hyperparameters; horizon and context lengths come from `args`.
hparams = TimesFmHparams(
    backend='gpu',
    per_core_batch_size=32,
    num_layers=50,
    horizon_len=args.pred_len,
    context_len=args.seq_len,
    use_positional_embedding=False,
    output_patch_len=128
)

# High-level TimesFM wrapper; it is used later for `forecast_on_df`.
tfm = TimesFm(
    hparams=hparams,
    checkpoint=TimesFmCheckpoint(
        huggingface_repo_id=repo_id
    )
)

# A separate decoder built from the wrapper's model config; this is the module
# that is later wrapped and passed to the fine-tuner.
model = PatchedTimeSeriesDecoder(tfm._model_config)

# Resolve the local snapshot of the repository and load the raw state dict.
# map_location='cpu' keeps the loaded tensors in host memory regardless of the
# device they were saved from, so this staging copy does not occupy GPU memory;
# load_state_dict then copies the values into the module's own parameters.
checkpoint_path = path.join(snapshot_download(repo_id), 'torch_model.ckpt')
loaded_checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=True)
# Last expression of the cell: displays the key-matching report.
model.load_state_dict(loaded_checkpoint)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

<All keys matched successfully>

In [8]:
# Fine-tuning settings; the optimisation hyperparameters come from `args`.
# NOTE(review): the saved output of this cell is a CUDA out-of-memory error on
# GPU 0 (10.57 GiB total), so the batch size and/or context length probably
# need lowering for this hardware. Confirm on the target machine.
config = FinetuningConfig(
    batch_size=args.batch_size,
    num_epochs=args.epochs,
    learning_rate=args.learning_rate,
    freq_type=args.freq_type,
    log_every_n_steps=10,
    val_check_interval=0.2,
    use_quantile_loss=True,
    use_wandb=True
)

train_dataset = UnivariateMethaneHourly(args, flag='train')
val_dataset = UnivariateMethaneHourly(args, flag='val')

# Wrap the model for multi-GPU data parallelism on up to three devices (0-2).
# Only device ids that actually exist on this machine are requested, and the
# wrap is skipped when `model` is already wrapped, so re-running this cell does
# not nest DataParallel inside DataParallel.
if not isinstance(model, torch.nn.DataParallel):
    device_ids = [0, 1, 2][:torch.cuda.device_count()]
    model = torch.nn.DataParallel(model, device_ids=device_ids)

finetuner = TimesFMFinetuner(model, config)
finetuner.finetune(train_dataset=train_dataset, val_dataset=val_dataset)

100%|██████████| 48024/48024 [00:00<00:00, 99294.85it/s] 
100%|██████████| 9789/9789 [00:00<00:00, 201054.99it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 10.57 GiB of which 8.12 MiB is free. Including non-PyTorch memory, this process has 10.55 GiB memory in use. Of the allocated memory 9.65 GiB is allocated by PyTorch, and 669.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Load the CSV selected in the configuration (`args`), then turn the row index
# into a regular first column.
raw_df = pd.read_csv(path.join(args.data_dir, args.data_file), parse_dates=True)
raw_df = raw_df.reset_index()

# Hold out the final 20 % of rows for evaluation. `.copy()` makes an independent
# frame so the column rename and datetime conversion below do not write into a
# slice of `raw_df` (which would raise SettingWithCopyWarning).
test_df = raw_df.iloc[int(len(raw_df) * 0.8):].copy()
# NOTE(review): after reset_index the first column is the former row index, so
# 'unique_id' takes a distinct value on every row. Confirm that this, rather
# than one constant series id, is what `forecast_on_df` should receive.
test_df.columns = ['unique_id', 'ds', 'values']
test_df['ds'] = pd.to_datetime(test_df['ds'])

forecast_df = tfm.forecast_on_df(
    inputs=test_df,
    freq='1H'
)

# Keep the point forecast only and average all predictions that share a
# timestamp, giving one forecast value per `ds`.
forecast_df = (
    forecast_df[['ds', 'timesfm']]
    .groupby(['ds'])
    .mean()
    .reset_index()
)

# Overlay forecast and ground truth, then export as a standalone HTML file.
fig = go.Figure()
fig.add_trace(go.Scatter(x=forecast_df['ds'], y=forecast_df['timesfm'], mode='lines+markers', name='Forecast'))
fig.add_trace(go.Scatter(x=test_df['ds'], y=test_df['values'], mode='lines+markers', name='True'))
# Title and axis labels so the exported figure can be read on its own.
fig.update_layout(
    title='TimesFM forecast vs. ground truth',
    xaxis_title='ds',
    yaxis_title='values'
)

fig.write_html("./timesfm.html")