---
author: Zeel B Patel
badges: true
categories: ML
description: Exploring foundation models for time series forecasting
title: Foundation Models for Time Series Forecasting
date: '2024-07-06'
toc: true
---

In [1]:
# Config
import os

# Basic
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Monitoring
from tqdm.notebook import tqdm

# IO
from os.path import join, exists, basename, dirname
from glob import glob

# Parallel processing
from joblib import Parallel, delayed

import xarray as xr

## Data

In [2]:
# Location of the zipped Zarr store hosted on the Hugging Face Hub.
# NOTE(review): the "zip:///::https://" prefix is presumably resolved through
# fsspec URL chaining (zip layer over the HTTPS file) -- confirm.
zarr_zip_url = "zip:///::https://huggingface.co/datasets/Zeel/P1/resolve/main/all_in_one.zarr.zip"

# Open the store as an xarray Dataset and show its summary as the cell output
ds = xr.open_zarr(zarr_zip_url)
ds

In [3]:
# Station label and Timestamp range used to cut the full dataset down
station_name = "IGI Airport (T3), Delhi - IMD"
time_window = slice("2022", "2023")

# Select that station/time window and keep only the PM2.5 variable
# (double brackets keep the result a Dataset rather than a DataArray)
one_station_ds = ds.sel(station=station_name, Timestamp=time_window)[["PM2.5"]]
one_station_ds

In [4]:
# Pull PM2.5 out as a single-column DataFrame and average it into hourly bins
data = (
    one_station_ds['PM2.5']
    .to_dataframe()[['PM2.5']]
    .resample('h')
    .mean()
)

# Count the NaN entries present in the hourly series
print(f"Missing data: {data.isna().sum().values[0]}")

# Fill the gaps by linear interpolation, then report how many NaNs remain
data = data.interpolate(method='linear')
print(f"Missing data after interpolation: {data.isna().sum().values[0]}")

# Preview the first rows of the gap-filled hourly series
data.head()

Missing data: 298
Missing data after interpolation: 0


Unnamed: 0_level_0,PM2.5
Timestamp,Unnamed: 1_level_1
2022-01-01 00:00:00,273.5475
2022-01-01 01:00:00,268.8675
2022-01-01 02:00:00,258.0225
2022-01-01 03:00:00,194.91
2022-01-01 04:00:00,197.9975


In [6]:
import timesfm

# Constructor settings for the TimesFM model, gathered in one place.
# NOTE(review): the architecture values (patch lengths, layers, dims) presumably
# have to match the "timesfm-1.0-200m" checkpoint loaded below -- confirm
# against the TimesFM documentation.
timesfm_kwargs = dict(
    context_len=32,        # length of the input window
    horizon_len=24,        # number of steps to forecast
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend="gpu",
)

# Build the model object, then restore the pretrained weights from the Hub.
# NOTE(review): the recorded run of this cell ends in MemoryError while
# restoring the checkpoint; the cause is not visible from this notebook.
tfm = timesfm.TimesFm(**timesfm_kwargs)
tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")

Multiprocessing context has already been set.
Constructing model weights.




Constructed model weights in 3.76 seconds.
Restoring checkpoint from /home/patel_zeel/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


MemoryError: 