# Basic Weather Forecasting Model

Simple workflow to train a multivariate time series forecasting model using tsai.

In [None]:
# Imports
import numpy as np
import pandas as pd
from tsai.all import (
    Nan2Value,
    ShowGraph,
    # SlidingWindow,
    SlidingWindowPanel,
    TSForecaster,
    # TimeSplitter,
    TSStandardize,
    get_splits,
    mae,
    rmse,
)

## 1. Load Data

In [None]:
# Read the raw observations from disk and summarize what was loaded
csv_path = "data.csv"
df = pd.read_csv(csv_path)

print(f"Data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Preview the first rows (rendered as the cell output)
df.head()

## 2. Prepare Data

Process all 19 stations together for multi-station forecasting.

In [None]:
# Columns used both as model inputs and as forecast targets
features = ["temperature", "wind_average", "wind_gust", "wind_bearing"]


def _fill_station_gaps(column):
    """Forward-fill, then back-fill, the missing values of one station's column."""
    return column.ffill().bfill()


# Keep every station (no filtering to a single one); order rows by station and
# then by timestamp, and renumber the index from zero.
# NOTE(review): "timestamp" is sorted as loaded from the CSV; if it is a string
# column this presumably relies on a lexicographically sortable format — confirm.
df_sorted = df.sort_values(by=["id", "timestamp"], ignore_index=True)

# Fill gaps within each station's group only, never across stations
df_sorted[features] = df_sorted.groupby("id")[features].transform(_fill_station_gaps)

# Rows that still lack a value in any feature column are discarded
df_sorted = df_sorted.dropna(subset=features)

station_count = df_sorted["id"].nunique()
row_count = len(df_sorted)
missing_count = df_sorted[features].isnull().sum().sum()

print(f"Total stations: {station_count}")
print(f"Total rows after cleaning: {row_count}")
print(f"Missing values remaining: {missing_count}")

## 3. Create Sliding Windows with SlidingWindowPanel

- Window: 60 timesteps (10 hours of history, assuming 10-minute sampling)
- Horizon: 6 timesteps (1 hour ahead, assuming 10-minute sampling)
- All 19 stations processed together

In [None]:
# Build (history window, forecast target) pairs for the multi-station panel
window_len = 60  # 10 hours of history
horizon = 6  # 1 hour prediction

# The same feature columns serve as inputs (get_x) and targets (get_y);
# windows are built per station id, with rows ordered by timestamp.
panel_windower = SlidingWindowPanel(
    window_len=window_len,
    horizon=horizon,
    unique_id_cols=["id"],
    get_x=features,
    get_y=features,
    sort_by="timestamp",
)
X, y = panel_windower(df_sorted)

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Scan each array for NaNs once and reuse the result below
x_has_nan = np.isnan(X).any()
y_has_nan = np.isnan(y).any()

print("\nNaN check:")
print(f"X contains NaN: {x_has_nan}")
print(f"y contains NaN: {y_has_nan}")
if x_has_nan or y_has_nan:
    print("WARNING: NaN values detected in training data!")

## 4. Create Train/Validation Split

In [None]:
# Shuffled 80/20 train/validation split with a fixed seed.
# get_splits receives a plain range of window indices rather than the 3D array y.
# NOTE(review): SlidingWindowPanel is called without a stride argument, so
# neighbouring windows of a station presumably overlap; a shuffled split may
# then put near-duplicate windows in both sets and make validation metrics
# optimistic — confirm whether a chronological split is more appropriate.
n_windows = len(y)
splits = get_splits(range(n_windows), valid_size=0.2, shuffle=True, random_state=23)

train_idx, valid_idx = splits[0], splits[1]
print(f"Train samples: {len(train_idx)}")
print(f"Valid samples: {len(valid_idx)}")

## 5. Build and Train Model

In [None]:
# Configuration for the forecaster, gathered in one place for easy tuning
forecaster_kwargs = dict(
    splits=splits,  # train/validation indices
    path="models",  # directory passed to the learner as its path
    batch_tfms=[TSStandardize(), Nan2Value()],  # normalize and handle NaN values
    bs=128,  # batch size
    arch="PatchTST",  # model architecture, selected by name
    metrics=[mae, rmse],  # evaluation metrics
    cbs=ShowGraph(),  # show training graph
)

# Build the TSForecaster from the windowed inputs and targets
fcst = TSForecaster(X, y, **forecaster_kwargs)

In [None]:
# Fit with the one-cycle schedule: 5 epochs, maximum learning rate 1e-3
n_epochs = 5
peak_lr = 1e-3
fcst.fit_one_cycle(n_epoch=n_epochs, lr_max=peak_lr)

## 6. Save Model

In [None]:
# Serialize the trained forecaster under the given file name
export_name = "weather_forecast_basic.pkl"
fcst.export(export_name)
print("Model saved!")

## 7. Quick Evaluation

In [None]:
# Get predictions on the validation set.
# With its default with_decoded=True, get_X_preds returns three items
# (raw predictions, targets, decoded predictions); unpacking into only two
# names raises "ValueError: too many values to unpack". The decoded output is
# not needed for this regression-style evaluation.
valid_idx = splits[1]
preds, targets, decoded_preds = fcst.get_X_preds(X[valid_idx], y[valid_idx])

print(f"Predictions shape: {preds.shape}")
print(f"Targets shape: {targets.shape}")

# Calculate MAE per variable.
# Convert to NumPy so the reduction below does not depend on the tensor type
# (assumes the returned tensors live on the CPU — TODO confirm on GPU runs).
abs_err = np.abs(np.asarray(preds) - np.asarray(targets))

# The outputs are expected to be laid out as (samples, variables, horizon).
# Fail loudly if that is not the case instead of printing mislabeled numbers.
assert abs_err.ndim == 3 and abs_err.shape[1] == len(features), (
    f"Expected errors shaped (samples, {len(features)}, horizon), got {abs_err.shape}"
)

# Average over samples (axis 0) and horizon steps (axis 2) so that one value
# per variable remains; averaging over axes (0, 1) would instead yield one
# value per horizon step and label them with the wrong feature names.
mae_per_var = abs_err.mean(axis=(0, 2))
for feature, feature_mae in zip(features, mae_per_var):
    print(f"{feature}: MAE = {feature_mae:.3f}")