# Monsoon LPS Position Forecasting with Temporal Fusion Transformer

This notebook loads monsoon Low Pressure System (LPS) track data, applies preprocessing including background variable integration and rolling means, and trains a Temporal Fusion Transformer (TFT) model to forecast LPS positions (Latitude and Longitude).

In [None]:
# Importing necessary libraries
import os
import warnings
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import torch

# Import the custom model class
from models.PositionForecasting import LPSPositionForecasting

## Load and Merge LPS Track Data with Background Variables

In [None]:
# Read the core LPS track table and discard its 'Unnamed: 0' column.
data = pd.read_csv("processed/all-lps-dataframe.csv")
data = data.drop(columns=['Unnamed: 0'])

# Read the two tables that carry the background-environment variables.
bg_data = pd.read_csv("processed/data_with_bg.csv")
bg_data = bg_data.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
uv_bg = pd.read_csv("/kaggle/input/u-v-bg/U-V-bg.csv")

# Parse both date columns into pandas datetimes.
for date_col in ('Genesis_Date', 'DateTime'):
    data[date_col] = pd.to_datetime(data[date_col])

# Copy the background columns into the main frame. pandas aligns these
# assignments on the row index, so this presumes all three files list the same
# rows in the same order -- TODO confirm against how the files were produced.
for bg_col in ("Q850_bg", "VS_bg"):
    data[bg_col] = bg_data[bg_col]

for uv_col in (
    "u_bg_250_850",
    "u_bg_300_700",
    "u_bg_400_600",
    "v_bg_250_850",
    "v_bg_300_700",
    "v_bg_400_600",
):
    data[uv_col] = uv_bg[uv_col]

## Add Time Index and Apply Rolling Mean to Key Columns
The rolling mean smooths out short-term fluctuations so that the broader trends in each variable stand out.

In [None]:
# Add a per-track time index (0, 1, 2, ... within each `id`) for temporal encoding.
# cumcount() numbers the rows inside every group directly, so it does not need
# the ids to be exactly the labels 1..N, does not need the rows to be grouped
# by id beforehand, and avoids re-running groupby().count() once per track.
time_idx = data.groupby("id").cumcount()
data["time_idx"] = time_idx

# Columns that are replaced by their per-track rolling mean.
rolling_columns = ['Latitude', 'Longitude', 'mslp', 'ls_ratio',
       'VO550', 'VO750', 'VO850', 'PV', 'Q850', 'Q850_grad', 'Q2', 'US_850',
       'UN_850', 'VE_850', 'VW_850', 'T2', 'Z_tilt', 'integrated_mse', 'Z250',
       'Z550', 'Z850', 'RF']

# Window length in rows (6 hours if each row is one hour -- TODO confirm).
window_size = 6

# The grouped rolling mean comes back indexed by (id, original row label).
# Dropping only the `id` level keeps the original row labels, so the
# assignment below lines each smoothed value up with the row it was computed
# from even if `data` is not sorted by id. min_periods=1 lets the first rows
# of a track be averaged over however many rows are available so far.
data_rolling = (
    data.groupby('id')[rolling_columns]
    .rolling(window=window_size, min_periods=1)
    .mean()
    .reset_index(level=0, drop=True)
)
data[rolling_columns] = data_rolling[rolling_columns]

## Crop Tracks to a Maximum Length of 8 Days (192 Hourly Steps)

In [None]:
# Maximum number of rows retained per track: 8 * 24 = 192.
# NOTE(review): that is eight days' worth if each row is one hour; the
# "5 days" wording used around this cell does not match this value -- confirm
# which one is intended.
max_rows = 8 * 24


def process_track(track):
    """Return `track` limited to its first `max_rows` rows.

    A track with at most `max_rows` rows is returned unchanged; a longer one
    is cut positionally, keeping rows 0 .. max_rows - 1.
    """
    if len(track) <= max_rows:
        return track
    return track.iloc[:max_rows]

# Keep only the first `max_rows` rows of every track (the same cut that
# process_track() makes on a single track), then renumber the rows 0..n-1.
# GroupBy.head() is used rather than GroupBy.apply(process_track) because
# apply() on a frame that still contains the grouping column is deprecated in
# recent pandas releases (the `id` column is slated to be excluded from the
# frame handed to the function). head() is a plain row filter: it keeps every
# column, including `id`, and preserves the original row order.
data = data.groupby('id').head(max_rows)
data = data.reset_index(drop=True)

## Instantiate the Position Forecasting Model

In [None]:
# Window sizes, expressed in rows (hours, if each row is one hour).
max_encoder_length = 24  # rows of history handed to the model as input
max_prediction_length = 5 * 24  # 120 rows, i.e. a 5-day horizon at hourly steps

# Column names passed to the model as `unknown_variables`.
unknown_variables = [
    "Latitude", "Longitude", "mslp", 'ls_ratio', 'VO850', "PV", "T2", "Q850",
    "Q2", "UN_850", "US_850", "VE_850", "RF",
]

# Names of the background columns passed to the model.
# NOTE: this rebinds `bg_data`, which earlier in the notebook held the
# background DataFrame; from here on it is a list of column names.
bg_data = ["Q850_bg", "VS_bg"]

# Gather the constructor arguments in one place, then build the model wrapper.
model_config = {
    "data": data,
    "max_prediction_length": max_prediction_length,
    "max_encoder_length": max_encoder_length,
    "unknown_variables": unknown_variables,
    "bg_data": bg_data,
}
postion_tft_model = LPSPositionForecasting(**model_config)

## Train the Model

In [None]:
# Run the training routine of the model object built in the previous cell.
# Per this notebook's description it fits the latitude and longitude
# forecasters; what train() actually does is defined in
# models/PositionForecasting.py, not here -- verify there.
# NOTE(review): `postion_tft_model` is a misspelling of "position"; renaming it
# has to be done in every cell that uses it.
postion_tft_model.train()

## Evaluate Model Performance
Evaluate the trained model on the validation/test dataset to assess its forecasting performance.

In [None]:
# Run the evaluation routine of the trained model object. Per this notebook's
# description it scores the latitude and longitude predictions; the metrics and
# the data split used are defined in models/PositionForecasting.py, not here --
# verify there. Any return value is not captured by this cell.
postion_tft_model.evaluate()