In [1]:
import os
from typing import Optional
from typing import Tuple, Dict

import numpy as np
import polars as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import (
    ModelCheckpoint,
    LearningRateMonitor,
    EarlyStopping,
)
from pytorch_lightning.loggers import TensorBoardLogger

from model.data_loader import load_data, IonosphereDataModule
from model.lstm_model import LSTMForecastModel

In [2]:
# Location of the raw parquet files. Override with the IONOSPHERE_DATA_DIR
# environment variable to run this notebook on another machine; the default
# keeps the original author's local path so existing runs are unaffected.
DATA_DIR = os.environ.get(
    "IONOSPHERE_DATA_DIR", "/home/pupperemeritus/isro_project/data"
)
DATA_PATH = os.path.join(DATA_DIR, "October2023.parquet")

# Load the October 2023 dataset via the project's loader.
data = load_data(DATA_PATH)

In [3]:
# Quick sanity check: display the dimensions of the loaded dataset before it
# is handed to the data module.
data.shape

(2861526, 38)

In [4]:
# --- Reproducibility ---------------------------------------------------------
# `deterministic=True` on the Trainer only selects deterministic kernels; the
# RNGs still have to be seeded for weight init and shuffling to be repeatable.
# Seed before the model is constructed so its initial weights are covered.
SEED = 42
seed_everything(SEED, workers=True)

# --- Free cached GPU state left over from earlier runs in this kernel --------
torch.clear_autocast_cache()
torch.cuda.empty_cache()

# --- Experiment configuration ------------------------------------------------
prediction_horizon = 10
grid_resolution = 10
grid_lon_range = (65, 100)
grid_lat_range = (0, 40)
epochs = 50

data_module = IonosphereDataModule(
    dataframe=data,
    sequence_length=30,
    prediction_horizon=prediction_horizon,
    grid_lon_range=grid_lon_range,
    grid_lat_range=grid_lat_range,
    grid_resolution=grid_resolution,
    batch_size=32,
    num_workers=0,
)

# Grid dimensions are derived from the same range tuples that are passed to
# the data module, so the model's input size cannot drift from the data grid.
# np.arange excludes the stop value.
# NOTE(review): assumes IonosphereDataModule builds its grid with the same
# half-open convention -- confirm against model/data_loader.py.
grid_lat_steps = len(np.arange(*grid_lat_range, grid_resolution))
grid_lon_steps = len(np.arange(*grid_lon_range, grid_resolution))
grid_shape = (grid_lat_steps, grid_lon_steps)

model = LSTMForecastModel(
    input_dim=grid_shape[0] * grid_shape[1],  # Flattened grid size
    hidden_dim=32,
    grid_shape=grid_shape,
    output_size=prediction_horizon,  # One output step per forecast step
)

# --- Logging and callbacks ---------------------------------------------------
logger = TensorBoardLogger("tb_logs", name="gurunet_model")

# Keep the three checkpoints with the lowest validation loss, stored in a
# directory that matches the TensorBoard run version.
checkpoint_callback = ModelCheckpoint(
    dirpath=f"checkpoints/version_{logger.version}",
    filename="gurunet-{epoch:02d}-{val_loss:.5f}",
    save_top_k=3,
    monitor="val_loss",
    mode="min",
    verbose=True,
)

early_stop_callback = EarlyStopping(monitor="val_loss", patience=15, mode="min")

lr_monitor = LearningRateMonitor(logging_interval="epoch", log_momentum=True)

# --- Trainer -----------------------------------------------------------------
torch.set_float32_matmul_precision("medium")
trainer = Trainer(
    max_epochs=epochs,
    accelerator="gpu",
    devices=1,
    callbacks=[
        checkpoint_callback,
        early_stop_callback,
        lr_monitor,
    ],
    logger=logger,
    # "16-mixed" is the explicit name for fp16 automatic mixed precision; the
    # bare "16" spelling is only a legacy alias for the same mode.
    precision="16-mixed",
    enable_progress_bar=True,
    enable_checkpointing=True,
    accumulate_grad_batches=3,  # effective batch size = 3 * 32
    profiler="simple",
    min_epochs=25,  # train at least this long before early stopping may end the run
    deterministic=True,
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [5]:
# Train and validate the model, passing the data module through the explicit
# `datamodule` keyword.
# NOTE(review): the recorded run of this cell stopped during sanity checking
# with "AttributeError: 'IonosphereDataset' object has no attribute
# '_calculate_grid'". That error comes from the dataset class in
# model/data_loader.py and has to be fixed there, not in this notebook.
trainer.fit(model=model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type   | Params | Mode 
----------------------------------------------
0 | lstm_s4    | LSTM   | 6.4 K  | train
1 | fc_s4      | Linear | 5.3 K  | train
2 | lstm_phase | LSTM   | 6.4 K  | train
3 | fc_phase   | Linear | 5.3 K  | train
----------------------------------------------
23.4 K    Trainable params
0         Non-trainable params
23.4 K    Total params
0.093     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

AttributeError: 'IonosphereDataset' object has no attribute '_calculate_grid'

In [None]:
# Evaluate on the data module's test split using the model object as it stands
# after training.
# NOTE(review): this presumably scores the final-epoch weights rather than the
# best checkpoint tracked by ModelCheckpoint -- confirm that is intended.
trainer.test(model=model, datamodule=data_module)