In [1]:
import os
import yaml
import torch
import lightning.pytorch as pl
from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks import ModelCheckpoint
pl.seed_everything(42)

import sys
sys.path.append('..')

from chaosbench import dataset
from chaosbench.models import model

[rank: 0] Seed set to 42
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load config filepath which consists of all the definition needed to fit/eval a model
model_config_filepath = '../chaosbench/configs/cnn_s2s.yaml'
with open(model_config_filepath, 'r') as config_filepath:
    hyperparams = yaml.load(config_filepath, Loader=yaml.FullLoader)

model_args = hyperparams['model_args']
data_args = hyperparams['data_args']

In [3]:
hyperparams

{'model_args': {'model_name': 'cnn_s2s',
  'input_size': 60,
  'output_size': 60,
  'learning_rate': 0.01,
  'num_workers': 0,
  'epochs': 500,
  't_max': 500,
  'only_headline': False},
 'data_args': {'batch_size': 32,
  'train_years': [1979,
   1980,
   1981,
   1982,
   1983,
   1984,
   1985,
   1986,
   1987,
   1988,
   1989,
   1990,
   1991,
   1992,
   1993,
   1994,
   1995,
   1996,
   1997,
   1998,
   1999,
   2000,
   2001,
   2002,
   2003,
   2004,
   2005,
   2006,
   2007,
   2008,
   2009,
   2010,
   2011,
   2012,
   2013,
   2014,
   2015],
  'val_years': [2016, 2017, 2018, 2019, 2020],
  'n_step': 1,
  'lead_time': 1,
  'land_vars': ['t2m', 'swvl1', 'swvl2', 'swvl3', 'lai_hv', 'lai_lv', 'sd'],
  'ocean_vars': ['sosstsst', 'ileadfra']}}

In [4]:
# Initialize model
# By passing the necessary hyperparameters (model + dataset)
baseline = model.S2SBenchmarkModel(model_args=model_args, data_args=data_args)
baseline.setup()

In [7]:
# Setup trainer
# Including tensorboard logger and checkpoint callback (eg. saving top-1 based on lowest validation error)
tb_logger = pl_loggers.TensorBoardLogger(save_dir=f'logs/{model_args["model_name"]}')
checkpoint_callback = ModelCheckpoint(monitor='val_loss', mode='min')

trainer = pl.Trainer(
    devices="auto",
    accelerator='cpu',
    strategy='auto',
    max_epochs=model_args['epochs'],
    logger=tb_logger,
    callbacks=[checkpoint_callback]
 )

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/data/gent/vo/000/gvo00090/vsc42145/.local/miniconda/envs/heat/lib/python3.10/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [8]:
# Fit the model
# Checkpoint can be found under `logs/<MODEL_NAME>`
trainer.fit(baseline)


  | Name  | Type           | Params | Mode 
-------------------------------------------------
0 | model | EncoderDecoder | 21.7 M | train
1 | loss  | MSE            | 0      | train
-------------------------------------------------
21.7 M    Trainable params
0         Non-trainable params
21.7 M    Total params
86.773    Total estimated model params size (MB)
85        Modules in train mode
0         Modules in eval mode
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 0:   2%|▏         | 10/423 [07:29<5:09:21,  0.02it/s, v_num=8, train_loss_step=0.928]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
# Initialize observations dataset by specifying the training/validation years
train_years = np.arange(1979, 2016)
val_years = test_years = np.arange(2016, 2023)

# Also land + ocean variables to be included (acronyms are detailed in the paper)
#land_vars = ['skt', 'src', 'stl1', 'stl2', 'stl3', 'swvl1', 'swvl2', 'swvl3']
#ocean_vars = ['somxl010', 'somxl030', 'sosaline', 'sossheig', 'sosstsst'] # 
land_vars = ['t2m', 'swvl1', 'swvl2', 'swvl3', 'lai_hv', 'lai_lv']
ocean_vars = ['sosstsst']

train_dataset = dataset.S2SObsDataset(years=train_years, land_vars=land_vars, ocean_vars=ocean_vars)
val_dataset = dataset.S2SObsDataset(years=val_years, land_vars=land_vars, ocean_vars=ocean_vars)