## Example of training and testing deterministic models with PyTorch Lightning

In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from types import SimpleNamespace
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from bnnrul.cmapss.dataset import CMAPSSDataModule
from bnnrul.cmapss.models import CMAPSSModel, get_checkpoint

In [3]:
# Run configuration, gathered in one namespace so the cells below can read
# it as `args.<name>`.
args = SimpleNamespace()
args.data_path = "../data/cmapss/"  # passed to CMAPSSDataModule
args.out_path = "../results/cmapss/"  # root for checkpoints and logs
args.scn = "deterministic"  # sub-directory component of the output paths
args.net = "linear"  # passed to CMAPSSModel; also an output path component

# Directory given to get_checkpoint() and to ModelCheckpoint(dirpath=...).
checkpoint_dir = Path(args.out_path, args.scn, "checkpoints", args.net)

In [6]:
# Illustration only -- for a proper training run see scripts/cmapss_training.py
data = CMAPSSDataModule(args.data_path, batch_size=10000)
model = CMAPSSModel(data.win_length, data.n_features, args.net)

# Checkpoint handed to the Trainer below so that fitting resumes from it.
# NOTE(review): presumably None when checkpoint_dir holds no checkpoint --
# confirm in bnnrul.cmapss.models.get_checkpoint.
checkpoint_file = get_checkpoint(checkpoint_dir)

# Checkpointing and early stopping both watch the same metric key.
# NOTE(review): patience (20) is larger than max_epochs (5), so early stopping
# looks unlikely to trigger in this short run -- confirm this is intended.
monitor = "loss/val"
checkpoint_callback = ModelCheckpoint(monitor=monitor, dirpath=checkpoint_dir)
earlystopping_callback = EarlyStopping(patience=20, monitor=monitor)

tb_logger = pl.loggers.TensorBoardLogger(
    f"{args.out_path}/{args.scn}/lightning_logs/{args.net}"
)

# NOTE(review): `resume_from_checkpoint` is deprecated in newer PyTorch
# Lightning releases in favour of `trainer.fit(..., ckpt_path=...)` -- check
# against the installed version before upgrading.
trainer = pl.Trainer(
    callbacks=[checkpoint_callback, earlystopping_callback],
    logger=tb_logger,
    resume_from_checkpoint=checkpoint_file,
    log_every_n_steps=10,
    max_epochs=5,
    gpus=[0],  # runs on the first GPU only
)
trainer.fit(model, data)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at ../results/cmapss/deterministic/checkpoints/linear/epoch=3-step=463.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]
Restored all states from the checkpoint file at ../results/cmapss/deterministic/checkpoints/linear/epoch=3-step=463.ckpt

  | Name | Type   | Params
--------------------------------
0 | net  | Linear | 45.6 K
--------------------------------
45.6 K    Trainable params
0         Non-trainable params
45.6 K    Total params
0.182     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [5]:
# Rebuild the data module and restore the model weights from the checkpoint
# that get_checkpoint() returns for checkpoint_dir, then run the test loop.
data = CMAPSSDataModule(args.data_path, batch_size=10000)

trained_ckpt = get_checkpoint(checkpoint_dir)
model = CMAPSSModel.load_from_checkpoint(trained_ckpt)

tb_logger = pl.loggers.TensorBoardLogger(
    f"{args.out_path}/{args.scn}/lightning_logs/{args.net}"
)
trainer = pl.Trainer(logger=tb_logger, log_every_n_steps=10, gpus=[0])

# verbose=False: the value returned by test() is left as the cell's last
# expression, so the notebook displays it as the cell output.
trainer.test(model, data, verbose=False)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Testing: 0it [00:00, ?it/s]

[{'y_0': -20.67729949951172,
  'y_hat0': 37.00532150268555,
  'err_0': 72.58831787109375,
  'y_1': -31.992835998535156,
  'y_hat1': 37.00532531738281,
  'err_1': 82.15648651123047}]