# Example of how to use torchtrainer

In [1]:
from typing import List, Tuple

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, Subset, DataLoader

# trainer
from torchtrainer import Trainer
# hooks
from torchtrainer import EarlyStopping, NaNStopping, CSVHook

In [2]:
# Fixed seed so that the random dummy data, the weight initialisation and the
# shuffling order are the same after every "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Device that batches are moved to in the step function and that is handed to the Trainer.
DEVICE = torch.device("cpu")
# Mini-batch size used by the data loaders and passed to `Trainer.train`.
BATCH = 10

## Prepare dataset

In [3]:
# Synthetic regression data: 100 samples, each with 32 random input features
# and one random target value (all drawn uniformly from [0, 1)).
n_samples, n_features = 100, 32
x = torch.rand(n_samples, n_features)
y = torch.rand(n_samples, 1)

# Pair inputs and targets so that indexing the dataset yields an `(x, y)` tuple.
dataset = TensorDataset(x, y)

In [4]:
# data loaders: the first 80 samples are used for training, the remaining ones for validation
N_TRAIN = 80
train_indices = list(range(N_TRAIN))
val_indices = list(range(N_TRAIN, len(dataset)))

# Training samples are reshuffled every epoch.
train_loader = DataLoader(Subset(dataset, train_indices), batch_size=BATCH, shuffle=True)
# Validation does not need shuffling; a fixed order keeps the evaluation deterministic.
val_loader = DataLoader(Subset(dataset, val_indices), batch_size=BATCH, shuffle=False)

## Prepare model

In [5]:
class Model(nn.Module):
    """Small fully-connected network with two hidden ReLU layers.

    Args:
        in_features: Size of each input sample. Defaults to 32.
        hidden_features: Width of both hidden layers. Defaults to 128.
        out_features: Size of each output sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 32, hidden_features: int = 128, out_features: int = 1):
        super().__init__()
        # Kept as one `nn.Sequential` stored in `self.layers` so that the
        # `state_dict` keys (`layers.0.weight`, ...) do not change.
        self.layers = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a `(..., in_features)` tensor to a `(..., out_features)` tensor."""
        return self.layers(x)


# Instance with the default sizes (32 -> 128 -> 128 -> 1) used in the rest of the notebook.
model = Model()

## Define Training

Model training can be easily performed in **only two steps**.

    1. Define one training step. (if necessary, also define one validation step.)
    2. Define Trainer object.

### 1. Define one training step
Define a function that represents one training step.  
The function takes one `batch` and the `model`, and returns a list of losses and a list of predictions, in that order.  
A `batch` can be in any data format (e.g. `dict`, `tuple`).

In [6]:
# define loss function: absolute errors summed over every element of the batch
loss_fn = torch.nn.L1Loss(reduction="sum")


def train_step(train_batch, model) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
    """Run the forward pass and compute the loss for one batch.

    Args:
        train_batch: One batch; index 0 holds the inputs and index 1 holds
            the targets.
        model: The model used to compute the predictions.

    Returns:
        A pair `(losses, predictions)`: a list containing the single loss
        tensor and a list containing the single prediction tensor, in that
        order.
    """
    # get input and output from one batch and move them to the target device
    x = train_batch[0].to(DEVICE)
    y = train_batch[1].to(DEVICE)
    # prediction
    pred = model(x)
    # calculate loss
    loss = loss_fn(pred, y)

    # return list of loss and list of prediction
    return [loss], [pred]

### 2. Define Trainer object

In [7]:
# Adam optimizer over all parameters of the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Optional: learning-rate scheduler (cosine annealing with T_max=5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=5)

# Optional: metrics for CSVHook
# A metric receives one batch and the result list (the one you build in your
# step function) and returns a list of tensors.
def rmse(batch, result_list) -> List[torch.Tensor]:
    """Root-mean-square error between the targets of a batch and the predictions.

    Args:
        batch: One batch; index 1 holds the targets.
        result_list: List whose first element is the prediction tensor.

    Returns:
        A one-element list containing the RMSE as a scalar tensor.
    """
    pred = result_list[0]
    # The raw batch has not been moved to any device here, so put the targets
    # on the same device as the predictions before subtracting.
    ground_truth = batch[1].to(pred.device)
    return [torch.sqrt(torch.mean(torch.pow(ground_truth - pred, 2)))]

# Hooks, built in the order in which they are handed to the Trainer.
training_hooks = [
    # early stopping hook
    EarlyStopping(10),
    # catch NaN of loss
    NaNStopping(),
    # log to csv; besides the loss, any extra metrics can be logged
    # (`metrics_names` are the names written to the csv file)
    CSVHook(log_path="./", metrics=[rmse], metrics_names=["rmse"]),
]

trainer = Trainer(
    # The best model and the `state_dict` of the last step are saved automatically;
    # the last step is saved in the checkpoints directory.
    model_path="./",
    # PyTorch model
    model=model,
    # training settings
    n_epoch=100,
    device=DEVICE,
    # data loaders
    train_loader=train_loader,
    val_loader=val_loader,
    # list of optimizers
    optimizer_list=[optimizer],
    # Optional: list of schedulers
    scheduler_list=[scheduler],
    # list of hooks
    hooks=training_hooks,
)

### Training
Just call the `train` method!  
`train` returns the best loss.  
If `verbose=True` is set, the progress of every epoch is printed to standard output.

In [8]:
# Arguments for `Trainer.train`:
#   batch      - batch size
#   train_step - function that performs one training step
#   val_step   - Optional: when None, `train_step` is used for validation as well
#   verbose    - verbosity
train_kwargs = dict(
    batch=BATCH,
    train_step=train_step,
    val_step=None,
    verbose=True,
)

# `train` returns the best loss.
best_loss = trainer.train(**train_kwargs)

epoch 1 start
Training loss:
	 0: 0.237
validation loss:
	 0: 0.229
model is saved in epoch 1
--------------------------------------------------

epoch 2 start
Training loss:
	 0: 0.235
validation loss:
	 0: 0.227
model is saved in epoch 2
--------------------------------------------------

epoch 3 start
Training loss:
	 0: 0.234
validation loss:
	 0: 0.227
model is saved in epoch 3
--------------------------------------------------

epoch 4 start
Training loss:
	 0: 0.233
validation loss:
	 0: 0.227
--------------------------------------------------

epoch 5 start
Training loss:
	 0: 0.233
validation loss:
	 0: 0.227
model is saved in epoch 5
--------------------------------------------------

epoch 6 start
Training loss:
	 0: 0.232
validation loss:
	 0: 0.226
model is saved in epoch 6
--------------------------------------------------

epoch 7 start
Training loss:
	 0: 0.231
validation loss:
	 0: 0.224
model is saved in epoch 7
--------------------------------------------------

epoc

In [9]:
# check log file: load ./log.csv and display its first rows
# (pandas is imported as `pd` in the import cell at the top of the notebook)
df = pd.read_csv("./log.csv")
df.head()

Unnamed: 0,Time,LearningRate_1,TrainLoss_1,ValidationLoss_1,rmse_1
0,0.011162,9e-05,0.255615,0.234342,0.025994
1,0.018201,6.5e-05,0.253478,0.236725,0.026634
2,0.025172,3.5e-05,0.251,0.237357,0.026591
3,0.032074,1e-05,0.250394,0.238492,0.026827
4,0.039026,0.0,0.249666,0.238597,0.026509
