### Model Validation & Learning Rate Monitoring

I wrote a quick script for training logs that print the train/val loss and learning rate per epoch. Logging these is always good practice: it shows how the learning rate is scheduled and helps you make sure you are not overfitting on the train set. Plotting the learning rate against the val loss can also be useful for understanding things like which learning rate produces the steepest gradient in val loss.

The expected log format is defined by the regex lines below; they are easy to change to match your own output.

In [None]:
# Regexes describing the expected log lines. Raw strings keep `\s` from being
# treated as an (invalid) string escape sequence.
# Fold header: any text, then "Fold <label>" followed by whitespace and more text.
fold_line = r'.+Fold (.+?)\s.+'
# Train line: "Epoch <n>: Train loss: <loss> ... LR: <lr> <more text>".
train_loss_line = r'Epoch (.+?): Train loss: (.+?)\s.*LR: (.+?)\s.+'
# Validation line: "Epoch <n>: Val loss: <loss>" up to the end of the line.
val_loss_line = r'Epoch (.+?): Val loss: (.+?)$'

### Full code

In [None]:
import re
import argparse
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Command-line alternative, kept for reference (disabled in the original):
#
# parser = argparse.ArgumentParser()
# parser.add_argument("-l", "--log", help="Path to ml training log", required=True)
# args = parser.parse_args()
# ml_log_file = args.log

ml_log_file = '../input/ml-logs/training_log'

# Regexes describing the expected log lines (matched case-insensitively from
# the start of each line). Raw strings keep `\s` from being treated as an
# invalid string escape sequence.
fold_line = r'.+Fold (.+?)\s.+'
train_loss_line = r'Epoch (.+?): Train loss: (.+?)\s.*LR: (.+?)\s.+'
val_loss_line = r'Epoch (.+?): Val loss: (.+?)$'


def parse_training_log(lines, default_fold='unknown'):
    """Group per-epoch train loss, val loss and learning rate by fold.

    Args:
        lines: Iterable of log lines (an open text file works).
        default_fold: Label used for loss lines that appear before any fold
            header, so they are kept instead of raising ``NameError``.

    Returns:
        A dict mapping each fold label (the text captured by ``fold_line``)
        to ``{'train_loss': [...], 'val_loss': [...], 'lr': [...]}`` with
        values in log order. Folds without any loss line are omitted. If a
        fold label occurs more than once, the later section replaces the
        earlier one.

    Raises:
        ValueError: If a captured loss or learning rate is not a valid float.
    """
    # Compile once instead of re-resolving the pattern for every line.
    fold_re = re.compile(fold_line, re.I)
    train_re = re.compile(train_loss_line, re.I)
    val_re = re.compile(val_loss_line, re.I)

    def new_record():
        # Fresh lists per fold so that no fold shares (or inherits) another
        # fold's values -- in particular the learning rates.
        return {'train_loss': [], 'val_loss': [], 'lr': []}

    def store(label, record):
        if record['train_loss'] or record['val_loss']:
            parsed[label] = record

    parsed = {}
    fold, record = default_fold, new_record()
    for line in lines:
        # A fold header closes the previous fold and starts a new one.
        match = fold_re.match(line)
        if match:
            store(fold, record)
            fold, record = match[1], new_record()
        # Train line carries both the train loss and the learning rate.
        match = train_re.match(line)
        if match:
            record['train_loss'].append(float(match[2]))
            record['lr'].append(float(match[3]))
        # Validation line carries the val loss.
        match = val_re.match(line)
        if match:
            record['val_loss'].append(float(match[2]))
    # The final fold has no following header to trigger the store above.
    store(fold, record)
    return parsed


# Runs when executed as a script or notebook cell; skipped on import so the
# parser can be reused without touching the file system.
if __name__ == "__main__":
    #print(f'Parsing {ml_log_file}')
    with open(ml_log_file) as fp:
        loss_dict = parse_training_log(fp)


# Draw one figure per fold: train/val loss per epoch on the left and val loss
# against learning rate on the right.
for fold, history in loss_dict.items():
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Train & Val Loss", "Val Loss vs LR"))
    train_loss = history['train_loss']
    val_loss = history['val_loss']
    lr = history['lr']

    # Left panel: both loss curves, indexed by position in the log.
    fig.add_trace(go.Scatter(y=train_loss,
                             mode='lines',
                             name='Train Loss'), row=1, col=1)
    fig.add_trace(go.Scatter(y=val_loss,
                             mode='lines',
                             name='Val Loss'), row=1, col=1)

    # Right panel: val loss plotted against the learning rate.
    fig.add_trace(go.Scatter(y=val_loss, x=lr,
                             mode='lines',
                             name='Val Loss vs LR'), row=1, col=2)

    # Set the title once and include the fold; previously the fold title was
    # overwritten by a generic one, leaving every figure labelled the same.
    fig.update_layout(height=400, width=1200, title_text=f"Model Validation - Fold: {fold}")
    fig.show()


### The above charts can be used to check a few things:
* Train & Val Loss vs Epochs Chart - Make sure that val loss continues to decrease and doesn't plateau or, worse, start increasing as train loss decreases. Overfitting on the train set can be avoided using things like Early Stopping.
* Val Loss vs LR curve - Monitor how val loss decreases with learning rate. This can be helpful in finding a good set of learning rates. 