### Training

Next, we perform training. We use the train, dev, and test folders specified above, and our logging is done in tensorboard. The model is saved every `save_every` epochs; the model with the best IoU is also saved under the name `model_best`.

In [1]:
from addict import Dict
from pathlib import Path

data_dir = Path("/datadrive/glaciers/")
processed_dir = data_dir / "processed_exper"

args = Dict({
    "batch_size": 16,
    "run_name": "demo", 
    "epochs": 200,
    "save_every": 50,
    "loss_type": "dice",
    "device": "cuda:0"
})


In [2]:
from glacier_mapping.data.data import fetch_loaders
from glacier_mapping.models.frame import Framework
import glacier_mapping.train as tr
from torch.utils.tensorboard import SummaryWriter
from torchvision.utils import make_grid
from glacier_mapping.models.metrics import diceloss
import yaml
import torch
import json

conf = Dict(yaml.safe_load(open(data_dir / "conf/train.yaml", "r")))
loaders = fetch_loaders(processed_dir, args.batch_size)
device = torch.device(args.device)

loss_fn = None
outchannels = conf.model_opts.args.outchannels
if args.loss_type == "dice":
    loss_fn = diceloss(
        act=torch.nn.Softmax(dim=1), 
        w=[0.6, 0.9, 0.2], # clean ice, debris, background
        outchannels=outchannels, 
        label_smoothing=0.2
    )
    
frame = Framework(
    model_opts=conf.model_opts,
    optimizer_opts=conf.optim_opts,
    reg_opts=conf.reg_opts,
    device=device,
    loss_fn=loss_fn
)

# Setup logging
writer = SummaryWriter(f"{data_dir}/{args.run_name}/logs/")
writer.add_text("Arguments", json.dumps(vars(args)))
writer.add_text("Configuration Parameters", json.dumps(conf))
out_dir = f"{data_dir}/{args.run_name}/models/"

best_epoch, best_iou = None, 0
for epoch in range(args.epochs):
    loss_d = {}
    for phase in ["train", "val"]:
        loss_d[phase], metrics = tr.train_epoch(loaders[phase], frame, conf.metrics_opts)
        tr.log_metrics(writer, metrics, loss_d[phase], epoch, phase, mask_names=conf.log_opts.mask_names)

    # save model
    writer.add_scalars("Loss", loss_d, epoch)
    if (epoch + 1) % args.save_every == 0:
        frame.save(out_dir, epoch)
        tr.log_images(writer, frame, next(iter(loaders["train"])), epoch)
        tr.log_images(writer, frame, next(iter(loaders["val"])), epoch, "val")

    if best_iou <= metrics['IoU'][0]:
        best_iou  = metrics['IoU'][0]
        best_epoch = epoch
        frame.save(out_dir, "best")

    print(f"{epoch}/{args.epochs} | train: {loss_d['train']} | val: {loss_d['val']}")

frame.save(out_dir, "final")
writer.close()

  y = torch.tensor(y, dtype=torch.long, device=self.device)


ValueError: Loss weights should be equal to the output channels.

In [3]:
conf

{'model_opts': {'name': 'UnetDropout',
  'args': {'inchannels': 3,
   'outchannels': 2,
   'net_depth': 5,
   'dropout': 0.3,
   'spatial': True}},
 'optim_opts': {'name': 'Adam', 'args': {'lr': 0.0001}},
 'metrics_opts': {'IoU': {'threshold': 0.4},
  'pixel_acc': {'threshold': 0.4},
  'precision': {'threshold': 0.4},
  'recall': {'threshold': 0.4},
  'dice': {'threshold': 0.4}},
 'log_opts': {'mask_names': ['glaciers', 'background']},
 'reg_opts': {'l1_reg': 0.0005}}