## Import & Environment Setting

In [1]:
import torch
from torch import autograd
from torch.utils.data import DataLoader

import json
import gc
import numpy as np

from utils.dataset import DetectionFolder
from utils.model import YoloV3, YoloLoss

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dtype = torch.float

## Load Config

In [3]:
with open("./config/config.json", "r") as config_file:
    main_config = json.load(config_file)

try:
    model_config = main_config['model']
    loss_config = main_config['loss']
    train_config = main_config['train']
except NameError:
    assert False, ('Failed to load config file')
except KeyError:
    assert False, ('Failed to find key on config file')

In [4]:
model_config['device'] = device
model_config['dtype'] = dtype
model_config['attrib_count'] = 5 + model_config['class_count']

loss_config['device'] = device
loss_config['dtype'] = dtype
loss_config['attrib_count'] = model_config['attrib_count']


train_config['device'] = device
train_config['dtype'] = dtype

## Build

In [5]:
model = YoloV3(model_config)
model.to(model_config['device'])

YoloV3(
  (darknet): Darknet53(
    (baseline): Sequential(
      (0): ConvLayer(
        (body): Sequential(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.01)
        )
      )
      (1): ConvLayer(
        (body): Sequential(
          (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.01)
        )
      )
      (2): ResidualLayer(
        (block): Sequential(
          (0): ConvLayer(
            (body): Sequential(
              (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): LeakyReLU(negative_slope=

In [6]:
loss_func = YoloLoss(loss_config)

In [7]:
train_context = { }

train_context['dataset'] = DetectionFolder(train_config['train_list'], train_config['train_image'], train_config['train_label'])
train_context['dataloader'] = DataLoader(train_context['dataset'], batch_size = train_config['batch_size'], num_workers = 4)

train_context['epoch'] = 0
train_context['last_checkpoint'] = 0

train_context['lr'] = train_config['init_lr']
train_context['loss_window'] = []

In [8]:

lr_func = lambda epoch: train_context['lr']
optimizer = torch.optim.Adam(model.parameters(), lr = train_context['lr'])
#train_context['optimizer'] = torch.optim.SGD(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda = lr_func, last_epoch = -1)

In [9]:
def train(model, loss_func, optimizer, scheduler, train_context, train_config, epochs):
    
    # set anomaly detection
    torch.autograd.set_detect_anomaly(train_config['use_anomaly_detection'])
        
    # training step
    for _ in range(0, epochs):
        losses = []
        for idx, batches in enumerate(train_context['dataloader']):
            
            image = batches['image'].to(train_config['device'], dtype = train_config['dtype'])
            labels = batches['label'].to(train_config['device'], dtype = train_config['dtype'])
            label_len = batches['label_len'].to(train_config['device'], dtype = torch.long)
            
            # forward
            out1, out2, out3 = model(image)
       
            # clear optimizer
            optimizer.zero_grad()
        
            # loss
            loss = loss_func(torch.cat((out1, out2, out3), 1), labels, label_len)
            losses.append(loss.item())
            
            # backward
            loss.backward()
            optimizer.step()
        
            # cleanup
            del image, labels, label_len
            del out1, out2, out3
            gc.collect()
            torch.cuda.empty_cache()
    
        # update learning rate & scheduler
        avg_loss = np.mean(losses) if len(losses) is not 0 else 0
        train_context['loss_window'].append(avg_loss)
        
        if (len(train_context['loss_window']) >= train_config['lr_window'] and
            np.mean(train_context['loss_window']) < avg_loss):
                
            train_context['lr'] = train_context['lr'] * train_config['lr_decay']
            train_context['loss_window'] = []
            
        if len(train_context['loss_window']) > 3 * train_config['lr_window'] :
            train_context['loss_window'] = train_context['loss_window'][2 * train_config['lr_window']:]
        
        scheduler.step()
        
        
        # update context
        train_context['dataset'].shuffle()
        train_context['epoch'] += 1
        
        # save model
        if train_context['epoch'] % train_config['checkpoint'] is 0:
            train_context['last_checkpoint'] = train_config['checkpoint']
            torch.save(model, train_config['checkpoint_dir'] + 'model_' + str(train_context['epoch']) + '.dat')
    

## Run

In [13]:
train(model, loss_func, optimizer, scheduler, train_context, train_config, train_config['epochs'])

In [14]:
print(train_context)

{'dataset': <utils.dataset.DetectionFolder object at 0x000002F602025E48>, 'dataloader': <torch.utils.data.dataloader.DataLoader object at 0x000002F602025EF0>, 'epoch': 20, 'last_checkpoint': 0, 'lr': 0.1, 'loss_window': [3631.4145304361978, 3384.686830066499, 3210.3362412225633, 3081.909654163179, 2947.4850020635695, 2810.181434268043, 2672.3710196358816, 2532.3981330508277, 2396.435538155692, 2270.2538641066776, 2145.1538514636813, 2024.0569668724424, 1908.6742393856957, 1792.015873500279, 1694.9195004417784, 1595.8247840518043, 1503.2187917800177, 1419.182502019973, 1338.4549734933037, 1263.557623726981]}
