In [1]:
import os.path as osp
import torch
from datasets import CardsDataset
from augmentations import SSDAugmentation, BaseTransform
from criterion import Criterion, Scheduler
from torch.utils.data import DataLoader
from dataloader import detection_collate
from torch.utils.tensorboard import SummaryWriter
from IPython.display import clear_output
from torchvision.models.detection import ssd300_vgg16 as ssd300

In [2]:
home = osp.expanduser("~")

get_dir = '\project_SSD\data'
data_root = home+get_dir
print(data_root)

C:\Users\Seriy755\project_SSD\data


In [3]:
traindataset = CardsDataset(root=data_root, mode='train', transform=SSDAugmentation())
testdataset = CardsDataset(root=data_root, mode='test', transform=BaseTransform())

In [4]:
trainloader = DataLoader(traindataset, batch_size=4, num_workers=0, shuffle=True, 
                        pin_memory=True, collate_fn=detection_collate)
testloader = DataLoader(testdataset, batch_size=4, num_workers=0, shuffle=False, 
                        pin_memory=True, collate_fn=detection_collate)

In [5]:
def train(model, opt, opt_params, epochs, train_loader, val_loader, device, 
          scheduler=None, scheduler_params=None, resume=None):
    writer = SummaryWriter()
    model.train()
    model.to(device)
    alpha = 1.5
    
    %load_ext tensorboard
    %tensorboard --logdir=runs
    
    if resume is not None:
        model.load_state_dict(torch.load(resume))
        lr = opt_params['learning_rate'] 
    else:
        lr = opt_params['learning_rate'] / 1e3
    criterion = Criterion(opt, opt_params)
    opt = criterion(model)
    if scheduler is not None:
        scheduler = Scheduler(scheduler, scheduler_params)
        scheduler = scheduler(opt)
    
    n_iter_train = 0
    n_iter_val = 0
    for epoch in range(epochs):
        print("Epoch {}/{}".format(epoch+1, epochs))
        train_loss = 0
        val_loss = 0
        for imgs, labels in train_loader:
            if device == 'cuda':
                imgs = imgs.cuda(non_blocking=True)
                for label in labels:
                    label['boxes'] = label['boxes'].cuda(non_blocking=True)
                    label['labels'] = label['labels'].cuda(non_blocking=True)
            
            opt.zero_grad()
            losses = model(imgs, labels)
            loss = alpha*losses['bbox_regression'] + losses['classification']
            loss.backward()
            opt.step()
            
            if lr < opt_params['learning_rate']:
                lr += (opt_params['learning_rate'] - opt_params['learning_rate'] / 1e3) / len(train_loader)
                opt = criterion(model, lr)
            
            writer.add_scalar('LossIter/train', loss, n_iter_train+1)
            n_iter_train += 1
            
            train_loss += loss / len(train_loader)
                   
        for imgs, labels in val_loader:
            if device == 'cuda':
                imgs = imgs.cuda(non_blocking=True)
                for label in labels:
                    label['boxes'] = label['boxes'].cuda(non_blocking=True)
                    label['labels'] = label['labels'].cuda(non_blocking=True)
            
            losses = model(imgs, labels)
            loss = alpha*losses['bbox_regression'].item() + losses['classification'].item()
            
            writer.add_scalar('LossIter/valid', loss, n_iter_val+1)
            n_iter_val += 1
            
            val_loss += loss / len(val_loader)
            
        if scheduler is not None:
            scheduler.step(val_loss)
        
        clear_output(wait=True)
        writer.add_scalars('LossEpoch', {'Train': train_loss, 'Valid': val_loss}, epoch+1)
        %load_ext tensorboard
        %tensorboard --logdir=runs
        
        
        print('On {} epoch train loss: {}; validation loss: {}'.format(epoch+1, train_loss, val_loss))
        if (epoch+1) % 5 == 0:
            torch.save(model.state_dict(), 'weights/SSD300_epoch{}.pth'.format(epoch+1))
            print('Weights on {} epoch is saved!'.format(epoch+1))

In [6]:
NUM_CLASSES=7
model = ssd300(num_classes=NUM_CLASSES, pretrained_backbone=True)
optimizer = 'adam'
opt_params = {'learning_rate': 1e-3, 'betas': (0.9, 0.999), 'weight_decay': 5e-4}
scheduler = 'ReduceLROnPlateau'
scheduler_params = {'mode': 'min', 'factor': 0.1, 'patience': 5, 'verbose': True,
                   'threshold': 1e-4, 'threshold_mode': 'rel', 'cooldown': 0,
                   'min_lr': 0, 'eps': 1e-8}

if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [7]:
train(model, optimizer, opt_params, 50, trainloader, testloader, 
      device, scheduler, scheduler_params)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 29528), started 0:25:28 ago. (Use '!kill 29528' to kill it.)

On 50 epoch train loss: 1.5327566862106323; validation loss: 4.987405507003561
Weights on 50 epoch is saved!
