# Train YOLOv1

In [1]:
import shutil
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import sys
sys.path.append('../')
from torchsummary.torchsummary import summary
from dataloader import VOC
import numpy as np
import matplotlib.pyplot as plt
import yolov1
from yolov1 import detection_collate
from yolov1 import detection_loss
from yolov1 import save_checkpoint

# 0. Visdom on

In [2]:
def create_vis_plot(_xlabel, _ylabel, _title, _legend):
    """Open a new Visdom line window seeded with a single point at the origin.

    Relies on the notebook-level ``viz`` Visdom client being defined before
    this function is called.

    Args:
        _xlabel: Label for the x axis.
        _ylabel: Label for the y axis.
        _title: Title shown on the plot window.
        _legend: Legend entries, passed through to Visdom unchanged.

    Returns:
        Whatever ``viz.line`` returns for the newly created plot.
    """
    plot_options = {
        'xlabel': _xlabel,
        'ylabel': _ylabel,
        'title': _title,
        'legend': _legend,
    }
    # A single (0, 0) sample gives the window something to draw initially.
    origin_x = torch.zeros((1,)).cpu()
    origin_y = torch.zeros((1, 1)).cpu()
    return viz.line(X=origin_x, Y=origin_y, opts=plot_options)

In [3]:
def update_vis_plot(iteration, loss, window1, window2, update_type,
                    epoch_size=1):
    """Push one loss sample to an existing Visdom line window.

    Relies on the notebook-level ``viz`` Visdom client.

    Args:
        iteration: x-coordinate of the new sample.
        loss: Scalar loss value; it is divided by ``epoch_size`` before
            being plotted.
        window1: Visdom window that receives the sample.
        window2: Accepted but not used by this function.
        update_type: Forwarded to ``viz.line`` as its ``update`` argument.
        epoch_size: Divisor applied to ``loss`` (default 1, i.e. no scaling).
    """
    x_point = torch.ones((1, 1)).cpu() * iteration
    y_point = torch.Tensor([loss]).unsqueeze(0).cpu() / epoch_size
    viz.line(X=x_point, Y=y_point, win=window1, update=update_type)

In [4]:
import visdom

# Visdom client through which every plot window below is created.
viz = visdom.Visdom()

vis_title = 'Yolo V1 Deepbaksu_vision (feat. martin, visionNoob) PyTorch on ' + 'VOC'
vis_legend = ['Train Loss']


def _loss_window(y_label):
    """Create one Visdom line window whose y axis is labelled ``y_label``.

    Uses ``yolov1.create_vis_plot`` (which takes the client as its first
    argument), not the notebook-level ``create_vis_plot`` defined earlier.
    """
    return yolov1.create_vis_plot(viz, 'Iteration', y_label, vis_title, vis_legend)


# Total loss.
iter_plot = _loss_window('Total Loss')

# Coordinate / size terms, first set.
coord1_plot = _loss_window('coord1')
size1_plot = _loss_window('size1')

# Coordinate / size terms, second set.
coord2_plot = _loss_window('coord2')
size2_plot = _loss_window('size2')

# Class terms.
obj_cls_plot = _loss_window('obj_cls')
noobj_cls_plot = _loss_window('noobj_cls')

# Objectness terms.
objectness1_plot = _loss_window('objectness1')
objectness2_plot = _loss_window('objectness2')

# 1. Device configuration

In [5]:
# Use the first CUDA GPU when PyTorch reports one, otherwise fall back to CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Training schedule and optimisation settings.
num_epochs = 16000
batch_size = 15
learning_rate = 1e-4

# Model-related settings.
num_classes = 21
# Not referenced by any other cell visible in this notebook.
dropout_prop = 0.5

# 2. Load the Pascal VOC 2012 Dataset

In [6]:
# Candidate dataset roots on the different development machines; only
# SMALL_DATASET_PATH is used below.
# NOTE(review): these are absolute, machine-specific paths — consider making
# the dataset root configurable.
DATASET_PATH_MARTIN = "/media/keti-ai/AI_HARD3/DataSets/VOC_Pascal/VOC/VOCdevkit/VOC2012"
# Raw string: a plain literal would contain the invalid escape sequence "\V",
# which Python warns about. The resulting path text is unchanged.
DATASET_PATH_JAEWON = r"H:\VOC\VOC12\VOCdevkit_2\VOC2012"
SMALL_DATASET_PATH = "D:/dataset/person-300"

# Images are converted to tensors by ToTensor(); the dataset is built with
# class selection enabled and "person" as the selected class.
train_dataset = VOC(root=SMALL_DATASET_PATH,
                    transform=transforms.ToTensor(),
                    cls_option=True,
                    selective_cls="person")


VOC Parsing:  |----------------------------------------| 0.0% (0/300)  CompleteVOC Parsing:   |----------------------------------------| 0.3% (1/300)  CompleteVOC Parsing:   |----------------------------------------| 0.7% (2/300)  CompleteVOC Parsing:   |----------------------------------------| 1.0% (3/300)  CompleteVOC Parsing:   |----------------------------------------| 1.3% (4/300)  CompleteVOC Parsing:   |----------------------------------------| 1.7% (5/300)  CompleteVOC Parsing:   |----------------------------------------| 2.0% (6/300)  CompleteVOC Parsing:   |----------------------------------------| 2.3% (7/300)  CompleteVOC Parsing:   |█---------------------------------------| 2.7% (8/300)  CompleteVOC Parsing:   |█---------------------------------------| 3.0% (9/300)  CompleteVOC Parsing:   |█---------------------------------------| 3.3% (10/300)  CompleteVOC Parsing:   |█---------------------------------------| 3.7% (11/300)  CompleteVOC Parsing: 

In [7]:
# Shuffled mini-batches of `batch_size` samples; each batch is assembled by
# yolov1's detection_collate instead of the default collate function.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=detection_collate,
)

# 3. Load YOLOv1

In [8]:
net = yolov1.YOLOv1()
# visualize_weights_distribution(net)

# Place the model on the device chosen in the configuration cell instead of
# calling .cuda() unconditionally, so the notebook also runs when `device`
# fell back to CPU. The DataParallel wrapper is kept in both cases so the
# parameter names stored in checkpoints do not depend on the device.
# With device_ids=None and no CUDA device, DataParallel simply forwards to the
# wrapped module.
model = torch.nn.DataParallel(
    net,
    device_ids=[0] if device.type == 'cuda' else None,
).to(device)

# NOTE(review): summary() comes from the vendored torchsummary package —
# confirm it accepts a model that lives on the CPU.
summary(model, (3, 448,448))

# SGD with momentum and a small weight decay; the scheduler multiplies the
# learning rate by 0.95 every time scheduler.step() is called.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           9,472
       BatchNorm2d-2         [-1, 64, 224, 224]             128
         LeakyReLU-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5        [-1, 192, 112, 112]         110,784
       BatchNorm2d-6        [-1, 192, 112, 112]             384
         LeakyReLU-7        [-1, 192, 112, 112]               0
         MaxPool2d-8          [-1, 192, 56, 56]               0
            Conv2d-9          [-1, 128, 56, 56]          24,704
      BatchNorm2d-10          [-1, 128, 56, 56]             256
        LeakyReLU-11          [-1, 128, 56, 56]               0
           Conv2d-12          [-1, 256, 56, 56]         295,168
      BatchNorm2d-13          [-1, 256, 56, 56]             512
        LeakyReLU-14          [-1, 256,

# 4. Train the model

In [9]:
# Train the model
total_step = len(train_loader)  # number of batches per epoch

# Epochs at whose start the learning-rate scheduler is stepped once.
# NOTE(review): 20000 and 30000 are never reached while num_epochs is 16000.
lr_decay_epochs = (200, 400, 600, 20000, 30000)

# Visdom windows for the eight component losses, in the same order as the
# values returned by yolov1.detection_loss after the total loss.
component_plots = (
    coord1_plot,
    size1_plot,
    coord2_plot,
    size2_plot,
    obj_cls_plot,
    noobj_cls_plot,
    objectness1_plot,
    objectness2_plot,
)

for epoch in range(num_epochs):

    if epoch in lr_decay_epochs:
        scheduler.step()

    for i, (images, labels) in enumerate(train_loader):

        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)

        # Calc Loss: the total loss followed by its eight component terms.
        (loss,
         obj_coord1_loss,
         obj_size1_loss,
         obj_coord2_loss,
         obj_size2_loss,
         obj_class_loss,
         noobj_class_loss,
         objness1_loss,
         objness2_loss) = yolov1.detection_loss(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log to stdout and Visdom every 10th batch.
        if (i + 1) % 10 == 0:

            print('Epoch:[{}/{}], Step:[{}/{}], learning rate:{}\ttotal_loss\t{:.4f}\tcoord1\t{}\tsize1\t{}\tcoord2\t{}\tsize2\t{}\tclass\t{}\tnoobj_clss\t{}\tobjness1\t{}\tobjness2\t{}'
                  .format(epoch + 1,
                          num_epochs,
                          i + 1,
                          total_step,
                          [param_group['lr'] for param_group in optimizer.param_groups],
                          loss.item(),
                          obj_coord1_loss,
                          obj_size1_loss,
                          obj_coord2_loss,
                          obj_size2_loss,
                          obj_class_loss,
                          noobj_class_loss,
                          objness1_loss,
                          objness2_loss
                          ))

            # x coordinate for the plots: number of batches processed so far.
            # The previous formula, (epoch + 1) * batch_size + (i + 1), mixed
            # the batch size into the step index and therefore did not match
            # the 'Iteration' axis label.
            global_step = epoch * total_step + (i + 1)

            yolov1.update_vis_plot(viz, global_step, loss.item(), iter_plot, None, 'append')

            component_losses = (
                obj_coord1_loss,
                obj_size1_loss,
                obj_coord2_loss,
                obj_size2_loss,
                obj_class_loss,
                noobj_class_loss,
                objness1_loss,
                objness2_loss,
            )
            for component_loss, component_plot in zip(component_losses, component_plots):
                yolov1.update_vis_plot(viz, global_step, component_loss, component_plot, None, 'append')

    # Checkpoint every 300 epochs. The condition is also true for epoch 0, so
    # the first checkpoint is written right after the first epoch.
    if (epoch % 300) == 0:
        yolov1.save_checkpoint({
            'epoch': epoch + 1,
            'arch': "YOLOv1",
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, False, filename='checkpoint_{}.pth.tar'.format(epoch))


Epoch:[1/16000], Step:[10/20], learning rate:[0.0001]	total_loss	82.3859	coord1	163.52267456054688	size1	231.57789611816406	coord2	140.68667602539062	size2	408.13275146484375	class	17.46445083618164	noobj_clss	212.5845489501953	objness1	23.660568237304688	objness2	38.15949249267578
Epoch:[1/16000], Step:[20/20], learning rate:[0.0001]	total_loss	397.9143	coord1	2173.35546875	size1	2347.875732421875	coord2	893.447265625	size2	225.50157165527344	class	20.604516983032227	noobj_clss	242.1030731201172	objness1	27.727191925048828	objness2	38.1001091003418
Epoch:[2/16000], Step:[10/20], learning rate:[0.0001]	total_loss	49.5314	coord1	36.42535400390625	size1	369.726318359375	coord2	107.34135437011719	size2	63.81912612915039	class	6.162673473358154	noobj_clss	130.1411590576172	objness1	14.736624717712402	objness2	14.618753433227539
Epoch:[2/16000], Step:[20/20], learning rate:[0.0001]	total_loss	20.0719	coord1	17.402257919311523	size1	44.649085998535156	coord2	8.79107666015625	size2	47.1765823

KeyboardInterrupt: 