In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

In [2]:
# Make sure the folders that receive checkpoints and training logs exist.
for out_dir in ("weights", "log"):
    os.makedirs(out_dir, exist_ok=True)

# Input resolution passed to DatasetTransform when the datasets are built.
input_size = 256

# Set up the person-only VOC dataset

In [3]:
# --- Collect image / annotation paths for the "person" class ---
# VOC2007 supplies both the training and the validation lists.
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(vocpath, cls="person")

# VOC2012 only contributes extra training samples; the two validation lists
# it returns are discarded.
vocpath = "../VOCdevkit/VOC2012"
train_img_list2, train_anno_list2, _, _ = make_datapath_list(
    vocpath, cls="person", VOC2012=True)

# In-place append of the VOC2012 entries to the VOC2007 training lists.
train_img_list += train_img_list2
train_anno_list += train_anno_list2

# --- Build the Dataset objects ---
voc_classes = ['person']
color_mean = (104, 117, 123)  # mean colour value per channel, (BGR) order

print("trainlist: ", len(train_img_list))
print("vallist: ", len(val_img_list))

# Apply DatasetTransform to the images and Anno_xml2list to the annotations.
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)
# The validation dataset receives its own freshly constructed transforms.
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

# --- Wrap the datasets in mini-batch loaders ---
batch_size = 32

train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,  # only the training loader shuffles
    collate_fn=od_collate_fn,
    num_workers=8,
)

val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=od_collate_fn,
    num_workers=8,
)

# Phase name -> loader, in the form train_model expects.
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

val not found
trainlist:  6469
vallist:  2097


In [4]:
# Fetch the first training sample to sanity-check the dataset pipeline.
train_dataset[0]

(tensor([[[-45.5372, -81.0999, -99.0463,  ..., -76.5134, -64.2030, -64.1938],
          [-58.1552, -68.6390, -72.4191,  ..., -65.0008,  -0.5020, -56.6581],
          [-70.5009, -86.7548, -91.1066,  ..., -66.4612, -56.4577, -46.4708],
          ...,
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],
 
         [[ -8.1659, -45.8792, -64.0686,  ..., -64.9146, -42.8002, -31.0119],
          [-22.4291, -34.9771, -38.3957,  ..., -46.8317,  23.1840, -19.9719],
          [-38.7112, -54.4861, -55.6691,  ..., -39.4456, -27.3896, -16.1881],
          ...,
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
          [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000]],
 
         [[-57.7041, -83.018

In [5]:
# Sanity check: draw a single mini-batch from the training loader.
batch_iterator = iter(dataloaders_dict["train"])  # iterator over the training batches
images, targets = next(batch_iterator)  # take the first mini-batch
print(images.size())  # torch.Size([32, 3, 256, 256]) with batch_size=32 and input_size=256
print(len(targets))  # 32 here: one entry per image in the batch
print(targets[1].shape)  # shape of the annotation tensor of the second image

torch.Size([32, 3, 256, 256])
32
torch.Size([1, 5])


In [6]:
# Show the annotation tensor of the second image in the batch.
# NOTE(review): presumably each row is [xmin, ymin, xmax, ymax, label] in normalised coordinates — confirm against Anno_xml2list.
targets[1]

tensor([[0.7582, 0.5593, 0.9674, 1.0000, 0.0000]])

# Test with the SSD model

In [7]:
from utils.blazeface import SSD256

In [8]:
# Configuration for the SSD256 network (256x256 input)
ssd_cfg = {
    'num_classes': 2,  # total number of classes, including the background class
    'input_size': 256,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box (DBox) aspect-ratio variants to output
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [8, 16],  # determines the DBox size
    'min_sizes': [16, 32],  # determines the DBox size
    'max_sizes': [32, 100],  # determines the DBox size
    # NOTE(review): six entries here versus two entries in the lists above — confirm how SSD256 consumes this key.
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training phase from the configuration above.
net = SSD256(phase="train", cfg=ssd_cfg)

# Set up the SSD weights

def weights_init(m):
    """Initialiser meant to be passed to ``Module.apply``.

    ``nn.Conv2d`` modules receive Kaiming-normal weights and, when they have
    a bias, a bias filled with zeros. Modules of any other type are left
    untouched.

    Args:
        m: The module visited by ``apply``.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.kaiming_normal_(m.weight.data)
    if m.bias is None:
        return
    init.constant_(m.bias, 0.0)

# Apply the initialiser to every sub-module of net.loc and net.conf
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU can be used
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [9]:
# Print the module tree of the network for inspection.
print(net)

SSD256(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace=True)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), strid

In [10]:
from utils.ssd_model import MultiBoxLoss

# Define the loss: MultiBoxLoss with focal=True, created for the device selected earlier.
# NOTE(review): jaccard_thresh / neg_pos are defined in utils.ssd_model — presumably the IoU matching
# threshold and the negative:positive mining ratio; confirm there.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=3, device=device, focal=True)

# Optimizer: Adam over all network parameters.
# The lr given here is overwritten at the start of every epoch by adjust_learning_rate.
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=1e-4, weight_decay=5e-4)

In [11]:
def get_current_lr(epoch, base_lr=1e-4, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for a given epoch.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached
    (``epoch >= milestone``). With the defaults this yields 1e-4 up to epoch
    119, one decay step from epoch 120 and two decay steps from epoch 180.

    Args:
        epoch: Zero-based epoch index.
        base_lr: Learning rate before any decay is applied.
        decay_epochs: Iterable of epoch indices at which the rate is decayed.
        gamma: Multiplicative decay factor applied per reached milestone.

    Returns:
        The learning rate to use during ``epoch``.
    """
    lr = base_lr
    for decay_epoch in decay_epochs:
        if epoch >= decay_epoch:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Write the scheduled learning rate for ``epoch`` into ``optimizer``.

    The rate is obtained from ``get_current_lr(epoch)``, printed, and stored
    under the ``'lr'`` key of every entry in ``optimizer.param_groups``.

    Args:
        optimizer: Object exposing ``param_groups`` (a sequence of dicts).
        epoch: Epoch index forwarded to ``get_current_lr``.
    """
    new_lr = get_current_lr(epoch)
    print("lr is:", new_lr)
    for group in optimizer.param_groups:
        group['lr'] = new_lr

In [12]:
# Build the function that trains the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, validating on every 5th epoch.

    Args:
        net: Network to train. It is moved to the GPU when one is available
            and is called as ``net(images)``.
        dataloaders_dict: Mapping with the keys ``"train"`` and ``"val"``.
            Each value is iterated to obtain ``(images, targets)`` batches,
            where ``targets`` is a sequence of tensors.
        criterion: Callable returning ``(loss_l, loss_c)`` when called with
            ``(outputs, targets)``; the two parts are summed into the loss.
        optimizer: Optimizer updating ``net``. Its learning rate is reset at
            the start of every epoch through ``adjust_learning_rate``.
        num_epochs: Number of epochs to run.

    Returns:
        None.

    Side effects:
        * ``log/log_output_focal.csv`` is rewritten after every epoch with
          the summed (not averaged) train / val batch losses. ``val_loss``
          is 0.0 for the epochs in which validation is skipped.
        * ``weights/blazeface256_focal_<epoch>.pth`` receives the network's
          ``state_dict`` every 10 epochs.
    """
    # Check whether a GPU can be used
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Fail early instead of after the first epoch if the output folders are missing
    os.makedirs("log", exist_ok=True)
    os.makedirs("weights", exist_ok=True)

    # Move the network to the selected device
    net.to(device)

    # Speeds things up when the network is more or less fixed
    torch.backends.cudnn.benchmark = True

    # Set up the iteration counter and the loss accumulators
    iteration = 1
    epoch_train_loss = 0.0  # sum of the training losses within the epoch
    epoch_val_loss = 0.0  # sum of the validation losses within the epoch
    logs = []

    # Epoch loop: exactly num_epochs passes. (The previous range(num_epochs+1)
    # ran one extra epoch and announced it as "Epoch N+1/N".)
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Remember the start times
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training and validation phases of the epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # put the model in training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # put the model in evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation is only carried out once every 5 epochs
                    continue

            # Loop over the mini-batches of the data loader
            for images, targets in dataloaders_dict[phase]:

                # Send the data to the GPU when one is available
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # move every tensor in the list

                # Reset the gradients held by the optimizer
                optimizer.zero_grad()

                # Forward pass; gradients are tracked only while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    # Compute the loss
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    # Back-propagate during training
                    if phase == 'train':
                        loss.backward()  # compute the gradients

                        # Overly large gradients destabilise training, so
                        # clip every gradient value to at most 2.0
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # update the parameters

                        if (iteration % 10 == 0):  # report the loss once every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # During validation
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the train / val losses
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Save the log (the whole history is rewritten every epoch)
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log/log_output_focal.csv")

        epoch_train_loss = 0.0  # reset the training-loss sum
        epoch_val_loss = 0.0  # reset the validation-loss sum

        # Save the network every 10 epochs
        if ((epoch+1) % 10 == 0):
            torch.save(net.state_dict(), 'weights/blazeface256_focal_' +
                       str(epoch+1) + '.pth')

# start training here

In [None]:
# Launch training; train_model writes its CSV log under log/ and its checkpoints under weights/.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.0001
-------------
Epoch 1/200
-------------
train


  logpt = F.log_softmax(input)


Iteration 10 || Loss: 3159.7498 || 10iter: 12.5271 sec.
Iteration 20 || Loss: 3631.0842 || 10iter: 6.5335 sec.
Iteration 30 || Loss: 2789.6321 || 10iter: 6.5990 sec.
Iteration 40 || Loss: 3270.4949 || 10iter: 6.7237 sec.
Iteration 50 || Loss: 3391.3289 || 10iter: 6.5214 sec.
Iteration 60 || Loss: 3542.2898 || 10iter: 6.9441 sec.
Iteration 70 || Loss: 3469.5723 || 10iter: 6.4193 sec.
Iteration 80 || Loss: 3409.4910 || 10iter: 6.7932 sec.
Iteration 90 || Loss: 2698.3599 || 10iter: 6.9819 sec.
Iteration 100 || Loss: 3397.1628 || 10iter: 6.8270 sec.
Iteration 110 || Loss: 3273.5764 || 10iter: 7.1126 sec.
Iteration 120 || Loss: 2289.5771 || 10iter: 7.0161 sec.
Iteration 130 || Loss: 3439.6101 || 10iter: 6.6665 sec.
Iteration 140 || Loss: 2720.1807 || 10iter: 6.1559 sec.
Iteration 150 || Loss: 3203.8037 || 10iter: 6.1873 sec.
Iteration 160 || Loss: 2252.1763 || 10iter: 7.7409 sec.
Iteration 170 || Loss: 3687.1772 || 10iter: 7.3464 sec.
Iteration 180 || Loss: 2489.1870 || 10iter: 6.6487 sec.


  logpt = F.log_softmax(input)


Iteration 210 || Loss: 2655.8091 || 10iter: 9.3385 sec.
Iteration 220 || Loss: 1911.7057 || 10iter: 6.3987 sec.
Iteration 230 || Loss: 2042.7052 || 10iter: 6.5660 sec.
Iteration 240 || Loss: 1861.6342 || 10iter: 6.3684 sec.
Iteration 250 || Loss: 1559.8262 || 10iter: 6.5851 sec.
Iteration 260 || Loss: 2036.4868 || 10iter: 6.2675 sec.
Iteration 270 || Loss: 2183.6946 || 10iter: 6.7817 sec.
Iteration 280 || Loss: 1995.1785 || 10iter: 6.6071 sec.
Iteration 290 || Loss: 1704.7499 || 10iter: 6.6313 sec.
Iteration 300 || Loss: 2065.6289 || 10iter: 6.5683 sec.
Iteration 310 || Loss: 1564.0509 || 10iter: 6.8948 sec.
Iteration 320 || Loss: 1433.5189 || 10iter: 6.5765 sec.
Iteration 330 || Loss: 1857.1807 || 10iter: 6.1599 sec.
Iteration 340 || Loss: 1899.5281 || 10iter: 6.2952 sec.
Iteration 350 || Loss: 1843.9053 || 10iter: 6.5094 sec.
Iteration 360 || Loss: 1492.8733 || 10iter: 5.8008 sec.
Iteration 370 || Loss: 1563.2986 || 10iter: 8.8031 sec.
Iteration 380 || Loss: 1883.1088 || 10iter: 6.53

Iteration 1510 || Loss: 1047.0333 || 10iter: 6.5160 sec.
Iteration 1520 || Loss: 1280.1982 || 10iter: 6.7910 sec.
Iteration 1530 || Loss: 1459.7520 || 10iter: 6.4760 sec.
Iteration 1540 || Loss: 1306.7867 || 10iter: 6.3842 sec.
Iteration 1550 || Loss: 1259.7910 || 10iter: 6.4871 sec.
Iteration 1560 || Loss: 1209.4601 || 10iter: 6.7319 sec.
Iteration 1570 || Loss: 1147.7842 || 10iter: 6.1014 sec.
Iteration 1580 || Loss: 1363.8795 || 10iter: 7.1123 sec.
Iteration 1590 || Loss: 1015.3784 || 10iter: 6.7016 sec.
Iteration 1600 || Loss: 690.8978 || 10iter: 6.7860 sec.
Iteration 1610 || Loss: 980.3287 || 10iter: 5.9830 sec.
Iteration 1620 || Loss: 1058.1613 || 10iter: 5.9733 sec.
-------------
epoch 8 || Epoch_TRAIN_Loss:240491.0688 ||Epoch_VAL_Loss:0.0000
timer:  136.2437 sec.
lr is: 0.0001
-------------
Epoch 9/200
-------------
train
Iteration 1630 || Loss: 1109.8073 || 10iter: 7.6954 sec.
Iteration 1640 || Loss: 1127.0784 || 10iter: 5.8194 sec.
Iteration 1650 || Loss: 1313.7483 || 10iter:

Iteration 2780 || Loss: 1004.1494 || 10iter: 6.3606 sec.
Iteration 2790 || Loss: 1143.2438 || 10iter: 6.4900 sec.
Iteration 2800 || Loss: 1158.5140 || 10iter: 6.3440 sec.
Iteration 2810 || Loss: 1368.9877 || 10iter: 6.7756 sec.
Iteration 2820 || Loss: 1322.5387 || 10iter: 6.6574 sec.
Iteration 2830 || Loss: 1159.4125 || 10iter: 5.8592 sec.
Iteration 2840 || Loss: 1320.0477 || 10iter: 6.1629 sec.
-------------
epoch 14 || Epoch_TRAIN_Loss:230157.3434 ||Epoch_VAL_Loss:0.0000
timer:  130.4956 sec.
lr is: 0.0001
-------------
Epoch 15/200
-------------
train
Iteration 2850 || Loss: 1080.2767 || 10iter: 9.9476 sec.
Iteration 2860 || Loss: 834.2479 || 10iter: 6.7234 sec.
Iteration 2870 || Loss: 879.7550 || 10iter: 7.0464 sec.
Iteration 2880 || Loss: 1362.1244 || 10iter: 7.0415 sec.
Iteration 2890 || Loss: 1272.7614 || 10iter: 6.6746 sec.
Iteration 2900 || Loss: 1005.1508 || 10iter: 6.5403 sec.
Iteration 2910 || Loss: 1226.9225 || 10iter: 6.1776 sec.
Iteration 2920 || Loss: 807.7581 || 10iter

That's all :)