In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up the person-only VOC dataset

In [2]:
# Collect the VOC2007 image / annotation file lists, restricted to cls="person".
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
(train_img_list, train_anno_list,
 val_img_list, val_anno_list) = make_datapath_list(vocpath, cls="person")

# Dataset settings
voc_classes = ['person']
color_mean = (104, 117, 123)  # per-channel colour mean, in BGR order
input_size = 128  # input image size handed to DatasetTransform (128, not 300)

# Image transform and annotation converter used by the training dataset.
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)

# The validation dataset is given its own, separately constructed helpers.
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 32

# Arguments shared by both loaders; only the shuffle flag differs.
_loader_kwargs = dict(batch_size=batch_size, collate_fn=od_collate_fn, num_workers=8)

train_dataloader = data.DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_dataloader = data.DataLoader(val_dataset, shuffle=False, **_loader_kwargs)

dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

000009
000017
000021
000023
000030
000032
000035
000041
000048
000050
000051
000066
000073
000081
000083
000089
000101
000104
000110
000113
000125
000129
000131
000133
000138
000146
000150
000159
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000190
000192
000193
000194
000200
000210
000218
000220
000222
000229
000232
000245
000251
000257
000259
000269
000275
000276
000278
000282
000285
000288
000298
000302
000305
000308
000320
000321
000322
000323
000328
000331
000337
000338
000352
000359
000367
000372
000374
000382
000394
000406
000407
000411
000419
000428
000433
000435
000438
000443
000446
000448
000463
000468
000470
000476
000477
000480
000482
000483
000498
000499
000500
000515
000516
000518
000520
000523
000524
000525
000526
000530
000531
000535
000541
000545
000554
000555
000579
000583
000589
000591
000597
000612
000613
000625
000626
000628
000633
000648
000654
000677
000684
000688
000690
000694
000695
000702
000709
000717
000726
000731
000733
000739
000742

In [3]:
# Sanity check: pull the first mini-batch from the training loader and inspect it.
batch_iterator = iter(dataloaders_dict["train"])
images, targets = next(batch_iterator)

print(images.shape)      # image batch; prints torch.Size([32, 3, 128, 128]) with the settings above
print(len(targets))      # number of per-image annotation tensors in the batch
print(targets[1].shape)  # annotation tensor of the second image

torch.Size([32, 3, 128, 128])
32
torch.Size([2, 5])


In [4]:
# Display the annotation tensor of the second image in the batch.
# NOTE(review): each row has five values — presumably [xmin, ymin, xmax, ymax, label]
# in normalised coordinates; confirm against Anno_xml2list.
targets[1]

tensor([[0.4868, 0.7560, 0.5120, 0.7756, 0.0000],
        [0.4195, 0.7667, 0.5457, 0.8825, 0.0000]])

# Build and test the SSD model

In [5]:
from utils.blazeface import SSD

BlazeFace(
  (features): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): ReLU(inplace)
    )
    (4): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, a

In [6]:
# Configuration of the SSD detector (128-pixel input)
ssd_cfg = dict(
    num_classes=2,           # total number of classes, background included
    input_size=128,          # input image size
    bbox_aspect_num=[4, 6],  # number of default-box aspect-ratio variants per source
    feature_maps=[16, 8],    # feature-map size of each source
    steps=[8, 16],           # used to determine the default-box size
    min_sizes=[30, 60],      # used to determine the default-box size
    max_sizes=[60, 128],     # used to determine the default-box size
    # NOTE(review): six entries here versus two in every other per-source list;
    # presumably only the first two are consumed — confirm in utils.blazeface.
    aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
)

net = SSD(phase="train", cfg=ssd_cfg)

# SSDのweightsを設定

def weights_init(m):
    """Initialise a single module in place; intended for use with ``Module.apply``.

    Only ``nn.Conv2d`` modules are touched: the weight is re-drawn with
    Kaiming-normal initialisation and the bias, when the layer has one, is set
    to zero. Every other module type is left unchanged.
    """
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    bias = m.bias
    if bias is not None:
        init.constant_(bias, 0.0)

# Re-initialise the convolution layers of the localisation and confidence heads.
for _head in (net.loc, net.conf):
    _head.apply(weights_init)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [7]:
# Print the module tree of the assembled network (long output).
print(net)

SSD(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
   

In [8]:
import torch.optim as optim

from utils.ssd_model import MultiBoxLoss

# Loss function
criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)

# Optimiser: SGD with momentum and weight decay over every network parameter
optimizer = optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4,
)

In [9]:
# モデルを学習させる関数を作成
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for exactly ``num_epochs`` epochs, validating every 5th epoch.

    Args:
        net: Model to train. It is moved to the selected device and called as
            ``net(images)``.
        dataloaders_dict: Mapping with the keys ``"train"`` and ``"val"``. Each
            value is an iterable yielding ``(images, targets)`` pairs, where
            ``images`` supports ``.to(device)`` and ``targets`` is an iterable
            of tensors.
        criterion: Callable invoked as ``criterion(outputs, targets)`` that
            returns a ``(loss_l, loss_c)`` pair; the two are summed.
        optimizer: Optimizer that updates the parameters of ``net``.
        num_epochs: Number of epochs to run.

    Returns:
        None.

    Side effects:
        * Sets ``torch.backends.cudnn.benchmark = True``.
        * Rewrites ``log_output.csv`` after every epoch with the summed
          per-batch train / validation losses. ``val_loss`` is ``0.0`` for
          epochs in which no validation pass was run.
        * Every 10th epoch, saves ``net.state_dict()`` to
          ``weights/blazeface128_<epoch>.pth``, creating ``weights`` if needed.
    """
    # Use the GPU when one is available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the selected device.
    net.to(device)

    # Let cuDNN auto-tune its algorithms; pays off when the network is fixed.
    torch.backends.cudnn.benchmark = True

    # Global iteration counter (not reset between epochs) and loss accumulators.
    iteration = 1
    epoch_train_loss = 0.0  # sum of the training batch losses of this epoch
    epoch_val_loss = 0.0  # sum of the validation batch losses of this epoch
    logs = []

    # Epoch loop: run exactly num_epochs epochs (labelled 1..num_epochs).
    for epoch in range(num_epochs):

        # Remember the start times for the per-epoch / per-10-iteration timers.
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training phase, followed by a validation phase on every 5th epoch.
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation is only performed once every 5 epochs.
                    continue

            # Iterate over the mini-batches of the current phase.
            for images, targets in dataloaders_dict[phase]:

                # Send the batch to the selected device.
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # move every tensor of the list

                # Reset the gradients accumulated by the previous step.
                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # Loss computation
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    # Back-propagate and update only while training.
                    if phase == 'train':
                        loss.backward()  # compute the gradients

                        # Very large gradients destabilise training, so every
                        # gradient element is clipped to [-2.0, 2.0].
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # parameter update

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # Validation: only accumulate the loss.
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the accumulated losses and the elapsed time.
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Persist the log after every epoch so an interrupted run keeps its history.
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Reset the accumulators for the next epoch.
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        # Save a checkpoint every 10 epochs.
        if ((epoch+1) % 10 == 0):
            # Make sure the target directory exists so the save cannot fail
            # on a fresh checkout after the epochs have already been trained.
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/blazeface128_' +
                       str(epoch+1) + '.pth')

# start training here

In [None]:
# Launch training with the objects built in the cells above.
num_epochs = 200
train_model(
    net=net,
    dataloaders_dict=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

used device： cuda:0
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 5.5479 || 10iter: 5.3336 sec.
Iteration 20 || Loss: 5.7275 || 10iter: 2.7481 sec.
Iteration 30 || Loss: 6.0663 || 10iter: 2.5544 sec.
Iteration 40 || Loss: 5.1433 || 10iter: 2.4950 sec.
Iteration 50 || Loss: 6.2372 || 10iter: 2.5299 sec.
Iteration 60 || Loss: 5.6652 || 10iter: 1.5303 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:381.4997 ||Epoch_VAL_Loss:0.0000
timer:  18.1708 sec.
-------------
Epoch 2/200
-------------
train
Iteration 70 || Loss: 5.8343 || 10iter: 3.5246 sec.
Iteration 80 || Loss: 5.7348 || 10iter: 2.6142 sec.
Iteration 90 || Loss: 5.6922 || 10iter: 2.2431 sec.
Iteration 100 || Loss: 5.8039 || 10iter: 2.1730 sec.
Iteration 110 || Loss: 6.2829 || 10iter: 2.2542 sec.
Iteration 120 || Loss: 6.0590 || 10iter: 2.2100 sec.
Iteration 130 || Loss: 6.0433 || 10iter: 1.4679 sec.
-------------
epoch 2 || Epoch_TRAIN_Loss:370.6800 ||Epoch_VAL_Loss:0.0000
timer:  16.9388 sec.
-------------
Ep

Iteration 1110 || Loss: 5.4519 || 10iter: 2.0038 sec.
Iteration 1120 || Loss: 5.1497 || 10iter: 1.3904 sec.
-------------
epoch 17 || Epoch_TRAIN_Loss:335.8707 ||Epoch_VAL_Loss:0.0000
timer:  17.0043 sec.
-------------
Epoch 18/200
-------------
train
Iteration 1130 || Loss: 5.0034 || 10iter: 4.7680 sec.
Iteration 1140 || Loss: 5.0973 || 10iter: 2.4057 sec.
Iteration 1150 || Loss: 5.4013 || 10iter: 2.2585 sec.
Iteration 1160 || Loss: 4.9067 || 10iter: 1.9922 sec.
Iteration 1170 || Loss: 4.6039 || 10iter: 2.3056 sec.
Iteration 1180 || Loss: 4.3366 || 10iter: 1.6923 sec.
-------------
epoch 18 || Epoch_TRAIN_Loss:333.2262 ||Epoch_VAL_Loss:0.0000
timer:  16.6123 sec.
-------------
Epoch 19/200
-------------
train
Iteration 1190 || Loss: 5.7233 || 10iter: 2.9204 sec.
Iteration 1200 || Loss: 5.0196 || 10iter: 2.9912 sec.
Iteration 1210 || Loss: 5.1865 || 10iter: 2.1841 sec.
Iteration 1220 || Loss: 4.9481 || 10iter: 2.3066 sec.
Iteration 1230 || Loss: 5.6606 || 10iter: 2.5458 sec.
Iteration 

Iteration 2180 || Loss: 4.5463 || 10iter: 2.3014 sec.
Iteration 2190 || Loss: 4.9079 || 10iter: 3.4273 sec.
Iteration 2200 || Loss: 5.0587 || 10iter: 2.1637 sec.
Iteration 2210 || Loss: 5.0225 || 10iter: 2.0253 sec.
Iteration 2220 || Loss: 4.9863 || 10iter: 2.3422 sec.
Iteration 2230 || Loss: 4.7356 || 10iter: 2.1090 sec.
Iteration 2240 || Loss: 4.7322 || 10iter: 1.4050 sec.
-------------
epoch 34 || Epoch_TRAIN_Loss:324.4297 ||Epoch_VAL_Loss:0.0000
timer:  16.4637 sec.
-------------
Epoch 35/200
-------------
train
Iteration 2250 || Loss: 4.7422 || 10iter: 4.5168 sec.
Iteration 2260 || Loss: 5.2936 || 10iter: 2.3569 sec.
Iteration 2270 || Loss: 4.6589 || 10iter: 2.2384 sec.
Iteration 2280 || Loss: 4.4572 || 10iter: 2.0014 sec.
Iteration 2290 || Loss: 4.9295 || 10iter: 2.3099 sec.
Iteration 2300 || Loss: 4.6395 || 10iter: 1.9981 sec.
Iteration 2310 || Loss: 5.4465 || 10iter: 1.2673 sec.
-------------
val
-------------
epoch 35 || Epoch_TRAIN_Loss:320.5236 ||Epoch_VAL_Loss:161.0798
time

Iteration 3260 || Loss: 4.8205 || 10iter: 2.3328 sec.
Iteration 3270 || Loss: 4.7717 || 10iter: 2.2597 sec.
Iteration 3280 || Loss: 4.8389 || 10iter: 2.5852 sec.
Iteration 3290 || Loss: 4.4125 || 10iter: 1.7970 sec.
Iteration 3300 || Loss: 4.3829 || 10iter: 1.3396 sec.
-------------
val
-------------
epoch 50 || Epoch_TRAIN_Loss:316.2955 ||Epoch_VAL_Loss:160.5629
timer:  21.2334 sec.
-------------
Epoch 51/200
-------------
train
Iteration 3310 || Loss: 4.6881 || 10iter: 5.5153 sec.
Iteration 3320 || Loss: 4.8523 || 10iter: 2.2427 sec.
Iteration 3330 || Loss: 4.8341 || 10iter: 2.0781 sec.
Iteration 3340 || Loss: 5.0084 || 10iter: 2.3230 sec.
Iteration 3350 || Loss: 4.9174 || 10iter: 2.4714 sec.
Iteration 3360 || Loss: 4.9544 || 10iter: 1.5800 sec.
-------------
epoch 51 || Epoch_TRAIN_Loss:317.6861 ||Epoch_VAL_Loss:0.0000
timer:  17.1723 sec.
-------------
Epoch 52/200
-------------
train
Iteration 3370 || Loss: 5.0660 || 10iter: 3.7769 sec.
Iteration 3380 || Loss: 4.8326 || 10iter: 3.