In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up a person-only VOC dataset

In [2]:
# Collect the image / annotation file paths for the "person" class of VOC2007
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
(
    train_img_list,
    train_anno_list,
    val_img_list,
    val_anno_list,
) = make_datapath_list(vocpath, cls="person")

# Dataset settings
voc_classes = ['person']
color_mean = (104, 117, 123)  # mean colour value, in (BGR) order
input_size = 300  # image input size is set to 300x300

# Pre-processing for the images and for the XML annotations
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)
# The validation split is given its own, freshly built transform objects
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 32

# Only the training loader shuffles its samples
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
    num_workers=8,
)

val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=od_collate_fn,
    num_workers=8,
)

# Loaders keyed by phase name
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)

000009
000017
000021
000023
000030
000032
000035
000041
000048
000050
000051
000066
000073
000081
000083
000089
000101
000104
000110
000113
000125
000129
000131
000133
000138
000146
000150
000159
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000190
000192
000193
000194
000200
000210
000218
000220
000222
000229
000232
000245
000251
000257
000259
000269
000275
000276
000278
000282
000285
000288
000298
000302
000305
000308
000320
000321
000322
000323
000328
000331
000337
000338
000352
000359
000367
000372
000374
000382
000394
000406
000407
000411
000419
000428
000433
000435
000438
000443
000446
000448
000463
000468
000470
000476
000477
000480
000482
000483
000498
000499
000500
000515
000516
000518
000520
000523
000524
000525
000526
000530
000531
000535
000541
000545
000554
000555
000579
000583
000589
000591
000597
000612
000613
000625
000626
000628
000633
000648
000654
000677
000684
000688
000690
000694
000695
000702
000709
000717
000726
000731
000733
000739
000742

In [3]:
# Sanity check: pull a single mini-batch from the training DataLoader.
batch_iterator = iter(dataloaders_dict["train"])  # iterator over training mini-batches
images, targets = next(batch_iterator)  # first mini-batch
print(images.size())  # torch.Size([32, 3, 300, 300]) with batch_size = 32
print(len(targets))  # number of annotation tensors in the batch
print(targets[1].shape)  # shape of the second annotation tensor

torch.Size([32, 3, 300, 300])
32
torch.Size([1, 5])


In [4]:
# Display the second annotation tensor of the mini-batch fetched above.
# NOTE(review): the 5 columns are presumably [xmin, ymin, xmax, ymax, label]
# in normalised coordinates — confirm against Anno_xml2list / DatasetTransform.
targets[1]

tensor([[0.0000, 0.0000, 0.0365, 0.1676, 0.0000]])

# Test with the SSD model

In [8]:
from utils.ssd import SSD

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [9]:
# SSD300 configuration
ssd_cfg = {
    'num_classes': 2,  # number of classes, including background
    'input_size': 300,  # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],  # bbox aspects, one entry per feature map
    'feature_maps': [38, 19, 10, 5, 3, 1],  # feature map size of each stage
    'steps': [8, 16, 32, 64, 100, 300],  # determines the size of the default boxes (DBOX)
    'min_sizes': [30, 60, 111, 162, 213, 264],  # determines the size of the default boxes (DBOX)
    'max_sizes': [60, 111, 162, 213, 264, 315],  # determines the size of the default boxes (DBOX)
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]], # anchor settings
}

# Build the network in training phase from the configuration above
net = SSD(phase="train", cfg=ssd_cfg)

# Set the SSD weights: the VGG part is loaded from a saved state dict
print("using vgg weights")
# NOTE(review): torch.load is called without map_location; if the checkpoint
# was saved from a GPU this may fail on a CPU-only machine — confirm.
vgg_weights = torch.load("./weights/vgg16_reducedfc.pth")
net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    """Re-initialise a ``nn.Conv2d`` module in place; leave any other module alone.

    The kernel weights are filled with Kaiming-normal values and the bias,
    when the layer has one, is set to zero. Intended to be passed to
    ``Module.apply``.
    """
    # Anything that is not a 2-D convolution is skipped
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    # Convolutions created with bias=False have nothing more to reset
    bias = m.bias
    if bias is None:
        return
    nn.init.constant_(bias, 0.0)

# Apply the initial values to the extras, loc and conf sub-modules
net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU can be used; fall back to the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using vgg weights
using: cuda:0
set weights!


In [10]:
# Print the module tree of the assembled network to inspect its layers.
print(net)

SSD(
  (vgg): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (17): Conv2d(256, 

In [11]:
# NOTE(review): SSD is imported from utils.ssd in an earlier cell while
# MultiBoxLoss comes from utils.ssd_model — confirm both modules are intended.
from utils.ssd_model import MultiBoxLoss

# Define the loss; `device` is the notebook-level variable set when the weights were initialised.
# NOTE(review): jaccard_thresh / neg_pos are presumably the IoU matching threshold
# and the negative:positive mining ratio — confirm against MultiBoxLoss.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=3, device=device)

# Optimiser: SGD with momentum and weight decay over all parameters of the network
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [12]:
# Function that trains (and periodically validates) the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for exactly ``num_epochs`` epochs, validating every 10th epoch.

    Args:
        net: module to optimise; it is moved to the selected device and called
            as ``net(images)``.
        dataloaders_dict: mapping with ``"train"`` and ``"val"`` entries, each an
            iterable yielding ``(images, targets)`` where ``targets`` is a list
            of tensors.
        criterion: callable ``criterion(outputs, targets)`` returning the pair
            ``(loss_l, loss_c)``; their sum is the loss that is back-propagated.
        optimizer: optimiser that updates the parameters of ``net``.
        num_epochs: number of epochs to run.

    Side effects:
        * prints progress every 10 training iterations and at the end of
          every epoch;
        * rewrites ``log_output.csv`` after each epoch with the per-epoch sums
          of the batch losses (``val_loss`` stays 0.0 for epochs in which no
          validation pass ran);
        * saves ``net.state_dict()`` to ``weights/ssd300_<epoch>.pth`` every
          10 epochs, creating the ``weights`` directory if needed.

    Returns:
        None
    """

    # Check whether a GPU can be used
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the selected device
    net.to(device)

    # Let cuDNN pick the fastest kernels; pays off when the network is mostly fixed
    torch.backends.cudnn.benchmark = True

    # Iteration counter and per-epoch loss accumulators
    iteration = 1
    epoch_train_loss = 0.0  # sum of the training losses of the epoch
    epoch_val_loss = 0.0  # sum of the validation losses of the epoch
    logs = []

    # Epoch loop: exactly num_epochs passes, matching the "Epoch i/num_epochs" header
    for epoch in range(num_epochs):

        # Remember the start times
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training and validation phases of the epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # put the model in training mode
                print('train')
            else:
                if((epoch+1) % 10 == 0):
                    net.eval()   # put the model in evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation is only run once every 10 epochs
                    continue

            # Loop over the mini-batches of the data loader
            for images, targets in dataloaders_dict[phase]:

                # Send the data to the selected device
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # move every tensor of the list

                # Reset the gradients accumulated by the optimiser
                optimizer.zero_grad()

                # Gradients are only tracked during the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # Loss computation
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    # Back-propagation during training
                    if phase == 'train':
                        loss.backward()  # compute the gradients

                        # Very large gradients make the computation unstable,
                        # so every gradient value is clipped to at most 2.0
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # update the parameters

                        if (iteration % 10 == 0):  # show the loss once every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    # Validation
                    else:
                        epoch_val_loss += loss.item()

        # Per-epoch summary of the losses of each phase
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Save the log (the whole history is rewritten after every epoch)
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        epoch_train_loss = 0.0  # reset the training loss sum
        epoch_val_loss = 0.0  # reset the validation loss sum

        # Save the network every 10 epochs
        if ((epoch+1) % 10 == 0):
            # Make sure the target directory exists so a long run is not lost
            # to a missing folder at the first checkpoint
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/ssd300_' +
                       str(epoch+1) + '.pth')

# start training here

In [None]:
# Launch training; train_model prints progress, writes log_output.csv and
# saves checkpoints under weights/.
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 8.9183 || 10iter: 49.2415 sec.
Iteration 20 || Loss: 8.2571 || 10iter: 19.4378 sec.
Iteration 30 || Loss: 18.7474 || 10iter: 19.4420 sec.
Iteration 40 || Loss: 10.8588 || 10iter: 19.4117 sec.
Iteration 50 || Loss: 9.7334 || 10iter: 19.3884 sec.
Iteration 60 || Loss: 7.4762 || 10iter: 19.3043 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:656.4293 ||Epoch_VAL_Loss:0.0000
timer:  181.9912 sec.
-------------
Epoch 2/200
-------------
train
Iteration 70 || Loss: 6.5129 || 10iter: 9.4671 sec.
Iteration 80 || Loss: 61.8643 || 10iter: 19.3187 sec.
Iteration 90 || Loss: 10.1675 || 10iter: 19.2539 sec.
Iteration 100 || Loss: 10.8150 || 10iter: 19.1732 sec.
Iteration 110 || Loss: 8.5794 || 10iter: 19.2347 sec.
Iteration 120 || Loss: 8.2563 || 10iter: 19.1311 sec.
Iteration 130 || Loss: 13.7946 || 10iter: 19.0443 sec.
-------------
epoch 2 || Epoch_TRAIN_Loss:763.3820 ||Epoch_VAL_Loss:0.0000
timer:  136.7749 s

Iteration 1090 || Loss: 6.9219 || 10iter: 18.7180 sec.
Iteration 1100 || Loss: 5.9252 || 10iter: 18.7662 sec.
Iteration 1110 || Loss: 5.2466 || 10iter: 18.6699 sec.
Iteration 1120 || Loss: 5.4438 || 10iter: 18.7254 sec.
-------------
epoch 17 || Epoch_TRAIN_Loss:410.7246 ||Epoch_VAL_Loss:0.0000
timer:  133.5529 sec.
-------------
Epoch 18/200
-------------
train
Iteration 1130 || Loss: 5.4337 || 10iter: 17.3313 sec.
Iteration 1140 || Loss: 5.2270 || 10iter: 18.7843 sec.
Iteration 1150 || Loss: 5.2085 || 10iter: 18.6881 sec.
Iteration 1160 || Loss: 7.1727 || 10iter: 18.7562 sec.
Iteration 1170 || Loss: 6.5629 || 10iter: 18.7732 sec.
Iteration 1180 || Loss: 5.2826 || 10iter: 18.6969 sec.
-------------
epoch 18 || Epoch_TRAIN_Loss:390.0972 ||Epoch_VAL_Loss:0.0000
timer:  133.4199 sec.
-------------
Epoch 19/200
-------------
train
Iteration 1190 || Loss: 5.2840 || 10iter: 5.0411 sec.
Iteration 1200 || Loss: 5.4446 || 10iter: 18.7377 sec.
Iteration 1210 || Loss: 5.6456 || 10iter: 18.5934 s

Iteration 2160 || Loss: 6.9366 || 10iter: 18.7339 sec.
Iteration 2170 || Loss: 7.1403 || 10iter: 18.6523 sec.
-------------
epoch 33 || Epoch_TRAIN_Loss:395.4583 ||Epoch_VAL_Loss:0.0000
timer:  133.1172 sec.
-------------
Epoch 34/200
-------------
train
Iteration 2180 || Loss: 6.1703 || 10iter: 5.6553 sec.
Iteration 2190 || Loss: 5.5801 || 10iter: 18.6631 sec.
Iteration 2200 || Loss: 6.1202 || 10iter: 18.5913 sec.
Iteration 2210 || Loss: 6.7145 || 10iter: 18.7261 sec.
Iteration 2220 || Loss: 5.8851 || 10iter: 18.7355 sec.
Iteration 2230 || Loss: 6.5635 || 10iter: 18.6572 sec.
Iteration 2240 || Loss: 5.7888 || 10iter: 18.7075 sec.
-------------
epoch 34 || Epoch_TRAIN_Loss:378.1537 ||Epoch_VAL_Loss:0.0000
timer:  133.4285 sec.
-------------
Epoch 35/200
-------------
train
Iteration 2250 || Loss: 5.6556 || 10iter: 13.0258 sec.
Iteration 2260 || Loss: 7.3144 || 10iter: 18.6900 sec.
Iteration 2270 || Loss: 5.0358 || 10iter: 18.6928 sec.
Iteration 2280 || Loss: 5.6616 || 10iter: 18.7279 s

Iteration 3230 || Loss: 5.8698 || 10iter: 18.6964 sec.
-------------
epoch 49 || Epoch_TRAIN_Loss:424.0163 ||Epoch_VAL_Loss:0.0000
timer:  132.9771 sec.
-------------
Epoch 50/200
-------------
train
Iteration 3240 || Loss: 9.9410 || 10iter: 13.1421 sec.
Iteration 3250 || Loss: 5.9626 || 10iter: 18.7161 sec.
Iteration 3260 || Loss: 6.1380 || 10iter: 18.5863 sec.
Iteration 3270 || Loss: 7.7567 || 10iter: 18.7522 sec.
Iteration 3280 || Loss: 5.8150 || 10iter: 18.7049 sec.
Iteration 3290 || Loss: 7.3401 || 10iter: 18.7806 sec.
Iteration 3300 || Loss: 5.1786 || 10iter: 18.3304 sec.
-------------
val
-------------
epoch 50 || Epoch_TRAIN_Loss:424.7989 ||Epoch_VAL_Loss:176.5584
timer:  158.2188 sec.
-------------
Epoch 51/200
-------------
train
Iteration 3310 || Loss: 6.6686 || 10iter: 21.0805 sec.
Iteration 3320 || Loss: 5.6837 || 10iter: 18.6132 sec.
Iteration 3330 || Loss: 7.7120 || 10iter: 18.7120 sec.
Iteration 3340 || Loss: 5.3274 || 10iter: 18.6588 sec.
Iteration 3350 || Loss: 5.6742

Iteration 4300 || Loss: 5.0241 || 10iter: 21.2768 sec.
Iteration 4310 || Loss: 5.1689 || 10iter: 18.5140 sec.
Iteration 4320 || Loss: 4.9361 || 10iter: 18.7382 sec.
Iteration 4330 || Loss: 5.2135 || 10iter: 18.6736 sec.
Iteration 4340 || Loss: 5.0941 || 10iter: 18.5796 sec.
Iteration 4350 || Loss: 5.1156 || 10iter: 18.6668 sec.
-------------
epoch 66 || Epoch_TRAIN_Loss:336.2827 ||Epoch_VAL_Loss:0.0000
timer:  132.7980 sec.
-------------
Epoch 67/200
-------------
train
Iteration 4360 || Loss: 5.1832 || 10iter: 8.9679 sec.
Iteration 4370 || Loss: 5.1663 || 10iter: 18.7033 sec.
Iteration 4380 || Loss: 5.1473 || 10iter: 18.6529 sec.
Iteration 4390 || Loss: 4.7913 || 10iter: 18.6021 sec.
Iteration 4400 || Loss: 5.0719 || 10iter: 18.6819 sec.
Iteration 4410 || Loss: 4.8873 || 10iter: 18.7559 sec.
Iteration 4420 || Loss: 5.4415 || 10iter: 18.6291 sec.
-------------
epoch 67 || Epoch_TRAIN_Loss:335.3632 ||Epoch_VAL_Loss:0.0000
timer:  132.5303 sec.
-------------
Epoch 68/200
-------------
tr