In [1]:
# import stuff
import os
import numpy as np
import torch
import torch.utils.data as data
from itertools import product as product
import time

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd import Function
import pandas as pd

# import dataset
from utils.dataset import VOCDataset, DatasetTransform, make_datapath_list, Anno_xml2list, od_collate_fn

# Set up the VOC2007 dataset (note: all 20 VOC classes are loaded below, not only `person`)

In [2]:
# Collect the image / annotation file paths for the train and val splits
vocpath = os.path.join("..", "VOCdevkit", "VOC2007")
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(vocpath)

# Dataset configuration
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
color_mean = (104, 117, 123)  # mean colour values, in (BGR) order
input_size = 128  # input image side length (batches come out as 3 x 128 x 128)

# Pre-processing objects: one for images, one for the XML annotations
transform = DatasetTransform(input_size, color_mean)
transform_anno = Anno_xml2list(voc_classes)

train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=transform,
    transform_anno=transform_anno,
)
# The validation dataset gets its own, separately constructed transform objects
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DatasetTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

batch_size = 32

# Options shared by both loaders; only shuffling differs between train and val
loader_kwargs = dict(batch_size=batch_size, collate_fn=od_collate_fn, num_workers=8)

train_dataloader = data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_dataloader = data.DataLoader(val_dataset, shuffle=False, **loader_kwargs)

dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

000005
000007
000009
000012
000016
000017
000019
000020
000021
000023
000024
000026
000030
000032
000033
000034
000035
000036
000039
000041
000042
000044
000046
000047
000048
000050
000051
000052
000060
000061
000063
000064
000065
000066
000072
000073
000077
000078
000081
000083
000089
000091
000093
000095
000099
000101
000102
000104
000107
000109
000110
000112
000113
000117
000118
000120
000121
000122
000123
000125
000129
000130
000131
000132
000133
000134
000138
000140
000141
000142
000143
000146
000147
000150
000153
000154
000156
000158
000159
000161
000162
000163
000164
000165
000169
000170
000171
000173
000174
000177
000180
000184
000187
000189
000190
000192
000193
000194
000198
000200
000203
000207
000208
000209
000210
000211
000214
000215
000218
000219
000220
000221
000222
000224
000225
000228
000229
000232
000233
000235
000236
000241
000242
000244
000245
000246
000249
000250
000251
000256
000257
000259
000262
000263
000266
000268
000269
000270
000275
000276
000278
000282
000285

003937
003939
003941
003945
003946
003947
003948
003949
003953
003954
003956
003957
003960
003961
003963
003965
003966
003969
003970
003971
003973
003974
003979
003983
003984
003986
003987
003988
003990
003991
003992
003993
003994
003996
003997
003998
004003
004005
004008
004009
004010
004011
004012
004013
004014
004015
004016
004017
004019
004020
004023
004025
004028
004031
004033
004034
004035
004037
004039
004046
004047
004051
004052
004057
004058
004060
004066
004067
004069
004073
004075
004076
004077
004082
004085
004087
004089
004091
004092
004093
004095
004100
004102
004105
004106
004108
004110
004111
004113
004117
004120
004121
004122
004129
004131
004133
004135
004136
004137
004138
004140
004141
004142
004143
004145
004146
004148
004149
004150
004152
004158
004163
004164
004168
004169
004170
004171
004174
004178
004185
004186
004189
004190
004191
004192
004193
004194
004195
004196
004200
004201
004203
004204
004205
004209
004212
004215
004220
004221
004223
004224
004228
004229

007902
007905
007908
007909
007910
007911
007914
007915
007916
007919
007920
007921
007923
007924
007925
007926
007928
007931
007932
007933
007935
007939
007940
007943
007946
007947
007950
007953
007954
007956
007958
007959
007963
007964
007968
007970
007971
007974
007976
007979
007980
007984
007987
007991
007996
007997
007998
007999
008001
008002
008004
008005
008008
008009
008012
008017
008019
008023
008024
008026
008029
008031
008032
008033
008036
008037
008040
008042
008043
008044
008048
008049
008051
008053
008057
008060
008061
008062
008063
008064
008067
008068
008069
008072
008075
008076
008079
008082
008083
008084
008085
008086
008087
008091
008093
008095
008096
008098
008100
008101
008103
008105
008106
008107
008108
008112
008115
008116
008117
008121
008122
008125
008127
008130
008132
008137
008138
008139
008140
008141
008142
008144
008150
008151
008159
008160
008163
008164
008166
008168
008169
008171
008173
008174
008175
008177
008180
008186
008188
008189
008190
008191
008197

In [3]:
# Sanity check: pull a single batch from the training loader and inspect it
batch_iterator = iter(dataloaders_dict["train"])  # iterator over training batches
images, targets = next(batch_iterator)  # first batch
print(images.size())  # recorded run prints torch.Size([32, 3, 128, 128])
print(len(targets))  # recorded run prints 32, matching batch_size
print(targets[1].shape)  # annotation tensor of the second sample; torch.Size([3, 5]) in the recorded run

torch.Size([32, 3, 128, 128])
32
torch.Size([3, 5])


In [4]:
# Display the annotation rows of the second sample in the batch.
# NOTE(review): each row looks like four normalised box coordinates followed by
# a class index into voc_classes — confirm against Anno_xml2list.
targets[1]

tensor([[0.0000e+00, 3.1195e-01, 4.8480e-01, 5.7484e-01, 6.0000e+00],
        [1.6869e-01, 2.0377e-01, 3.2067e-01, 4.0755e-01, 1.4000e+01],
        [4.5593e-03, 2.4654e-01, 1.4134e-01, 4.0252e-01, 1.4000e+01]])

# Test with the SSD model

In [5]:
from utils.blazeface import SSD

BlazeFace(
  (features): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): ReLU(inplace)
    )
    (4): BlazeBlock(
      (conv1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
        (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, a

In [6]:
# SSD configuration (input size is 128 here)
ssd_cfg = {
    'num_classes': 21,  # total number of classes, including the background class
    'input_size': 128,  # input image size
    'bbox_aspect_num': [4, 6],  # number of default-box (DBox) aspect-ratio variants to output
    'feature_maps': [16, 8],  # feature-map size of each source
    'steps': [4, 8],  # determines the DBox size
    'min_sizes': [30, 60],  # determines the DBox size
    'max_sizes': [60, 128],  # determines the DBox size
    # NOTE(review): six entries here but only two feature maps above —
    # presumably only the first two are consumed; confirm in utils.blazeface.
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

# Build the network in training mode from the configuration above
net = SSD(phase="train", cfg=ssd_cfg)

# Weight initialisation for the SSD layers

def weights_init(m):
    """Initialise a single module in place; intended for use with ``Module.apply``.

    ``nn.Conv2d`` modules get Kaiming-normal weights and, when a bias exists,
    a bias filled with zeros. Any other module type is left untouched.
    """
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    # Convolutions created with bias=False have nothing more to initialise
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0.0)

# Apply weights_init to every submodule of net.loc and net.conf
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# Check whether a GPU is available; fall back to the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using:", device)

print("set weights!")

using: cuda:0
set weights!


In [7]:
# Print the module tree of the assembled network
print(net)

SSD(
  (blaze): BlazeFace(
    (features): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
          (3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (act): ReLU(inplace)
      )
      (4): BlazeBlock(
        (conv1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=24)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1))
   

)


In [8]:
from utils.ssd_model import MultiBoxLoss

# define loss
# NOTE(review): jaccard_thresh / neg_pos are interpreted by
# utils.ssd_model.MultiBoxLoss — presumably the box-matching overlap threshold
# and the negative:positive mining ratio; confirm there.
criterion = MultiBoxLoss(jaccard_thresh=0.5,neg_pos=2, device=device)

# optim
import torch.optim as optim
# SGD over all network parameters. The lr given here is only the starting
# value: train_model calls adjust_learning_rate at the start of every epoch,
# which overwrites 'lr' in each param group.
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [9]:
def get_current_lr(epoch, base_lr=1e-3, decay_epochs=(120, 180), gamma=0.1):
    """Return the step-decayed learning rate for a given epoch.

    The rate starts at ``base_lr`` and is multiplied by ``gamma`` once for
    every milestone in ``decay_epochs`` that ``epoch`` has reached.

    Args:
        epoch: zero-based epoch index.
        base_lr: learning rate before any decay is applied.
        decay_epochs: iterable of epoch indices at which the rate decays.
        gamma: multiplicative decay factor applied per reached milestone.

    Returns:
        The learning rate to use during ``epoch``. With the defaults this is
        1e-3 for epochs below 120, then one factor of 0.1 from epoch 120 and a
        second one from epoch 180.
    """
    lr = base_lr
    for milestone in decay_epochs:
        if epoch >= milestone:
            lr *= gamma
    return lr

def adjust_learning_rate(optimizer, epoch):
    """Set every param group of ``optimizer`` to the scheduled rate for ``epoch``.

    The rate is obtained from ``get_current_lr(epoch)``, printed, and then
    written to the ``'lr'`` entry of each param group. Returns nothing.
    """
    scheduled_lr = get_current_lr(epoch)
    print("lr is:", scheduled_lr)

    groups = optimizer.param_groups
    for idx in range(len(groups)):
        groups[idx]['lr'] = scheduled_lr

In [10]:
# Function that trains (and periodically validates) the model
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net`` for ``num_epochs`` epochs, validating every 5th epoch.

    Args:
        net: the model; moved to the selected device and called as ``net(images)``.
        dataloaders_dict: mapping with ``"train"`` and ``"val"`` entries, each an
            iterable yielding ``(images, targets)`` where ``targets`` is a list
            of tensors.
        criterion: callable returning ``(loss_l, loss_c)`` for
            ``(outputs, targets)``; the two are summed into the training loss.
        optimizer: optimizer stepping ``net``'s parameters. Its learning rate is
            reset at the start of every epoch via ``adjust_learning_rate``.
        num_epochs: number of epochs to run (exactly this many).

    Side effects:
        * enables ``torch.backends.cudnn.benchmark``;
        * rewrites ``log_output.csv`` in the working directory after every epoch;
        * saves ``net.state_dict()`` to ``weights/blazeface128VOC_<epoch>.pth``
          every 10 epochs, creating ``weights/`` if it does not exist.
    """

    # Use the GPU when one is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("used device：", device)

    # Move the network to the device
    net.to(device)

    # Speeds things up when the network structure is mostly fixed
    torch.backends.cudnn.benchmark = True

    # Iteration counter and per-epoch loss accumulators
    iteration = 1
    epoch_train_loss = 0.0  # sum of training losses over the epoch
    epoch_val_loss = 0.0  # sum of validation losses over the epoch
    logs = []

    # Epoch loop. range(num_epochs) runs exactly num_epochs epochs, so the
    # "Epoch i/num_epochs" banner never exceeds its own total.
    for epoch in range(num_epochs):

        adjust_learning_rate(optimizer, epoch)

        # Record start times
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # Training and validation phases of this epoch
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # training mode
                print('train')
            else:
                if((epoch+1) % 5 == 0):
                    net.eval()   # evaluation mode
                    print('-------------')
                    print('val')
                else:
                    # Validation only runs once every 5 epochs
                    continue

            # Loop over mini-batches from the data loader
            for images, targets in dataloaders_dict[phase]:

                # Send the data to the selected device
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # move each tensor in the list

                # Reset the gradients held by the optimizer
                optimizer.zero_grad()

                # Forward pass; gradients are tracked only while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    # Loss computation
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c
                    loss_value = loss.item()

                    # Back-propagation during training
                    if phase == 'train':
                        loss.backward()  # compute gradients

                        # Very large gradients destabilise training, so clip
                        # every gradient value to at most 2.0
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # parameter update

                        if (iteration % 10 == 0):  # report the loss every 10 iterations
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Iteration {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss_value, duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss_value
                        iteration += 1

                    # Validation
                    else:
                        epoch_val_loss += loss_value

        # Per-epoch summary. On epochs without a validation pass the
        # validation loss is reported as 0.0.
        t_epoch_finish = time.time()
        print('-------------')
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} ||Epoch_VAL_Loss:{:.4f}'.format(
            epoch+1, epoch_train_loss, epoch_val_loss))
        print('timer:  {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        # Save the log (the whole file is rewritten every epoch)
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        epoch_train_loss = 0.0  # reset the training loss sum
        epoch_val_loss = 0.0  # reset the validation loss sum

        # Save the network every 10 epochs
        if ((epoch+1) % 10 == 0):
            # torch.save does not create missing directories
            os.makedirs('weights', exist_ok=True)
            torch.save(net.state_dict(), 'weights/blazeface128VOC_' +
                       str(epoch+1) + '.pth')

# start training here

In [None]:
# Launch training with the network, data loaders, loss and optimizer built above
num_epochs = 200
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

used device： cuda:0
lr is: 0.001
-------------
Epoch 1/200
-------------
train
Iteration 10 || Loss: 30.9715 || 10iter: 5.4023 sec.
Iteration 20 || Loss: 29.4055 || 10iter: 2.2104 sec.
Iteration 30 || Loss: 25.0067 || 10iter: 2.3319 sec.
Iteration 40 || Loss: 16.1359 || 10iter: 2.2536 sec.
Iteration 50 || Loss: 20.5020 || 10iter: 2.2803 sec.
Iteration 60 || Loss: 17.9642 || 10iter: 2.2465 sec.
Iteration 70 || Loss: 14.6884 || 10iter: 2.8248 sec.
Iteration 80 || Loss: 15.0182 || 10iter: 2.4330 sec.
Iteration 90 || Loss: 12.8400 || 10iter: 2.3660 sec.
Iteration 100 || Loss: 13.8394 || 10iter: 2.4350 sec.
Iteration 110 || Loss: 13.6255 || 10iter: 2.5294 sec.
Iteration 120 || Loss: 13.3865 || 10iter: 2.3450 sec.
Iteration 130 || Loss: 14.9253 || 10iter: 2.2777 sec.
Iteration 140 || Loss: 15.4440 || 10iter: 2.3649 sec.
Iteration 150 || Loss: 11.0169 || 10iter: 1.8851 sec.
-------------
epoch 1 || Epoch_TRAIN_Loss:2838.4868 ||Epoch_VAL_Loss:0.0000
timer:  39.8199 sec.
lr is: 0.001
----------

Iteration 1290 || Loss: 8.1463 || 10iter: 2.2235 sec.
Iteration 1300 || Loss: 10.3016 || 10iter: 2.3598 sec.
Iteration 1310 || Loss: 8.3256 || 10iter: 2.3308 sec.
Iteration 1320 || Loss: 8.2557 || 10iter: 2.3944 sec.
Iteration 1330 || Loss: 8.1416 || 10iter: 2.4232 sec.
Iteration 1340 || Loss: 9.3502 || 10iter: 2.4017 sec.
Iteration 1350 || Loss: 8.4926 || 10iter: 2.4485 sec.
Iteration 1360 || Loss: 8.4950 || 10iter: 2.3198 sec.
Iteration 1370 || Loss: 8.2731 || 10iter: 2.3205 sec.
Iteration 1380 || Loss: 7.9619 || 10iter: 2.1059 sec.
Iteration 1390 || Loss: 8.4070 || 10iter: 2.5073 sec.
Iteration 1400 || Loss: 7.7586 || 10iter: 2.4166 sec.
Iteration 1410 || Loss: 7.1162 || 10iter: 1.5476 sec.
-------------
epoch 9 || Epoch_TRAIN_Loss:1336.0111 ||Epoch_VAL_Loss:0.0000
timer:  39.2829 sec.
lr is: 0.001
-------------
Epoch 10/200
-------------
train
Iteration 1420 || Loss: 8.8497 || 10iter: 4.3363 sec.
Iteration 1430 || Loss: 8.3226 || 10iter: 2.8321 sec.
Iteration 1440 || Loss: 9.0896 |