In [None]:
# Copyright (c) 2017 Max deGroot, Ellis Brown
# Released under the MIT license
# https://github.com/amdegroot/ssd.pytorch
# Updated by: Takuya Mouri

In [None]:
## To pin specific package versions, uncomment the lines in this cell.
#!pip install torch==0.4.1
#!pip install torchvision==0.2.1
#!pip install numpy==1.14.6
#!pip install matplotlib==2.1.2
#!pip install Pillow==5.0.0

# Step.1) IMPORT PYTORCH

In [1]:
# Check the installed PyTorch version
import torch
print(torch.__version__)

0.4.1


In [2]:
# GPUの接続
import torch
torch.cuda.is_available()

True

## Skip: the following is only needed when running on Colab (Tesla K80) in the cloud

# Google Driveへのマウント（Colab利用時のみ）

In [None]:
# Mount Google Drive at /content/gdrive (Colab only)
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Change into the project folder on the mounted drive
cd /content/gdrive/My Drive/Colab Notebooks/ssd_pytorch

In [None]:
# List the files in the current folder
!ls

## Skip: the following is only needed for the VOC2007 and VOC2012 datasets

# ベースネットVGG-16のパラメータをダウンロード（フォルダweights未格納時）

In [None]:
# Download the parameter file of the base network
!wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth

In [None]:
import os
# Create the weights folder if it does not exist yet.
# exist_ok=True makes this a single race-free call, and it raises if a regular
# file named "weights" is in the way instead of silently continuing.
os.makedirs('weights/', exist_ok=True)

In [None]:
# Move the parameter file into the weights folder
!mv vgg16_reducedfc.pth weights

In [None]:
# Download the ssd300_mAP_77.43_v2.pth parameter file
!wget https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth

In [None]:
# Move the downloaded parameter file into the weights folder
!mv ssd300_mAP_77.43_v2.pth weights

# データセットVOC2007(0.9GB)のダウンロード（フォルダVOCdevkit未格納時）

In [None]:
# Download and extract the VOC2007 trainval archive, then the VOC2007 test archive
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!tar xf VOCtrainval_06-Nov-2007.tar

!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
!tar xf VOCtest_06-Nov-2007.tar

# データセットVOC2012(1.9GB)のダウンロード（フォルダVOCdevkit未格納時）

In [None]:
# Download and extract the VOC2012 trainval archive
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
!tar xf VOCtrainval_11-May-2012.tar

# Step.2) Setting up for Training SSD network

## SSDネットワークの学習

In [11]:
# List the files in the current folder
!ls

data		   README.md	   VOCtest_06-Nov-2007.tar
demo		   ssd.py	   VOCtrainval_06-Nov-2007.tar
layers		   training.ipynb  VOCtrainval_11-May-2012.tar
pascal-voc-writer  utils	   weights
__pycache__	   VOCdevkit


In [12]:
from data import *
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import os
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np
import argparse

In [13]:
# Training configuration.
# To resume training, set 'resume' to the name of a parameter file; it is
# looked up inside 'save_folder'. When 'resume' is empty, the 'basenet' file
# is loaded instead.
args = dict(
    dataset='VOC',
    basenet='vgg16_reducedfc.pth',
    batch_size=12,
    resume='',
    start_iter=0,
    num_workers=4,
    cuda=True,
    lr=5e-4,
    momentum=0.9,
    weight_decay=5e-4,
    gamma=0.1,
    save_folder='weights/',
)

In [14]:
# Choose the default tensor type for newly created tensors:
# CUDA float tensors only when a CUDA device exists AND args['cuda'] is set,
# CPU float tensors otherwise.
if torch.cuda.is_available() and args['cuda']:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    if torch.cuda.is_available():
        # A GPU is present but the configuration opted out of using it.
        print("WARNING: It looks like you have a CUDA device, but aren't " +
              "using CUDA.\nRun with --cuda for optimal training speed.")
    torch.set_default_tensor_type('torch.FloatTensor')

In [15]:
# Set up the training data.
# NOTE(review): `voc`, `VOCDetection`, `VOC_ROOT` and `MEANS` are not defined in
# this notebook; presumably they come from `from data import *` -- confirm.
cfg = voc
dataset = VOCDetection(root=VOC_ROOT,
                       transform=SSDAugmentation(cfg['min_dim'],
                                                 MEANS))

In [16]:
# Build the network in 'train' mode from the dataset configuration
ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
# Use the GPU only when it is both requested (args['cuda']) and available.
# Checking availability alone would move the model and the batches to the GPU
# even when args['cuda'] is False, while the default tensor type and the loss
# are configured for the CPU.
device = 'cuda' if (args['cuda'] and torch.cuda.is_available()) else 'cpu'
# Move the network to the selected device
net = ssd_net.to(device)

In [17]:
# When resuming, load the parameters named by args['resume']
if args['resume']:
    print('Resuming training, loading {}...'.format(args['resume']))
    # os.path.join works whether or not save_folder ends with a separator
    ssd_net.load_weights(os.path.join(args['save_folder'], args['resume']))
# When starting fresh, load the base-network parameters named by args['basenet']
else:
    # map_location keeps every storage on the CPU while deserialising, so the
    # file loads regardless of the device it was saved from;
    # load_state_dict then copies the values into the model's own parameters.
    vgg_weights = torch.load(os.path.join(args['save_folder'], args['basenet']),
                             map_location=lambda storage, loc: storage)
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

Loading base network...


In [18]:
# Print the network's module list
print(net)

SSD(
  (vgg): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (17): Conv2d(256, 

In [19]:
# When CUDA is requested, wrap the model in DataParallel and turn on
# cudnn.benchmark. `net` is the wrapper from here on; `ssd_net` remains the
# underlying model.
if args['cuda']:
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

In [20]:
def adjust_learning_rate(optimizer, gamma, step, base_lr=None):
    """Set every parameter group's learning rate to ``base_lr * gamma ** step``.

    Adapted from the PyTorch ImageNet example:
    https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            with an ``'lr'`` entry (e.g. a ``torch.optim`` optimizer).
        gamma: multiplicative decay factor applied once per step.
        step: number of decay steps applied so far.
        base_lr: initial learning rate. Defaults to the notebook-level
            ``args['lr']`` when omitted, which matches the previous behaviour.
    """
    if base_lr is None:
        base_lr = args['lr']
    lr = base_lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def xavier(param):
    """Fill ``param`` in place using Xavier (Glorot) uniform initialisation."""
    init.xavier_uniform_(param)


def weights_init(m):
    """Initialise a module when it is a ``nn.Conv2d``; leave others untouched.

    Intended for ``Module.apply``. The convolution weight is Xavier-initialised
    and the bias, when the layer has one, is zeroed.

    Args:
        m: the module visited by ``apply``.
    """
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        # Conv2d(bias=False) stores ``None`` as its bias; skip it instead of
        # failing on ``None.data``.
        if m.bias is not None:
            m.bias.data.zero_()

In [21]:
# For a fresh training run (nothing to resume from), initialise the
# convolution parameters of the extra layers and of the localisation and
# classification heads.
if not args['resume']:
    print('Initializing weights...')
    # Apply the Xavier-based initialiser to each newly added module group
    for new_layers in (ssd_net.extras, ssd_net.loc, ssd_net.conf):
        new_layers.apply(weights_init)

Initializing weights...


In [22]:
# Set up the loss function.
# NOTE(review): the meaning of the positional arguments is not visible in this
# notebook -- check MultiBoxLoss in layers/modules. The first argument is the
# number of classes and the last one passes args['cuda'].
criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                         False, args['cuda'])

In [23]:
# Set up the optimizer: SGD over all parameters of `net`, using the learning
# rate, momentum and weight decay from `args`
optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=args['momentum'],
                      weight_decay=args['weight_decay'])

In [24]:
# Put the network in training mode
net.train()
# Running loss counters and epoch counter (updated by the training loop)
loc_loss = 0
conf_loss = 0
epoch = 0
print('Loading the dataset...')

# Number of full batches in one pass over the dataset (integer division, so a
# trailing partial batch is not counted)
epoch_size = len(dataset) // args['batch_size']
print('dataset_size', len(dataset))
print('epoch_size', epoch_size)
print('Training SSD on:', dataset.name)
print('Using the specified args:')
print(args)

# Number of learning-rate decay steps applied so far
step_index = 0

Loading the dataset...
dataset_size 1200
epoch_size 100
Training SSD on: VOC0712
Using the specified args:
{'momentum': 0.9, 'save_folder': 'weights/', 'resume': '', 'basenet': 'vgg16_reducedfc.pth', 'weight_decay': 0.0005, 'start_iter': 0, 'num_workers': 4, 'cuda': True, 'batch_size': 12, 'lr': 0.0005, 'gamma': 0.1, 'dataset': 'VOC'}


In [25]:
# Wrap the training dataset in a shuffling DataLoader that uses the project's
# detection_collate function to assemble batches
data_loader = data.DataLoader(
    dataset,
    batch_size=args['batch_size'],
    shuffle=True,
    num_workers=args['num_workers'],
    collate_fn=detection_collate,
    pin_memory=True,
)

# Step.3) Training SSD network

In [26]:
# Start training.

# When resuming (start_iter > 0) past one or more learning-rate milestones,
# the loop below never visits those iterations, so their decays would be
# skipped. Count the milestones strictly before start_iter (a milestone equal
# to start_iter is still handled inside the loop) and apply them up front.
completed_lr_steps = sum(1 for milestone in cfg['lr_steps']
                         if milestone < args['start_iter'])
if completed_lr_steps > step_index:
    step_index = completed_lr_steps
    adjust_learning_rate(optimizer, args['gamma'], step_index)

batch_iterator = None
# Loop over iterations from args['start_iter'] up to cfg['max_iter']
for iteration in range(args['start_iter'], cfg['max_iter']):
    # (Re)create the batch iterator at the start of training and at every
    # epoch boundary. epoch_size is 0 when the dataset is smaller than one
    # batch, so guard the modulus against division by zero.
    at_epoch_boundary = epoch_size > 0 and iteration % epoch_size == 0
    if batch_iterator is None or at_epoch_boundary:
        batch_iterator = iter(data_loader)
        loc_loss = 0
        conf_loss = 0
        epoch += 1

    # Decay the learning rate when a milestone iteration is reached
    if iteration in cfg['lr_steps']:
        step_index += 1
        adjust_learning_rate(optimizer, args['gamma'], step_index)

    # Load one batch of training data
    try:
        images, targets = next(batch_iterator)
    except StopIteration:
        # Only reachable when the epoch-boundary reset above cannot fire
        # (epoch_size == 0): start a new pass over the data.
        batch_iterator = iter(data_loader)
        loc_loss = 0
        conf_loss = 0
        epoch += 1
        images, targets = next(batch_iterator)

    # Move the images to the training device
    images = images.to(device)
    # Move the annotations to the training device
    targets = [ann.to(device) for ann in targets]

    # Timed section: forward pass, loss, backward pass and parameter update
    t0 = time.time()
    # Forward pass
    out = net(images)
    # Reset the gradients
    optimizer.zero_grad()
    # Compute the localisation and confidence losses
    loss_l, loss_c = criterion(out, targets)
    loss = loss_l + loss_c
    # Back-propagate
    loss.backward()
    # Update the parameters
    optimizer.step()
    t1 = time.time()
    # Accumulate the per-epoch loss counters
    loc_loss += loss_l.item()
    conf_loss += loss_c.item()

    # Log every 10 iterations
    if iteration % 10 == 0:
        print('timer: %.4f sec.' % (t1 - t0))
        print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

# Save the trained model as <save_folder>/<dataset>.pth.
# os.path.join works whether or not save_folder ends with a separator.
torch.save(ssd_net.state_dict(),
           os.path.join(args['save_folder'], args['dataset'] + '.pth'))



timer: 14.5236 sec.
iter 0 || Loss: 30.8505 || timer: 0.2115 sec.
iter 10 || Loss: 23.8911 || timer: 0.2099 sec.
iter 20 || Loss: 20.9576 || timer: 0.2075 sec.
iter 30 || Loss: 21.1735 || timer: 0.2059 sec.
iter 40 || Loss: 16.2083 || timer: 0.2159 sec.
iter 50 || Loss: 11.1766 || timer: 0.2161 sec.
iter 60 || Loss: 11.1382 || timer: 0.2103 sec.
iter 70 || Loss: 9.7479 || timer: 0.2102 sec.
iter 80 || Loss: 9.2593 || timer: 0.2106 sec.
iter 90 || Loss: 8.7111 || timer: 0.2317 sec.
iter 100 || Loss: 8.4233 || timer: 0.2141 sec.
iter 110 || Loss: 7.5710 || timer: 0.2107 sec.
iter 120 || Loss: 8.3536 || timer: 0.2091 sec.
iter 130 || Loss: 8.4300 || timer: 0.2114 sec.
iter 140 || Loss: 6.5450 || timer: 0.2096 sec.
iter 150 || Loss: 7.0498 || timer: 0.2099 sec.
iter 160 || Loss: 8.2317 || timer: 0.2132 sec.
iter 170 || Loss: 6.7472 || timer: 0.2095 sec.
iter 180 || Loss: 7.1225 || timer: 0.2123 sec.
iter 190 || Loss: 6.4174 || timer: 0.2343 sec.
iter 200 || Loss: 6.8062 || timer: 0.2117 se

iter 1720 || Loss: 1.8931 || timer: 0.2186 sec.
iter 1730 || Loss: 2.2812 || timer: 0.2180 sec.
iter 1740 || Loss: 1.7120 || timer: 0.2197 sec.
iter 1750 || Loss: 1.8816 || timer: 0.2240 sec.
iter 1760 || Loss: 2.0976 || timer: 0.2192 sec.
iter 1770 || Loss: 2.4248 || timer: 0.2257 sec.
iter 1780 || Loss: 1.8794 || timer: 0.2190 sec.
iter 1790 || Loss: 1.8750 || timer: 0.2320 sec.
iter 1800 || Loss: 1.9203 || timer: 0.2167 sec.
iter 1810 || Loss: 2.1165 || timer: 0.2192 sec.
iter 1820 || Loss: 1.8710 || timer: 0.2128 sec.
iter 1830 || Loss: 2.4870 || timer: 0.2200 sec.
iter 1840 || Loss: 1.6723 || timer: 0.2246 sec.
iter 1850 || Loss: 1.8347 || timer: 0.2224 sec.
iter 1860 || Loss: 1.3713 || timer: 0.2174 sec.
iter 1870 || Loss: 2.7861 || timer: 0.2172 sec.
iter 1880 || Loss: 1.3613 || timer: 0.2201 sec.
iter 1890 || Loss: 1.3416 || timer: 0.2386 sec.
iter 1900 || Loss: 1.3690 || timer: 0.2163 sec.
iter 1910 || Loss: 1.7841 || timer: 0.2164 sec.
iter 1920 || Loss: 1.5780 || timer: 0.22

iter 3430 || Loss: 1.9959 || timer: 0.2206 sec.
iter 3440 || Loss: 0.9720 || timer: 0.2169 sec.
iter 3450 || Loss: 0.9130 || timer: 0.2184 sec.
iter 3460 || Loss: 1.2463 || timer: 0.2241 sec.
iter 3470 || Loss: 2.2615 || timer: 0.2190 sec.
iter 3480 || Loss: 1.1039 || timer: 0.2165 sec.
iter 3490 || Loss: 1.9239 || timer: 0.2435 sec.
iter 3500 || Loss: 2.3603 || timer: 0.2201 sec.
iter 3510 || Loss: 1.0098 || timer: 0.2138 sec.
iter 3520 || Loss: 1.4841 || timer: 0.2229 sec.
iter 3530 || Loss: 1.1620 || timer: 0.2187 sec.
iter 3540 || Loss: 0.8959 || timer: 0.2190 sec.
iter 3550 || Loss: 1.2189 || timer: 0.2171 sec.
iter 3560 || Loss: 1.2441 || timer: 0.2214 sec.
iter 3570 || Loss: 1.3347 || timer: 0.2236 sec.
iter 3580 || Loss: 1.2714 || timer: 0.2221 sec.
iter 3590 || Loss: 1.7975 || timer: 0.2476 sec.
iter 3600 || Loss: 1.4521 || timer: 0.2188 sec.
iter 3610 || Loss: 2.3219 || timer: 0.2181 sec.
iter 3620 || Loss: 1.2217 || timer: 0.2214 sec.
iter 3630 || Loss: 2.0374 || timer: 0.21

iter 5140 || Loss: 1.0062 || timer: 0.2256 sec.
iter 5150 || Loss: 0.7419 || timer: 0.2179 sec.
iter 5160 || Loss: 0.8289 || timer: 0.2271 sec.
iter 5170 || Loss: 1.7674 || timer: 0.2192 sec.
iter 5180 || Loss: 1.1046 || timer: 0.2144 sec.
iter 5190 || Loss: 1.3364 || timer: 0.2476 sec.
iter 5200 || Loss: 1.1961 || timer: 0.2211 sec.
iter 5210 || Loss: 1.7633 || timer: 0.2207 sec.
iter 5220 || Loss: 0.8854 || timer: 0.2195 sec.
iter 5230 || Loss: 1.0917 || timer: 0.2209 sec.
iter 5240 || Loss: 0.8218 || timer: 0.2297 sec.
iter 5250 || Loss: 0.9888 || timer: 0.2229 sec.
iter 5260 || Loss: 1.0815 || timer: 0.2213 sec.
iter 5270 || Loss: 1.3890 || timer: 0.2173 sec.
iter 5280 || Loss: 0.9397 || timer: 0.2176 sec.
iter 5290 || Loss: 1.2360 || timer: 0.2313 sec.
iter 5300 || Loss: 1.4041 || timer: 0.2187 sec.
iter 5310 || Loss: 0.8383 || timer: 0.2276 sec.
iter 5320 || Loss: 0.9025 || timer: 0.2238 sec.
iter 5330 || Loss: 1.3399 || timer: 0.2159 sec.
iter 5340 || Loss: 0.9030 || timer: 0.22

iter 6850 || Loss: 0.5493 || timer: 0.2188 sec.
iter 6860 || Loss: 0.7212 || timer: 0.2151 sec.
iter 6870 || Loss: 1.3186 || timer: 0.2189 sec.
iter 6880 || Loss: 0.7129 || timer: 0.2209 sec.
iter 6890 || Loss: 0.9780 || timer: 0.2433 sec.
iter 6900 || Loss: 0.8840 || timer: 0.2264 sec.
iter 6910 || Loss: 1.8439 || timer: 0.2208 sec.
iter 6920 || Loss: 0.6435 || timer: 0.2196 sec.
iter 6930 || Loss: 0.9232 || timer: 0.2350 sec.
iter 6940 || Loss: 0.6579 || timer: 0.2282 sec.
iter 6950 || Loss: 0.6342 || timer: 0.2196 sec.
iter 6960 || Loss: 0.8466 || timer: 0.2160 sec.
iter 6970 || Loss: 1.5547 || timer: 0.2158 sec.
iter 6980 || Loss: 1.0877 || timer: 0.2252 sec.
iter 6990 || Loss: 1.1846 || timer: 0.2565 sec.
iter 7000 || Loss: 0.7902 || timer: 0.2158 sec.
iter 7010 || Loss: 0.8741 || timer: 0.2233 sec.
iter 7020 || Loss: 0.6780 || timer: 0.2284 sec.
iter 7030 || Loss: 1.1201 || timer: 0.2201 sec.
iter 7040 || Loss: 0.8507 || timer: 0.2248 sec.
iter 7050 || Loss: 0.7049 || timer: 0.22

iter 8560 || Loss: 0.5823 || timer: 0.2247 sec.
iter 8570 || Loss: 0.6975 || timer: 0.2192 sec.
iter 8580 || Loss: 0.5216 || timer: 0.2263 sec.
iter 8590 || Loss: 0.6357 || timer: 0.2393 sec.
iter 8600 || Loss: 0.5382 || timer: 0.2219 sec.
iter 8610 || Loss: 0.5484 || timer: 0.2227 sec.
iter 8620 || Loss: 0.5452 || timer: 0.2220 sec.
iter 8630 || Loss: 0.7806 || timer: 0.2180 sec.
iter 8640 || Loss: 0.7638 || timer: 0.2213 sec.
iter 8650 || Loss: 0.3698 || timer: 0.2227 sec.
iter 8660 || Loss: 0.9128 || timer: 0.2201 sec.
iter 8670 || Loss: 0.9961 || timer: 0.2143 sec.
iter 8680 || Loss: 0.5599 || timer: 0.2179 sec.
iter 8690 || Loss: 0.7496 || timer: 0.2388 sec.
iter 8700 || Loss: 1.0361 || timer: 0.2240 sec.
iter 8710 || Loss: 0.9564 || timer: 0.2202 sec.
iter 8720 || Loss: 0.5636 || timer: 0.2173 sec.
iter 8730 || Loss: 0.5446 || timer: 0.2244 sec.
iter 8740 || Loss: 0.7322 || timer: 0.2223 sec.
iter 8750 || Loss: 0.4108 || timer: 0.2218 sec.
iter 8760 || Loss: 0.4739 || timer: 0.21

iter 10260 || Loss: 0.4603 || timer: 0.2858 sec.
iter 10270 || Loss: 0.6302 || timer: 0.2245 sec.
iter 10280 || Loss: 0.4379 || timer: 0.2215 sec.
iter 10290 || Loss: 0.5228 || timer: 0.2461 sec.
iter 10300 || Loss: 0.4321 || timer: 0.2202 sec.
iter 10310 || Loss: 0.5395 || timer: 0.2238 sec.
iter 10320 || Loss: 0.4260 || timer: 0.2179 sec.
iter 10330 || Loss: 0.6666 || timer: 0.2133 sec.
iter 10340 || Loss: 0.7921 || timer: 0.2227 sec.
iter 10350 || Loss: 0.8075 || timer: 0.2205 sec.
iter 10360 || Loss: 1.0725 || timer: 0.2234 sec.
iter 10370 || Loss: 0.7863 || timer: 0.2220 sec.
iter 10380 || Loss: 0.7240 || timer: 0.2237 sec.
iter 10390 || Loss: 0.5632 || timer: 0.2649 sec.
iter 10400 || Loss: 0.5696 || timer: 0.2207 sec.
iter 10410 || Loss: 0.3391 || timer: 0.2213 sec.
iter 10420 || Loss: 0.4598 || timer: 0.2287 sec.
iter 10430 || Loss: 0.7980 || timer: 0.2200 sec.
iter 10440 || Loss: 0.6717 || timer: 0.2193 sec.
iter 10450 || Loss: 0.4878 || timer: 0.2186 sec.
iter 10460 || Loss: 

iter 11930 || Loss: 1.0190 || timer: 0.2227 sec.
iter 11940 || Loss: 1.2724 || timer: 0.2230 sec.
iter 11950 || Loss: 0.5154 || timer: 0.2241 sec.
iter 11960 || Loss: 0.5961 || timer: 0.2182 sec.
iter 11970 || Loss: 1.0677 || timer: 0.2300 sec.
iter 11980 || Loss: 0.4760 || timer: 0.2234 sec.
iter 11990 || Loss: 0.8222 || 