In [None]:
# Copyright (c) 2017 Max deGroot, Ellis Brown
# Released under the MIT license
# https://github.com/amdegroot/ssd.pytorch
# Updated by: Takuya Mouri

In [None]:
## バージョンを指定する場合、セルをコメントアウトしてください。
#!pip install torch==0.4.1
#!pip install torchvision==0.2.1
#!pip install numpy==1.14.6
#!pip install matplotlib==2.1.2
#!pip install Pillow==5.0.0

In [None]:
# version確認
import torch
print(torch.__version__)

In [None]:
# GPUの接続
import torch
torch.cuda.is_available()

# Google Driveへのマウント（Colab利用時のみ）

In [None]:
# Google Driveにマウント
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# フォルダ移動
cd /content/gdrive/My Drive/Colab Notebooks/ssd_pytorch

In [None]:
# ファイル名を表示
!ls

# ベースネットVGG-16のパラメータをダウンロード（フォルダweights未格納時）

In [None]:
# ベースネットワークのパラメータファイルをダウンロード
!wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth

In [None]:
import os
# フォルダweights未作成の場合に作成
if not os.path.exists('weights/'):
    os.mkdir('weights/')

In [None]:
# パラメータファイルをフォルダweightsに移動
!mv vgg16_reducedfc.pth weights

In [None]:
!wget https://s3.amazonaws.com/amdegroot-models/ssd300_mAP_77.43_v2.pth

In [None]:
!mv ssd300_mAP_77.43_v2.pth weights

# データセットVOC2007(0.9GB)のダウンロード（フォルダVOCdevkit未格納時）

In [None]:
# データセットVOC2007をダウンロード 
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
!tar xf VOCtrainval_06-Nov-2007.tar

!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
!tar xf VOCtest_06-Nov-2007.tar

# データセットVOC2012(1.9GB)のダウンロード（フォルダVOCdevkit未格納時）

In [None]:
# データセットVOC2012をダウンロード 
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
!tar xf VOCtrainval_11-May-2012.tar

# SSDネットワークの学習

In [1]:
# ファイル名を表示
!ls

data		   README.md	   VOCtest_06-Nov-2007.tar
demo		   ssd.py	   VOCtrainval_06-Nov-2007.tar
layers		   training.ipynb  VOCtrainval_11-May-2012.tar
pascal-voc-writer  utils	   weights
__pycache__	   VOCdevkit


In [2]:
from data import *
from utils.augmentations import SSDAugmentation
from layers.modules import MultiBoxLoss
from ssd import build_ssd
import os
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.utils.data as data
import numpy as np
import argparse

In [3]:
# 学習の再開時はargs['resume']にパラメータファイルを指定
args = {'dataset':'VOC',
        'basenet':'vgg16_reducedfc.pth',
        'batch_size':12,
        'resume':'',
        'start_iter':0,
        'num_workers':4,
        'cuda':True,
        'lr':5e-4,
        'momentum':0.9,
        'weight_decay':5e-4,
        'gamma':0.1,
        'save_folder':'weights/'
       }

In [4]:
# Tensor作成時のデフォルトにGPU Tensorを設定
if torch.cuda.is_available():
    if args['cuda']:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    if not args['cuda']:
        print("WARNING: It looks like you have a CUDA device, but aren't " +
              "using CUDA.\nRun with --cuda for optimal training speed.")
        torch.set_default_tensor_type('torch.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

In [5]:
# 訓練データの設定
cfg = voc
dataset = VOCDetection(root=VOC_ROOT,
                       transform=SSDAugmentation(cfg['min_dim'],
                                                 MEANS))

In [6]:
# ネットワークの定義
ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# ネットワークをGPUに転送
net = ssd_net.to(device)

In [7]:
# 学習の再開時はargs['resume']のパラメータをロード
if args['resume']:
    print('Resuming training, loading {}...'.format(args['resume']))
    ssd_net.load_weights(args['save_folder'] + args['resume'])
# 学習の開始時はargs['basenet']のパラメータをロード    
else:
    vgg_weights = torch.load(args['save_folder'] + args['basenet'])
    print('Loading base network...')
    ssd_net.vgg.load_state_dict(vgg_weights)

Loading base network...


In [8]:
# ネットワークのモジュールリストを出力
print(net)

SSD(
  (vgg): ModuleList(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (17): Conv2d(256, 

In [9]:
if args['cuda']:
    net = torch.nn.DataParallel(ssd_net)
    cudnn.benchmark = True

In [10]:
def adjust_learning_rate(optimizer, gamma, step):
    """Sets the learning rate to the initial LR decayed by 10 at every
        specified step
    # Adapted from PyTorch Imagenet example:
    # https://github.com/pytorch/examples/blob/master/imagenet/main.py
    """
    lr = args['lr'] * (gamma ** (step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def xavier(param):
    init.xavier_uniform_(param)


def weights_init(m):
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        m.bias.data.zero_()

In [11]:
# 新規モデル学習時は追加ネットワークの畳み込み、位置推定、クラス分類の畳み込みパラメータを初期化する
if not args['resume']:
    print('Initializing weights...')
    # initialize newly added layers' weights with xavier method
    ssd_net.extras.apply(weights_init)
    ssd_net.loc.apply(weights_init)
    ssd_net.conf.apply(weights_init)

Initializing weights...


In [12]:
# 損失関数の設定
criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                         False, args['cuda'])

In [13]:
# 最適化パラメータの設定
optimizer = optim.SGD(net.parameters(), lr=args['lr'], momentum=args['momentum'],
                      weight_decay=args['weight_decay'])

In [14]:
# 訓練モード
net.train()
# loss counters
loc_loss = 0
conf_loss = 0
epoch = 0
print('Loading the dataset...')

epoch_size = len(dataset) // args['batch_size']
print('dataset_size', len(dataset))
print('epoch_size', epoch_size)
print('Training SSD on:', dataset.name)
print('Using the specified args:')
print(args)

step_index = 0

Loading the dataset...
dataset_size 1300
epoch_size 108
Training SSD on: VOC0712
Using the specified args:
{'batch_size': 12, 'cuda': True, 'save_folder': 'weights/', 'weight_decay': 0.0005, 'start_iter': 0, 'momentum': 0.9, 'gamma': 0.1, 'num_workers': 4, 'dataset': 'VOC', 'lr': 0.0005, 'resume': '', 'basenet': 'vgg16_reducedfc.pth'}


In [15]:
# 訓練データのデータローダへの設定
data_loader = data.DataLoader(dataset, args['batch_size'],
                              num_workers=args['num_workers'],
                              shuffle=True, collate_fn=detection_collate,
                              pin_memory=True)

In [16]:
# 学習の開始
batch_iterator = None
# iterationでループして、cfg['max_iter']まで学習する
for iteration in range(args['start_iter'], cfg['max_iter']):
    # 学習開始時または1epoch終了後にdata_loaderから訓練データをロードする
    if (not batch_iterator) or (iteration % epoch_size ==0):
        batch_iterator = iter(data_loader)
        loc_loss = 0
        conf_loss = 0
        epoch += 1

    if iteration in cfg['lr_steps']:
        step_index += 1
        adjust_learning_rate(optimizer, args['gamma'], step_index)
        
    # load train data
    # バッチサイズ分の訓練データをload
    images, targets = next(batch_iterator)
    
    # 画像をGPUに転送
    images = images.to(device)
    # アノテーションをGPUに転送    
    targets = [ann.to(device) for ann in targets]

    # forward
    t0 = time.time()
    # 順伝播の計算
    out = net(images)
    # 勾配の初期化
    optimizer.zero_grad()
    # 損失関数の計算
    loss_l, loss_c = criterion(out, targets)
    loss = loss_l + loss_c
    # 勾配の計算
    loss.backward()
    # パラメータの更新
    optimizer.step()
    t1 = time.time()
    # 損失関数の更新
    loc_loss += loss_l.item()
    conf_loss += loss_c.item()
    
    #ログの出力
    if iteration % 10 == 0:
        print('timer: %.4f sec.' % (t1 - t0))
        print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

# 学習済みモデルの保存
torch.save(ssd_net.state_dict(),
           args['save_folder'] + '' + args['dataset'] + '.pth')



timer: 11.5045 sec.
iter 0 || Loss: 18.5462 || timer: 0.1773 sec.
iter 10 || Loss: 7.7084 || timer: 0.1788 sec.
iter 20 || Loss: 6.6353 || timer: 0.1841 sec.
iter 30 || Loss: 6.5894 || timer: 0.1754 sec.
iter 40 || Loss: 6.2741 || timer: 0.1784 sec.
iter 50 || Loss: 3.7430 || timer: 0.1747 sec.
iter 60 || Loss: 4.1697 || timer: 0.1780 sec.
iter 70 || Loss: 5.2399 || timer: 0.1774 sec.
iter 80 || Loss: 3.1958 || timer: 0.1760 sec.
iter 90 || Loss: 3.1005 || timer: 0.1769 sec.
iter 100 || Loss: 3.4393 || timer: 0.1763 sec.
iter 110 || Loss: 2.4100 || timer: 0.1753 sec.
iter 120 || Loss: 3.4636 || timer: 0.1807 sec.
iter 130 || Loss: 3.0441 || timer: 0.1742 sec.
iter 140 || Loss: 2.8319 || timer: 0.1757 sec.
iter 150 || Loss: 3.3764 || timer: 0.1812 sec.
iter 160 || Loss: 3.0475 || timer: 0.1761 sec.
iter 170 || Loss: 2.1637 || timer: 0.1816 sec.
iter 180 || Loss: 2.6104 || timer: 0.1739 sec.
iter 190 || Loss: 2.9736 || timer: 0.1820 sec.
iter 200 || Loss: 2.7536 || timer: 0.1721 sec.
ite

iter 1720 || Loss: 1.1873 || timer: 0.1897 sec.
iter 1730 || Loss: 0.7565 || timer: 0.1861 sec.
iter 1740 || Loss: 1.0186 || timer: 0.1869 sec.
iter 1750 || Loss: 1.1847 || timer: 0.1843 sec.
iter 1760 || Loss: 1.1172 || timer: 0.1805 sec.
iter 1770 || Loss: 1.3373 || timer: 0.1848 sec.
iter 1780 || Loss: 1.2822 || timer: 0.1864 sec.
iter 1790 || Loss: 1.2552 || timer: 0.1807 sec.
iter 1800 || Loss: 0.7587 || timer: 0.1781 sec.
iter 1810 || Loss: 1.4144 || timer: 0.1814 sec.
iter 1820 || Loss: 1.2603 || timer: 0.1785 sec.
iter 1830 || Loss: 1.0662 || timer: 0.1914 sec.
iter 1840 || Loss: 1.3891 || timer: 0.1868 sec.
iter 1850 || Loss: 1.3316 || timer: 0.1882 sec.
iter 1860 || Loss: 0.8206 || timer: 0.1849 sec.
iter 1870 || Loss: 0.8104 || timer: 0.1792 sec.
iter 1880 || Loss: 0.7445 || timer: 0.1938 sec.
iter 1890 || Loss: 1.2103 || timer: 0.1824 sec.
iter 1900 || Loss: 1.0833 || timer: 0.1852 sec.
iter 1910 || Loss: 0.6412 || timer: 0.1800 sec.
iter 1920 || Loss: 1.1102 || timer: 0.17

iter 3430 || Loss: 0.7310 || timer: 0.1821 sec.
iter 3440 || Loss: 0.9783 || timer: 0.1800 sec.
iter 3450 || Loss: 1.1619 || timer: 0.1830 sec.
iter 3460 || Loss: 0.9048 || timer: 0.1872 sec.
iter 3470 || Loss: 0.7046 || timer: 0.1772 sec.
iter 3480 || Loss: 0.5216 || timer: 0.1908 sec.
iter 3490 || Loss: 0.8201 || timer: 0.1842 sec.
iter 3500 || Loss: 0.7619 || timer: 0.1844 sec.
iter 3510 || Loss: 0.5102 || timer: 0.1951 sec.
iter 3520 || Loss: 0.7640 || timer: 0.1868 sec.
iter 3530 || Loss: 0.6521 || timer: 0.1842 sec.
iter 3540 || Loss: 1.0240 || timer: 0.1800 sec.
iter 3550 || Loss: 0.8903 || timer: 0.1755 sec.
iter 3560 || Loss: 0.9966 || timer: 0.1907 sec.
iter 3570 || Loss: 0.8874 || timer: 0.1842 sec.
iter 3580 || Loss: 0.5428 || timer: 0.1845 sec.
iter 3590 || Loss: 0.8168 || timer: 0.1848 sec.
iter 3600 || Loss: 1.1941 || timer: 0.1784 sec.
iter 3610 || Loss: 0.6377 || timer: 0.1904 sec.
iter 3620 || Loss: 0.8582 || timer: 0.1859 sec.
iter 3630 || Loss: 1.1812 || timer: 0.18

iter 5140 || Loss: 1.2685 || timer: 0.1823 sec.
iter 5150 || Loss: 0.7180 || timer: 0.1881 sec.
iter 5160 || Loss: 0.5982 || timer: 0.1819 sec.
iter 5170 || Loss: 0.6436 || timer: 0.1814 sec.
iter 5180 || Loss: 1.0274 || timer: 0.1860 sec.
iter 5190 || Loss: 0.9467 || timer: 0.1823 sec.
iter 5200 || Loss: 0.7669 || timer: 0.1882 sec.
iter 5210 || Loss: 0.6104 || timer: 0.1831 sec.
iter 5220 || Loss: 1.0880 || timer: 0.1828 sec.
iter 5230 || Loss: 0.5962 || timer: 0.1924 sec.
iter 5240 || Loss: 0.7290 || timer: 0.1852 sec.
iter 5250 || Loss: 0.9597 || timer: 0.1793 sec.
iter 5260 || Loss: 1.0377 || timer: 0.1817 sec.
iter 5270 || Loss: 1.1905 || timer: 0.1835 sec.
iter 5280 || Loss: 0.5380 || timer: 0.1853 sec.
iter 5290 || Loss: 0.7772 || timer: 0.1804 sec.
iter 5300 || Loss: 1.0034 || timer: 0.1871 sec.
iter 5310 || Loss: 0.4864 || timer: 0.1906 sec.
iter 5320 || Loss: 0.5445 || timer: 0.1837 sec.
iter 5330 || Loss: 0.6758 || timer: 0.1862 sec.
iter 5340 || Loss: 0.6932 || timer: 0.18

iter 6850 || Loss: 0.8098 || timer: 0.1860 sec.
iter 6860 || Loss: 0.6048 || timer: 0.1828 sec.
iter 6870 || Loss: 0.9954 || timer: 0.1883 sec.
iter 6880 || Loss: 0.6075 || timer: 0.1812 sec.
iter 6890 || Loss: 0.5585 || timer: 0.1828 sec.
iter 6900 || Loss: 0.7404 || timer: 0.1756 sec.
iter 6910 || Loss: 0.6275 || timer: 0.1840 sec.
iter 6920 || Loss: 0.7469 || timer: 0.1857 sec.
iter 6930 || Loss: 0.5589 || timer: 0.1800 sec.
iter 6940 || Loss: 0.6145 || timer: 0.1848 sec.
iter 6950 || Loss: 0.6034 || timer: 0.1858 sec.
iter 6960 || Loss: 0.4565 || timer: 0.1830 sec.
iter 6970 || Loss: 0.5582 || timer: 0.1931 sec.
iter 6980 || Loss: 0.9148 || timer: 0.1853 sec.
iter 6990 || Loss: 0.8900 || timer: 0.1887 sec.
iter 7000 || Loss: 0.4261 || timer: 0.1890 sec.
iter 7010 || Loss: 0.6553 || timer: 0.2054 sec.
iter 7020 || Loss: 0.5178 || timer: 0.1870 sec.
iter 7030 || Loss: 0.7796 || timer: 0.1819 sec.
iter 7040 || Loss: 0.8716 || timer: 0.2044 sec.
iter 7050 || Loss: 0.7168 || timer: 0.18

iter 8560 || Loss: 0.6055 || timer: 0.1883 sec.
iter 8570 || Loss: 0.8891 || timer: 0.1818 sec.
iter 8580 || Loss: 0.5184 || timer: 0.1872 sec.
iter 8590 || Loss: 0.5848 || timer: 0.1871 sec.
iter 8600 || Loss: 0.6938 || timer: 0.1860 sec.
iter 8610 || Loss: 0.4368 || timer: 0.1864 sec.
iter 8620 || Loss: 0.4316 || timer: 0.1863 sec.
iter 8630 || Loss: 1.2223 || timer: 0.2026 sec.
iter 8640 || Loss: 0.7493 || timer: 0.1843 sec.
iter 8650 || Loss: 0.8086 || timer: 0.1893 sec.
iter 8660 || Loss: 0.6477 || timer: 0.1856 sec.
iter 8670 || Loss: 0.4558 || timer: 0.1799 sec.
iter 8680 || Loss: 1.4212 || timer: 0.1930 sec.
iter 8690 || Loss: 0.2676 || timer: 0.1907 sec.
iter 8700 || Loss: 0.9952 || timer: 0.1837 sec.
iter 8710 || Loss: 0.4431 || timer: 0.1859 sec.
iter 8720 || Loss: 0.6891 || timer: 0.1814 sec.
iter 8730 || Loss: 0.5587 || timer: 0.1798 sec.
iter 8740 || Loss: 1.0818 || timer: 0.1841 sec.
iter 8750 || Loss: 0.7036 || timer: 0.1845 sec.
iter 8760 || Loss: 0.7405 || timer: 0.19

iter 10260 || Loss: 0.7425 || timer: 0.1895 sec.
iter 10270 || Loss: 0.3719 || timer: 0.1866 sec.
iter 10280 || Loss: 0.5936 || timer: 0.1933 sec.
iter 10290 || Loss: 0.6415 || timer: 0.1812 sec.
iter 10300 || Loss: 0.7796 || timer: 0.1821 sec.
iter 10310 || Loss: 0.4121 || timer: 0.1904 sec.
iter 10320 || Loss: 0.5168 || timer: 0.1859 sec.
iter 10330 || Loss: 0.5948 || timer: 0.1829 sec.
iter 10340 || Loss: 0.5047 || timer: 0.1806 sec.
iter 10350 || Loss: 0.5475 || timer: 0.1825 sec.
iter 10360 || Loss: 1.3255 || timer: 0.1887 sec.
iter 10370 || Loss: 0.4927 || timer: 0.1913 sec.
iter 10380 || Loss: 1.1327 || timer: 0.1823 sec.
iter 10390 || Loss: 0.5385 || timer: 0.1884 sec.
iter 10400 || Loss: 0.3528 || timer: 0.1868 sec.
iter 10410 || Loss: 0.3301 || timer: 0.1898 sec.
iter 10420 || Loss: 0.7904 || timer: 0.1872 sec.
iter 10430 || Loss: 0.5837 || timer: 0.1876 sec.
iter 10440 || Loss: 0.4435 || timer: 0.1816 sec.
iter 10450 || Loss: 1.4205 || timer: 0.1919 sec.
iter 10460 || Loss: 

iter 11930 || Loss: 0.5694 || timer: 0.1850 sec.
iter 11940 || Loss: 0.4416 || timer: 0.1861 sec.
iter 11950 || Loss: 0.4316 || timer: 0.1857 sec.
iter 11960 || Loss: 0.7710 || timer: 0.1815 sec.
iter 11970 || Loss: 0.6969 || timer: 0.1851 sec.
iter 11980 || Loss: 0.6492 || timer: 0.1824 sec.
iter 11990 || Loss: 0.4075 || 