In [1]:
# 学習と検証の実装

In [31]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.nn.init as init
import torch.optim as optim

import random
import numpy as np
import time
import pandas as pd

In [4]:
seed = 1234
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [5]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [10]:
# DataLoaderの作成

from utils.ssd_model import make_datapath_list, VOCDataset, DataTransform, Anno_xml2list, od_collate_fn

rootpath = "./data/VOCdevkit/VOC2012/"
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(rootpath)

voc_classes = ['aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

color_mean = (104, 117, 123)
input_size = 300

train_dataset = VOCDataset(train_img_list, train_anno_list, phase='train', 
                           transform=DataTransform(input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))
val_dataset = VOCDataset(val_img_list, val_anno_list, phase='val',
                        transform=DataTransform(input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))

batch_size = 32

train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=od_collate_fn)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=od_collate_fn)

dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}



In [16]:
# ネットワークモデルの作成

from utils.ssd_model import SSD

ssd_cfg = {
    'num_classes': 21, # 背景クラスも含む
    'input_size': 300,
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4], # DBoxのアスペクト比の種類
    'feature_maps': [38, 19, 10, 5, 3, 1], # 各sourceの画像サイズ
    'steps': [8, 16, 32, 64, 100, 300], # DBoxの大きさを決める
    'min_sizes': [30, 60, 111, 162, 213, 264], # DBoxの大きさを決める
    'max_sizes': [60, 111, 162, 213, 264, 315], # DBoxの大きさを決める
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

net = SSD(phase='train', cfg=ssd_cfg)

# 初期値の重みの設定
vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
net.vgg.load_state_dict(vgg_weights)

# その他はHeの初期値に
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
            
# Heの初期値を適用
net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)

print(f"使用デバイス: {device}")

使用デバイス: cuda:0


In [19]:
# 損失関数と最適化手法の設定
from utils.ssd_model import MultiBoxLoss

criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)

optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [33]:
# 学習と検証の実施
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f'使用デバイス: {device}')
    
    net.to(device)
    
    torch.backends.cudnn.benchmark = True
    
    save_epoch_cycle = 10
    iteration = 1
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    logs = []
    
    for epoch in range(num_epochs + 1):
        t_epoch_start = time.time()
        t_iter_start = time.time()
        
        print('------------')
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('------------')
        
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
                print(' (train) ')
            else:
                if ((epoch+1) % save_epoch_cycle == 0):
                    net.eval()
                    print(' (val) ')
                else:
                    continue
                    
            for images, targets in dataloaders_dict[phase]:
                images = images.to(device)
                targets = [ann.to(device) for ann in targets] # リストの各要素のテンソルをGPUへ
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase=='train'):
                    outputs = net(images)
                    
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c
                    
                    if phase == 'train':
                        loss.backward()
                        nn.utils.clip_grad_value_(net.parameters(), clip_value=2.0)
                        optimizer.step()
                        
                        if (iteration % save_epoch_cycle == 0):
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print(f'iteration {iteration} || Loss: {loss.item():.4f} || 10iter: {duration:.4f} sec')
                            
                        epoch_train_loss += loss.item()
                        iteration += 1
                        
                    else:
                        epoch_val_loss += loss.item()
                        
        t_epoch_finish = time.time()
        print('------------')
        print(f'epoch {epoch+1} || Epoch_TRAIN_Loss: {epoch_train_loss:.4f} || Epoch_VAL_Loss: {epoch_val_loss:.4f}')
        print(f'timer: {t_epoch_finish - t_epoch_start:.4f}sec')
        t_epoch_start = time.time()

        log_epoch = {'epoch': epoch+1, 'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        if ((epoch+1) % save_epoch_cycle == 0):
            torch.save(net.state_dict(), 'weights/ssd300_' + str(epoch+1) + '.pth')

In [34]:
num_epochs = 50
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

使用デバイス: cuda:0
------------
Epoch 1/50
------------
 (train) 
iteration 10 || Loss: 4.7485 || 10iter: 8.3590 sec
iteration 20 || Loss: 4.6855 || 10iter: 17.8754 sec
iteration 30 || Loss: 5.1709 || 10iter: 26.4507 sec
iteration 40 || Loss: 4.5907 || 10iter: 35.1732 sec
iteration 50 || Loss: 4.7877 || 10iter: 44.3832 sec
iteration 60 || Loss: 4.0540 || 10iter: 53.6042 sec
iteration 70 || Loss: 4.6202 || 10iter: 63.3255 sec
iteration 80 || Loss: 4.5606 || 10iter: 72.2908 sec
iteration 90 || Loss: 4.3560 || 10iter: 81.3396 sec
iteration 100 || Loss: 4.4810 || 10iter: 90.3114 sec
iteration 110 || Loss: 5.1089 || 10iter: 100.3378 sec
iteration 120 || Loss: 4.8257 || 10iter: 109.4112 sec
iteration 130 || Loss: 5.1575 || 10iter: 118.7993 sec
iteration 140 || Loss: 4.1586 || 10iter: 127.7272 sec
iteration 150 || Loss: 4.6103 || 10iter: 136.7313 sec
iteration 160 || Loss: 3.9928 || 10iter: 146.9780 sec
iteration 170 || Loss: 4.9134 || 10iter: 156.5555 sec
------------
epoch 1 || Epoch_TRAIN_Loss