In [1]:
#  jojo_trainer.ipynb
#  3.モデルの学習を行う

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import numpy as np
import pandas as pd
import random
import sys
sys.path.append("/content/drive/MyDrive/jojo_poser/src")

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
!pip install torchinfo
from torchinfo import summary
from torchvision import models

from image_loader import *

torch.manual_seed(1)
#  np.random.seed(1)
#  random.seed(1)

Collecting torchinfo
  Downloading torchinfo-1.5.3-py3-none-any.whl (19 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.5.3


<torch._C.Generator at 0x7f6f292d7250>

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("使用されるデバイス: ",device)

使用されるデバイス:  cuda


In [5]:
LABELS = ["Buccellati", "Dio", "Giorno", "Highway-Star", "Jo-suke", "Jo-taro",
            "Kakyoin", "Kira", "Kishibe", "Polnareff", "Trish"]

In [6]:
#  画像の読み込み
im_rows = 300
im_cols = 300
color_mean = (132, 140, 144) #  BGR
#  color_mean = (0, 0, 0) #  平均を引く動作をなくす

#  Dataを取得
root_path = "/content/drive/MyDrive/jojo_poser"
train_img_path_list, train_labels = data_loder(root_path, "train")
valid_img_path_list, valid_labels = data_loder(root_path, "valid")

transform = DataTransform(im_rows, im_cols, color_mean)

#  Datasetを作成
tr_data = PreprocessJOJO(train_img_path_list, train_labels, "valid", im_rows, im_cols, transform=transform)
val_data = PreprocessJOJO(valid_img_path_list, valid_labels, "valid", im_rows, im_cols, transform=transform)
print('訓練データのサイズ: ', tr_data.__len__())
print('検証データのサイズ: ', val_data.__len__())

#  DataLorderを作成
batch_size = 16
tr_batch = data.DataLoader(
    tr_data,                #  訓練用data
    batch_size = batch_size,#  ミニバッチのサイズ
    shuffle = True,         #  シャッフルして抽出
    )
val_batch = data.DataLoader(
    val_data,               #  検証用data
    batch_size = batch_size,#  ミニバッチのサイズ
    shuffle = False,        #  シャッフルはせずに抽出
    )
print('訓練データのミニバッチの個数: ', tr_batch.__len__())
print('検証データのミニバッチの個数: ', val_batch.__len__())

#  DataLoaderをdictにまとめる
dataloaders_dict = {"train":tr_batch, "valid":val_batch}

訓練データのサイズ:  700
検証データのサイズ:  91
訓練データのミニバッチの個数:  44
検証データのミニバッチの個数:  6


In [None]:
#  訓練用のDataLorderをイテレーターに変換
batch_iterator = iter(dataloaders_dict["valid"])
#  最初のミニバッチを取り出す
images, labels = next(batch_iterator)
print('ミニバッチのイメージの形状: ',images.size())
print('ミニバッチのラベルの形状: ',len(labels))
print('labels[0]の形状: ',labels[0])

In [7]:
#   モデルのインスタンス作成
net = models.vgg16(pretrained=True)
net.classifier[6] = nn.Linear(in_features=4096, out_features=11)
print(net)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [8]:
summary(
    net,
    input_size = (batch_size, 3, im_rows, im_cols),
    col_names=["input_size","output_size","num_params"])

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
VGG                                      --                        --                        --
├─Sequential: 1-1                        [16, 3, 300, 300]         [16, 512, 9, 9]           --
│    └─Conv2d: 2-1                       [16, 3, 300, 300]         [16, 64, 300, 300]        1,792
│    └─ReLU: 2-2                         [16, 64, 300, 300]        [16, 64, 300, 300]        --
│    └─Conv2d: 2-3                       [16, 64, 300, 300]        [16, 64, 300, 300]        36,928
│    └─ReLU: 2-4                         [16, 64, 300, 300]        [16, 64, 300, 300]        --
│    └─MaxPool2d: 2-5                    [16, 64, 300, 300]        [16, 64, 150, 150]        --
│    └─Conv2d: 2-6                       [16, 64, 150, 150]        [16, 128, 150, 150]       73,856
│    └─ReLU: 2-7                         [16, 128, 150, 150]       [16, 128, 150, 150]       --
│    └─Conv2d: 2-8      

In [None]:
#  損失関数及びオプティマイザーの作成
criterion = nn.CrossEntropyLoss()

params_to_update_1 = []
params_to_update_2 = []
params_to_update_3 = []

update_param_names_1 = ["features"]
update_param_names_2 = ["classifier.0.weight", "classifier.0.bias", "classifier.3.weight", "classifier.3.bias"]
update_param_names_3 = ["classifier.6.weight", "classifier.6.bias"]

for name, param in net.named_parameters():
  if update_param_names_1[0] in name:
    param.requires_grad = True
    params_to_update_1.append(param)
    print("params_to_update_1に格納: ",name)
  elif name in update_param_names_2:
    param.requires_grad = True
    params_to_update_2.append(param)
    print("params_to_update_2に格納: ",name)
  elif name in update_param_names_3:
    param.requires_grad = True
    params_to_update_3.append(param)
    print("params_to_update_3に格納: ",name)
  else:
    param.requires_grad = False
    print("勾配の計算無し: ", name)

optimizer = optim.SGD([{'params':params_to_update_1, 'lr':1e-4},
                        {'params':params_to_update_2, 'lr':5e-4},
                        {'params':params_to_update_3, 'lr':1e-3}],
                      momentum = 0.9)

params_to_update_1に格納:  features.0.weight
params_to_update_1に格納:  features.0.bias
params_to_update_1に格納:  features.2.weight
params_to_update_1に格納:  features.2.bias
params_to_update_1に格納:  features.5.weight
params_to_update_1に格納:  features.5.bias
params_to_update_1に格納:  features.7.weight
params_to_update_1に格納:  features.7.bias
params_to_update_1に格納:  features.10.weight
params_to_update_1に格納:  features.10.bias
params_to_update_1に格納:  features.12.weight
params_to_update_1に格納:  features.12.bias
params_to_update_1に格納:  features.14.weight
params_to_update_1に格納:  features.14.bias
params_to_update_1に格納:  features.17.weight
params_to_update_1に格納:  features.17.bias
params_to_update_1に格納:  features.19.weight
params_to_update_1に格納:  features.19.bias
params_to_update_1に格納:  features.21.weight
params_to_update_1に格納:  features.21.bias
params_to_update_1に格納:  features.24.weight
params_to_update_1に格納:  features.24.bias
params_to_update_1に格納:  features.26.weight
params_to_update_1に格納:  features.26.bias


In [None]:

def train(net, dataloaders_dict, criterion, optimizer, num_epochs):
    '''
    Parameters:
        net(object): VGG+Dense モデル
        datalorders_dict(dict(object)): DataLorder
        criterion(object): 損失関数
        optimizer(object): オプティマイザー
        num_epochs(int): 学習回数
    '''
    
    net.to(device)
    torch.backends.cudnn.benchmark = True
    
    iteration = 1 #  イテレーション(ステップ)カウンター
    epoch_train_loss = 0.0 #  訓練1エポックごとの損失和
    epoch_val_loss = 0.0 #  検証1エポックごとの損失和
    logs = [] #  損失のログを記録するリスト
    
    for epoch in range(num_epochs):
        print('----------------------------------------------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('----------------------------------------------------')

        for phase in ["train","valid"]:

            if phase=="train":
                #  モデルを訓練モードに
                net.train()
            else:
                if((epoch+1)%10 == 0):
                    net.eval()#  モデルを検証モードに
                    print("----------------------------------------------------")
                    print("----- validation mode -----")
                else:
                    continue
                    
            #  1ステップにおけるミニバッチを使用した学習または検証
            #  データローダーをイテレートしてミニバッチを抽出
            for images, labels in dataloaders_dict[phase]:
                print("labels",labels)#  ================================== test!!!!
                #  画像データにデバイスを割り当てる
                images = images.to(device)
                #  教師データをデバイスを割り当てる
                labels = labels.to(device)

                #  optimizerが保持する勾配を0で初期化
                optimizer.zero_grad()

                #  順伝搬(forward)とバックプロパゲーション(訓練時)
                with torch.set_grad_enabled(phase == 'train'):
                    #  順伝播(forward)
                    outputs = net(images)
                    #  print(outputs.size())#  ================================== test!!!!
                    _, preds = torch.max(outputs, 1)
                    print("preds: ", preds)
                    #  print("outputs: ",outputs.data)#  ================================== test!!!!
                    #  labelの損失平均
                    loss = criterion(outputs, labels)
                    #  print("loss: ",loss.item())#  ================================== test!!!!
                    
                    #  訓練時はバックプロパゲーションによるパラメーター更新を行う
                    if phase == 'train':
                        loss.backward()  #  バックプロパゲーション

                        # 勾配降下法の更新式を適用してバイアス、重みを更新
                        optimizer.step()

                        # ミニバッチを10個処理(10ステップ)ごとに損失を出力
                        if (iteration % 10 == 0):
                            #  ステップ, 損失を出力
                            print('step( {} )  loss: {:.4f}'.format(iteration, loss.item()))

                        # エポックの損失をepoch_train_lossに加算する
                        epoch_train_loss += loss.item()
                        # ステップ数を1増やす
                        iteration += 1

                    # 検証モードでは順伝播後の損失の記録のみを行う
                    else:
                        epoch_val_loss += loss.item()
                        
        # epochのphaseごとのlossと正解率
        print('---------------------------------------')
        # 訓練データの損失と検証データの損失を出力
        print('train_loss: {:.4f} - val_loss(Every 10 epochs): {:.4f}'.format(epoch_train_loss, epoch_val_loss))

        # エポックごとに損失をdictオブジェクトに保存
        log_epoch = {'epoch': epoch+1,
                     'train_loss': epoch_train_loss,
                     'val_loss': epoch_val_loss}
        # ログのリストに追加
        logs.append(log_epoch)

        # 訓練時の損失和を0で初期化
        epoch_train_loss = 0.0
        # 検証時の損失和を0で初期化
        epoch_val_loss = 0.0
        
        # 5エポック終了ごとにモデルのパラメーター値を保存
        if ((epoch+1) % 5 == 0):
            torch.save(
                net.state_dict(),
                root_path + '/weights/detection_weights' + str(epoch+1) + '.pth')
            print('--saved weights--')
    # ログのリストをデータフレームに変換
    df = pd.DataFrame(logs)
    # ログファイルに保存
    df.to_csv(root_path + '/outputs/epoch_loss.csv')

In [None]:
#  学習
num_epochs = 15#  最終は50
train(net,
      dataloaders_dict,
      criterion,
      optimizer,
      num_epochs=num_epochs)

----------------------------------------------------
Epoch 1/15
----------------------------------------------------
labels tensor([ 3,  7,  1,  8,  2,  9,  3,  7,  7,  2,  4,  1,  2,  9,  4, 10])
preds:  tensor([2, 2, 7, 1, 1, 4, 8, 9, 2, 2, 1, 0, 2, 9, 1, 5], device='cuda:0')
labels tensor([ 2,  5,  8, 10, 10,  2,  5, 10,  6,  3,  7,  9,  8,  3,  0,  5])
preds:  tensor([3, 4, 4, 7, 3, 3, 3, 3, 4, 7, 4, 3, 4, 4, 7, 4], device='cuda:0')
labels tensor([10,  7,  5,  9,  1,  2,  5,  6,  3,  6,  8,  1, 10,  7, 10,  8])
preds:  tensor([10,  7, 10,  8, 10, 10, 10, 10, 10, 10, 10, 10, 10,  7, 10,  9],
       device='cuda:0')
labels tensor([ 5,  8, 10,  8,  9, 10,  5,  4,  9,  9,  0,  7,  4,  2,  9,  7])
preds:  tensor([8, 5, 8, 8, 8, 2, 5, 5, 8, 5, 8, 5, 2, 5, 9, 5], device='cuda:0')
labels tensor([ 1,  4,  4,  1,  8,  8, 10,  4,  3, 10,  0,  5,  5,  6,  1,  8])
preds:  tensor([2, 9, 6, 9, 5, 5, 7, 9, 9, 9, 2, 7, 8, 8, 7, 2], device='cuda:0')
labels tensor([ 7,  0, 10,  4,  8,  0,  4,  5, 10,

In [None]:
#  画像の読み込み
im_rows = 300
im_cols = 300
color_mean = (132, 140, 144) #  BGR

#  Dataを取得
root_path = "/content/drive/MyDrive/jojo_poser"
valid_img_path_list, valid_labels = data_loder(root_path, "valid")

transform = DataTransform(im_rows, im_cols, color_mean)

#  Datasetを作成
val_data = PreprocessJOJO(valid_img_path_list, valid_labels, "valid", im_rows, im_cols, transform=transform)
print('検証データのサイズ: ', val_data.__len__())

#  DataLorderを作成
batch_size = 1
val_batch = data.DataLoader(
    val_data,               #  検証用data
    batch_size = batch_size,#  ミニバッチのサイズ
    shuffle = False,        #  シャッフルはせずに抽出
    )
print('検証データのミニバッチの個数: ', val_batch.__len__())

検証データのサイズ:  91
検証データのミニバッチの個数:  91


In [None]:
#   モデルのインスタンス作成
net = models.vgg16(pretrained=True)
net.classifier[6] = nn.Linear(in_features=4096, out_features=11)
#  vggモデルの学習済みの重みを適用
net_weights = torch.load(root_path+'/weights/detection_weights10.pth')
net.load_state_dict(net_weights)
print("[model net] weights is applied.")

[model net] weights is applied.


In [None]:
def accuracy(net, val_batch):

  all_outputs = []
  all_labels = []
  for images, labels in val_batch:
    print(images.size())
    outputs = net(images)
    all_outputs.append(outputs.data.max(1)[1].item())
    all_labels.append(labels.item())
    print("answer: ",labels.item())
    print("predict: ", outputs.data.max(1)[1].item())
    del images, labels
  all_outputs = np.array(all_outputs)
  all_labels = np.array(all_labels)
  print("total accuracy score: ",sum(all_outputs==all_labels)/len(all_labels))

In [None]:
accuracy(net, val_batch)

torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([

In [None]:
accuracy(net, val_batch)

torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  0
predict:  0
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  1
predict:  1
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([1, 3, 300, 300])
answer:  2
predict:  2
torch.Size([