<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/2021notebooks/2021_1028face_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding:utf-8 -*-
!pip install japanize_matplotlib

In [None]:
import sys
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import random
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib

# from tqdm import tqdm  #コマンドラインで実行するとき
from tqdm.notebook import tqdm  # jupyter で実行するとき

# CUDA debugging switch. NOTE(review): this is expected to force synchronous
# kernel launches (clearer error locations, slower GPU runs) -- confirm it is
# still wanted outside of debugging.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Pick the compute resource: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# 乱数シード固定（再現性の担保）
def fix_seed(seed):
    """Seed every random number generator used here, for reproducibility.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices), and sets the cuDNN flags to deterministic / no benchmarking.

    Args:
        seed: integer seed handed unchanged to each library.
    """
    # cuDNN: request deterministic behaviour and disable benchmarking.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # The seeders are independent of each other, so apply them in one pass.
    for seeder in (random.seed,
                   np.random.seed,
                   torch.manual_seed,
                   torch.cuda.manual_seed_all):
        seeder(seed)


# Seed once for the whole notebook.
seed = 2021
fix_seed(seed)

# データローダーのサブプロセスの乱数のseedが固定
def worker_init_fn(worker_id):
    """Re-seed NumPy's global RNG with a seed that depends on `worker_id`.

    Intended as the `worker_init_fn` of a DataLoader: the new seed is the first
    word of NumPy's current global RNG state plus `worker_id`.

    Args:
        worker_id: integer index of the worker.
    """
    base_seed = int(np.random.get_state()[1][0])
    # np.random.seed only accepts 0 .. 2**32 - 1, so wrap the sum explicitly
    # instead of relying on NumPy's integer overflow / promotion rules.
    np.random.seed((base_seed + int(worker_id)) % (2 ** 32))

#print(worker_init_fn(1))

In [None]:
# データセットの作成
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Load the Olivetti faces data set and take its features and labels.
data = fetch_olivetti_faces()
X, y = data.data, data.target

# Stratified hold-out split: `split_ratio` of the samples go to the test set.
split_ratio = 0.2
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=split_ratio,
                                                    stratify=y,
                                                    random_state=0)
print(f'X_train 訓練画像のサイズ: {X_train.shape}')
print(f'y_train 教師信号データのサイズ: {y_train.shape}')

# Convert to tensors. Features are reshaped to (N, 1, 64, 64) for the
# convolutional network; labels become int64 class indices.
# (The earlier duplicate assignment of X_ was dead code and has been removed.)
X_ = torch.tensor(X_train).float().reshape(-1, 1, 64, 64)
y_ = torch.tensor(y_train).long()
Xtest_ = torch.tensor(X_test).float().reshape(-1, 1, 64, 64)
ytest_ = torch.tensor(y_test).long()

class OlivettiFace_dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each feature entry with its label by index.

    Args:
        X: indexable container of features.
        y: indexable container of labels, aligned with `X`.
    """

    def __init__(self, X, y):
        # Both containers are stored as given (no copy, no validation).
        self.X, self.y = X, y

    def __len__(self):
        # The dataset size is the length of the feature container.
        return len(self.X)

    def __getitem__(self, index):
        # Return the (feature, label) pair found at `index`.
        return self.X[index], self.y[index]


# Wrap the training and test tensors in dataset objects.
train_dataset = OlivettiFace_dataset(X_, y_)
test_dataset = OlivettiFace_dataset(Xtest_, ytest_)

# Build the data loaders. Both share the same settings and differ only in the
# dataset they read and in whether the samples are shuffled.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,                 # samples per mini-batch
    shuffle=True,                   # reshuffle the training samples
    num_workers=0,                  # no worker subprocesses are requested
    pin_memory=True,
    # NOTE(review): with num_workers=0 this hook is presumably never called.
    worker_init_fn=worker_init_fn,
    drop_last=False,                # keep the final, possibly smaller, batch
)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,                  # keep the test samples in a fixed order
    num_workers=0,
    pin_memory=True,
    worker_init_fn=worker_init_fn,
    drop_last=False,
)

In [None]:
#len(test_dataloader)

In [None]:
import torch.nn as nn
# ResNet50（以降のモデル）では，skip connection を行うために，ブロックの最後でチャネル数を調整しなくてはならない場合がある
# （すなわち，ブロックの入力特徴数を出力特徴数に合わせる必要がある）ので，これを shortcut として実装しています。
# 中身はシンプルで，1×1 の畳み込みを差し込むことでチャネル数を調整しています。

class Block(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip path.

    The main path reduces the channels to `out_features // 4` with a 1x1
    convolution, applies a 3x3 convolution (padding 1) at that width, and
    expands to `out_features` with a second 1x1 convolution. Each convolution
    is followed by batch normalisation; ReLU follows the first two. The
    shortcut is added to the main path and a final ReLU is applied.

    Args:
        in_features: number of input channels.
        out_features: number of output channels.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        features_divided4 = out_features // 4

        # 1x1 convolution: reduce to the bottleneck width.
        self.conv1x1 = nn.Conv2d(in_features, features_divided4, kernel_size=(1, 1))
        self.bn1 = nn.BatchNorm2d(features_divided4)
        self.relu1 = nn.ReLU()

        # 3x3 convolution at the bottleneck width.
        self.conv3x3_2 = nn.Conv2d(features_divided4, features_divided4, kernel_size=(3, 3), padding=1)
        self.bn2 = nn.BatchNorm2d(features_divided4)
        self.relu2 = nn.ReLU()

        # 1x1 convolution: expand to the output width.
        self.conv1x1_3 = nn.Conv2d(features_divided4, out_features,
                                   kernel_size=(1, 1),
                                   padding=0)
        self.bn3 = nn.BatchNorm2d(out_features)

        # Skip connection, with channel adjustment when the widths differ.
        self.shortcut = self._shortcut(in_features, out_features)

        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck path to `x`, add the shortcut of `x`, then ReLU."""
        h = self.conv1x1(x)
        h = self.bn1(h)
        h = self.relu1(h)
        h = self.conv3x3_2(h)
        h = self.bn2(h)
        h = self.relu2(h)
        h = self.conv1x1_3(h)
        h = self.bn3(h)
        shortcut = self.shortcut(x)
        y = self.relu3(h + shortcut)  # skip connection
        return y

    def _shortcut(self, in_features, out_features):
        """Return the module used on the skip path.

        A 1x1 projection when the channel counts differ, otherwise an identity
        module. `nn.Identity` is used rather than a lambda so that the shortcut
        is a registered submodule and the block can be pickled.
        """
        if in_features != out_features:
            return self._projection(in_features, out_features)
        return nn.Identity()

    def _projection(self, in_features, out_features):
        """Return a 1x1 convolution mapping `in_features` to `out_features` channels."""
        return nn.Conv2d(in_features, out_features, kernel_size=(1, 1), padding=0)

In [None]:
import torch
import torch.nn as nn

class ResNet(nn.Module):
    """Reduced ResNet-style classifier built from `Block` units.

    The full architecture chains four stages of blocks. To save time only
    stage 1 is active here; stages 2-4 are kept as commented-out code in both
    `__init__` and `forward`. Stage 2 used to be constructed but never called,
    which registered unused parameters; it is now disabled like stages 3-4.

    The network takes single-channel images (`conv1` has 1 input channel) and
    returns log-probabilities (`log_softmax` over the last dimension).

    Args:
        output_dim: number of output classes.
    """

    def __init__(self, output_dim=10):
        super().__init__()

        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pooling.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1)

        # Stage 1: one block widening 64 -> 256 channels, then two 256 -> 256 blocks.
        self.block0 = self._building_block(256, in_features=64)
        self.block1 = nn.ModuleList([
            self._building_block(256) for _ in range(2)
        ])

        # 1x1 stride-2 convolution taking 256 -> 512 channels.
        self.conv2 = nn.Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))

        # Stages 2-4 are disabled. To enable one, uncomment it here and the
        # matching lines in forward().
        # # Stage 2
        # self.block2 = nn.ModuleList([
        #     self._building_block(512) for _ in range(4)
        # ])

        # self.conv3 = nn.Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2))

        # # Stage 3
        # self.block3 = nn.ModuleList([
        #     self._building_block(1024) for _ in range(6)
        # ])

        # self.conv4 = nn.Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2))

        # # Stage 4
        # self.block4 = nn.ModuleList([
        #     self._building_block(2048) for _ in range(3)
        # ])

        self.avg_pool = GlobalAvgPool2d()
        # self.fc = nn.Linear(2048, 1000)  # input width when all four stages are enabled
        self.fc = nn.Linear(512, 1000)
        self.out = nn.Linear(1000, output_dim)

    def forward(self, x):
        """Return log-probabilities over the classes for the input batch `x`."""
        h = self.conv1(x)
        h = self.bn1(h)
        h = self.relu1(h)
        h = self.pool1(h)
        h = self.block0(h)
        for block in self.block1:
            h = block(h)
        h = self.conv2(h)
        # for block in self.block2:
        #     h = block(h)
        # h = self.conv3(h)
        # for block in self.block3:
        #     h = block(h)
        # h = self.conv4(h)
        # for block in self.block4:
        #     h = block(h)
        h = self.avg_pool(h)
        h = self.fc(h)
        h = torch.relu(h)
        h = self.out(h)
        y = torch.log_softmax(h, dim=-1)

        return y

    def _building_block(self, out_features, in_features=None):
        """Create a `Block`; `in_features` defaults to `out_features`."""
        if in_features is None:
            in_features = out_features
        return Block(in_features, out_features)

import torch.nn.functional as F
class GlobalAvgPool2d(nn.Module):
    """Global average pooling, applied before the final fully connected layers.

    Averages each channel over all trailing spatial positions and returns a
    2-D tensor with `x.size(1)` columns, i.e. (N, C, H, W) -> (N, C).

    Args:
        device: unused. Accepted only for backward compatibility. The default
            used to be the module-level `device`, which tied the class
            definition to a global for no effect.
    """

    def __init__(self, device=None):
        super().__init__()

    def forward(self, x):
        # The kernel covers the whole spatial extent, leaving one value per channel.
        return F.avg_pool2d(x, kernel_size=x.size()[2:]).view(-1, x.size(1))

In [None]:
# Model: a ResNet with 40 output classes, moved to the selected device.
# NOTE(review): 40 is presumably the number of identities in the Olivetti
# faces data -- confirm against the labels.
model = ResNet(40).to(device)

# 学習・評価
def compute_loss(label, pred):
    """Evaluate the module-level `criterion` for a batch.

    Note the argument order: this function takes (label, pred), while
    `criterion` is called as (pred, label).
    """
    loss = criterion(pred, label)
    return loss

def train_step(x, y):
    """Run one optimisation step on a single batch.

    Uses the module-level `model` and `optimizer`.

    Args:
        x: input batch.
        y: target labels for `x`.

    Returns:
        (loss, preds): the batch loss and the model output for `x`.
    """
    model.train()
    # Clear stale gradients first; the forward pass does not read them.
    optimizer.zero_grad()
    outputs = model(x)
    batch_loss = compute_loss(y, outputs)
    batch_loss.backward()
    optimizer.step()
    return batch_loss, outputs

def test_step(x, y):
    """Evaluate a single batch without updating the model.

    Uses the module-level `model`, switched to eval mode.

    Args:
        x: input batch.
        y: target labels for `x`.

    Returns:
        (loss, preds): the batch loss and the model output for `x`, both
        computed without gradient tracking.
    """
    model.eval()
    # Evaluation needs no gradients; skipping autograd avoids building a graph.
    with torch.no_grad():
        preds = model(x)
        loss = compute_loss(y, preds)
    return loss, preds

# Negative log-likelihood loss; it matches the log_softmax output that
# ResNet.forward returns.
criterion = nn.NLLLoss()
# Alternative optimiser, kept for reference:
#optimizer = optim.Adam(model.parameters(), weight_decay=0.01)
# Plain SGD over all model parameters with learning rate 0.05.
optimizer = optim.SGD(model.parameters(), lr=0.05)

In [None]:
from sklearn.metrics import accuracy_score

epochs = 10
fix_seed(1234)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Per-epoch history of losses and accuracies.
train_losses, test_losses, train_accs, test_accs = [], [], [], []
for epoch in tqdm(range(epochs)):
    train_loss, test_loss = 0., 0.
    # Labels and predictions are collected over the whole epoch so accuracy is
    # computed per sample, not as an unweighted mean of per-batch accuracies.
    train_true, train_pred, test_true, test_pred = [], [], [], []

    for (x, y) in train_dataloader:
        x, y = x.to(device), y.to(device)
        loss, preds = train_step(x, y)
        train_loss += loss.item()
        train_true.extend(y.tolist())
        # Predicted class = index of the largest log-probability.
        train_pred.extend(preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_losses.append(train_loss)
    # Running accuracy: the predictions come from train mode, while the
    # weights are still being updated during the epoch.
    train_acc = accuracy_score(train_true, train_pred)
    train_accs.append(train_acc)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for (x, y) in test_dataloader:
            x, y = x.to(device), y.to(device)
            loss, preds = test_step(x, y)
            test_loss += loss.item()
            test_true.extend(y.tolist())
            # Compare the labels with the model's predictions (the original
            # took argmax of the labels themselves).
            test_pred.extend(preds.argmax(dim=-1).tolist())

    test_loss /= len(test_dataloader)
    test_losses.append(test_loss)
    test_acc = accuracy_score(test_true, test_pred)
    test_accs.append(test_acc)
    print(f'エポック: {epoch + 1},',
          f'訓練損失: {train_loss:.3f},',
          f'テスト損失: {test_loss:.3f},',
          f'検証精度: {test_acc:.3f}')

# Plot the training progress: losses first, then accuracies.
plt.plot(train_losses, label='訓練損失')
plt.plot(test_losses, label='テスト損失')
plt.legend()
plt.show()

plt.plot(train_accs, label='訓練精度')
plt.plot(test_accs, label='テスト精度')
plt.legend()
plt.show()


In [None]:
# Show the length of the test loader, i.e. how many batches it yields per pass.
len(test_dataloader)

In [None]:
from torchsummary import summary
# torchsummary builds its dummy input on "cuda" unless told otherwise, so pass
# the device type the model actually lives on. This keeps the cell working on
# CPU-only runtimes.
summary(model, input_size=(1, 64, 64), device=next(model.parameters()).device.type)