<a href="https://colab.research.google.com/github/komazawa-deep-learning/komazawa-deep-learning.github.io/blob/master/notebooks/2021_0528LeNet_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<center>
<img src="https://raw.githubusercontent.com/komazawa-deep-learning/komazawa-deep-learning.github.io/master/assets/1998LeCun_Fig2_CNN.svg"> <br/>
</center>

In [None]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

import numpy as np
import matplotlib.pyplot as plt
try:
    import japanize_matplotlib
except ImportError:
    !pip install japanize_matplotlib
    import japanize_matplotlib


In [None]:
import sys
import requests

# Download locations of the four gzip-compressed IDX files of each dataset.
# All three dicts share the same keys: 'Xtrain'/'Ytrain' point at the training
# images/labels and 'Xtest'/'Ytest' at the test (t10k) images/labels.
mnist_urls = {
    # Dataset home page: http://yann.lecun.com/exdb/mnist/
    'Xtrain':'https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz',
    'Ytrain':'https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz',
    'Xtest': 'https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz',
    'Ytest': 'https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz'
    # Alternative locations on the dataset home page (kept for reference, disabled):
    # 'Xtrain': 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
    # 'Ytrain': 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
    # 'Xtest': 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
    # 'Ytest':'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
}

fashionmnist_urls = {
    # Dataset home page: https://github.com/zalandoresearch/fashion-mnist
    'Xtest': 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz',
    'Ytest': 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz',
    'Xtrain': 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',
    'Ytrain': 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz'
}

kmnist_urls = {
    # Dataset home page: http://codh.rois.ac.jp/kmnist/
    'Xtrain': 'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
    'Ytrain': 'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz',
    'Xtest': 'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz',
    'Ytest': 'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz'
}


def download_mnist(dataset):
    """Download every file listed in ``dataset`` into the current directory.

    Args:
        dataset: Mapping whose values are download URLs (for example
            ``mnist_urls``). The keys are not used; each file is saved under
            the last path component of its URL.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the timeout
            expires.
    """
    for url in dataset.values():
        fname = url.split('/')[-1]
        print(url, fname)
        # A finite timeout is used because timeout=None would wait forever on
        # a server that is slow to respond.
        r = requests.get(url, timeout=35)
        # Fail here, with the HTTP status in the message, instead of silently
        # saving an error page as a ".gz" file that only breaks later in gzip.
        r.raise_for_status()
        with open(fname, 'wb') as f:
            f.write(r.content)


In [None]:
def load_mnist(path, kind='train'):
    """Read one split of an MNIST-format dataset from gzip-compressed IDX files.

    Args:
        path: Directory containing ``<kind>-labels-idx1-ubyte.gz`` and
            ``<kind>-images-idx3-ubyte.gz``.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        Tuple ``(images, labels)`` of ``uint8`` arrays. ``labels`` is
        one-dimensional and ``images`` has shape ``(len(labels), 784)``.
    """
    import gzip
    import os
    import numpy as np

    def _payload(file_path, header_size):
        # Decompress the whole file and view the bytes after the header as uint8.
        with gzip.open(file_path, 'rb') as stream:
            raw = stream.read()
        return np.frombuffer(raw, dtype=np.uint8, offset=header_size)

    labels_path = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    # The label file is read first: its length fixes the number of image rows.
    labels = _payload(labels_path, 8)
    pixels = _payload(images_path, 16)
    images = pixels.reshape(len(labels), 784)

    return images, labels

In [None]:
import os

# Remove any previously downloaded files first, so that files left over from a
# different dataset (all three use the same file names) are not reused.
fnames = ['train-images-idx3-ubyte.gz','train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']
for fname in fnames:
    if os.path.exists(fname):
        print(fname)
        os.remove(fname)

# Human-readable class names per dataset; the list index is the class id.
mnist_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
fashionmnist_labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat' ,
                       'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
kmnist_labels = ['お', 'き', 'す', 'つ', 'な', 'は', 'ま', 'や', 'れ', 'を']
# KMNIST class id, code point, character:
# '0,U+304A,お', '1,U+304D,き', '2,U+3059,す', '3,U+3064,つ', '4,U+306A,な',
# '5,U+306F,は', '6,U+307E,ま', '7,U+3084,や', '8,U+308C,れ', '9,U+3092,を'

# Keep `labels` in sync with the `dataset` selected below.
labels = mnist_labels
#labels = fashionmnist_labels
#labels = kmnist_labels

# Pick exactly one of the following three datasets for the exercise.
dataset = mnist_urls
#dataset = fashionmnist_urls
#dataset = kmnist_urls

# Fetch the four files of the selected dataset into the current directory.
download_mnist(dataset)

# Load both splits from the current directory: images are (n, 784) uint8 rows,
# labels are integer class ids.
X_train, Y_train = load_mnist('.', kind='train')
X_test, Y_test = load_mnist('.', kind='t10k')

# One-hot encode the integer class labels with a single vectorised lookup:
# row k of the 10x10 identity matrix is the one-hot vector of class k, so
# indexing it with the label array yields a (n_samples, 10) float64 matrix.
# The result is only kept in the scratch variable `_Y`; the integer labels in
# Y_train / Y_test remain in use (the assignments below are disabled) because
# the training code feeds class indices to F.nll_loss.
_Y = np.eye(10)[Y_train]
#Y_train = _Y

_Y = np.eye(10)[Y_test]
#Y_test = _Y

# Limit the number of samples to save time.
n_train = 60000             # Number of training samples; 60000 means the whole training set is used.
n_val = 1000                # Number of validation samples
n_test = 10000              # Number of test samples
# Each slice keeps the LAST n rows of the corresponding array.
X_train = X_train[-n_train:]
Y_train = Y_train[-n_train:]
# NOTE(review): X_val / Y_val are cut from the already-sliced training arrays,
# so these rows are also part of the training data (no held-out split). They
# are not referenced by the cells visible here.
X_val = X_train[-n_val:]
Y_val = Y_train[-n_val:]
X_test = X_test[-n_test:]
Y_test = Y_test[-n_test:]

# Ask for the index of the training sample to preview; valid values are
# 0 to 59999 (the prompt text repeats this in Japanese).
No = int(input('次行の数字を変更して実施してください。ただし数字の範囲は 0 から 59999 までです:'))
#No = 666
plt.figure(figsize=(1,1))    # Figure width and height, in inches
# Variant for one-hot encoded labels (disabled):
#plt.title('label:{}'.format(labels[np.argmax(Y_train[No])]))
plt.title('label:{}'.format(labels[Y_train[No]]))
plt.axis(False)
# Each sample is stored as a flat row of 784 pixels; restore the 28x28 image.
plt.imshow(X_train[No].reshape(28,28), cmap='gray')
plt.show()

In [None]:
# "Magic spell" cell: replace the image arrays with fresh copies and mark them
# writable. load_mnist builds the arrays with np.frombuffer; presumably those
# are read-only views that PyTorch/torchvision would warn about when they are
# converted to tensors — TODO confirm.
X_train = np.copy(X_train)
X_train.setflags(write=1)
X_test = np.copy(X_test)
X_test.setflags(write=1)

In [None]:
#PyTorch の必要なライブラリを輸入
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [None]:
batch_size = 64           # Training mini-batch size; affects both processing speed and convergence speed
test_batch_size = 1000    # Mini-batch size of the test DataLoader
max_epochs = 14           # Total number of training epochs
lr = 1.0                  # Learning rate handed to the optimizer
gamma = 0.7               # Passed as `gamma` to the StepLR scheduler in the training cell
#use_cuda = True           # Declares GPU use (disabled; `device` is set in the first cell instead)
seed = 42                 # Seed for torch's random number generator (see below)
log_interval = 10         # NOTE(review): the visible training cell passes 10 ** 2 explicitly instead of this value
save_model = False        # NOTE(review): not referenced in the cells visible here

# Seed torch's RNG for reproducibility.
torch.manual_seed(seed)
#device = torch.device("cuda" if use_cuda else "cpu")
# Extra DataLoader keyword arguments, only supplied when running on CUDA.
kwargs = {'num_workers': 1, 'pin_memory': True} if device=="cuda" else {}

In [None]:
#駒沢大学用データセットの定義
class koma_mnist_dataset(torch.utils.data.Dataset):
    """Map-style dataset over in-memory MNIST-format arrays (Komazawa University course).

    Args:
        X: Array of images that can be reshaped to ``(-1, 28, 28)``.
        y: Sequence of class labels, one per image.
        transform: Optional callable applied to a single 28x28 image when it
            is fetched.
    """

    def __init__(self, X, y, transform=None):
        super().__init__()
        self.transform = transform
        self.data = X.reshape(-1, 28, 28)   # one 28x28 image per sample
        self.label = y

    def __len__(self):
        """Return the number of images."""
        return len(self.data)

    def __getitem__(self, ind):
        """Return ``(image, label)`` for index ``ind``; the label is a Python ``int``."""
        image = self.data[ind]
        if self.transform:
            image = self.transform(image)
        return image, int(self.label[ind])

# _dataset = koma_mnist_dataset(X_train, Y_train, transform_)

# Preprocessing shared by both splits: convert each 28x28 array to a tensor,
# then normalise it with mean 0.1307 and standard deviation 0.3081.
# NOTE(review): these constants look like the usual MNIST statistics; they are
# applied unchanged whichever dataset was selected — confirm this is intended.
_koma_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training batches are reshuffled every epoch.
_koma_train_set = koma_mnist_dataset(X_train, Y_train, transform=_koma_transform)
koma_mnist_train_loader = torch.utils.data.DataLoader(
    _koma_train_set,
    batch_size=batch_size,
    shuffle=True,
    **kwargs)

# Test batches keep the original sample order.
_koma_test_set = koma_mnist_dataset(X_test, Y_test, transform=_koma_transform)
koma_mnist_test_loader = torch.utils.data.DataLoader(
    _koma_test_set,
    batch_size=test_batch_size,
    shuffle=False,
    **kwargs)


<center>
<img src="https://raw.githubusercontent.com/komazawa-deep-learning/komazawa-deep-learning.github.io/master/assets/1998LeCun_Fig2_CNN.svg"> <br/>
</center>

$$
\text{out}\left(N_i, C_{\text{out}_j}\right) =
\text{bias}\left(C_{\text{out}_j}\right) +
      \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}\left(C_{\text{out}_j}, k\right) \star \text{input}(N_i, k)
$$
ここで，

* $\star$: 2 次元の相互相関 (cross-correlation) 演算子
* $N$: バッチサイズ
* $C$: チャンネル (特徴) 数
* $H$: 画像の縦幅，単位は画素数
* $W$: 画像の横幅，単位は画素数

<!-- ### Shape:

* Input: $(N, C_{in}, H_{in}, W_{in})$ or $(C_{in}, H_{in}, W_{in})$
* Output: $(N, C_{out}, H_{out}, W_{out})$ or  $(C_{out}, H_{out}, W_{out})$,
where
$$\begin{aligned}
H_{out} &= \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] - \text{dilation}[0]
\times (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor\\
W_{out} &= \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] - \text{dilation}[1]
\times (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
\end{aligned}$$
 -->
<!-- $$
H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] - \text{dilation}[0]
\times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
$$

$$
W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] - \text{dilation}[1]
\times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
$$


 -->

<!-- # #(12 ** 2) * 64
# #np.sqrt(9216 / 64)
# #np.sqrt(9216)
# #help(F.max_pool2d)
# #help(nn.Conv2d)
# #X_train.shape
# #28 * 28
# #26 * 26  / 4 * 64
# #13 * 13
# 24 * 24 / 4 * 64 -->

In [None]:
# LeNet の定義
class LeNet_orig(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages followed by two linear layers.

    Shape flow for a batch of N single-channel 28x28 images:
    (N, 1, 28, 28) -> conv1 (3x3, 'same' padding) -> (N, 6, 28, 28)
    -> pool -> (N, 6, 14, 14) -> conv2 (5x5) -> (N, 16, 10, 10)
    -> pool -> (N, 16, 5, 5) -> flatten -> (N, 400) -> fc1 -> (N, 128)
    -> fc2 -> (N, 10) log-probabilities.
    """

    def __init__(self):
        super().__init__()

        # First convolution: 'same' padding keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1, padding='same')

        # Second convolution: 6 -> 16 channels with a 5x5 kernel.
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1)

        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)

        # Classifier head: 400 = 16 channels * 5 * 5 after the second pooling.
        self.fc1 = nn.Linear(400, 128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(self.dropout1(x), 1)
        x = self.dropout2(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)


class LeNet(nn.Module):
    """Wider CNN variant: two 3x3 convolutions, one max-pool, two linear layers.

    Shape flow for a batch of N single-channel 28x28 images:
    (N, 1, 28, 28) -> conv1 (3x3) -> (N, 32, 26, 26) -> conv2 (3x3)
    -> (N, 64, 24, 24) -> pool -> (N, 64, 12, 12) -> flatten -> (N, 9216)
    -> fc1 -> (N, 128) -> fc2 -> (N, 10) log-probabilities.
    """

    def __init__(self):
        super().__init__()

        # First convolution: 1 -> 32 channels, no padding.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)

        # Second convolution: 32 -> 64 channels, no padding.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)

        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)

        # Classifier head: 9216 = 64 channels * 12 * 12 after pooling.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(self.dropout1(x), 1)
        x = self.dropout2(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)


def train(model, device, train_loader, optimizer, epoch, log_interval=10):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network that returns log-probabilities (the loss is ``F.nll_loss``).
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches; it must support
            ``len()`` and expose a sized ``.dataset`` (both used for logging).
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Epoch number, only used in the progress message.
        log_interval: A progress line is printed every ``log_interval`` batches.
    """
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        data, target = batch
        data = data.to(device)
        target = target.to(device)

        # One optimisation step on this mini-batch.
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()

        # Only every log_interval-th batch is reported.
        if batch_idx % log_interval != 0:
            continue
        print(f'訓練エポック: {epoch}',
              f'[{batch_idx * len(data):>5d}/{len(train_loader.dataset)}',
              f'({100. * batch_idx / len(train_loader):5.2f}%)]',
              f' 損失値: {loss.item():.3f}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(f'---テストデータ: 平均損失: {test_loss:.4f}, 精度: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):5.2f}%)')


In [None]:
# Run the training.
# Select the network to train. Only one model is instantiated; enable the
# commented line (and disable the other) to train the wider variant instead.
# model = LeNet().to(device)
model = LeNet_orig().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=lr)

# Multiply the learning rate by `gamma` after every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

# Baseline: evaluate the untrained model once before the first epoch.
test(model, device, koma_mnist_test_loader)
for epoch in range(1, max_epochs + 1):
    train(model, device, koma_mnist_train_loader, optimizer, epoch, log_interval=10 ** 2)
    # Evaluate after each epoch so that the final trained model is reported too.
    test(model, device, koma_mnist_test_loader)
    # Without this call the scheduler never changes the learning rate.
    scheduler.step()
