## データの読み込み

mnistと呼ばれる有名な手書き文字（0~9）のデータセットを使用する。

In [184]:
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image


def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()
    
    
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False, one_hot_label=True)
# 0~255 の値をスケーリングする
x_train, x_test = x_train / 255, x_test / 255

In [185]:
x_train.shape

(60000, 784)

In [187]:
img = x_train[0]
label = t_train[0]
print(label)

print(img.shape)
img = img.reshape(28, 28)
print(img.shape)

# img_show(img)

[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
(784,)
(28, 28)


## CNNとは

**CNN(Convolution Nueral Network)**とは畳み込み(Convolution)層とプーリング層を持つ
DNNです。

従来のDNNと異なり、上述の２層を持っておりデータの**位置情報**を扱えるようになっているのが特徴です。

この特徴からCNNは主に画像認識（クラス分類、オブジェクト検出、セグメンテーション）の分野でよく使われます。

畳み込み(Convolution)層とプーリング層についてそれぞれ簡単に説明します。

**畳み込み層**とはフィルタと呼ばれる小さな領域と画像の一部分との計算を行うことで、位置情報を含んだ情報として特徴量に変換します。

またこのフィルタ計算はスライドしてすべての領域と計算を行うため画像のズレにも対応できます。
（隅にある丸も真ん中にある丸も同じ丸だと認識できます。）

これによって画像からこのあたりは赤っぽいとか尖った線があるなどの特徴をとらえることができます。

**プーリング層**では情報の圧縮を行うような層で画像をデフォルメ化するような効果があります。

In [188]:
from util import im2col, col2im 

## 畳み込み層の実装

In [350]:
class Convolution:
    def __init__(self, W, b, output_c, filter_h, filter_w, stride = 1, pad = 0):
        """
        Wは( filter_h * filter_w * input_channel, out_channel)
        b は( 1, out_channel)
        を想定
        """
        # 重みとバイアスを設定
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        # その他変わらないパラメータを格納
        self.stride, self.pad, self.output_c, self.filter_h, self.filter_w = stride, pad, output_c, filter_h, filter_w
        
        # 中間データ（backward時に使用）
        self.x, self.col = None, None
        
    def forward(self, x):
        
        W, b = self.params
        stride, pad, output_c, filter_h, filter_w = self.stride, self.pad, self.output_c, self.filter_h, self.filter_w
        
        # なぜこの並び(N, C, H, W)なのか？  
        # kerasに合わせてるのか？
        if x.ndim == 4:
            N, input_c, input_h, input_w = x.shape
        elif x.ndim == 3:
            N, input_h, input_w = x.shape
            input_c = 1
        else:
            print("DAME!")
            print(x.shape)
        
        # 最終的な出力のサイズを算出
        out_h = 1 + int((input_h + 2*pad - filter_h) / stride)
        out_w = 1 + int((input_w + 2*pad - filter_w) / stride)
        
        # データを(N H W C)から(N*out_h*out_w, filter_h * filter_w * C)
        col = im2col(x, self.filter_h, self.filter_w, self.stride, self.pad)
        out = np.dot(col, W) + b
        
        # N, out_h, out_w, output_Cに整形
        out = out.reshape(N, output_c, out_h, out_w)
        
        self.x, self.col = x, col
        
        return out
    
    def backward(self, dout):
        
        W, b = self.params
        output_c, filter_h, filter_w = self.output_c, self.filter_h, self.filter_w
        
        dout = dout.transpose(0,2,3,1).reshape(-1, output_c)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        ## self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, W.T)
        dx = col2im(dcol, self.x.shape, filter_h, filter_w, self.stride, self.pad)

        return dx
    

In [351]:
#( filter_h * filter_w * input_channel, out_channel)
#        b は( 1, out_channel)

# 重みとバイアスの初期化
W = 0.01 * np.random.randn(3 * 3 , 4)  # 3 * 3 * 3でない？
b = np.zeros([1, 4])

conv = Convolution(W, b, 4, 3, 3)

In [352]:
 img[14, :]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.31764706, 0.94117647,
       0.99215686, 0.99215686, 0.46666667, 0.09803922, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        ])

In [353]:
np.random.randn(1)

array([1.85306113])

In [354]:
# img.reshape(1, 28, 28, 1)

im2col(img.reshape(1, 1, 28, 28), 5, 5, 1, 0).shape

(576, 25)

In [355]:
np.sum(out[0,0, :, :])

2.2296291892962214

In [356]:
conv.backward(out).shape

AttributeError: 'NoneType' object has no attribute 'T'

## プーリング層の実装

In [357]:
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        # 更新パラメータはなし
        self.params, self.grads = [], []
        # 学習中変わらないパラメータを詰め込む
        self.pool_h, self.pool_w, self.stride, self.pad = pool_h, pool_w, stride, pad
        
        self.x, self.arg_max = None, None

    def forward(self, x):
        pool_h, pool_w, stride, pad = self.pool_h, self.pool_w, self.stride, self.pad
        # なぜこの並び(N, C, H, W)なのか？  
        # kerasに合わせてるのか？
        if x.ndim == 4:
            N, input_c, input_h, input_w = x.shape
        elif x.ndim == 3:
            N, input_h, input_w = x.shape
            input_c = 1

        out_h = int(1 + (input_h - pool_h) / stride)
        out_w = int(1 + (input_w - pool_w) / stride)

        col = im2col(x, pool_h, pool_w, stride, pad)
        col = col.reshape(-1, pool_h * pool_w)

        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, input_c).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    
    def backward(self, dout):
        
        pool_h, pool_w, stride, pad = self.pool_h, self.pool_w, self.stride, self.pad
        
        x, arg_max = self.x, self.arg_max
        
        dout = dout.transpose(0, 2, 3, 1)
        
        pool_size = pool_h * pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(arg_max.size), arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,)) 
        
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, x.shape, pool_h, pool_w, stride, pad)
        
        return dx

In [358]:
pool = Pooling(2, 2, 2)

pool.forward(img.reshape(1, 1, 28, 28)).shape


(1, 1, 14, 14)

In [359]:
pool.backward(pool.forward(img.reshape(1, 1, 28, 28))).shape

(1, 1, 28, 28)

In [360]:
img.reshape((1, 1, 28, 28))

array([[[[0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.        ,
          0.        , 0.        , 0.        , 0.        , 0.      

## Flatten層

(N , C, H, W) から　(N , C * H * W)に 変換するだけの層

In [361]:
class Flat:
    def __init__(self):
        self.params, self.grads = [], []
        self.input_shape = None
        
    def forward(self, x):
        N = x.shape[0]
        out = x.reshape(N, -1)
        
        self.input_shape = x.shape
        
        return out
    
    def backward(self, dout):
        dout = dout.reshape(self.input_shape)
        
        return dout

In [362]:
flat = Flat()
flat_data = flat.forward(img.reshape((1, 1, 28, 28)))
flat.backward(flat_data).shape

(1, 1, 28, 28)

## lenet構成のCNN

In [363]:
np.sum(np.random.randn(5 * 5 , 6))

13.138506455119824

In [413]:
import functions as F
import layers as L
from gradient import numerical_gradient

class LeNet:
    def __init__(self, input_size, data_num, output_size):
        
        
        # print(init_coef)
        # 重みとバイアスの初期化
        # 畳み込み層の重みパラメータの形状は
        # filter_height * filter_width * input_channel, output_channel
#         W1 = init_coef * np.random.randn(5 * 5 * 1, 6)
#         b1 = np.zeros(6)
#         W2 = init_coef * np.random.randn(5 * 5 * 6, 16)
#         b2 = np.zeros(16)
#         W3 = init_coef * np.random.randn(784, 120)
#         b3 = np.zeros(120)
#         W4 = init_coef * np.random.randn(120, 84)
#         b4 = np.zeros(84)
#         W5 = init_coef * np.random.randn(84, output_size)
#         b5 = np.zeros(output_size)
        limit = np.sqrt(1/data_num)
        W1 = np.random.uniform(-limit, limit, (5 * 5 * 1, 6))
        b1 = np.zeros(6)

        W2 = np.random.uniform(-limit, limit, (5 * 5 * 6, 16))
        b2 = np.zeros(16)

        W3 = np.random.uniform(-limit, limit, (784, 120))
        b3 = np.zeros(120)
        W4 = np.random.uniform(-limit, limit, (120, 84))
        b4 = np.zeros(84)
        W5 = np.random.uniform(-limit, limit, (84, output_size))
        b5 = np.zeros(output_size)
        
        # レイヤの生成
        self.layers = [
            Convolution(W1, b1, 6, 5, 5, 1, 2),
            L.Relu(),
            Pooling(2, 2, 2),
            Convolution(W2, b2, 16, 5, 5, 1, 2),
            L.Relu(),
            Pooling(2, 2, 2),
            Flat(),
            L.Affine(W3, b3),
            L.Relu(),
            L.Affine(W4, b4),
            L.Relu(),
            L.Affine(W5, b5),
        ]
        
        self.loss_layer = L.SoftmaxWithLoss()
        
        # すべての重みと勾配をリストにまとめる
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
            
    def predict(self, x):
        for layer in self.layers:
#             print(layer)
#             print(x.shape)
            x = layer.forward(x)
        return x
        
    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss
        
    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
    
    ## 数値微分と誤差逆伝搬法による微分
    # x:入力データ, t:教師データ
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.forward(x, t)
        # loss_W = self.forward(x, t)
        
        grads = []
        for params in self.params:
            grads.append(numerical_gradient(loss_W, params))
        
        return grads
        
    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        
        layers = list(self.layers.values())
        layers.reverse()
        # 設定
        grads = []
        for layer in layers:
            dout = layer.backward(dout)
            grads.append(layer.grads)

        return  grads

## 学習

In [414]:
x_train[:100].reshape(-1, 1, 28, 28).shape

(100, 1, 28, 28)

In [415]:
import optimizer as opt

# ハイパーパラメータの設定
max_epoch = 20
batch_size = 128
learning_rate = 0.01

# データの読み込み、モデルとオプティマイザの生成
x, t = x_train[:512].reshape(-1, 1, 28, 28) , t_train[:512]

model = LeNet(input_size=784, data_num=x.shape[0], output_size=10)
optimizer = opt.SGD(lr=learning_rate)

# 学習で使用する変数
data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    # データのシャッフル
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]
    
    for iters in range(max_iters):
        batch_x = x[iters * batch_size : (iters + 1) * batch_size]
        batch_t = t[iters * batch_size : (iters + 1) * batch_size]
        
        # 勾配を求め、　パラメータを更新
        loss = model.forward(batch_x, batch_t)

        model.backward()

        optimizer.update(model.params, model.grads)
#         print("### START###")
#         for layer_params in model.params:
#             print(layer_params.__class__)
#             print(np.sum(layer_params))
#             print("### ")
        
        total_loss += loss
        loss_count += 1
        
    # 定期的に学習経過を出力
    # if (iters+1) % 10 == 0:
    avg_loss = total_loss / loss_count
    print('| epoch %d | train loss : %.4f  acc : %.4f | val_loss *** val_acc : %.4f' % (epoch + 1, avg_loss, accuracy_score(model, x, t), accuracy_score(model, x_test, t_test)))
    # model.forward(x_test, t_test),
    
    loss_list.append(avg_loss)
    total_loss, loss_count = 0, 0

| epoch 1 | train loss : 2.3026  acc : 0.1348 | val_loss *** val_acc : 0.1137
| epoch 2 | train loss : 2.3025  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 3 | train loss : 2.3024  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 4 | train loss : 2.3023  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 5 | train loss : 2.3022  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 6 | train loss : 2.3021  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 7 | train loss : 2.3020  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 8 | train loss : 2.3019  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 9 | train loss : 2.3018  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 10 | train loss : 2.3018  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 11 | train loss : 2.3017  acc : 0.1348 | val_loss *** val_acc : 0.1135
| epoch 12 | train loss : 2.3016  acc : 0.1348 | val_loss *** val_acc : 0.1135


KeyboardInterrupt: 

In [416]:
sum_count = 0
for param in model.params:
    sum_count += param.size
    print(param.size)
    
print("Total params")
print(sum_count)
    

150
6
2400
16
94080
120
10080
84
840
10
Total params
107786


In [380]:
for layer in model.layers:
    print(layer)

<__main__.Convolution object at 0x12f1c7da0>
<layers.Relu object at 0x12f1c7e10>
<__main__.Pooling object at 0x12ec57dd8>
<__main__.Convolution object at 0x12ec57ac8>
<layers.Relu object at 0x12ec57550>
<__main__.Pooling object at 0x12ec57320>
<__main__.Flat object at 0x12ec57278>
<layers.Affine object at 0x12ec571d0>
<layers.Relu object at 0x12ec57198>
<layers.Affine object at 0x12ec57160>
<layers.Relu object at 0x12ec57208>
<layers.Affine object at 0x12ec57128>


In [384]:
model.params[0].shape

(25, 6)

## 精度の計算

In [311]:
pred_label = np.argmax(model.predict(x_test.reshape(-1, 1, 28, 28)), axis=1)

KeyboardInterrupt: 

In [None]:
pred_label

In [288]:
true_label = np.argmax(t_test, axis=1)

In [289]:
true_label

array([7, 2, 1, ..., 4, 5, 6])

In [290]:
np.sum(pred_label == true_label) / pred_label.shape[0]

0.7422

In [296]:
def accuracy_score(model, x, y):
    pred_label = np.argmax(model.predict(x.reshape(-1, 1, 28, 28)), axis=1)
    true_label = np.argmax(y, axis=1)
    
    return np.sum(pred_label == true_label) / pred_label.shape[0]

In [301]:
accuracy_score(model, x_train[:500], t_train[:500])

1.0

In [299]:
accuracy_score(model, x_test, t_test)

0.7422

## 勾配確認

In [None]:
# データの読み込み、モデルとオプティマイザの生成
x, t = x_train[:1000].reshape(-1, 1, 28, 28), t_train[:1000]

network = LeNet(input_size=784, data_num=x.shape[0], output_size=10)

x_batch = x_train[:3].reshape(-1, 1, 28, 28)
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerical.keys():
    diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) )

## Kerasのmnist

In [325]:
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

num_classes  = 10

model = Sequential()

input_shape = (28, 28, 1)
model.add(Conv2D(6, kernel_size=(5, 5),
                 strides=(1, 1),
                 padding='same',
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2),
                      strides=(2,2),
                      padding='valid'))
model.add(Conv2D(16, kernel_size=(5, 5),
                 strides=(1, 1),
                 padding='same',
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2),
                      strides=(2,2),
                      padding='valid'))

model.add(Flatten())
model.add(Dense(120, activation='relu'))
model.add(Dense(84, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# model.compile(loss=keras.losses.categorical_crossentropy,
#               optimizer=keras.optimizers.Adadelta(),
#               metrics=['accuracy'])

model.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=keras.optimizers.SGD(lr=0.01, decay=0.0, momentum=1.0, nesterov=False),
              #optimizer=keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
               metrics=['accuracy'])

batch_size = 128
epochs = 20 ##学習の回数
# データの用意
x, t = x_train[:500].reshape(-1, 28, 28, 1) , t_train[:500]

history = model.fit(x, t,
                    batch_size=batch_size, epochs=epochs,
                    verbose=1, validation_data=(x_test.reshape(-1, 28, 28, 1), t_test))


Train on 500 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [326]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 28, 28, 6)         156       
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 6)         0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 14, 14, 16)        2416      
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 7, 7, 16)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 120)               94200     
_________________________________________________________________
dense_17 (Dense)             (None, 84)                10164     
__________

教師データ500枚に絞って2層のDNNでも**８０%弱**の精度になることが分かる。