# Chapter 8 ディープラーニング

1. ネットワークをより深く 
1. ディープラーニングの小歴史
1. ディープラーニングの高速化
1. ディープラーニングの実用例
1. ディープラーニングの未来
1. まとめ

## VGGを参考にしたネットワーク

In [2]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *


class DeepConvNet:
    """High-accuracy ConvNet (recognition rate of 99% or higher).

    The network is structured as follows:
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        conv - relu - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax

    Trainable parameters are stored in ``self.params`` under the keys
    ``W1``..``W8`` / ``b1``..``b8`` (six convolution layers followed by two
    affine layers). The layer objects are handed the very same arrays.
    """

    # Positions inside ``self.layers`` of the layers that own parameters,
    # listed in W1..W8 order (six Convolution layers, then two Affine layers).
    _PARAM_LAYER_INDICES = (0, 2, 5, 7, 10, 12, 15, 18)

    # Every pooling layer built in __init__ uses a 2x2 window with stride 2.
    _POOL_SIZE = 2
    _POOL_STRIDE = 2

    # NOTE: the dict defaults below are shared between calls; they are only
    # read here, never mutated.
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        """Create the parameters and the layer stack.

        Args:
            input_dim: Input size; the first entry is the channel count and
                the remaining two are taken as (height, width).
            conv_param_1 .. conv_param_6: Per-convolution settings with the
                keys 'filter_num', 'filter_size', 'pad' and 'stride'.
            hidden_size: Number of units of the first affine layer.
            output_size: Number of units of the final affine layer.
        """
        conv_params = [conv_param_1, conv_param_2, conv_param_3,
                       conv_param_4, conv_param_5, conv_param_6]

        # Weight initialisation ===========
        # Each weight is scaled by sqrt(2 / fan_in), the initial value
        # recommended when ReLU is used. fan_in (how many neurons of the
        # previous layer one neuron is connected to) is derived from the
        # actual configuration instead of being hard-coded.
        self.params = {}
        channels, height, width = input_dim
        for idx, conv_param in enumerate(conv_params, start=1):
            filter_num = conv_param['filter_num']
            filter_size = conv_param['filter_size']

            fan_in = channels * filter_size * filter_size
            self.params['W' + str(idx)] = np.sqrt(2.0 / fan_in) * np.random.randn(
                filter_num, channels, filter_size, filter_size)
            self.params['b' + str(idx)] = np.zeros(filter_num)

            # Track the feature-map size so the first affine layer can be
            # sized automatically. Assumes Convolution/Pooling produce
            # floor((size + 2*pad - window) / stride) + 1 outputs per axis
            # (gives 4x4 for the default settings) -- TODO confirm against
            # common.layers.
            height = self._output_size(height, filter_size,
                                       conv_param['stride'], conv_param['pad'])
            width = self._output_size(width, filter_size,
                                      conv_param['stride'], conv_param['pad'])
            if idx % 2 == 0:  # a pooling layer follows every second conv
                height = self._output_size(height, self._POOL_SIZE, self._POOL_STRIDE)
                width = self._output_size(width, self._POOL_SIZE, self._POOL_STRIDE)
            channels = filter_num

        affine_input_size = channels * height * width
        self.params['W7'] = np.sqrt(2.0 / affine_input_size) * np.random.randn(
            affine_input_size, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = np.sqrt(2.0 / hidden_size) * np.random.randn(
            hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Layer creation ===========
        # The resulting order must stay in sync with _PARAM_LAYER_INDICES.
        self.layers = []
        for idx, conv_param in enumerate(conv_params, start=1):
            self.layers.append(Convolution(self.params['W' + str(idx)],
                                           self.params['b' + str(idx)],
                                           conv_param['stride'], conv_param['pad']))
            self.layers.append(Relu())
            if idx % 2 == 0:
                self.layers.append(Pooling(pool_h=self._POOL_SIZE,
                                           pool_w=self._POOL_SIZE,
                                           stride=self._POOL_STRIDE))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))

        self.last_layer = SoftmaxWithLoss()

    @staticmethod
    def _output_size(input_size, window, stride=1, pad=0):
        """Return the per-axis output length of a sliding window operation."""
        return (input_size + 2 * pad - window) // stride + 1

    def predict(self, x, train_flg=False):
        """Feed ``x`` through every layer in order and return the result.

        ``train_flg`` is forwarded only to Dropout layers.
        """
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of ``x`` against targets ``t``.

        The forward pass is run with ``train_flg=True``.
        """
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` whose argmax matches ``t``.

        Args:
            x: Input samples, indexed along the first axis.
            t: Targets; a 1-D array is used as labels, any other rank is
                reduced with ``argmax(axis=1)`` first.
            batch_size: Number of samples evaluated per forward pass.

        Every sample is evaluated, including a trailing batch smaller than
        ``batch_size``; previously those samples were skipped but still
        counted in the denominator, which under-reported the accuracy.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        num_samples = x.shape[0]
        correct = 0.0
        for start in range(0, num_samples, batch_size):
            stop = start + batch_size
            y = self.predict(x[start:stop], train_flg=False)
            y = np.argmax(y, axis=1)
            correct += np.sum(y == t[start:stop])

        return correct / num_samples

    def gradient(self, x, t):
        """Return the gradients as a dict keyed like ``self.params``.

        A forward pass (via ``loss``) is followed by a backward pass through
        the layers in reverse order; the ``dW``/``db`` attributes of the
        parameterised layers are then collected.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.last_layer.backward(1)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        # collect
        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES, start=1):
            layer = self.layers[layer_idx]
            grads['W' + str(i)] = layer.dW
            grads['b' + str(i)] = layer.db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        with open(file_name, 'wb') as f:
            pickle.dump(dict(self.params), f)

    def load_params(self, file_name="params.pkl"):
        """Load pickled parameters and re-attach them to the layers.

        SECURITY: ``pickle.load`` can execute arbitrary code while loading;
        only open parameter files that come from a trusted source.
        """
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        # The layers hold their own references, so point them at the newly
        # loaded arrays.
        for i, layer_idx in enumerate(self._PARAM_LAYER_INDICES, start=1):
            layer = self.layers[layer_idx]
            layer.W = self.params['W' + str(i)]
            layer.b = self.params['b' + str(i)]


In [3]:
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.trainer import Trainer

# Load MNIST with flatten=False (presumably keeps the per-image array layout
# that the convolution layers expect -- confirm in dataset.mnist).
train_data, test_data = load_mnist(flatten=False)

# Cut the data down because processing the full set takes a long time.
x_train, t_train = train_data[0][:5000], train_data[1][:5000]
x_test, t_test = test_data[0][:1000], test_data[1][:1000]

network = DeepConvNet()
trainer = Trainer(
    network, x_train, t_train, x_test, t_test,
    epochs=20,
    mini_batch_size=100,
    optimizer='Adam',
    optimizer_param={'lr': 0.001},
    evaluate_sample_num_per_epoch=1000,
)
trainer.train()

# Save the trained parameters.
network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")


train loss:2.271367856874713
=== epoch:1, train acc:0.138, test acc:0.138 ===
train loss:2.3093065798117673
train loss:2.3338022173781
train loss:2.283430268481283
train loss:2.31293705894189
train loss:2.311641985053871
train loss:2.2953986692649213
train loss:2.2785976169684363
train loss:2.2558599920139466
train loss:2.26710458333315
train loss:2.253378483407437
train loss:2.194322934215733
train loss:2.2266169161647853
train loss:2.214622217369581
train loss:2.2094290233538816
train loss:2.222369595648069
train loss:2.2224286997763096
train loss:2.1968679455668205
train loss:2.1590577908898196
train loss:2.192396066151759
train loss:2.180343182685085
train loss:2.1583027157188424
train loss:2.054341488640579
train loss:2.1181417053290743
train loss:2.0068527888969037
train loss:2.055764672159217
train loss:2.0560819446841863
train loss:2.0114480413224816
train loss:1.9267946046068525
train loss:1.9823872059056442
train loss:1.8989418297020206
train loss:1.9917873412497105
train los

train loss:1.1962028111133203
train loss:1.2082887514434055
train loss:1.0231303282303439
train loss:1.1178255058407145
train loss:1.206204603757602
train loss:1.0105357368185022
train loss:1.111255297583602
train loss:1.086598196765533
train loss:1.2055959425852691
train loss:1.209925194272414
train loss:1.131127891907511
train loss:1.0146295439036808
train loss:1.0939846399077733
train loss:1.2046211777960125
train loss:1.0508594105433577
train loss:1.0906910878291534
train loss:1.1738396445309314
train loss:1.2797642221192866
train loss:1.1647326797236446
train loss:1.2139914157692426
train loss:1.2194700510879035
train loss:1.308043017743179
train loss:1.1855723868964423
train loss:1.2789818800456105
train loss:1.1825254677516903
train loss:0.960599713957729
train loss:1.17448623727598
train loss:1.0629601444406098
train loss:1.085253099057898
train loss:1.0532919460560253
train loss:1.3019654582814968
train loss:1.2437927787410943
train loss:1.1301199356656892
train loss:1.1461687

train loss:1.1774594904852886
train loss:1.267784361770809
train loss:1.086175967155092
train loss:0.9966782689783973
train loss:1.12542432405072
train loss:0.8830526307939535
train loss:0.9274860600542695
train loss:0.9880271675957694
train loss:1.2704651172944394
train loss:1.003196750367258
train loss:0.9249201140512766
train loss:1.1238040115537435
train loss:1.0322420077126455
train loss:1.1613003936088235
train loss:0.9889148642418784
train loss:1.031268917937514
train loss:0.8727126583490279
=== epoch:12, train acc:0.985, test acc:0.971 ===
train loss:1.1172287908066287
train loss:1.1069697369634617
train loss:1.0015198987380116
train loss:1.3401219680916807
train loss:1.0265282554454032
train loss:0.999645727557449
train loss:1.2436292651992427
train loss:1.0478684334257207
train loss:0.9235326839955889
train loss:0.9380471822284453
train loss:1.0673141148953167
train loss:0.9663730333647397
train loss:1.0660424181871928
train loss:1.188237597929562
train loss:1.078111382322742

=== epoch:17, train acc:0.99, test acc:0.978 ===
train loss:0.9097990047707062
train loss:1.0448983589258451
train loss:0.9347306218476358
train loss:0.8215188643895407
train loss:1.1391815349526289
train loss:0.9724090699252437
train loss:0.8848494154379739
train loss:1.0696060284579434
train loss:0.9395408564466607
train loss:0.9713355501798858
train loss:1.0555513177979083
train loss:1.013450681439749
train loss:1.2474330683449333
train loss:1.0025417382110442
train loss:0.8796178067978471
train loss:0.824889055400145
train loss:1.091780961626756
train loss:1.0747759737627371
train loss:0.9183426140762482
train loss:1.0575291227800725
train loss:1.0763155655019385
train loss:1.279987175837899
train loss:0.7877425836041816
train loss:0.9963999154305049
train loss:1.123044240986965
train loss:1.0617520129263576
train loss:0.8866266116189793
train loss:0.9963972776766324
train loss:0.8784845898176955
train loss:1.017961469718903
train loss:0.9756658205901438
train loss:0.99106289851893