In [None]:
import sys, os
sys.path.append("./deep-learning-from-scratch-master/")
import numpy as np
import pickle
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from datetime import datetime

class TwoLayerNN:
    """Two-layer fully connected network trained with numerical gradients.

    The forward pass is ``x -> affine -> sigmoid -> affine -> softmax``.
    Parameters live in ``self.params`` under the keys ``"W1"``, ``"b1"``,
    ``"W2"`` and ``"b2"``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Initialise weights with scaled Gaussian noise and biases with zeros.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale factor applied to the random weights.
        """
        # Weight initialisation
        self.params = {}
        self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def sigmoid_function(self, x):
        """Return the element-wise logistic sigmoid ``1 / (1 + e^-x)``."""
        return 1 / (1 + np.exp(-x))

    def softmax_function(self, a):
        """Return softmax probabilities computed along the last axis.

        Works for a single score vector (1-D) and for a batch of score
        vectors (2-D, one sample per row). Each sample is normalised
        independently so that its probabilities sum to 1.
        """
        a = np.asarray(a)
        # Subtract the per-sample maximum to avoid overflow in exp().
        # Reducing over the last axis (not the whole array) keeps every row
        # of a batch a separate probability distribution.
        c = np.max(a, axis=-1, keepdims=True)
        e_a = np.exp(a - c)
        sum_e_a = np.sum(e_a, axis=-1, keepdims=True)
        return e_a / sum_e_a

    def cross_entropy_error(self, y, t):
        """Return the mean cross-entropy between predictions and targets.

        Args:
            y: Predicted probabilities, 1-D (single sample) or 2-D (batch).
            t: Targets, either one-hot encoded (same size as ``y``) or
                class indices.

        Returns:
            The cross-entropy averaged over the batch.
        """
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)
        # If the targets are one-hot vectors, convert them to class indices.
        if t.size == y.size:
            t = t.argmax(axis=1)
        batch_size = y.shape[0]
        # The small constant keeps log() away from log(0).
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

    def predict(self, x):
        """Run the forward pass and return class probabilities for ``x``."""
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = self.sigmoid_function(a1)
        a2 = np.dot(z1, W2) + b2
        y = self.softmax_function(a2)
        return y

    def loss(self, x, t):
        """Return the cross-entropy loss of the network on ``(x, t)``."""
        y = self.predict(x)
        return self.cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of samples in ``x`` classified correctly.

        ``t`` may be one-hot encoded (2-D) or a 1-D array of class indices.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        # Accept label indices as well as one-hot targets, matching
        # cross_entropy_error().
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, f, x):
        """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

        ``x`` is perturbed in place one element at a time and restored
        afterwards, so ``f`` observes the change even if it reads the array
        through another reference.

        Args:
            f: Callable taking ``x`` and returning a scalar.
            x: Array of parameters; modified temporarily, restored on return.

        Returns:
            An array shaped like ``x`` holding the estimated partial derivatives.
        """
        h = 1e-4  # 0.0001
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            # In-place write: NumPy arrays are shared by reference, so this
            # also updates the entry held in self.params.
            x[idx] = float(tmp_val) + h
            fxh1 = f(x)  # f(x + h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x - h)
            grad[idx] = (fxh1 - fxh2) / (2 * h)

            x[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def gradient(self, x, t):
        """Return numerical gradients of the loss for every parameter.

        Returns:
            A dict with keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``.
        """
        # The lambda ignores its argument: numerical_gradient() perturbs the
        # parameter arrays in place and self.loss() reads them from self.params.
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads["W1"] = self.numerical_gradient(loss_W, self.params["W1"])
        grads["b1"] = self.numerical_gradient(loss_W, self.params["b1"])
        grads["W2"] = self.numerical_gradient(loss_W, self.params["W2"])
        grads["b2"] = self.numerical_gradient(loss_W, self.params["b2"])
        return grads

# NN = TwoLayerNN(input_size=784, hidden_size=100, output_size=10)
# x = np.random.rand(100, 784)
# t = np.random.rand(100, 10)
# y = NN.predict(x)
# grads = NN.gradient(x, t)
# print(grads["W1"].shape)

# --- Mini-batch training ---
# Trains TwoLayerNN on MNIST with SGD, logs the loss at every iteration,
# records train/test accuracy once per epoch, and plots the accuracy curves.
print("--- mini batch learning ---")
print(" proc start : " + str(datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Iterations per epoch. Integer division keeps the `i % iter_per_epoch`
# test below exact even when train_size is not a multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

NN = TwoLayerNN(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Draw a random mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    # print("batch_mask : " + str(batch_mask))
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute gradients. NOTE: this uses numerical differentiation, which
    # evaluates the loss twice per parameter, so each iteration is slow.
    grads = NN.gradient(x_batch, t_batch)

    # Gradient-descent parameter update
    for key in ("W1", "W2", "b1", "b2"):
        NN.params[key] = NN.params[key] - (learning_rate * grads[key])

    # Record training progress
    loss = NN.loss(x_batch, t_batch)
    print("- learning... " + str(i) + "/" + str(iters_num) + " - loss:" + str(loss))
    train_loss_list.append(loss)

    # Evaluate accuracy once per epoch: the check uses the loop counter `i`,
    # and test accuracy is measured on the held-out test split.
    if i % iter_per_epoch == 0:
        train_acc = NN.accuracy(x_train, t_train)
        test_acc = NN.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

print(" proc end : " + str(datetime.now().strftime("%Y/%m/%d %H:%M:%S")))

# Plot accuracy per epoch
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

--- mini batch learning ---
 proc start : 2018/02/06 09:50:18
- learning... 0/10000 - loss:6.89998873833
- learning... 1/10000 - loss:6.88605895993
- learning... 2/10000 - loss:6.90539565868
- learning... 3/10000 - loss:6.90693539536
- learning... 4/10000 - loss:6.90171537116
- learning... 5/10000 - loss:6.90119571476
- learning... 6/10000 - loss:6.88138613064
- learning... 7/10000 - loss:6.88357942645
- learning... 8/10000 - loss:6.89509459827
- learning... 9/10000 - loss:6.90521467932
- learning... 10/10000 - loss:6.91131374901
- learning... 11/10000 - loss:6.90350582557
- learning... 12/10000 - loss:6.90282196725
- learning... 13/10000 - loss:6.90009376044
- learning... 14/10000 - loss:6.89539086252
- learning... 15/10000 - loss:6.90491143942
- learning... 16/10000 - loss:6.89886380061
- learning... 17/10000 - loss:6.90713412861
- learning... 18/10000 - loss:6.90068627019
- learning... 19/10000 - loss:6.89021039232
- learning... 20/10000 - loss:6.90407090459
- learning... 21/10000 -

- learning... 184/10000 - loss:6.8091038726
- learning... 185/10000 - loss:6.79088818839
- learning... 186/10000 - loss:6.78510608215
- learning... 187/10000 - loss:6.76867188311
- learning... 188/10000 - loss:6.81526610213
- learning... 189/10000 - loss:6.80286394147
- learning... 190/10000 - loss:6.81193812098
- learning... 191/10000 - loss:6.81359013608
- learning... 192/10000 - loss:6.78690783845
- learning... 193/10000 - loss:6.78940897998
- learning... 194/10000 - loss:6.77820059667
- learning... 195/10000 - loss:6.76690895152
- learning... 196/10000 - loss:6.77184336708
- learning... 197/10000 - loss:6.77665548618
- learning... 198/10000 - loss:6.80922956159
- learning... 199/10000 - loss:6.792958883
- learning... 200/10000 - loss:6.77681538357
- learning... 201/10000 - loss:6.78566993251
- learning... 202/10000 - loss:6.75954952279
- learning... 203/10000 - loss:6.7819493141
- learning... 204/10000 - loss:6.77125075158
- learning... 205/10000 - loss:6.77778690219
- learning... 

- learning... 367/10000 - loss:5.9846042815
- learning... 368/10000 - loss:6.05279416148
- learning... 369/10000 - loss:6.08410253501
- learning... 370/10000 - loss:6.05685394931
- learning... 371/10000 - loss:6.17318202766
- learning... 372/10000 - loss:6.06188822145
- learning... 373/10000 - loss:5.99270512171
- learning... 374/10000 - loss:5.98235949296
- learning... 375/10000 - loss:5.99508426439
- learning... 376/10000 - loss:6.1338498802
- learning... 377/10000 - loss:5.90593697483
- learning... 378/10000 - loss:6.00529708501
- learning... 379/10000 - loss:5.99841485708
- learning... 380/10000 - loss:5.9887019663
- learning... 381/10000 - loss:6.01239328928
- learning... 382/10000 - loss:6.02129149881
- learning... 383/10000 - loss:5.91874498729
- learning... 384/10000 - loss:5.92631636667
- learning... 385/10000 - loss:5.97681360025
- learning... 386/10000 - loss:6.03735903747
- learning... 387/10000 - loss:6.03888175264
- learning... 388/10000 - loss:5.98755522266
- learning...