In [None]:
import sys, os
sys.path.append("./deep-learning-from-scratch-master/")
import numpy as np
import pickle
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist

class TwoLayerNN:
    """Two-layer fully connected classifier trained with numerical gradients.

    The forward pass is ``softmax(sigmoid(x @ W1 + b1) @ W2 + b2)``.
    All learnable arrays live in ``self.params`` under the keys
    ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter arrays.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the standard-normal samples
                used for the weight matrices.
        """
        # Weights are small Gaussian noise; biases start at zero.
        self.params = {}
        self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def sigmoid_function(self, x):
        """Return the element-wise logistic sigmoid ``1 / (1 + e**-x)``."""
        return 1 / (1 + np.exp(-x))

    def softmax_function(self, a):
        """Return the softmax of ``a`` taken along its last axis.

        For a 2-D batch every row is normalised independently, so each row
        of the result sums to 1. A 1-D input is treated as a single sample.
        """
        # Subtracting the per-sample maximum guards against overflow in exp
        # and does not change the result.
        shifted = a - np.max(a, axis=-1, keepdims=True)
        exp_a = np.exp(shifted)
        # Normalise per sample (last axis), NOT over the whole batch;
        # summing over every element would scale each probability by
        # 1 / batch_size and inflate the cross-entropy loss.
        return exp_a / np.sum(exp_a, axis=-1, keepdims=True)

    def cross_entropy_error(self, y, t):
        """Return the mean cross-entropy between predictions and targets.

        Args:
            y: Predicted probabilities, 1-D (one sample) or 2-D (batch).
            t: Targets, either one-hot with the same size as ``y`` or
                integer class indices.
        """
        if y.ndim == 1:
            # Promote a single sample to a batch of one.
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)
        # One-hot targets are converted to the index of the correct class.
        if t.size == y.size:
            t = t.argmax(axis=1)
        batch_size = y.shape[0]
        # 1e-7 keeps log() finite when a predicted probability is exactly 0.
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

    def predict(self, x):
        """Run the forward pass and return class probabilities for ``x``."""
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = self.sigmoid_function(a1)
        a2 = np.dot(z1, W2) + b2
        y = self.softmax_function(a2)
        return y

    def loss(self, x, t):
        """Return the cross-entropy loss of the network on ``(x, t)``."""
        y = self.predict(x)
        return self.cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` classified correctly.

        ``t`` may be one-hot (2-D) or a 1-D vector of class indices.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            # One-hot targets: reduce to class indices before comparing.
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, f, x):
        """Estimate the gradient of ``f`` at ``x`` by central differences.

        Args:
            f: Callable evaluated as ``f(x)``; it must return a scalar.
            x: Array that is perturbed in place, one element at a time, and
                restored to its original values before returning.

        Returns:
            An array shaped like ``x`` holding ``(f(x+h) - f(x-h)) / (2h)``
            for every element.
        """
        h = 1e-4  # 0.0001
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            tmp_val = x[idx]
            # x is modified in place, so when it is one of self.params[...]
            # the network sees the perturbed value on the next forward pass.
            x[idx] = float(tmp_val) + h
            fxh1 = f(x)  # f(x + h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x - h)
            grad[idx] = (fxh1 - fxh2) / (2*h)

            x[idx] = tmp_val  # restore the original value
            it.iternext()
        return grad

    def gradient(self, x, t):
        """Return numerical gradients of the loss for every parameter.

        Returns:
            A dict with keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"`` whose
            values have the same shapes as the matching ``self.params``.
        """
        # The argument W is a dummy: numerical_gradient perturbs the
        # parameter array in place and the loss re-reads self.params.
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads["W1"] = self.numerical_gradient(loss_W, self.params["W1"])
        grads["b1"] = self.numerical_gradient(loss_W, self.params["b1"])
        grads["W2"] = self.numerical_gradient(loss_W, self.params["W2"])
        grads["b2"] = self.numerical_gradient(loss_W, self.params["b2"])
        return grads

# NN = TwoLayerNN(input_size=784, hidden_size=100, output_size=10)
# x = np.random.rand(100, 784)
# t = np.random.rand(100, 10)
# y = NN.predict(x)
# grads = NN.gradient(x, t)
# print(grads["W1"].shape)

# --- Mini-batch training ---
# Trains TwoLayerNN on MNIST with plain SGD, records the loss after every
# iteration and the train/test accuracy once per epoch, then plots accuracy.
# NOTE: gradients are computed numerically, so every iteration performs two
# forward passes per parameter element and is very slow.
print("--- mini batch learning ---")
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, one_hot_label=True)

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Number of iterations per epoch (integer division keeps the modulo test
# below exact instead of comparing against a float).
iter_per_epoch = max(train_size // batch_size, 1)

NN = TwoLayerNN(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Draw a mini-batch (indices are sampled with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients
    grads = NN.gradient(x_batch, t_batch)

    # Gradient-descent parameter update
    for key in ("W1", "W2", "b1", "b2"):
        NN.params[key] = NN.params[key] - (learning_rate * grads[key])

    # Record training progress
    loss = NN.loss(x_batch, t_batch)
    print("- learning... " + str(i) + "/" + str(iters_num) + " - loss:" + str(loss))
    train_loss_list.append(loss)

    # Evaluate accuracy once per epoch. The check must use the loop index i;
    # a constant on the left-hand side would never (or always) fire.
    if i % iter_per_epoch == 0:
        train_acc = NN.accuracy(x_train, t_train)
        # Test accuracy is measured on the held-out test split.
        test_acc = NN.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))


# Plot accuracy per epoch
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

--- mini batch learning ---
- learning... 0/10000 - loss:6.896762212875149
- learning... 1/10000 - loss:6.898006404399876
- learning... 2/10000 - loss:6.894620930755418
- learning... 3/10000 - loss:6.901588437853344
- learning... 4/10000 - loss:6.89541504056809
- learning... 5/10000 - loss:6.892621106055443
- learning... 6/10000 - loss:6.903155101180779
- learning... 7/10000 - loss:6.902643701905974
- learning... 8/10000 - loss:6.889317707226538
- learning... 9/10000 - loss:6.883500931752714
- learning... 10/10000 - loss:6.88435897837484
- learning... 11/10000 - loss:6.902383893462493
- learning... 12/10000 - loss:6.897598498087332
- learning... 13/10000 - loss:6.881720890875175
- learning... 14/10000 - loss:6.910850983697419
- learning... 15/10000 - loss:6.901288939374151
- learning... 16/10000 - loss:6.901853582080626
- learning... 17/10000 - loss:6.901134035340448
- learning... 18/10000 - loss:6.883268235944054
- learning... 19/10000 - loss:6.904590399869263
- learning... 20/10000 -

- learning... 169/10000 - loss:6.823468749668662
- learning... 170/10000 - loss:6.817013387501099
- learning... 171/10000 - loss:6.82776033258393
- learning... 172/10000 - loss:6.840308999097334
- learning... 173/10000 - loss:6.805097979379337
- learning... 174/10000 - loss:6.83363313174849
- learning... 175/10000 - loss:6.840761438711444
- learning... 176/10000 - loss:6.8185875782094225
- learning... 177/10000 - loss:6.832206965211265
- learning... 178/10000 - loss:6.801072776867934
- learning... 179/10000 - loss:6.828847550453953
- learning... 180/10000 - loss:6.817348691710041
- learning... 181/10000 - loss:6.774421315069125
- learning... 182/10000 - loss:6.806578392974392
- learning... 183/10000 - loss:6.799740759849719
- learning... 184/10000 - loss:6.820424224493957
- learning... 185/10000 - loss:6.8157973819414535
- learning... 186/10000 - loss:6.783481507522687
- learning... 187/10000 - loss:6.790345917532278
- learning... 188/10000 - loss:6.782087831786989
- learning... 189/10

- learning... 337/10000 - loss:6.1799514173942445
- learning... 338/10000 - loss:6.207241528214099
- learning... 339/10000 - loss:6.218217350625894
- learning... 340/10000 - loss:6.236697846705556
- learning... 341/10000 - loss:6.191155695375988
- learning... 342/10000 - loss:6.152675341630359
- learning... 343/10000 - loss:6.139389517721652
- learning... 344/10000 - loss:6.172620572405332
- learning... 345/10000 - loss:6.221334728800395
- learning... 346/10000 - loss:6.148980107215028
- learning... 347/10000 - loss:6.106844613446921
- learning... 348/10000 - loss:6.210610880116433
- learning... 349/10000 - loss:6.190269362641682
- learning... 350/10000 - loss:6.1878121078701
- learning... 351/10000 - loss:6.155146785521922
- learning... 352/10000 - loss:6.20205338421145
- learning... 353/10000 - loss:6.137153806464084
- learning... 354/10000 - loss:6.190159299941878
- learning... 355/10000 - loss:6.242178038827756
- learning... 356/10000 - loss:6.17114959836259
- learning... 357/10000