In [16]:
import numpy as np

class TwoLayerNet:

    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        #重みの初期化
        self.params = {}
        #784 * 50の重み行列
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size)
        #50 * 10の重み行列
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size,output_size)
        #バイアス，隠れ層の数だけ
        self.params['b1'] = np.zeros(hidden_size)
        #バイアス，出力層の数だけ
        self.params['b2'] = np.zeros(output_size)
    
    # 隠れ層のユニット 一つの値を[0,1]に落とし込む
    def sigmoid(self,x):
        return 1 / (1 + np.exp(-x))
    
    # 出力層のユニット 複数の値の組合せからそれぞれの値を合計1の形に落とし込む
    def softmax(self,a):
        c = np.max(a)
        exp_a = np.exp(a - c)#オーバーフロー対策
        sum_exp_a = np.sum(exp_a)
        y = exp_a / sum_exp_a
        return y
    
    # 勾配計算(1次元) 損失関数の勾配を取る 中心差分
    def _numerical_gradient_1d(self,f, x):
        h = 1e-4 # 0.0001
        grad = np.zeros_like(x)

        for idx in range(x.size):
            tmp_val = x[idx]
            x[idx] = float(tmp_val) + h
            fxh1 = f(x) # f(x+h)

            x[idx] = tmp_val - h 
            fxh2 = f(x) # f(x-h)
            grad[idx] = (fxh1 - fxh2) / (2*h)

            x[idx] = tmp_val # 値を元に戻す

        return grad

    # 勾配計算
    def numerical_gradient(self,f, X):
        if X.ndim == 1:
            return self._numerical_gradient_1d(f, X)
        else:
            # Xと同じ形の0行列を生成
            grad = np.zeros_like(X)
            
            for idx, x in enumerate(X):
                grad[idx] = self._numerical_gradient_1d(f, x)

            return grad
        
    # 損失関数 交差エントロピー誤差
    # 正解ラベルの場合，その確信度yが高いほど小さくなる 0で高い場合どうする?
    def cross_entropy_error(self,y,t):
        if y.ndim == 1:
            t = t.reshape(1,t.size)
            y = y.reshape(1,y.size)
        batch_size = y.shape[0]
        return -np.sum(t * np.log(y)) / batch_size
    
    def predict(self,x):
        W1,W2 = self.params['W1'],self.params['W2']
        b1,b2 = self.params['b1'],self.params['b2']

        a1 = np.dot(x,W1) + b1 #a1 = Wx1 + b1
        z1 = self.sigmoid(a1)  #x2 = h(a)
        a2 = np.dot(z1,W2) + b2#a2 = Wx2 + b2
        z2 = self.softmax(a2)  #各クラスの確率値に変換

        return z2

    def loss(self, x, t):
        y = self.predict(x)

        return self.cross_entropy_error(y,t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        t = np.argmax(t,axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    # 勾配計算
    def gradient(self,x,t):
        loss_W = lambda W: self.loss(x,t)
        grads = {}
        grads['W1'] = self.numerical_gradient(loss_W,self.params['W1'])
        grads['W2'] = self.numerical_gradient(loss_W,self.params['W2'])
        grads['b1'] = self.numerical_gradient(loss_W,self.params['b1'])
        grads['b2'] = self.numerical_gradient(loss_W,self.params['b2'])

        return grads

In [None]:
#coding: utf-8
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm

# MNISTのダウンロード
mnist = fetch_mldata('MNIST original', data_home=".")

# 訓練データ
x_train = mnist['data'][:60000]
t_train = mnist['target'][:60000]

# trainと同じ形にする -1は自動的に調整
# 無駄な処理
t_train = t_train.reshape(1, -1).transpose()

# encode label
# labelごとのone-hot表現として取り扱う
encoder = OneHotEncoder(n_values=max(t_train)+1)
t_train = encoder.fit_transform(t_train).toarray()

# テストデータ
x_test = mnist['data'][60000:]
t_test = mnist['target'][60000:]

t_test = t_test.reshape(1, -1).transpose()

# encode label
# transformする必要
t_test = encoder.transform(t_test).toarray()

# 学習データがint型なのでfloatに変換，その後[0,1]に正規化する，max = 255
x_train  = x_train.astype(np.float64)
x_train /= x_train.max()
x_test   = x_test.astype(np.float64)
x_test  /= x_test.max()

train_loss_list = []
train_acc_list  = []
test_acc_list   = []

#hyper parameter
iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# 1エポックあたりの繰り返し数
#iter_per_epoch = max(train_size / batch_size, 1)
iter_per_epoch = 1

network = TwoLayerNet(input_size=784,hidden_size=50,output_size=10)

for i in range(iters_num):
    
    # train_sizeの中からbatch_size分ミニバッチのサンプルをとる
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # 勾配を計算する
    grad = network.gradient(x_batch,t_batch)
    
    # ミニバッチによってparameterの更新 x - eta * grad
    for key in ('W1','W2','b1','b2'):
        network.params[key] -= learning_rate * grad[key]
    
    # 全体のlossを格納
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)
    
    # 1エポックごとに認識精度を計算
    #if i % iter_per_epoch == 0:
    
    # acc
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    
    # accの格納
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

train acc, test acc | 0.0983166666667, 0.0955
train acc, test acc | 0.0973666666667, 0.0982
train acc, test acc | 0.0973666666667, 0.0982
train acc, test acc | 0.0973666666667, 0.0982
train acc, test acc | 0.0986333333333, 0.0958
train acc, test acc | 0.0973666666667, 0.0981
train acc, test acc | 0.09915, 0.1009
train acc, test acc | 0.09915, 0.1009
train acc, test acc | 0.09915, 0.1009
train acc, test acc | 0.09915, 0.1009
train acc, test acc | 0.09915, 0.1009
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.102183333333, 0.101
train acc, test acc | 0.102183333333, 0.101
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1135
train acc, test acc | 0.112366666667, 0.1

train acc, test acc | 0.343466666667, 0.3436
train acc, test acc | 0.308716666667, 0.3083
train acc, test acc | 0.291283333333, 0.2907
train acc, test acc | 0.287266666667, 0.2883
train acc, test acc | 0.307633333333, 0.3085
train acc, test acc | 0.346383333333, 0.3478
train acc, test acc | 0.321366666667, 0.3187
train acc, test acc | 0.33235, 0.3317
train acc, test acc | 0.348633333333, 0.3488
train acc, test acc | 0.387616666667, 0.3913
train acc, test acc | 0.353216666667, 0.357
train acc, test acc | 0.342566666667, 0.3509
train acc, test acc | 0.3508, 0.3562
train acc, test acc | 0.390633333333, 0.3948
train acc, test acc | 0.328416666667, 0.3258
train acc, test acc | 0.339933333333, 0.3381
train acc, test acc | 0.34565, 0.349
train acc, test acc | 0.337233333333, 0.3421
train acc, test acc | 0.308333333333, 0.313
train acc, test acc | 0.302133333333, 0.3062
train acc, test acc | 0.28285, 0.2863
train acc, test acc | 0.359483333333, 0.3617
train acc, test acc | 0.304466666667, 0.30

train acc, test acc | 0.643766666667, 0.6544
train acc, test acc | 0.654516666667, 0.6624
train acc, test acc | 0.6462, 0.6537
train acc, test acc | 0.6517, 0.659
train acc, test acc | 0.669416666667, 0.6769
train acc, test acc | 0.6695, 0.6771
train acc, test acc | 0.668116666667, 0.6783
train acc, test acc | 0.674283333333, 0.6796
train acc, test acc | 0.6652, 0.6717
train acc, test acc | 0.689433333333, 0.696
train acc, test acc | 0.684033333333, 0.69
train acc, test acc | 0.6803, 0.6867
train acc, test acc | 0.69475, 0.7012
train acc, test acc | 0.6884, 0.6954
train acc, test acc | 0.7069, 0.7124
train acc, test acc | 0.714383333333, 0.7186
train acc, test acc | 0.707083333333, 0.7132
train acc, test acc | 0.71465, 0.7223
train acc, test acc | 0.702083333333, 0.7082
train acc, test acc | 0.706633333333, 0.7109
train acc, test acc | 0.7053, 0.712
train acc, test acc | 0.699616666667, 0.7071
train acc, test acc | 0.692183333333, 0.6976
train acc, test acc | 0.686633333333, 0.6902
tra

train acc, test acc | 0.7899, 0.794
train acc, test acc | 0.790833333333, 0.7953
train acc, test acc | 0.792, 0.7947
train acc, test acc | 0.797766666667, 0.7991
train acc, test acc | 0.797816666667, 0.8
train acc, test acc | 0.79315, 0.7956
train acc, test acc | 0.795833333333, 0.7968
train acc, test acc | 0.791733333333, 0.7951
train acc, test acc | 0.79345, 0.7967
train acc, test acc | 0.796133333333, 0.7954
train acc, test acc | 0.799716666667, 0.7999
train acc, test acc | 0.7972, 0.7971
train acc, test acc | 0.799016666667, 0.802
train acc, test acc | 0.800133333333, 0.8016
train acc, test acc | 0.803566666667, 0.8052
train acc, test acc | 0.804166666667, 0.8066
train acc, test acc | 0.796716666667, 0.8002
train acc, test acc | 0.803283333333, 0.806
train acc, test acc | 0.80565, 0.8096
train acc, test acc | 0.807416666667, 0.8137
train acc, test acc | 0.808866666667, 0.8126
train acc, test acc | 0.810466666667, 0.8144
train acc, test acc | 0.808216666667, 0.8103
train acc, test a

train acc, test acc | 0.83935, 0.8424
train acc, test acc | 0.840166666667, 0.8431
train acc, test acc | 0.840383333333, 0.844
train acc, test acc | 0.840033333333, 0.8433
train acc, test acc | 0.840783333333, 0.8413
train acc, test acc | 0.840383333333, 0.8433
train acc, test acc | 0.840133333333, 0.8448
train acc, test acc | 0.840183333333, 0.8444
train acc, test acc | 0.8353, 0.8397
train acc, test acc | 0.834966666667, 0.8392
train acc, test acc | 0.833166666667, 0.8376
train acc, test acc | 0.835133333333, 0.8393
train acc, test acc | 0.83405, 0.84
train acc, test acc | 0.837166666667, 0.8399
train acc, test acc | 0.837283333333, 0.842
train acc, test acc | 0.837816666667, 0.84
train acc, test acc | 0.83675, 0.8396
train acc, test acc | 0.831933333333, 0.8367
train acc, test acc | 0.828483333333, 0.8328
train acc, test acc | 0.82825, 0.8341
train acc, test acc | 0.834233333333, 0.8396
train acc, test acc | 0.834733333333, 0.8399
train acc, test acc | 0.836016666667, 0.841
train ac