In [1]:
import sys,os
sys.path.append('../common')
from layers import *
from functions import softmax,zscore,cross_entropy_error  #softmax(活性化関数),エントロピー二乗誤差(誤差関数)
from gradient import numerical_gradient #numerical_gradient(パラメータの更新、勾配)import pprint
from collections import OrderedDict
import numpy as np

In [2]:
# Weights W are initialised from a Gaussian distribution; biases are initialised to zero.
class Twolayer:
    """Two-layer fully connected classifier: Affine -> ReLU -> Affine, then softmax-with-loss.

    Attributes:
        params: dict with keys 'W1', 'b1', 'W2', 'b2' holding the trainable arrays.
        layers: OrderedDict of the hidden layers, applied in insertion order by `predict`.
        lastLayer: the SoftmaxWithLoss object used by `loss` and `gradient`.

    `Affine`, `Relu` and `SoftmaxWithLoss` are not defined in this class; they are
    presumably supplied by the notebook's `from layers import *` (TODO confirm).
    """

    def __init__(self,input_size,hidden_size,output_size,weight_init_std = 0.01):
        """Create the parameters and wire up the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
            output_size: number of output classes.
            weight_init_std: scale applied to the standard-normal initial weights
                (this is an initialisation scale, not a learning rate).
        """
        # Parameter initialisation: scaled Gaussian weights, zero biases.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size)   # (input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)                                        # (hidden_size,)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size,output_size)  # (hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)                                        # (output_size,)

        # Layer construction. The Affine layers receive the very same array objects
        # stored in self.params; presumably they keep references rather than copies,
        # so in-place updates of self.params are seen by the layers (TODO confirm).
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'],self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'],self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def convert_t(self,t_train,num_classes=None):
        """One-hot encode a vector of 0-based integer class labels.

        Label k sets column k, so `np.argmax` on a row of the result returns the
        original label.

        Args:
            t_train: array-like of integer labels in [0, num_classes).
            num_classes: width of the encoding. Defaults to the size of the
                output bias 'b2', i.e. the network's number of outputs.

        Returns:
            Float array of shape (len(t_train), num_classes) with a single 1 per row.

        Raises:
            ValueError: if any label lies outside [0, num_classes).
        """
        if num_classes is None:
            num_classes = self.params['b2'].shape[0]

        # Cast to a signed integer type first: arithmetic/indexing on uint8 labels
        # must never wrap around.
        labels = np.asarray(t_train).astype(np.int64).ravel()
        if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
            raise ValueError(
                'labels must lie in [0, %d); got range [%d, %d]'
                % (num_classes, labels.min(), labels.max())
            )

        t = np.zeros((labels.shape[0],num_classes))
        t[np.arange(labels.shape[0]),labels] = 1
        return t

    def predict(self,x):
        """Run `x` through every layer in `self.layers`, in insertion order.

        Returns the output of the last layer in `self.layers` (softmax is not applied here).
        """
        for layer in self.layers.values():  # Affine1 -> Relu1 -> Affine2
            x = layer.forward(x)
        return x

    def loss(self,x,t):
        """Return `self.lastLayer.forward(predict(x), t)` for inputs `x` and targets `t`."""
        y = self.predict(x)
        return self.lastLayer.forward(y,t)

    def accuracy(self,x,t):
        """Fraction of rows of `x` whose arg-max prediction equals the target.

        `t` may be a 1-D vector of class indices or a 2-D array (e.g. one-hot),
        in which case its row-wise arg-max is used.
        """
        y = self.predict(x)
        y = np.argmax(y,axis = 1)
        if t.ndim != 1 : t = np.argmax(t,axis = 1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self,x,t):
        """Gradients of the loss w.r.t. each parameter via the numerical-gradient helper.

        Inside this method the bare name `numerical_gradient` resolves to the
        module-level function imported by the notebook, not to this method.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2'.
        """
        # The argument W is ignored: the loss is always evaluated from the current
        # contents of self.params. Presumably the helper perturbs the passed array
        # in place to probe the loss (TODO confirm against `gradient` module).
        loss_W = lambda W:self.loss(x,t)

        grads = {}
        for key in ('W1','b1','W2','b2'):
            grads[key] = numerical_gradient(loss_W,self.params[key])
        return grads

    def gradient(self,x,t):
        """Gradients of the loss w.r.t. each parameter via backpropagation.

        Args:
            x: input batch.
            t: targets for the batch.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2', read from the Affine layers'
            `dW` / `db` attributes after the backward pass.
        """
        # Forward pass: evaluating the loss runs predict() and the last layer's forward().
        self.loss(x,t)

        # Backward pass: start from the loss layer with an upstream gradient of 1,
        # then walk the hidden layers in reverse order (Affine2 -> Relu1 -> Affine1).
        dout = 1
        dout = self.lastLayer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads
        
        
        
        

In [3]:
import sys,os
sys.path.append('../')
from load_mnist import load_mnist
import numpy as np

In [6]:
# Alternative loader kept for reference:
#   (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Seed the global NumPy RNG so weight initialisation and mini-batch sampling
# are repeatable across Restart & Run All.
SEED = 0
np.random.seed(SEED)

x_train,t_train = load_mnist('',kind = 'train')
x_test,t_test = load_mnist('',kind = 't10k')

# zscore comes from the project's `functions` module; presumably it standardises
# the inputs (TODO confirm). Train and test are transformed independently here.
x_train = zscore(x_train)
x_test = zscore(x_test)

network = Twolayer(input_size=784,hidden_size=50,output_size=10)

# Turn the integer label vectors into one-hot matrices.
t_train = network.convert_t(t_train)
t_test = network.convert_t(t_test)

# Hyper-parameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Integer number of iterations per epoch. With true division this was a float,
# and `i % iter_per_epoch == 0` would only ever fire at i == 0 whenever
# train_size is not an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size,1)

for i in range(iters_num):
    # Sample a mini-batch of batch_size indices (with replacement) from the training set.
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation.
    grad = network.gradient(x_batch,t_batch)

    # Plain SGD step. The update is in place (`-=`) so the arrays held in
    # network.params keep their identity.
    for key in ('W1','b1','W2','b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, evaluated after the update.
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)

    # Once per epoch: record and report accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train,t_train)
        test_acc = network.accuracy(x_test,t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc,test_acc)




0.14005 0.1411
0.9394333333333333 0.9383
0.9576833333333333 0.9557
0.9683 0.9627
0.9738666666666667 0.9659
0.97795 0.9697
0.97915 0.9683
0.9823166666666666 0.9715
0.9843166666666666 0.973
0.9862833333333333 0.9731
0.9873166666666666 0.974
0.9871333333333333 0.9722
0.9894333333333334 0.9721
0.985 0.9702
0.9925333333333334 0.9741
0.9919833333333333 0.9744
0.99245 0.9743
