## 2層パーセプトロン 

In [8]:
import sys,os
sys.path.append('~/common')
from common.functions import softmax,cross_entropy_error  #softmax(活性化関数),エントロピー二乗誤差(誤差関数)
from common.gradient import numerical_gradient #numerical_gradient(パラメータの更新、勾配)
import pprint
import numpy as np

In [9]:
#重みWはガウス分布で初期化,バイアスは0で初期化
class Twolayer:
    
    def __init__(self,input_size,hidden_size,output_size,weight_init_std = 0.01):
        
        #重みの初期化
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size) #学習率(0.01)*ランダムに生成された行列(input_size×hidden_size)
        self.params['b1'] = np.zeros(hidden_size) #(1×hidden_size)、要素が0の1次元配列
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size,output_size) #行列の形が違う。
        self.params['b2'] = np.zeros(output_size) #行列の形が違う。
    
    def convert_t(self,t_train):
   
        t = np.zeros((t_train.shape[0],10))
        for i in range(t_train.shape[0]):
            label = t_train[i] #5
            t[i][label-1] = 1
        
        return t
        
    
    def predict(self,x): #出力
        W1,W2 = self.params['W1'],self.params['W2']
        b1,b2 = self.params['b1'],self.params['b2']
        
        a1 = np.dot(x,W1) + b1
        z1 = softmax(a1)
        a2 = np.dot(z1,W2) + b2
        y = softmax(a2)
        
        return y
         
    
    def loss(self,x,t): #lossの算出 by cross_entropy_error
        y = self.predict(x)
        
        return cross_entropy_error(y,t)
    
    def accuracy(self,x,t):
        
        y = self.predict(x)
        y = np.argmax(y,axis = 0)
        t = np.argmax(t,axis = 0)
        
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    def solve_gradient(self,x,t):
        loss_W = lambda W:self.loss(x,t) #入力と正解ラベルのlossを求める無名関数loss_Wの作成
        
        grads = {}
        
        grads['W1'] = numerical_gradient(loss_W,self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W,self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W,self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W,self.params['b2'])
        
        return grads
        
        

In [26]:
from load_mnist import *
x_train,t_train = load_mnist('',kind = 'train')

net = Twolayer(input_size = 784,hidden_size = 100,output_size = 10)

x = x_train[0]
#print(x.shape)

t = net.convert_t(t_train)
t = t[0]

print(net.params['W1'])
#net.accuracy(x,t)
grads = net.solve_gradient(x,t)




[[ 2.43431311e-03 -9.50479882e-03 -1.73524496e-02 ...  1.20079467e-02
  -1.40627534e-02  9.28828438e-04]
 [-1.51936752e-02 -2.38137038e-02 -7.50697564e-03 ...  1.29086827e-02
  -2.20105639e-02  4.17997485e-03]
 [-1.07545648e-02  8.66533582e-03  3.35433763e-03 ... -3.66920043e-03
   3.76652798e-03 -1.32429462e-02]
 ...
 [-2.75354024e-03 -1.99372833e-02 -9.92409642e-03 ... -1.63471860e-03
  -1.01797631e-02 -2.04050267e-02]
 [-9.14702428e-03  4.00698222e-04 -1.83537789e-02 ...  1.33834253e-02
   1.43903290e-02  4.75340859e-03]
 [ 7.45297221e-03 -2.27098257e-03  5.81777337e-04 ...  2.67391523e-04
   1.06502510e-05  3.35984345e-02]]


In [27]:
print(grads['W1'].shape)
np.dot(x,grads['W1'])

(784, 100)


array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  3.70854458e-04,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        7.20495130e-02,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  