In [5]:
import os, sys
sys.path.append(os.pardir)
import numpy as np
from common.functions import *
from dataset.mnist import load_mnist
from common.gradient import numerical_gradient
limit = sys.getrecursionlimit()
sys.setrecursionlimit(limit)

In [6]:
def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    return exp_a / sum_exp_a
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [9]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
    
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        return softmax(a2)
    
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)
    
    def accuracy(self, x, t):
        y = predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        
        return np.sum(y == t) / float(x.shape[0])
    
    def  numerical_gradient(self, x, t):
        loss_w = lambda W: self.loss(x,t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_w, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_w, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_w, self.params['b2'])
    
        print(grads)
        return grads


In [None]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

train_loss_list = []

item_num = 10000
batch_size = 100
train_size = 10000
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

for i in range(item_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    grad = network.numerical_gradient(x_batch, t_batch)
    print(grad)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

{'W1': array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), 'W2': array([[ 1.73250814e-02,  1.49408960e-02,  1.36414253e-03,
        -2.25547481e-02, -1.95860897e-02,  2.15450868e-02,
         1.75193134e-02, -2.67908927e-02, -5.65356169e-03,
         1.89151246e-03],
       [ 1.78458076e-02,  1.65661167e-02,  3.34606264e-03,
        -2.39012334e-02, -2.28890024e-02,  2.29551764e-02,
         1.88098562e-02, -2.74811400e-02, -5.50542941e-03,
         2.51203010e-04],
       [ 1.84423617e-02,  1.54702172e-02,  1.19105905e-03,
        -2.28882304e-02, -2.07184785e-02,  2.18142561e-02,
         1.86582300e-02, -2.85460246e-02, -5.01128976e-03,
         1.58813367e-03],
       [ 1.77533973e-02,  1.33439300e-02,  1.29735713e-03,
        -2.43722811e-02, -1.91629462e-02,  2.15644262e-02,
         1.9071155

In [None]:
train_loss_list