In [6]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from deeplearning_from_scratch.functions import *
from deeplearning_from_scratch.gradient import *

class SimpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        return np.dot(x, self.W)

    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, z)

        return loss

class TwoLayerNet:

    def __init__(self, input_size, hidden_state_size, output_size, weight_init_std=0.01):
        self.params = {}
        
        self.params['W1'] = weight_init_std * \
                                np.random.randn(input_size, hidden_state_size)
        self.params['b1'] = np.zeros(hidden_state_size)
        self.params['W2'] = weight_init_std * \
                                np.random.randn(hidden_state_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y==t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        grads = {}
        batch_num = x.shape[0]

        #forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        #backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        da1 = np.dot(dy, W2.T)
        dz1 = sigmoid_grad(a1) * da1
        grads['W1'] = np.dot(x.T, dz1)
        grads['b1'] = np.sum(dz1, axis=0)

        return grads



In [7]:
from dataset.mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

In [8]:
iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

net = TwoLayerNet(input_size=784, hidden_state_size=100, output_size=10)


In [9]:
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = net.numerical_gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        net.params[key] -= learning_rate * grad[key]

    loss = net.loss(x_batch, t_batch)
    print("iter[{}]'s loss: {}".format(i, loss))
    train_loss_list.append(loss)



iter[0]'s loss: 2.2790442584296637
iter[1]'s loss: 2.2850061294727566
iter[2]'s loss: 2.2670838055484186
iter[3]'s loss: 2.293915108778876
iter[4]'s loss: 2.257390212485696
iter[5]'s loss: 2.290659960874264
iter[6]'s loss: 2.285904651116665
iter[7]'s loss: 2.2557067047149273
iter[8]'s loss: 2.2875601870655204
iter[9]'s loss: 2.283910395396904
iter[10]'s loss: 2.287353310797123
iter[11]'s loss: 2.303930531581722
iter[12]'s loss: 2.2778551712756414
iter[13]'s loss: 2.2932250470979394
iter[14]'s loss: 2.2889821319939405
iter[15]'s loss: 2.26333062643548
iter[16]'s loss: 2.282785870921418
iter[17]'s loss: 2.292968990917143
iter[18]'s loss: 2.283794453062712
iter[19]'s loss: 2.2870545041129433
iter[20]'s loss: 2.2883088877391566
iter[21]'s loss: 2.2783847342286907
iter[22]'s loss: 2.289369904485266
iter[23]'s loss: 2.278101387737103
iter[24]'s loss: 2.2815109150554673
iter[25]'s loss: 2.2647942811173625
iter[26]'s loss: 2.2863157317527754
iter[27]'s loss: 2.2908397684063524
iter[28]'s loss:

KeyboardInterrupt: 