In [3]:
import sys,os
sys.path.append(os.curdir+"/deep-learning-from-scratch")
from dataset.mnist import load_mnist

import numpy as np
from PIL import Image
import matplotlib.pylab as plt

In [27]:
def debug(variable):
    """Print ``<expression> = <repr of its value>`` for quick inspection.

    ``variable`` is a string containing a Python expression (in this
    notebook, always a variable name). It is evaluated with ``eval`` and the
    result is shown through ``repr``.

    NOTE(review): ``eval`` runs arbitrary code, so only pass trusted,
    hard-coded strings.
    """
    shown = repr(eval(variable))
    print(f"{variable} = {shown}")

In [9]:
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` caches both operands because the gradient with respect to one
    operand is the *other* operand scaled by the upstream gradient.
    """

    def __init__(self):
        # Operands of the most recent forward() call.
        self.x = self.y = None

    def forward(self, x, y):
        """Remember both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        For f(x, y) = x * y the partial derivatives are df/dx = y and
        df/dy = x, so the cached operands swap roles here.
        """
        grad_x = self.y * dout
        grad_y = self.x * dout
        return grad_x, grad_y

In [16]:
# Apple purchase example: price = apple * apple_num * tax.
apple = 100
apple_num = 2
tax = 1.1

# Layers: one multiplication node per product in the graph.
apple_layer = MulLayer()
tax_layer = MulLayer()

# Forward pass.
apple_price = apple_layer.forward(apple, apple_num)
# Stored as `total_price` rather than `sum`: rebinding `sum` at notebook scope
# would shadow the built-in sum() for every cell executed afterwards.
# int() truncates the float product before it is displayed.
total_price = int(tax_layer.forward(apple_price, tax))
print(total_price)

# Backward pass, seeded with d(price)/d(price) = 1.
dprice = 1
dapple_price, dtax = tax_layer.backward(dprice)
print(dapple_price)
print(dtax)

dapple, dnum = apple_layer.backward(dapple_price)
print(dapple)
print(dnum)

220
1.1
200
2.2
110.00000000000001


In [29]:
class AddLayer:
    """Addition node of a computational graph."""

    def __init__(self):
        # Nothing to initialise; forward() attaches the operands.
        pass

    def forward(self, x, y):
        """Record both operands on the instance and return ``x + y``."""
        self.x, self.y = x, y
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        For f(x, y) = x + y both partial derivatives equal 1, so each input
        receives the upstream gradient multiplied by 1.
        """
        grad_x, grad_y = 1 * dout, 1 * dout
        return grad_x, grad_y

In [30]:
# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer object per operation in the graph; each caches its own inputs.
apple_layer, orange_layer = MulLayer(), MulLayer()
sum_layer = AddLayer()
tax_layer = MulLayer()

# Evaluate the graph from the leaves to the final price.
apple_price = apple_layer.forward(apple, apple_num)
orange_price = orange_layer.forward(orange, orange_num)
sum_price = sum_layer.forward(apple_price, orange_price)
tax_price = tax_layer.forward(sum_price, tax)
print(tax_price)

715.0000000000001


In [37]:
# Backward pass: visit the layers in the reverse of the forward order,
# seeded with d(tax_price)/d(tax_price) = 1.
dout = 1
dsum_price, dtax_price = tax_layer.backward(dout)
dapple_price, dorange_price = sum_layer.backward(dsum_price)
dapple, dapple_num = apple_layer.backward(dapple_price)
dorange, dorange_num = orange_layer.backward(dorange_price)

# Report every gradient in the order it was computed.
debug("dsum_price")
debug("dtax_price")
debug("dapple_price")
debug("dorange_price")
debug("dapple")
debug("dapple_num")
debug("dorange")
debug("dorange_num")

dsum_price = 1.1
dtax_price = 650
dapple_price = 1.1
dorange_price = 1.1
dapple = 2.2
dapple_num = 110.00000000000001
dorange = 3.3000000000000003
dorange_num = 165.0


In [43]:
class Relu:
    """ReLU activation layer: keeps positive inputs and zeroes the rest."""

    def __init__(self):
        # Boolean array marking where the last forward() input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        The mask of non-positive positions is cached for ``backward``.
        ``x`` itself is not modified.
        """
        self.mask = (x <= 0)
        # copy must be *called*; the bare attribute `x.copy` is a bound method
        # and item assignment on it raises TypeError.
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient blocked where the input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx
    
# Small demo of boolean-mask indexing on a 2x2 array.
x = np.array([[1.0, -0.5],
              [-2.0, 3.0]])
print(x)

# True wherever the element is non-positive.
mask = x <= 0
print(mask)

# Assigning through the mask zeroes the selected elements of x in place.
x[mask] = 0
debug("x")

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]
x = array([[1., 0.],
       [0., 3.]])


In [44]:
class Sigmoid:
    """Sigmoid activation layer, y = 1 / (1 + exp(-x))."""

    def __init__(self):
        # Output of the most recent forward(); backward() reuses it.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x``, cache the result and return it."""
        denominator = 1 + np.exp(-1 * x)
        self.out = 1 / denominator
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the sigmoid derivative y * (1 - y).

        ``y`` is the output cached by the last ``forward`` call.
        """
        y = self.out
        return dout * y * (1.0 - y)

In [6]:
class Affine:
    """Fully connected layer computing ``x . W + b``.

    ``backward`` uses ``x.T . dout`` and ``dout . W.T``, which implies a 2-D
    input ``x`` of shape (batch, in_features) and a weight matrix ``W`` of
    shape (in_features, out_features).
    """

    def __init__(self, W, b):
        self.x = None    # input cached by forward() for use in backward()
        self.W = W       # weight matrix (stored by reference, not copied)
        self.b = b       # bias vector (stored by reference, not copied)
        self.dW = None   # gradient w.r.t. W, filled in by backward()
        self.db = None   # gradient w.r.t. b, filled in by backward()

    def forward(self, x):
        """Cache ``x`` and return ``x . W + b``."""
        self.x = x
        # The input goes on the left: (batch, in) . (in, out) -> (batch, out).
        # np.dot(W, x) would mismatch shapes and disagree with backward().
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Store ``dW``/``db`` on the instance and return the input gradient.

        ``dout`` is the upstream gradient with the same shape as the
        forward output.
        """
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.x.T, dout)

        dx = np.dot(dout, self.W.T)
        return dx

In [7]:
def softmax(a):
    """Softmax over the last axis of ``a``.

    A 1-D input yields one probability vector. For inputs with more
    dimensions (e.g. a batch of score rows) every slice along the last axis
    is normalised independently, so each row sums to 1.

    Args:
        a: array-like of scores.

    Returns:
        Array with the same shape as ``a``.
    """
    a = np.asarray(a)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if a.ndim > 0 else None
    # Subtracting the per-row maximum avoids overflow in exp() and does not
    # change the result.
    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=axis, keepdims=True)

In [8]:
def cross_entropy_error(y, t):
    """Return the batch-averaged cross-entropy of ``y`` against ``t``.

    Computes ``-sum(t * log(y + 1e-7)) / batch_size``, where ``batch_size`` is
    the first dimension of ``y``. 1-D inputs are treated as a batch of one.
    ``t`` is multiplied element-wise with ``log(y)``, so it is presumably a
    one-hot (or probability) array shaped like ``y`` -- confirm with callers.
    """
    if y.ndim == 1:
        # Promote a single sample to a (1, n) batch.
        t = t.reshape(1, -1)
        y = y.reshape(1, -1)

    n_samples = y.shape[0]
    eps = 1e-7  # keeps log() away from log(0)
    weighted_log = t * np.log(y + eps)
    return -np.sum(weighted_log) / n_samples

In [9]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, as the final layer of a network."""

    def __init__(self):
        self.loss = None  # loss value from the last forward()
        self.y = None     # softmax output from the last forward()
        self.t = None     # targets passed to the last forward()

    def forward(self, x, t):
        """Cache ``t`` and ``softmax(x)``, then return the cross-entropy loss.

        ``t`` is fed to ``cross_entropy_error`` together with the softmax
        output; presumably it is one-hot encoded with the same shape as ``x``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        Args:
            dout: upstream gradient of the loss value (1 when the loss is
                the final output). The result is scaled by it.

        Returns:
            ``dout * (y - t) / batch_size``.
        """
        # A 1-D target is a single sample (cross_entropy_error treats it as a
        # batch of one), so its length must not be used as the batch size.
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size
        return dx

In [30]:
import sys
import os
import pprint
sys.path.append(os.curdir+"/deep-learning-from-scratch")
from dataset.mnist import load_mnist
from common.gradient import numerical_gradient
from collections import OrderedDict

In [36]:
class TwoLayerNet:
    """Two-layer network: Affine1 -> Relu1 -> Affine2, scored by SoftmaxWithLoss."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Parameters: weights are scaled standard-normal draws, biases are zeros.
        self.params = {
            "W1": weight_init_std * np.random.randn(input_size, hidden_size),
            "b1": np.zeros(hidden_size),
            "W2": weight_init_std * np.random.randn(hidden_size, output_size),
            "b2": np.zeros(output_size),
        }

        # Layers receive the very same array objects that live in self.params.
        p = self.params
        self.layers = OrderedDict([
            ("Affine1", Affine(p["W1"], p["b1"])),
            ("Relu1", Relu()),
            ("Affine2", Affine(p["W2"], p["b2"])),
        ])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Pass ``x`` through every layer in ``self.layers`` and return the result.

        The loss layer (``self.lastLayer``) is not applied here.
        """
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss for input data ``x`` and target data ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` whose argmax prediction equals ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)
        # Targets that are not 1-D are reduced to indices via argmax
        # (presumably one-hot rows).
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_correct = np.sum(predicted == t)
        return n_correct / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients of the loss for each parameter via the imported
        ``numerical_gradient`` helper, keyed "W1", "b1", "W2", "b2"."""
        def loss_W(W):
            # The argument is ignored; the loss is always recomputed from the
            # network's current parameters. Presumably the helper perturbs
            # the parameter array in place -- confirm against common.gradient.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ("W1", "b1", "W2", "b2")
        }

    def gradient(self, x, t):
        """Return gradients of the loss for each parameter via backpropagation."""
        # Forward pass first so each layer caches what its backward() needs.
        self.loss(x, t)

        # Backward pass: loss layer first, then the layers in reverse order.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored during backward().
        affine1 = self.layers["Affine1"]
        affine2 = self.layers["Affine2"]
        return {
            "W1": affine1.dW,
            "b1": affine1.db,
            "W2": affine2.dW,
            "b2": affine2.db,
        }

In [37]:
# Gradient check: compare backprop gradients against numerical gradients.
# Load MNIST data (the test split is named x_test, not the earlier typo x_text).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Slicing reminder: values[:3] keeps the first three items,
# e.g. [1, 2, 3, 4][:3] -> [1, 2, 3]. Only the first three samples are used.
x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Mean absolute difference per parameter.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W2:5.69091664085142e-09
b1:2.4926652621983998e-09
b2:1.3941851706028575e-07
W1:3.8526356030157895e-10


In [38]:
# Training with backpropagation.
# Load MNIST data.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]

batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Floor division keeps this an integer. With true division a train_size that
# is not a multiple of batch_size gives a fractional value, and the
# `i % iter_per_epoch == 0` check below would then only fire at i == 0.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of sample indices.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # Parameter update. The in-place `-=` matters: the Affine layers were
    # constructed with these same array objects.
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, record accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.09176666666666666 0.0905
0.9060833333333334 0.9076
0.92545 0.9269
0.93415 0.9342
0.9457666666666666 0.9434
0.95335 0.95
0.9573333333333334 0.9524
0.9617666666666667 0.9581
0.96485 0.9602
0.9680833333333333 0.9626
0.97 0.9652
0.9717 0.966
0.9733333333333334 0.9672
0.9755333333333334 0.9681
0.9758833333333333 0.9667
0.9784333333333334 0.9683
0.97795 0.9691
