In [1]:
# Multiplication layer
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` remembers both operands because the gradient with respect
    to one operand is the upstream gradient times the *other* operand.
    """

    def __init__(self):
        # Operands of the latest forward pass; None until forward() runs.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Cache ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        Uses the operands cached by the latest ``forward`` call.
        """
        grad_x = dout * self.y  # gradient w.r.t. x uses the cached y
        grad_y = dout * self.x  # gradient w.r.t. y uses the cached x
        return grad_x, grad_y

In [2]:
# Figure 5-16: (apple * apple_num) * tax
apple, apple_num = 100, 2
tax = 1.1

# One multiplication layer per multiplication in the expression above
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: each layer caches its inputs for the backward pass
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


In [3]:
# Backward pass: seed with dprice = 1 and call the layers' backward methods
# in the reverse of the forward order (tax layer first, then apple layer).
# Relies on mul_tax_layer / mul_apple_layer having run forward() already,
# since MulLayer.backward reads the operands cached there.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


In [7]:
# Addition layer
class AddLayer:
    """Addition node of a computational graph.

    The layer keeps no state: its backward pass does not read the inputs
    of the forward pass.
    """

    def __init__(self):
        """Nothing to initialise."""

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each equal to ``dout * 1``."""
        # The explicit `* 1` is kept: the two results are computed
        # separately instead of returning `dout` itself twice.
        return dout * 1, dout * 1

In [10]:
# Figure 5-17: (apple * apple_num + orange * orange_num) * tax
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer object per operation in the expression above
mul_apple_layer, mul_orange_layer = MulLayer(), MulLayer()
add_fruit_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
fruit_price = add_fruit_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(fruit_price, tax)
print("Output: {}".format(price))

# Backward pass: seed with d_price = 1, then visit the layers starting
# from the output side (tax -> add -> the two fruit multiplications).
d_price = 1
d_fruit_price, d_tax = mul_tax_layer.backward(d_price)
d_apple_price, d_orange_price = add_fruit_layer.backward(d_fruit_price)
d_apple, d_apple_num = mul_apple_layer.backward(d_apple_price)
d_orange, d_orange_num = mul_orange_layer.backward(d_orange_price)
print(d_apple, d_apple_num, d_orange, d_orange_num, d_tax)

Output: 715.0000000000001
2.2 110.00000000000001 3.3000000000000003 165.0 650


In [11]:
class Relu:
    """ReLU activation layer for array inputs.

    ``forward`` records which elements were ``<= 0``; ``backward`` blocks
    the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask of the latest forward input: True where x <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0.

        ``x`` must support ``.copy()`` and boolean-mask assignment (it is
        used that way below); the input itself is left unmodified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the masked positions zeroed.

        Works on a copy so the caller's ``dout`` array is not modified;
        the previous implementation overwrote it in place.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [12]:
class Sigmoid:
    """Sigmoid activation layer: ``1 / (1 + exp(-x))``.

    The forward output is cached because the backward pass is written
    purely in terms of that output.
    """

    def __init__(self):
        # Output of the latest forward pass; None until forward() runs.
        self.out = None

    def forward(self, x):
        """Compute the sigmoid of ``x``, cache it, and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - y) * y`` where ``y`` is the cached output."""
        y = self.out
        return dout * (1.0 - y) * y

In [13]:
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    Attributes:
        W, b: the weight and bias objects passed to the constructor
            (stored by reference, not copied).
        x: input of the latest forward pass.
        dW, db: gradients w.r.t. ``W`` and ``b``, filled in by ``backward``.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Store ``dW`` / ``db`` and return the gradient w.r.t. the input.

        ``self.x.T`` and the ``axis=0`` sum treat the first axis of ``x``
        and ``dout`` as the batch axis, i.e. 2-D inputs are expected.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row in forward(), so its gradient is
        # the upstream gradient summed over the batch axis. The previous
        # code summed an undefined name `b` here.
        self.db = np.sum(dout, axis=0)
        return dx

In [14]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, as one output layer.

    Attributes:
        loss: value returned by the latest ``forward`` call.
        y: result of ``softmax(x)`` from the latest ``forward`` call.
        t: targets passed to the latest ``forward`` call.
    """

    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        """Cache ``t`` and ``softmax(x)``, then return the loss.

        ``softmax`` and ``cross_entropy_error`` are not defined in this
        class; they must be available in the surrounding namespace.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient w.r.t. the ``x`` given to ``forward``.

        Args:
            dout: upstream gradient; the result is scaled by it. With the
                default of 1 the result equals the unscaled gradient.

        The result is ``(y - t) / batch_size`` where ``t`` is taken as
        one-hot. Targets whose element count differs from ``y``'s are
        treated as integer class indices, one per row of ``y``.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # Targets have one entry per output: one-hot style encoding.
            dx = (self.y - self.t) / batch_size
        else:
            # Targets are class indices: subtract 1 only at the labelled
            # column of each row. A plain `y - t` would broadcast the
            # indices across columns and give a wrong result.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx * dout

In [17]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine1 -> Relu1 -> Affine2 -> SoftmaxWithLoss.

    Attributes:
        params: dict with the arrays 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict of the hidden layers in forward order.
        lastLayer: the SoftmaxWithLoss output layer.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters and wire them into the layers.

        Args:
            input_size: number of input features (rows of W1).
            hidden_size: number of hidden units (columns of W1, rows of W2).
            output_size: number of outputs (columns of W2).
            weight_init_std: scale applied to the ``randn`` weight samples.
        """
        # Shapes must chain: (input, hidden) then (hidden, output), and
        # each bias matches the column count of its weight matrix.
        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # The layers receive the very same array objects stored in params.
        self.layers = OrderedDict()
        self.layers['Affine1'] = \
            Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = \
            Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` (no softmax)."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the output layer's loss for inputs ``x`` and targets ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose argmax prediction matches ``t``.

        ``t`` may be 1-D class indices; any other rank is reduced with
        ``argmax`` along axis 1 first.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients from the imported ``numerical_gradient`` helper.

        The loss callable ignores its argument and simply re-evaluates the
        loss; presumably the helper perturbs the parameter array in place
        -- TODO confirm against common.gradient.
        """
        def loss_W(W):
            return self.loss(x, t)

        # Inside a method the bare name resolves to the module-level
        # helper, not to this method.
        return {key: numerical_gradient(loss_W, self.params[key])
                for key in ('W1', 'b1', 'W2', 'b2')}

    def gradient(self, x, t):
        """Return the gradients of the loss computed by backpropagation.

        Returns:
            dict with 'W1', 'b1', 'W2', 'b2' taken from the ``dW`` / ``db``
            attributes of the two Affine layers.
        """
        # Forward pass first, so every layer has a chance to cache what
        # its backward() needs.
        self.loss(x, t)

        dout = self.lastLayer.backward(1)

        # Walk the layers from the output side back to the input side.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

In [18]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Only the first three samples are used for the comparison
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Mean absolute difference between the two gradients, per parameter
for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W1:2.0320322819378514e-10
b1:9.190678347862323e-10
W2:7.210967020483684e-08
b2:1.4462285943539487e-07


In [22]:
# Training with backpropagation
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
# NOTE: this rebinds the name TwoLayerNet to the class from the
# two_layer_net module, replacing any class of that name defined in an
# earlier cell.
from two_layer_net import TwoLayerNet
from tqdm import tqdm

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 100000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# History: one loss per iteration, one accuracy pair per evaluation
train_loss_list = []
train_acc_list = []
test_acc_list = []

# True division, so this is a float; it is only used in the modulo test below.
iter_per_epoch = max(train_size / batch_size, 1)

for i in tqdm(range(iters_num)):
    # Draw batch_size random indices in [0, train_size); np.random.choice
    # samples with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Compute the gradient
    grad = network.gradient(x_batch, t_batch)
    
    # Update
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]  # the provided class manages its parameters through the params member, not the layer member
    
    # Loss on the same batch, evaluated after the update above
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    # Evaluate on the full train and test sets whenever i is a multiple of
    # iter_per_epoch (this includes i == 0, before any meaningful training).
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
    
# Final evaluation after the last iteration
train_acc = network.accuracy(x_train, t_train)
test_acc = network.accuracy(x_test, t_test)
train_acc_list.append(train_acc)
test_acc_list.append(test_acc)
print(train_acc, test_acc)

  0%|          | 125/100000 [00:00<05:02, 330.34it/s]

0.09736666666666667 0.0982


  1%|          | 712/100000 [00:01<04:03, 406.92it/s]

0.7957833333333333 0.8013


  1%|▏         | 1302/100000 [00:02<03:57, 414.96it/s]

0.8771 0.8804


  2%|▏         | 1885/100000 [00:03<03:55, 416.56it/s]

0.8978833333333334 0.9004


  3%|▎         | 2526/100000 [00:05<03:56, 411.97it/s]

0.9077166666666666 0.9077


  3%|▎         | 3104/100000 [00:06<03:56, 409.27it/s]

0.9136166666666666 0.9142


  4%|▎         | 3685/100000 [00:07<03:54, 410.66it/s]

0.9198833333333334 0.9207


  4%|▍         | 4273/100000 [00:08<03:58, 401.52it/s]

0.9237166666666666 0.9241


  5%|▍         | 4862/100000 [00:09<03:50, 413.36it/s]

0.9277666666666666 0.9299


  6%|▌         | 5506/100000 [00:11<04:07, 382.39it/s]

0.9307666666666666 0.931


  6%|▌         | 6099/100000 [00:12<03:49, 409.44it/s]

0.9341833333333334 0.9344


  7%|▋         | 6693/100000 [00:13<03:44, 415.46it/s]

0.9365166666666667 0.9367


  7%|▋         | 7259/100000 [00:14<04:14, 363.72it/s]

0.9383833333333333 0.938


  8%|▊         | 7885/100000 [00:16<04:41, 327.29it/s]

0.9404166666666667 0.9399


  8%|▊         | 8460/100000 [00:17<04:36, 331.24it/s]

0.9425666666666667 0.9411


  9%|▉         | 9052/100000 [00:19<04:51, 312.19it/s]

0.9443333333333334 0.9414


 10%|▉         | 9675/100000 [00:20<03:45, 401.41it/s]

0.9465666666666667 0.9439


 10%|█         | 10274/100000 [00:21<03:46, 395.53it/s]

0.9477166666666667 0.9447


 11%|█         | 10908/100000 [00:22<03:37, 409.43it/s]

0.9491333333333334 0.9459


 12%|█▏        | 11500/100000 [00:24<03:39, 402.57it/s]

0.9507666666666666 0.9465


 12%|█▏        | 12079/100000 [00:25<03:42, 394.43it/s]

0.95215 0.9486


 13%|█▎        | 12673/100000 [00:26<03:48, 381.87it/s]

0.95315 0.9491


 13%|█▎        | 13281/100000 [00:27<03:42, 390.19it/s]

0.9546 0.9508


 14%|█▍        | 13882/100000 [00:29<03:28, 413.64it/s]

0.9554333333333334 0.951


 15%|█▍        | 14518/100000 [00:30<03:35, 396.38it/s]

0.9564833333333334 0.9524


 15%|█▌        | 15115/100000 [00:31<03:27, 409.43it/s]

0.95755 0.9539


 16%|█▌        | 15689/100000 [00:32<03:39, 384.67it/s]

0.9588833333333333 0.9536


 16%|█▋        | 16322/100000 [00:33<03:26, 404.83it/s]

0.95965 0.9548


 17%|█▋        | 16898/100000 [00:35<03:24, 405.70it/s]

0.9607333333333333 0.9562


 17%|█▋        | 17472/100000 [00:36<03:19, 412.85it/s]

0.9621 0.9569


 18%|█▊        | 18135/100000 [00:37<02:51, 476.54it/s]

0.9622833333333334 0.9579


 19%|█▊        | 18725/100000 [00:38<03:16, 413.22it/s]

0.9628333333333333 0.9576


 19%|█▉        | 19303/100000 [00:39<03:21, 400.36it/s]

0.96365 0.9587


 20%|█▉        | 19879/100000 [00:41<03:09, 423.35it/s]

0.9645833333333333 0.9588


 21%|██        | 20528/100000 [00:42<03:12, 413.11it/s]

0.9650666666666666 0.9597


 21%|██        | 21105/100000 [00:43<03:24, 386.04it/s]

0.9656833333333333 0.9598


 22%|██▏       | 21705/100000 [00:44<03:16, 398.56it/s]

0.9664666666666667 0.9604


 22%|██▏       | 22297/100000 [00:45<03:12, 402.68it/s]

0.9674166666666667 0.9599


 23%|██▎       | 22881/100000 [00:47<03:08, 409.31it/s]

0.9677 0.9606


 24%|██▎       | 23528/100000 [00:48<03:02, 419.00it/s]

0.9676833333333333 0.9599


 24%|██▍       | 24108/100000 [00:49<03:04, 412.19it/s]

0.9687666666666667 0.9605


 25%|██▍       | 24691/100000 [00:50<03:04, 409.28it/s]

0.9693166666666667 0.9617


 25%|██▌       | 25286/100000 [00:51<02:59, 417.33it/s]

0.9698333333333333 0.9618


 26%|██▌       | 25866/100000 [00:53<03:06, 397.69it/s]

0.9702833333333334 0.9623


 27%|██▋       | 26507/100000 [00:54<03:09, 387.64it/s]

0.9705666666666667 0.9629


 27%|██▋       | 27086/100000 [00:55<03:06, 391.74it/s]

0.9713166666666667 0.9629


 28%|██▊       | 27666/100000 [00:56<03:06, 387.23it/s]

0.97155 0.9624


 28%|██▊       | 28326/100000 [00:58<02:48, 426.24it/s]

0.9719333333333333 0.9637


 29%|██▉       | 28910/100000 [00:59<02:53, 410.49it/s]

0.9724333333333334 0.9636


 29%|██▉       | 29487/100000 [01:00<02:56, 398.95it/s]

0.9728833333333333 0.9645


 30%|███       | 30137/100000 [01:01<02:31, 461.58it/s]

0.9730833333333333 0.9641


 31%|███       | 30736/100000 [01:02<02:28, 465.93it/s]

0.9736166666666667 0.9652


 31%|███▏      | 31277/100000 [01:04<02:37, 435.01it/s]

0.9735333333333334 0.9654


 32%|███▏      | 31921/100000 [01:05<02:41, 421.82it/s]

0.9742833333333333 0.9654


 32%|███▏      | 32495/100000 [01:06<02:41, 419.18it/s]

0.9745 0.9655


 33%|███▎      | 33086/100000 [01:07<02:46, 402.85it/s]

0.9744833333333334 0.9651


 34%|███▎      | 33685/100000 [01:08<02:36, 422.40it/s]

0.9751333333333333 0.9654


 34%|███▍      | 34279/100000 [01:10<02:41, 407.80it/s]

0.9759166666666667 0.966


 35%|███▍      | 34871/100000 [01:11<02:35, 420.07it/s]

0.9759166666666667 0.9666


 36%|███▌      | 35508/100000 [01:12<02:39, 404.77it/s]

0.9759833333333333 0.9668


 36%|███▌      | 36099/100000 [01:13<02:33, 415.54it/s]

0.9763166666666667 0.9668


 37%|███▋      | 36728/100000 [01:14<02:20, 448.82it/s]

0.9768 0.967


 37%|███▋      | 37298/100000 [01:16<02:47, 374.69it/s]

0.9767833333333333 0.9671


 38%|███▊      | 37874/100000 [01:17<02:36, 395.74it/s]

0.9773 0.9668


 38%|███▊      | 38477/100000 [01:18<02:27, 417.39it/s]

0.9777666666666667 0.9669


 39%|███▉      | 39122/100000 [01:19<02:26, 415.19it/s]

0.9777333333333333 0.967


 40%|███▉      | 39707/100000 [01:20<02:27, 409.12it/s]

0.9781333333333333 0.9674


 40%|████      | 40290/100000 [01:22<02:23, 414.84it/s]

0.9785833333333334 0.9671


 41%|████      | 40858/100000 [01:23<02:30, 392.04it/s]

0.9785 0.9682


 42%|████▏     | 41500/100000 [01:24<02:24, 404.26it/s]

0.97915 0.9671


 42%|████▏     | 42083/100000 [01:25<02:26, 394.01it/s]

0.9789833333333333 0.9674


 43%|████▎     | 42705/100000 [01:27<02:26, 392.37it/s]

0.9793 0.9672


 43%|████▎     | 43278/100000 [01:28<02:19, 407.94it/s]

0.9798666666666667 0.9686


 44%|████▍     | 43927/100000 [01:29<02:15, 412.38it/s]

0.9799833333333333 0.9684


 44%|████▍     | 44500/100000 [01:30<02:19, 397.08it/s]

0.9800833333333333 0.9685


 45%|████▌     | 45080/100000 [01:31<02:16, 402.35it/s]

0.98075 0.9686


 46%|████▌     | 45726/100000 [01:33<02:13, 406.78it/s]

0.9805 0.9685


 46%|████▋     | 46305/100000 [01:34<02:09, 414.35it/s]

0.9807 0.9689


 47%|████▋     | 46882/100000 [01:35<02:16, 388.95it/s]

0.9815833333333334 0.9692


 48%|████▊     | 47529/100000 [01:36<02:09, 404.49it/s]

0.98175 0.9692


 48%|████▊     | 48110/100000 [01:37<02:06, 410.50it/s]

0.9817 0.9698


 49%|████▊     | 48682/100000 [01:39<02:09, 395.96it/s]

0.9817666666666667 0.9687


 49%|████▉     | 49269/100000 [01:40<02:05, 404.89it/s]

0.9815666666666667 0.9703


 50%|████▉     | 49914/100000 [01:41<02:03, 407.13it/s]

0.98265 0.9698


 50%|█████     | 50497/100000 [01:42<02:17, 358.83it/s]

0.9827833333333333 0.9698


 51%|█████     | 51095/100000 [01:44<02:10, 375.32it/s]

0.9825166666666667 0.97


 52%|█████▏    | 51705/100000 [01:45<02:02, 395.57it/s]

0.98285 0.9697


 52%|█████▏    | 52279/100000 [01:46<01:58, 402.33it/s]

0.9833 0.97


 53%|█████▎    | 52920/100000 [01:47<01:48, 433.36it/s]

0.9833166666666666 0.9699


 53%|█████▎    | 53485/100000 [01:49<01:57, 395.53it/s]

0.9832333333333333 0.9702


 54%|█████▍    | 54120/100000 [01:50<01:55, 396.95it/s]

0.9836833333333334 0.9694


 55%|█████▍    | 54693/100000 [01:51<01:56, 388.76it/s]

0.9839333333333333 0.9701


 55%|█████▌    | 55272/100000 [01:52<01:51, 401.22it/s]

0.9842 0.9701


 56%|█████▌    | 55931/100000 [01:54<01:45, 417.69it/s]

0.9841333333333333 0.9698


 57%|█████▋    | 56505/100000 [01:55<01:49, 397.14it/s]

0.9844333333333334 0.9698


 57%|█████▋    | 57067/100000 [01:56<01:57, 364.91it/s]

0.9846333333333334 0.9709


 58%|█████▊    | 57702/100000 [01:57<01:45, 399.31it/s]

0.9847833333333333 0.9707


 58%|█████▊    | 58281/100000 [01:58<01:46, 390.13it/s]

0.9845833333333334 0.9709


 59%|█████▉    | 58909/100000 [02:00<01:42, 399.68it/s]

0.9843666666666666 0.9707


 59%|█████▉    | 59497/100000 [02:01<01:48, 372.53it/s]

0.98505 0.9707


 60%|██████    | 60120/100000 [02:02<01:37, 408.99it/s]

0.9852166666666666 0.9705


 61%|██████    | 60687/100000 [02:03<01:45, 372.27it/s]

0.9854666666666667 0.9705


 61%|██████▏   | 61287/100000 [02:05<01:41, 382.55it/s]

0.9858 0.9702


 62%|██████▏   | 61915/100000 [02:06<01:36, 395.50it/s]

0.9856 0.9708


 62%|██████▏   | 62474/100000 [02:07<01:46, 353.82it/s]

0.9855333333333334 0.9708


 63%|██████▎   | 63089/100000 [02:09<01:47, 343.49it/s]

0.98595 0.9709


 64%|██████▎   | 63726/100000 [02:10<01:21, 443.68it/s]

0.986 0.9702


 64%|██████▍   | 64301/100000 [02:11<01:30, 393.48it/s]

0.9864 0.9709


 65%|██████▍   | 64862/100000 [02:12<01:30, 390.29it/s]

0.98635 0.971


 65%|██████▌   | 65495/100000 [02:14<01:28, 390.06it/s]

0.9864 0.9713


 66%|██████▌   | 66124/100000 [02:15<01:26, 390.79it/s]

0.98645 0.9709


 67%|██████▋   | 66698/100000 [02:16<01:24, 395.90it/s]

0.9871833333333333 0.971


 67%|██████▋   | 67323/100000 [02:17<01:23, 392.39it/s]

0.9869666666666667 0.9708


 68%|██████▊   | 67885/100000 [02:19<01:24, 378.90it/s]

0.9870833333333333 0.971


 68%|██████▊   | 68463/100000 [02:20<01:37, 323.65it/s]

0.98715 0.9716


 69%|██████▉   | 69093/100000 [02:21<01:18, 393.47it/s]

0.9873833333333333 0.9712


 70%|██████▉   | 69722/100000 [02:22<01:16, 395.33it/s]

0.9875 0.9717


 70%|███████   | 70296/100000 [02:24<01:17, 384.28it/s]

0.9872333333333333 0.9717


 71%|███████   | 70913/100000 [02:25<01:15, 385.11it/s]

0.9878166666666667 0.9711


 72%|███████▏  | 71526/100000 [02:26<01:07, 424.50it/s]

0.9875666666666667 0.9709


 72%|███████▏  | 72095/100000 [02:27<01:10, 393.52it/s]

0.98825 0.9718


 73%|███████▎  | 72667/100000 [02:29<01:08, 399.71it/s]

0.9883666666666666 0.9718


 73%|███████▎  | 73305/100000 [02:30<01:07, 397.44it/s]

0.9883833333333333 0.9716


 74%|███████▍  | 73878/100000 [02:31<01:06, 393.09it/s]

0.98795 0.9717


 75%|███████▍  | 74502/100000 [02:32<01:06, 385.12it/s]

0.9885833333333334 0.9713


 75%|███████▌  | 75125/100000 [02:34<01:03, 389.84it/s]

0.9883666666666666 0.9721


 76%|███████▌  | 75697/100000 [02:35<01:02, 386.10it/s]

0.9886 0.9716


 76%|███████▋  | 76332/100000 [02:36<00:53, 445.51it/s]

0.9888833333333333 0.972


 77%|███████▋  | 76906/100000 [02:37<00:59, 391.12it/s]

0.98925 0.9715


 77%|███████▋  | 77465/100000 [02:39<00:59, 377.72it/s]

0.9891666666666666 0.9726


 78%|███████▊  | 78101/100000 [02:40<00:56, 390.13it/s]

0.98925 0.9717


 79%|███████▊  | 78730/100000 [02:41<00:47, 446.51it/s]

0.98945 0.972


 79%|███████▉  | 79290/100000 [02:42<00:54, 377.73it/s]

0.9895333333333334 0.9721


 80%|███████▉  | 79909/100000 [02:44<00:54, 371.97it/s]

0.9893666666666666 0.9721


 80%|████████  | 80467/100000 [02:45<00:49, 391.19it/s]

0.9894333333333334 0.9715


 81%|████████  | 81083/100000 [02:46<00:49, 384.85it/s]

0.9897333333333334 0.9723


 82%|████████▏ | 81723/100000 [02:47<00:46, 394.66it/s]

0.9898166666666667 0.9719


 82%|████████▏ | 82283/100000 [02:49<00:45, 389.10it/s]

0.99015 0.9721


 83%|████████▎ | 82913/100000 [02:50<00:44, 381.71it/s]

0.9901666666666666 0.9719


 83%|████████▎ | 83485/100000 [02:51<00:41, 397.08it/s]

0.9902333333333333 0.9718


 84%|████████▍ | 84119/100000 [02:52<00:40, 388.02it/s]

0.9903833333333333 0.9719


 85%|████████▍ | 84690/100000 [02:54<00:39, 391.30it/s]

0.9904666666666667 0.972


 85%|████████▌ | 85274/100000 [02:55<00:37, 397.16it/s]

0.9906833333333334 0.9718


 86%|████████▌ | 85893/100000 [02:56<00:38, 369.30it/s]

0.9909333333333333 0.972


 86%|████████▋ | 86491/100000 [02:57<00:36, 368.03it/s]

0.9906833333333334 0.9724


 87%|████████▋ | 87104/100000 [02:59<00:33, 379.37it/s]

0.9909833333333333 0.9723


 88%|████████▊ | 87672/100000 [03:00<00:31, 386.31it/s]

0.9909333333333333 0.9721


 88%|████████▊ | 88297/100000 [03:01<00:30, 387.07it/s]

0.9909833333333333 0.9723


 89%|████████▉ | 88872/100000 [03:02<00:29, 382.73it/s]

0.9909666666666667 0.9718


 90%|████████▉ | 89509/100000 [03:04<00:26, 394.65it/s]

0.9911666666666666 0.9718


 90%|█████████ | 90122/100000 [03:05<00:25, 382.93it/s]

0.99115 0.9724


 91%|█████████ | 90669/100000 [03:06<00:25, 370.67it/s]

0.9915166666666667 0.9718


 91%|█████████▏| 91302/100000 [03:07<00:22, 388.72it/s]

0.9916333333333334 0.9721


 92%|█████████▏| 91918/100000 [03:09<00:21, 381.62it/s]

0.9915666666666667 0.9713


 92%|█████████▏| 92472/100000 [03:10<00:19, 378.61it/s]

0.9919666666666667 0.9715


 93%|█████████▎| 93117/100000 [03:11<00:17, 401.63it/s]

0.992 0.9717


 94%|█████████▎| 93702/100000 [03:12<00:15, 403.15it/s]

0.9921666666666666 0.972


 94%|█████████▍| 94279/100000 [03:14<00:15, 358.42it/s]

0.99215 0.9719


 95%|█████████▍| 94901/100000 [03:15<00:13, 390.68it/s]

0.9922833333333333 0.9724


 96%|█████████▌| 95527/100000 [03:16<00:11, 390.16it/s]

0.9924 0.9721


 96%|█████████▌| 96078/100000 [03:18<00:10, 373.94it/s]

0.9925166666666667 0.9723


 97%|█████████▋| 96707/100000 [03:19<00:08, 382.25it/s]

0.9923333333333333 0.9718


 97%|█████████▋| 97275/100000 [03:20<00:07, 387.78it/s]

0.9923333333333333 0.9727


 98%|█████████▊| 97891/100000 [03:21<00:05, 386.48it/s]

0.99275 0.9723


 99%|█████████▊| 98511/100000 [03:23<00:03, 385.30it/s]

0.9926166666666667 0.9721


 99%|█████████▉| 99069/100000 [03:24<00:02, 384.74it/s]

0.9928166666666667 0.9723


100%|█████████▉| 99699/100000 [03:25<00:00, 389.59it/s]

0.9926666666666667 0.9724


100%|██████████| 100000/100000 [03:26<00:00, 485.14it/s]


0.9928166666666667 0.9723


In [65]:
class Dropout:
    """Dropout layer.

    In training mode a fresh random keep-mask is drawn and applied; in
    inference mode the input is instead scaled by ``1 - dropout_ratio``.
    """

    def __init__(self, dropout_ratio=0.5):
        # Keep-mask of the latest training-mode forward pass.
        self.mask = None
        self.dropout_ratio = dropout_ratio

    def forward(self, x, train_flg=True):
        """Apply dropout to ``x``.

        Args:
            x: input array; its ``shape`` decides the mask shape.
            train_flg: True draws a new mask, False only rescales.
        """
        if not train_flg:
            return x * (1.0 - self.dropout_ratio)

        # An element is kept where its uniform sample exceeds the ratio.
        self.mask = np.random.rand(*x.shape) > self.dropout_ratio
        return x * self.mask

    def backward(self, dout):
        """Pass ``dout`` through the mask of the latest training forward."""
        return dout * self.mask

In [66]:
# Small 1-D sample array used by the following cells to explore `*x.shape`
x = np.array([1, 2, 100])

In [67]:
# Display the array (a bare last expression is echoed by the notebook)
x

array([  1,   2, 100])

In [68]:
# x.shape is unpacked into positional arguments; for this x the call is rand(3)
np.random.rand(*x.shape)

array([0.37932987, 0.58750367, 0.15248221])

In [69]:
# The shape is a tuple, one entry per axis
x.shape

(3,)

In [70]:
# Does the star take the values out of the tuple (unpack them) when passing
# them to a function? Yes: the tuple's elements become separate arguments.
print(*x.shape)

3


In [71]:
# CNN from chapter 6
from collections import OrderedDict

class SimpleConvNet:
    """Small CNN: Conv1 -> Relu1 -> Pool1 -> Affine1 -> Relu2 -> Affine2,
    followed by a SoftmaxWithLoss output layer.

    Attributes:
        params: dict with the arrays 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of the layers in forward order.
        last_layer: the SoftmaxWithLoss output layer.
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Create the parameters and wire them into the layers.

        Args:
            input_dim: presumably (channels, height, width) -- TODO confirm.
                Only ``input_dim[0]`` (second axis of W1) and
                ``input_dim[1]`` (spatial size) are read, so a square
                input is assumed.
            conv_param: dict with 'filter_num', 'filter_size', 'pad' and
                'stride'. The default dict is shared between calls; it is
                only read here, never modified.
            hidden_size: number of units of the first Affine layer.
            output_size: number of outputs of the second Affine layer.
            weight_init_std: scale applied to the ``randn`` weight samples.
        """
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
        # The 2x2 / stride-2 pooling configured below halves each spatial
        # side, hence the two divisions by 2; the result is the flattened
        # size fed to Affine1.
        pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))

        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Convolution and Pooling are not defined in this class; they must
        # be available in the surrounding namespace.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` (no softmax)."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the output layer's loss for inputs ``x`` and targets ``t``."""
        y = self.predict(x)
        # Must use the attribute name set in __init__ (`last_layer`).
        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Return the gradients of the loss computed by backpropagation.

        Returns:
            dict with 'W1'/'b1' from Conv1, 'W2'/'b2' from Affine1 and
            'W3'/'b3' from Affine2 (their ``dW`` / ``db`` attributes).
        """
        # Forward pass first, so every layer has a chance to cache what
        # its backward() needs.
        self.loss(x, t)

        dout = self.last_layer.backward(1)

        # Walk the layers from the output side back to the input side.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db

        return grads

In [72]:
# Build the network with all default arguments
net = SimpleConvNet()

In [73]:
# Prints the bound method object itself; gradient() is not called here
print(net.gradient)

<bound method SimpleConvNet.gradient of <__main__.SimpleConvNet object at 0x7fdcf86c45b0>>
