# Chapter 4: ニューラルネットワークの学習
## 2乗和誤差と交差エントロピー誤差

In [1]:
import numpy as np
from misc.functions import mean_squared_error, cross_entropy_error

# One-hot target: the only non-zero entry is at index 2.
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
# y1 puts its largest score (0.6) on index 2, y2 puts it on index 7.
y1 = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
y2 = np.array([0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0])

# (banner, prediction) pairs, reported in this order.
cases = (
    ('====== Case. 1 ======', y1),
    ('====== Case. 2 ======', y2),
)
# (format template, loss function) pairs, evaluated per case in this order.
losses = (
    ('MSE: %.8f', mean_squared_error),
    ('CEE: %.8f', cross_entropy_error),
)

for banner, prediction in cases:
    print(banner)
    for template, loss_fn in losses:
        print(template % loss_fn(prediction, t))
    # Blank line separating the cases.
    print()

====== Case. 1 ======
MSE: 0.09750000
CEE: 0.51082546

====== Case. 2 ======
MSE: 0.59750000
CEE: 2.30258409



## 損失関数計算のバッチ処理

In [2]:
import numpy as np
# Import both loss helpers used below so this cell also runs on a fresh
# kernel, without relying on names imported by an earlier cell.
from misc.functions import mean_squared_error, cross_entropy_error

# A single one-hot label (non-zero entry at index 2).
# NOTE(review): t is 1-D while y holds two rows, so the same label is applied
# to both rows -- this relies on the loss helpers accepting that; confirm.
t = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
# Two predictions stacked as rows: the first peaks at index 2, the second at
# index 7.
y = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
              [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]])
print('====== Batch Processing ======')
print('MSE: %.8f' % mean_squared_error(y, t))
print('CEE: %.8f' % cross_entropy_error(y, t))
print('')

====== Batch Processing ======
MSE: 0.34750000
CEE: 1.40670478



## ニューラルネットワークと勾配

In [3]:
import numpy as np
from misc.neuralnet import SimpleNet

# Input vector (2 values) and a one-hot target whose non-zero entry is at
# index 2.
x = np.array([0.6, 0.9])
t = np.array([0, 0, 1])

net = SimpleNet()
# Replace the network's weights with a fixed 2x3 matrix so the gradient shown
# below is repeatable.
# NOTE(review): presumably SimpleNet() initialises W randomly -- confirm.
net.W = np.array([
    [0.47355232, 0.9977393, 0.84668094],
    [0.85557411, 0.03563661, 0.69422093],
])

# Kept as the final expression so the notebook displays the returned gradient.
net.numerical_gradient(x, t)

array([[ 0.21924757,  0.14356243, -0.36281   ],
       [ 0.32887136,  0.21534364, -0.544215  ]])

## MNIST データセットの学習

In [4]:
import numpy as np
from misc.neuralnet import TwoLayerNet
from misc.mnist import load_train_data, load_test_data

# Training settings: number of update steps, samples drawn per step, and the
# step size applied to each gradient.
n_iter = 1
batch_size = 10
alpha = 0.1

# NOTE(review): the meaning of the positional True flag is defined in
# misc.mnist and is not visible here -- confirm.
X_train, y_train = load_train_data(True)
X_test, y_test = load_test_data(True)

# Convert to 64-bit floats and divide by 255.
# np.float64 is used instead of the alias np.float_, which NumPy 2.0 removed.
# NOTE(review): presumably the inputs are 8-bit pixel values, making the
# result lie in [0, 1] -- confirm.
X_train = X_train.astype(np.float64) / 255
X_test = X_test.astype(np.float64) / 255

# NOTE(review): arguments are presumably (input, hidden, output) sizes --
# confirm against TwoLayerNet.
network = TwoLayerNet(784, 50, 10)
for k in range(n_iter):
    # Draw batch_size row indices at random (np.random.choice samples with
    # replacement by default, so an index may repeat).
    batch_mask = np.random.choice(X_train.shape[0], batch_size)
    X_batch = X_train[batch_mask]
    y_batch = y_train[batch_mask]

    # One plain gradient-descent step on every parameter the network reports
    # a gradient for.
    grad = network.numerical_gradient(X_batch, y_batch)
    for key in grad:
        network.params[key] -= alpha * grad[key]

# Kept as the final expression so the notebook displays the returned score.
network.score(X_test, y_test)

0.1028