In [47]:
import numpy as np
import pprint
import keras
from keras.api.datasets import mnist

In [48]:
# ニューラルネットワークの構造
input_size = 784    # 入力層のニューロン数
hidden_size_1 = 256   # 隠れ層のニューロン数
hidden_size_2 = 128   # 隠れ層のニューロン数
hidden_size_3 = 32   # 隠れ層のニューロン数
output_size = 10   # 出力層のニューロン数

# 学習率
learning_rate = 0.01

In [49]:
# 重みの初期化
np.random.seed(0) # 再現性のためのシード値設定
W1 = np.random.randn(input_size, hidden_size_1)  # 入力層から隠れ層1への重み
b1 = np.zeros((1, hidden_size_1))                # 隠れ層1のバイアス
W2 = np.random.randn(hidden_size_1, hidden_size_2) # 隠れ層1から隠れ層2への重み
b2 = np.zeros((1, hidden_size_2))                # 隠れ層2のバイアス
W3 = np.random.randn(hidden_size_2, hidden_size_3) # 隠れ層2から出力層への重み
b3 = np.zeros((1, hidden_size_3))                # 出力層のバイアス
W4 = np.random.randn(hidden_size_3, output_size) # 隠れ層2から出力層への重み
b4 = np.zeros((1, output_size))                # 出力層のバイアス

pprint.pprint(W1)
pprint.pprint(b1)
pprint.pprint(W2)
pprint.pprint(b2)
pprint.pprint(W3)
pprint.pprint(b3)
pprint.pprint(W4)
pprint.pprint(b4)

array([[ 1.76405235,  0.40015721,  0.97873798, ..., -0.31932842,
         0.69153875,  0.69474914],
       [-0.72559738, -1.38336396, -1.5829384 , ...,  1.30142807,
         0.89526027,  1.37496407],
       [-1.33221165, -1.96862469, -0.66005632, ..., -1.10290621,
        -0.10169727,  0.01927938],
       ...,
       [ 1.86140699, -0.78053759,  0.03168013, ..., -1.06306082,
         0.43608227, -1.17683911],
       [ 1.39020734, -0.51657398, -0.13652254, ...,  0.87964912,
         0.80342249, -0.29504386],
       [ 1.57119338, -0.15435302,  0.08733296, ..., -0.26611561,
         2.52287972,  0.73131543]])
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.,

In [50]:
# シグモイド関数
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# シグモイド関数の導関数
def sigmoid_derivative(x):
    return x * (1 - x)

# relu関数
def relu(x):
    return x * (0 < x)

# relu関数の導関数
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [51]:
def forward_propagation(X):
    # 隠れ層1への入力
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)

    # 隠れ層2への入力
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    # 隠れ層3への入力
    z3 = np.dot(a2, W3) + b3
    a3 = sigmoid(z3)

    # 出力層への入力
    z4 = np.dot(a3, W4) + b4
    a4 = sigmoid(z4)
    
    return a1, a2, a3, a4

In [52]:
def backward_propagation(X, y, a1, a2, a3, a4):
    global W1, b1, W2, b2, W3, b3, W4, b4

    # 出力層の誤差
    output_error = y - a4
    output_delta = output_error * sigmoid_derivative(a4)

    # 隠れ層の誤差
    hidden_3_error = np.dot(output_delta, W4.T)
    hidden_3_delta = hidden_3_error * sigmoid_derivative(a3)

    # 隠れ層の誤差
    hidden_2_error = np.dot(hidden_3_delta, W3.T)
    hidden_2_delta = hidden_2_error * sigmoid_derivative(a2)

    # 隠れ層の誤差
    hidden_1_error = np.dot(hidden_2_delta, W2.T)
    hidden_1_delta = hidden_1_error * sigmoid_derivative(a1)

    # 重みとバイアスの更新
    W4 += learning_rate * np.dot(a3.T, output_delta)
    b4 += learning_rate * np.sum(output_delta, axis=0, keepdims=True)
    W3 += learning_rate * np.dot(a2.T, hidden_3_delta)
    b3 += learning_rate * np.sum(hidden_3_delta, axis=0, keepdims=True)
    W2 += learning_rate * np.dot(a1.T, hidden_2_delta)
    b2 += learning_rate * np.sum(hidden_2_delta, axis=0, keepdims=True)
    W1 += learning_rate * np.dot(X.T, hidden_1_delta)
    b1 += learning_rate * np.sum(hidden_1_delta, axis=0, keepdims=True)

In [53]:
def train(X, y, iterations):
    for i in range(iterations):
        # フォワードプロパゲーション
        a1, a2, a3, a4 = forward_propagation(X)
        
        # バックプロパゲーション
        backward_propagation(X, y, a1, a2, a3, a4)
        
        if iterations <= 300:
            loss = np.mean(np.square(y - a4))
            print(f'Iteration {i+1}, Loss: {loss}')
            continue
            
        if (i+1) % 1000 == 0:
            loss = np.mean(np.square(y - a4))
            print(f'Iteration {i+1}, Loss: {loss}')

In [54]:
# トレーニングデータの例
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

y = np.array([[0, 1],
              [1, 0],
              [1, 0],
              [0, 1]])

# データ抽出
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# one-hot-label に変換
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 学習を実行
train(x_train, y_train, 300)

60000 train samples
10000 test samples
Iteration 1, Loss: 0.5150257601150099
Iteration 2, Loss: 0.09977476952473072
Iteration 3, Loss: 0.17985125346467112
Iteration 4, Loss: 0.09999999948454616
Iteration 5, Loss: 0.0999999994845334
Iteration 6, Loss: 0.09999999948452072
Iteration 7, Loss: 0.09999999948450804
Iteration 8, Loss: 0.09999999948449534
Iteration 9, Loss: 0.09999999948448257
Iteration 10, Loss: 0.09999999948446991
Iteration 11, Loss: 0.09999999948445717
Iteration 12, Loss: 0.09999999948444445
Iteration 13, Loss: 0.0999999994844318
Iteration 14, Loss: 0.09999999948441912
Iteration 15, Loss: 0.09999999948440638
Iteration 16, Loss: 0.09999999948439363
Iteration 17, Loss: 0.09999999948438096
Iteration 18, Loss: 0.09999999948436825
Iteration 19, Loss: 0.09999999948435552
Iteration 20, Loss: 0.09999999948434285
Iteration 21, Loss: 0.09999999948433017
Iteration 22, Loss: 0.09999999948431741
Iteration 23, Loss: 0.0999999994843047
Iteration 24, Loss: 0.09999999948429202
Iteration 25, 

In [64]:
def predict(X):
    # 隠れ層1への入力
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)

    # 隠れ層2への入力
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    # 隠れ層3への入力
    z3 = np.dot(a2, W3) + b3
    a3 = sigmoid(z3)

    # 出力層への入力
    z4 = np.dot(a3, W4) + b4
    a4 = sigmoid(z4)
    
    return a4
    
    
y_pre = predict(x_train)

for i in range(5):
    print(np.argmax(y_pre[i]), np.argmax(y_train[i]))

0 0
