In [1]:
#http://tadaoyamaoka.hatenablog.com/entry/2016/04/10/120305
#Pythonで3層パーセプトロンの誤差逆伝播を実装してみる


In [18]:
import numpy as np

def sigmoid(u):
    return 1 / (1 + np.exp(-u))

def softmax(u):
    e = np.exp(u)
    return e / np.sum(e)

def forward(x):
    global W1
    global W2
    u1 = x.dot(W1)
    z1 = sigmoid(u1)
    u2 = z1.dot(W2)
    y = softmax(u2)
    return y, z1

In [19]:
x1 = np.array([4,3,3,4])
x2 = np.array([2,2,2,1])
x = np.array([[x1, x2]])


$$(x1, x2) \cdot \begin{pmatrix}
 w_{11}^{1}&w_{21}^{1} \\ 
 w_{12}^{1}&w_{22}^{1} 
\end{pmatrix} 
= 
\bigl(\begin{smallmatrix}
x1 w_{11}^{1}+x2 w_{12}^{1}  &  x1 w_{21}^{1}+x2 w_{22}^{1}
\end{smallmatrix}\bigr)
$$

In [20]:
W1 = np.array([[0.1, 0.3], [0.2, 0.4]])
W2 = np.array([[0.1, 0.3], [0.2, 0.4]])
learning_rate = 0.005

In [21]:

x = np.array([[1, 0.5]])
y, z1 = forward(x)

print("z1:", z1)
print("y:", y)

z1: [[0.549834   0.62245933]]
y: [[0.44165237 0.55834763]]


In [33]:
def back_propagation(x, z1, y, d):
    global W1
    global W2
    delta2 = y - d
    grad_W2 = z1.T.dot(delta2)

    sigmoid_dash = z1 * (1 - z1)
    delta1 = delta2.dot(W2.T) * sigmoid_dash
    grad_W1 = x.T.dot(delta1)

    W2 -= learning_rate * grad_W2
    W1 -= learning_rate * grad_W1

In [34]:
# 誤差逆伝播
d = np.array([[1, 0]]) # 教師データ
back_propagation(x, z1, y, d)
back_propagation(x, z1, y, d)
back_propagation(x, z1, y, d)


In [35]:
print("z1:", z1)
print("y:", y)
print("W1", W1)
print("W2", W2)

z1: [[0.549834   0.62245933]]
y: [[0.44165237 0.55834763]]
W1 [[0.09883257 0.29890116]
 [0.19941628 0.39945058]]
W2 [[0.11381493 0.28618507]
 [0.21563969 0.38436031]]


In [36]:
def cross_entropy_error(y, t, eps = 1e-15):
    y_clipped = np.clip(y, eps , 1 - eps)
    return -1 * (np.sum(t * np.log(y_clipped) + (1 - t) * np.log(1 - y_clipped)))

In [37]:
cross_entropy_error(y, d)

1.6344643811364101