# 活性化関数レイヤの実装

## ReLUレイヤの実装

In [1]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

In [27]:
x = np.array([[1.0,-0.5], [-2.0,3.0]])
print(x)
print(x.shape)

[[ 1.  -0.5]
 [-2.   3. ]]
(2, 2)


Reluはmask変数をもつ。０以上であればTrue、以下であればFalseになる。    
あとはxの値がそのまま。

In [28]:
mask = (x <= 0)

In [29]:
print(mask)

[[False  True]
 [ True False]]


In [30]:
relu = Relu()
print(relu.forward(x))

[[1. 0.]
 [0. 3.]]


backwardした場合には0のところにゼロを入れて実質変わってない。

In [32]:
print(relu.backward(x))
print(relu.backward(x))

[[1. 0.]
 [0. 3.]]
[[1. 0.]
 [0. 3.]]


入力を変えてみる

In [33]:
relu = Relu()
x = np.array(range(10))
print(x)
print(x.shape)

[0 1 2 3 4 5 6 7 8 9]
(10,)


In [36]:
relu = Relu()
print(relu.forward(x))

[0 1 2 3 4 5 6 7 8 9]


In [37]:
print(relu.backward(x))
print(relu.backward(x))

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]


## Sigmoidレイヤ

In [15]:
class Sigmoid:
    def __init__(self):
        self.out = None

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def forward(self, x):
        out = self.sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx


In [16]:
x = np.array([[1.0,-0.5], [-2.0,3.0]])
print(x)
print(x.shape)

[[ 1.  -0.5]
 [-2.   3. ]]
(2, 2)


In [17]:
sigmoid = Sigmoid()
print(sigmoid.forward(x))

[[0.73105858 0.37754067]
 [0.11920292 0.95257413]]


In [18]:
print(sigmoid.backward(x))
print(sigmoid.backward(x))

[[ 0.19661193 -0.11750186]
 [-0.20998717  0.13552998]]
[[ 0.19661193 -0.11750186]
 [-0.20998717  0.13552998]]


## Affineレイヤ

In [19]:
X = np.random.rand(2)
W = np.random.rand(2, 3)
B = np.random.rand(3)

In [20]:
print(X.shape)
print(W.shape)
print(B.shape)

(2,)
(2, 3)
(3,)


In [21]:
Y = np.dot(X, W) + B

In [22]:
Y

array([0.98464073, 1.3105723 , 0.3625971 ])

## バッチ版Affineレイヤ

In [23]:
X_dot_W = np.array([[0,0,0], [10,10,10]])

In [24]:
B = np.array([1,2,3])

In [25]:
X_dot_W

array([[ 0,  0,  0],
       [10, 10, 10]])

In [26]:
X_dot_W + B

array([[ 1,  2,  3],
       [11, 12, 13]])

In [28]:
dY = np.array([[1,2,3], [4,5,6]])

In [29]:
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [30]:
dB = np.sum(dY, axis=0)
dB

array([5, 7, 9])

In [64]:
class Affine:
    def __init__(self, W, b):
        self.W =W
        self.b = b
        
        self.x = None
        self.original_x_shape = None
        # 重み・バイアスパラメータの微分
        self.dW = None
        self.db = None

    def forward(self, x):
        # テンソル対応
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        
        dx = dx.reshape(*self.original_x_shape)  # 入力データの形状に戻す（テンソル対応）
        return dx


In [65]:
x = np.array([[1.0,-0.5], [-2.0,3.0]])
x.shape

(2, 2)

In [66]:
W = np.random.rand(3, 2)
B = np.random.rand(2)

In [67]:
affine = Affine(X, B)
print(affine.forward(x))

[ 1.39177161 -0.83866631]


In [68]:
W = np.random.rand(4, 2)
B = np.random.rand(2)

In [69]:
affine = Affine(X, B)
print(affine.forward(x))

[ 1.05722903 -0.79437147]


## Softmax-with-Lossレイヤ

In [71]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None # softmaxの出力
        self.t = None # 教師データ

    def softmax(self, x):
        if x.ndim == 2:
            x = x.T
            x = x - np.max(x, axis=0)
            y = np.exp(x) / np.sum(np.exp(x), axis=0)
            return y.T 

        x = x - np.max(x) # オーバーフロー対策
        return np.exp(x) / np.sum(np.exp(x))

    def cross_entropy_error(self, y, t):
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # 教師データがone-hot-vectorの場合、正解ラベルのインデックスに変換
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

    def forward(self, x, t):
        self.t = t
        self.y = self.softmax(x)
        self.loss = self.cross_entropy_error(self.y, self.t)
        
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size: # 教師データがone-hot-vectorの場合
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        
        return dx

In [72]:
x = np.array([[1.0,-0.5], [-2.0,3.0]])
x.shape

(2, 2)

In [73]:
y = np.array([[0,1]])
y.shape

(1, 2)

In [74]:
softmax_with_loss = SoftmaxWithLoss()
print(softmax_with_loss.forward(x, y))

0.10406420174253615


In [76]:
print(softmax_with_loss.backward())
print(softmax_with_loss.backward())

[[-0.18242552 -0.81757448]
 [ 0.00669285  0.99330715]]
[[-0.18242552 -0.81757448]
 [ 0.00669285  0.99330715]]


# 誤差逆伝播の実装

ch04-3.ipynbに書いた実装を参照