In [1]:
import numpy as np

In [2]:
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

In [3]:
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out

        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx

In [5]:
if __name__ == '__main__':
    x = np.array([[1.0, -0.5], [-2.0, 3.0]])
    print(x)

    mask = (x <= 0)
    print(mask)
    
    X = np.random.rand(2)     # 입력
    W = np.random.rand(2, 3)  # 가중치
    B = np.random.rand(3)     # 편향

    print(X.shape)  # (2,)
    print(W.shape)  # (2, 3)
    print(B.shape)  # (3,)

    Y = np.dot(X, W) + B

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]
(2,)
(2, 3)
(3,)


In [7]:
if __name__ == '__main__':
    X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
    B = np.array([1, 2, 3])
    print(X_dot_W)
    
    print(X_dot_W + B)
    
    dY = np.array([[1, 2, 3], [4, 5, 6]])
    print(dY)
    
    dB = np.sum(dY, axis=0)
    print(dB)

[[ 0  0  0]
 [10 10 10]]
[[ 1  2  3]
 [11 12 13]]
[[1 2 3]
 [4 5 6]]
[5 7 9]


In [8]:
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        return dx

In [9]:
def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c)  # 오버플로 대책
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

In [10]:
def cross_entropy_error(y, t):
    delta = 1e-7  # 0일때 -무한대가 되지 않기 위해 작은 값을 더함
    return -np.sum(t * np.log(y + delta))

In [11]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None  # 손실
        self.y = None     # softmax의 출력
        self.t = None     # 정답 레이블(원-핫 벡터)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)  # 3.5.2, 4.2.2에서 구현
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = self.y - self.t / batch_size

        return dx

In [12]:
if __name__ == '__main__':
    swl = SoftmaxWithLoss()
    a = np.array([1, 8, 3])
    t = np.array([0, 1, 0])
    print(swl.forward(a, t)) 
    print(swl.backward())

    a = np.array([1, 3, 8]) 
    print(swl.forward(a, t))
    print(swl.backward())

0.007620616629495912
[0.00090496 0.65907491 0.00668679]
5.0076057626568575
[ 9.04959183e-04 -3.26646539e-01  9.92408247e-01]
