## 47단계: 소프트맥스 함수와 교차 엔트로피 오차

> 다중 클래스 분류 도전

### 47.1 슬라이스 조작 함수

get_item 함수 구현 (부록 B 참고)

<img src="images/그림 47-1.png" width=400/>

In [1]:
# dezero/functions.py
import numpy as np
from dezero import Function

class GetItem(Function):
    """Differentiable indexing: the forward pass evaluates ``x[slices]``.

    The index expression is stored at construction time and applied verbatim
    to the input array, so anything the array type accepts as a subscript
    can be passed as ``slices``.
    """

    def __init__(self, slices):
        self.slices = slices

    def forward(self, x):
        """Return the sub-array selected by the stored index."""
        return x[self.slices]

    def backward(self, gy):
        """Route the upstream gradient ``gy`` back via ``GetItemGrad``."""
        # Single-input function: unpack the one recorded input to read its shape.
        (source,) = self.inputs
        grad_fn = GetItemGrad(self.slices, source.shape)
        return grad_fn(gy)


class GetItemGrad(Function):
    """Backward counterpart of ``GetItem``.

    The forward pass builds a zero array of shape ``in_shape`` and adds the
    incoming gradient into the positions selected by ``slices``.
    """

    def __init__(self, slices, in_shape):
        self.in_shape = in_shape
        self.slices = slices

    def forward(self, gy):
        """Scatter ``gy`` into a zero array shaped like the original input."""
        scattered = np.zeros(self.in_shape, dtype=gy.dtype)
        # np.add.at is unbuffered, so an index that appears several times
        # accumulates every contribution instead of keeping only the last.
        np.add.at(scattered, self.slices, gy)
        return scattered

    def backward(self, ggx):
        """Differentiate the scatter: index ``ggx`` with the same slices."""
        return get_item(ggx, self.slices)


def get_item(x, slices):
    """Return ``x[slices]`` computed through a ``GetItem`` function object.

    Args:
        x: Input passed to the ``GetItem`` instance.
        slices: Index expression forwarded to ``GetItem``.
    """
    return GetItem(slices)(x)

In [2]:
# Sample Usage

import numpy as np
from dezero import Variable
import dezero.functions as F

# Build a 2x3 integer Variable and select row 1 through get_item.
x = Variable(np.array([[1, 2, 3], [4, 5, 6]]))
y = F.get_item(x, 1)
print(y)

Variable([4 5 6])


In [3]:
# Backpropagate from the selected row; per the output below, only row 1 of
# x.grad receives ones and the unselected row stays zero.
y.backward()
print(x.grad)

Variable([[0 0 0]
          [1 1 1]])


In [4]:
# Index with an integer array containing a repeat: row 0 is picked twice,
# row 1 once, giving a 3x3 result.
x = Variable(np.array([[1, 2, 3], [4, 5, 6]]))
indices = np.array([0, 0, 1])
y = F.get_item(x, indices)
print(y)

Variable([[1 2 3]
          [1 2 3]
          [4 5 6]])


In [5]:
Variable.__getitem__ = F.get_item  # install get_item as Variable's __getitem__ method

# With the assignment above, subscript syntax on a Variable calls get_item.
y = x[1]
print(y)

# Tuple indices (a slice plus an integer) are forwarded unchanged.
y = x[:, 2]
print(y)

Variable([4 5 6])
Variable([3 6])


### 47.2 소프트맥스 함수

<img src="images/식 47.1.png" width=400/>

In [6]:
from dezero.models import MLP

# Create an MLP from the tuple (10, 3) and run one 2-feature sample through it.
# NOTE(review): (10, 3) is presumably the per-layer output sizes -- confirm
# against dezero.models.MLP. The printed result has 3 values for the sample.
model = MLP((10, 3))
x = np.array([[0.2, -0.4]])
y = model(x)
print(y)

Variable([[ 0.21256552  0.75854128 -1.03716231]])


In [7]:
from dezero import Variable, as_variable
import dezero.functions as F

def softmax1d(x):
    """Softmax over every element of ``x`` taken together.

    Each element is exponentiated and divided by the sum of all the
    exponentials. The sum is taken without an axis argument, so the whole
    input is normalised as a single distribution.

    Args:
        x: Input converted with ``as_variable`` before use.

    Returns:
        The exponentials divided by their total.
    """
    exps = F.exp(as_variable(x))
    return exps / F.sum(exps)

# Feed one sample through the model, then normalise the raw outputs with
# softmax1d; the values printed for p below sum to 1.
x = Variable(np.array([[0.2, -0.4]]))
y = model(x)
p = softmax1d(y)
print(y)
print(p)

Variable([[ 0.21256552  0.75854128 -1.03716231]])
Variable([[0.33190891 0.57297171 0.09511938]])


<img src="images/그림 47-2.png" width=500/>

In [8]:
# dezero/functions.py

def softmax_simple(x, axis=1):
    """Softmax along ``axis``, built from exp, sum and division.

    Args:
        x: Input converted with ``as_variable`` before use.
        axis: Axis along which the exponentials are summed (default 1).

    Returns:
        The exponentials divided by their sum along ``axis``.
    """
    exps = F.exp(as_variable(x))
    # keepdims=True keeps the reduced axis so the division broadcasts.
    denom = F.sum(exps, axis=axis, keepdims=True)
    return exps / denom

### 47.3 교차 엔트로피 오차

다중 클래스에 적합한 손실 함수 구현

<img src="images/식 47.2.png" width=400/>

$\mathbf{t} = (0,0,1)$이고 $\mathbf{p} = (p_0, p_1, p_2)$인 경우를 대입하면 다음과 같다.

<img src="images/식 47.3.png" width=400/>


In [9]:
# dezero/functions.py

def softmax_cross_entropy_simple(x, t):
    """Mean cross-entropy loss between softmax(x) and integer labels ``t``.

    Args:
        x: Scores with one row per sample; indexed as ``[sample, class]``.
        t: Integer class label for each sample; its ``.data`` is used as the
            column index into the log-probabilities.

    Returns:
        The negative mean of the log-probability assigned to each sample's
        labelled class.

    Note:
        ``log_p[...]`` uses subscript syntax on a Variable, so
        ``Variable.__getitem__`` must already be set to ``get_item``.
    """
    x, t = as_variable(x), as_variable(t)
    N = x.shape[0]

    # The original called a bare `softmax`, which is not defined in this
    # file; use the softmax_simple implementation defined here instead.
    p = softmax_simple(x)
    p = F.clip(p, 1e-15, 1.0)  # keep values away from 0 so log() stays finite
    log_p = F.log(p)
    # Pick, for each row i, the log-probability of its labelled class t[i].
    tlog_p = log_p[np.arange(N), t.data]
    y = -1 * F.sum(tlog_p) / N
    return y

In [10]:
# Four 2-feature samples, each with an integer class label in t.
x = np.array([[0.2, -0.4], [0.3, 0.5], [1.3, -3.2], [2.1, 0.3]])
t = np.array([2, 0, 1, 0])
y = model(x)

# NOTE: this calls the library's F.softmax_cross_entropy, not the
# softmax_cross_entropy_simple function defined in this file.
loss = F.softmax_cross_entropy(y, t)
print(loss)

Variable(1.2768203800191946)
