In [1]:
import numpy as np

## 오차제곱합(Sum of Squared Error, SSE)

In [2]:
def sum_squre_error(y, t):
    return 0.5*np.sum((y-t)**2)

In [3]:
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]

sum_squre_error(np.array(y), np.array(t))

0.09750000000000003

## 교차 엔트로피 오차(Cross Entropy Error, CEE)

In [4]:
def cross_entropy_error(y, t):
    delta = 1e-7 # -inf 발생 방지
    return -np.sum(np.array(t)*np.log(np.array(y)+delta))

## 미니배치 학습

In [5]:
import os
import sys
sys.path.append(os.pardir)

import numpy as np

from dataset.mnist import load_mnist

(x_train, y_train), (x_test, y_test) = load_mnist(normalize=True, one_hot_label=True)

In [6]:
train_size = x_train.shape[0]

batch_size = 10
batch_mask = np.random.choice(train_size, batch_size) # 학습 데이터에서 batch_size개의 데이터 랜덤 추출

x_batch = x_train[batch_mask]
y_batch = y_train[batch_mask]

In [7]:
'''
배치 데이터용 CEE + y가 원핫인코딩 형태로 들어왔을 때
'''
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    
    batch_size = y.shape[0]
    
    return -np.sum(t*np.log(y+1e-7))/batch_size

In [8]:
'''
y가 정수 레이블 그대로 들어왔을 때
'''
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    
    batch_size = y.shape[0]
    
    print(y[np.arange(batch_size), t])
    
    return -np.sum(np.log(y[np.arange(batch_size), t]+1e-7))/batch_size

In [9]:
pred = [1, 3, 9, 5, 2]
real = [2, 1, 0, 3, 4]

cross_entropy_error(np.array(pred), np.array(real))

[[9 3 1 5 2]]


-5.598422173442812

## 기울기

In [10]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x) # x와 형상이 같으면서 모든 원소 값이 0인 배열 생성
    
    for idx in range(x.size):
        tmp_val = x[idx]
        
        # f(x+h) 계산
        x[idx] = tmp_val+h
        fxh1 = f(x)
        
        # f(x-h) 계산
        x[idx] = tmp_val-h
        fxh2 = f(x)
        
        grad[idx] = (fxh1-fxh2)/(2*h)
        
        x[idx] = tmp_val
        
    return grad

## 경사하강법

In [11]:
def gradient_descent(f, init_x, lr=0.005, step_num=100):
    x = init_x
    
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr*grad
    
    return x

In [12]:
from common.functions import *
from common.gradient import numerical_gradient

class SimpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)
        
    def predict(self, x):
        return np.dot(x, self.W)
    
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        
        loss = cross_entropy_error(y, t)
        
        return loss

In [13]:
net = SimpleNet()

x = np.array([0.6, 0.9])
t = np.array([0, 0, 1])

p = net.predict(x)

p

array([-1.17912328,  1.3821093 , -2.60556921])

In [14]:
def f(W):
    return net.loss(x, t)

In [16]:
dw = numerical_gradient(f, net.W)
dw

array([[ 0.04227729,  0.54756581, -0.5898431 ],
       [ 0.06341594,  0.82134871, -0.88476465]])