In [None]:
# 4.2.1 평균 제곱 오차
import numpy as np

def mean_squared_error(y, t):
  """Return half the sum of squared differences between `y` and `t`.

  Args:
    y: Predicted values (NumPy array).
    t: Target values, broadcast-compatible with `y`.

  Returns:
    0.5 * sum((y - t) ** 2) taken over every element.
  """
  residual = y - t
  squared = residual ** 2
  return 0.5 * np.sum(squared)

In [None]:
# One-hot target: the correct class is index 2.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# The first prediction puts most mass on index 2, the second on index 7.
for y in (
    [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
    [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0],
):
  print(mean_squared_error(np.array(y), np.array(t)))

In [None]:
# 4.2.2 교차 엔트로피 오차
def cross_entropy_error(y, t):
  """Return the cross-entropy error -sum(t * log(y + 1e-7)).

  Args:
    y: Predicted probabilities (NumPy array).
    t: Target values, broadcast-compatible with `y`.

  Returns:
    The negated sum of t * log(y + 1e-7) over every element.
  """
  eps = 1e-7  # keeps log() away from log(0) = -inf
  log_y = np.log(y + eps)
  return -np.sum(t * log_y)

In [None]:
# One-hot target: the correct class is index 2.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# The first prediction puts most mass on index 2, the second on index 7.
for y in (
    [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
    [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0],
):
  print(cross_entropy_error(np.array(y), np.array(t)))

In [None]:
# 4.2.3 미니배치 학습
import sys, os
import numpy as np

# Extend the module search path *before* importing the project-local
# `dataset` package; appending afterwards cannot help that import.
sys.path.append(os.pardir)
from dataset.mnist import load_mnist

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_size = x_train.shape[0]  # number of rows in x_train
batch_size = 10
# Draw batch_size random indices from range(train_size); np.random.choice
# samples with replacement by default, so an index may repeat.
batch_mask = np.random.choice(train_size, batch_size)

In [None]:
# 4.2.4 (배치용) 교차 엔트로피 오차 구현하기
# 정답 레이블이 원-핫 인코딩인 경우
def cross_entropy_error(y, t):
  """Return the mean cross-entropy error for one-hot encoded targets.

  Args:
    y: Predicted probabilities, either 1-D (a single sample) or 2-D with one
      row per sample.
    t: One-hot targets with the same layout as `y`.

  Returns:
    -sum(t * log(y + 1e-7)) divided by the number of samples.
  """
  if y.ndim == 1:
    # Promote a single sample to a batch of one so the code below is uniform.
    t = t.reshape(1, t.size)
    y = y.reshape(1, y.size)

  batch_size = y.shape[0]
  # Without the offset, y == 0 gives log(0) = -inf and 0 * -inf = nan, which
  # poisons the whole sum.
  delta = 1e-7
  return -np.sum(t * np.log(y + delta)) / batch_size

In [None]:
# 4.2.4 (배치용) 교차 엔트로피 오차 구현하기
# 정답 레이블이 숫자 레이블인 경우
def cross_entropy_error(y, t):
  """Return the mean cross-entropy error for integer class labels.

  Args:
    y: Predicted probabilities, either 1-D (a single sample) or 2-D with one
      row per sample.
    t: Integer NumPy array holding the index of the correct class for each
      sample (an array of size 1 for a single sample).

  Returns:
    -sum(log(y[i, t[i]] + 1e-7)) divided by the number of samples.
  """
  if y.ndim == 1:
    # Promote a single sample to a batch of one so the code below is uniform.
    t = t.reshape(1, t.size)
    y = y.reshape(1, y.size)

  batch_size = y.shape[0]
  # With one-hot targets every term whose target is 0 contributes nothing, so
  # only the output at the correct label is needed.
  # y[np.arange(batch_size), t] picks, for each sample, the output at its
  # correct label.
  picked = y[np.arange(batch_size), t]
  delta = 1e-7  # avoids log(0) = -inf
  # The log is what makes this a cross entropy; summing the raw probabilities
  # would return the negative mean probability instead.
  return -np.sum(np.log(picked + delta)) / batch_size

In [None]:
# 4.3.1 미분
# 나쁜 예시
'''
def numerical_diff(f, x):
  h = 10e-50 # round off error
  return (f(x + h) - f(x)) / h
'''

# 좋은 예시
def numerical_diff(f, x):
  """Approximate the derivative of `f` at `x` with a central difference.

  The symmetric quotient (f(x + h) - f(x - h)) / (2h) has truncation error
  of order h**2, whereas the forward difference (f(x + h) - f(x)) / h is only
  accurate to order h.

  Args:
    f: Callable of one numeric argument.
    x: Point at which to differentiate.

  Returns:
    The approximate value of f'(x).
  """
  h = 1e-4  # small enough for accuracy, large enough to avoid round-off
  return (f(x + h) - f(x - h)) / (2 * h)

In [None]:
# 4.3.2 수치 미분의 예
import numpy as np
import matplotlib.pylab as plt

def function_1(x):
  """Return 0.01 * x**2 + 0.1 * x for a scalar or NumPy array `x`."""
  quadratic_term = 0.01 * (x ** 2)
  linear_term = 0.1 * x
  return quadratic_term + linear_term

# Sample function_1 on [0, 20) in steps of 0.1 and draw the curve.
x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)
plt.plot(x, y)
plt.ylabel("y=f(x)")
plt.xlabel("x")
plt.show()

# Numerical derivative of function_1 at x = 5 and at x = 10.
print(numerical_diff(function_1, 5))
print(numerical_diff(function_1, 10))

In [None]:
# 4.3.3 편미분
def function_tmp1(x0):
  """Return x0**2 + x1**2 with x1 held fixed at 4."""
  x1 = 4  # the variable that is not being differentiated
  return x0 ** 2 + x1 ** 2

def function_tmp2(x1):
  """Return x0**2 + x1**2 with x0 held fixed at 3."""
  x0 = 3  # the variable that is not being differentiated
  return x0 ** 2 + x1 ** 2

# Partial derivatives of x0**2 + x1**2 at (3, 4): first with respect to x0,
# then with respect to x1, one value per line.
print(
  numerical_diff(function_tmp1, 3.0),
  numerical_diff(function_tmp2, 4.0),
  sep="\n",
)

In [None]:
# 4.4 기울기
def numerical_gradient(f, x):
  """Estimate the gradient of `f` at `x` with central differences.

  Each element of `x` is perturbed in place by +h and -h, `f(x)` is evaluated
  for both, and the element is then restored, so `x` holds its original
  values on return. Arrays with any number of dimensions are supported.

  Args:
    f: Callable that takes the array `x` and returns a scalar.
    x: NumPy array of floats; temporarily modified in place while the
      gradient is computed.

  Returns:
    An array shaped like `x` whose entries are the partial derivatives of
    `f` with respect to the matching entries of `x`.
  """
  h = 1e-4
  grad = np.zeros_like(x)  # same shape (and dtype) as x

  # np.ndindex yields a full index tuple for every element, so x[idx] is a
  # scalar for any dimensionality. Iterating over range(x.size) only indexes
  # the first axis and breaks for 2-D inputs such as weight matrices.
  for idx in np.ndindex(x.shape):
    tmp_val = x[idx]

    # f(x + h)
    x[idx] = tmp_val + h
    fxh1 = f(x)

    # f(x - h)
    x[idx] = tmp_val - h
    fxh2 = f(x)

    grad[idx] = (fxh1 - fxh2) / (2 * h)
    x[idx] = tmp_val  # restore the original value

  return grad

In [None]:
# 4.4.1 경사법
# lr: 학습률
def gradient_descent(f, init_x, lr=0.1, step_num=100):
  """Minimise `f` by repeatedly stepping against its numerical gradient.

  Args:
    f: Callable that takes an array and returns a scalar.
    init_x: Starting point (array-like). It is copied, so the caller's
      array is left untouched.
    lr: Learning rate, i.e. the factor applied to the gradient at each step.
    step_num: Number of update steps to perform.

  Returns:
    A new float array holding the point reached after `step_num` updates.
  """
  # Work on a float copy: the in-place update below would otherwise overwrite
  # the caller's init_x, and would fail outright on an integer array.
  x = np.array(init_x, dtype=float)
  for _ in range(step_num):
    grad = numerical_gradient(f, x)
    x -= lr * grad
  return x

def function_2(x):
  """Return x[0]**2 + x[1]**2 for an indexable `x` with at least two items."""
  first, second = x[0], x[1]
  return first ** 2 + second ** 2

# Start at (-3, 4) and print the point reached with the default lr and
# step_num.
init_x = np.array([-3.0, 4.0])
print(gradient_descent(f=function_2, init_x=init_x))

In [None]:
# 4.4.2 신경망에서의 기울기
import sys, os
import numpy as np

# The search path has to be extended before the project-local `common`
# package is imported; appending afterwards has no effect on these imports.
sys.path.append(os.pardir)
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

class simpleNet:
  """Single-layer network whose only parameter is a 2x3 weight matrix `W`."""

  def __init__(self):
    """Initialise `W` with samples from the standard normal distribution."""
    self.W = np.random.randn(2, 3)

  def predict(self, x):
    """Return the product of the input `x` and the weights `W`."""
    scores = np.dot(x, self.W)
    return scores

  def loss(self, x, t):
    """Return cross_entropy_error of the softmax of predict(x) against `t`."""
    probabilities = softmax(self.predict(x))
    return cross_entropy_error(probabilities, t)

# Fixed input sample and its one-hot target (the correct class is index 2).
x = np.array([0.6, 0.9])
t = np.array([0, 0, 1])

# Build the network and compute its raw output for the sample.
net = simpleNet()
p = net.predict(x)

print(net.W)
print(p)
print(net.loss(x, t))

def f(W):
  """Return the loss of `net` on the module-level sample (`x`, `t`).

  `W` is not read here; it exists so the function has the one-argument shape
  that numerical_gradient calls.
  NOTE(review): presumably numerical_gradient perturbs net.W in place, which
  is how the loss reflects each perturbation - confirm against
  common.gradient.
  """
  current_loss = net.loss(x, t)
  return current_loss

# Numerically estimate the gradient of the loss with respect to net.W, using
# numerical_gradient as imported from common.gradient, and print it.
dW = numerical_gradient(f, net.W)
print(dW)


In [None]:
# 4.5.1 2층 신경망 클래스 구현하기
import sys, os
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
  """Two-layer network: affine -> sigmoid -> affine -> softmax.

  The parameters are stored in `self.params` under the keys 'W1', 'b1',
  'W2' and 'b2'.
  """

  def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    """Create scaled standard-normal weights and zero biases.

    Args:
      input_size: Number of input features.
      hidden_size: Number of hidden units.
      output_size: Number of outputs.
      weight_init_std: Factor applied to the standard-normal weight samples.
    """
    # The values of a dict literal are evaluated in order, so W1 is sampled
    # before W2.
    self.params = {
      'W1': weight_init_std * np.random.randn(input_size, hidden_size),
      'b1': np.zeros(hidden_size),
      'W2': weight_init_std * np.random.randn(hidden_size, output_size),
      'b2': np.zeros(output_size),
    }

  def predict(self, x):
    """Return the softmax output of the network for input `x`."""
    params = self.params
    hidden = sigmoid(np.dot(x, params['W1']) + params['b1'])
    scores = np.dot(hidden, params['W2']) + params['b2']
    return softmax(scores)

  def loss(self, x, t):
    """Return cross_entropy_error between the prediction for `x` and `t`."""
    return cross_entropy_error(self.predict(x), t)

  def accuracy(self, x, t):
    """Return the fraction of rows where argmax of the prediction equals argmax of `t`."""
    predicted = np.argmax(self.predict(x), axis=1)
    expected = np.argmax(t, axis=1)
    hits = np.sum(predicted == expected)
    return hits / float(x.shape[0])

  def numerical_gradient(self, x, t):
    """Return a dict with the numerical gradient of the loss for each parameter."""
    def loss_W(W):
      # W is not read; the loss is always evaluated on the current
      # self.params.
      return self.loss(x, t)

    # Inside this method the bare name `numerical_gradient` refers to the
    # module-level function, not to this method.
    return {
      key: numerical_gradient(loss_W, self.params[key])
      for key in ('W1', 'b1', 'W2', 'b2')
    }


In [None]:
# 4.5.2 미니배치 학습 구현하기
from dataset.mnist import load_mnist

# Load MNIST through the project-local loader; the two flags are passed
# through to load_mnist unchanged.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Loss recorded after every update step.
train_loss_list = []

# hyper parameter
iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
  # Draw a mini-batch: batch_size random row indices into the training set.
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = x_train[batch_mask]
  t_batch = t_train[batch_mask]

  # Compute the gradient of the loss for every parameter by numerical
  # differentiation.
  grad = network.numerical_gradient(x_batch, t_batch)

  # Update each parameter in place by one gradient-descent step.
  for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]

  # Record training progress: the loss on the same mini-batch after the update.
  loss = network.loss(x_batch, t_batch)
  train_loss_list.append(loss)

In [None]:
# 4.5.3 시험 데이터로 평가하기
from dataset.mnist import load_mnist

# Load MNIST through the project-local loader; the two flags are passed
# through to load_mnist unchanged.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Per-step loss and per-epoch accuracies.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# hyper parameter
iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Iterations per epoch. Integer division keeps this an int: with true
# division a non-whole value would make `i % iter_per_epoch == 0` hold only
# at i == 0, so no later epoch would ever be evaluated.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
  # Draw a mini-batch: batch_size random row indices into the training set.
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = x_train[batch_mask]
  t_batch = t_train[batch_mask]

  # Compute the gradient of the loss for every parameter by numerical
  # differentiation.
  grad = network.numerical_gradient(x_batch, t_batch)

  # Update each parameter in place by one gradient-descent step.
  for key in ('W1', 'b1', 'W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]

  # Record training progress: the loss on the same mini-batch after the update.
  loss = network.loss(x_batch, t_batch)
  train_loss_list.append(loss)

  # Once per epoch, measure accuracy on the full training and test sets.
  if i % iter_per_epoch == 0:
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    # Report the epoch number, not the raw iteration index.
    print(f"[{i // iter_per_epoch} epoch] train acc: {train_acc}, test acc: {test_acc}")