<a href="https://colab.research.google.com/github/ownit4137/TIL/blob/main/DL%20from%20Scratch/1/SGD_2layer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 확률적 경사 하강법

## 개념

### 교차 엔트로피 오차, CEE

$E = - \sum_i {t}_i \log({y}_i) $

- 정답 레이블에 해당하는 출력이 클수록 오차가 작아지고, 작을수록 오차가 커짐 ($t$가 원-핫 벡터이면 정답 클래스의 $-\log y_i$ 항만 남기 때문)

### 배치 학습

- 계산 라이브러리 대부분이 큰 배열을 효율적으로 처리할 수 있게 최적화됨
- I/O를 통해 데이터를 읽는 횟수를 줄여 순수 계산 수행 비율을 높임

### 미분

- 해석적 미분 : 수식을 전개해 미분하는 것
- 수치 미분 : 함수 f의 어떤 x를 중심으로 그 전후의 차분을 계산 (중심 차분: $\frac{f(x+h) - f(x-h)}{2h}$)



In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive/.
from google.colab import drive 
drive.mount('/content/gdrive/')

# IPython magic: switch the working directory to the notebook folder on Drive.
# NOTE(review): presumably needed so the `dataset` package imported in the
# next cell can be found relative to this folder -- confirm.
%cd /content/gdrive/MyDrive/'Colab Notebooks'/

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
/content/gdrive/MyDrive/Colab Notebooks


In [None]:
from dataset.mnist import load_mnist    # 책 코드
from PIL import Image
import numpy as np

In [None]:
def cross_entropy_error(y, t):
  """Return the mean cross-entropy error of outputs ``y`` against targets ``t``.

  Args:
    y: Network outputs as a NumPy array; either 1-D (a single sample) or
      2-D indexed as (sample, class).
    t: Targets as a NumPy array; either one-hot rows holding as many
      elements as ``y``, or integer class indices.

  Returns:
    The negative log of the output selected by each target class, summed
    and divided by the number of samples.
  """
  # A single sample is promoted to a batch of one, i.e. shape (1, n).
  if y.ndim == 1:
    y = y.reshape(1, y.size)
    t = t.reshape(1, t.size)

  # Equal element counts mean t is one-hot: collapse it to class indices.
  if t.size == y.size:
    t = t.argmax(axis=1)

  n_samples = y.shape[0]
  rows = np.arange(n_samples)
  # The 1e-7 offset keeps log() finite when a selected output is exactly 0.
  log_likelihood = np.log(y[rows, t] + 1e-7)
  return -np.sum(log_likelihood) / n_samples


def softmax(x):
  """Apply softmax to ``x``.

  A 2-D input is normalized row by row; any other input is normalized
  over all of its elements at once. The maximum is subtracted before
  exponentiating to guard against overflow.
  """
  if x.ndim != 2:
    shifted = x - np.max(x)  # overflow guard
    exps = np.exp(shifted)
    return exps / np.sum(exps)

  # Work on the transpose so each original row becomes a column and the
  # per-row max / sum broadcast along axis 0.
  cols = x.T
  cols = cols - np.max(cols, axis=0)
  exps = np.exp(cols)
  return (exps / np.sum(exps, axis=0)).T


def numerical_gradient(f, x):
  """Estimate the gradient of ``f`` at ``x`` using central differences.

  Each element of ``x`` is perturbed in place by ``+h`` and then ``-h``,
  ``f`` is evaluated for both, and the element is put back. Because the
  perturbation happens in place, ``f`` may ignore its argument and read
  the same array through a closure instead.

  Args:
    f: Callable invoked as ``f(x)`` that returns a scalar.
    x: NumPy array to differentiate with respect to. It is modified
      temporarily during the call and restored before returning, even
      when ``f`` raises.
      NOTE(review): assumed to be a floating-point array; an integer
      array would truncate the +/-h perturbation -- confirm with callers.

  Returns:
    An array shaped like ``x`` holding ``(f(x+h) - f(x-h)) / (2*h)`` for
    every element. A zero-size ``x`` yields an empty gradient.
  """
  h = 1e-4  # 0.0001
  grad = np.zeros_like(x)

  # np.ndindex visits every index tuple of x and yields nothing for a
  # zero-size shape, so empty arrays need no special case.
  for idx in np.ndindex(x.shape):
    saved = x[idx]
    center = float(saved)  # same Python float for both perturbations
    try:
      x[idx] = center + h
      f_plus = f(x)  # f(x+h)

      x[idx] = center - h
      f_minus = f(x)  # f(x-h)
    finally:
      # Put the original value back even if f raised, so the caller's
      # array is never left perturbed.
      x[idx] = saved

    grad[idx] = (f_plus - f_minus) / (2 * h)

  return grad

def sigmoid(x):
  """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise for arrays."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator

In [None]:
class SGD_2layer:
  """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

  Parameters are kept in ``self.params`` under the keys ``'w1'``, ``'w2'``
  (weights) and ``'b1'``, ``'b2'`` (biases). Gradients are computed
  numerically by ``getgrad``.
  """

  def __init__(self, input_size, hidden_size, output_size):
    """Create the parameters.

    Weights are drawn from a standard normal distribution scaled by
    ``self.init_div`` (0.01); biases start at zero.
    """
    self.init_div = 0.01
    self.params = {}
    self.params['w1'] = self.init_div * np.random.randn(input_size, hidden_size)
    self.params['w2'] = self.init_div * np.random.randn(hidden_size, output_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['b2'] = np.zeros(output_size)

  def predict(self, x):
    """Run the forward pass and return the softmax output for ``x``."""
    w1, w2 = self.params['w1'], self.params['w2']
    b1, b2 = self.params['b1'], self.params['b2']

    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    # The output-layer bias must be added here; without it b2 has no
    # effect on the prediction and its gradient is always zero.
    a2 = np.dot(z1, w2) + b2
    y = softmax(a2)

    return y

  def loss(self, x, t):
    """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
    y = self.predict(x)
    return cross_entropy_error(y, t)

  def accuracy(self, x, t):
    """Return the fraction of samples whose predicted class matches ``t``.

    ``t`` may be one-hot rows or a 1-D array of class indices, the same
    two target formats that ``cross_entropy_error`` accepts.
    """
    y = self.predict(x)
    y = np.argmax(y, axis=1)  # index of the largest output in each row

    t = np.asarray(t)
    if t.ndim != 1:
      t = np.argmax(t, axis=1)  # one-hot rows -> class indices

    accuracy = np.sum(y == t) / float(x.shape[0])
    return accuracy

  def getgrad(self, x, t):
    """Return numerical gradients of the loss for every parameter.

    The result is a dict with the keys ``'w1'``, ``'b1'``, ``'w2'``, ``'b2'``.
    """
    # The argument is ignored on purpose: numerical_gradient perturbs the
    # parameter arrays in place, and self.loss reads them from self.params.
    loss_w = lambda w: self.loss(x, t)

    return {
        key: numerical_gradient(loss_w, self.params[key])
        for key in ('w1', 'b1', 'w2', 'b2')
    }
    

In [None]:
# Quick check of the numerical gradient on a small network
# (30 inputs, 10 hidden units, 10 outputs).
net = SGD_2layer(input_size=30, hidden_size=10, output_size=10)

# Random batch: 10 samples with 30 features each, values in [0, 1).
x = np.random.rand(10, 30)
# Random targets rather than one-hot labels. Since t has as many elements as
# the network output, cross_entropy_error takes each row's argmax as the label.
t = np.random.rand(10, 10)
y = net.predict(x)  # forward pass; the result is not used again in this cell

# Numerical gradients of the loss for each parameter, keyed like net.params.
grads = net.getgrad(x, t)
print(grads)

{'w1': array([[-2.57223094e-04,  3.83637921e-05, -7.07857697e-04,
         1.59621569e-04, -3.16458904e-04, -1.76416051e-04,
        -2.47931180e-04, -5.19376187e-05,  5.14959266e-04,
        -4.67216958e-04],
       [ 2.05686268e-04, -9.97132910e-05, -7.58018572e-04,
        -3.02355674e-04, -5.47517449e-04, -4.81973372e-05,
        -1.52020294e-04, -3.04305496e-04,  4.23343955e-04,
        -5.55582154e-04],
       [ 5.56775175e-04,  9.35930244e-05, -1.14542773e-03,
        -3.62398898e-04, -1.51325390e-04,  2.23575560e-04,
        -2.17637846e-04, -2.80742671e-04,  5.21856389e-04,
        -6.90759903e-04],
       [ 7.22439968e-04,  5.28042077e-05, -1.07811476e-03,
        -2.48601766e-04, -1.26333868e-04,  4.45368102e-04,
        -2.34406492e-04, -4.14342936e-04,  4.48814854e-04,
        -5.63624438e-04],
       [ 4.99484956e-04, -6.10783024e-05, -7.52476204e-04,
        -3.82191001e-04, -4.45353034e-04,  6.05056427e-05,
        -1.59703923e-04, -5.24428803e-04,  3.04706249e-04,
    