<a href="https://colab.research.google.com/github/tjdude/python/blob/main/Neural_Net_%26_Back_propagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Affine Layer

In [None]:
import numpy as np
class Affine:
    """Fully connected (affine) layer computing ``x . W + b``.

    The layer caches its flattened input during ``forward`` so that
    ``backward`` can compute the parameter gradients ``dW`` and ``db``.

    Args:
        W: Weight matrix used as the right operand of ``np.dot``.
        b: Bias added to every row of the product.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # Gradients of the weight and bias parameters, set by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x . W + b`` after flattening ``x`` to two dimensions.

        Inputs with more than two dimensions (tensors) are reshaped to
        ``(x.shape[0], -1)``; the original shape is remembered so that
        ``backward`` can return a gradient of matching shape.
        """
        self.original_x_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)

        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Propagate the upstream gradient ``dout`` back through the layer.

        Stores the parameter gradients in ``self.dW`` and ``self.db`` and
        returns the gradient with respect to the input, reshaped to the
        shape that was passed to ``forward``.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        # Restore the shape of the original input (tensor support).
        return dx.reshape(*self.original_x_shape)

### Softmax function

In [None]:
import numpy as np
def softmax(x):
    """Return the softmax of ``x``.

    A 2-D input is treated as a batch and normalised independently along
    each row; any other input is normalised over all of its elements.

    The maximum is subtracted before exponentiating so that ``np.exp``
    does not overflow for large inputs; this does not change the result.
    """
    if x.ndim == 2:
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=1, keepdims=True)

    shifted = x - np.max(x)
    exp = np.exp(shifted)
    return exp / np.sum(exp)


### ReLU function

In [None]:
class Relu:
    """ReLU activation layer: passes positive values, zeroes the rest."""

    def __init__(self):
        # Boolean array marking the input positions that were <= 0 in the
        # most recent forward() call.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient zeroed where the input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx
        

### Cross-entropy & softmax with loss

In [None]:
def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions ``y`` and labels ``t``.

    Args:
        y: Predicted probabilities, one row per sample (a 1-D array is
            treated as a single sample).
        t: Labels, either one-hot rows with the same size as ``y`` or an
            array of integer class indices.
    """
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot labels are converted to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # The small constant keeps np.log away from log(0).
    picked = y[np.arange(batch_size), t]
    return -np.sum(np.log(picked + 1e-7)) / batch_size


class SoftmaxWithLoss:
    """Softmax activation combined with a cross-entropy loss."""

    def __init__(self):
        self.loss = None  # value of the loss function
        self.y = None  # softmax output
        self.t = None  # labels (one-hot vectors or class indices)

    def forward(self, x, t):
        """Compute, store and return the loss of scores ``x`` against ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        The result is divided by the batch size (``self.t.shape[0]``) and
        scaled by the upstream gradient ``dout``.
        """
        batch_size = self.t.shape[0]

        if self.t.size == self.y.size:  # labels are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:  # labels are class indices
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx * dout


### Numerical gradient

In [None]:
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h`` in
    turn, ``f(x)`` is evaluated for both, and the element is restored
    afterwards, so ``x`` holds its original values on return.

    Args:
        f: Callable evaluated as ``f(x)`` and returning a scalar.
        x: NumPy array of any shape. It should have a floating dtype:
            storing ``value + h`` into an integer array would truncate
            the perturbation.

    Returns:
        An array shaped like ``x`` holding the estimated partial derivatives.
    """
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # nditer with multi_index visits every element of an array of any rank.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])

    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x + h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)

        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad


In [None]:
from collections import OrderedDict


class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output classes.
        weight_init_std: Scale applied to the random initial weights.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialise the weights and biases.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Build the layers. They are stored in order because predict() runs
        # them front to back and gradient() runs them back to front.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the loss and return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the result of the loss layer for input ``x`` and labels ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        ``t`` may hold class indices (1-D) or one-hot rows; one-hot labels
        are reduced to indices with ``argmax``.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)

        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the parameter gradients estimated by numerical differentiation.

        ``x`` is the input data and ``t`` the labels. The result is a dict
        with keys ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
        """
        def loss_W(W):
            # The argument is unused: the module-level numerical_gradient
            # perturbs the parameter array in place, and the layers share
            # that array, so recomputing the loss observes the perturbation.
            return self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

    def gradient(self, x, t):
        """Return the parameter gradients computed by backpropagation.

        ``x`` is the input data and ``t`` the labels. The result is a dict
        with keys ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
        """
        # Forward pass: each layer caches what its backward() needs.
        self.loss(x, t)

        # Backward pass: start at the loss and walk the layers in reverse.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the affine layers.
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

### MNIST Datasets

In [None]:
import tensorflow as tf
# Load the data.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path='minist.npz')
print(x_train.shape, y_train.shape)

# Scale the raw 0-255 pixel values to [0, 1]; the small initial weights and
# the learning rate used below are chosen for inputs of that magnitude.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Gradient check: compare backpropagation against numerical differentiation
# on a small batch.
x_batch = x_train[:10]
y_batch = y_train[:10]
grad_numerical = network.numerical_gradient(x_batch, y_batch)
grad_backprop = network.gradient(x_batch, y_batch)

# Mean absolute difference between the two gradients for each parameter.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

iters_num = 10000  # number of training iterations
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
# Integer number of iterations per epoch, so the modulo test below is exact.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of indices from the training set.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = y_train[batch_mask]

    # Compute the gradients by backpropagation
    # (network.numerical_gradient gives the same result but is far slower).
    grad = network.gradient(x_batch, t_batch)

    # Update every parameter in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss once per iteration, after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate on the full train and test sets once per epoch.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, y_train)
        test_acc = network.accuracy(x_test, y_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

In [None]:
# Notebook cell output: show the recorded loss and accuracy histories.
train_loss_list ,train_acc_list, test_acc_list 