# 신경망 학습

## 단순한 신경망 구현 : Logic Gate

### 필요한 모듈 import

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
plt.style.use("seaborn")

### 하이퍼 파라미터(Hyper Parameter)

In [None]:
# 하이퍼 파라미터는 사용자가 직접 통제하는 상수로, 학습 전 지정된다.
epochs = 1000 
lr = 0.1

### 유틸 함수들(Util Functions)

In [None]:
# 대표적인 활성화 함수 리스트

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def mean_squared_error(pred_y, true_y):
    return 0.5 * (np.sum((true_y - pred_y)**2))

def cross_entropy_error(pred_y, true_y):
    if true_y.ndim == 1:
        true_y = true_y.reshape(1, -1)
        pred_y = pred_y.reshape(1, -1)

    delta = 1e-7
    return -np.sum(true_y * np.log(pred_y + delta))

def cross_entropy_error_for_batch(pred_y, true_y):
    if true_y.ndim == 1:
        true_y = true_y.reshape(1, -1)
        pred_y = pred_y.reshape(1, -1)

    delta = 1e-7
    batch_size = pred_y.shape[0]

    return -np.sum(true_y * np.log(pred_y + delta)) / batch_size

def cross_entropy_for_bin(pred_y, true_y):
    return 0.5 * np.sum((-true_y * np.log(pred_y) - (1 - true_y) * np.log(1-pred_y)))

def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

def differential(f, x):
    eps = 1e-5
    diff_value = np.zeros_like(x)

    for i in range(x.shape[0]):
        temp_val = x[i]

        x[i] = temp_val + eps
        f_h1 = f(x)

        x[i] = temp_val - eps
        f_h2 = f(x)

        diff_value[i] = (f_h1 - f_h2) / (2*eps)
        x[i] = temp_val

    return diff_value

### 신경망

In [1]:
class LogicGateNet():

    def __init__(self):
        def weight_init():
            np.random.seed(1)
            weights = np.random.randn(2)
            bias = np.random.rand(1)

            return weights, bias
        
        self.weights, self.bias = weight_init()

    def predict(self, x):
        W = self.weights.reshape(-1, 1)
        b = self.bias

        pred_y = sigmoid(np.dot(x, W) + b)
        return pred_y

    def loss(self, x, true_y):
        pred_y = self.predict(x)
        return cross_entropy_for_bin(pred_y, true_y)

    def get_gradient(self, x, t):
        def loss_gradient(grad):
            return self.loss(x, t)
        
        grad_W = differential(loss_gradient, self.weights)
        grad_b = differential(loss_gradient, self.bias)

        return grad_W, grad_b


### AND Gate

#### 모델 생성 및 학습

In [None]:
AND = LogicGateNet()

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [0], [0], [1]])

train_loss_list = list()

for i in range(epochs):
    grad_W, grad_B = AND.get_gradient(X, Y)

    AND.weights -= lr * grad_W
    AND.bias -= lr * grad_B

    loss = AND.loss(X, Y)

    train_loss_list.append(loss)

    if i%100 == 99:
        print("epoch : {}, cost : {}, weight : {}, bias : {}".format(i+1, loss, AND.weights, AND.bias))

#### 테스트

In [None]:
print(AND.predict(X))

### OR Gate

#### 모델 생성 및 학습

In [None]:
OR = LogicGateNet()
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_2 = np.array([[0], [0], [0], [1]])

train_loss_list = list()

for i in range(epochs):
    grad_W, grad_B = OR.get_gradient(X, Y_2)

    OR.weights -= lr * grad_W
    OR.bias -= lr * grad_B

    loss = OR.loss(X, Y_2)
    train_loss_list.append(loss)

    if i%100 == 99:
        print("epoch : {}, cost : {}, weight : {}, bias : {}".format(i+1, loss, OR.weights, OR.bias))


#### 테스트

In [None]:
print(OR.predict(X))

### NAND Gate

#### 모델 생성 및 학습

In [None]:
NAND = LogicGateNet()
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_3 = np.array([[1], [1], [1], [0]])

train_loss_list = list()

for i in range(epochs):
    grad_W, grad_B = NAND.get_gradient(X, Y_3)

    NAND.weights -= lr * grad_W
    NAND.bias -= lr * grad_B

    loss = OR.loss(X, Y_3)
    train_loss_list.append(loss)
    
    if i%100 == 99:
        print("epoch : {}, cost : {}, weight : {}, bias : {}".format(i+1, loss, NAND.weights, NAND.bias))


#### 테스트

In [None]:
print(NAND.predict(X))

### XOR Gate

#### 모델 생성 및 학습

In [None]:
XOR = LogicGateNet()
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_4 = np.array([[0], [1], [1], [0]])

train_loss_list = list()

for i in range(epochs):
    grad_W, grad_B = XOR.get_gradient(X, Y_4)

    XOR.weights -= lr * grad_W
    XOR.bias -= lr * grad_B

    loss = XOR.loss(X, Y_3)
    train_loss_list.append(loss)
    
    if i%100 == 99:
        print("epoch : {}, cost : {}, weight : {}, bias : {}".format(i+1, loss, XOR.weights, XOR.bias))

#### 테스트

In [None]:
print(XOR.predict(X))
# 전부 0.5단위로 학습이 잘 안된다.

#### 2층 신경망으로 XOR 게이트 구현(1)

- 얕은 신경망, Shallow Neural Network

- 두 논리게이트(NAND, OR)를 통과하고  
  AND 게이트로 합쳐서 구현

- 06 신경망 구조 참고

In [None]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_5 = np.array([[0], [1], [1], [0]])

s1 = NAND.predict(X)
s2 = OR.predict(X)

#### 테스트

In [None]:
X_2 = np.array([s1, s2]).T.reshape(-1, 2)
print(AND.predict(X_2))
# 테스트 다시 해보기, 0, 1, 1, 0으로 잘 나와야 한다.

#### 2층 신경망으로 XOR 게이트 구현(2)
- 클래스로 구현

In [None]:
class XORNet():
    def __init__(self):
        np.random.seed(1)

        def weight_init():
            params = {}
            params["w_1"] = np.random.randn(2)
            params["b_1"] = np.random.rand(2)
            params["w_2"] = np.random.randn(2)
            params["b_2"] = np.random.rand(1)
            return params

        self.params = weight_init()

    def predict(self, x):
        W_1, W_2 = self.params["w_1"].reshape(-1, 1), self.params["w_2"].reshape(-1, 1)
        B_1, B_2 = self.params["b_1"], self.params["b_2"]

        A1 = np.dot(x, W_1) + B_1 
        Z1 = sigmoid(A1)
        A2 = np.dot(Z1, W_2) + B_2
        pred_y = sigmoid(A2)

        return pred_y

    def loss(self, x, true_y):
        pred_y = self.predict(x)
        return cross_entropy_for_bin(pred_y, true_y)

    def get_gradient(self, x, t):
        def loss_grad(grad):
            return self.loss(x, t)
        grads = {}
        grads["w_1"] = differential(loss_grad, self.params["w_1"])
        grads["b_1"] = differential(loss_grad, self.params["b_1"])
        grads["w_2"] = differential(loss_grad, self.params["w_2"])
        grads["b_2"] = differential(loss_grad, self.params["b_2"])

        return grads

#### 하이퍼 파라미터(Hyper Parameter)
- 재조정

In [None]:
lr = 0.3

#### 모델 생성 및 학습

In [None]:
XOR = XORNet()
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_5 = np.array([[0], [1], [1], [0]])

train_loss_list = list()

for i in range(epochs):
    grads = XOR.get_gradient(X, Y_5)

    for key in ('w_1', "b_1", "w_2", "b_2"):
        XOR.params[key] -= lr * grads[key]

    loss = XOR.loss(X, Y_5)
    train_loss_list.append(loss)

    if i%100 == 99:
        print("epochs : {}, cost : {}".format(i+1, loss))

#### 테스트

In [None]:
print(XOR.predict(X))
#0, 1, 1, 0으로 잘 나온다.

## 다중 클래스 분류 : MNIST Dataset

### 배치 처리
- 학습 데이터 전체를 한번에 진행하지 않고  
  일부 데이터(샘플)을 확률적으로 구해서 조금씩 나누어 진행

- 확률적 경사 하강법(Stochastic Gradient Descent) 또는  
  미니 배치 학습법(mini-batch learning)이라고도 부름

#### 신경망 구현 : MNIST 

#### 필요한 모듈 임포트

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import tensorflow as tf 
import time 
from tqdm.notebook import tqdm

#### 데이터 로드

In [None]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

#### 데이터 확인

In [None]:
print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)

In [None]:
img = x_train[0]
print(img.shape)
plt.imshow(img, cmap='gray')
# 손글씨 5를 넣어주면 얘는 맞춰야 한다.

In [None]:
y_train[0]
# 얘는 정답이다.

#### 데이터 전처리 (Data Preprocessing)

In [None]:
def flatten_for_mnist(x):
    temp = np.zeros((x.shape[0], x[0].size))

    for idx, data in enumerate(x):
        temp[idx, :] = data.flatten()

    return temp

In [None]:
x_train, x_test = x_train / 255.0, x_test / 255.0
# 정규화 한다. 0과 1사이의 값으로 모두 나눠준다.
x_train = flatten_for_mnist(x_train)
x_test = flatten_for_mnist(x_test)

print(x_train.shape)
print(x_test.shape)

y_train_ohe = tf.one_hot(y_train, depth=10).numpy()
y_test_ohe = tf.one_hot(y_test, depth=10).numpy()

print(y_train_ohe.shape)
print(y_test_ohe.shape)

In [None]:
print(x_train[0].max(), x_test[0].min())
print(y_train_ohe[0])

#### 하이퍼 파라미터(Hyper Parameter)

In [None]:
epochs = 2
lr = 0.1
batch_size = 100
train_size = x_train.shape[0]

#### 사용되는 함수들(Util Functions)

In [None]:
# 대표적인 활성화 함수 리스트

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def mean_squared_error(pred_y, true_y):
    return 0.5 * (np.sum((true_y - pred_y)**2))

def cross_entropy_error(pred_y, true_y):
    if true_y.ndim == 1:
        true_y = true_y.reshape(1, -1)
        pred_y = pred_y.reshape(1, -1)

    delta = 1e-7
    return -np.sum(true_y * np.log(pred_y + delta))

def cross_entropy_error_for_batch(pred_y, true_y):
    if true_y.ndim == 1:
        true_y = true_y.reshape(1, -1)
        pred_y = pred_y.reshape(1, -1)

    delta = 1e-7
    batch_size = pred_y.shape[0]

    return -np.sum(true_y * np.log(pred_y + delta)) / batch_size

def cross_entropy_for_bin(pred_y, true_y):
    return 0.5 * np.sum((-true_y * np.log(pred_y) - (1 - true_y) * np.log(1-pred_y)))

def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a

    return y

def differential_1d(f, x):
    eps = 1e-5
    diff_value = np.zeros_like(x)

    for i in range(x.shape[0]):
        temp_val = x[i]

        x[i] = temp_val + eps
        f_h1 = f(x)

        x[i] = temp_val - eps
        f_h2 = f(x)

        diff_value[i] = (f_h1 - f_h2) / (2*eps)
        x[i] = temp_val

    return diff_value

def differential_2d(f, X):
    if X.ndim == 1:
        return differential_1d(f, X)
    else:
        grad = np.zeros_like(X)

        for idx, x in enumerate(X):
            grad[idx] = differential_1d(f, x)
        
        return grad


#### 2층 신경망으로 구현

In [None]:
class MyModel():
    def __init__(self):

        def weight_init(input_nodes, hidden_nodes, output_units):
            np.random.seed(777)

            params = {}
            params["w_1"] = 0.01 * np.random.randn(input_nodes, hidden_nodes)
            params["b_1"] = np.zeros(hidden_nodes)
            params["w_2"] = 0.01 * np.random.randn(hidden_nodes, output_units)
            params["b_2"] = np.zeros(output_units)
            return params

        self.params = weight_init(784, 64, 10)

    def predict(self, x):
        W_1, W_2 = self.params["w_1"], self.params["w_2"]
        B_1, B_2 = self.params["b_1"], self.params["b_2"]

        A1 = np.dot(x, W_1) + B_1 
        Z1 = sigmoid(A1)

        A2 = np.dot(Z1, W_2) + B_2
        pred_y = softmax(A2)

        return pred_y
    
    def accuracy(self, x, true_y):
        pred_y = self.predict(x)
        y_argmax = np.argmax(pred_y, axis=1)
        t_argmax = np.argmax(true_y, axis=1)

        accuracy = np.sum(y_argmax == t_argmax) / float(x.shape[0])

        return accuracy

    def loss(self, x, true_y):
        pred_y = self.predict(x)
        return cross_entropy_for_bin(pred_y, true_y)

    def get_gradient(self, x, t):
        def loss_grad(grad):
            return self.loss(x, t)

        grads = {}
        grads["w_1"] = differential_2d(loss_grad, self.params["w_1"])
        grads["b_1"] = differential_2d(loss_grad, self.params["b_1"])
        grads["w_2"] = differential_2d(loss_grad, self.params["w_2"])
        grads["b_2"] = differential_2d(loss_grad, self.params["b_2"])

        return grads

#### 모델 생성 및 학습
- 시간 많이 소요

In [None]:
model = MyModel()

train_loss_list = list()
train_acc_list = list()

test_acc_list = list()
iter_per_epoch = max(train_size / batch_size, 1)

start_time = time.time()

for i in tqdm(range(epochs)):
    batch_idx = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_idx]
    y_batch = y_train_ohe[batch_idx]

    grads = model.get_gradient(x_batch, y_batch)

    for key in grads.keys():
        model.params[key] -= lr * grads[key]

    loss = model.loss(x_batch, y_batch)
    train_loss_list.append(loss)

    train_accuracy = model.accuracy(x_train, y_train_ohe)
    test_accuracy = model.accuracy(x_test, y_test_ohe)
    train_acc_list.append(train_accuracy)
    test_acc_list.append(test_accuracy)

    print("epochs : {}, cost : {}, train accuracy : {}, test accuracy : {}".format(i+1, loss, train_accuracy, test_accuracy))

end_time = time.time()

print("총 학습 소요시간 : {:.3f}s".format(end_time - start_time))

  0%|          | 0/2 [00:00<?, ?it/s]

epochs : 1, cost : 465.9060502356192, train accuracy : 0.10441666666666667, test accuracy : 0.1028
epochs : 2, cost : 361.8440950235913, train accuracy : 0.09751666666666667, test accuracy : 0.0974
총 학습 소요시간 : 190.846s


### 모델의 결과
- 모델은 학습이 잘 될 수도, 잘 안될 수도 있음

- 만약, 학습이 잘 되지 않았다면,  
  학습이 잘 되기 위해서 어떠한 조치를 취해야 하는가?
  - 다양한 학습관련 기술이 존재