# 딥러닝 2일차

## XOR 신경망 구현하기

In [1]:
import numpy as np
import random

In [2]:
random.seed(0)

### 환경 변수 지정

In [3]:
# x, y 값 입력
data =[[[0,0], [0]],
       [[0,1], [1]],
       [[1,0], [1]],
       [[1,1], [0]]]
data

[[[0, 0], [0]], [[0, 1], [1]], [[1, 0], [1]], [[1, 1], [0]]]

In [4]:
# sigmoid 함수
def sigmoid(x, derivative=False):
    if (derivative == True):
        return x * (1-x)
    return 1 / (1+np.exp(-x))    

In [10]:
# tanH 함수
def tanh(x, derivative=False):
    if (derivative == True):
        return 1-x**2
    return np.tanh(x)

## 가중치 함수

In [16]:
def makeMatrix(i, j, fill=0.0):
    mat = []
    for i in range(i):
        mat.append([fill] * j)
    return mat

## 신경망 구현

In [28]:
# 실행횟수(iteraions), 학습률(learning rate), 모멘텀(momenterm) 설정
iterations=5000
lr=0.1
mo=0.4

In [43]:
class NeuralNetwork:
    
    # 입력값(num_x), 은닉층초깃값(num_yh), 출력층초깃값(num_yo), 바이어스(bias)
    # 초깃값을 지정
    def __init__(self, num_x, num_yh, num_yo, bias=1):
        
        self.num_x = num_x + bias
        self.num_yh = num_yh
        self.num_yo = num_yo
        
        # 활성화 함수 초깃값
        self.activation_input = [1.0] * self.num_x
        self.activation_hidden = [1.0] * self.num_yh
        self.activation_out = [1.0] * self.num_yo
        
        # 가중치 입력 초깃값
        self.weight_in = makeMatrix(self.num_x, self.num_yh)
        for i in range(self.num_x):
            for j in range(self.num_yh):
                self.weight_in[i][j] = random.random()
                
        # 가중치 출력 초깃값
        self.weight_out = makeMatrix(self.num_yh, self.num_yo)
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                self.weight_out[j][k] = random.random()
                
        # SGD를 위한 초깃값
        self.gradient_in = makeMatrix(self.num_x, self.num_yh)
        self.gradient_out = makeMatrix(self.num_yh, self.num_yo)
        
    def update(self, inputs):
        
        # 입력 레이어의 활성화 함수
        for i in range(self.num_x - 1):
            self.activation_input[i] = inputs[i]
            
        # 은닉층의 활성화 함수
        for j in range(self.num_yh):
            sum=0.0
            for i in range(self.num_x):
                sum = sum + self.activation_input[i] * self.weight_in[i][j]
                self.activation_hidden[j] = tanh(sum, False)
    
        # 출력층의 활성화 함수
        for k in range(self.num_yo):
            sum=0.0
            for j in range(self.num_yh):
                sum = sum + self.activation_hidden[j] * self.weight_out[j][k]
                self.activation_out[k] = tanh(sum, False)
                
        return self.activation_out[:]
                
                
    # Back-propagation 실행
    def backPropagate(self, targets):
        
        # 델타 출력 계산
        output_deltas = [0.0] * self.num_yo
        for k in range(self.num_yo):
            error = targets[k] - self.activation_out[k]
            # 활성화 함수 미분 적용
            output_deltas[k] = tanh(self.activation_out[k], True) * error
        
        # 은닉 노드의 오차함수(Loss Function)
        hidden_deltas = [0.0] * self.num_yh
        for j in range(self.num_yh):
            error = 0.0
            for k in range(self.num_yo):
                error = error + output_deltas[k] * self.weight_out[j][k]
            hidden_deltas[j] = tanh(self.activation_hidden[j], True) * error
        
        # 출력 가중치 업데이트
        for j in range(self.num_yh):
            for k in range(self.num_yo):
                gradient = output_deltas[k] * self.activation_hidden[j]
                v = mo * self.gradient_out[j][k] - lr * gradient
                self.weight_out[j][k] += v
                self.gradient_out[j][k] = gradient
                
        # 입력 가중치 업데이트
        for i in range(self.num_x):
            for j in range(self.num_yh):
                gradient = hidden_deltas[j] * self.activation_input[i]
                v = mo * self.gradient_in[i][j] - lr * gradient
                self.weight_in[i][j] += v
                self.gradient_in[i][j] = gradient
                
        # 최소 제곱법으로 오차 계산
        error = 0.0
        for k in range(len(targets)):
            error = error + 0.5 * (targets[k] - self.activation_out[k])**2
        return error
    
    
    # 학습 실행
    def train(self, patterns):
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets)
            if i % 100 ==0:
                print('error: %-.5f' % error)
                
    # 결괏값
    def result(self, patterns):
        for p in patterns:
            print('Input: %s, Predict: %s' % (p[0], self.update(p[0])))            

if __name__ == '__main__':  
    
    n = NeuralNetwork(2, 2, 1)
    n.train(data)
    n.result(data)

error: 0.72263
error: 0.26551
error: 0.02119
error: 0.00710
error: 0.00381
error: 0.00251
error: 0.00184
error: 0.00145
error: 0.00118
error: 0.00100
error: 0.00086
error: 0.00076
error: 0.00067
error: 0.00061
error: 0.00055
error: 0.00051
error: 0.00047
error: 0.00043
error: 0.00040
error: 0.00038
error: 0.00035
error: 0.00033
error: 0.00032
error: 0.00030
error: 0.00029
error: 0.00027
error: 0.00026
error: 0.00025
error: 0.00024
error: 0.00023
error: 0.00022
error: 0.00021
error: 0.00020
error: 0.00020
error: 0.00019
error: 0.00018
error: 0.00018
error: 0.00017
error: 0.00017
error: 0.00016
error: 0.00016
error: 0.00015
error: 0.00015
error: 0.00015
error: 0.00014
error: 0.00014
error: 0.00014
error: 0.00013
error: 0.00013
error: 0.00013
Input: [0, 0], Predict: [0.0005940778458137488]
Input: [0, 1], Predict: [0.9889714567853399]
Input: [1, 0], Predict: [0.9890004851192029]
Input: [1, 1], Predict: [0.0021678199033020704]


In [39]:
n = NeuralNetwork(2, 2, 1)

In [40]:
n.train(data)

error: 0.50934
error: 0.50749
error: 0.50306
error: 0.43495
error: 0.23661
error: 0.10160
error: 0.05938
error: 0.04035
error: 0.03003
error: 0.02369
