In [1]:
import numpy as np
from datetime import datetime

# 수치미분 함수

def numerical_derivative(f, x):
    """Estimate the gradient of ``f`` with respect to every element of ``x``.

    Uses the central difference ``(f(x + h) - f(x - h)) / (2 * h)`` with
    ``h = 1e-4``, one element at a time.  Each element of ``x`` is perturbed
    *in place* while ``f`` is evaluated, so ``f`` may either use the array it
    receives or read the very same array through another reference (for
    example a module-level weight matrix).

    Args:
        f: Callable invoked as ``f(x)``; expected to return a scalar.
        x: Writable NumPy array of floating-point values.  It is modified
            temporarily during the call and every element is restored before
            the function returns, even if ``f`` raises.

    Returns:
        Array created with ``np.zeros_like(x)`` (same shape and dtype as
        ``x``) holding the estimated partial derivatives.  A zero-size ``x``
        yields a zero-size result.

    Raises:
        TypeError: If ``x`` has an integer or boolean dtype, because the
            ``+/- h`` perturbation and the gradient itself would be truncated
            when stored back into such an array.
    """
    if np.issubdtype(x.dtype, np.integer) or np.issubdtype(x.dtype, np.bool_):
        raise TypeError(
            "numerical_derivative requires a floating-point array; "
            "got dtype %s" % x.dtype
        )

    delta_x = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    # np.ndindex visits every multi-index of x (and simply yields nothing for
    # zero-size arrays).
    for idx in np.ndindex(*x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation so a failing f cannot leave the
            # caller's array in a corrupted state.
            x[idx] = tmp_val

        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

# sigmoid 함수

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Scalar or NumPy array.

    Returns:
        Value(s) in ``[0, 1]`` with the same shape as ``x``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))).  np.logaddexp(0, -x)
    # evaluates log(exp(0) + exp(-x)) without ever forming exp(-x) directly,
    # so large negative inputs no longer trigger an overflow warning/error.
    return np.exp(-np.logaddexp(0, -x))

In [2]:
# feed forward
def feed_forward(xdata, tdata):
    """Run the two-layer network forward and return the cross-entropy loss.

    The weights and biases are not parameters: ``W2``, ``b2``, ``W3`` and
    ``b3`` are read from module-level variables at call time.

    Args:
        xdata: Input samples, multiplied by ``W2`` with ``np.dot``.
        tdata: Target values (0 or 1 in this notebook); must broadcast
            against the network output.

    Returns:
        The binary cross-entropy summed over all elements.
    """
    eps = 1e-7  # keeps log() away from log(0) = -inf

    # Hidden layer: affine transform followed by sigmoid.
    hidden_out = sigmoid(np.dot(xdata, W2) + b2)

    # Output layer: affine transform followed by sigmoid.
    y = sigmoid(np.dot(hidden_out, W3) + b3)

    # Binary cross-entropy, summed over every sample/output.
    log_likelihood = tdata * np.log(y + eps) + (1 - tdata) * np.log((1 - y) + eps)
    return -np.sum(log_likelihood)



# loss val
def loss_val(xdata, tdata):
    """Return the current cross-entropy loss, for reporting purposes.

    Delegates to ``feed_forward`` instead of duplicating its body, so the
    value printed while training is always computed by exactly the same code
    that is being optimised.

    Args:
        xdata: Input samples passed through to ``feed_forward``.
        tdata: Target values passed through to ``feed_forward``.

    Returns:
        The summed binary cross-entropy computed by ``feed_forward``.
    """
    return feed_forward(xdata, tdata)

In [3]:
# query, 즉 미래 값 예측 함수
def predict(xdata):
    """Query the trained network and threshold its output at 0.5.

    The weights and biases (``W2``, ``b2``, ``W3``, ``b3``) are read from
    module-level variables at call time.

    Args:
        xdata: A single input sample or a batch of samples.

    Returns:
        A tuple ``(y, result)`` where ``y`` is the raw sigmoid output of the
        network.  When ``y`` holds exactly one value, ``result`` is the plain
        integer ``1`` (``y >= 0.5``) or ``0``.  When ``y`` holds several
        values (batched input), ``result`` is an integer array of the same
        shape as ``y`` containing the per-element 0/1 decisions.
    """
    a2 = sigmoid(np.dot(xdata, W2) + b2)  # hidden-layer output
    y = sigmoid(np.dot(a2, W3) + b3)      # output-layer output

    if np.size(y) == 1:
        # Single prediction: keep returning a plain Python int.
        result = 1 if y >= 0.5 else 0
    else:
        # Batched prediction: an `if` on a multi-element array would raise
        # ValueError, so threshold element-wise instead.
        result = (y >= 0.5).astype(int)

    return y, result

In [4]:
# Truth-table inputs shared by the AND, OR, NAND and XOR problems.
xdata = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets for each gate, stored as (4, 1) column vectors so that they line up
# row-by-row with xdata.
and_tdata = np.array([[0], [0], [0], [1]])
or_tdata = np.array([[0], [1], [1], [1]])
nand_tdata = np.array([[1], [1], [1], [0]])
xor_tdata = np.array([[0], [1], [1], [0]])

# Test inputs: the same four combinations, held in an independent array.
test_data = xdata.copy()

In [5]:
# Network architecture
input_nodes = 2     # number of input nodes
hidden_nodes = 10   # number of hidden nodes
output_nodes = 1    # number of output nodes

# Parameters are initialised with np.random.rand, in the order W2, b2, W3, b3.
W2 = np.random.rand(input_nodes, hidden_nodes)   # input -> hidden weights
b2 = np.random.rand(hidden_nodes)                # hidden-layer bias

W3 = np.random.rand(hidden_nodes, output_nodes)  # hidden -> output weights
b3 = np.random.rand(output_nodes)                # output-layer bias

learning_rate = 1e-1


# XOR objective.  The argument is deliberately ignored: numerical_derivative
# perturbs the parameter array it is given in place, and feed_forward reads
# W2/b2/W3/b3 from module scope, so the perturbation is still observed.
def f(x):
    return feed_forward(xdata, xor_tdata)


print("Initial loss value = ", loss_val(xdata, xor_tdata))

start_time = datetime.now()

for step in range(10001):

    # Gradient-descent step for each parameter in turn.  `-=` updates the
    # arrays in place, which is required because feed_forward looks them up
    # by their module-level names.
    for param in (W2, b2, W3, b3):
        param -= learning_rate * numerical_derivative(f, param)

    if step % 400 == 0:
        print("step = ", step, "  , loss value = ", loss_val(xdata, xor_tdata))

end_time = datetime.now()

print("")
print("Elapsed Time => ", end_time - start_time)

Initial loss value =  9.289299555055772
step =  0   , loss value =  6.953077032317731
step =  400   , loss value =  2.438666817866771
step =  800   , loss value =  0.9713423758356354
step =  1200   , loss value =  0.21316559864141044
step =  1600   , loss value =  0.09147321679536785
step =  2000   , loss value =  0.054297371146152254
step =  2400   , loss value =  0.037523891989765586
step =  2800   , loss value =  0.02824857802626775
step =  3200   , loss value =  0.022451958707714242
step =  3600   , loss value =  0.01852280274111505
step =  4000   , loss value =  0.01570129789143936
step =  4400   , loss value =  0.013586117951152533
step =  4800   , loss value =  0.011946784755192548
step =  5200   , loss value =  0.010642143465222687
step =  5600   , loss value =  0.00958122204886118
step =  6000   , loss value =  0.008702910802747044
step =  6400   , loss value =  0.007964726591470368
step =  6800   , loss value =  0.007336269508931419
step =  7200   , loss value =  0.0067952429

In [6]:
# XOR prediction: query the trained network once per test input and show both
# the raw sigmoid output and the thresholded 0/1 decision.

for data in test_data:
    real_val, logical_val = predict(data)
    print("real_val", real_val, ", logical_val = ", logical_val)

real_val [0.00096639] , logical_val =  0
real_val [0.99889288] , logical_val =  1
real_val [0.99887524] , logical_val =  1
real_val [0.0012376] , logical_val =  0
