# XOR Deep Learning Architecture
- 딥러닝에서는 1개 이상의 은닉층(hidden layer)을 만들 수 있음.
- 각 은닉층은 임의 개수의 노드(node)로 구성됨.
- 은닉층과 노드 개수가 많아질수록 학습 속도는 느려지므로 적절한 개수를 결정해야 함.
  
<img src = "http://cmseng.skku.edu/CMSLecture/ML/img/12-1.png" style="max-width: 80%; height: auto;">

<img src = "http://cmseng.skku.edu/CMSLecture/ML/img/12-2.png" style="max-width: 80%; height: auto;">

In [1]:
import numpy as np

# 수치미분 함수

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` at ``x`` using central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``; ``f(x)`` is evaluated for both and the element is restored
    before moving on.  Because the perturbation is written into ``x`` itself,
    ``f`` may ignore its argument and read the same array through another
    reference (for example an attribute of an object).

    Args:
        f: Callable that accepts ``x`` and returns a scalar.
        x: Real floating-point ``numpy.ndarray`` of any shape.  It is modified
            temporarily during the call and holds its original values again
            on return, even if ``f`` raises.
        delta_x: Half-width of the central-difference step.

    Returns:
        ``numpy.ndarray`` with the same shape and dtype as ``x`` whose entries
        are ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` taken one
        element at a time.

    Raises:
        TypeError: If ``x`` does not have a real floating-point dtype.  An
            integer array would truncate the perturbation when it is written
            back and silently yield a meaningless gradient.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            + str(x.dtype)
        )

    grad = np.zeros_like(x)

    # np.ndindex visits every multi-index and, unlike np.nditer, copes with
    # zero-size arrays (the loop body simply never runs).
    for idx in np.ndindex(x.shape):
        original = x[idx]
        try:
            x[idx] = original + delta_x
            fx_plus = f(x)   # f(x + delta_x)

            x[idx] = original - delta_x
            fx_minus = f(x)  # f(x - delta_x)
        finally:
            # Always undo the perturbation so the caller's array is intact.
            x[idx] = original

        grad[idx] = (fx_plus - fx_minus) / (2 * delta_x)

    return grad

# sigmoid 함수

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` with ``np.logaddexp``
    supplying ``log(exp(0) + exp(-x))``.  This is mathematically the same
    function, but it never evaluates ``exp`` on a large positive number, so
    strongly negative inputs return 0.0 without an overflow warning.

    Args:
        x: Scalar or ``numpy.ndarray``.

    Returns:
        Sigmoid of ``x`` with values in ``[0, 1]``; same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

In [16]:
class LogicGate:
    """Small sigmoid network (2 inputs -> hidden layer -> 1 output) for logic gates.

    The network is fitted to a four-row truth table by gradient descent on a
    cross-entropy loss, with gradients obtained from ``numerical_derivative``.
    """

    def __init__(self, gate_name, xdata, tdata, hidden_units=6, learning_rate=1e-2):
        """Store the training data and randomly initialise the parameters.

        Args:
            gate_name: Label for the gate; stored as ``self.name`` and printed.
            xdata: Array holding the four input pairs; reshaped to ``(4, 2)``.
            tdata: Array holding the four target values; reshaped to ``(4, 1)``.
            hidden_units: Number of units in the hidden layer.
            learning_rate: Step size used by ``train``.
        """
        self.name = gate_name

        # Training inputs and targets, handled as one batch of four samples.
        self.__xdata = xdata.reshape(4, 2)
        self.__tdata = tdata.reshape(4, 1)

        # Hidden layer: weights W2 (2 x hidden_units) and bias b2.
        self.__W2 = np.random.rand(2, hidden_units)
        self.__b2 = np.random.rand(hidden_units)

        # Output layer: weights W3 (hidden_units x 1) and bias b3.
        self.__W3 = np.random.rand(hidden_units, 1)
        self.__b3 = np.random.rand(1)

        self.__learning_rate = learning_rate

        print(self.name + " object is created")

    def __forward(self, xdata):
        """Return the output-layer activation for ``xdata``."""
        hidden = sigmoid(np.dot(xdata, self.__W2) + self.__b2)
        return sigmoid(np.dot(hidden, self.__W3) + self.__b3)

    def feed_forward(self):
        """Return the cross-entropy loss over the stored training data."""
        delta = 1e-7  # keeps log() away from log(0)

        y = self.__forward(self.__xdata)
        t = self.__tdata

        return -np.sum(t * np.log(y + delta) + (1 - t) * np.log((1 - y) + delta))

    def loss_val(self):
        """Return the current cross-entropy loss (for reporting)."""
        return self.feed_forward()

    def train(self, steps=20001, report_every=400):
        """Fit the parameters by gradient descent with numerical gradients.

        Args:
            steps: Number of update iterations to run.
            report_every: Print the loss whenever ``step`` is a multiple of
                this positive integer.
        """

        def loss(_):
            # The argument is ignored on purpose: numerical_derivative perturbs
            # the parameter array in place, and feed_forward reads that same
            # array through self.
            return self.feed_forward()

        print("Initial loss value = ", self.loss_val())

        params = (self.__W2, self.__b2, self.__W3, self.__b3)

        for step in range(steps):

            # Parameters are updated one after another, so each gradient is
            # computed with the already-updated earlier parameters.
            for param in params:
                # ndarray "-=" updates the array in place, so the attribute
                # on self sees the change.
                param -= self.__learning_rate * numerical_derivative(loss, param)

            if step % report_every == 0:
                print("step = ", step, "  , loss value = ", self.loss_val())

    def predict(self, xdata):
        """Run the network on ``xdata`` and threshold the output at 0.5.

        Args:
            xdata: A single input pair, or a batch of input pairs with one
                pair per row.

        Returns:
            Tuple ``(y, result)``.  ``y`` is the raw network output.  When
            ``y`` holds a single value, ``result`` is the int 1 if it exceeds
            0.5 and 0 otherwise; for a batch, ``result`` is an integer array
            of 0/1 values with the same shape as ``y``.
        """
        y = self.__forward(xdata)

        if y.size == 1:
            result = 1 if y > 0.5 else 0
        else:
            result = (y > 0.5).astype(int)

        return y, result

In [23]:
# Create the AND gate network and train it on the full truth table
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 0, 0, 1])  # AND targets, one per row of xdata

and_obj = LogicGate("AND", xdata, tdata)
and_obj.train()

AND object is created
Initial loss value =  10.258336800584477
step =  0   , loss value =  9.923878092077246
step =  400   , loss value =  2.257250218409506
step =  800   , loss value =  2.1337927888786097
step =  1200   , loss value =  1.9566527423987157
step =  1600   , loss value =  1.7009839122402464
step =  2000   , loss value =  1.377771439646323
step =  2400   , loss value =  1.0462641248463829
step =  2800   , loss value =  0.774294988209459
step =  3200   , loss value =  0.5764658836039058
step =  3600   , loss value =  0.4378571940818907
step =  4000   , loss value =  0.34109531087643025
step =  4400   , loss value =  0.27264838671894104
step =  4800   , loss value =  0.22317794660308693
step =  5200   , loss value =  0.1865444382971421
step =  5600   , loss value =  0.15875854232673117
step =  6000   , loss value =  0.13720831772675102
step =  6400   , loss value =  0.1201545022091695
step =  6800   , loss value =  0.10641474596637843
step =  7200   , loss value =  0.0951677

In [24]:
# AND gate prediction: query the trained network with each input pair
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for data in test_data:
    prediction = and_obj.predict(data)
    print(prediction)

(array([9.26200639e-05]), 0)
(array([0.00478046]), 0)
(array([0.00477648]), 0)
(array([0.99164333]), 1)


In [21]:
# Create the OR gate network and train it on the full truth table
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 1, 1, 1])  # OR targets, one per row of xdata

or_obj = LogicGate("OR", xdata, tdata)
or_obj.train()

OR object is created
Initial loss value =  2.304122541200034
step =  0   , loss value =  2.2943325332477102
step =  400   , loss value =  1.927260470667521
step =  800   , loss value =  1.698011249815765
step =  1200   , loss value =  1.3746417586682826
step =  1600   , loss value =  1.0194128037972796
step =  2000   , loss value =  0.7222983075890542
step =  2400   , loss value =  0.513358478227762
step =  2800   , loss value =  0.37581939148087573
step =  3200   , loss value =  0.285427179900028
step =  3600   , loss value =  0.22442497076711265
step =  4000   , loss value =  0.18179989698651453
step =  4400   , loss value =  0.1509701755954736
step =  4800   , loss value =  0.127964231935384
step =  5200   , loss value =  0.11032130585866651
step =  5600   , loss value =  0.09646832033091562
step =  6000   , loss value =  0.08536782904235315
step =  6400   , loss value =  0.07631537232395062
step =  6800   , loss value =  0.0688197030337826
step =  7200   , loss value =  0.062529946

In [22]:
# OR gate prediction: query the trained network with each input pair
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for data in test_data:
    prediction = or_obj.predict(data)
    print(prediction)

(array([0.00819173]), 0)
(array([0.99736488]), 1)
(array([0.99718786]), 1)
(array([0.99991242]), 1)


In [19]:
# Create the NAND gate network and train it on the full truth table
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([1, 1, 1, 0])  # NAND targets, one per row of xdata

nand_obj = LogicGate("NAND", xdata, tdata)
nand_obj.train()

NAND object is created
Initial loss value =  3.094325917146323
step =  0   , loss value =  3.0643750666653577
step =  400   , loss value =  2.246483150950328
step =  800   , loss value =  2.146319105209918
step =  1200   , loss value =  1.9912507731842344
step =  1600   , loss value =  1.7156755299912771
step =  2000   , loss value =  1.3572148251680713
step =  2400   , loss value =  1.0336821869153145
step =  2800   , loss value =  0.7700607120130298
step =  3200   , loss value =  0.5723252085314177
step =  3600   , loss value =  0.43221809122067767
step =  4000   , loss value =  0.3346964067607979
step =  4400   , loss value =  0.2662316851458586
step =  4800   , loss value =  0.2171360684196023
step =  5200   , loss value =  0.18102642950102796
step =  5600   , loss value =  0.1537881516604565
step =  6000   , loss value =  0.13275410979803193
step =  6400   , loss value =  0.11616506441216695
step =  6800   , loss value =  0.10283493222245454
step =  7200   , loss value =  0.091945

In [20]:
# NAND gate prediction: query the trained network with each input pair
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for data in test_data:
    prediction = nand_obj.predict(data)
    print(prediction)

(array([0.9999235]), 1)
(array([0.99550804]), 1)
(array([0.99531982]), 1)
(array([0.00831601]), 0)


In [17]:
# Create the XOR gate network and train it on the full truth table
xdata = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
tdata = np.array([0, 1, 1, 0])  # XOR targets, one per row of xdata

xor_obj = LogicGate("XOR", xdata, tdata)
xor_obj.train()

XOR object is created
Initial loss value =  6.4991371632638115
step =  0   , loss value =  6.34191789054433
step =  400   , loss value =  2.7729140058982003
step =  800   , loss value =  2.767421034645885
step =  1200   , loss value =  2.7617679884132307
step =  1600   , loss value =  2.7554652813699625
step =  2000   , loss value =  2.747965675978508
step =  2400   , loss value =  2.7386047402867226
step =  2800   , loss value =  2.726544097578281
step =  3200   , loss value =  2.7107212861088557
step =  3600   , loss value =  2.689814677389422
step =  4000   , loss value =  2.662235845868076
step =  4400   , loss value =  2.626176201916909
step =  4800   , loss value =  2.5797741050925613
step =  5200   , loss value =  2.521491852715468
step =  5600   , loss value =  2.4506404377611437
step =  6000   , loss value =  2.3675982934637982
step =  6400   , loss value =  2.273136356391934
step =  6800   , loss value =  2.1669602577338694
step =  7200   , loss value =  2.0465008458027025
st

In [18]:
# XOR gate prediction: query the trained network with each input pair
test_data = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

for data in test_data:
    prediction = xor_obj.predict(data)
    print(prediction)

(array([0.00595721]), 0)
(array([0.97426299]), 1)
(array([0.97055108]), 1)
(array([0.04304948]), 0)
