### XOR 문제 - 딥러닝 아키텍처
- 딥러닝에서는, 1개 이상의 은닉층(hidden layer)을 만들 수 있고, 각 은닉층(hidden layer)에 존재하는 노드(node) 개수 또한 임의의 개수를 만들 수 있음. 그러나 은닉층과 노드 수가 많아지면 학습 속도가 느려지므로 적절한 개수의 은닉층과 노드수를 고려하여 구현하는 것이 필요함

1) input

2) feed forward

3) 정답 t와 출력된 y 값의 차이를 바탕으로 손실(loss) 값 계산

4) update W(2), b(2), W(3), b(3) => repeat 1)

In [5]:
import numpy as np

# sigmoid 함수
def sigmoid(x):
    return 1 / (1+np.exp(-x))

# 수치미분 함수
def numerical_derivative(f, x):
    delta_x = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])
    
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + delta_x
        fx1 = f(x) # f(x+delta_x)
        
        x[idx] = tmp_val - delta_x
        fx2 = f(x) # f(x-delta_x)
        grad[idx] = (fx1 - fx2) / (2*delta_x)
        
        x[idx] = tmp_val
        it.iternext()
        
    return grad
        
class  LogicGate:
    def __init__(self, gate_name, xdata, tdata):
        self.name = gate_name
        
        # 입력 데이터, 정답 데이터 초기화     # batch 처리: 입력데이터와 정답데이터의 모든 경우의 수를 한 번에 처리하는 것
        self.__xdata = xdata.reshape(4,2) # 4개의 입력데이터 x1, x2에 대하여 batch 처리 행렬
        self.__tdata = tdata.reshape(4,1) # 4개의 입력데이터 x1, x2에 대한 각각의 계산값 행령
        
        # 2층 hidden layer unit : 6개 가정, 가중치 W2, 바이어스 b2 초기화
        self.__W2 = np.random.rand(2,6) # weight, 2X6 matrix
        self.__b2 = np.random.rand(6)
        
        # 3층 output layer unit : 1개, 가중치 W3, 바이어스 b3 초기화
        self.__W3 = np.random.rand(6,1)
        self.__b3 = np.random.rand(1)
        
        # 학습률 learning rate 초기화
        self.__learning_rate = 1e-2
        
        print(self.name + " object is created")
        
    def feed_forward(self): # feed forward를 통하여 손실함수(cross-entropy) 값 계산
        
        delta = 1e-7 # log 무한대 발산 방지
        
        z2 = np.dot(self.__xdata, self.__W2) + self.__b2 # 은닉층의 선형회귀 값
        a2 = sigmoid(z2) # 은닉층의 출력
        
        z3 = np.dot(a2, self.__W3) + self.__b3 # 출력층의 선형회귀 값
        y = a3 = sigmoid(z3) # 출력층의 출력
        
        # cross-entropy
        return -np.sum( self.__tdata * np.log(y + delta) + (1-self.__tdata) * np.log((1-y) + delta ))
    
    def loss_val(self): # 외부 출력을 윟나 손실함수(cross-entropy) 값 계산
        
        delta = 1e-7 # log 무한대 발산 방지
        
        z2 = np.dot(self.__xdata, self.__W2) + self.__b2 # 은닉층의 선형회귀 값
        a2 = sigmoid(z2) # 은닉층의 출력
        
        z3 = np.dot(a2, self.__W3) + self.__b3 # 출력층의 선형회귀 값
        y = a3 = sigmoid(z3) # 출력층의 출력
        
        # cross-entropy
        return -np.sum( self.__tdata * np.log(y + delta) + (1-self.__tdata) * np.log((1-y) + delta ))
    
    # 수치미분을 이용하여 손실함수가 최소가 될 때까지 학습하는 함수
    def train(self):
        f = lambda x: self.feed_forward()
        print("Initial loss value = ", self.loss_val())
        
        for step in range(10001):
            self.__W2 -= self.__learning_rate + numerical_derivative(f, self.__W2)
            self.__b2 -= self.__learning_rate + numerical_derivative(f, self.__b2)
            self.__W3 -= self.__learning_rate + numerical_derivative(f, self.__W3)
            self.__b3 -= self.__learning_rate + numerical_derivative(f, self.__b3)
            
            if(step % 400 == 0):
                print("step = ", step, " , loss value = ", self.loss_val())
                
    # query, 즉 미래 값 예측 함수
    def predict(self, xdata):
        z2 = np.dot(xdata, self.__W2) + self.__b2 # 은닉층의 선형회귀 값
        a2 = sigmoid(z2) # 은닉층의 출력
        
        z3 = np.dot(a2, self.__W3) + self.__b3 # 출력층의 선형회귀 값
        y = a3 = sigmoid(z3) # 출력층의 출력
        
        if y > 0.5:
            result = 1 # True
        else: 
            result = 0 # False
            
        return y, result


AND object is created
Initial loss value =  8.921548369928487
step =  0  , loss value =  3.1289477543902056
step =  400  , loss value =  0.025480318493430833
step =  800  , loss value =  0.016779688015569616
step =  1200  , loss value =  0.014842850775500026
step =  1600  , loss value =  0.01409844762239433
step =  2000  , loss value =  0.013689463445428046
step =  2400  , loss value =  0.013428888478155895
step =  2800  , loss value =  0.013247907019988075
step =  3200  , loss value =  0.013114727311191648
step =  3600  , loss value =  0.01301254351387229
step =  4000  , loss value =  0.012931618237166605
step =  4400  , loss value =  0.012865914854010672
step =  4800  , loss value =  0.012811489797600854
step =  5200  , loss value =  0.012765656613187987
step =  5600  , loss value =  0.012726521320140216
step =  6000  , loss value =  0.012692709840699033
step =  6400  , loss value =  0.012663200792994203
step =  6800  , loss value =  0.012637218979921254
step =  7200  , loss value = 

In [6]:
# AND Gate 객체 생성 및 training

xdata = np.array([ [0,0], [0,1], [1,0], [1,1] ])
tdata = np.array([0, 0, 0, 1])

and_obj = LogicGate("AND", xdata ,tdata)

and_obj.train()


AND object is created
Initial loss value =  5.851566940227959
step =  0  , loss value =  4.334256527358926
step =  400  , loss value =  0.021197952870179282
step =  800  , loss value =  0.01823005759233466
step =  1200  , loss value =  0.01514204883968952
step =  1600  , loss value =  0.014236555901946796
step =  2000  , loss value =  0.01377134388795708
step =  2400  , loss value =  0.013483275371649797
step =  2800  , loss value =  0.01328670473970821
step =  3200  , loss value =  0.01314381818798344
step =  3600  , loss value =  0.013035176809163744
step =  4000  , loss value =  0.012949736621819061
step =  4400  , loss value =  0.012880751582136217
step =  4800  , loss value =  0.012823865664195765
step =  5200  , loss value =  0.012776139229567798
step =  5600  , loss value =  0.012735515847097914
step =  6000  , loss value =  0.012700513291322237
step =  6400  , loss value =  0.012670035903823073
step =  6800  , loss value =  0.012643256093075095
step =  7200  , loss value =  0.0

In [10]:
# AND Gate prediction
test_data = np.array([ [0,0], [0,1], [1,0], [1,1] ])

for data in test_data:
    print(and_obj.predict(data))

(array([3.51750385e-47]), 0)
(array([0.00061074]), 0)
(array([0.00061074]), 0)
(array([0.98877725]), 1)


In [11]:
# OR Gate 객체 생성 및 training

xdata = np.array([ [0,0], [0,1], [1,0], [1,1] ])
tdata = np.array([0, 1, 1, 1])

or_obj = LogicGate("OR", xdata, tdata)

or_obj.train()

OR object is created
Initial loss value =  3.099011147747847
step =  0  , loss value =  2.1712068680847523
step =  400  , loss value =  0.02814315587656683
step =  800  , loss value =  0.02247423290414957
step =  1200  , loss value =  0.017345432847787792
step =  1600  , loss value =  0.01630138873272673
step =  2000  , loss value =  0.016871855564251633
step =  2400  , loss value =  0.013866989631921945
step =  2800  , loss value =  0.013387064627791498
step =  3200  , loss value =  0.013286514812806524
step =  3600  , loss value =  0.013275351952801358
step =  4000  , loss value =  0.013328161560955495
step =  4400  , loss value =  0.013463535447321491
step =  4800  , loss value =  0.013756510394555178
step =  5200  , loss value =  0.014561588703862222
step =  5600  , loss value =  0.017581363362344185
step =  6000  , loss value =  0.012189425918918917
step =  6400  , loss value =  0.011971374979555898
step =  6800  , loss value =  0.01191134190814984
step =  7200  , loss value =  0.

In [12]:
# OR Gate prediction

test_data = np.array([ [0,0], [0,1], [1,0], [1,1] ])

for data in test_data:
    print(or_obj.predict(data))

(array([0.00088865]), 0)
(array([0.9963701]), 1)
(array([0.9963701]), 1)
(array([0.9963701]), 1)


In [14]:
# XOR Gate 객체 생성 및 training
# XOR: 두 입력값이 같지 않고, 1을 포함하면 1을 출력
xdata = np.array([ [0,0], [0,1], [1,0], [1,1] ])
tdata = np.array([0, 1, 1, 0])

xor_obj = LogicGate("XOR", xdata, tdata)

xor_obj.train()

OR object is created
Initial loss value =  4.5535456495675
step =  0  , loss value =  3.565762273608605
step =  400  , loss value =  0.060651234983733565
step =  800  , loss value =  0.03077584562125033
step =  1200  , loss value =  0.025462767071104772
step =  1600  , loss value =  0.022527070424841908
step =  2000  , loss value =  0.020855793452105257
step =  2400  , loss value =  0.019760425898191276
step =  2800  , loss value =  0.018976027899830737
step =  3200  , loss value =  0.018380944802749394
step =  3600  , loss value =  0.01791099223668436
step =  4000  , loss value =  0.017528756942783132
step =  4400  , loss value =  0.017210762723561642
step =  4800  , loss value =  0.016941430232705196
step =  5200  , loss value =  0.016709959262147654
step =  5600  , loss value =  0.016508596963620604
step =  6000  , loss value =  0.01633161723055499
step =  6400  , loss value =  0.016174689359819194
step =  6800  , loss value =  0.016034471527484273
step =  7200  , loss value =  0.01