### XOR Perceptron

## XOR in Single Layer Perceptron

In [3]:
import torch

In [12]:
# XOR data 선언
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

In [13]:
# nn layers
linear = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

In [14]:
# model ( nn.Sequential은 코드에 적힌 순서대로 값을 전달해 처리한다. linear -> sigmoid )
model = torch.nn.Sequential(linear, sigmoid)

In [15]:
# define cost/loss & optimizer / BInary Cross Entropy 사용(0, 1 이므로)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [17]:
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    # cost/loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.6975341439247131
100 0.6931471824645996
200 0.6931471824645996
300 0.6931471824645996
400 0.6931471824645996
500 0.6931471824645996
600 0.6931471824645996
700 0.6931471824645996
800 0.6931471824645996
900 0.6931471824645996
1000 0.6931471824645996
1100 0.6931471824645996
1200 0.6931471824645996
1300 0.6931471824645996
1400 0.6931471824645996
1500 0.6931471824645996
1600 0.6931471824645996
1700 0.6931471824645996
1800 0.6931471824645996
1900 0.6931471824645996
2000 0.6931471824645996
2100 0.6931471824645996
2200 0.6931471824645996
2300 0.6931471824645996
2400 0.6931471824645996
2500 0.6931471824645996
2600 0.6931471824645996
2700 0.6931471824645996
2800 0.6931471824645996
2900 0.6931471824645996
3000 0.6931471824645996
3100 0.6931471824645996
3200 0.6931471824645996
3300 0.6931471824645996
3400 0.6931471824645996
3500 0.6931471824645996
3600 0.6931471824645996
3700 0.6931471824645996
3800 0.6931471824645996
3900 0.6931471824645996
4000 0.6931471824645996
4100 0.6931471824645996
4200

#### 200번 이후로 loss가 일정하며 학습이 제대로 되지 않을음 확인할 수 있다

In [19]:
# Accuracy computation
# True if hypothesis>0.5 else False
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\n Hypothesis: ', hypothesis.detach().cpu().numpy(), '\n Correct: ', predicted.detach().cpu().numpy(), 
          '\n Accuracy: ', accuracy.item())


 Hypothesis:  [[0.5]
 [0.5]
 [0.5]
 [0.5]] 
 Correct:  [[0.]
 [0.]
 [0.]
 [0.]] 
 Accuracy:  0.5


#### 단층 Perceptron으로는 제대로 구현이 불가능함을 알 수 있다.

## XOR In Multilayer Perceptron

In [44]:
# XOR data 선언
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

In [45]:
# nn Layer
w1 = torch.Tensor(2, 2)
b1 = torch.Tensor(2)
w2 = torch.Tensor(2, 1)
b2 = torch.Tensor(1)

learning_rate = 1
# sigmoid function
def sigmoid(x):
    return 1.0 / (1.0 + torch.exp(-x))

#  derivative of the sigmoid function
def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [46]:
for step in range(10001):
    # Propergation -> Backpropergarion을 반복하며 진행한다.
    # forward
    l1 = torch.add(torch.matmul(X, w1), b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1, w2), b2)
    Y_pred = sigmoid(l2)
    
    # Binary Cross Entropy Loss
    cost = -torch.mean(Y * torch.log(Y_pred) + (1 - Y) * torch.log(1 - Y_pred))
    
    # Back prob
    
    # Loss derivative(BCE 미분식)
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    
    # Layer 2
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)
    
    # Layer 1
    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)
    
    # Weight update
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    
    if step % 100 == 0:
        print(step, cost.item())

0 nan
100 nan
200 nan
300 nan
400 nan
500 nan
600 nan
700 nan
800 nan
900 nan
1000 nan
1100 nan
1200 nan
1300 nan
1400 nan
1500 nan
1600 nan
1700 nan
1800 nan
1900 nan
2000 nan
2100 nan
2200 nan
2300 nan
2400 nan
2500 nan
2600 nan
2700 nan
2800 nan
2900 nan
3000 nan
3100 nan
3200 nan
3300 nan
3400 nan
3500 nan
3600 nan
3700 nan
3800 nan
3900 nan
4000 nan
4100 nan
4200 nan
4300 nan
4400 nan
4500 nan
4600 nan
4700 nan
4800 nan
4900 nan
5000 nan
5100 nan
5200 nan
5300 nan
5400 nan
5500 nan
5600 nan
5700 nan
5800 nan
5900 nan
6000 nan
6100 nan
6200 nan
6300 nan
6400 nan
6500 nan
6600 nan
6700 nan
6800 nan
6900 nan
7000 nan
7100 nan
7200 nan
7300 nan
7400 nan
7500 nan
7600 nan
7700 nan
7800 nan
7900 nan
8000 nan
8100 nan
8200 nan
8300 nan
8400 nan
8500 nan
8600 nan
8700 nan
8800 nan
8900 nan
9000 nan
9100 nan
9200 nan
9300 nan
9400 nan
9500 nan
9600 nan
9700 nan
9800 nan
9900 nan
10000 nan


In [47]:
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\n Hypothesis: ', hypothesis.detach().cpu().numpy(), '\n Correct: ', predicted.detach().cpu().numpy(), 
          '\n Accuracy: ', accuracy.item())


 Hypothesis:  [[0.00171033]
 [0.9977189 ]
 [0.9977095 ]
 [0.00153983]] 
 Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
 Accuracy:  1.0


## Code:xor-nn

In [48]:
# XOR data 선언
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

In [49]:
# 각 Layer(2개)와 sigmoid 함수 선언
linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

In [50]:
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid)
# define cost/Loss & optimozer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/Loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.7035230398178101
100 0.6934325098991394
200 0.6932744979858398
300 0.6932134628295898
400 0.6931830644607544
500 0.6931648254394531
600 0.693152129650116
700 0.6931419372558594
800 0.6931324601173401
900 0.693122386932373
1000 0.6931099891662598
1100 0.6930927634239197
1200 0.6930667161941528
1300 0.6930224299430847
1400 0.6929371356964111
1500 0.692742109298706
1600 0.6921645998954773
1700 0.6895726323127747
1800 0.6702017188072205
1900 0.5715908408164978
2000 0.23000909388065338
2100 0.0819261372089386
2200 0.04671409726142883
2300 0.032176896929740906
2400 0.024387534707784653
2500 0.019571945071220398
2600 0.01631409488618374
2700 0.013969474472105503
2800 0.012204252183437347
2900 0.010828915983438492
3000 0.009728006087243557
3100 0.008827459067106247
3200 0.008077488280832767
3300 0.007443435490131378
3400 0.0069005852565169334
3500 0.006430664099752903
3600 0.006019984371960163
3700 0.0056580533273518085
3800 0.0053367409855127335
3900 0.005049640312790871
4000 0.0047915270

In [52]:
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\n Hypothesis: ', hypothesis.detach().cpu().numpy(), '\n Correct: ', predicted.detach().cpu().numpy(), 
          '\n Accuracy: ', accuracy.item())


 Hypothesis:  [[1.1144429e-03]
 [9.9846280e-01]
 [9.9896157e-01]
 [9.6936122e-04]] 
 Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
 Accuracy:  1.0


## Code: xor-nn-wide-deep(4층)

In [54]:
# XOR data 선언
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

In [55]:
# 각 Layer(4개)와 sigmoid 함수 선언
linear1 = torch.nn.Linear(2, 10, bias=True)
linear2 = torch.nn.Linear(10, 10, bias=True)
linear3 = torch.nn.Linear(10, 10, bias=True)
linear4 = torch.nn.Linear(10, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

In [56]:
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid)
# define cost/Loss & optimozer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    # cost/Loss function
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    if step % 100 == 0:
        print(step, cost.item())

0 0.7161834239959717
100 0.6931208968162537
200 0.6931166052818298
300 0.6931120157241821
400 0.693107008934021
500 0.6931014657020569
600 0.693095326423645
700 0.6930884122848511
800 0.6930806040763855
900 0.693071722984314
1000 0.6930615305900574
1100 0.6930496692657471
1200 0.6930357217788696
1300 0.6930188536643982
1400 0.6929985284805298
1500 0.6929736137390137
1600 0.6929424405097961
1700 0.6929025053977966
1800 0.6928502321243286
1900 0.6927796602249146
2000 0.6926810145378113
2100 0.6925365328788757
2200 0.6923128366470337
2300 0.6919382810592651
2400 0.6912395358085632
2500 0.6897046566009521
2600 0.6852677464485168
2700 0.6636131405830383
2800 0.4901348352432251
2900 0.05120769143104553
3000 0.011127297766506672
3100 0.00567008089274168
3200 0.0036980737932026386
3300 0.002708381274715066
3400 0.00212107808329165
3500 0.0017350923735648394
3600 0.0014633642276749015
3700 0.0012623455841094255
3800 0.0011079877149313688
3900 0.0009859782876446843
4000 0.0008872214239090681
410

In [58]:
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\n Hypothesis: ', hypothesis.detach().cpu().numpy(), '\n Correct: ', predicted.detach().cpu().numpy(), 
          '\n Accuracy: ', accuracy.item())


 Hypothesis:  [[9.1147675e-05]
 [9.9988830e-01]
 [9.9989271e-01]
 [1.4922999e-04]] 
 Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
 Accuracy:  1.0
