https://github.com/deeplearningzerotoall/PyTorch/blob/master/lab-08_2_xor_nn.ipynb

# Naive Coding

In [1]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

w1 = torch.Tensor(2,2).to(device)
b1 = torch.Tensor(2).to(device)
w2 = torch.Tensor(2, 1).to(device)
b2 = torch.Tensor(1).to(device)

def sigmoid(x):
    return 1./(1. + torch.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1- sigmoid(x))

In [3]:
learning_rate = 0.1

for step in range(10001):
    l1 = torch.add(torch.matmul(X,w1),b1)
    a1 = sigmoid(l1)
    l2 = torch.add(torch.matmul(a1,w2),b2)
    Y_pred = sigmoid(l2)
    
    cost = -torch.mean(Y*torch.log(Y_pred) + (1-Y)*torch.log(1-Y_pred))
    
    d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)
    
    d_l2 = d_Y_pred * sigmoid_prime(l2)
    d_b2 = d_l2
    d_w2 = torch.matmul(torch.transpose(a1, 0, 1), d_b2)

    d_a1 = torch.matmul(d_b2, torch.transpose(w2, 0, 1))
    d_l1 = d_a1 * sigmoid_prime(l1)
    d_b1 = d_l1
    d_w1 = torch.matmul(torch.transpose(X, 0, 1), d_b1)
    
    w1 = w1 - learning_rate * d_w1
    b1 = b1 - learning_rate * torch.mean(d_b1, 0)
    w2 = w2 - learning_rate * d_w2
    b2 = b2 - learning_rate * torch.mean(d_b2, 0)
    
    if step % 1000 == 0:
        print(step, cost.item())

0 0.6931471824645996
1000 0.6931471824645996
2000 0.6931471824645996
3000 0.6931471824645996
4000 0.6931471824645996
5000 0.6931471824645996
6000 0.6931471824645996
7000 0.6931471824645996
8000 0.6931471824645996
9000 0.6931471824645996
10000 0.6931471824645996


# Using torch Package

In [4]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

linear1 = torch.nn.Linear(2, 2, bias=True)
linear2 = torch.nn.Linear(2, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [5]:
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 1000 == 0:
        print(step, cost.item())

0 0.7434073090553284
1000 0.6930999755859375
2000 0.6838316917419434
3000 0.013983823359012604
4000 0.005768374539911747
5000 0.003600734518840909
6000 0.0026096487417817116
7000 0.0020436146296560764
8000 0.001678097527474165
9000 0.0014228166546672583
10000 0.0012345188297331333


In [6]:
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('Hypothesis: ', hypothesis.detach().cpu().numpy())
    print('Correct: ', predicted.detach().cpu().numpy())
    print('Accuracy: ', accuracy.item())

Hypothesis:  [[0.00106364]
 [0.99889404]
 [0.99889404]
 [0.00165861]]
Correct:  [[0.]
 [1.]
 [1.]
 [0.]]
Accuracy:  1.0


In [7]:
hypothesis

tensor([[0.0011],
        [0.9989],
        [0.9989],
        [0.0017]], device='cuda:0')

In [8]:
predicted

tensor([[0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')

In [9]:
Y

tensor([[0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')

In [10]:
(predicted == Y).float().mean()

tensor(1., device='cuda:0')

In [11]:
print((torch.FloatTensor([3.5]) > 0.2))
print((torch.FloatTensor([3.5]) > 0.2).float())

tensor([True])
tensor([1.])


In [12]:
print(predicted.detach())
print(predicted.detach().cpu())
print(predicted.detach().cpu().numpy())

tensor([[0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.],
        [1.],
        [1.],
        [0.]])
[[0.]
 [1.]
 [1.]
 [0.]]


# layer를 늘려보기

In [13]:
linear1 = torch.nn.Linear(2, 12, bias=True)
linear2 = torch.nn.Linear(12, 12, bias=True)
linear3 = torch.nn.Linear(12, 12, bias=True)
linear4 = torch.nn.Linear(12, 1, bias=True)
sigmoid = torch.nn.Sigmoid()

model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3, sigmoid, linear4, sigmoid).to(device)

criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 1000 == 0:
        print(step, cost.item())

0 0.7815723419189453
1000 0.6929947137832642
2000 0.6913048028945923
3000 0.0025057331658899784
4000 0.0006415569805540144
5000 0.0003520289610605687
6000 0.0002388799621257931
7000 0.00017942648264579475
8000 0.00014300202019512653
9000 0.00011851613817270845
10000 0.00010094569006469101
