In [1]:
import torch
from collections import OrderedDict

# single source of truth for the seed used by both the CPU and CUDA generators
SEED = 777

# check device: use the GPU when one is visible, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# set seed for reproducibility
torch.manual_seed(SEED)
if device == 'cuda':
    torch.cuda.manual_seed_all(SEED)

# last expression of the cell: displays the installed PyTorch version
torch.__version__

'1.7.0+cu101'

In [2]:
def train(model, optimizer, criterion, x_train, y_train, n_epochs=10000, log_every=1000):
  """Fit `model` on the full (x_train, y_train) batch with plain gradient steps.

  Every iteration runs a forward pass on the whole batch, computes the cost
  with `criterion`, back-propagates it and applies one `optimizer` step.

  Args:
    model: callable module mapping `x_train` to predictions.
    optimizer: optimizer built over the parameters of `model`.
    criterion: loss called as `criterion(prediction, y_train)`; it must
      return a scalar tensor because `.backward()` is called on it directly.
    x_train: input batch passed to `model`.
    y_train: targets passed to `criterion`.
    n_epochs: index of the last epoch. Epochs run from 0 to `n_epochs`
      inclusive, so `n_epochs + 1` optimizer steps are taken.
    log_every: print the cost every `log_every` epochs; a falsy value
      (0 or None) disables printing.

  Returns:
    None. `model` and `optimizer` are updated in place.
  """
  for epoch in range(n_epochs + 1):
    hypothesis = model(x_train)
    cost = criterion(hypothesis, y_train)

    # clear the gradients left over from the previous step before accumulating new ones
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # the reported cost was computed before this epoch's parameter update
    if log_every and epoch % log_every == 0:
      print(f'epoch : {epoch:6}  |  cost : {cost.item():10.6f}')

## Multi Layer Perceptron and XOR problem
- 단층 Perceptron으로는 풀 수 없는 XOR 문제를 풀기 위해, Hidden Layer를 추가해 여러 층으로 쌓아 올린 MLP(Multi Layer Perceptron) 구현

In [3]:
# prepare data for xor problem
# the four input pairs of the XOR truth table, created directly on `device`
x_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32, device=device)
# one XOR label per input row, shaped (4, 1)
y_train = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32, device=device)

print(x_train)
print(y_train)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
tensor([[0.],
        [1.],
        [1.],
        [0.]])


In [4]:
# train model
# 2-2-1 MLP: one hidden layer of two sigmoid units followed by a sigmoid output unit.
# Layer names match the activation actually used (Sigmoid).
model = torch.nn.Sequential(OrderedDict([
    ('linear1', torch.nn.Linear(2, 2, bias=True)),
    ('sigmoid1', torch.nn.Sigmoid()),
    ('linear2', torch.nn.Linear(2, 1, bias=True)),
    ('sigmoid2', torch.nn.Sigmoid()),
])).to(device)  # parameters must live on the same device as x_train / y_train

optimizer = torch.optim.SGD(model.parameters(), lr=1)  # modified learning rate from 0.1 to 1 to speed up learning
# BCELoss expects probabilities in [0, 1], which the final Sigmoid provides
criterion = torch.nn.BCELoss().to(device)
train(model, optimizer, criterion, x_train, y_train)

epoch :      0  |  cost :   0.743407
epoch :   1000  |  cost :   0.693100
epoch :   2000  |  cost :   0.683832
epoch :   3000  |  cost :   0.013984
epoch :   4000  |  cost :   0.005768
epoch :   5000  |  cost :   0.003601
epoch :   6000  |  cost :   0.002610
epoch :   7000  |  cost :   0.002044
epoch :   8000  |  cost :   0.001678
epoch :   9000  |  cost :   0.001423
epoch :  10000  |  cost :   0.001235


In [5]:
# compute accuracy
with torch.no_grad():  # evaluation only, so gradient tracking is switched off
  hypothesis = model(x_train)
  # threshold the model outputs at 0.5 to obtain hard 0/1 predictions
  predicted = hypothesis.gt(0.5).float()
  # fraction of predictions that equal the labels
  accuracy = predicted.eq(y_train).float().mean()
  print(f'>>> hypothesis :\n {hypothesis} \n\n>>> label :\n {y_train} \n\n>>> accuracy : {accuracy}')

>>> hypothesis :
 tensor([[0.0011],
        [0.9989],
        [0.9989],
        [0.0017]]) 

>>> label :
 tensor([[0.],
        [1.],
        [1.],
        [0.]]) 

>>> accuracy : 1.0


## Multi Layer Perceptron (Deeper)
- 더 많은 Hidden Layer를 쌓아(유닛 10개짜리 층 3개) 앞의 MLP보다 loss를 더 낮춤

In [6]:
# train model
# deeper MLP: three hidden layers of ten sigmoid units each, then a sigmoid output unit.
# Layer names match the activation actually used (Sigmoid).
model2 = torch.nn.Sequential(OrderedDict([
    ('linear1', torch.nn.Linear(2, 10, bias=True)),
    ('sigmoid1', torch.nn.Sigmoid()),
    ('linear2', torch.nn.Linear(10, 10, bias=True)),
    ('sigmoid2', torch.nn.Sigmoid()),
    ('linear3', torch.nn.Linear(10, 10, bias=True)),
    ('sigmoid3', torch.nn.Sigmoid()),
    ('linear4', torch.nn.Linear(10, 1, bias=True)),
    ('sigmoid4', torch.nn.Sigmoid()),
])).to(device)  # parameters must live on the same device as x_train / y_train

optimizer2 = torch.optim.SGD(model2.parameters(), lr=1)  # modified learning rate from 0.1 to 1 to speed up learning
# BCELoss expects probabilities in [0, 1], which the final Sigmoid provides
criterion2 = torch.nn.BCELoss().to(device)
train(model2, optimizer2, criterion2, x_train, y_train)

epoch :      0  |  cost :   0.697808
epoch :   1000  |  cost :   0.692944
epoch :   2000  |  cost :   0.679710
epoch :   3000  |  cost :   0.002338
epoch :   4000  |  cost :   0.000608
epoch :   5000  |  cost :   0.000333
epoch :   6000  |  cost :   0.000225
epoch :   7000  |  cost :   0.000169
epoch :   8000  |  cost :   0.000134
epoch :   9000  |  cost :   0.000111
epoch :  10000  |  cost :   0.000095


In [7]:
# compute accuracy
with torch.no_grad():  # evaluation only, so gradient tracking is switched off
  hypothesis = model2(x_train)
  # threshold the model outputs at 0.5 to obtain hard 0/1 predictions
  predicted = hypothesis.gt(0.5).float()
  # fraction of predictions that equal the labels
  accuracy = predicted.eq(y_train).float().mean()
  print(f'>>> hypothesis :\n {hypothesis} \n\n>>> label :\n {y_train} \n\n>>> accuracy : {accuracy}')

>>> hypothesis :
 tensor([[7.4389e-05],
        [9.9988e-01],
        [9.9990e-01],
        [8.8289e-05]]) 

>>> label :
 tensor([[0.],
        [1.],
        [1.],
        [0.]]) 

>>> accuracy : 1.0
