### import

In [82]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [83]:
# 같은 결과를 내기 위해 
torch.manual_seed(1)

<torch._C.Generator at 0x12d27b230>

### Cross-entropy Loss with torch.nn.functional

In [84]:
z = torch.rand(3, 5, requires_grad = True)
hypothesis = F.softmax(z, dim=1)
y = torch.randint(5, (3,)).long() 
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.]])

In [85]:
# Low level
torch.log(F.softmax(z, dim=1))

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward>)

In [86]:
# High level
F.log_softmax(z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward>)

In [87]:
# Low level
(y_one_hot * -torch.log(F.softmax(z,dim=1))).sum(dim=1).mean()

tensor(1.4689, grad_fn=<MeanBackward0>)

In [88]:
# High level
# F.nll_loss : negative loss likelihood
# nll_loss에는 long type을 인풋으로 받음 

F.nll_loss(F.log_softmax(z, dim=1), y)      

tensor(1.4689, grad_fn=<NllLossBackward>)

In [89]:
F.cross_entropy(z, y)

tensor(1.4689, grad_fn=<NllLossBackward>)

### Data

In [113]:
xy = np.loadtxt('data-04-zoo.csv', delimiter = ',', dtype = np.float32)

In [114]:
xy

array([[1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [0., 0., 1., ..., 0., 0., 3.],
       ...,
       [1., 0., 0., ..., 0., 1., 0.],
       [0., 0., 1., ..., 0., 0., 6.],
       [0., 1., 1., ..., 0., 0., 1.]], dtype=float32)

In [115]:
x_train =  torch.FloatTensor(xy[:, 0:-1])
x_train

tensor([[1., 0., 0.,  ..., 0., 0., 1.],
        [1., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 1.,  ..., 1., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 1., 0., 1.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 1., 0., 0.]])

In [116]:
y_train = torch.LongTensor(xy[:, [-1]]).squeeze() # [-1] : 벡터로 하기위해 (그냥 -1은 안됨), 마지막 컬럼만 추출 

In [117]:
y_train

tensor([0, 0, 3, 0, 0, 0, 0, 3, 3, 0, 0, 1, 3, 6, 6, 6, 1, 0, 3, 0, 1, 1, 0, 1,
        5, 4, 4, 0, 0, 0, 5, 0, 0, 1, 3, 0, 0, 1, 3, 5, 5, 1, 5, 1, 0, 0, 6, 0,
        0, 0, 0, 5, 4, 6, 0, 0, 1, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        6, 3, 0, 0, 2, 6, 1, 1, 2, 6, 3, 1, 0, 6, 3, 1, 5, 4, 2, 2, 3, 0, 0, 1,
        0, 5, 0, 6, 1])

In [118]:
print(xy.shape)
print(x_train.shape)
print(y_train.shape)

(101, 17)
torch.Size([101, 16])
torch.Size([101])


In [119]:
nb_classes = 7
y_one_hot = torch.zeros((len(y_train), nb_classes))
y_one_hot = y_one_hot.scatter(1, y_train.unsqueeze(1), 1)

In [120]:
y_one_hot

tensor([[1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1., 0., 0.],
        [0.,

In [122]:
z = F.softmax(x_train.matmul(W) + b, dim = 1) # or .mm or @

In [123]:
cost = (y_one_hot * -torch.log(z)).sum(dim=1).mean()

### Training with F.cross_entropy

In [124]:
# 모델 초기화
W = torch.zeros((16, 7), requires_grad = True)
b = torch.zeros(1, requires_grad= True)

# optimizer 설정
optimizer = optim.SGD([W,b], lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1) :
    
    # Cost 계산 (2)
    # z = x_train.matmul(W) + b # or .mm or @
    # cost = F.cross_entropy(z.squeeze(), y_one_hot.squeeze()) # 실제 코드로 하니 안돌아감 계속 
    
    z = F.softmax(x_train.matmul(W) + b, dim = 1) # or .mm or @
    cost = (y_one_hot * -torch.log(z)).sum(dim=1).mean()  # cross entropy low level
    
    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 100번 마다 로그 출력
    if epoch % 100 == 0 :
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()))

Epoch    0/1000 Cost: 1.945910
Epoch  100/1000 Cost: 0.471836
Epoch  200/1000 Cost: 0.326327
Epoch  300/1000 Cost: 0.257839
Epoch  400/1000 Cost: 0.215762
Epoch  500/1000 Cost: 0.186603
Epoch  600/1000 Cost: 0.164898
Epoch  700/1000 Cost: 0.147955
Epoch  800/1000 Cost: 0.134279
Epoch  900/1000 Cost: 0.122962
Epoch 1000/1000 Cost: 0.113422


### High-level Implementation with `nn.Module`

In [129]:
x_train.shape

torch.Size([101, 16])

In [137]:
y_train.shape

torch.Size([101])

In [133]:
class SoftmaxClassifierModel(nn.Module) :
    def __init__(self) :
        super().__init__()
        self.linear = nn.Linear(16, 7)
    
    def forward(self, x) :     #forward spell 틀리지 말기;; 틀리면 안돌아감  
        return self.linear(x)

In [134]:
model = SoftmaxClassifierModel()

In [135]:
model(x_train)

tensor([[ 4.5460e-01, -8.3686e-02, -6.0767e-01, -5.7760e-01,  6.3988e-01,
          1.8320e-01,  1.1782e-01],
        [ 6.1679e-01, -1.9700e-01, -6.4860e-01, -4.3452e-01,  7.6939e-01,
          3.9976e-01,  2.4992e-01],
        [ 3.6386e-01,  5.6284e-01, -1.5396e-01,  3.0666e-01,  6.3417e-01,
         -6.0098e-02,  2.5796e-03],
        [ 4.5460e-01, -8.3686e-02, -6.0767e-01, -5.7760e-01,  6.3988e-01,
          1.8320e-01,  1.1782e-01],
        [ 4.0503e-01, -1.3607e-01, -8.1359e-01, -4.5682e-01,  8.8770e-01,
          1.8565e-01,  3.3355e-01],
        [ 6.1679e-01, -1.9700e-01, -6.4860e-01, -4.3452e-01,  7.6939e-01,
          3.9976e-01,  2.4992e-01],
        [ 5.3877e-01, -4.2266e-01, -4.2010e-01, -4.0881e-01,  7.4327e-01,
          2.5530e-01,  1.1877e-01],
        [ 4.9760e-01,  2.7626e-01,  2.3952e-01,  3.5466e-01,  4.8974e-01,
          9.5542e-03, -2.1220e-01],
        [ 3.6386e-01,  5.6284e-01, -1.5396e-01,  3.0666e-01,  6.3417e-01,
         -6.0098e-02,  2.5796e-03],
        [ 

In [136]:
# optimizer 설정
optimizer = optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # H(x) 계산
    prediction = model(x_train)

    # cost 계산
    cost = F.cross_entropy(prediction, y_train)

    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    
    # 20번마다 로그 출력
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.982726
Epoch  100/1000 Cost: 0.489954
Epoch  200/1000 Cost: 0.331328
Epoch  300/1000 Cost: 0.255378
Epoch  400/1000 Cost: 0.208865
Epoch  500/1000 Cost: 0.177086
Epoch  600/1000 Cost: 0.153876
Epoch  700/1000 Cost: 0.136125
Epoch  800/1000 Cost: 0.122085
Epoch  900/1000 Cost: 0.110689
Epoch 1000/1000 Cost: 0.101251
