In [None]:
'''
    Softmax Classification by PyTorch
'''

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

z = torch.FloatTensor([[1, 2, 3]])
z

tensor([[1., 2., 3.]])

In [9]:
hypothesis = torch.softmax(z, dim=1)
hypothesis

tensor([[0.0900, 0.2447, 0.6652]])

In [11]:
# softmax sum => always 1
print(hypothesis.sum())

tensor(1.)


In [42]:
# create tensors randomly => 3 means row / 5 means col
z = torch.rand(3, 5, requires_grad=True)
hypothesis = F.softmax(z, dim=1) # dim=1 => 2D array
hypothesis.sum(dim=1)

tensor([1.0000, 1.0000, 1.0000], grad_fn=<SumBackward2>)

In [31]:
# randint => 5 means 0 ~ 4 / 3 means 1D size 3 array
y = torch.randint(5, (3,)).long()
y

tensor([4, 3, 3])

In [35]:
# y => one hot encoded y => number of col = number of classes
y_one_hot = torch.zeros_like(hypothesis)

# 1 => dim / unsqueeze => 3 to (3, 1) / 1 => one hot on proper index
# inplace
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 1., 0.]])

In [45]:
# low level
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
cost

tensor(1.5921, grad_fn=<MeanBackward1>)

In [41]:
# high level
F.log_softmax(z, dim=1)

tensor([[-1.7115, -1.2753, -1.7496, -1.4889, -1.9617],
        [-1.7165, -1.8392, -1.2480, -1.8234, -1.5474],
        [-1.9166, -1.4405, -1.8537, -1.5893, -1.3651]],
       grad_fn=<LogSoftmaxBackward>)

In [53]:
# version - 2
cost = (y_one_hot * -F.log_softmax(z, dim=1)).sum(dim=1).mean()
cost

tensor(1.5921, grad_fn=<MeanBackward1>)

In [50]:
# version - 3 => nll = negative log likelihood
cost = F.nll_loss(F.log_softmax(z, dim=1), y)
cost

tensor(1.5921, grad_fn=<NllLossBackward>)

In [52]:
# version - 4
cost = F.cross_entropy(z, y)
cost

tensor(1.5921, grad_fn=<NllLossBackward>)

In [54]:
# whole procedure
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [57]:
# set weights and bias
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

In [65]:
optimizer = optim.SGD([W, b], lr=0.1)

In [66]:
nb_epochs = 1000

for epoch in range(1 + nb_epochs):
    z = x_train.matmul(W) + b
    cost = F.cross_entropy(z, y_train)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print('Epoch: {:4d}/{}, Cost:{:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

Epoch:    0/1000, Cost:0.277016
Epoch:  100/1000, Cost:0.268716
Epoch:  200/1000, Cost:0.260893
Epoch:  300/1000, Cost:0.253502
Epoch:  400/1000, Cost:0.246508
Epoch:  500/1000, Cost:0.239877
Epoch:  600/1000, Cost:0.233581
Epoch:  700/1000, Cost:0.227593
Epoch:  800/1000, Cost:0.221892
Epoch:  900/1000, Cost:0.216455
Epoch: 1000/1000, Cost:0.211266
