In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

torch.__version__

'1.7.0+cu101'

In [2]:
# Fix PyTorch's global RNG seed so that randomly initialised parameters repeat on every run
torch.manual_seed(seed=1)

<torch._C.Generator at 0x7fa3c3fd7b58>

In [3]:
# Data: 8 samples with 4 features each, plus one integer class label (0, 1 or 2) per sample.
# torch.tensor with an explicit dtype replaces the legacy FloatTensor / LongTensor
# constructors; the resulting values and dtypes (float32 / int64) are the same.
x_train = torch.tensor(
    [[1, 2, 1, 1],
     [2, 1, 3, 2],
     [3, 1, 3, 4],
     [4, 1, 5, 5],
     [1, 7, 5, 5],
     [1, 2, 5, 6],
     [1, 6, 6, 6],
     [1, 7, 7, 7]],
    dtype=torch.float32,
)
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.long)

print(x_train.shape)
print(x_train)

torch.Size([8, 4])
tensor([[1., 2., 1., 1.],
        [2., 1., 3., 2.],
        [3., 1., 3., 4.],
        [4., 1., 5., 5.],
        [1., 7., 5., 5.],
        [1., 2., 5., 6.],
        [1., 6., 6., 6.],
        [1., 7., 7., 7.]])


## Train model with Low-level Cross Entropy Loss

In [4]:
# Softmax regression trained with a hand-written cross-entropy loss.
# W maps the 4 input features to 3 class scores (logits).
W = torch.zeros([4, 3], requires_grad=True)
# NOTE(review): b has a single element, so broadcasting adds the same value to
# every class logit. Softmax is unchanged by such a uniform shift, so this bias
# cannot influence the loss; a per-class bias would need shape (3,). Left as-is
# so the printed costs are not altered.
b = torch.zeros(1, requires_grad=True)
optimizer = optim.SGD([W, b], lr=0.1)

# The labels never change, so their one-hot encoding is built once, outside the loop.
y_one_hot = F.one_hot(y_train, num_classes=W.size(1)).to(W.dtype)

n_epochs = 1000
for epoch in range(n_epochs + 1):  # +1 so the final epoch (n_epochs) is reported too
  hypothesis = x_train.matmul(W) + b

  # Cross entropy: -sum_c y_c * log(p_c) per sample, averaged over the batch.
  # log_softmax replaces log(softmax(...)): if a probability underflows to 0,
  # log gives -inf and the product with a 0 one-hot entry becomes NaN.
  cost = (y_one_hot * -F.log_softmax(hypothesis, dim=1)).sum(dim=1).mean()

  optimizer.zero_grad()
  cost.backward()
  optimizer.step()

  if epoch % 100 == 0:
    print(f'epoch : {epoch:5}  |  cost {cost.item():10.6f}')

epoch :     0  |  cost   1.098612
epoch :   100  |  cost   0.761050
epoch :   200  |  cost   0.689991
epoch :   300  |  cost   0.643229
epoch :   400  |  cost   0.604117
epoch :   500  |  cost   0.568255
epoch :   600  |  cost   0.533922
epoch :   700  |  cost   0.500291
epoch :   800  |  cost   0.466908
epoch :   900  |  cost   0.433507
epoch :  1000  |  cost   0.399962


## Train model with `F.cross_entropy`

In [5]:
# Softmax regression again, but the loss comes from F.cross_entropy, which is
# given the raw class scores and the integer labels directly.
W = torch.zeros(4, 3, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
optimizer = optim.SGD(params=[W, b], lr=0.1)

n_epochs = 1000
for epoch in range(0, n_epochs + 1):
  # Clear gradients left over from the previous step before the new backward pass.
  optimizer.zero_grad()

  logits = x_train @ W + b
  cost = F.cross_entropy(logits, y_train)
  cost.backward()
  optimizer.step()

  # Report progress on every 100th epoch.
  if not epoch % 100:
    print(f'epoch : {epoch:5}  |  cost {cost:10.6f}')

epoch :     0  |  cost   1.098612
epoch :   100  |  cost   0.761050
epoch :   200  |  cost   0.689991
epoch :   300  |  cost   0.643229
epoch :   400  |  cost   0.604117
epoch :   500  |  cost   0.568256
epoch :   600  |  cost   0.533922
epoch :   700  |  cost   0.500291
epoch :   800  |  cost   0.466908
epoch :   900  |  cost   0.433507
epoch :  1000  |  cost   0.399962


## High-level Implementation with `nn.Module`

In [6]:
class SoftmaxClassifierModel(nn.Module):
  """Single linear layer that outputs one raw score (logit) per class.

  forward() applies no softmax; it returns the output of the linear layer as-is.

  Args:
    in_features: Number of input features per sample (default 4).
    num_classes: Number of output class scores per sample (default 3).
  """

  def __init__(self, in_features: int = 4, num_classes: int = 3):
    super().__init__()
    self.linear = nn.Linear(in_features, num_classes)

  def forward(self, x):
    """Return the linear layer's output for x (last dimension: in_features -> num_classes)."""
    return self.linear(x)

In [7]:
# Train the nn.Module classifier with plain SGD (lr=0.1) on the full training set.
model = SoftmaxClassifierModel()
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

n_epochs = 1000
for epoch in range(0, n_epochs + 1):
  # Clear gradients left over from the previous step before the new backward pass.
  optimizer.zero_grad()

  logits = model(x_train)
  cost = F.cross_entropy(logits, y_train)
  cost.backward()
  optimizer.step()

  # Report progress on every 100th epoch.
  if not epoch % 100:
    print(f'epoch : {epoch:5}  |  cost : {cost.item():10.6f}')

epoch :     0  |  cost :   1.616785
epoch :   100  |  cost :   0.658891
epoch :   200  |  cost :   0.573443
epoch :   300  |  cost :   0.518151
epoch :   400  |  cost :   0.473265
epoch :   500  |  cost :   0.433516
epoch :   600  |  cost :   0.396563
epoch :   700  |  cost :   0.360914
epoch :   800  |  cost :   0.325392
epoch :   900  |  cost :   0.289178
epoch :  1000  |  cost :   0.254148
