<a href="https://colab.research.google.com/github/thai94/d2l/blob/main/3.linear-neural-networks/3_6_implementation_of_softmax_regression_from_%1Dscratch_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [155]:
import torch
from IPython import display
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

In [156]:
def get_dataloader_workers():
    """Use 4 processes to read the data."""
    return 8

In [157]:
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))

In [158]:
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

  cpuset_checked))


In [159]:
num_inputs = 784
num_outputs = 10

W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)

In [160]:
def softmax(X):
  X_exp = torch.exp(X)
  partition = X_exp.sum(1, keepdim=True)
  return X_exp/partition

In [161]:
X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(1)

(tensor([[0.3293, 0.0453, 0.4958, 0.1138, 0.0157],
         [0.0143, 0.8970, 0.0544, 0.0175, 0.0169]]), tensor([1., 1.]))

In [162]:
X

tensor([[ 0.8983, -1.0850,  1.3075, -0.1643, -2.1444],
        [-1.0595,  3.0798,  0.2766, -0.8588, -0.8942]])

In [163]:
def net(X):
  return softmax(torch.matmul(X.reshape((-1, num_inputs)), W) + b)

In [164]:
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y_hat[[0, 1], y]

tensor([0.1000, 0.5000])

In [165]:
def cross_entropy(y_hat, y):
  return - torch.log(y_hat[range(len(y_hat)), y])

In [166]:
def accuracy(y_hat, y):
  if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
    y_hat = y_hat.argmax(axis=1)
  cmp = y_hat.type(y.dtype) == y
  return float(cmp.type(y.dtype).sum())

In [167]:
accuracy(y_hat, y) / len(y)

0.5

In [168]:
class Accumulator:
  def __init__(self, n):
    self.data = [0.0] * n
  
  def add(self, *args):
    self.data = [a + float(b) for a,b in zip(self.data, args)]

  def reset(self):
    self.data = [0.0] * len(self.data)
    
  def __getitem__(self, idx):
    return self.data[idx]

In [169]:
def evaluate_accuracy(net, data_iter):
  metric = Accumulator(2)
  with torch.no_grad():
    for X, y in data_iter:
      metric.add(accuracy(net(X), y), y.numel())
  return metric[0] / metric[1]

In [170]:
evaluate_accuracy(net, test_iter)

  cpuset_checked))


0.1086

In [171]:
def train_epoch_ch3(net, train_iter, loss, updater):
  metric = Accumulator(3)
  for X, y in train_iter:
    y_hat = net(X)
    l = loss(y_hat, y)
    l.sum().backward()
    updater(X.shape[0])
  metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
  return metric[0] / metric[2], metric[1] / metric[2]

In [172]:
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):

  for epoch in range(num_epochs):
    train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
    test_acc = evaluate_accuracy(net, test_iter)
    print('epoch: %s' % epoch)
    print(train_metrics)
    print(test_acc)
  
  train_loss, train_acc = train_metrics
  assert train_loss < 0.5, train_loss
  assert train_acc <= 1 and train_acc > 0.7, train_acc
  assert test_acc <= 1 and test_acc > 0.7, test_acc

In [173]:
def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()

In [174]:
lr = 0.1
def updater(batch_size):
    return sgd([W, b], lr, batch_size)

In [175]:
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)

  cpuset_checked))


epoch: 0
(0.6225115060806274, 0.7916666666666666)
0.7724
epoch: 1
(0.4663444360097249, 0.8541666666666666)
0.7941
epoch: 2
(0.45891443888346356, 0.84375)
0.8196
epoch: 3
(0.6520735025405884, 0.7604166666666666)
0.8212
epoch: 4
(0.4318745930989583, 0.8333333333333334)
0.827
epoch: 5
(0.465431014696757, 0.84375)
0.8132
epoch: 6
(0.3612331549326579, 0.90625)
0.8273
epoch: 7
(0.32048139969507855, 0.8854166666666666)
0.8272
epoch: 8
(0.3889477650324504, 0.84375)
0.815
epoch: 9
(0.4193052848180135, 0.84375)
0.8312


In [178]:
for X, y in test_iter:
  break

  cpuset_checked))


In [188]:
net(X).argmax(axis=1) == y

tensor([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True, False,  True,  True,
        False, False,  True, False,  True, False,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
        False,  True, False,  True,  True,  True, False,  True,  True, False,
        False, False,  True, False,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True, False, False, False,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True, False,  True,
         True,  True,  True, False,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True, 