## Setup

In [23]:
!pip install d2l==0.17.5



In [24]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision.models as models
from torchvision import datasets, transforms as T
import torch.nn.functional as F
from d2l import torch as d2l

In [25]:
if torch.cuda.is_available():
  torch.cuda.empty_cache()
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

class SoftmaxRegression(nn.Module):
  def __init__(self, input_dim, output_dim, *args, **kwargs):
    super(SoftmaxRegression, self).__init__()
    self.layer = nn.Linear(input_dim, output_dim)
    nn.init.normal_(self.layer.weight, std=0.01)

  def forward(self, X):
    X = nn.Flatten().forward(X)
    return F.softmax(self.layer(X), dim=-1)

## Reading the Dataset


In [26]:
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transforms = T.Compose([T.Resize((224, 224)), T.ToTensor(), normalize])

train_data = datasets.CIFAR10('train_data', download=True, transform=transforms)
test_data = datasets.CIFAR10('test_data', download=True, train=False, transform=transforms)

batch_size = 256

train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


## Configuring Output Dimensions

In [27]:
softmax = SoftmaxRegression(150528, 10)

## Training

In [28]:
def train(net, train_iter, test_iter, num_epochs, lr, device):
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
        test_acc = evaluate(net, test_iter)
        print(f'epoch {epoch + 1}, loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')

In [29]:
def evaluate(net, data_iter, device=None):
    if isinstance(net, nn.Module):
        net.eval()  # Set the model to evaluation mode
        if not device:
            device = next(iter(net.parameters())).device
    # No. of correct predictions, no. of predictions
    metric = d2l.Accumulator(2)

    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # Required for BERT Fine-tuning
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

In [31]:
learning_rate, num_epochs = 0.1, 10
value = train(softmax, train_data_loader, test_data_loader, num_epochs, learning_rate, d2l.try_gpu())

training on cuda:0
epoch 1, loss 2.119, train acc 0.340, test acc 0.347
epoch 2, loss 2.108, train acc 0.351, test acc 0.353
epoch 3, loss 2.100, train acc 0.359, test acc 0.341
epoch 4, loss 2.098, train acc 0.362, test acc 0.360
epoch 5, loss 2.092, train acc 0.368, test acc 0.365
epoch 6, loss 2.086, train acc 0.374, test acc 0.366
epoch 7, loss 2.084, train acc 0.376, test acc 0.372
epoch 8, loss 2.083, train acc 0.377, test acc 0.364
epoch 9, loss 2.076, train acc 0.384, test acc 0.335
epoch 10, loss 2.084, train acc 0.376, test acc 0.364
9790.2 examples/sec on cuda:0
