## Setup

In [9]:
!pip install d2l==0.17.5



In [10]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision.models as models
from torchvision import datasets, transforms as T
from d2l import torch as d2l

In [11]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Clear PyTorch's CUDA memory cache before the model is placed on the GPU.
    torch.cuda.empty_cache()

# ResNet-18 created with pretrained weights and moved to the selected device.
resnet18 = models.resnet18(pretrained=True).to(device)

## Reading the Dataset


In [12]:
# Preprocessing shared by both splits: resize to 224x224, convert to a tensor,
# then normalize each channel with the mean/std listed here.
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transforms = T.Compose([T.Resize((224, 224)), T.ToTensor(), normalize])

# NOTE(review): each split is downloaded into its own root directory. The
# CIFAR-10 archive appears to contain both splits, so a shared root would
# presumably avoid the second download -- confirm before changing the paths.
train_data = datasets.CIFAR10('train_data', train=True, download=True, transform=transforms)
test_data = datasets.CIFAR10('test_data', train=False, download=True, transform=transforms)

batch_size = 256

# Only the training split is shuffled; the test split is read in a fixed order
# so that evaluation passes are repeatable batch for batch.
train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


## Configuring Output Dimensions

In [13]:
def init_weights(model):
    """Re-initialize a module's weight from a normal distribution with std 0.01.

    Intended as a callback for ``nn.Module.apply``, which calls it on every
    submodule as well as on the module itself. Modules that have no ``weight``
    attribute (or whose ``weight`` is ``None``) are left untouched, so the
    function can be applied to containers and parameter-free layers without
    raising ``AttributeError``.

    Args:
        model: The module to initialize; its ``weight`` is overwritten in place.
    """
    weight = getattr(model, 'weight', None)
    if weight is not None:
        nn.init.normal_(weight, std=0.01)

# Replacement output layer: 512 input features mapped to 10 outputs.
out_layer = nn.Linear(in_features=512, out_features=10)
# Kept as the trailing expression so the cell still displays the layer.
out_layer.apply(init_weights)

Linear(in_features=512, out_features=10, bias=True)

In [14]:
# Swap in the new output layer and move it to `device`, where the rest of the
# network was placed when it was created. Without this the head stays on the
# CPU until `train()` moves the whole model, so any forward pass before
# training would mix devices on a CUDA machine.
resnet18.fc = out_layer.to(device)

## Training

In [15]:
def train(net, train_iter, num_epochs, lr, device):
    """Train ``net`` with SGD and cross-entropy loss, printing per-epoch metrics.

    After every epoch the mean training loss and training accuracy are
    printed; after the last epoch the overall throughput is printed.

    Args:
        net: Model to train; it is moved to ``device`` in place.
        train_iter: Iterable yielding ``(X, y)`` batches.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate passed to ``torch.optim.SGD``.
        device: Device the model and every batch are moved to.

    Raises:
        ValueError: If an epoch yields no examples.
    """
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    # Examples processed across all epochs, used for the throughput report.
    total_examples = 0
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples
        metric = d2l.Accumulator(3)
        net.train()
        for X, y in train_iter:
            # Only the optimization step is timed, not the data loading.
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            batch_loss = loss(y_hat, y)
            batch_loss.backward()
            optimizer.step()
            with torch.no_grad():
                # Scale the batch loss by the batch size so that dividing by
                # the example count below gives a per-example average.
                metric.add(batch_loss * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
        num_examples = metric[2]
        if num_examples == 0:
            raise ValueError('train_iter yielded no examples')
        # Epoch averages are computed once per epoch rather than once per batch.
        train_l = metric[0] / num_examples
        train_acc = metric[1] / num_examples
        total_examples += num_examples
        print(f'epoch {epoch + 1}, loss {train_l:.3f}, train acc {train_acc:.3f}')
    elapsed = timer.sum()
    # Skip the throughput line when nothing was timed (e.g. num_epochs == 0).
    if elapsed > 0:
        print(f'{total_examples / elapsed:.1f} examples/sec '
              f'on {str(device)}')

In [16]:
# Hyperparameters for the training run.
learning_rate = 0.05
num_epochs = 10

train(
    net=resnet18,
    train_iter=train_data_loader,
    num_epochs=num_epochs,
    lr=learning_rate,
    device=d2l.try_gpu(),
)

training on cuda:0
epoch 1, loss 0.421, train acc 0.865
epoch 2, loss 0.139, train acc 0.955
epoch 3, loss 0.070, train acc 0.981
epoch 4, loss 0.034, train acc 0.993
epoch 5, loss 0.014, train acc 0.999
epoch 6, loss 0.007, train acc 1.000
epoch 7, loss 0.005, train acc 1.000
epoch 8, loss 0.003, train acc 1.000
epoch 9, loss 0.003, train acc 1.000
epoch 10, loss 0.002, train acc 1.000
669.2 examples/sec on cuda:0


## Evaluating

In [17]:
def evaluate(net, data_iter, device=None):
    """Compute and print the accuracy of ``net`` over ``data_iter``.

    Args:
        net: Model applied to each batch ``X``. An ``nn.Module`` is switched
            to evaluation mode first.
        data_iter: Iterable yielding ``(X, y)`` batches; ``X`` may be a tensor
            or a list of tensors.
        device: Device the batches are moved to. When ``None`` and ``net`` is
            an ``nn.Module``, the device of its first parameter is used.

    Raises:
        ValueError: If ``data_iter`` yields no examples.
    """
    if isinstance(net, nn.Module):
        net.eval()  # Set the model to evaluation mode
        # Compare against None explicitly: a plain truthiness test would also
        # discard the valid CUDA ordinal 0.
        if device is None:
            device = next(iter(net.parameters())).device
    # No. of correct predictions, no. of predictions
    metric = d2l.Accumulator(2)

    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(X, list):
                # Required for BERT Fine-tuning
                X = [x.to(device) for x in X]
            else:
                X = X.to(device)
            y = y.to(device)
            metric.add(d2l.accuracy(net(X), y), y.numel())
    num_predictions = metric[1]
    if num_predictions == 0:
        raise ValueError('data_iter yielded no examples')
    test_acc = metric[0] / num_predictions
    print(f'test acc {test_acc:.3f}')

In [19]:
# Report accuracy on the test split.
evaluate(net=resnet18, data_iter=test_data_loader)

test acc 0.948
