# Object Classification with CNNs

Object classification in images with convolutional neural networks (CNNs) is arguably what ignited the field of deep learning (or re-ignited the field of neural networks). Even now, deep learning shines in supervised learning settings when there is plenty of data, and so we'll look at a fairly standard pipeline based on object classification.

## Data

We use the MNIST dataset, loaded through `torchvision`.

TODO: consider CIFAR as a second dataset?

In [21]:
import os
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [74]:
batch_size = 64

# The dataset files are stored under ~/.torch/datasets/mnist.
data_path = os.path.join(os.path.expanduser('~'), '.torch', 'datasets', 'mnist')

# A single preprocessing pipeline shared by both splits, so the training and
# test normalisation cannot drift apart: convert each image to a tensor, then
# normalise with mean 0.1307 and std 0.3081 (presumably the MNIST training-set
# statistics -- confirm before reusing with another dataset).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training split asks for a download; the test split is read from
# the same directory afterwards.
train_data = datasets.MNIST(data_path, train=True, download=True,
                            transform=mnist_transform)
test_data = datasets.MNIST(data_path, train=False,
                           transform=mnist_transform)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=batch_size, num_workers=4)

## Model

In [89]:
class Net(nn.Module):
    """Convolutional classifier for single-channel 28x28 images.

    Four conv -> batch norm -> PReLU stages (the second and fourth use
    stride 2, taking the spatial size 28 -> 14 -> 7) are followed by a
    512-unit fully connected layer with dropout, batch norm and PReLU, and a
    final 10-way linear layer.

    The conv layers and ``fc1`` omit their bias because each is followed by a
    batch norm layer, which has its own learnable shift.
    """

    def __init__(self):
        super().__init__()
        # 5x5 conv with padding 2 keeps the spatial size.
        self.conv1 = nn.Conv2d(1, 16, 5, padding=2, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.prelu1 = nn.PReLU(16)
        # Strided 3x3 conv halves the spatial size.
        self.conv2 = nn.Conv2d(16, 32, 3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(32)
        self.prelu2 = nn.PReLU(32)
        self.conv3 = nn.Conv2d(32, 32, 3, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(32)
        self.prelu3 = nn.PReLU(32)
        # Second strided conv halves the spatial size again.
        self.conv4 = nn.Conv2d(32, 64, 3, stride=2, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(64)
        self.prelu4 = nn.PReLU(64)
        # 64 channels x 7 x 7 spatial positions for a 28x28 input.
        self.fc1 = nn.Linear(64 * 7 * 7, 512, bias=False)
        self.dp1 = nn.Dropout()
        self.bn5 = nn.BatchNorm1d(512)
        self.prelu5 = nn.PReLU(512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                ``64 * 7 * 7`` features that ``fc1`` expects.
        """
        x = self.prelu1(self.bn1(self.conv1(x)))
        x = self.prelu2(self.bn2(self.conv2(x)))
        x = self.prelu3(self.bn3(self.conv3(x)))
        x = self.prelu4(self.bn4(self.conv4(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 7 * 7)``, this cannot fold a wrong-sized input into
        # extra batch rows; a size mismatch surfaces as an error in ``fc1``.
        x = x.flatten(1)
        x = self.prelu5(self.bn5(self.dp1(self.fc1(x))))
        return F.log_softmax(self.fc2(x), dim=1)

model = Net()

## Training

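The training loop below makes a single pass over the training set with the Adam optimiser, printing the loss every 10 batches.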

In [87]:
optimiser = optim.Adam(model.parameters(), lr=5e-4)
train_losses = []  # loss of every batch, in iteration order
model.train()  # put the model in training mode

# One pass over the training set.
for i, (x, y) in enumerate(train_loader):
    # Clear gradients left over from the previous batch.
    optimiser.zero_grad()

    y_hat = model(x)
    loss = F.nll_loss(y_hat, y)
    loss.backward()

    # Read the scalar loss once and reuse it for both logging and history.
    batch_loss = loss.item()
    train_losses.append(batch_loss)
    optimiser.step()

    # Report progress every tenth batch.
    if i % 10 == 0:
        print(i, batch_loss)

0 2.290889263153076
10 1.3428609371185303
20 0.9157310128211975
30 0.6610764265060425
40 0.6482018232345581
50 0.44249194860458374
60 0.534844696521759
70 0.4320223927497864
80 0.23216548562049866
90 0.29437491297721863
100 0.3193305730819702
110 0.29913055896759033
120 0.20930927991867065
130 0.19264532625675201
140 0.21241307258605957
150 0.3589246869087219
160 0.19412192702293396
170 0.22808325290679932
180 0.22743552923202515
190 0.11307939887046814
200 0.125473290681839
210 0.1475408375263214
220 0.1659116894006729
230 0.1664218306541443
240 0.08588605374097824
250 0.07279990613460541
260 0.10554340481758118
270 0.07441550493240356
280 0.1585436463356018
290 0.17889198660850525
300 0.14697301387786865
310 0.09552984684705734
320 0.10998660326004028
330 0.12195517867803574
340 0.06838690489530563
350 0.06499971449375153
360 0.09424880146980286
370 0.08306574076414108
380 0.041483305394649506
390 0.10213358700275421
400 0.09162051975727081
410 0.23035699129104614
420 0.0752951279282

## Testing

In [88]:
model.eval()  # put the model in evaluation mode
test_loss = 0
correct = 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for x, y in test_loader:
        y_hat = model(x)
        # Sum (rather than average) per batch so the final division by the
        # dataset size gives the mean loss per example.
        test_loss += F.nll_loss(y_hat, y, reduction='sum').item()
        # Predicted class = index of the largest log-probability.
        pred = y_hat.argmax(1, keepdim=True)
        correct += (pred == y.unsqueeze(1)).sum().item()

num_test = len(test_data)
test_loss /= num_test
acc = correct / num_test
print(acc, test_loss)

0.9882 0.0378669185757637
