In [1]:
import torch
import torch.nn as nn

Stride: This is the number of pixels by which we shift the filter, both
horizontally and vertically, before applying the convolution to the next patch
of the image.

Padding: This is the strategy that we apply to the edges of an image while we
convolve, depending on whether we want to keep the dimensions of the tensors
the same after convolution or only apply convolution where the filter fits
properly within the input image. If we want to keep the dimensions the same,
then we zero-pad the edges so that the output after convolution (with a stride
of 1) has the same dimensions as the input. This is called same padding. If we
don't need to preserve the original dimensions, then the positions where the
filter doesn't fit completely are skipped, so the output is smaller than the
input; this is called valid padding.

In [2]:
# 3 input channels, 16 output channels, 3x3 kernel (stride defaults to 1, no padding).
nn.Conv2d(3, 16, 3)

Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))

adding padding

In [3]:
# Same layer with padding=1: one pixel of padding on each side of both spatial dimensions.
nn.Conv2d(3, 16, 3, padding=1)

Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

non square kernel

In [4]:
# The kernel size may be given as a (height, width) tuple; here the kernel is 3x4.
nn.Conv2d(3, 16, (3,4), padding=1)

Conv2d(3, 16, kernel_size=(3, 4), stride=(1, 1), padding=(1, 1))

In [5]:
# Max pooling over a 3x3 window that moves one pixel at a time.
max_pool = nn.MaxPool2d(3, stride=1)

In [6]:
# Random integer-valued float tensor of shape (3, 5, 5) with entries in [0, 10).
# torch.randint replaces the legacy torch.FloatTensor(...) constructor, which
# allocates uninitialized memory before random_() overwrites it.
a = torch.randint(0, 10, (3, 5, 5)).float()

In [7]:
a

tensor([[[4., 6., 2., 7., 2.],
         [9., 4., 1., 3., 1.],
         [4., 1., 4., 2., 5.],
         [4., 7., 8., 2., 5.],
         [3., 2., 2., 4., 1.]],

        [[5., 8., 6., 8., 4.],
         [5., 7., 6., 2., 6.],
         [7., 6., 5., 3., 4.],
         [1., 2., 6., 6., 8.],
         [7., 3., 5., 4., 5.]],

        [[9., 4., 2., 6., 2.],
         [5., 5., 3., 9., 3.],
         [6., 3., 5., 5., 3.],
         [5., 4., 2., 6., 8.],
         [9., 9., 8., 7., 4.]]])

In [8]:
max_pool(a)

tensor([[[9., 7., 7.],
         [9., 8., 8.],
         [8., 8., 8.]],

        [[8., 8., 8.],
         [7., 7., 8.],
         [7., 6., 8.]],

        [[9., 9., 9.],
         [6., 9., 9.],
         [9., 9., 8.]]])

In [9]:
# Average pooling over a 3x3 window that moves one pixel at a time.
avg_pool = nn.AvgPool2d(3, stride=1)

In [10]:
avg_pool(a)

tensor([[[3.8889, 3.3333, 3.0000],
         [4.6667, 3.5556, 3.4444],
         [3.8889, 3.5556, 3.6667]],

        [[6.1111, 5.6667, 4.8889],
         [5.0000, 4.7778, 5.1111],
         [4.6667, 4.4444, 5.1111]],

        [[4.6667, 4.6667, 4.2222],
         [4.2222, 4.6667, 4.8889],
         [5.6667, 5.4444, 5.3333]]])

In [11]:
from torchvision import transforms

In [12]:
# Transform that converts an image into a tensor.
toTensor = transforms.ToTensor()

In [13]:
# Normalization transform for single-channel images: mean 0.5, standard deviation 0.5.
normTrans = transforms.Normalize((0.5,),(0.5,))

In [14]:
from torchvision import datasets

In [15]:
from torchvision import transforms

In [16]:
# Preprocessing pipeline: random augmentation, tensor conversion, then
# per-channel normalization. The transforms run in the order listed.
transformations = transforms.Compose([
    # Randomly mirror the image left-to-right.
    transforms.RandomHorizontalFlip(),
    # Rotate by a random angle of up to 20 degrees in either direction.
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    # Three-channel normalization with mean 0.5 and standard deviation 0.5 per channel.
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
])

In [17]:
# CIFAR-10 training split stored under ~/.pytorch/CIFAR10 (downloaded if
# missing), with the augmentation pipeline applied to every image.
train_data = datasets.CIFAR10('~/.pytorch/CIFAR10', train=True, download=True, transform=transformations)

Files already downloaded and verified


In [18]:
# Evaluate on un-augmented images: the random flip/rotation in `transformations`
# is a training-time augmentation and would make test metrics noisy and
# non-reproducible, so only the deterministic steps are applied here.
test_data = datasets.CIFAR10(
    '~/.pytorch/CIFAR10',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
)

Files already downloaded and verified


In [19]:
len(train_data), len(test_data)

(50000, 10000)

In [20]:
from torch.utils.data.sampler import SubsetRandomSampler

In [21]:
# Fraction of the training set held out for validation.
validation_size = 0.2

In [22]:
import numpy as np

In [23]:
training_size = len(train_data)

In [24]:
indices = list(range(training_size))

In [25]:
# Shuffle in place with a seeded generator so the train/validation split is the
# same on every run (the unseeded global RNG produced a different split each time).
np.random.default_rng(42).shuffle(indices)

In [26]:
# Number of samples assigned to the validation set (rounded down).
index_split = int(np.floor(training_size * validation_size))

In [27]:
# The first `index_split` shuffled indices form the validation set; the rest are used for training.
validation_indices, training_indices = indices[:index_split], indices[index_split:]

In [28]:
# Each sampler draws, in random order, only from its own subset of indices.
training_sample = SubsetRandomSampler(training_indices)
validation_sample = SubsetRandomSampler(validation_indices)

In [29]:
batch_size = 64

In [30]:
from torch.utils.data.dataloader import DataLoader

In [31]:
# Training batches: drawn from `train_data`, restricted to the training indices.
train_loader = DataLoader(train_data, batch_size=batch_size, sampler=training_sample)

In [32]:
# Validation batches: same dataset object as training, restricted to the held-out indices.
# NOTE(review): `train_data` carries the random augmentation transform, so
# validation images are augmented too - confirm this is intended.
valid_loader = DataLoader(train_data, batch_size=batch_size, sampler=validation_sample)

In [33]:
# Test batches: no sampler is given, so the test set is read in dataset order.
test_loader = DataLoader(test_data, batch_size=batch_size)

defining the CNN architecture

In [34]:
import torch.nn.functional as F

In [35]:
class CNN(nn.Module):
    """Three conv/pool stages followed by a two-layer classifier.

    Each stage applies a 3x3 convolution with padding 1 (spatial size is
    unchanged), a ReLU, and a 2x2 max pool with stride 2 (height and width are
    halved). The first linear layer expects 64 * 4 * 4 features, which
    corresponds to 3-channel 32x32 inputs such as CIFAR-10 images.
    """

    def __init__(self):
        super().__init__()
        # Channel progression 3 -> 16 -> 32 -> 64.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # One pooling module is shared by all three stages (it has no parameters).
        self.pool = nn.MaxPool2d(2, 2)
        self.linear1 = nn.Linear(64 * 4 * 4, 512)
        self.linear2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10).

        Args:
            x: image batch of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: if the input's spatial size does not reduce to 4x4
                after the three pooling stages (raised by the first linear
                layer on the feature-count mismatch).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 64 * 4 * 4)) means a wrongly sized input fails loudly in
        # linear1 rather than silently being reshaped into a larger batch.
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        return x

In [36]:
model = CNN()
model

CNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=1024, out_features=512, bias=True)
  (linear2): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [38]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device.type

'cuda'

In [37]:
# Move the model to the selected device.
# NOTE(review): the saved execution counts show this cell was run (In [37])
# before `device` was defined (In [38]), which is where the stale NameError
# output comes from; in the current cell order it runs correctly top to bottom.
model = model.to(device)

NameError: name 'device' is not defined

In [40]:
# Cross-entropy loss, applied to the raw scores returned by the model (which has no softmax layer).
criterion = nn.CrossEntropyLoss()

In [41]:
import torch.optim as optim

In [42]:
# Stochastic gradient descent over all model parameters with learning rate 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [43]:
import tqdm

In [44]:
# tqdm.notebook is a submodule; a bare `import tqdm` is not guaranteed to load
# it, so import it explicitly before using tqdm.notebook.tqdm below.
import tqdm.notebook

n_epoches = 30
for epoch in tqdm.notebook.tqdm(range(1, n_epoches+1)):
    # Running sums of the per-sample loss for this epoch.
    train_loss = 0.0
    valid_loss = 0.0

    # Training pass: dropout is active and weights are updated after every batch.
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # The criterion returns the batch mean; scaling by the batch size turns
        # it into a per-sample sum so a smaller final batch is weighted correctly.
        train_loss += loss.item()*data.size(0)

    # Validation pass: dropout is disabled and no gradients are tracked, which
    # avoids building an autograd graph that is never used.
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(valid_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item()*data.size(0)

    # Average over the number of samples each loader actually draws from.
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
    print(f'| Epoch: {epoch:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=30.0), HTML(value='')))

| Epoch: 01 | Train Loss: 2.297 | Val. Loss: 2.281
| Epoch: 02 | Train Loss: 2.180 | Val. Loss: 2.030
| Epoch: 03 | Train Loss: 1.963 | Val. Loss: 1.845
| Epoch: 04 | Train Loss: 1.822 | Val. Loss: 1.717
| Epoch: 05 | Train Loss: 1.714 | Val. Loss: 1.616
| Epoch: 06 | Train Loss: 1.633 | Val. Loss: 1.552
| Epoch: 07 | Train Loss: 1.580 | Val. Loss: 1.509
| Epoch: 08 | Train Loss: 1.544 | Val. Loss: 1.473
| Epoch: 09 | Train Loss: 1.504 | Val. Loss: 1.445
| Epoch: 10 | Train Loss: 1.468 | Val. Loss: 1.421
| Epoch: 11 | Train Loss: 1.441 | Val. Loss: 1.393
| Epoch: 12 | Train Loss: 1.415 | Val. Loss: 1.351
| Epoch: 13 | Train Loss: 1.389 | Val. Loss: 1.352
| Epoch: 14 | Train Loss: 1.369 | Val. Loss: 1.297
| Epoch: 15 | Train Loss: 1.344 | Val. Loss: 1.294
| Epoch: 16 | Train Loss: 1.325 | Val. Loss: 1.254
| Epoch: 17 | Train Loss: 1.300 | Val. Loss: 1.231
| Epoch: 18 | Train Loss: 1.281 | Val. Loss: 1.265
| Epoch: 19 | Train Loss: 1.263 | Val. Loss: 1.197
| Epoch: 20 | Train Loss: 1.246