---
layout: default
categories: CNN
title: "CNN - Implementation"
permalink: /ML34.5/
order: 34.5
comments: true
---

In [1]:
%reset -f
%pylab --no-import-all inline
import matplotlib_inline.backend_inline

# Ask the inline backend to emit figures as SVG.
matplotlib_inline.backend_inline.set_matplotlib_formats("svg")
# Select the "cm" math font set for math text in figures (should be matplotlib's
# Computer Modern fonts). `plt` is not imported explicitly in this cell;
# presumably it is injected by the %pylab magic above.
plt.rcParams["mathtext.fontset"] = "cm"

Populating the interactive namespace from numpy and matplotlib


In [2]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

Convolutional neural networks (CNNs) are usually used for computer vision tasks, and current implementations of famous architectures are much more complex than what we can easily write in a few lines of code, as we have done up until now.

Luckily, a simple CNN can be implemented by just adding a couple of layers to the architecture of the FNN that we built in <a href="page:ML17.5">ML-17.5</a>.

First we are going to use the same MNIST dataset as for the other examples

In [3]:
# Training split of MNIST, stored under ./data (fetched if it is not there yet).
# Every sample is passed through ToTensor before it is handed out.
train_dataset = dsets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split, read from the same root directory with the same transform.
test_dataset = dsets.MNIST(
    root="./data",
    train=False,
    transform=transforms.ToTensor(),
)

We have a training set of 60000 images, each a $28 \times 28$ grid of pixels: $x^{(i)} \in \mathbb{R}^{28 \times 28}$ (784 values per image)

In [18]:
# Size of the raw training image tensor (number of images first, then the image grid).
train_dataset.data.size()

torch.Size([60000, 28, 28])

And 60000 training labels

In [19]:
# One label per training image.
train_dataset.targets.size()

torch.Size([60000])

And a test set of 10000 images and labels

In [20]:
# Number of labels (and therefore of images) in the test split.
test_dataset.targets.size()

torch.Size([10000])

With a mini-batch size of 100 images, one pass over the 60000 training images takes 600 iterations, so a total of 3000 iterations means we will go through the whole dataset 5 times (5 epochs)

In [21]:
# Mini-batch size and total number of optimisation steps to run.
batch_size = 100
n_iters = 3000

# Epochs needed to reach n_iters: total steps divided by batches per epoch,
# truncated to an int so it can be passed to range() by the training loop.
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

# Training batches are drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, batch_size=batch_size
)

# Test batches follow the order of the dataset.
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, batch_size=batch_size
)

The model architecture will have:

1. A Convolutional layer with *Same Padding*
2. A *Max Pooling* layer
3. A Convolutional layer with *Same Padding*
4. A *Max Pooling* layer
5. A fully connected layer

In [22]:
class CNNModel(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and a linear readout.

    Both convolutions use a 5x5 kernel with stride 1 and padding 2 ("same"
    padding), and each is followed by a 2x2 max pooling.  The readout layer
    expects 32 * 7 * 7 features per sample and produces 10 scores, which
    matches 1-channel 28x28 inputs (28 -> 14 -> 7 after the two poolings).
    """

    def __init__(self):
        super().__init__()

        # The two convolutions share their geometry; only channel counts differ.
        conv_geometry = dict(kernel_size=5, stride=1, padding=2)

        # Stage 1: 1 input channel -> 16 feature maps, then 2x2 max pooling
        self.cnn1 = nn.Conv2d(1, 16, **conv_geometry)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(2)

        # Stage 2: 16 -> 32 feature maps, then 2x2 max pooling
        self.cnn2 = nn.Conv2d(16, 32, **conv_geometry)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(2)

        # Readout: flattened feature maps -> 10 class scores
        self.fc1 = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # Stage 1: convolution -> ReLU -> max pool
        stage1 = self.maxpool1(self.relu1(self.cnn1(x)))

        # Stage 2: convolution -> ReLU -> max pool
        stage2 = self.maxpool2(self.relu2(self.cnn2(stage1)))

        # Flatten everything but the batch dimension,
        # e.g. (100, 32, 7, 7) -> (100, 32*7*7)
        batch = stage2.size(0)
        flat = stage2.view(batch, -1)

        # Linear readout
        return self.fc1(flat)

In [23]:
# Build the network; its parameters are handed to the optimizer further down.
model = CNNModel()

In [24]:
# Cross-entropy loss, evaluated on the model's raw outputs (logits) and the integer labels.
criterion = nn.CrossEntropyLoss()

In [25]:
# Step size used by the optimizer.
learning_rate = 0.01

# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [27]:
# Materialise the parameter list once; it holds six tensors, in this order:
#   [0] Convolution 1 weights: 16 kernels
#   [1] Convolution 1 bias: one value per kernel (16)
#   [2] Convolution 2 weights: 32 kernels with depth = 16
#   [3] Convolution 2 bias: one value per kernel (32)
#   [4] Fully connected layer 1 weights
#   [5] Fully connected layer 1 bias
params = list(model.parameters())

# Number of parameter tensors
print(len(params))

# Size of each of the six tensors listed above
for position in range(6):
    print(params[position].size())

6
torch.Size([16, 1, 5, 5])
torch.Size([16])
torch.Size([32, 16, 5, 5])
torch.Size([32])
torch.Size([10, 1568])
torch.Size([10])


In [28]:
# Mini-batch SGD training; every 500 iterations the accuracy on the full test
# set is measured and printed together with the loss of the latest batch.
#
# NOTE: `iter` shadows the Python builtin of the same name for the rest of the
# notebook session. The name is kept so any other cell that reads the counter
# keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The input images are used as-is: only the model parameters are
        # optimised, so no gradient w.r.t. the images needs to be tracked.

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate Accuracy
            correct = 0
            total = 0

            # Evaluation needs no gradients: switch the model to eval mode and
            # disable autograd so no graph is built for the test batches.
            model.eval()
            with torch.no_grad():
                # Iterate through test dataset
                for images, labels in test_loader:
                    # Forward pass only to get logits/output
                    outputs = model(images)

                    # Predicted class = index of the largest logit
                    predicted = outputs.argmax(dim=1)

                    # Total number of labels
                    total += labels.size(0)

                    # Total correct predictions
                    correct += (predicted == labels).sum()
            # Back to training mode for the next optimisation steps
            model.train()

            accuracy = 100 * correct / total

            # Print Loss
            print(
                "Iteration: {}. Loss: {}. Accuracy: {}".format(
                    iter, loss.item(), accuracy
                )
            )

Iteration: 500. Loss: 0.5105285048484802. Accuracy: 87.93000030517578
Iteration: 1000. Loss: 0.2870190441608429. Accuracy: 93.08999633789062
Iteration: 1500. Loss: 0.13547983765602112. Accuracy: 94.5199966430664
Iteration: 2000. Loss: 0.08501622080802917. Accuracy: 95.7300033569336
Iteration: 2500. Loss: 0.15075716376304626. Accuracy: 96.43000030517578
Iteration: 3000. Loss: 0.11706560850143433. Accuracy: 96.93000030517578


In [29]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

"""
STEP 1: LOADING DATASET
"""

# Training split of MNIST under ./data (fetched if missing); samples go through ToTensor.
train_dataset = dsets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split from the same root directory, with the same transform.
test_dataset = dsets.MNIST(
    root="./data",
    train=False,
    transform=transforms.ToTensor(),
)

"""
STEP 2: MAKING DATASET ITERABLE
"""

# Mini-batch size and total number of optimisation steps to run.
batch_size = 100
n_iters = 3000

# Whole number of epochs needed to reach n_iters
# (total steps divided by batches per epoch, truncated).
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Training batches are drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, batch_size=batch_size
)

# Test batches follow the order of the dataset.
test_loader = torch.utils.data.DataLoader(
    test_dataset, shuffle=False, batch_size=batch_size
)

"""
STEP 3: CREATE MODEL CLASS
"""


class CNNModel(nn.Module):
    """Small CNN classifier: two conv/ReLU/average-pool stages and a linear readout.

    This re-binds the name ``CNNModel`` used earlier in the notebook; the
    layout is the same except that each stage ends with a 2x2 *average*
    pooling instead of a max pooling.  Both convolutions use a 5x5 kernel
    with stride 1 and padding 2, and the readout maps 32 * 7 * 7 features to
    10 class scores.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 input channel -> 16 feature maps, then 2x2 average pooling
        self.cnn1 = nn.Conv2d(
            in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2
        )
        self.relu1 = nn.ReLU()
        self.avgpool1 = nn.AvgPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 feature maps, then 2x2 average pooling
        self.cnn2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2
        )
        self.relu2 = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(kernel_size=2)

        # Readout: flattened feature maps -> 10 class scores
        self.fc1 = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        out = x

        # Each stage applies convolution -> ReLU -> average pool, in that order.
        stages = (
            (self.cnn1, self.relu1, self.avgpool1),
            (self.cnn2, self.relu2, self.avgpool2),
        )
        for conv, activation, pool in stages:
            out = pool(activation(conv(out)))

        # Flatten everything but the batch dimension,
        # e.g. (100, 32, 7, 7) -> (100, 32*7*7), then apply the linear readout.
        return self.fc1(out.view(out.size(0), -1))


"""
STEP 4: INSTANTIATE MODEL CLASS
"""

# Fresh instance of the CNNModel class defined just above in this cell.
model = CNNModel()

"""
STEP 5: INSTANTIATE LOSS CLASS
"""
# Cross-entropy loss, evaluated on the model's raw outputs (logits) and the integer labels.
criterion = nn.CrossEntropyLoss()


"""
STEP 6: INSTANTIATE OPTIMIZER CLASS
"""
# Step size used by the optimizer.
learning_rate = 0.01

# Plain stochastic gradient descent over every parameter of the new model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

"""
STEP 7: TRAIN THE MODEL
"""
# Train with mini-batch SGD and, every 500 iterations, report the loss of the
# latest training batch together with the accuracy on the full test set.
#
# NOTE: `iter` shadows the Python builtin of the same name for the rest of the
# notebook session. The name is kept so any other cell that reads the counter
# keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Images are fed in unchanged: the optimizer only updates the model
        # parameters, so gradients w.r.t. the inputs are not requested.

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(images)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iter += 1

        if iter % 500 == 0:
            # Calculate Accuracy
            correct = 0
            total = 0

            # No gradients are needed to score the test set: use eval mode and
            # turn autograd off so the forward passes do not build a graph.
            model.eval()
            with torch.no_grad():
                # Iterate through test dataset
                for images, labels in test_loader:
                    # Forward pass only to get logits/output
                    outputs = model(images)

                    # Predicted class = index of the largest logit
                    predicted = outputs.argmax(dim=1)

                    # Total number of labels
                    total += labels.size(0)

                    # Total correct predictions
                    correct += (predicted == labels).sum()
            # Restore training mode before the next optimisation step
            model.train()

            accuracy = 100 * correct / total

            # Print Loss
            print(
                "Iteration: {}. Loss: {}. Accuracy: {}".format(
                    iter, loss.item(), accuracy
                )
            )

Iteration: 500. Loss: 0.33858054876327515. Accuracy: 86.51000213623047
Iteration: 1000. Loss: 0.31930989027023315. Accuracy: 89.54000091552734
Iteration: 1500. Loss: 0.37224602699279785. Accuracy: 90.75
Iteration: 2000. Loss: 0.14266057312488556. Accuracy: 91.61000061035156
Iteration: 2500. Loss: 0.1566978245973587. Accuracy: 92.36000061035156
Iteration: 3000. Loss: 0.32563263177871704. Accuracy: 93.25
