# Lab 4: Introduction to Convolutional Layers

The goal of this lab is to implement convolutional layers in PyTorch and to illustrate how to use a GPU for training CNNs.

In [8]:
# import common dependencies
import torch
from torch import nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time

In [9]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## 2D Convolutions

In [10]:
# question: what would be the desired input shape for the 2D Convolution layer?
# A single sample is laid out as (channels, height, width); the first entry
# is what has to match the layer's in_channels.
desired_shape = (3, 15, 15)

# random sample tensor with exactly that shape
x_2d = torch.rand(*desired_shape)

### Knowing the module

In [11]:
# IPython help syntax: a leading `?` displays the signature and docstring of nn.Conv2d
?nn.Conv2d

Init signature:
nn.Conv2d(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Tuple[int, int]],
    stride: Union[int, Tuple[int, int]] = 1,
    padding: Union[str, int, Tuple[int, int]] = 0,
    dilation: Union[

Before we create a convolution layer and see how it works, let's understand which parameters our 2D convolution layer needs and what output shape we should expect.

### Questions
- Input channels?
    - Number of slices of 2d matrices
- Output channels?
    - Number of filters. Each filter is applied across all channels of the input and produces one 2D slice (feature map) of the output, so each filter has shape (in_channels, kernel_size, kernel_size).


https://towardsdatascience.com/intuitively-understanding-convolutions-for-deep-learning-1f6f42faee1

### Instantiate a 2D Convolution Layer

In [12]:
# TODO: how many in channels do we have? (check our sample tensor)
# The sample tensor x_2d has 3 channels, so in_channels is 3; the layer
# applies 5 filters of size 3x3 with stride 1 and one pixel of padding.
conv2d_layer = nn.Conv2d(3, 5, kernel_size=3, stride=1, padding=1, bias=True)

### Number of Parameters in the Convolution Layer

In [13]:
# Each of the 5 filters holds 3 * 3 * 3 = 27 weights plus 1 bias:
# (27 + 1) * 5 = 140, i.e. (filter size + 1 for bias) x output channels
print(
    'Number of parameters in our conv2d_layer are '
    f'{sum(tensor.numel() for tensor in conv2d_layer.parameters())}'
)

Number of parameters in our conv2d_layer are 140


### Output

In [14]:
# (channels, height, width) of the sample -- no batch dimension yet
x_2d.size()

torch.Size([3, 15, 15])

In [15]:
# indexing with None prepends a batch dimension of size 1
x_2d[None].shape

torch.Size([1, 3, 15, 15])

In [16]:
# run the layer on a batch containing only our sample
out = conv2d_layer(x_2d[None])

### Shape of the output??

In [17]:
# (batch, out_channels, height, width)
out.size()

torch.Size([1, 5, 15, 15])

### Output shape of torch.Size([1, 8, 5, 5])

In [18]:
# TODO: create a 2D convolution layer that has an output shape of torch.Size([1, 8, 5, 5]) with the same input tensor
# One valid choice: 8 filters, and along each spatial axis
# floor((15 + 2*1 - 5) / 3) + 1 = 5 with kernel 5, stride 3, padding 1.
conv2d_layer_2 = nn.Conv2d(3, 8, kernel_size=5, stride=3, padding=1, bias=True)

out = conv2d_layer_2(x_2d[None])
out.shape
# NOTE: there are multiple ways to do this, so there is not one correct answer

torch.Size([1, 8, 5, 5])

## A look at 1D convolution

In [19]:
# question: What would be the desired input shape for the 1D Convolution layer?
# A single sample is laid out as (channels, length).
desired_shape = (3, 10)

# random sample tensor with exactly that shape
x_1d = torch.rand(*desired_shape)

### Instantiate a 1D Convolution Layer

In [20]:
# 3 input channels -> 5 filters of width 3, stride 1, one element of padding
conv1d_layer = nn.Conv1d(3, 5, kernel_size=3, stride=1, padding=1, bias=True)

### Number of Parameters in the Convolution Layer

In [21]:
# Each of the 5 filters holds 3 * 3 = 9 weights plus 1 bias: (9 + 1) * 5 = 50
print(
    'Number of parameters in our conv1d_layer are '
    f'{sum(tensor.numel() for tensor in conv1d_layer.parameters())}'
)

Number of parameters in our conv1d_layer are 50


In [22]:
# indexing with None prepends a batch dimension of size 1 before the layer is applied
conv1d_res = conv1d_layer(x_1d[None])

In [23]:
# (batch, out_channels, length)
conv1d_res.size()

torch.Size([1, 5, 10])

## Implementing LeNet

In [24]:
class LeNet(nn.Module):
    """LeNet-style CNN for single-channel 28x28 images (e.g. MNIST digits).

    Feature-map sizes for a 28x28 input:
        conv1 (5x5) -> 6 x 24 x 24, pool (2x2) -> 6 x 12 x 12
        conv2 (5x5) -> 16 x 8 x 8,  pool (2x2) -> 16 x 4 x 4
    The 16 * 4 * 4 = 256 features are then fed to three fully connected
    layers that produce 10 class logits.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5) # input channels, output channels, kernel size
        self.pool = nn.MaxPool2d(2, 2) # kernel size, stride
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120) # 16*4*4 comes from the dimensionality of the output tensor before the fully connected layers
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10) # 10 output classes for MNIST digits

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: batch of images shaped (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding unnormalised class scores.

        Raises:
            ValueError: if the convolutional stack does not end in a
                16 x 4 x 4 feature map (i.e. the input was not 28x28).
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Guard the flatten: view(-1, 256) on any other feature-map size would
        # either fail obscurely or silently fold extra features into the batch
        # dimension and return the wrong number of rows.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise ValueError(
                'LeNet expects 1x28x28 inputs; got a feature map of shape '
                f'{tuple(x.shape)} before the fully connected layers'
            )
        x = x.view(-1, self.fc1.in_features) # flatten the tensor for the fully connected layer
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [25]:
# Convert each image to a float tensor; no further preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# MNIST train / test splits, downloaded into ./data when not already present
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Shuffled mini-batches of 64 for training; fixed-order batches of 1000 for evaluation
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)


In [26]:

print('Using', device)

# Fresh LeNet moved onto the selected device (Module.to moves parameters in place)
model = LeNet()
model.to(device)

# Cross-entropy over the 10 class logits, optimised with SGD + momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)

def train(model, device, train_loader, optimizer, epoch, loss_fn=None):
    """Run one training epoch and print how long it took.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding (data, target) batches.
        optimizer: optimizer that updates the model's parameters.
        epoch: epoch number, used only in the progress message.
        loss_fn: callable (output, target) -> scalar loss. When omitted, the
            notebook-level `criterion` is used, matching the original behaviour.
    """
    if loss_fn is None:
        # Backward-compatible default: fall back to the global loss object.
        loss_fn = criterion
    model.train()
    start_time = time.time()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # discard gradients left over from the previous batch
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    print('Training Epoch: {} - Time: {:.2f} seconds'.format(epoch, time.time() - start_time))

# Ten passes over the training data for demonstration; epochs are numbered 1 through 10
for epoch in range(1, 10 + 1):
    train(model, device, train_loader, optimizer, epoch=epoch)


Using cuda
Training Epoch: 1 - Time: 8.15 seconds
Training Epoch: 2 - Time: 5.90 seconds
Training Epoch: 3 - Time: 5.94 seconds
Training Epoch: 4 - Time: 5.91 seconds
Training Epoch: 5 - Time: 5.88 seconds
Training Epoch: 6 - Time: 5.91 seconds
Training Epoch: 7 - Time: 5.90 seconds
Training Epoch: 8 - Time: 5.89 seconds
Training Epoch: 9 - Time: 5.91 seconds
Training Epoch: 10 - Time: 5.90 seconds


In [27]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Report loss and accuracy of the trained LeNet on the held-out test split
test(model=model, device=device, test_loader=test_loader)

Test set: Average loss: 0.0001, Accuracy: 9781/10000 (98%)


## Create Your Own CNN

In [59]:
# TODO: see if you can achieve a similar accuracy to LeNet with your own CNN
# make sure to have a different number of convolutional layers in your approach

class CassieNet(nn.Module):
    """Three-convolution CNN for single-channel 28x28 images (e.g. MNIST digits).

    Feature-map sizes for a 28x28 input:
        conv1 (5x5, padding 1) -> 6 x 26 x 26, pool (2x2) -> 6 x 13 x 13
        conv2 (5x5)            -> 16 x 9 x 9,  pool (2x2) -> 16 x 4 x 4
        conv3 (2x2)            -> 10 x 3 x 3,  pool (2x2) -> 10 x 1 x 1
    The remaining 10 features are fed to three fully connected layers that
    produce 10 class logits.
    """

    def __init__(self):
        super(CassieNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, 1, 1) # in, out, kernel size, stride, padding
        self.pool = nn.MaxPool2d(2, 2) # kernel size, stride
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv3 = nn.Conv2d(16, 10, 2)
        self.fc1 = nn.Linear(10, 120) # 10 = 10 channels x 1 x 1 after the last pooling step
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: batch of images shaped (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding unnormalised class scores.

        Raises:
            ValueError: if the convolutional stack does not end in a
                10 x 1 x 1 feature map (i.e. the input size is unsupported).
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        # Guard the flatten: view(-1, 10) on a larger feature map would silently
        # fold the extra spatial positions into the batch dimension and return
        # more rows than there are samples.
        if tuple(x.shape[-3:]) != (10, 1, 1):
            raise ValueError(
                'CassieNet expects 1x28x28 inputs; got a feature map of shape '
                f'{tuple(x.shape)} before the fully connected layers'
            )
        x = x.view(-1, self.fc1.in_features)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [60]:

print('Using', device)

# Fresh CassieNet moved onto the selected device (Module.to moves parameters in place)
model = CassieNet()
model.to(device)

# Same loss and optimizer settings as used for LeNet, for a like-for-like comparison
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)

# NOTE: this cell re-defines `train`, shadowing the definition from the LeNet section.
def train(model, device, train_loader, optimizer, epoch, loss_fn=None):
    """Run one training epoch and print how long it took.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding (data, target) batches.
        optimizer: optimizer that updates the model's parameters.
        epoch: epoch number, used only in the progress message.
        loss_fn: callable (output, target) -> scalar loss. When omitted, the
            notebook-level `criterion` is used, matching the original behaviour.
    """
    if loss_fn is None:
        # Backward-compatible default: fall back to the global loss object.
        loss_fn = criterion
    model.train()
    start_time = time.time()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # discard gradients left over from the previous batch
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
    print('Training Epoch: {} - Time: {:.2f} seconds'.format(epoch, time.time() - start_time))

# Ten passes over the training data for demonstration; epochs are numbered 1 through 10
for epoch in range(1, 10 + 1):
    train(model, device, train_loader, optimizer, epoch=epoch)


Using cuda
Training Epoch: 1 - Time: 6.15 seconds
Training Epoch: 2 - Time: 6.17 seconds
Training Epoch: 3 - Time: 6.14 seconds
Training Epoch: 4 - Time: 6.15 seconds
Training Epoch: 5 - Time: 6.14 seconds
Training Epoch: 6 - Time: 6.16 seconds
Training Epoch: 7 - Time: 6.14 seconds
Training Epoch: 8 - Time: 6.14 seconds
Training Epoch: 9 - Time: 6.14 seconds
Training Epoch: 10 - Time: 6.14 seconds


In [61]:
# Report loss and accuracy of the trained CassieNet on the held-out test split
test(model=model, device=device, test_loader=test_loader)

Test set: Average loss: 0.0002, Accuracy: 9526/10000 (95%)


In [None]:
# NOTE: there are multiple ways to approach this, and there is no one correct solution