# Image Classification with a Convolutional Neural Network (CNN)

In [None]:
# get a dataset using pytorch
import torch
import torchvision
from torchvision import transforms

# Seed PyTorch's global RNG before anything random happens, so that values
# drawn from it later in the run (the weights created by Net() and the batch
# order produced by shuffle=True) are repeatable after a kernel restart.
# NOTE: some GPU kernels may still introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Per-image preprocessing: convert to a tensor, then normalize with
# mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5. The one-element tuples
# correspond to the single grayscale channel.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

# Training split: downloaded into ./data on first use, served in shuffled
# mini-batches of 32 by two worker processes.
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

# Test split: same preprocessing, but kept in a fixed order (shuffle=False).
# The camelCase name `testLoader` is kept as-is because the evaluation cell
# further down refers to it by that name.
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testLoader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

In [None]:
# Choose where the model and the batches will live: the first CUDA GPU when
# PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# show which device was selected
print(device)

cuda:0


In [None]:
# implement a CNN
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN classifier for 28x28 images (e.g. MNIST digits).

    Layout: conv -> relu -> conv -> relu -> 2x2 max-pool -> conv -> relu ->
    conv -> relu -> flatten -> linear. All convolutions use 3x3 kernels with
    no padding, so a 28x28 input shrinks as
    28 -> 26 -> 24 -> (pool) 12 -> 10 -> 8, which leaves 64 * 8 * 8 = 4096
    features for the final linear layer. Inputs with a different spatial size
    will not match that layer.

    Args:
        in_channels: number of channels in the input images (default 1,
            i.e. grayscale).
        num_classes: number of scores produced per image (default 10).

    The defaults reproduce the original fixed architecture, and the layer
    names (conv1..conv4, fc1) are unchanged.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10) -> None:
        super().__init__()
        # first convolution layer, `in_channels` inputs, 32 kernels, 3x3
        self.conv1 = nn.Conv2d(in_channels, 32, 3)
        # second convolution layer, 32 inputs, 64 kernels, 3x3
        self.conv2 = nn.Conv2d(32, 64, 3)
        # third convolution layer, 64 inputs, 64 kernels, 3x3
        self.conv3 = nn.Conv2d(64, 64, 3)
        # fourth convolution layer, 64 inputs, 64 kernels, 3x3
        self.conv4 = nn.Conv2d(64, 64, 3)
        # linear layer: 64 channels * 8 * 8 positions = 4096 features in,
        # one score per class out
        self.fc1 = nn.Linear(64 * 8 * 8, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (no softmax applied) for a batch `x`."""
        # first two convolutions, each followed by a relu activation
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # maxpool layer with kernels 2x2
        x = F.max_pool2d(x, (2, 2))
        # last two convolutions, each followed by a relu activation
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        # flatten everything except the batch dimension
        x = torch.flatten(x, start_dim=1)
        # linear layer producing `num_classes` scores per image
        x = self.fc1(x)
        return x

# build the network and move its parameters to the selected device
net = Net().to(device)
# echo the module so the cell displays its layer summary
net

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=4096, out_features=10, bias=True)
)

In [None]:
import torch.optim as optim

# Loss function: categorical cross entropy
criterion = nn.CrossEntropyLoss()
# Optimizer: stochastic gradient descent over all of the network's
# parameters, with learning rate 0.001
optimizer = optim.SGD(params=net.parameters(), lr=0.001)

In [None]:
# train the CNN for NUM_EPOCHS full passes over the training set
NUM_EPOCHS = 10

# Put the model in training mode explicitly so this cell behaves the same
# when it is re-run after the model has been switched to evaluation mode.
net.train()
for epoch in range(NUM_EPOCHS):
    # each item yielded by the loader is an (inputs, labels) pair
    for inputs, labels in trainloader:
        # send the batch to the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)
        # zero the parameter gradients left over from the previous step
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Finished Epoch {epoch+1}')

print('Finished Training!')

Finished Epoch 1
Finished Epoch 2
Finished Epoch 3
Finished Epoch 4
Finished Epoch 5
Finished Epoch 6
Finished Epoch 7
Finished Epoch 8
Finished Epoch 9
Finished Epoch 10
Finished Training!


In [None]:
# Write the trained parameters (the model's state dict, not the module
# object itself) to a file in the working directory.
torch.save(obj=net.state_dict(), f='mnist_net.pth')

In [None]:
# predict the labels of the test images and measure the accuracy
correct = 0
total = 0
# Switch to evaluation mode. The layers defined in Net (Conv2d, Linear) behave
# the same in both modes, but this keeps the cell correct if mode-dependent
# layers are added to the network later.
net.eval()
# no gradients are needed for inference
with torch.no_grad():
    for images, labels in testLoader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # predicted class = index of the largest score in each row
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Format to two decimals so float rounding noise (e.g. 96.85000000000001)
# does not leak into the report.
accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy:.2f}%')

Accuracy of the network on the test images: 96.85000000000001%
