In [None]:
# Copyright 2023 The ML Notebooks Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Convolutional Neural Network (CNN)


Convolutional Neural Networks (CNNs) are a type of neural network commonly used for image classification and computer vision tasks. They use convolutional layers to automatically learn spatial hierarchies of features from images, allowing them to identify patterns and objects.

In this notebook, we will build a simple CNN using PyTorch to classify images from the CIFAR-10 dataset, which consists of 60,000 32x32 color images in 10 classes. We will also cover data preprocessing, training, evaluation, and data visualization.


### Setup


In [None]:
import copy
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

print("Finished importing...")

### Data Preprocessing


In [None]:
# Preprocessing pipelines. Both finish by converting the image to a tensor and
# normalizing each channel with the same mean/std. Only the training pipeline
# applies random augmentation (random resized crop + random horizontal flip).

transform_train = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Deterministic counterpart used for evaluation: resize, center crop, normalize
transform_test = transforms.Compose(
    [
        transforms.Resize(size=32),
        transforms.CenterCrop(size=32),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [None]:
# Loader settings shared by the training and test loaders
batch_size = 8
num_workers = 2

# CIFAR-10 splits, stored under ./data (downloaded when not already present)
trainset = datasets.CIFAR10(
    root="./data", train=True, transform=transform_train, download=True
)
testset = datasets.CIFAR10(
    root="./data", train=False, transform=transform_test, download=True
)

# Only the training batches are shuffled; the test order stays fixed
trainloader = DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
testloader = DataLoader(
    dataset=testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

# Human-readable class names; the rest of the notebook indexes this tuple
# with the integer label of a sample
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)

### Data Visualization

Let's visualize some examples from the CIFAR-10 dataset to get a better sense of what the data looks like.


In [None]:
def imshow(
    img,
    title=None,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
):
    """Draw a normalized channel-first image tensor on the current matplotlib axes.

    The tensor is converted to a channel-last NumPy array, the per-channel
    normalization is undone (``img * std + mean``), values are clipped to
    ``[0, 1]`` and the result is shown with the axes hidden.

    Args:
        img: Image tensor laid out as (channels, height, width).
        title: Optional title for the axes; nothing is set when ``None``.
        mean: Per-channel mean that was subtracted during normalization.
            Defaults to the values used by the transforms in this notebook.
        std: Per-channel standard deviation used during normalization.
            Defaults to the values used by the transforms in this notebook.
    """
    # detach() + cpu() make the conversion work for tensors that track
    # gradients or live on an accelerator; plain .numpy() raises for those.
    img = img.detach().cpu().numpy().transpose((1, 2, 0))
    # undo Normalize: x_norm = (x - mean) / std  =>  x = x_norm * std + mean
    img = np.asarray(std) * img + np.asarray(mean)
    img = np.clip(img, 0, 1)
    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.axis("off")

In [None]:
# Show a 3x3 grid of randomly drawn training images, titled with their class
rows, cols = 3, 3
figure = plt.figure(figsize=(8, 8))
for i in range(1, rows * cols + 1):
    # pick one random position in the training set
    sample_idx = torch.randint(len(trainset), size=(1,)).item()
    img, label = trainset[sample_idx]
    # subplot slots are numbered from 1, matching the loop counter
    figure.add_subplot(rows, cols, i)
    imshow(img, title=classes[label])
plt.show()

In [None]:
# Get a batch of training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(images.shape)
print(labels)
# Make a grid from batch
out = torchvision.utils.make_grid(images)

imshow(out)
# Walk over the labels that were actually returned instead of
# range(batch_size): a batch can be shorter than batch_size, and indexing
# past its end would raise IndexError.
print(" ".join(f"{classes[label]:5s}" for label in labels.tolist()))

### Define the CNN architecture


The `Net` class defines the architecture of our network. It has two convolutional layers with 5x5 kernels, each followed by a max pooling layer with a 2x2 kernel. The convolutional layers have 6 and 16 output channels, respectively. The fully connected layers have 120, 84, and 10 output neurons, respectively.

The `forward` method defines how the data flows through the network. The input `x` is passed through the convolutional and pooling layers, then flattened and passed through the fully connected layers.


In [None]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    The first linear layer expects ``16 * 5 * 5`` features, which is what the
    two 5x5 convolutions and two 2x2 poolings produce for 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5 per spatial side, 16 channels).

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # a single pooling module is reused after both convolutions
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch of images."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

### Training and Evaluation


In [None]:
def test_accuracy(model, device, test_loader):
    was_training = model.training
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, total=len(test_loader)):
            inputs, labels = inputs.to(device), labels.to(device)
            # run the model on the test set to predict labels
            outputs = model(inputs)
            # the label with the highest energy will be our prediction
            _, preds = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()

        model.train(mode=was_training)

    accuracy = 100 * correct / total
    return accuracy

In [None]:
def test_accuracy_per_class(model, device, test_loader, classes):
    was_training = model.training
    model.eval()

    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, total=len(test_loader)):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            # collect the correct predictions for each class
            for label, pred in zip(labels, preds):
                if label == pred:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

        model.train(mode=was_training)

    accuracy_per_class = {classname: 0 for classname in classes}
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        accuracy_per_class[classname] = accuracy

    return accuracy_per_class

In [None]:
def visualize(model, device, data_loader, classes, num_images=4):
    """Plot the first ``num_images`` samples with ground-truth and predicted class.

    Images are laid out four per row, using only as many rows as needed. The
    model is switched to eval mode while predicting and its previous
    train/eval mode is restored afterwards, even if plotting raises.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        device: Device the inputs and labels are moved to before the forward pass.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        classes: Sequence of class names, indexed by integer label.
        num_images: Number of images to draw; nothing is drawn when it is
            not positive.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()

    # The grid used to be (num_images // 2) x 4, which reserves twice the
    # needed slots and asks for zero rows when num_images is 1.
    cols = 4
    rows = (num_images + cols - 1) // cols  # ceiling division

    images_so_far = 0
    fig = plt.figure(figsize=(8, 8))
    fig.subplots_adjust(wspace=0.4)

    try:
        with torch.no_grad():
            for inputs, labels in data_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                true_ids = labels.tolist()
                pred_ids = preds.tolist()

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    plt.subplot(rows, cols, images_so_far)
                    title = f"GT: {classes[true_ids[j]]} \nP: {classes[pred_ids[j]]}"
                    imshow(inputs[j].cpu(), title)

                    if images_so_far == num_images:
                        return
    finally:
        # runs on the early return as well, so the mode is always restored
        model.train(mode=was_training)

In [None]:
def train(
    model,
    device,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs,
):
    """Train ``model`` and return it loaded with its best-validation weights.

    Each epoch runs one pass over ``train_loader``, steps the scheduler, then
    measures validation accuracy with ``test_accuracy``. The state dict from
    the epoch with the highest validation accuracy is kept and loaded back
    into the model before returning.

    Args:
        model: Module to optimize (modified in place).
        device: Device the batches are moved to.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        val_loader: Loader passed to ``test_accuracy`` after every epoch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``
            to train without one.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, holding the best weights found.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        print(f"Epoch {epoch + 1}/{num_epochs}")
        print("-" * 10)

        model.train()

        running_loss = 0.0
        running_corrects = 0
        num_samples = 0

        for inputs, labels in tqdm(train_loader, total=len(train_loader)):
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # compute the loss based on model output and real labels
            loss = criterion(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()
            # statistics, kept as plain Python numbers
            batch_len = inputs.size(0)
            running_loss += loss.item() * batch_len
            running_corrects += (preds == labels).sum().item()
            num_samples += batch_len

        if scheduler is not None:
            scheduler.step()

        # Average over the samples actually seen this epoch rather than
        # len(train_loader.dataset): the two differ when the loader uses a
        # sampler or drops the last batch. max() avoids dividing by zero.
        denom = max(num_samples, 1)
        epoch_loss = running_loss / denom
        # reported as a percentage so it is comparable with val_acc, which
        # test_accuracy returns on a 0-100 scale
        epoch_acc = 100 * running_corrects / denom
        val_acc = test_accuracy(model, device, val_loader)
        print(
            f"loss: {epoch_loss:.4f}  train acc: {epoch_acc:.4f}  val acc: {val_acc:.4f}"
        )
        print()

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(
        f"Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s"
    )
    # ".4f" (four decimals); the previous "4f" only set a minimum field width
    print(f"Best Acc: {best_acc:.4f}")
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Choose the compute device: Apple's MPS backend wins when it is available,
# otherwise the first CUDA GPU, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(device)

In [None]:
# Build the CNN; when more than one CUDA device is visible, wrap it so
# batches are split across them, then move it to the selected device.
net = Net()

num_gpus = torch.cuda.device_count()
if num_gpus > 1:
    print("Using", num_gpus, "GPUs")
    net = nn.DataParallel(net)

net = net.to(device)

In [None]:
# Cross-entropy loss over the class scores, optimized with SGD plus momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [None]:
# Step schedule: multiply the learning rate by 0.1 after every 7 epochs
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=7,
    gamma=0.1,
)

In [None]:
# Run the training loop and keep the weights it returns.
# Note: the test loader doubles as the validation loader here, so the
# best-epoch selection inside train() is made on test data.
num_epochs = 10

net = train(
    model=net,
    device=device,
    train_loader=trainloader,
    val_loader=testloader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=num_epochs,
)

In [None]:
# Plot eight test images together with their true and predicted classes
visualize(
    model=net,
    device=device,
    data_loader=testloader,
    classes=classes,
    num_images=8,
)

In [None]:
# Overall accuracy (in percent) of the trained model on the test set
acc = test_accuracy(model=net, device=device, test_loader=testloader)
print("Test accuracy:", acc)

In [None]:
# Per-class accuracy on the test set, printed from best to worst class
acc_per_class = test_accuracy_per_class(net, device, testloader, classes)
for classname in sorted(acc_per_class, key=acc_per_class.get, reverse=True):
    accuracy = acc_per_class[classname]
    print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")

### Conclusion

You have trained a simple CNN model for image classification using PyTorch on the CIFAR-10 dataset.
