# Setup

## Download & Extract the dataset

In [1]:
# Dowload the dataset
from torchvision.datasets.utils import download_url

dataset_url = "https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(dataset_url, '.')

Using downloaded and verified file: ./cifar10.tgz


In [2]:
# Extract from archive
import tarfile

data_dir = './data'
with tarfile.open('./cifar10.tgz', 'r:gz') as tar:
    tar.extractall(path=data_dir)

## Explore the dataset

In [3]:
import os
data_dir = data_dir + "/cifar10"

print(os.listdir(data_dir))
classes = os.listdir(data_dir + "/train")
print(classes)

['test', 'train']
['automobile', 'dog', 'horse', 'ship', 'airplane', 'cat', 'frog', 'bird', 'deer', 'truck']


In [4]:
for c in classes:
    print(f"{c:10s} (train): {len(os.listdir(data_dir + '/train/' + c))}")
    print(f"{c:10s} ( test): {len(os.listdir(data_dir + '/test/' + c))}")

automobile (train): 5000
automobile ( test): 1000
dog        (train): 5000
dog        ( test): 1000
horse      (train): 5000
horse      ( test): 1000
ship       (train): 5000
ship       ( test): 1000
airplane   (train): 5000
airplane   ( test): 1000
cat        (train): 5000
cat        ( test): 1000
frog       (train): 5000
frog       ( test): 1000
bird       (train): 5000
bird       ( test): 1000
deer       (train): 5000
deer       ( test): 1000
truck      (train): 5000
truck      ( test): 1000


## Transfer the images to Tensors

In [5]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
dataset_train = ImageFolder(data_dir+'/train', transform=ToTensor())

print(f"Total size of the dataset: {len(dataset_train)}")

Total size of the dataset: 50000


In [6]:
# RGB 32x32 pixels per image; 3 (channel) * 32 (width) * 32 (height)
img, label = dataset_train[0]
print(img.shape, label)
img

torch.Size([3, 32, 32]) 0


tensor([[[0.7922, 0.7922, 0.8000,  ..., 0.8118, 0.8039, 0.7961],
         [0.8078, 0.8078, 0.8118,  ..., 0.8235, 0.8157, 0.8078],
         [0.8235, 0.8275, 0.8314,  ..., 0.8392, 0.8314, 0.8235],
         ...,
         [0.8549, 0.8235, 0.7608,  ..., 0.9529, 0.9569, 0.9529],
         [0.8588, 0.8510, 0.8471,  ..., 0.9451, 0.9451, 0.9451],
         [0.8510, 0.8471, 0.8510,  ..., 0.9373, 0.9373, 0.9412]],

        [[0.8000, 0.8000, 0.8078,  ..., 0.8157, 0.8078, 0.8000],
         [0.8157, 0.8157, 0.8196,  ..., 0.8275, 0.8196, 0.8118],
         [0.8314, 0.8353, 0.8392,  ..., 0.8392, 0.8353, 0.8275],
         ...,
         [0.8510, 0.8196, 0.7608,  ..., 0.9490, 0.9490, 0.9529],
         [0.8549, 0.8471, 0.8471,  ..., 0.9412, 0.9412, 0.9412],
         [0.8471, 0.8431, 0.8471,  ..., 0.9333, 0.9333, 0.9333]],

        [[0.7804, 0.7804, 0.7882,  ..., 0.7843, 0.7804, 0.7765],
         [0.7961, 0.7961, 0.8000,  ..., 0.8039, 0.7961, 0.7882],
         [0.8118, 0.8157, 0.8235,  ..., 0.8235, 0.8157, 0.

In [7]:
# List of the classes
print(dataset_train.class_to_idx)

{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}


In [8]:
from torch.utils.data import random_split

val_size = int(len(dataset_train)*.1)
train_size = len(dataset_train) - val_size

train_ds, val_ds = random_split(dataset_train, [train_size, val_size])
len(train_ds), len(val_ds)

(45000, 5000)

In [9]:
from torch.utils.data.dataloader import DataLoader

batch_size=128
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size*2)

In [10]:
# Train data
for images, labels in train_loader:
    print('images.shape:', images.shape)
    break

# Validation data
for images, labels in val_loader:
    print('images.shape:', images.shape)
    break

images.shape: torch.Size([128, 3, 32, 32])
images.shape: torch.Size([256, 3, 32, 32])


# Train the model

## Define the network

In [11]:
from torch import nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)

        self.batchn1 = nn.BatchNorm2d(32)
        self.batchn2 = nn.BatchNorm2d(64)
        self.batchn3 = nn.BatchNorm2d(128)
        self.batchn4 = nn.BatchNorm2d(128)
        self.batchn5 = nn.BatchNorm2d(256)
        self.batchn6 = nn.BatchNorm2d(256)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256*4*4, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):                           # Output shape
        x = self.relu(self.batchn1(self.conv1(x)))  #  32 * 32 * 32
        x = self.relu(self.batchn2(self.conv2(x)))  #  64 * 32 * 32
        x = self.maxpool(x)                         #  64 * 16 * 16

        x = self.relu(self.batchn3(self.conv3(x)))  # 128 * 16 * 16
        x = self.relu(self.batchn4(self.conv4(x)))  # 128 * 16 * 16
        x = self.maxpool(x)                         # 128 *  8 *  8

        x = self.relu(self.batchn5(self.conv5(x)))  # 256 *  8 *  8
        x = self.relu(self.batchn6(self.conv6(x)))  # 256 *  8 *  8
        x = self.maxpool(x)                         # 256 *  4 *  4

        x = self.flatten(x)                         # 4096 = 256*4*4
        x = self.relu(self.fc1(x))                  # 1024
        x = self.relu(self.fc2(x))                  # 512
        x = self.fc3(x)                             # 10
        return x

In [12]:
import torch
from torchsummary import summary

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SimpleCNN().to(device)
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
       BatchNorm2d-2           [-1, 32, 32, 32]              64
              ReLU-3           [-1, 32, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          18,496
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
         MaxPool2d-7           [-1, 64, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]          73,856
       BatchNorm2d-9          [-1, 128, 16, 16]             256
             ReLU-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 128, 16, 16]         147,584
      BatchNorm2d-12          [-1, 128, 16, 16]             256
             ReLU-13          [-1, 128, 16, 16]               0
        MaxPool2d-14            [-1, 12

In [13]:
# Loss function & Optimizer
from torch import optim

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## Run the training & validation

In [14]:
import torch.nn.functional as F

epochs = 5
for epoch in range(epochs):
    # Training phase
    running_loss_train = 0.0
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss_train += loss.item()

    # Validation phase
    running_loss_val = 0.0
    running_acc_val = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss_val += loss.item()
            probabilities = F.softmax(outputs, dim=1)
            preds = torch.argmax(outputs, 1)
            running_acc_val += torch.sum(preds == labels).item()

    print(f'Epoch [{epoch+1:2d}/{epochs:2d}], TRN Loss: {running_loss_train / len(train_loader):.4f}, VLD Loss: {running_loss_val / len(val_loader):.4f}, VLD Acc: {running_acc_val / len(val_ds):.4f}')

Epoch [ 1/ 5], TRN Loss: 1.3744, VLD Loss: 1.0734, VLD Acc: 0.6132
Epoch [ 2/ 5], TRN Loss: 0.8694, VLD Loss: 0.7707, VLD Acc: 0.7308
Epoch [ 3/ 5], TRN Loss: 0.6686, VLD Loss: 0.7834, VLD Acc: 0.7292
Epoch [ 4/ 5], TRN Loss: 0.5413, VLD Loss: 0.5879, VLD Acc: 0.7982
Epoch [ 5/ 5], TRN Loss: 0.4394, VLD Loss: 0.6344, VLD Acc: 0.7930


# Test the model

In [15]:
test_loader = DataLoader(ImageFolder(data_dir+'/test', transform=ToTensor()), batch_size*2)

In [16]:
acc_test = 0
model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        probabilities = F.softmax(outputs, dim=1)
        preds = torch.argmax(outputs, 1)
        acc_test += torch.sum(preds == labels).item()
print(f'Test Acc: {acc_test / len(test_loader.dataset)*100:.2f}%')

Test Acc: 78.02%


# Save or Load the model parameters (optional)

In [17]:
torch.save(model.state_dict(), 'simple_cnn.pth')

In [18]:
model = SimpleCNN().to(device)
model.load_state_dict(torch.load('simple_cnn.pth'))

  model.load_state_dict(torch.load('simple_cnn.pth'))


<All keys matched successfully>

Reference:
- https://www.kaggle.com/code/shadabhussain/cifar-10-cnn-using-pytorch
- https://medium.com/@sergioalves94/deep-learning-in-pytorch-with-cifar-10-dataset-858b504a6b54