In [1]:
import torch

from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

import time

## Dataset and DataLoader

In [2]:
# download the dataset FashionMNIST

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)  

In [3]:
# prepare DataLoader

train_dataloader = DataLoader(training_data, 
                              batch_size=100, 
                              shuffle=True, 
                              num_workers=2)
test_dataloader = DataLoader(test_data, 
                             batch_size=100, 
                             shuffle=False, 
                             num_workers=2)

## Convolutional Neural Network

In [11]:
# define the model
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        self.fc1 = nn.Linear(in_features=64 * 6 * 6, out_features=600)
        self.drop = nn.Dropout2d(0.25)
        self.fc2 = nn.Linear(in_features=600, out_features=120)
        self.fc3 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = self.drop(out)
        out = F.relu(self.fc2(out))
        out = F.relu(self.fc3(out))

        return out


# instantiate the model
model = FashionCNN()
print(model)


FashionCNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2304, out_features=600, bias=True)
  (drop): Dropout2d(p=0.25, inplace=False)
  (fc2): Linear(in_features=600, out_features=120, bias=True)
  (fc3): Linear(in_features=120, out_features=10, bias=True)
)


## Loss function and optimizer

In [12]:
# define loss
criterion = nn.CrossEntropyLoss()

learning_rate = 0.001

# define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## Training loop

In [13]:
# training loop
for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print("Finished Training")


[1,   100] loss: 0.911
[1,   200] loss: 0.653
[1,   300] loss: 0.595
[1,   400] loss: 0.569
[1,   500] loss: 0.559
[1,   600] loss: 0.527
[2,   100] loss: 0.497
[2,   200] loss: 0.517
[2,   300] loss: 0.505
[2,   400] loss: 0.475
[2,   500] loss: 0.279
[2,   600] loss: 0.269
[3,   100] loss: 0.250
[3,   200] loss: 0.240
[3,   300] loss: 0.233
[3,   400] loss: 0.240
[3,   500] loss: 0.236
[3,   600] loss: 0.239
[4,   100] loss: 0.205
[4,   200] loss: 0.212
[4,   300] loss: 0.218
[4,   400] loss: 0.207
[4,   500] loss: 0.220
[4,   600] loss: 0.203
[5,   100] loss: 0.180
[5,   200] loss: 0.189
[5,   300] loss: 0.183
[5,   400] loss: 0.190
[5,   500] loss: 0.186
[5,   600] loss: 0.182
Finished Training


Q1: Discussion on batch-size.
*   What is batch-size in Neural Network. Where is it defined in the code?
*   Try to vary the batch-size in the code. What do you notice?



## Train on GPU

The following commands show how to transfer tensors and model to GPU.

Place them in the correct position of the code above and monitor the changes.

You need to change runtime to GPU:
*   Go to Runtime -> Change runtime type -> Hardware accelerator: GPU.



In [14]:
# define our device as GPU if available otherwise CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [15]:
# move the model to GPU
model.to(device)

FashionCNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2304, out_features=600, bias=True)
  (drop): Dropout2d(p=0.25, inplace=False)
  (fc2): Linear(in_features=600, out_features=120, bias=True)
  (fc3): Linear(in_features=120, out_features=10, bias=True)
)

In [None]:
# move the input and label tensors to GPU
inputs, labels = data[0].to(device), data[1].to(device)

Q2: Discussion on GPU.
*   Use the magic command `%%time` to measure the training time.
*   Compare the training time when using CPU and GPU.
*   Measure the speed-up with GPU. What is the speed-up factor?


