# Helper code from Training session for FCN

In [1]:
# 1. Imports
import torch
import math
import torch.nn as nn # all neural network modules, nn.Linear, nn.Conv2d, BatchNorm, loss functions
import torch.optim as optim # all optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F # all functions that dont have any parameters eg: activations like relu
from torch.utils.data import DataLoader # gives easier dataset management and creates mini batches
import torchvision.datasets as datasets # standard public datasets 
import torchvision.transforms as transforms # transforms on dataset

In [6]:
# 2. Create Convolutional Neural Network
class CNN(nn.Module):
    def __init__(self, inchannels, num_classes):
        super(CNN, self).__init__()
        # formula for conv output
            #  out_x = floor((in_x + 2*p_x - K_x)/s_x) + 1
                # where in_x=input_features in x dim
                # p_x=padding x dim, K_x=kernel size x dim, s_x=stride x dim

        self.conv1 = nn.Conv2d(in_channels=inchannels, out_channels=8,
                               kernel_size=(3,3), stride=(1,1), padding=(1,1)) # out=8x28x28
        # formula for pool output
        # n_pool_x = floor( (in_x - k_x)/s_x) + 1 )
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)) # will half the conv layer size # out=8x14x14
        
        self.conv2 = nn.Conv2d(in_channels=self.conv1.out_channels, out_channels=16,
                               kernel_size=(3,3), stride=(1,1), padding=(1,1)) # out=16x14x14
        # need to consider inchannels from previous conv and pooling operation
        # since pooling op divided features by half bcoz of kernel=(2,2) and stride=(2,2)
        # multiply infeatures of linear layer with out_channels or previous layer and pool channels = 14/2=7
        
        self.fc1 = nn.Linear(in_features=self.conv2.out_channels * 7 * 7, out_features=7 * 7)
        self.fc2 = nn.Linear(in_features=self.fc1.out_features, out_features=num_classes)
        return
    
    def forward(self, x):
        x = F.relu(self.conv1(x)) # torch.Size([64, 8, 28, 28])
        x = self.pool(x) # torch.Size([64, 8, 14, 14])
        x = F.relu(self.conv2(x)) # torch.Size([64, 16, 14, 14])
        x = self.pool(x) # torch.Size([64, 16, 7, 7])

        # need to flatten conv features from NxCxHxW to Nxfeatures before sending to fc block
        x = x.view(x.shape[0],-1)  # torch.Size([64, 784])

        x = F.relu(self.fc1(x)) # torch.Size([64, 49])
        x = self.fc2(x) # returns logits = (batch_size, num_classes) torch.Size([64, 10])
        return x


In [7]:
# Testing the CNN architecture on dummy data
model = CNN(inchannels=1, num_classes=10)
x = torch.rand(64, 1, 28, 28)
model(x).shape #torch.Size([64, 10])

torch.Size([64, 10])

In [8]:
# 3. Set Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
# 4. Hyperparameters
inchannels = 1
num_classes = 10
lr = 0.001
batch_size=64
num_epochs=10


In [10]:
# 5. Load Data (Simple MNIST)
train_dataset=datasets.MNIST(root='dataset/', train=True, 
                             transform=transforms.ToTensor(),
                             download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True)
test_dataset=datasets.MNIST(root='dataset/', train=False, 
                             transform=transforms.ToTensor(),
                             download=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size,
                          shuffle=False)


n_iterations = math.ceil(len(train_dataset)/batch_size)

In [14]:
# 6. Initialize network
model = CNN(inchannels==inchannels, num_classes=num_classes).to(device)

In [15]:
# 7. Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)


In [16]:
# 8. Train Network
model.train()
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data, targets = data.to(device), targets.to(device) # torch.Size([64, 1, 28, 28]), torch.Size([64])

        # clean past gradients collected during backprop
        optimizer.zero_grad()

        # Forward Pass - compute predictions
        logits = model(data)
        train_loss = criterion(logits, targets)
        
        if epoch % 2 == 0 and batch_idx % 300 == 0:
            print(f"Epoch : {epoch} Batch_size/Iterations: {batch_idx}/{n_iterations}, Train Loss: {train_loss:.4f}")

        # Backward Pass - get the gradients
        train_loss.backward()

        # Update our weights
        optimizer.step()


Epoch : 0 Batch_size/Iterations: 0/938, Train Loss: 2.3005
Epoch : 0 Batch_size/Iterations: 300/938, Train Loss: 0.3333
Epoch : 0 Batch_size/Iterations: 600/938, Train Loss: 0.1458
Epoch : 0 Batch_size/Iterations: 900/938, Train Loss: 0.1433
Epoch : 2 Batch_size/Iterations: 0/938, Train Loss: 0.0855
Epoch : 2 Batch_size/Iterations: 300/938, Train Loss: 0.0371
Epoch : 2 Batch_size/Iterations: 600/938, Train Loss: 0.0639
Epoch : 2 Batch_size/Iterations: 900/938, Train Loss: 0.0131
Epoch : 4 Batch_size/Iterations: 0/938, Train Loss: 0.0104
Epoch : 4 Batch_size/Iterations: 300/938, Train Loss: 0.1327
Epoch : 4 Batch_size/Iterations: 600/938, Train Loss: 0.0632
Epoch : 4 Batch_size/Iterations: 900/938, Train Loss: 0.0720
Epoch : 6 Batch_size/Iterations: 0/938, Train Loss: 0.0729
Epoch : 6 Batch_size/Iterations: 300/938, Train Loss: 0.0606
Epoch : 6 Batch_size/Iterations: 600/938, Train Loss: 0.1181
Epoch : 6 Batch_size/Iterations: 900/938, Train Loss: 0.0759
Epoch : 8 Batch_size/Iterations:

In [17]:
# 9. Check accuracy on training and test to see how good is our model (Eval)
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on train data")
    else:
        print("Checking accuracy on test data")
    num_correct=num_samples=0
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, targets) in enumerate(loader):
            data, targets = data.to(device), targets.to(device)

            logits=model(data) # (batch_size, num_classes)
            
            # we need max index in dim=1 which holds num_classes values
            prediction_index = torch.argmax(torch.softmax(logits, dim=1), dim=1) # torch.size([64])
            num_correct += (prediction_index == targets).sum()
            num_samples += prediction_index.shape[0]

            val_loss += criterion(logits, targets).item() * data.shape[0] # add avg losses for each batch multiplied by batch size
    
    acc = (num_correct / num_samples) * 100
    print(f"Accuracy achieved : {acc:.2f} on dataset: {len(loader)*batch_size} and mean loss : {val_loss/len(loader.dataset):.3f}")

check_accuracy(train_loader, model)
check_accuracy(test_loader, model)

Checking accuracy on train data
Accuracy achieved : 99.42 on dataset: 60032 and mean loss : 0.019
Checking accuracy on test data
Accuracy achieved : 98.93 on dataset: 10048 and mean loss : 0.035
