# Imports and setting up libraries

In [None]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# re-imported before each cell runs and edits to project files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Mount Google Drive into the Colab filesystem. The project directory and the
# checkpoint paths used later in this notebook all live under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# importing necessary libraries
import os
from tqdm.auto import tqdm
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
from torchvision import transforms as T

In [None]:
!pip install torchinfo
from torchinfo import summary

In [None]:
# Work from the project folder on Drive so that relative paths (e.g. the
# './data' dataset root used below) resolve inside it.
# NOTE(review): the dynamic `models.<arch>` import further down presumably
# relies on this working directory as well — confirm.
os.chdir('/content/gdrive/MyDrive/Deep Learning/cifar10-experiments/')

In [None]:
# select the compute device: CUDA when one is visible, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
# ensuring reproducibility of code: seed every RNG this notebook draws from
# (each generator is seeded exactly once)
random_seed = 42
np.random.seed(random_seed)               # NumPy global RNG (shuffles the validation split)
torch.manual_seed(random_seed)            # PyTorch default generator
torch.cuda.manual_seed(random_seed)       # current CUDA device
torch.cuda.manual_seed_all(random_seed)   # every CUDA device
# prefer deterministic cuDNN kernels over autotuned (possibly non-deterministic) ones
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Loading data

In [None]:
# Preprocessing pipelines. All three splits share the same per-channel
# normalization statistics; only the training pipeline adds augmentation
# (random horizontal flip and a padded random 32x32 crop).
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

train_transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.RandomCrop(32, padding=4),
    T.ToTensor(),
    T.Normalize(cifar_mean, cifar_std),
])

valid_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(cifar_mean, cifar_std),
])

test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(cifar_mean, cifar_std),
])

In [None]:
# CIFAR-10 datasets (downloaded into ./data on first use). The training split
# is wrapped twice so the validation subset can use the non-augmenting
# transform while the training subset keeps its augmentation.
data_root = './data'

trainset = torchvision.datasets.CIFAR10(
    root=data_root, train=True, download=True, transform=train_transform)

validset = torchvision.datasets.CIFAR10(
    root=data_root, train=True, download=True, transform=valid_transform)

testset = torchvision.datasets.CIFAR10(
    root=data_root, train=False, download=True, transform=test_transform)

In [None]:
# Data-loading parameters:
#   batch_size - samples per mini-batch, shared by the train/valid/test loaders
#   val_size   - fraction of the training split held out for validation
batch_size = 128
val_size = 0.10

In [None]:
# Train/validation split: shuffle all training indices once (using NumPy's
# global RNG) and hold out the first `val_size` fraction for validation.
num_train = len(trainset)
split = int(np.floor(val_size * num_train))
indices = list(range(num_train))
np.random.shuffle(indices)  # in-place shuffle of the index list

valid_idx = indices[:split]
train_idx = indices[split:]
valid_sampler = SubsetRandomSampler(valid_idx)
train_sampler = SubsetRandomSampler(train_idx)

In [None]:
# Data loaders. Train and validation draw from their index subsets through the
# samplers defined for the split; the test loader walks the test set in order.
loader_options = {'batch_size': batch_size, 'num_workers': 2}

trainloader = torch.utils.data.DataLoader(
    trainset, sampler=train_sampler, **loader_options)
validloader = torch.utils.data.DataLoader(
    validset, sampler=valid_sampler, **loader_options)
testloader = torch.utils.data.DataLoader(
    testset, shuffle=False, **loader_options)

In [None]:
# human-readable CIFAR-10 class names; the position in the tuple is the label id
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

In [None]:
# functions to show an image
def imshow(img, mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010)):
    """Display a normalized channel-first image tensor with matplotlib.

    The tensor is un-normalized with the inverse of ``T.Normalize(mean, std)``
    (``img * std + mean``) before plotting. The defaults are the statistics
    used by this notebook's transforms, so colours are shown as in the source
    images instead of being shifted by a generic ``img / 2 + 0.5``.

    Args:
        img: image tensor laid out as (C, H, W), e.g. the output of
            ``torchvision.utils.make_grid``.
        mean: per-channel mean that was subtracted during normalization.
        std: per-channel standard deviation that was divided out.
    """
    img = img.detach().cpu()
    # reshape the statistics to (C, 1, 1) so they broadcast over H and W
    mean_t = torch.as_tensor(mean, dtype=img.dtype).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=img.dtype).view(-1, 1, 1)
    # unnormalize, then clamp so float round-off cannot leave the [0, 1] range
    # that matplotlib expects for float images
    img = (img * std_t + mean_t).clamp(0, 1)
    npimg = img.numpy()
    # matplotlib wants channels last: (C, H, W) -> (H, W, C)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(trainloader)
# use the built-in next(): DataLoader iterators in current PyTorch releases
# no longer provide a .next() method
images, labels = next(dataiter)

# preview at most 8 samples (fewer if the batch itself is smaller)
num_preview = min(8, len(labels))

# show images
imshow(torchvision.utils.make_grid(images[:num_preview]))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(num_preview)))

# Network definitions

In [None]:
# Name of the architecture to train. It is used below both as the module name
# inside the `models` package and as the attribute looked up in that module,
# and it is embedded in the checkpoint file name.
arch = "densenetlike"

In [None]:
# Resolve the model constructor dynamically: import `models.<arch>` and take
# the attribute with the same name from that module.
module_path = 'models.' + arch
model_file = __import__(module_path, fromlist=[arch])
base_arch = getattr(model_file, arch)

In [None]:
# build the network, move it to the selected device and display a summary
# for one batch-sized input of 3x32x32 images
input_shape = (batch_size, 3, 32, 32)

net = base_arch()
net.to(device)
summary(net, input_size=input_shape)

In [None]:
# optimisation setup: loss function, AdamW optimizer and a multi-step LR schedule
learning_rate = 3e-3        # initial learning rate
decay = 0.10                # weight-decay coefficient handed to AdamW
opt_milestones = [50, 75]   # epoch counts at which the learning rate is scaled
gamma = 0.10                # multiplicative LR factor applied at each milestone

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    net.parameters(),
    lr=learning_rate,
    weight_decay=decay,
)
scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=opt_milestones,
    gamma=gamma,
)

# Training

In [None]:
# per-epoch training log: one independent list for every tracked quantity
keys = ['epochs', 'loss', 'acc', 'val_loss', 'val_acc']
history = dict((name, []) for name in keys)

In [None]:
def train(epoch):
    """Run one training epoch over ``trainloader`` and log its statistics.

    Uses the notebook-level ``net``, ``criterion``, ``optimizer`` and
    ``device``. Appends the one-based epoch number, the mean per-sample loss
    and the accuracy (in percent) to ``history`` and prints both figures.

    The loss is recorded as a sample-weighted mean rather than a sum of batch
    losses, so it does not depend on the number of batches.

    Args:
        epoch: zero-based epoch index; stored as ``epoch + 1`` in
            ``history['epochs']``.
    """
    net.train()

    running_loss = 0.0   # sum of per-sample losses seen so far
    train_total = 0      # samples seen so far
    train_correct = 0    # correctly classified samples so far

    for inputs, labels in tqdm(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # keep track of loss and accuracy
        batch_count = labels.size(0)
        # assumes `criterion` averages over the batch (the default reduction
        # of nn.CrossEntropyLoss); weighting by the batch size keeps a smaller
        # final batch from being over-represented in the epoch mean
        running_loss += loss.item() * batch_count
        predicted = torch.argmax(outputs.detach(), dim=1)
        train_total += batch_count
        train_correct += (predicted == labels).sum().item()

    train_loss = running_loss / train_total
    train_accuracy = 100 * (train_correct / train_total)
    history['epochs'].append(epoch + 1)
    history['loss'].append(train_loss)
    history['acc'].append(train_accuracy)

    # print statistics
    print(f'    Loss of the network on the {train_total} training images: {train_loss}')
    print(f'    Accuracy of the network on the {train_total} training images: {train_accuracy}%')

In [None]:
def validation():
    """Evaluate ``net`` on ``validloader`` and log the results.

    Uses the notebook-level ``net``, ``criterion`` and ``device``. Appends the
    mean per-sample validation loss (a plain Python float) and the accuracy in
    percent to ``history`` and prints both.

    The loss is converted with ``.item()`` before accumulation so ``history``
    never holds tensors, which would keep device memory alive and cannot be
    plotted directly when they live on the GPU.
    """
    valid_correct = 0
    valid_total = 0
    running_loss = 0.0   # sum of per-sample losses seen so far

    net.eval()
    # since we're not training, we don't need to calculate the gradients
    with torch.no_grad():
        for images, labels in validloader:
            images, labels = images.to(device), labels.to(device)
            # calculate outputs by running images through the network
            outputs = net(images)
            batch_count = labels.size(0)
            # assumes `criterion` averages over the batch (the default
            # reduction of nn.CrossEntropyLoss); weight by the batch size to
            # obtain an exact per-sample mean over the whole split
            running_loss += criterion(outputs, labels).item() * batch_count
            # the class with the highest score is what we choose as prediction
            predicted = torch.argmax(outputs, dim=1)
            valid_total += batch_count
            valid_correct += (predicted == labels).sum().item()

    valid_loss = running_loss / valid_total
    valid_accuracy = 100 * (valid_correct / valid_total)
    history['val_loss'].append(valid_loss)
    history['val_acc'].append(valid_accuracy)

    print(f'    Loss of the network on the {valid_total} validation images: {valid_loss}')
    print(f'    Accuracy of the network on the {valid_total} validation images: {valid_accuracy}%')

In [None]:
# training
epochs = 100

# Each epoch: one optimisation pass, one evaluation on the validation split,
# then one scheduler step so the learning-rate schedule advances per epoch.
epoch_indices = range(epochs)
for epoch in epoch_indices:
    print("Epoch", epoch + 1)
    train(epoch)
    validation()
    print()
    scheduler.step()

print('Finished Training')

In [None]:
# Loss and accuracy curves for the training and validation splits.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# float() accepts plain numbers as well as 0-d tensors (including tensors on
# the GPU), so the curves can be drawn whatever type was logged in `history`.
train_losses = [float(value) for value in history['loss']]
valid_losses = [float(value) for value in history['val_loss']]

loss_ax.plot(history['epochs'], train_losses, label='loss')
loss_ax.plot(history['epochs'], valid_losses, label='Val loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title("Loss Visualisation")
loss_ax.legend()

acc_ax.plot(history['epochs'], history['acc'], label='accuracy')
acc_ax.plot(history['epochs'], history['val_acc'], label='Val accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title("Accuracy Visualisation")
acc_ax.legend()

# render the figure explicitly instead of echoing the last artist's repr
plt.show()

In [None]:
# saving model
model_save_name = f'cifar10{arch}.pt'
path = f"/content/gdrive/My Drive/Deep Learning/cifar10-experiments/trained models/{model_save_name}"
# create the target folder first so that a missing directory cannot make the
# save fail after the whole training run has completed
os.makedirs(os.path.dirname(path), exist_ok=True)
# NOTE(review): this pickles the entire module object, so the checkpoint can
# only be loaded where the `models` package is importable. Saving
# net.state_dict() would be more portable, but the loading cell would have to
# change with it.
torch.save(net, path)

# Testing

In [None]:
# loading model
model_save_name = f'cifar10{arch}.pt'
PATH = f"/content/gdrive/My Drive/Deep Learning/cifar10-experiments/trained models/{model_save_name}"
# The checkpoint written by torch.save(net, path) is a fully pickled module,
# not a state_dict, so it must be loaded with weights_only=False on PyTorch
# releases where torch.load defaults to weights_only=True.
# SECURITY: unpickling can execute arbitrary code — only load checkpoints that
# were produced by this notebook or come from a trusted source.
model = torch.load(PATH, map_location=device, weights_only=False)

In [None]:
# Move the loaded model to the selected `device` (GPU when available, CPU
# otherwise). The trailing semicolon suppresses the notebook's echo of the
# value returned by .to().
model.to(device);

In [None]:
# overall accuracy of the loaded model on the test set
correct = 0
total = 0

model.eval()
# inference only, so gradient tracking is switched off
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        # run the batch through the network
        outputs = model(images)
        # the index of the largest output along the class dimension is the prediction
        predicted = outputs.max(dim=1).indices
        hits = (predicted == labels).sum().item()
        correct += hits
        total += labels.size(0)

print(f'Accuracy of the network on the {total} test images: {100 * correct / total}')

In [None]:
# per-class tallies of correct and total test predictions
correct_pred = dict.fromkeys(classes, 0)
total_pred = dict.fromkeys(classes, 0)

model.eval()
# again no gradients needed
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predictions = outputs.max(dim=1).indices
        # walk the batch as plain Python ints and update the tallies
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            classname = classes[label]
            total_pred[classname] += 1
            if label == prediction:
                correct_pred[classname] += 1

# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f"Accuracy for class {classname} is: {accuracy}")