## Data Preparation

In [None]:
# Importing the libraries
import os
import torch
import torchvision
from torchvision.utils import make_grid

import numpy as np

In [None]:
from torchvision.datasets import FashionMNIST
from torchvision.transforms import Compose, ToTensor, Normalize, GaussianBlur, RandomPerspective, RandomRotation

In [None]:
# Training-time preprocessing: convert the image to a tensor, apply light
# random augmentation (blur, perspective warp, rotation), then normalize
# with mean 0.5 and std 0.5.
transform = Compose([
    ToTensor(),
    GaussianBlur(kernel_size=(5, 9), sigma=(0.01, 0.1)),
    # p is the probability of applying the warp; distortion_scale is its strength
    RandomPerspective(distortion_scale=0.1, p=0.1),
    RandomRotation(degrees=10),
    Normalize((0.5,), (0.5,)),
])

# Download (if needed) and load the training split
dataset = FashionMNIST('MNIST_data/', train=True, download=True, transform=transform)

In [None]:
# Show the class names of the dataset; labels index into this list
print(dataset.classes)

In [None]:
import matplotlib.pyplot as plt
import random
def show_example(img, label):
    """Print the class name and index of a sample, then draw its image in grayscale."""
    class_name = dataset.classes[label]
    print('Label: {} ({})'.format(class_name, label))
    pixels = img.squeeze()  # drop singleton dimensions before plotting
    plt.imshow(pixels, cmap='Greys_r')
    plt.axis(False)

In [None]:
# Show a random sample. randrange excludes its upper bound, whereas
# random.randint(0, len(dataset)) could return len(dataset) and index past the end.
show_example(*dataset[random.randrange(len(dataset))])

In [None]:
# Show the sample stored at index 20000
show_example(*dataset[20000])

## Creating Training and Validation Datasets

In [None]:
def split_indices(n, val_frac, seed):
    """
    Randomly split the indices 0..n-1 into a training and a validation part.

    Args:
        n: total number of samples
        val_frac: fraction of samples to put in the validation part
        seed: value used to seed NumPy's global random generator

    Returns:
        (train_indices, val_indices); the validation part holds
        int(val_frac * n) indices.
    """
    # Seeding the global generator makes the permutation reproducible
    np.random.seed(seed)
    shuffled = np.random.permutation(n)
    # The head of the permutation becomes the validation part, the rest training
    cut = int(val_frac * n)
    val_part, train_part = shuffled[:cut], shuffled[cut:]
    return train_part, val_part

In [None]:
# Hold out one tenth of the samples for validation; the fixed seed keeps the split reproducible
val_frac = 1/10
rand_seed =  314159

# Split the dataset indices and report the size of each part
train_indices, val_indices = split_indices(len(dataset), val_frac, rand_seed)
print("#samples in training set: {}".format(len(train_indices)))
print("#samples in validation set: {}".format(len(val_indices)))

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [None]:
# Number of samples per batch for every dataloader below
batch_size = 32

In [None]:
# Both loaders read from the same dataset object; each sampler restricts its
# loader to one part of the index split.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Training and validation data loaders
train_dl = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_dl = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

In [None]:
def show_batch(dl):
    """
    Display the first batch of a dataloader as an image grid, 8 images per row.

    Args:
        dl: iterable yielding (images, labels) batches of image tensors

    Does nothing if the dataloader yields no batches.
    """
    batch = next(iter(dl), None)
    if batch is None:
        return
    images, _ = batch
    # Move to the CPU so batches coming from a GPU-backed loader can be plotted
    images = images.detach().cpu()
    # normalize=True rescales the grid into [0, 1]. Inputs normalized with
    # e.g. Normalize((0.5,), (0.5,)) contain negative values, which imshow
    # would otherwise clip.
    grid = make_grid(images, nrow=8, normalize=True)

    fig, ax = plt.subplots(figsize=(10,10))
    ax.set_xticks([]); ax.set_yticks([])
    # make_grid returns channels first; imshow expects channels last
    ax.imshow(grid.permute(1, 2, 0), cmap='Greys_r')

In [None]:
# Preview one batch of training images
show_batch(train_dl)

## Building the Model

In [None]:
# Import the libraries
import torch.nn as nn
import torch.nn.functional as F

from torchinfo import summary

In [None]:
class ImageClassifierNet(nn.Module):
    """
    Small CNN: two (5x5 conv -> sigmoid -> 2x2 max-pool) stages, dropout, and
    a single linear layer that outputs 10 class scores.

    The first convolution takes 1 input channel and the linear layer expects
    7*7*64 features, which matches 1x28x28 inputs (28 -> 14 -> 7 after the
    two pooling steps).
    """

    def __init__(self, n_channels=3):
        # NOTE: n_channels is accepted but not used; the first convolution is
        # fixed at 1 input channel.
        super().__init__()

        # Stage 1: 1 -> 32 feature maps; padding=2 keeps the spatial size before pooling
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Stage 2: 32 -> 64 feature maps
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.drop_out = nn.Dropout(p=0.5)

        # Classifier head over the flattened 7x7x64 feature maps
        self.fc1 = nn.Linear(in_features=7 * 7 * 64, out_features=10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per input image."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension
        flat = features.reshape(features.size(0), -1)
        return self.fc1(self.drop_out(flat))

In [None]:
# Instantiate the classifier with its default arguments
model = ImageClassifierNet()

**NOTE: The total number of parameters should be <= 100,000 for the contest.**

In [None]:
# Summarize the model for one batch of 1x28x28 inputs (use this to check the parameter count)
summary(model, input_size=(batch_size, 1, 28, 28))

## Enable training on a GPU

In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

def to_device(data, device):
    """
    Move data to the given device.

    A list or tuple is processed element by element (recursively) and comes
    back as a list; anything else is moved with its own .to() method.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

class DeviceDataLoader():
    """Iterable wrapper that moves every batch of an underlying dataloader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Return the number of batches reported by the wrapped dataloader."""
        return len(self.dl)

    def __iter__(self):
        """Yield each batch of the wrapped dataloader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
# Pick the GPU when available, otherwise fall back to the CPU
device = get_default_device()

# Wrap both loaders so every batch is moved to the chosen device as it is yielded
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)

# Move the model to the same device as the data
to_device(model, device)

## Train the model

In [None]:
def _run_epoch(model, dl, loss_fn, opt_fn=None):
    """
    Make one full pass over a dataloader.

    With an optimizer, the model is put in train mode and updated after every
    batch. Without one, the model is put in eval mode and run with gradient
    tracking disabled.

    Returns:
        (mean loss per sample, accuracy) over all batches of dl.

    Raises:
        ValueError: if dl yields no samples.
    """
    training = opt_fn is not None
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for inputs, labels in dl:
            if training:
                opt_fn.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            if training:
                loss.backward()
                opt_fn.step()

            n_batch = labels.size(0)
            # loss_fn is assumed to return the batch mean (the default for
            # nn.CrossEntropyLoss); weighting by batch size gives a correct
            # per-sample average even when the last batch is smaller.
            loss_sum += loss.item() * n_batch
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += n_batch

    if n_seen == 0:
        raise ValueError("dataloader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


def train_model(n_epochs, model, train_dl, val_dl, loss_fn, opt_fn, lr):
    """
    Trains the model on a dataset.

    Every epoch runs over ALL training batches and then, if validation data
    was given, over ALL validation batches. (Iterating the two loaders in
    lockstep would cut each epoch short at the length of the smaller one.)

    Args:
        n_epochs: number of epochs
        model: ImageClassifierNet object
        train_dl: training dataloader
        val_dl: validation dataloader; pass an empty one (e.g. []) or None
            to skip validation
        loss_fn: the loss function
        opt_fn: the optimizer
        lr: learning rate. Not used by this function; the learning rate
            configured on opt_fn is what applies. Kept so existing calls
            keep working.

    Returns:
        A tuple of (model, train_losses, val_losses, train_accuracies,
        val_accuracies). Each list has one entry per epoch; losses are mean
        per-sample losses. The validation lists are empty when validation
        is skipped.

    Raises:
        ValueError: if a dataloader that is iterated yields no samples.
    """
    # Record these values at the end of each epoch
    train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []
    has_val = val_dl is not None and len(val_dl) != 0

    for epoch in range(n_epochs):
        train_loss, train_acc = _run_epoch(model, train_dl, loss_fn, opt_fn)
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        if has_val:
            val_loss, val_acc = _run_epoch(model, val_dl, loss_fn)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            print('[epoch: %d] (train loss: %.3f train accuracy: %.3f) (val loss: %.3f val accuracy: %.3f)'% (epoch + 1, train_loss, train_acc, val_loss, val_acc))
        else:
            print('[epoch: %d] (train loss: %.3f train accuracy: %.3f)'% (epoch + 1, train_loss, train_acc))

    return model, train_losses, val_losses, train_accuracies, val_accuracies

In [None]:
import torch.optim as optim

# Training configuration: epoch count, loss function, learning rate,
# and an Adam optimizer over the model's parameters
num_epochs = 10
loss_fn =  nn.CrossEntropyLoss()
lr =  0.01
opt_fn =  optim.Adam(model.parameters(),lr=lr)


In [None]:
# Train with the training/validation split and unpack the per-epoch metrics
history = train_model(num_epochs, model, train_dl, val_dl, loss_fn, opt_fn, lr)
model, train_losses, val_losses, train_accuracies, val_accuracies = history

## Plot loss and accuracy

In [None]:
def plot_accuracy(train_accuracies, val_accuracies):
    """Plot the per-epoch training ("-x") and validation ("-o") accuracy curves."""
    for series, line_style in ((train_accuracies, "-x"), (val_accuracies, "-o")):
        plt.plot(series, line_style)
    plt.title("Accuracy vs. No. of epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(["Training", "Validation"])

In [None]:
# Accuracy curves from the run with a validation split
plot_accuracy(train_accuracies, val_accuracies)

In [None]:
def plot_losses(train_losses, val_losses):
    """Plot the per-epoch training ("-x") and validation ("-o") loss curves."""
    curves = {"-x": train_losses, "-o": val_losses}
    for line_style, series in curves.items():
        plt.plot(series, line_style)
    plt.title("Loss vs. No. of Epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend(["Training", "Validation"])

In [None]:
# Loss curves from the run with a validation split
plot_losses(train_losses, val_losses)

## Train a model on the entire dataset

In [None]:
# A validation fraction of 0 leaves the validation part empty, so `indices`
# is a shuffled list of every sample index
indices, _ = split_indices(len(dataset), 0, rand_seed)

# Loader over the whole training dataset, with batches moved to the chosen device
sampler = SubsetRandomSampler(indices)
dl = DataLoader(dataset, batch_size, sampler=sampler)
dl = DeviceDataLoader(dl, device)

**Set the maximum number of training epochs and the learning rate for finetuning your model.**

In [None]:

# Epoch count and learning rate for the full-dataset run
num_epochs = 30
lr = 0.01

In [None]:
# train_model never reads its lr argument and opt_fn was created earlier, so
# write the learning rate chosen for this stage into the optimizer itself.
for param_group in opt_fn.param_groups:
    param_group['lr'] = lr

# No validation data is passed ([]), so only the training metrics are recorded
history = train_model(num_epochs, model, dl, [], loss_fn, opt_fn, lr)
model = history[0]

In [None]:
# history is (model, train_losses, val_losses, train_accuracies, val_accuracies),
# so the accuracies are at indices 3 and 4 (index 1 holds the training losses).
# The validation list is empty here because no validation data was passed.
plot_accuracy(history[3], history[4])

## Check Predictions

In [None]:
def view_prediction(img, label, probs, classes):
    """
    Visualize predictions: the image next to a bar chart of class probabilities.

    Args:
        img: image tensor with 28*28 elements (e.g. shape (1, 28, 28))
        label: index of the true class in classes
        probs: tensor holding one probability per class
        classes: sequence of class names

    The input tensors are not modified.
    """
    probs = probs.detach().cpu().numpy().squeeze()
    # reshape (instead of the in-place resize_) leaves the caller's tensor untouched
    pixels = img.detach().cpu().reshape(1, 28, 28).numpy().squeeze()
    # One bar per class, rather than a hard-coded count
    positions = np.arange(len(classes))

    fig, (ax1, ax2) = plt.subplots(figsize=(8,15), ncols=2)
    ax1.imshow(pixels, cmap='Greys_r')
    ax1.axis('off')
    ax1.set_title('Actual: {}'.format(classes[label]))
    ax2.barh(positions, probs)
    ax2.set_aspect(0.1)
    ax2.set_yticks(positions)
    ax2.set_yticklabels(classes, size='small')
    ax2.set_title('Predicted: probabilities')
    ax2.set_xlim(0, 1.1)

    plt.tight_layout()

In [None]:
# Show the class probabilities (softmax over the model outputs) for the first
# image of the first batch
images = iter(dl)
first_batch = next(images, None)
if first_batch is not None:
    imgs, labels = first_batch
    with torch.no_grad():
        model.eval()
        # Add a batch dimension so the single image can go through the model
        logits = model(imgs[0].unsqueeze(0))
        probs = torch.nn.functional.softmax(logits, dim=1)
        # Plot the image and probabilities
        view_prediction(imgs[0], labels[0], probs, dataset.classes)

## Save the model

In [None]:
# Very important
# Saves the whole model object (not only its state_dict) to a file named 'model'
torch.save(model, 'model')

## Compute accuracy on the test set

In [None]:
# Evaluate on un-augmented images. The training transform applies random
# blur, perspective warps and rotations, which would make the test accuracy
# noisy and different on every run. Only the tensor conversion and the same
# normalization are kept here.
test_transform = Compose([ToTensor(), Normalize((0.5,), (0.5,))])
test_dataset = FashionMNIST('MNIST_data/', download = True, train = False, transform = test_transform)

In [None]:
# Test loader without a sampler, wrapped so batches are moved to the chosen device
test_dl = DataLoader(test_dataset, batch_size)
test_dl = DeviceDataLoader(test_dl, device)

In [None]:
def evaluate(model, test_dl):
    """
    Evaluates your model on the test data.

    Args:
        model: ImageClassifierNet object
        test_dl: test dataloader yielding (inputs, labels) batches

    Returns:
        Test accuracy as a fraction between 0 and 1.

    Raises:
        ValueError: if test_dl yields no samples.
    """
    # Switch to eval mode once, before the loop, instead of on every batch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_dl:
            # The predicted class is the index of the largest output score
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_dl yielded no samples")
    return correct / total

In [None]:
# Report the test accuracy to four decimal places
print("Test Accuracy = {:.4f}".format(evaluate(model, test_dl)))