## Import dependencies

In [3]:
from __future__ import print_function
import numpy as np
import torch
import torchvision.datasets as datasets
import torchvision
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.patheffects as path_effects
import argparse
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.data.sampler import SubsetRandomSampler
import os


## Download MNIST

In [None]:
# Fetch the MNIST training split into ./data (downloads only if the files are
# missing). No image or target transform is attached.
mnist_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
)


## Load in MNIST

In [None]:
# Open the already-downloaded train and test splits without any transform,
# so indexing them returns the raw (image, label) pairs.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=False,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=False,
)


In [None]:
# NOTE: `batch_size` is assigned in the "Set training parameters" cell, which
# has to be executed before this one.

# Datasets opened with transform=None yield PIL images, which the default
# DataLoader collate function cannot stack into a batch. The loaders are
# therefore built on tensor-producing views of the same files on disk.
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Sequential loader over the complete training split.
train_all_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='./data', train=True, download=False,
                   transform=eval_transform),
    batch_size=batch_size,
)

# Bug fix: this loader used to wrap the *training* split, so every "test"
# evaluation was silently measuring training data. It now reads train=False.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='./data', train=False, download=False,
                   transform=eval_transform),
    batch_size=batch_size,
)

## Partition MNIST

In [None]:
# Stratified 85/15 train/validation split of the MNIST training set.
#
# class_counts maps each digit (0-9) to the list of dataset indices whose
# label is that digit.
class_counts = {digit: [] for digit in range(10)}

for batch_idx, (data, target) in enumerate(train_dataset):
    class_counts[int(target)].append(batch_idx)

# Dataset indices go up to len(train_dataset) - 1, so a wide integer dtype is
# required (the previous 'int8' could not represent them).
subset_indices_train = np.array([], dtype='int64')
subset_indices_valid = np.array([], dtype='int64')

np.random.seed(0)   # make the split reproducible
for c in class_counts:
    if not class_counts[c]:
        continue    # nothing to split for a class with no samples

    t_size = int(len(class_counts[c]) * 0.85)
    # replace=False: each training index is drawn at most once. The default
    # (sampling with replacement) put duplicates in the training subset.
    t = np.random.choice(class_counts[c], size=t_size, replace=False)
    # Validation = the remaining dataset indices of this class. The previous
    # code compared list positions against dataset indices, which produced
    # indices belonging to other classes and to the training subset.
    v = np.setdiff1d(class_counts[c], t)

    subset_indices_train = np.concatenate((subset_indices_train, t), axis=None)
    subset_indices_valid = np.concatenate((subset_indices_valid, v), axis=None)

In [None]:
# Re-open the training split with preprocessing attached: every image is
# converted to a tensor and normalised with (0.1307, 0.3081).
# Add data augmentation transforms to this Compose list if desired.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

# Both loaders read from the same dataset object; the samplers restrict each
# one to its own index subset and shuffle within it.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(subset_indices_train),
)
val_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(subset_indices_valid),
)


## Set up ConvNet

In [None]:


class ConvNet(nn.Module):
    '''
    Small convolutional classifier producing log-probabilities over 10 classes.

    Two (3x3 conv -> ReLU -> 2x2 max-pool -> channel dropout) stages with
    8 feature maps each are followed by two fully connected layers. The first
    linear layer takes 200 features, which is 8 x 5 x 5 for a 1 x 28 x 28 input.
    '''
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout2d(p=0.5)
        self.dropout2 = nn.Dropout2d(p=0.5)
        self.fc1 = nn.Linear(in_features=200, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        # Stage 1: conv -> ReLU -> pool -> dropout
        x = self.dropout1(F.max_pool2d(F.relu(self.conv1(x)), 2))
        # Stage 2: same pattern on the pooled feature maps
        x = self.dropout2(F.max_pool2d(F.relu(self.conv2(x)), 2))

        # Collapse everything except the batch dimension for the dense head.
        features = torch.flatten(x, start_dim=1)
        hidden = F.relu(self.fc1(features))
        logits = self.fc2(hidden)

        return F.log_softmax(logits, dim=1)

In [None]:

def train(model, train_loader, optimizer, epoch, log_interval):
    '''
    Run one epoch of training and return the summed per-batch loss.

    For every batch yielded by `train_loader` the gradients are cleared, the
    negative log-likelihood loss of the model output is back-propagated and
    the optimizer takes one step. Every `log_interval` batches a progress
    line is printed. `epoch` is only used in that progress line.
    '''
    model.train()   # enable training-mode behaviour (e.g. dropout)
    running_loss = 0

    for step_no, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        predictions = model(inputs)
        loss = F.nll_loss(predictions, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        if step_no % log_interval == 0:
            # The sample counter is an estimate: batch index times the size
            # of the current batch, shown against the full dataset length.
            seen = step_no * len(inputs)
            percent = 100. * step_no / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, len(train_loader.dataset), percent, batch_loss))
        running_loss += batch_loss

    return running_loss


def test(model, test_loader, subset_indices_valid):
    model.eval()    # Set the model to inference mode
    test_loss = 0
    correct = 0
    with torch.no_grad():   # For the inference step, gradient is not computed
        for data, target in test_loader:
            data, target = data, target
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()


    test_loss /= len(test_loader.dataset)
    
    total = len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, total,
        100. * correct / total))
    
    return test_loss, correct, total



## Set training parameters

In [None]:
# --- Data loading ---
batch_size, test_batch_size = 32, 1000

# --- Optimisation schedule ---
epochs, step = 20, 1        # number of epochs; StepLR step size (in epochs)
lr, gamma = 1.0, 0.7        # initial learning rate; StepLR decay factor

# --- Run configuration ---
seed = 1
log_interval = 10           # print a progress line every N batches
no_cuda = False
evaluate = False
save_model = True


In [None]:
# Load your model [fcNet, ConvNet, Net]
basic_model = ConvNet()

# Try different optimizers here [Adam, SGD, RMSprop]
optimizer = optim.Adadelta(params=basic_model.parameters(), lr=lr)

# Set your learning rate scheduler: multiply the learning rate by `gamma`
# every `step` epochs.
scheduler = StepLR(optimizer=optimizer, step_size=step, gamma=gamma)

# Per-epoch history: summed training loss and average validation loss.
basic_training_losses_over_epochs, basic_test_losses_over_epochs = [], []

# Training loop (epochs are numbered from 1 for the log output)
for epoch in range(1, epochs + 1):
    loss_ep = train(basic_model, train_loader, optimizer, epoch, log_interval)
    test_loss, correct_test_count, total_test_count = test(
        basic_model, val_loader, subset_indices_valid)

    basic_training_losses_over_epochs.append(loss_ep)
    basic_test_losses_over_epochs.append(test_loss)

    scheduler.step()    # advance the learning rate schedule

    # You may optionally save your model at each epoch here

if save_model:
    torch.save(obj=basic_model.state_dict(), f="mnist_model2.pt")

## Test and visualize results

In [None]:
# Print loss/accuracy of the trained baseline model, first on the batches
# drawn by `train_loader`, then on those from `test_loader`.
# The third argument is simply forwarded to test().
test(basic_model, train_loader, subset_indices_valid) 
test(basic_model, test_loader, subset_indices_valid) 

In [None]:
# Draw one line+marker figure per recorded loss history, with the epoch
# number (starting at 1) on the horizontal axis.
x = list(range(1, epochs + 1))
for y, plot_title, y_label in (
    (basic_training_losses_over_epochs,
     "ConvNet: Training Loss over Epoch", "Training Loss"),
    (basic_test_losses_over_epochs,
     "ConvNet: Test Loss over Epoch", "Test Loss"),
):
    plt.plot(x, y)
    plt.scatter(x, y)
    plt.title(plot_title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.show()

# Add Data Augmentation Scheme

## Augment data

In [None]:
# Read the blur kernel used for augmentation from a .npy file in the
# current working directory.
gaussian_kernel = np.load(file='gaussian-kernel.npy')

In [None]:
def pad_with(vector, pad_width, iaxis, kwargs):
    '''
    Padding callback for np.pad that fills the padded ends with a constant.

    Parameters:
        vector: 1-D array, already extended by the pad widths; modified in place.
        pad_width: pair (before, after) with the number of padded entries at
            the start and at the end of `vector`.
        iaxis: axis currently being padded (unused).
        kwargs: dict of extra keyword arguments; 'padder' is the fill value
            (default 0).
    '''
    pad_value = kwargs.get('padder', 0)
    leading, trailing = pad_width[0], pad_width[1]

    vector[:leading] = pad_value
    # vector[-0:] selects the whole vector, so a trailing width of 0 would
    # overwrite every element. Only touch the tail when there is one.
    if trailing > 0:
        vector[-trailing:] = pad_value

In [None]:
def convolution(img, kernel):
    '''
    Filter every channel of a (channels, rows, cols) image with a 2-D kernel.

    The image is zero-padded by half the kernel size on each side, so the
    output has the same shape as the input. The kernel is applied without
    flipping (cross-correlation), which is identical to convolution for
    symmetric kernels.

    Parameters:
        img: array-like of shape (channels, rows, cols); anything np.asarray
            accepts.
        kernel: 2-D array-like of weights.

    Returns:
        float64 ndarray with the same shape as `img`.

    Raises:
        ValueError: if `img` is not 3-D or `kernel` is not 2-D.

    Fixes relative to the previous version: the image height and width were
    read from shape[0]/shape[1] (channel count and height), so only the first
    row of a 1 x H x W image was ever filtered. Channels other than the first
    are now filtered too, instead of being returned as zeros.
    '''
    image = np.asarray(img).astype(np.float64)
    weights = np.asarray(kernel).astype(np.float64)
    if image.ndim != 3:
        raise ValueError('img must have shape (channels, rows, cols)')
    if weights.ndim != 2:
        raise ValueError('kernel must be 2-dimensional')

    kernel_rows, kernel_cols = weights.shape
    pad_rows, pad_cols = kernel_rows // 2, kernel_cols // 2
    _, n_rows, n_cols = image.shape

    # Zero-pad the two spatial axes only; the channel axis is left untouched.
    zero_padded = np.pad(
        image,
        ((0, 0), (pad_rows, pad_rows), (pad_cols, pad_cols)),
        mode='constant',
        constant_values=0,
    )

    # Accumulate one shifted, weighted copy of the image per kernel entry.
    # This gives the same sums as sliding a window over every pixel, but
    # loops over the (small) kernel rather than the image.
    filtered = np.zeros(image.shape)
    for dr in range(kernel_rows):
        for dc in range(kernel_cols):
            filtered += weights[dr, dc] * zero_padded[:, dr:dr + n_rows, dc:dc + n_cols]
    return filtered

In [None]:
# Training split with the same tensor conversion and normalisation
# constants used for the baseline model.
# Add data augmentation transforms to this Compose list if desired.
augmented_train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)

In [None]:
# Walk over the training batches and, for each one, build an augmented batch
# that interleaves every image with its kernel-filtered copy (labels are
# duplicated to match). Each iteration overwrites `data` and `target`, so
# only the last augmented batch remains after the loop.
for batch_idx, (data, target) in enumerate(train_loader):
    new_data, new_target = [], []
    for input_x, label in zip(data, target):
        gaussian_input = torch.FloatTensor(convolution(input_x, gaussian_kernel))
        new_data.extend((input_x, gaussian_input))
        new_target.extend((label, label))

    data = torch.stack(new_data, 0)
    target = torch.tensor(new_target)

## Train ConvNet on Augmented Data

In [None]:
def augmented_train(model,train_loader, optimizer, epoch, log_interval):
    '''
    This is your training function. When you call this function, the model is
    trained for 1 epoch.
    '''
    total_loss = 0
    model.train()   # Set the model to training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        new_data = []
        new_target = []
        data, target = data, target
        for i in range(len(data)):
            input_x = data[i]
            gaussian_input = convolution(input_x, box_kernel)
            gaussian_input = torch.FloatTensor(gaussian_input)
            new_data.append(input_x)
            new_data.append(gaussian_input)
            new_target.append(target[i])
            new_target.append(target[i])

        data = torch.stack(new_data, 0) 
        target = torch.tensor(new_target)
    
        optimizer.zero_grad()               # Clear the gradient
        output = model(data)                # Make predictions
        loss = F.nll_loss(output, target)   # Compute loss
        loss.backward()                     # Gradient computation
        optimizer.step()                    # Perform a single optimization step
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
        total_loss += loss.item()
    return total_loss

In [None]:
# Load your model [fcNet, ConvNet, Net]
aug_basic_model = ConvNet()

# Try different optimzers here [Adam, SGD, RMSprop]
optimizer = optim.Adadelta(aug_basic_model.parameters(), lr=lr)

# Set your learning rate scheduler
scheduler = StepLR(optimizer, step_size=step, gamma=gamma)
augmented_basic_training_losses_over_epochs = []
augmented_basic_test_losses_over_epochs = []
# Training loop
for epoch in range(1, epochs + 1):
    loss_ep = train(aug_basic_model, train_loader, optimizer, epoch, log_interval)
    test_loss, _, _ = test(aug_basic_model, val_loader, subset_indices_valid) 
    
    augmented_basic_training_losses_over_epochs.append(loss_ep)
    augmented_basic_test_losses_over_epochs.append(test_loss)
    
    
    scheduler.step()    # learning rate scheduler

    # You may optionally save your model at each epoch here

if save_model:
    torch.save(aug_basic_model.state_dict(), "mnist_model2_aug2.pt")

## Visualize and test results

In [None]:
# Print loss/accuracy of the augmented-data model on the training loader and
# on `test_loader`.
test(aug_basic_model, train_loader, subset_indices_valid)
test(aug_basic_model, test_loader, subset_indices_valid)

# One line+marker figure per recorded loss history, epochs numbered from 1.
x = list(range(1, epochs + 1))
for y, plot_title, y_label in (
    (augmented_basic_training_losses_over_epochs,
     "Augmented ConvNet: Training Loss over Epoch", "Training Loss"),
    (augmented_basic_test_losses_over_epochs,
     "Augmented ConvNet: Test Loss over Epoch", "Test Loss"),
):
    plt.plot(x, y)
    plt.scatter(x, y)
    plt.title(plot_title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.show()