In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision import transforms, datasets

In [None]:
# Preprocessing applied to every sample: convert it to a tensor, then
# normalize the single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
data_path = './MNIST'

# NOTE(review): the splits are crossed -- the `train=True` split is stored as
# the *testing* set and the `train=False` split as the *training* set. This
# looks deliberate (the size check in this notebook expects 10,000 training
# and 60,000 testing samples) -- confirm before "fixing" it.
mnist_kwargs = dict(root=data_path, download=True, transform=transform)
data_test = datasets.MNIST(train=True, **mnist_kwargs)
data_train = datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
# Sanity check on the split sizes. len() is the idiomatic way to invoke
# __len__(), and the "hte" typo in the second message is corrected.
print("the number of your training data (must be 10,000) = ", len(data_train))
print("the number of your testing data (must be 60,000) = ", len(data_test))

In [None]:
# Reshuffle the training samples at the start of every epoch so the model
# does not see the exact same batches in the exact same order each time.
train_loader = torch.utils.data.DataLoader(
    data_train, batch_size=64, shuffle=True, num_workers=0
)

# The evaluation pass only reads the data, so it stays in dataset order.
test_loader = torch.utils.data.DataLoader(
    data_test, batch_size=64, shuffle=False, num_workers=0
)

In [None]:
import torch.nn as nn

def calc_out(in_layers, stride, padding, kernel_size, pool_stride):
    """
    Size of one dimension after a convolution followed by a pooling step.

    Args:
        in_layers:   input size along the dimension (Net passes the side
                     length of its feature map here).
        stride:      stride of the convolution.
        padding:     padding added on each side by the convolution.
        kernel_size: size of the convolution kernel.
        pool_stride: factor by which the pooling step divides the size.

    Returns:
        The resulting size, truncated to an int.
    """
    padded_extent = in_layers - kernel_size + (2 * padding)
    conv_size = 1 + padded_extent / stride
    pooled_size = conv_size / pool_stride
    return int(pooled_size)

class Net(nn.Module):
    """
    Convolutional classifier for 28x28 single-channel images, 10 output scores.

    Feature extractor (`cnn_layers`): three stages, each built as
    Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm -> ReLU -> MaxPool -> Dropout.

    Classifier head (`fc_layers`): four Linear layers (512, 256, 256, 10
    outputs) with Dropout before each one and a ReLU after every Linear
    except the last, whose raw outputs are returned.

    Attributes:
        out:   side length of the feature map produced by `cnn_layers`.
        depth: number of channels of the feature map produced by `cnn_layers`.
    """

    def __init__(self):
        super().__init__()

        # Stage i maps inputs[i] channels to inputs[i+1] channels.
        inputs      = [1, 32, 64, 64]
        kernel_size = [5, 5, 3]
        stride      = [1, 1, 1]
        pool_stride = [2, 2, 2]

        layers = []

        self.out   = 28          # side length of the input images
        self.depth = inputs[-1]  # channels after the last stage
        for i in range(len(kernel_size)):
            # Half the kernel size (integer division) on each side.
            padding = kernel_size[i] // 2

            # Track the feature-map side length after this stage. With the
            # stride of 1 and the padding above, each convolution keeps the
            # size unchanged, so a single call per stage is enough to account
            # for the pooling.
            self.out = calc_out(self.out, stride[i], padding,
                                kernel_size[i], pool_stride[i])

            layers.extend([
                # convolutional layer 1
                nn.Conv2d(inputs[i], inputs[i+1], kernel_size[i],
                          stride=stride[i], padding=padding),
                nn.BatchNorm2d(inputs[i+1]),
                nn.ReLU(),
                # convolutional layer 2
                nn.Conv2d(inputs[i+1], inputs[i+1], kernel_size[i],
                          stride=stride[i], padding=padding),
                nn.BatchNorm2d(inputs[i+1]),
                nn.ReLU(),
                # maxpool layer
                nn.MaxPool2d(pool_stride[i], pool_stride[i]),
                nn.Dropout(p=0.2),
            ])

        self.cnn_layers = nn.Sequential(*layers)

        # Number of features per sample once the feature map is flattened.
        flat_features = self.depth * self.out * self.out

        # Fully connected head. The ReLU between consecutive Linear layers is
        # what makes the stack non-linear: without it, once dropout is
        # disabled in eval mode, the four Linear layers compose into a single
        # affine map.
        self.fc_layers = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Return the 10 raw class scores for each sample in the batch `x`."""
        x = self.cnn_layers(x)
        # Keep the batch dimension fixed while flattening. With view(-1, D), an
        # input whose feature size differs from D could be silently regrouped
        # across samples; this way the first Linear layer raises instead.
        x = x.view(x.size(0), -1)
        return self.fc_layers(x)
    
# create a complete CNN
model = Net()
# A bare expression on the last line of a cell is displayed by the notebook,
# which shows the module's printed layer structure.
model

In [None]:
# Histories filled in by the training loop, one entry per epoch:
# training / testing loss and training / testing accuracy.
tLoss = []
vLoss = []
tacc = []
vacc = []

In [None]:
import torch.optim as optim

# Loss function used for both the training and the testing pass
criterion = nn.CrossEntropyLoss()

# Adam over all of the model's parameters, learning rate 5e-4
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

In [None]:
# number of epochs to train the model
n_epochs = 30

# Move the model to the selected device
model.to(device)


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """
    Run one full pass over `loader` and return (mean loss, accuracy).

    Args:
        model:     the network to run.
        loader:    DataLoader yielding (data, target) batches.
        criterion: loss function called as criterion(output, target).
        device:    device the batches are moved to before the forward pass.
        optimizer: when given, the pass is a training pass (train mode,
                   backward + optimizer step on every batch); when None, it is
                   an evaluation pass (eval mode, no gradients recorded).

    Returns:
        Tuple of Python floats: the loss averaged over the samples of
        `loader.dataset`, and the fraction of samples classified correctly.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    n_correct = 0

    # Autograd bookkeeping is only needed when backward() will be called;
    # disabling it during evaluation saves memory and time.
    with torch.set_grad_enabled(training):
        for data, target in loader:
            data = data.to(device)
            target = target.to(device)

            if training:
                # clear the gradients of all optimized variables
                optimizer.zero_grad()

            output = model(data)
            loss = criterion(output, target)

            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so that a smaller last
            # batch contributes proportionally to the per-sample average.
            loss_sum += loss.item() * data.size(0)

            # The index of the largest score along dim 1 is the predicted
            # class. .item() turns the count into a plain int, so the
            # histories hold Python numbers instead of (possibly CUDA) tensors.
            prediction = output.argmax(dim=1)
            n_correct += (prediction == target).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, n_correct / n_samples


for epoch in range(n_epochs):
    train_loss, train_accuracy = _run_epoch(
        model, train_loader, criterion, device, optimizer=optimizer)
    test_loss, test_accuracy = _run_epoch(
        model, test_loader, criterion, device)

    # record this epoch in the histories
    tLoss.append(train_loss)
    vLoss.append(test_loss)
    tacc.append(train_accuracy)
    vacc.append(test_accuracy)

    # print training/testing statistics
    print('Epoch: {} \tTraining Loss: {:.5f} \tTesting Loss: {:.5f}'.format(
        epoch, train_loss, test_loss))
    print('Epoch: {} \tTraining acc: {:.5f} \tTesting acc: {:.5f}'.format(
        epoch, train_accuracy, test_accuracy))

## Output

### 1. Plot the training and testing losses over epochs [2pt]

In [None]:
# Both loss histories on the same axes, indexed by epoch number.
plt.plot(list(range(len(tLoss))), tLoss, c='blue', label='training loss')
plt.plot(list(range(len(vLoss))), vLoss, c='red', label='testing loss')

plt.title("training and testing losses over epochs")
plt.xlabel("epochs")
plt.ylabel("loss")
plt.legend()
plt.show()

### 2. Plot the training and testing accuracies over epochs [2pt]

In [None]:
# The accuracy histories may hold 0-dim torch tensors (on the GPU when CUDA
# is used), which matplotlib cannot convert to arrays. float() accepts both
# such tensors and plain numbers, so convert before plotting.
train_acc_values = [float(acc) for acc in tacc]
test_acc_values = [float(acc) for acc in vacc]

plt.plot(range(len(train_acc_values)), train_acc_values, label='training acc', c='blue')
plt.plot(range(len(test_acc_values)), test_acc_values, label='testing acc', c='red')

plt.legend()
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.title("training and testing accuracies over epochs")
plt.show()

### 3. Print the final training and testing losses at convergence [2pt]

In [None]:
# The final losses are the last entries of the two loss histories.
final_train_loss, final_test_loss = tLoss[-1], vLoss[-1]
print('Training loss: {:.5f} \nTesting loss: {:.5f}'.format(final_train_loss, final_test_loss))

### 4. Print the final training and testing accuracies at convergence [20pt]

In [None]:
# The final accuracies are the last entries of the two accuracy histories.
final_train_acc, final_test_acc = tacc[-1], vacc[-1]
print('Training acc: {:.5f} \nTesting acc: {:.5f}'.format(final_train_acc, final_test_acc))

### 5. Print the testing accuracies within the last 10 epochs [5pt]

In [None]:
# Slice from the end of the history rather than from `n_epochs - 10`. This
# stays correct when fewer than 10 epochs were run, and when the history
# length differs from n_epochs (e.g. the training cell was executed again).
vacc_last_10_epochs = vacc[-10:]

# Epoch index of the first entry shown, so the labels match the history.
first_epoch_shown = len(vacc) - len(vacc_last_10_epochs)

for offset, acc in enumerate(vacc_last_10_epochs):
    print("[epoch = {}] \tTesting acc : {:.5f}".format(first_epoch_shown + offset, acc))