In [137]:
import torch
import torchvision
import torchvision.datasets as dsets
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import KFold
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
%matplotlib inline

## Functions to run KFold CV on CNN

In [138]:

def reset_weights(m):
  '''
    Try resetting model weights to avoid
    weight leakage.
  '''
  for layer in m.children():
    if hasattr(layer, 'reset_parameters'):
        print(f'Reset trainable parameters of layer = {layer}')
        layer.reset_parameters()
    
def torchKFold(net, dataset, name):    
    # Configuration options
    k_folds = 5
    num_epochs = 1
    loss_function = nn.CrossEntropyLoss()

    # For fold results
    results = {}

    # Set fixed random number seed
    torch.manual_seed(42)

    # Define the K-fold Cross Validator
    kfold = KFold(n_splits=k_folds, shuffle=True)

    # Start print
    print('--------------------------------')

    # K-fold Cross Validation model evaluation
    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):

        # Print
        print(f'FOLD {fold}')
        print('--------------------------------')

        # Sample elements randomly from a given list of ids, no replacement.
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

        # Define data loaders for training and testing data in this fold
        trainloader = torch.utils.data.DataLoader(
                          dataset, 
                          batch_size=10, sampler=train_subsampler)
        testloader = torch.utils.data.DataLoader(
                          dataset,
                          batch_size=10, sampler=test_subsampler)

        # Init the neural network
        network = net()
        network.apply(reset_weights)

        # Initialize optimizer
        optimizer = torch.optim.Adam(network.parameters(), lr=1e-4)

        # Run the training loop for defined number of epochs
        for epoch in range(0, num_epochs):

          # Print epoch
          print(f'Starting epoch {epoch+1}')

          # Set current loss value
          current_loss = 0.0

          # Iterate over the DataLoader for training data
          for i, data in enumerate(trainloader, 0):

            # Get inputs
            inputs, targets = data

            # Zero the gradients
            optimizer.zero_grad()

            # Perform forward pass
            outputs = network(inputs)

            # Compute loss
            loss = loss_function(outputs, targets)

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()

            # Print statistics
            current_loss += loss.item()
            if i % 500 == 499:
                print('Loss after mini-batch %5d: %.3f' %
                      (i + 1, current_loss / 500))
                current_loss = 0.0

        # Process is complete.
        print('Training process has finished. Saving trained model.')

        # Print about testing
        print('Starting testing')

        # Saving the model
        save_path = f'./model-{name}-fold-{fold}.pth'
        torch.save(network.state_dict(), save_path)

        # Evaluationfor this fold
        correct, total = 0, 0
        with torch.no_grad():

          # Iterate over the test data and generate predictions
          for i, data in enumerate(testloader, 0):

            # Get inputs
            inputs, targets = data

            # Generate outputs
            outputs = network(inputs)

            # Set total and correct
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        # Print accuracy
        print('Accuracy for fold %d: %d %%' % (fold, 100.0 * correct / total))
        print('--------------------------------')
        results[fold] = 100.0 * (correct / total)

        # Print fold results
        print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
        print('--------------------------------')
        sum = 0.0
        for key, value in results.items():
            print(f'Fold {key}: {value} %')
            sum += value
            print(f'Average: {sum/len(results.items())} %')

In [139]:
print(torch.cuda.is_available())

True


In [140]:
train_dataset = dsets.MNIST(root = '/home/mtweed/Documents/New_College/_Misl/data/',
                            train = True,
                            download = False,
                            transform = transforms.ToTensor())

In [141]:
test_dataset = dsets.MNIST(root = '/home/mtweed/Documents/New_College/_Misl/data/',
                            train = False,
                            download = False,
                            transform = transforms.ToTensor())

## CNN One
**Build The Neural Network Class**

This first Convolutional Neural Netowork has two convolutional layers with kernal_size=3, stride=1, and padding=0 which are the default values.

Each convolutional layer is pooled using a max pool algorithm with kernel_size=2 and a stride equal to the length of the kernel (2).

Finally, it has one fully connected layer.

In [142]:
class NeuralNet1(nn.Module):
    def __init__(self):
        super(NeuralNet1, self).__init__()
        
        # Convolutional Layer 1
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=16,
                               kernel_size=3,
                               stride=1,
                               padding=0)
        self.relu1 = nn.ReLU()
        
        # Max Pool of Layer 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        # Convolutional Layer 2
        self.conv2 = nn.Conv2d(in_channels=16,
                               out_channels=32,
                               kernel_size=3,
                               stride=1,
                               padding=0)
        self.relu2 = nn.ReLU()
        
        # Max Pool of Layer 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        # Fully connected layer
        self.fc = nn.Linear(in_features=32*5*5,
                            out_features=10)
        
    def forward(self,x):
        
        # Conv 1
        out = self.conv1(x)
        out = self.relu1(out)
        
        # Pool layer 1
        out = self.maxpool1(out)
        
        # Conv 2
        out = self.conv2(out)
        out = self.relu2(out)
        
        # Pool Layer 2
        out = self.maxpool2(out)
        
        # Flatten out
        out = out.view(-1, 32*5*5)
        
        # Fully Connected Layer
        out = self.fc(out)
        
        return out

## KFold CV on CNN One

In [144]:
torchKFold(NeuralNet1, train_dataset, "one")

--------------------------------
FOLD 0
--------------------------------
Reset trainable parameters of layer = Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
Reset trainable parameters of layer = Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
Reset trainable parameters of layer = Linear(in_features=800, out_features=10, bias=True)
Starting epoch 1
Loss after mini-batch   500: 1.784
Loss after mini-batch  1000: 0.622
Loss after mini-batch  1500: 0.414
Loss after mini-batch  2000: 0.315
Loss after mini-batch  2500: 0.280
Loss after mini-batch  3000: 0.260
Loss after mini-batch  3500: 0.239
Loss after mini-batch  4000: 0.211
Loss after mini-batch  4500: 0.198
Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 0: 94 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 94.61666666666667 %
Average: 94.61666666666667 %
FOLD 1
--------------------------------
Reset trainable parameters 

## CNN Two
**Build The Neural Network Class**

For this model we reduced the number of channels out to 10 on the first layer (down from 16) and 20 on the second layer (down from 32) and we changes the kernel size to 5.

We also added another fully connected layer and two dropout points (one for convolution layer 2 and one for the first fully connected layer.

In [145]:
class NeuralNet2(nn.Module):
    def __init__(self):
        super(NeuralNet2, self).__init__()
        
        # Convolutional Layer 1
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=10,
                               kernel_size=5,
                               stride=1,
                               padding=0)

        # Convolutional Layer 2
        self.conv2 = nn.Conv2d(in_channels=10,
                               out_channels=20,
                               kernel_size=5,
                               stride=1,
                               padding=0)
        
        # Drop out for Conv2
        self.dropC2 = nn.Dropout2d()
        
        # Fully connected layer 1
        self.fc1 = nn.Linear(in_features=20*4*4,
                            out_features=50)
        
        # Fully connected layer 2
        self.fc2 = nn.Linear(in_features=50,
                             out_features=10)
        
    def forward(self,x):
        
        # Conv 1
        out = self.conv1(x)
        out = F.max_pool2d(out, 2)
        out = F.relu(out)
        
        #print('C1 ', out.shape)
        
        # Conv 2
        out = self.conv2(out)
        out = self.dropC2(out)
        out = F.max_pool2d(out, 2)
        out = F.relu(out)
        
        #print('C2 ', out.shape)
        
        # Flatten out
        out = out.view(-1, 20*4*4)
        
        #print('flat ', out.shape)
        
        # Fully Connected Layer 1
        out = self.fc1(out)
        out = F.relu(out)
        
        # Drop out FC
        out = F.dropout(out, training=self.training)
        
        # Fully Connected Layer 2
        out = self.fc2(out)
        out = F.log_softmax(out, dim=1)
        
        return out

## KFold CV on CNN Two

In [147]:
torchKFold(NeuralNet2, train_dataset,'two')

--------------------------------
FOLD 0
--------------------------------
Reset trainable parameters of layer = Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
Reset trainable parameters of layer = Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
Reset trainable parameters of layer = Linear(in_features=320, out_features=50, bias=True)
Reset trainable parameters of layer = Linear(in_features=50, out_features=10, bias=True)
Starting epoch 1
Loss after mini-batch   500: 2.219
Loss after mini-batch  1000: 1.586
Loss after mini-batch  1500: 1.113
Loss after mini-batch  2000: 0.921
Loss after mini-batch  2500: 0.813
Loss after mini-batch  3000: 0.719
Loss after mini-batch  3500: 0.689
Loss after mini-batch  4000: 0.618
Loss after mini-batch  4500: 0.591
Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 0: 82 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 82.075 %
Average: 82.075 %


## CNN Three
**Build The Neural Network Class**

Finally, we kept the structure of the first NN and doubled the output channels for both of the convolutional layer.  We also increased the kernel size to 6 for the first convolutional layer.

In [135]:
class NeuralNet3(nn.Module):
    def __init__(self):
        super(NeuralNet3, self).__init__()
        
        # Convolutional Layer 1
        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=32,
                               kernel_size=6,
                               stride=1,
                               padding=0)
        self.relu1 = nn.ReLU()
        
        # Max Pool of Layer 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        # Convolutional Layer 2
        self.conv2 = nn.Conv2d(in_channels=32,
                               out_channels=64,
                               kernel_size=3,
                               stride=1,
                               padding=0)
        self.relu2 = nn.ReLU()
        
        # Max Pool of Layer 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        # Fully connected layer
        self.fc = nn.Linear(in_features=64*4*4,
                            out_features=10)
        
    def forward(self,x):
        
        # Conv 1
        out = self.conv1(x)
        out = self.relu1(out)
        
        #print('C1 ', out.shape)
        
        # Pool layer 1
        out = self.maxpool1(out)
        
        #print('P1 ', out.shape)
        
        # Conv 2
        out = self.conv2(out)
        out = self.relu2(out)
        
        #print('C2 ', out.shape)
        
        # Pool Layer 2
        out = self.maxpool2(out)
        
        #print('P2 ', out.shape)
        
        # Flatten out
        out = out.view(-1, 64*4*4)
        
        #print('flat ', out.shape)
        
        # Fully Connected Layer
        out = F.relu(self.fc(out))
        
        return out

## KFold CV on CNN Three

In [136]:
torchKFold(NeuralNet3, train_dataset, 'Three')

--------------------------------
FOLD 0
--------------------------------
Reset trainable parameters of layer = Conv2d(1, 32, kernel_size=(6, 6), stride=(1, 1))
Reset trainable parameters of layer = Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
Reset trainable parameters of layer = Linear(in_features=1024, out_features=10, bias=True)
Starting epoch 1
Loss after mini-batch   500: 1.831
Loss after mini-batch  1000: 1.236
Loss after mini-batch  1500: 1.007
Loss after mini-batch  2000: 0.932
Loss after mini-batch  2500: 0.923
Loss after mini-batch  3000: 0.848
Loss after mini-batch  3500: 0.849
Loss after mini-batch  4000: 0.863
Loss after mini-batch  4500: 0.870
Training process has finished. Saving trained model.
Starting testing
Accuracy for fold 0: 66 %
--------------------------------
K-FOLD CROSS VALIDATION RESULTS FOR 5 FOLDS
--------------------------------
Fold 0: 66.24166666666666 %
Average: 66.24166666666666 %
FOLD 1
--------------------------------
Reset trainable parameters

In [152]:
model = NeuralNet1()

In [155]:
state_dict = torch.load('model-one-fold-4.pth')

In [156]:
model.load_state_dict(state_dict)

<All keys matched successfully>

In [157]:
# Calculate Accuracy         
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:

    test = Variable(images.view(100,1,28,28))
    # Forward propagation
    outputs = model(test)
    # Get predictions from the maximum value
    predicted = torch.max(outputs.data, 1)[1]

    # Total number of labels
    total += len(labels)
    correct += (predicted == labels).sum()

accuracy = 100 * correct / float(total)

print("The accuracy of the best CNN on the test data is:", accuracy)

The accuracy of the best CNN on the test data is: tensor(94.8200)
