# Load all imports

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import helper
import net

# Globals, CNNs, and Device

In [2]:
oriPATH = 'C:/Users/Ryan/Desktop/machine-learning/part2/cnn'
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
learningRates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
numEpochs = 50

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

regNet = net.ReluNet()
earlyNet = net.ReluNet()
regNet = regNet.to(device)
earlyNet = earlyNet.to(device)

cuda:0


# MNIST training and validation set augmentation

In [3]:
valid_ratio = 0.3

transform = transforms.Compose([
    # define your data augmentation here!
    # transforms.RandomRotation(degrees=30),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_valid_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
nb_train = int((1.0 - valid_ratio) * len(train_valid_dataset))
nb_valid =  int(valid_ratio * len(train_valid_dataset))
train_dataset, valid_dataset = torch.utils.data.dataset.random_split(train_valid_dataset, [nb_train, nb_valid])
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=1000, shuffle=True, num_workers=2, pin_memory=True)
validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=1000, shuffle=True, num_workers=2, pin_memory=True)

# Define the loss function and the optimizer.

In [4]:
import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()

regOpt = optim.Adam(regNet.parameters(), lr=0.001)
earlyOpt = optim.Adam(earlyNet.parameters(), lr=0.001)

# Train the CNN and store the best model based on the validation loss.

In [5]:
import time
import os as OO
OO.mkdir(oriPATH + '/task3')
PATH = oriPATH + '/task3'

regTrainingLoss = []
regValidationLoss = []
earlyTrainingLoss = []
earlyValidationLoss = []

train, val = helper.runCNN_noStop(trainloader, device, regOpt, regNet, criterion, validloader, PATH, 'regNet', numEpochs)
regTrainingLoss.append(train)
regValidationLoss.append(val)
train, val = helper.runCNN_earlyStop(trainloader, device, earlyOpt, earlyNet, criterion, validloader, PATH, 'earlyNet', numEpochs)
earlyTrainingLoss.append(train)
earlyValidationLoss.append(val)

np.save(OO.path.join(PATH, 'regTrainingLoss.npy'), regTrainingLoss)
np.save(OO.path.join(PATH, 'regValidationLoss.npy'), regValidationLoss)
np.save(OO.path.join(PATH, 'earlyTrainingLoss.npy'), earlyTrainingLoss)
np.save(OO.path.join(PATH, 'earlyValidationLoss.npy'), earlyValidationLoss)

32768it [00:01, 24254.09it/s]
1654784it [00:01, 1394055.38it/s]                             
8192it [00:00, 25681.71it/s]            
9920512it [00:20, 1982750.62it/s]

# Define the test dataset.

In [6]:
transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))
])
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Infer on the whole test dataset.

In [7]:
testloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False, num_workers=2, pin_memory=True)

import os as OO
PATH = oriPATH + '/task3'

accuracy = []

testNet = net.ReluNet()
testNet = testNet.to(device)
testNet.load_state_dict(torch.load(OO.path.join(PATH, 'regNet.pth')))
correct, total = helper.testCNN(testloader, testNet, device)
accuracy.append(100 * correct / total)
testNet2 = net.ReluNet()
testNet2 = testNet2.to(device)
testNet2.load_state_dict(torch.load(OO.path.join(PATH, 'earlyNet.pth')))
correct, total = helper.testCNN(testloader, testNet, device)
accuracy.append(100 * correct / total)

sTrain = np.load(OO.path.join(PATH, 'regTrainingLoss.npy'))
rTrain = np.load(OO.path.join(PATH, 'earlyTrainingLoss.npy'))

regNumEpochs = len(sTrain[0])
earlyNumEpochs = len(rTrain[0])

print("regular network: {} epochs %.3F %% accuracy".format(regNumEpochs) % accuracy[0])
print("early stop network: {} epochs %.3F %% accuracy".format(earlyNumEpochs) % accuracy[1])


regular network: 50 epochs 99.190 % accuracy
early stop network: 20 epochs 99.190 % accuracy


The models both achieved the same accuracy, but the early stop condition I implemented reduced the training time by over 50%.  The stop condition I used is to finalize the model upon the 5th epoch that did not result in a lower loss.  These results might indicate that the gradient simply flattens to its lowest value, or the global maxima is very far from my stop condition even.  Regardless, I think that 99.19% test accuracy is awesome!