# Load all imports

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import helper
import net

# Globals, CNNs, and Device

In [2]:
oriPATH = 'C:/Users/Ryan/Desktop/machine-learning/part2/cnn'
classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
learningRates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
numEpochs = 50

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

wideNet = net.ReluNetWide()
deepNet = net.ReluNetDeep()
print(sum([p.numel() for p in wideNet.parameters()]))
print(sum([p.numel() for p in deepNet.parameters()]))
wideNet = wideNet.to(device)
deepNet = deepNet.to(device)

cuda:0
951274
983978


The wide and deep nets both have similar numbers of values and are approximately the same size as the original network.

# MNIST training and validation set with augmentation

In [3]:
valid_ratio = 0.3

transform = transforms.Compose([
    # define your data deepmentation here!
    transforms.RandomRotation(degrees=60),
    transforms.RandomRotation(degrees=300),
    transforms.RandomRotation(degrees=30),
    transforms.RandomRotation(degrees=330),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

transformed = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

nb_train = int((1.0 - valid_ratio) * len(transformed))
nb_valid =  int(valid_ratio * len(transformed))
t_train_dataset, t_valid_dataset = torch.utils.data.dataset.random_split(transformed, [nb_train, nb_valid])
t_trainloader = torch.utils.data.DataLoader(t_train_dataset, batch_size=1000, shuffle=True, num_workers=2, pin_memory=True)
t_validloader = torch.utils.data.DataLoader(t_valid_dataset, batch_size=1000, shuffle=True, num_workers=2, pin_memory=True)

# Define the loss function and the optimizer.

In [4]:
import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()

wideOpt = optim.Adam(wideNet.parameters(), lr=0.001)
deepOpt = optim.Adam(deepNet.parameters(), lr=0.001)

# Train the CNN and store the best model based on the validation loss.

In [5]:
import time
import os as OO
OO.mkdir(oriPATH + '/task5')
PATH = oriPATH + '/task5'

wideTrainingLoss = []
wideValidationLoss = []
deepTrainingLoss = []
deepValidationLoss = []


train, val = helper.runCNN_earlyStop(t_trainloader, device, deepOpt, deepNet, criterion, t_validloader, PATH, 'deepNet', numEpochs)
deepTrainingLoss.append(train)
deepValidationLoss.append(val)
train, val = helper.runCNN_earlyStop(t_trainloader, device, wideOpt, wideNet, criterion, t_validloader, PATH, 'wideNet', numEpochs)
wideTrainingLoss.append(train)
wideValidationLoss.append(val)

np.save(OO.path.join(PATH, 'wideTrainingLoss.npy'), wideTrainingLoss)
np.save(OO.path.join(PATH, 'wideValidationLoss.npy'), wideValidationLoss)
np.save(OO.path.join(PATH, 'deepTrainingLoss.npy'), deepTrainingLoss)
np.save(OO.path.join(PATH, 'deepValidationLoss.npy'), deepValidationLoss)

# Define the test dataset.

In [6]:
transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))
])
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Infer on the whole test dataset.

In [8]:
testloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False, num_workers=2, pin_memory=True)

import os as OO
PATH = oriPATH + '/task5'

accuracy = []

testNet = net.ReluNetWide()
testNet = testNet.to(device)
testNet.load_state_dict(torch.load(OO.path.join(PATH, 'wideNet.pth')))
correct, total = helper.testCNN(testloader, testNet, device)
accuracy.append(100 * correct / total)
testNet2 = net.ReluNetDeep()
testNet2 = testNet2.to(device)
testNet2.load_state_dict(torch.load(OO.path.join(PATH, 'deepNet.pth')))
correct, total = helper.testCNN(testloader, testNet, device)
accuracy.append(100 * correct / total)

sTrain = np.load(OO.path.join(PATH, 'wideTrainingLoss.npy'))
rTrain = np.load(OO.path.join(PATH, 'deepTrainingLoss.npy'))

wideNumEpochs = len(sTrain[0])
deepNumEpochs = len(rTrain[0])

print("wide network: {} epochs %.3F %% accuracy".format(wideNumEpochs) % accuracy[0])
print("deep stop network: {} epochs %.3F %% accuracy".format(deepNumEpochs) % accuracy[1])


wideular network: 48 epochs 97.500 % accuracy
deep stop network: 49 epochs 97.500 % accuracy
