In [1]:
# Imports
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F


from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Subset, TensorDataset

from torchsummary import summary
import torchvision as tv
import torchvision.transforms as T
import copy

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline

# ask the inline matplotlib backend to emit figures in SVG format
backend_inline.set_matplotlib_formats("svg")


#### PyTorch device-specific configuration ###

# Use CUDA when PyTorch can see a GPU; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Alternative backend: DirectML (AMD GPU). To use it, uncomment the two
# lines below so that they override the device selected above.
# import torch_directml
# device = torch_directml.device()

# From here on, tensors and modules created without an explicit device
# are placed on `device`.
torch.set_default_device(device)

In [2]:
### MNIST data

# import dataset
# (pass the path itself: np.loadtxt then opens AND closes the file, whereas
#  handing it an already-open file object leaves that handle unclosed)
data = np.loadtxt("../Datasets/mnist_train_small.csv", delimiter=",")

# extract labels (first column), then keep only the pixel columns
labelsT = torch.tensor(data[:, 0]).long()
data = data[:, 1:]

# scale pixels by the global maximum and reshape to (N, 1, 28, 28) images
dataNorm = data / np.max(data)
dataNormT = torch.tensor(dataNorm.reshape(dataNorm.shape[0], 1, 28, 28)).float()

# split the data (10% held out for testing)
train_data, test_data, train_labels, test_labels = train_test_split(
    dataNormT, labelsT, test_size=0.1
)

# convert into PyTorch Datasets
train_data = TensorDataset(train_data, train_labels)
test_data = TensorDataset(test_data, test_labels)

# translate into dataloader objects
# (the generator is created on `device` to match the default device set above)
batchsize = 32
numbers_train_loader = DataLoader(
    train_data,
    batch_size=batchsize,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device=device),
)
# the whole test set is served as one single batch
numbers_test_loader = DataLoader(
    test_data,
    batch_size=len(test_data),
    generator=torch.Generator(device=device),
)

In [5]:
### FMNIST data
# (this cell used to contain the same code pasted twice; one copy is enough,
#  the second one only rebuilt identical datasets and loaders)

# transformations: convert to tensor, then normalize with mean 0.5 and std 0.5
transform = T.Compose([T.ToTensor(), T.Normalize(0.5, 0.5)])

# import the data and simultaneously apply the transform
trainset = tv.datasets.FashionMNIST(
    root="../Datasets/", train=True, download=True, transform=transform
)
testset = tv.datasets.FashionMNIST(
    root="../Datasets/", train=False, download=True, transform=transform
)

# transform to dataloaders
# (the generator is created on `device` to match the default device set above)
batchsize = 32
fashion_train_loader = DataLoader(
    trainset,
    batch_size=batchsize,
    shuffle=True,
    drop_last=True,
    generator=torch.Generator(device=device),
)
# the whole test set is served as one single batch
fashion_test_loader = DataLoader(
    testset,
    batch_size=len(testset),
    generator=torch.Generator(device=device),
)

In [6]:
# factory for the MNIST CNN together with its loss function and optimizer
def createTheMNISTNet(printtoggle=False):
    """Build the MNIST convolutional network and its training utilities.

    Args:
        printtoggle: if True, the network prints the tensor shape after each
            stage of every forward pass.

    Returns:
        Tuple ``(net, lossfun, optimizer)``: the model instance, an
        ``nn.CrossEntropyLoss`` and an SGD optimizer (lr=0.005) bound to the
        model's parameters.
    """

    class mnistNet(nn.Module):
        """Two conv+maxpool stages followed by two linear layers (10 outputs)."""

        def __init__(self, printtoggle):
            super().__init__()

            # conv stage 1 -- for a 28x28 input: floor((28 + 2*1 - 5)/1) + 1 = 26,
            # halved to 13 by the 2x2 maxpool applied in forward()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=1)

            # conv stage 2 -- floor((13 + 2*1 - 5)/1) + 1 = 11, pooled down to 5
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=1)

            # conv2 therefore hands 20 maps of 5x5 (= 500 values) to the linear part
            pooledSide = 5
            flatFeatures = 20 * pooledSide * pooledSide

            # hidden fully-connected layer, then the 10-way output layer
            self.fc1 = nn.Linear(flatFeatures, 50)
            self.out = nn.Linear(50, 10)

            # remember whether forward() should report tensor sizes
            self.print = printtoggle

        def _report(self, stage, tensor):
            # print "<stage>: <shape>" only when size reporting is switched on
            if self.print:
                print(f"{stage}: {tensor.shape}")

        def forward(self, x):
            self._report("Input", x)

            # stage 1: convolution -> maxpool -> relu
            pooled = F.max_pool2d(self.conv1(x), 2)
            x = F.relu(pooled)
            self._report("Layer conv1", x)

            # stage 2: same recipe with the second convolution
            pooled = F.max_pool2d(self.conv2(x), 2)
            x = F.relu(pooled)
            self._report("Layer conv2", x)

            # flatten everything except the batch dimension
            perSample = x.shape.numel() // x.shape[0]
            x = x.view(-1, perSample)
            self._report("Vectorize", x)

            # linear part: hidden layer with relu, then raw class scores
            x = F.relu(self.fc1(x))
            self._report("Layer fc1", x)

            x = self.out(x)
            self._report("Layer out", x)

            return x

    # instantiate the network
    model = mnistNet(printtoggle)

    # cross-entropy loss on the raw output scores
    criterion = nn.CrossEntropyLoss()

    # plain SGD (NOTE: chosen on purpose to slow down learning!)
    sgd = torch.optim.SGD(model.parameters(), lr=0.005)

    return model, criterion, sgd

In [8]:
# a function that trains the model


# input the network, the data loaders and the number of epochs to train
def function2trainTheModel(
    net, train_loader, test_loader, numepochs=10, lossfun=None, optimizer=None
):
    """Train `net` on mini-batches and evaluate it once per epoch.

    Args:
        net: the model to train; it is updated in place and also returned.
        train_loader: iterable yielding ``(X, y)`` training batches.
        test_loader: iterable of ``(X, y)`` test batches; only its FIRST batch
            is used for the per-epoch test accuracy.
        numepochs: number of passes over `train_loader`.
        lossfun: callable ``lossfun(yHat, y)`` returning the loss tensor.
            If omitted, the notebook-level global ``lossfun`` is used
            (the historical behaviour of this function).
        optimizer: optimizer that updates `net`'s parameters. If omitted, the
            notebook-level global ``optimizer`` is used. Pass it explicitly
            whenever a new network is created, otherwise a stale global
            optimizer would keep updating the parameters of an older model.

    Returns:
        Tuple ``(trainAcc, testAcc, losses, net)``:
        trainAcc is a list with the mean training accuracy (percent) per epoch,
        testAcc a list of 0-dim tensors with the test accuracy (percent) per
        epoch, losses a 1-D tensor with the mean training loss per epoch.

    Raises:
        NameError: if `lossfun` / `optimizer` is neither passed nor defined
            as a global.
    """

    # resolve the loss function and optimizer: explicit argument first,
    # then the global of the same name for backward compatibility
    namespace = globals()
    if lossfun is None:
        lossfun = namespace.get("lossfun")
    if optimizer is None:
        optimizer = namespace.get("optimizer")
    missing = [
        name
        for name, obj in (("lossfun", lossfun), ("optimizer", optimizer))
        if obj is None
    ]
    if missing:
        raise NameError(
            f"{', '.join(missing)} not provided: pass it as an argument "
            "or define it as a global before calling function2trainTheModel"
        )

    # initialize losses
    losses = torch.zeros(numepochs)
    trainAcc = []
    testAcc = []

    # loop over epochs
    for epochi in range(numepochs):

        # loop over training data batches
        net.train()
        batchAcc = []
        batchLoss = []
        for X, y in train_loader:

            # forward pass and loss
            yHat = net(X)
            loss = lossfun(yHat, y)

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss from this batch
            batchLoss.append(loss.item())

            # compute accuracy (percent of correct predictions in this batch);
            # .item() turns it into a plain Python float so that np.mean below
            # also works when the tensors live on a GPU (NumPy cannot read
            # CUDA tensors directly)
            matches = torch.argmax(yHat, dim=1) == y  # booleans (false/true)
            batchAcc.append(100 * matches.float().mean().item())
        # end of batch loop...

        # now that we've trained through the batches, get their average training accuracy
        trainAcc.append(np.mean(batchAcc))

        # and get average losses across the batches
        losses[epochi] = np.mean(batchLoss)

        # test accuracy
        net.eval()
        X, y = next(iter(test_loader))  # extract X,y from test dataloader
        with torch.no_grad():  # deactivates autograd
            yHat = net(X)

        # compute test accuracy (kept as a 0-dim tensor, as before)
        testAcc.append(100 * torch.mean((torch.argmax(yHat, dim=1) == y).float()))

    # end epochs

    # function output
    return trainAcc, testAcc, losses, net