Pytorch Training for EMNIST Data

# Import Libraries

In [31]:
import matplotlib.pyplot as plt
# import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, random_split, DataLoader   
# See https://clay-atlas.com/us/blog/2021/08/25/pytorch-en-random-split-data-set/
from torchvision import datasets       # Gets standard data sets from internet
from torchvision.transforms import ToTensor, Lambda 

# ToTensor() converts numpy array to a tensor

# Adjustable Variables

In [32]:
# Variables
batch_size=40 # Minibatch Size
learning_rate = 0.00005 # Learning Rate
epochs = 60 # Epochs


# Do you want to run this on all available gpu's? True for yes, False for no
gpu_run = True
# Run on multiple GPU's
parallelize = False

# Which network do you want to use? 'stable' or 'experimental'
which__net = 'experimental'

# Import Training and Testing Data

In [33]:
# Import the EMNIST Data as the training data
training_data = datasets.EMNIST(root = "./data", # Where are we putting it? 
                                                split = 'balanced' , # decide on the dataset split you want 
                                                train = True, # is this the training data?
                                                download = True, # Do you want to download the data to the local machine
                                                transform = ToTensor(), # Transform the outcoming data
                                                )

# Length of testing and validation data splits 
vdat = int(len(training_data)/6) # make 1/6th of the testing data validation
tdat = int(len(training_data) - vdat) # make the rest testing

# Split the training and validation data using random_split
training_data, validation_data = random_split(training_data,[tdat, vdat],
                                              generator=torch.Generator().manual_seed(42))

# Import Testing Data
test_data = datasets.EMNIST(root = "./data", # Where are we putting it? 
                                                split = 'balanced' , # decide on the dataset split you want 
                                                train = False, # is this the training data?
                                                download = True, # Do you want to download the data to the local machine
                                                transform = ToTensor(), # Transform the outcoming data
                                                )

# Shape and Length of the datasets

## Training data

In [34]:
print('type(datasets) =', type(datasets))
print('type(training_data) =', type(training_data))
print('len(training_data) =', len(training_data))
print('type(training_data[0]) =', type(training_data[0]))
print('type(training_data[1]) =', type(training_data[1]))
print('len(training_data[0]) =', len(training_data[0]))
print('training_data[0][0].shape = ', training_data[0][0].shape)
print('training_data[0][1] =', training_data[0][1])
img,label = training_data[0]
print('img.shape =', img.shape)
print('label =',label)

type(datasets) = <class 'module'>
type(training_data) = <class 'torch.utils.data.dataset.Subset'>
len(training_data) = 94000
type(training_data[0]) = <class 'tuple'>
type(training_data[1]) = <class 'tuple'>
len(training_data[0]) = 2
training_data[0][0].shape =  torch.Size([1, 28, 28])
training_data[0][1] = 40
img.shape = torch.Size([1, 28, 28])
label = 40


## Validation Data

In [35]:
print('type(datasets) =', type(datasets))
print('type(validation_data) =', type(validation_data))
print('len(validation_data) =', len(validation_data))
print('type(validation_data[0]) =', type(validation_data[0]))
print('type(validation_data[1]) =', type(validation_data[1]))
print('len(validation_data[0]) =', len(validation_data[0]))
print('validation_data[0][0].shape = ', validation_data[0][0].shape)
print('validation_data[0][1] =', validation_data[0][1])
img,label = validation_data[0]
print('img.shape =', img.shape)
print('label =',label)

type(datasets) = <class 'module'>
type(validation_data) = <class 'torch.utils.data.dataset.Subset'>
len(validation_data) = 18800
type(validation_data[0]) = <class 'tuple'>
type(validation_data[1]) = <class 'tuple'>
len(validation_data[0]) = 2
validation_data[0][0].shape =  torch.Size([1, 28, 28])
validation_data[0][1] = 26
img.shape = torch.Size([1, 28, 28])
label = 26


# Create Minibatches for training

In [36]:
# Training data minibatches
train_dataloader = DataLoader(training_data, # what data are we loading?
                                                batch_size=batch_size,  # what is the minibatch size? 
                                                shuffle=True) # shuffle to randomize

# Testing data minibatches
test_dataloader = DataLoader(test_data, # what data are we loading?
                                                batch_size=batch_size, # what is the minibatch size going to be
                                                shuffle=True) # shuffle to randomize the order

# Validation dataset Loader                                                
validation_dataloader = DataLoader(validation_data,  # what dataset are we loading? 
                                                        batch_size=batch_size, # what is the batch size going to be?
                                                        shuffle=True) # Shuffle to randomize the order

## Testing Data Info

In [37]:
print(type(test_dataloader))
print('len(test_dataloader) =',len(test_dataloader))   # number of batches
print('len(test_dataloader.dataset) =', len(test_dataloader.dataset))
print('len(test_dataloader)*batch_size =',len(test_dataloader)*batch_size)
print()
print(type(train_dataloader))
print('len(train_dataloader) =',len(train_dataloader))   # number of batches
print('len(train_dataloader.dataset) =', len(train_dataloader.dataset))
print('len(train_data_loader)*batch_size =',len(train_dataloader)*batch_size) 
print()
print(type(validation_dataloader))
print('len(validation_dataloader) =',len(validation_dataloader))   # number of batches
print('len(validation_dataloader.dataset) =', len(validation_dataloader.dataset)) 
print('len(validation_dataloader)*batch_size =',len(validation_dataloader)*batch_size) 

<class 'torch.utils.data.dataloader.DataLoader'>
len(test_dataloader) = 470
len(test_dataloader.dataset) = 18800
len(test_dataloader)*batch_size = 18800

<class 'torch.utils.data.dataloader.DataLoader'>
len(train_dataloader) = 2350
len(train_dataloader.dataset) = 94000
len(train_data_loader)*batch_size = 94000

<class 'torch.utils.data.dataloader.DataLoader'>
len(validation_dataloader) = 470
len(validation_dataloader.dataset) = 18800
len(validation_dataloader)*batch_size = 18800


## Example Batch

In [38]:
# # train_dataloader is not subscriptable so train_dataloader[0] gives an error

# batch_example = iter(train_dataloader).next()
# print('len(batch_example) =',len(batch_example))
# print('type(batch_example) =',type(batch_example))
# print()

# batch_images = batch_example[0]
# print('len(batch_images) =',len(batch_images))
# print('type(batch_images) =', type(batch_images))

# batch_labels = batch_example[1]
# print()
# print('batch_labels =', batch_labels)
# print('type(batch_labels) =', type(batch_labels))

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device '.format(device))

Using cpu device 


# Design the Network

In [39]:
# from tkinter.ttk import _Padding
import torch.nn.functional as F
if which__net == 'stable':

    class NeuralNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(1, 20, stride = 1, kernel_size = 5)  # makes 20 maps of 24x24
            self.pool = nn.MaxPool2d(2, 2)                              # 20 maps of 12x12
            self.conv2 = nn.Conv2d(20, 40, stride = 1, kernel_size = 5) # 40 maps of 8x8
            self.pool = nn.MaxPool2d(2, 2)                              # 40 maps of 4x4
            self.fc1 = nn.Linear(40 * 4 * 4, 100)                       # flatten to 40*4*4 neurons                   
                    
        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = torch.flatten(x, 1) # flatten all dimensions except batch
            x = F.relu(self.fc1(x))
            return x
#############################################################################

if which__net == 'experimental':
    # ############# New net##################
     # ############# New net##################
    print('Using experimental network')

    class NeuralNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            # makes 20 maps of 28x28
            self.conv1 = nn.Conv2d(1, 20, # Channels in/Out 
                                                    stride = 1, # Stride
                                                    kernel_size = 3,
                                                    padding = 1)  # Kernel

            # Keep the size, add 10 more layers
            self.conv2 = nn.Conv2d(20, 30, # Channels in/Out 
                                                    stride = 1, # Stride
                                                    kernel_size = 3,
                                                    padding = 1)  # Kernel

            # Downsampled and thus altering parameters
            self.conv3 = nn.Conv2d(30  , 30 ,
                                                     stride = 1,
                                                     kernel_size = 3,
                                                     padding = 1) 

            # Keeping size and reducing layers
            self.conv4 = nn.Conv2d(30  , 10 ,
                                                     stride = 1,
                                                     kernel_size = 3,
                                                     padding = 1) 
            # Pooling function to divide size in hafl
            self.pool = nn.MaxPool2d(2, 2)   # 40 maps of 4x4
            
            # fully connected layer structure
            self.fc1 = nn.Linear(10 * 7 * 7, 300)                          
            self.fc2 = nn.Linear(300 , 160)                     # Added
            self.fc3 = nn.Linear(160, 120)                       # Added    

        def forward(self, x):
            # Convolutions Period
            # First Round
            x = F.rrelu(self.conv1(x))
            x = F.rrelu(self.conv2(x))
            x = self.pool(x)

            # Second Round
            x = F.rrelu(self.conv3(x))
            x = F.rrelu(self.conv4(x))
            x = self.pool(x)
            # x = self.pool(F.rrelu(self.conv3(x))) # Added

            # Flatten everything in order to pass through FC Layers
            x = torch.flatten(x, 1) # flatten all dimensions except batch

            # Linear Layers
            x = F.rrelu(self.fc1(x))
            x = F.rrelu(self.fc2(x)) # Added 
            x = F.rrelu(self.fc3(x)) # Added
            # x = F.softmax(self.fc4(x), dim =1) # Added
                # Decided against softmax based on Ian/Goodfellow chapter 6 https://stats.stackexchange.com/questions/218752/relu-vs-sigmoid-vs-softmax-as-hidden-layer-neurons
            # x = self.dropout(x) # Added 
                ## Removed because it is just elminating the random variables
            return x
    ##########################################

    model = NeuralNetwork().to(device)




############# Mods
# strides set from 1 to 2  :/
# https://paperswithcode.com/method/u-net

# some modifications pulled from 
# https://github.com/austin-hill/EMNIST-CNN/blob/master/torch_cnn.py 
# Added Dropout


# Switched 


Using experimental network


# Run on GPU And parallelize

In [40]:
# if gpu_run == True:
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     print('Using {} device '.format(device))
#     model =  NeuralNetwork().to(device)

#     #########################PARALLEL GPU'S#########################################
#     # This was pulled from the parallelization tutorial in pytorch
#     # https://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html
#     if parallelize == True:
#         if torch.cuda.device_count() > 1:
#             print("Using", torch.cuda.device_count(), "GPU's")
#             model = nn.DataParallel(model)  

#         # Transfer the model to the chosen platform 
#         model.to(device)
#         # model =  NeuralNetwork().to(device)
#         print(model)

# else:
#     model =  Net().to(device)
#     print(model)
#     print('Running on CPU')


model =  NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (conv1): Conv2d(1, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(20, 30, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(30, 30, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(30, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=490, out_features=300, bias=True)
  (fc2): Linear(in_features=300, out_features=160, bias=True)
  (fc3): Linear(in_features=160, out_features=120, bias=True)
)


# Define the training loop process

This takes the data loader, model, loss function, and optimization function as input and, for the length of the dataset, pushes the input and output to whatever device is being used to then predict the model for X and look at the loss function for the predicted vs. y. 

This information is then used for backwards propogation

In [41]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X,y) in enumerate(dataloader):
        X = X.to(device) 
        y = y.to(device)
        pred = model(X) # Run X  through the model
        loss = loss_fn(pred,y) # Compare the output to y through the loss function
    
        optimizer.zero_grad() # Zero the gradient
        loss.backward() # Backwards propogation
        optimizer.step() # Re-evaluate the model and return the loss


# Define the testing loop process

In [42]:
validation_correct = []
def test_loop(dataloader, model, loss_fn, valid_or_test_flag = "valid"):
    size = len(dataloader.dataset)
    test_loss, correct = 0,0
    with torch.no_grad():
        for X,y in dataloader:
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred,y).item() # apply loss function
            correct += (pred.argmax(1)==y).type(torch.float).sum().item()
    test_loss /= size
    correct = int(correct)
    accuracy = correct/size
    if valid_or_test_flag == "test":
        print(f"Test Error: \n Accuracy {(100*correct):>0.1f}%, Avg. loss: {test_loss:>8f} \n")
        # print(f"Accuracy on Test Data: \n {correct}/{size} or {(100*accuracy):>0.1f} percent% \n")
        # print(f"Average Test Loss: \n {test_loss:>8f} \n")
    else: 
        print(f"Accuracy on Validation Data: \n {correct}/{size} or {(100*accuracy):>0.1f} percent% \n")
        print(f"Average Validation Loss: \n {test_loss:>8f} \n")
        validation_correct.append(correct)

# Define loss functino, optimization, and run through each epoch

In [43]:

# Loss function
loss_fn = nn.CrossEntropyLoss()

# # Optimizaiton function
# optimizer = torch.optim.SGD(model.parameters(),
#                                                     lr = learning_rate)
                                                    
# Optimizaiton function
optimizer = torch.optim.Adam(model.parameters(),
                                                    lr = learning_rate)
# for each epoch
for t in range(epochs):
    # Print what epoch we are on
    print(f"Epoch {t+1})\n --------------------------")
    # Run the training loop with:
    train_loop(train_dataloader,  # data
                        model, # model we are using
                        loss_fn, # loss function
                        optimizer) # appropriate optimizer

    # Run the testing loop with:
    test_loop(validation_dataloader, # The validation dataset 
                        model, # the model specified
                        loss_fn, # The loss function
                        valid_or_test_flag = "valid") # Print the validation rate

# After we hav run through all the epochs
print("Done Training")
print()
print("Begin Test")

# Run the model on the testing data
test_loop(test_dataloader,  # input the testing data
                    model,  # Using the trained model
                    loss_fn, # Using the specified loss function
                    valid_or_test_flag = "test") # Look at the testing rate

Epoch 1)
 --------------------------
Accuracy on Validation Data: 
 12111/18800 or 64.4 percent% 

Average Validation Loss: 
 0.030799 

Epoch 2)
 --------------------------
Accuracy on Validation Data: 
 13370/18800 or 71.1 percent% 

Average Validation Loss: 
 0.024586 

Epoch 3)
 --------------------------
Accuracy on Validation Data: 
 13773/18800 or 73.3 percent% 

Average Validation Loss: 
 0.022098 

Epoch 4)
 --------------------------
Accuracy on Validation Data: 
 14212/18800 or 75.6 percent% 

Average Validation Loss: 
 0.020014 

Epoch 5)
 --------------------------
Accuracy on Validation Data: 
 14473/18800 or 77.0 percent% 

Average Validation Loss: 
 0.018373 

Epoch 6)
 --------------------------
Accuracy on Validation Data: 
 14813/18800 or 78.8 percent% 

Average Validation Loss: 
 0.016977 

Epoch 7)
 --------------------------
Accuracy on Validation Data: 
 15002/18800 or 79.8 percent% 

Average Validation Loss: 
 0.016086 

Epoch 8)
 --------------------------
Accu

In [44]:
target_transform = Lambda(lambda y: torch.zeros(
    10, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1))

# Save the network

In [None]:
PATH = './EMNIST_net.pth' # Path that we are going to save to
torch.save(model.state_dict(), PATH) # Save the network to this given path

### Save the plot parameters

In [None]:
import json
filename = 'EMNIST_plot_data.json'
f = open(filename, "w")
json.dump(validation_correct, f)
f.close() 

validation_correct_xmin = 0
validation_cost_xmin = 0
validation_set_size = len(validation_data)

filename = 'EMNIST_plot_parameters.json'
f = open(filename, "w")
json.dump([epochs, validation_correct_xmin, validation_set_size], f)
f.close()



## Assess the overall accuracy of the model

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 82 %
