In [189]:
# Q1:
import torchvision
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import SubsetRandomSampler, RandomSampler, random_split

mnist_train=torchvision.datasets.MNIST('mnist',
                train=True,
                download=True,
                transform=torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize((0.1307,), (0.3081,))
                ]))

# Get validation split here (5000 random samples as validation data)
# mnist_valid = random_split(mnist_train, [len(mnist_train) - 5000, 5000])
dataset_size = len(mnist_train)
indices = list(range(dataset_size))
validation_split = 0.2
split = int(validation_split * dataset_size)
np.random.shuffle(indices)

train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

mnist_test=torchvision.datasets.MNIST('mnist',
                train=False,
                download=True,
                transform=torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize((0.1307,), (0.3081,))
                ]))

print(type(mnist_train))


<class 'torchvision.datasets.mnist.MNIST'>


In [190]:
print(mnist_train.data.shape)
print(mnist_test.data.shape)

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


In [193]:
# Dataloader for laoding in data
# Steps to do this is to convert raw data to tensor
# Then tensor to dataset object
# And then from dataset object to dataloader

# Our MNIST is already in tensorDataset format
# train_dataset = torch.utils.data.TensorDataset(mnist_train.data, mnist_train.targets)

# First split trainset into train and validation data
trainset = torch.utils.data.DataLoader(mnist_train, batch_size=264, sampler=train_sampler)
# validset = torch.utils.data.DataLoader(mnist_valid, batch_size=264, sampler=train_sampler)
validset = torch.utils.data.DataLoader(mnist_train, batch_size=264, sampler=valid_sampler)

# Same for test dataset
testset = torch.utils.data.DataLoader(mnist_test, batch_size=264, shuffle=False)



In [194]:
class NN(nn.Module):
    
    def __init__(self):
        super(NN, self).__init__() # any init calls before this should also call this init only
        self.fp_input = nn.Linear(28*28, 1024)
        self.fp1 = nn.Linear(1024, 1024)
        self.fp2 = nn.Linear(1024, 1024)
        self.fp3 = nn.Linear(1024, 1024)
        self.fp4 = nn.Linear(1024, 1024)
        self.fp5 = nn.Linear(1024, 10)
        self.output_activation = nn.Softmax(dim = 1)
        
    def forward(self, input):
        # flatten input already so that it is a row vector
        input = self.fp_input(input)
        input = self.fp1(input)
        input = self.fp2(input)
        input = self.fp3(input)
        input = self.fp4(input)
        input = self.output_activation(self.fp5(input))        
        return input        
    

In [195]:
# A minor difference is that the implementation of CrossEntrypyLoss implicitly applies a softmax activation followed by a log transformation 
# but NLLLoss does not.
torch.cuda.is_available()
device = torch.device('cuda')

# Calling the NN class
net = NN()
if torch.cuda.is_available():
    net.cuda()


In [200]:
# Need to set optimizers and loss functions
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr = 0.001)

train_loss_all = []
valid_loss_all = []

global min_valid_loss
min_valid_loss = 99999
global tolerance_level
tolerance_level = 0

epochs = 30
for epoch in range(1, 10):
    train_loss = 0
    valid_loss = 0
    
    for i, (data, labels) in enumerate(trainset):
        # Push all variables to cuda
        if(torch.cuda.is_available()):
            data, labels =  data.to(device), labels.to(device)
            
        optimizer.zero_grad() # reset gradients
        output = net(data.reshape(-1, 784)).to(device)
        loss = loss_func(output, labels)
        loss.backward()
        optimizer.step()
        # Track loss
        train_loss += loss.item()
        
    # Now do validation and keep track of valid loss
    net.eval()
    for i, (data, labels) in enumerate(validset):
        if(torch.cuda.is_available()):
            data, labels = data.to(device), labels.to(device)
        
        # FP
        pred = net(data.reshape(-1, 784)).to(device)
        loss = loss_func(pred, labels)
        valid_loss += loss.item()
        
    
    print(f'Epoch {epoch} \t\t Training loss: {train_loss/len(trainset)} \t\t Validation loss: {valid_loss/len(validset)} ')
    train_loss_all.append(train_loss/len(trainset))
    valid_loss_all.append(valid_loss/len(validset))
    
    # Early Stopping
    if(valid_loss/len(validset) < min_valid_loss):
        if(tolerance_level == 5):
            print('EARLY STOPPING ACTIVATED!! NO DECREASE IN VALID LOSS FOR PAST 5 EPOCHS')
            break
        else:
            min_valid_loss = valid_loss/len(validset)
            tolerance_level+=1    
        

Epoch 1 		 Training loss: 2.3641576321570428 		 Validation loss: 2.3620731571446294 
Epoch 2 		 Training loss: 2.3641252504600274 		 Validation loss: 2.3627648768217666 
Epoch 3 		 Training loss: 2.3640975048253825 		 Validation loss: 2.362567238185717 
Epoch 4 		 Training loss: 2.3641298825924215 		 Validation loss: 2.362369635830755 
Epoch 5 		 Training loss: 2.36416687808194 		 Validation loss: 2.362468439599742 
Epoch 6 		 Training loss: 2.364143758029728 		 Validation loss: 2.36246845514878 
Epoch 7 		 Training loss: 2.3641159940551923 		 Validation loss: 2.3623696202817173 
Epoch 8 		 Training loss: 2.364111378952697 		 Validation loss: 2.3626660626867544 
Epoch 9 		 Training loss: 2.364116009775099 		 Validation loss: 2.362863680590754 
