In [29]:
import torch
import torchvision
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader

# Importing all the required Libraries

In [30]:
# https://github.com/AquibPy/Pytorch/blob/master/MNIST%20Using%20ANN%20on%20GPU%20with%20Pytorch.ipynb

# The below code is added to transfer data from CPU to CUDA if CUDA is available

def get_default_device():
  """Pick GPU if available, else CPU"""
  if torch.cuda.is_available():
    return torch.device('cuda')
  else:
    return torch.device('cpu')
device = get_default_device()
device

def to_device(data,device):
    if isinstance(data, (list,tuple)): #The isinstance() function returns True if the specified object is of the specified type, otherwise False.
        return [to_device(x,device) for x in data]
    return data.to(device,non_blocking=True)

# The below code is a wrapper around the data loader function to transfer all data points to the available device

class DeviceDataLoader(Dataset):
    def __init__(self,dl,device):
        self.dl = dl
        self.device = device
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl:
            yield to_device(b,self.device)

    
    def __len__(self):
        return len(self.dl)

In [31]:
# This block stores all the parameters to train the Neural Network

n_epochs = 3
batch_size_train = 10
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 10

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7f503a8af530>

In [32]:
# We import the MNIST dataset using the DataLoader and store it in train and test 

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size_test, shuffle=True)

In [33]:
# We add random input to the neural network, that is being sent as one-hot encoded matrix. The 2 functions convert int to one-hot encoding and back to int.

def convert_to_one_hot_encoding(input_data, num_classes):
  one_hot = torch.nn.functional.one_hot(input_data, num_classes = num_classes)
  return one_hot

def convert_to_labels(input_data):
  output = torch.argmax(input_data, dim=1)
  return output

In [34]:
# This is a custom loss function created to calculate Mean squared error loss between the Y2 and the expected output

def complex_mse_loss(output, target):
  return (0.5*(output - target)**2).mean(dtype=torch.complex64)

In [35]:
# Creating Additional Train data
'''
We use this function to create the additional random input (x2) and y2 (which is the sum of x2 and output - y1). We first create a random tensor based on the 
training data size (x2). Next, we iterate through the MNIST data to access the input image (x1) and the digit value (y1). 

We do the necessary data massaging to get all the data at the same type.

We need to process this data simultaneously because our output y2 is dependent on output y1 from the MNIST dataset.
'''
def train_additional_data():
  size_additional_data = 60000
  batch_size_train = 10
  additional_input_data_x2 = torch.randint(0, 10, (size_additional_data,))
  additional_input_data_x2 = torch.tensor(additional_input_data_x2, dtype = torch.float32)

  additional_output_y1_intermediary = []
  image_data = torch.empty(0)
  for batch_idx, (data, target) in enumerate(train_loader):
    target = target.tolist()
    additional_output_y1_intermediary.append(target)
    y = data
    image_data = torch.cat([image_data, y], dim = 0)

  additional_output_y1_intermediary_flat_list = [item for sublist in additional_output_y1_intermediary for item in sublist]
  additional_output_y1 = torch.tensor(additional_output_y1_intermediary_flat_list, dtype = torch.float32)
  additional_output_y2 = torch.add(additional_input_data_x2, additional_output_y1)
  return image_data, additional_output_y1, additional_input_data_x2, additional_output_y2

# Creating Additional Test data
def test_additional_data():
  size_additional_data = 10000
  batch_size_test = 1000
  additional_input_data_x2 = torch.randint(0, 10, (size_additional_data,))
  additional_input_data_x2 = torch.tensor(additional_input_data_x2, dtype = torch.float32)

  additional_output_y1_intermediary = []
  image_data = torch.empty(0)
  for batch_idx, (data, target) in enumerate(test_loader):
    target = target.tolist()
    additional_output_y1_intermediary.append(target)
    y = data
    image_data = torch.cat([image_data, y], dim = 0)

  additional_output_y1_intermediary_flat_list = [item for sublist in additional_output_y1_intermediary for item in sublist]
  additional_output_y1 = torch.tensor(additional_output_y1_intermediary_flat_list, dtype = torch.float32)
  additional_output_y2 = torch.add(additional_input_data_x2, additional_output_y1)
  return image_data, additional_output_y1, additional_input_data_x2, additional_output_y2

In [36]:
train_image_data, train_y1, train_additional_input_data_x2, train_additional_output_y2   = train_additional_data() # Converting the function to tensors
additional_train_loader = torch.utils.data.DataLoader(list(zip(train_image_data, train_y1, train_additional_input_data_x2, train_additional_output_y2)),batch_size=batch_size_train, shuffle=False)
# We turn shuffle = False because we want the y2 to be aligned to the right y1 for a given image. Similarly we do this for the test data set.
additional_train_loader = DeviceDataLoader(additional_train_loader,device)

  additional_input_data_x2 = torch.tensor(additional_input_data_x2, dtype = torch.float32)


In [37]:
test_image_data, test_y1, test_additional_input_data_x2, test_additional_output_y2   = test_additional_data()
additional_test_loader = torch.utils.data.DataLoader(list(zip(test_image_data, test_y1, test_additional_input_data_x2, test_additional_output_y2)),batch_size=batch_size_test, shuffle=False)
additional_test_loader = DeviceDataLoader(additional_test_loader,device)

  additional_input_data_x2 = torch.tensor(additional_input_data_x2, dtype = torch.float32)


In [38]:
# https://stackoverflow.com/questions/51700729/how-to-construct-a-network-with-two-inputs-in-pytorch

# We are introducing the x2 which is the 2nd input only after the full conv is done and we are flattening our image and extracting all the required info from image.

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5) # self.conv1 = nn.Conv2d(1, 10, kernel_size=5) is a line of code in PyTorch that creates a 2D convolutional layer with 1 input channel, 10 output channels, and a 5x5 kernel size.
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5) # self.conv2 = nn.Conv2d(10, 20, kernel_size=5) is a line of code in PyTorch that creates a 2D convolutional layer with 10 input channels, 20 output channels, and a 5x5 kernel size.
        self.conv2_drop = nn.Dropout2d() # self.conv2_drop = nn.Dropout2d() is a line of code in PyTorch that creates a 2D dropout layer.
        self.fc1 = nn.Linear(320, 50) #  self.fc1 = nn.Linear(320, 50) is a line of code in PyTorch that creates a fully connected layer with 320 input units and 50 output units.
        self.fc2 = nn.Linear(50, 10)
        self.fc3 = nn.Linear(20, 320) # Here we are taking the input of 20 because we convert the input x2 and y2 to 20 dimensions, to accommodate 1-20 values.
        self.fc4 = nn.Linear(50, 20)

    def forward(self, x, x2):
        x = F.relu(F.max_pool2d(self.conv1(x), 2)) # x = F.relu(F.max_pool2d(self.conv1(x), 2)) is a line of code in PyTorch that applies a 2x2 max pooling layer with ReLU activation to the output of a convolutional layer.
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) # x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) is a line of code in PyTorch that applies a 2x2 max pooling layer with ReLU activation to the output of a convolutional layer with dropout applied.
        x = x.view(-1, 320) # x = x.view(-1, 320) is a line of code in PyTorch that reshapes a tensor into a 2D matrix with 320 columns.
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training) # Dropout is a regularization technique used in machine learning to reduce overfitting by randomly setting some of the input units to 0 during the forward pass.
        x = self.fc2(x)
        f = F.relu(self.fc3(x2)) # We take a 20 dimension one-hot encoded vector and pass it into the Neural network and convert it to 320 dimensions
        f = F.dropout(f, training=self.training) 
        f = F.relu(self.fc1(f)) # We convert 320 dimensions to 50
        f = F.dropout(f, training=self.training) 
        #f = F.relu(self.fc2(f))
        f = self.fc4(f) # We convert 50 dimensions to 20

        return F.log_softmax(x) , F.log_softmax(f)

In [39]:
network = Net() # Initializing the Neural Network
network = to_device(network, device) # Transferring it to GPU
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
                      momentum=momentum) # Leveraging the Gradient descent optimizer

In [50]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*60000 for i in range(n_epochs + 1)]

In [None]:
def train(epoch):
  network.train() # network.train() sets the model in training mode, which enables features such as dropout and batch normalization.
  additional_train_loader_dataset = 60000 # Train data size
  for training_data in enumerate(additional_train_loader):
    batch_idx = training_data[0] # Splitting the data in the loader to batch, x1 (image data), x2 (random number generated), y1 (number), y2 (sum of y1 and x2)
    image_x1 = training_data[1][0]
    y1 = training_data[1][1]
    x2 = training_data[1][2]
    y2 = training_data[1][3]
    
    optimizer.zero_grad()

    # Converting Float to Int
    x2 = x2.type(torch.LongTensor) 
    y2 = y2.type(torch.LongTensor)
    
    '''
    # One Hot Encoding our inputs
    # Need to convert one hot encoded values to floats. 
    This is needed because the weights are of float datatype. And pytorch requires both matrices being multiplied to have the same datatype

    '''

    x2 = torch.nn.functional.one_hot(x2, num_classes = 20) 
    x2 = x2.to(torch.float32) 

    # Transferring modified inputs to the GPU
    x2 = x2.cuda()
    y2 = y2.cuda()
    output_1, output_2 = network(image_x1, x2) # Passing the image data and one-hot encoded int to the network
    
    y1 = y1.type(torch.LongTensor) # PyTorch won't accept a FloatTensor as categorical target, so need to cast tensor to LongTensor

    y1 = y1.cuda()
    loss_1 = F.nll_loss(output_1, y1) # For Output 1 we calculate negative log likehood loss
    
    output_2 = torch.argmax(output_2, dim=1) # Convert to integer
    
    loss_2 = complex_mse_loss(output_2, y2) # Made a custom function to calculate the mean squared error between output 2 and y2
    # We use this function because the 2 outputs need to be close as in regression
    loss = loss_1 + loss_2 # Calculating the Overall loss
    loss.backward() # Backward Propagation
    optimizer.step()
    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * additional_train_loader_dataset, additional_train_loader_dataset,
          100. * batch_idx / len(additional_train_loader), loss.item()))
      train_losses.append(loss.item())
      train_counter.append(
          (batch_idx*64) + ((epoch-1)*additional_train_loader_dataset))
      torch.save(network.state_dict(), '/content/Modified_MNIST_model.pth')
      torch.save(optimizer.state_dict(), '/content/Modified_MNIST_optimizer.pth')


In [None]:
train(0) 

# for epoch in range(1, n_epochs + 1):
#   train(epoch)
#   # test()
#   print(epoch)

# print(test_counter)
# print(test_losses)

In [54]:
def test():
  network.eval()
  test_loss = 0
  correct = 0
  additional_train_loader_dataset = 10000
  with torch.no_grad():
    for test_data in enumerate(additional_test_loader):      
      image_x1 = test_data[1][0]
      y1 = test_data[1][1]
      x2 = test_data[1][2]
      y2 = test_data[1][3]

      x2 = x2.type(torch.LongTensor) # Converting Float to Int
      y2 = y2.type(torch.LongTensor)

      x2 = torch.nn.functional.one_hot(x2, num_classes = 20) # One Hot Encoding our inputs
      x2 = x2.to(torch.float32) # Need to convert one hot encoded values to floats. This is needed because the weights are of float datatype. And pytorch requires both matrices being multiplied to have the same datatype

      x2 = x2.cuda()
      y2 = y2.cuda()

      output_1, output_2 = network(image_x1, x2)

      y1 = y1.type(torch.LongTensor) # PyTorch won't accept a FloatTensor as categorical target, so need to cast tensor to LongTensor

      y1 = y1.cuda()
      loss_1 = F.nll_loss(output_1, y1) # For Output 1 we calculate negative log likehood loss
      output_2 = torch.argmax(output_2, dim=1) # Convert to integer
      loss_2 = complex_mse_loss(output_2, y2) # Made a custom function to calculate the mean squared error between output 2 and y2
      loss = loss_1 + loss_2
      test_loss += loss.item()
      pred_1 = output_1.data.max(1, keepdim=True)[1]
      pred_2 = output_2
      correct_1 = pred_1.eq(y1.data.view_as(pred_1)).sum()
      correct_2 = pred_2.eq(y2.data.view_as(pred_2)).sum()
      correct = correct_1 + correct_2
  test_loss /= additional_train_loader_dataset
  test_losses.append(test_loss)
  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, additional_train_loader_dataset,
    50. * correct / additional_train_loader_dataset)) # 50 because we have 2 values in correct - Correct 1 & Correct 2, to avoid double counting we multiply by 100/2 = 50

In [55]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*60000 for i in range(n_epochs + 1)]

In [56]:
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()
  print(epoch)

print(test_counter)
print(test_losses)

cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0


  return F.log_softmax(x) , F.log_softmax(f)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda

In [None]:
# After 3 Epochs the test accuracy is at only 5%, perhaps this improves as we increase the number of epochs