## Imports

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn.functional as F
from torchsummary import summary

## Loading the MNIST Dataset

In [2]:
train_dataset = torchvision.datasets.MNIST(
                    root='.',
                    train=True,
                    transform=transforms.ToTensor(), 
                    download=True)

test_dataset = torchvision.datasets.MNIST(
                    root='.',
                    train=False,
                    transform=transforms.ToTensor(),
                    download=True)

 ## Generate random numbers

In [3]:
# Train Set, 60000 Samples
number_train = torch.randint(0, 9, (60000,)) # Random Digit i.e. Second Input
sum_train = number_train + train_dataset.targets # Sum of Random Digit and MNIST i.e. Second Output
number_train = torch.nn.functional.one_hot(number_train, num_classes=10)

# Test Set, 10000 Samples
number_test = torch.randint(0, 9, (10000,)) # Random Digit i.e. Second Input
sum_test = number_test + test_dataset.targets # Sum of Random Digit and MNIST i.e. Second Output
number_test = torch.nn.functional.one_hot(number_test, num_classes=10)

## Re-creating Dataset

In [4]:
# Flattens the MNIST images
train_x = train_dataset.data.reshape(60000, 784).float()
test_x = test_dataset.data.reshape(10000, 784).float()

# Creats the Dataset with two inputs and two outputs
train_ds = torch.utils.data.TensorDataset(train_x, number_train, train_dataset.targets, sum_train)
test_ds = torch.utils.data.TensorDataset(test_x, number_test, test_dataset.targets, sum_test)

## DataLoader

In [5]:
batch_size = 32
train_loader = torch.utils.data.DataLoader(
                        dataset=train_ds,
                        batch_size=batch_size,
                        shuffle=True
                        )

test_loader = torch.utils.data.DataLoader(
                        dataset=test_ds,
                        batch_size=batch_size,
                        shuffle=False # Not necessary!
                        )

## Model

In [6]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.lineara1 = nn.Linear(784, 30) # Flattened MNIST as Input: 28 * 28
        self.relu = nn.ReLU()
        self.selu = nn.SELU()
        self.lineara2 = nn.Linear(30, 30)
        self.lineara3 = nn.Linear(30,10) # 10 Classes for First Output: 0-9

        self.linearb1 = nn.Linear(40, 60) # 10 for random digit, 30 from output of lineara1 layer
        self.linearb2 = nn.Linear(60, 30)
        self.linearb3 = nn.Linear(30,18) # 18 Classes for Second Output: 0-18
            
    def forward(self, Xa, Xb):

        out = self.lineara1(Xa)
        out1 = self.selu(out)
        out = self.lineara2(out1)
        out = self.selu(out)
        out = self.lineara3(out)
        outa = self.selu(out)


        inb = torch.cat((out1,Xb), dim=-1) # Gets input from the first layer after the input layer
        outb = self.linearb1(inb)
        outb = self.relu(outb)
        outb = self.linearb2(outb)
        outb = self.relu(outb)

        outb = self.linearb3(outb)
        outb = self.relu(outb)
        return outa, outb

# Instantiate the Model
model = Model()

## Move Model to GPU (Required Condition by Assignment!)

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

cuda:0


Model(
  (lineara1): Linear(in_features=784, out_features=30, bias=True)
  (relu): ReLU()
  (selu): SELU()
  (lineara2): Linear(in_features=30, out_features=30, bias=True)
  (lineara3): Linear(in_features=30, out_features=10, bias=True)
  (linearb1): Linear(in_features=40, out_features=60, bias=True)
  (linearb2): Linear(in_features=60, out_features=30, bias=True)
  (linearb3): Linear(in_features=30, out_features=18, bias=True)
)

## Model Summary

In [8]:
summary(model, [(1,784), (1,10)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 30]          23,550
              SELU-2                [-1, 1, 30]               0
            Linear-3                [-1, 1, 30]             930
              SELU-4                [-1, 1, 30]               0
            Linear-5                [-1, 1, 10]             310
              SELU-6                [-1, 1, 10]               0
            Linear-7                [-1, 1, 60]           2,460
              ReLU-8                [-1, 1, 60]               0
            Linear-9                [-1, 1, 30]           1,830
             ReLU-10                [-1, 1, 30]               0
           Linear-11                [-1, 1, 18]             558
             ReLU-12                [-1, 1, 18]               0
Total params: 29,638
Trainable params: 29,638
Non-trainable params: 0
---------------------------------

## Loss and Optimizer

In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

## Training Loop

In [10]:

n_epochs = 20



for epoch in range(n_epochs):
    train_loss = []
    n_correcta = 0.
    n_correctb = 0.  
    n_total = 0.

    for inputs, numbers, targets, result in train_loader:

        # Move data to GPU
        inputs, numbers, targets, result = inputs.to(device), numbers.to(device), targets.to(device), result.to(device)
        
        # zero the gradient
        optimizer.zero_grad()
        
        # forward pass
        outputa, outputb = model(inputs, numbers)
        lossa = loss_fn(outputa, targets)
        lossb = loss_fn(outputb, result)

        loss = lossa + lossb # Adds loss values from both outputs

        # get predictiona
        _, predictiona = torch.max(outputa, 1)
        _, predictionb = torch.max(outputb, 1)

        # update counts
        n_correcta += (predictiona == targets).sum().item()
        n_correctb += (predictionb == result).sum().item()
        n_total += targets.shape[0]
 
        # backward pass and optimize
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
    train_loss = np.mean(train_loss)
    train_acc_a = n_correcta / n_total * 100
    train_acc_b = n_correctb / n_total * 100

    
    test_loss = []
    n_correcta = 0.
    n_correctb = 0. 
    n_total = 0.
    
    for inputs, numbers, targets, result in test_loader:

        # Move data to GPU
        inputs, numbers, targets, result = inputs.to(device), numbers.to(device), targets.to(device), result.to(device)
        
        # forward pass
        outputa, outputb = model(inputs, numbers)
        lossa = loss_fn(outputa, targets)
        lossb = loss_fn(outputb, result)

        loss = lossa + lossb # Adds loss values from both outputs
                
         # get predictions
        _, predictiona = torch.max(outputa, 1) 
        _, predictionb = torch.max(outputb, 1)
        
        
        # update counts
        n_correcta += (predictiona == targets).sum().item()
        n_correctb += (predictionb == result).sum().item()

        n_total += targets.shape[0]

        test_loss.append(loss.item())
        
    test_loss = np.mean(test_loss)
    test_acc_a = n_correcta / n_total * 100
    test_acc_b = n_correctb / n_total * 100
    

    print(f'Epoch: {epoch+1}/{n_epochs}, Train Accuracy [MNIST]: {train_acc_a:.2f}%,  Train Accuracy [Sum]: {train_acc_b:.2f}%, Train Loss: {train_loss:.4f}, Test Accuracy [MNIST]: {test_acc_a:.2f}%,  Test Accuracy [Sum]: {test_acc_b:.2f}%, Train Loss: {test_loss:.4f}')

Epoch: 1/20, Train Accuracy [MNIST]: 78.76%,  Train Accuracy [Sum]: 11.64%, Train Loss: 3.3668, Test Accuracy [MNIST]: 83.33%,  Test Accuracy [Sum]: 12.98%, Train Loss: 3.0302
Epoch: 2/20, Train Accuracy [MNIST]: 83.33%,  Train Accuracy [Sum]: 19.43%, Train Loss: 2.8587, Test Accuracy [MNIST]: 83.94%,  Test Accuracy [Sum]: 31.13%, Train Loss: 2.4583
Epoch: 3/20, Train Accuracy [MNIST]: 88.69%,  Train Accuracy [Sum]: 49.07%, Train Loss: 1.8991, Test Accuracy [MNIST]: 92.82%,  Test Accuracy [Sum]: 64.43%, Train Loss: 1.4057
Epoch: 4/20, Train Accuracy [MNIST]: 92.84%,  Train Accuracy [Sum]: 70.71%, Train Loss: 1.2434, Test Accuracy [MNIST]: 92.35%,  Test Accuracy [Sum]: 73.82%, Train Loss: 1.1752
Epoch: 5/20, Train Accuracy [MNIST]: 93.09%,  Train Accuracy [Sum]: 78.45%, Train Loss: 1.0277, Test Accuracy [MNIST]: 93.12%,  Test Accuracy [Sum]: 78.36%, Train Loss: 1.0014
Epoch: 6/20, Train Accuracy [MNIST]: 93.33%,  Train Accuracy [Sum]: 82.47%, Train Loss: 0.9114, Test Accuracy [MNIST]: 9