In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchsummary import summary


In [2]:
train_dataset = torchvision.datasets.MNIST(
                    root='.',
                    train=True,
                    transform=transforms.ToTensor(), # This will automatically normalize data!
                    download=True)

In [3]:
test_dataset = torchvision.datasets.MNIST(
                    root='.',
                    train=False,
                    transform=transforms.ToTensor(),
                    download=True)

In [4]:
batch_size = 32
train_loader = torch.utils.data.DataLoader(
                        dataset=train_dataset,
                        batch_size=batch_size,
                        shuffle=True
                        )

test_loader = torch.utils.data.DataLoader(
                        dataset=test_dataset,
                        batch_size=batch_size,
                        shuffle=False # Not necessary!
                        )

In [5]:
class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.linear1 = nn.Linear(794, 30)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(30, 30)
        # self.linear3 = nn.Linear(30,28)
        self.linear3 = nn.Linear(30,10)
        self.linear4 = nn.Linear(30,18)
            
    def forward(self, X):
        # out = torch.cat((Xa,Xb), dim=-1)
        out = self.linear1(X)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.relu(out)
        # out = torch.cat((Xa,outa), dim=-1)
        # out = self.linear3(out)
        # out = self.relu(out)
        outa = self.linear3(out)
        outa = self.relu(outa)
        outb = self.linear4(out)
        outb = self.relu(outb)
        return outa, outb

model = Model1()

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

cpu


Model1(
  (linear1): Linear(in_features=794, out_features=30, bias=True)
  (relu): ReLU()
  (linear2): Linear(in_features=30, out_features=30, bias=True)
  (linear3): Linear(in_features=30, out_features=10, bias=True)
  (linear4): Linear(in_features=30, out_features=18, bias=True)
)

In [7]:
summary(model, [(1,794)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 30]          23,850
              ReLU-2                [-1, 1, 30]               0
            Linear-3                [-1, 1, 30]             930
              ReLU-4                [-1, 1, 30]               0
            Linear-5                [-1, 1, 10]             310
              ReLU-6                [-1, 1, 10]               0
            Linear-7                [-1, 1, 18]             558
              ReLU-8                [-1, 1, 18]               0
Total params: 25,648
Trainable params: 25,648
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.10
Estimated Total Size (MB): 0.10
----------------------------------------------------------------


In [8]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [9]:
# Training Loop

n_epochs = 10



for epoch in range(n_epochs):
    train_loss = []
    n_correcta = 0.
    n_total = 0.
    n_correctb = 0.
    for inputs, targets in train_loader:
        # Move data to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Reshape the data
        inputs = inputs.view(-1, 784)
        
        # Generate random numbers
        ri = torch.randint(0, 9, (inputs.shape[0],))
        r = torch.nn.functional.one_hot(ri, num_classes=10)

        # Concatenate the inputs
        data = torch.hstack((inputs,r))

        # Add image target and input number
        # ro = ri + targets
        # result = torch.nn.functional.one_hot(ro, num_classes=18)

        result = ri + targets

        # zero the gradient
        optimizer.zero_grad()
        
        # forward pass
        outputa, outputb = model(data)
        lossa = loss_fn(outputa, targets)
        lossb = loss_fn(outputb, targets)
        loss = lossa + lossb

        # get predictiona
        _, predictiona = torch.max(outputa, 1)
        _, predictionb = torch.max(outputb, 1)

        # update counts
        n_correcta += (predictiona == targets).sum().item()
        n_total += targets.shape[0]

        n_correctb += (predictionb == result).sum().item()
        
        # backward pass and optimize
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
    train_loss = np.mean(train_loss)
    train_acc_a = n_correcta / n_total * 100
    train_acc_b = n_correctb / n_total * 100

    
    test_loss = []
    n_correct = 0.
    n_total = 0.
    
    
    
    # for inputs, targets in test_loader:
    #     # Move data to GPU
    #     inputs, targets = inputs.to(device), targets.to(device)
        
    #     # Reshape the data
    #     inputs = inputs.view(-1, 784)
        
    #     # Generate random numbers
    #     ri = torch.randint(0, 9, (inputs.shape[0])
    #     r = torch.nn.functional.one_hot(ri, num_classes=10)

    #     # Concatenate the inputs
    #     data = torch.hstack((inputs,r))

    #     # forward pass
    #     outputs_test = model(data)
    #     loss_test = loss_fn(outputs_test, targets)
        
    #     # get prediction
    #     _, predictions = torch.max(outputs_test, 1)
              
    #     # update counts
    #     n_correct += (predictions == targets).sum().item()
    #     n_total += targets.shape[0]
        
    #     test_loss.append(loss_test.item())
        
    # test_loss = np.mean(test_loss)
    # test_acc = n_correct / n_total * 100
    
    
    print(f'Epoch: {epoch+1}/{n_epochs}, Train Accuracy [MNIST]: {train_acc_a:.2f}%,  Train Accuracy [Sum]: {train_acc_b:.2f}%, Train Loss: {train_loss:.4f}')

Epoch: 1/10, Train Accuracy [MNIST]: 50.83%,  Train Accuracy [Sum]: 8.07%, Train Loss: 2.7034
Epoch: 2/10, Train Accuracy [MNIST]: 55.75%,  Train Accuracy [Sum]: 8.16%, Train Loss: 2.3451
Epoch: 3/10, Train Accuracy [MNIST]: 56.36%,  Train Accuracy [Sum]: 7.94%, Train Loss: 2.2836
Epoch: 4/10, Train Accuracy [MNIST]: 56.65%,  Train Accuracy [Sum]: 7.91%, Train Loss: 2.2502
Epoch: 5/10, Train Accuracy [MNIST]: 56.85%,  Train Accuracy [Sum]: 7.95%, Train Loss: 2.2284
Epoch: 6/10, Train Accuracy [MNIST]: 57.04%,  Train Accuracy [Sum]: 7.96%, Train Loss: 2.2116
Epoch: 7/10, Train Accuracy [MNIST]: 57.13%,  Train Accuracy [Sum]: 7.78%, Train Loss: 2.1979
Epoch: 8/10, Train Accuracy [MNIST]: 57.22%,  Train Accuracy [Sum]: 7.87%, Train Loss: 2.1883
Epoch: 9/10, Train Accuracy [MNIST]: 57.32%,  Train Accuracy [Sum]: 7.78%, Train Loss: 2.1793
Epoch: 10/10, Train Accuracy [MNIST]: 57.39%,  Train Accuracy [Sum]: 7.75%, Train Loss: 2.1737


In [10]:
class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.lineara1 = nn.Linear(784, 30)
        self.relu = nn.ReLU()
        self.lineara2 = nn.Linear(30, 30)
        # self.linear3 = nn.Linear(30,28)
        self.lineara3 = nn.Linear(30,10)
        # self.lineara4 = nn.Linear(30,18)

        self.linearb1 = nn.Linear(40, 60)
        self.relu = nn.ReLU()
        self.linearb2 = nn.Linear(60, 30)
        # self.linear3 = nn.Linear(30,28)
        self.linearb3 = nn.Linear(30,18)
        # self.linearb4 = nn.Linear(30,18)
            
    def forward(self, Xa, Xb):
        # out = torch.cat((Xa,Xb), dim=-1)

        out = self.lineara1(Xa)
        out1 = self.relu(out)
        out = self.lineara2(out1)
        out = self.relu(out)
        out = self.lineara3(out)
        outa = self.relu(out)
        # out = torch.cat((Xa,outa), dim=-1)
        # out = self.linear3(out)
        # out = self.relu(out)

        inb = torch.cat((out1,Xb), dim=-1)
        outb = self.linearb1(inb)
        outb = self.relu(outb)
        outb = self.linearb2(outb)
        outb = self.relu(outb)
        outb = self.linearb3(outb)
        outb = self.relu(outb)
        return outa, outb

model = Model2()

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

cpu


Model2(
  (lineara1): Linear(in_features=784, out_features=30, bias=True)
  (relu): ReLU()
  (lineara2): Linear(in_features=30, out_features=30, bias=True)
  (lineara3): Linear(in_features=30, out_features=10, bias=True)
  (linearb1): Linear(in_features=40, out_features=60, bias=True)
  (linearb2): Linear(in_features=60, out_features=30, bias=True)
  (linearb3): Linear(in_features=30, out_features=18, bias=True)
)

In [12]:
summary(model, [(1,784), (1,10)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 30]          23,550
              ReLU-2                [-1, 1, 30]               0
            Linear-3                [-1, 1, 30]             930
              ReLU-4                [-1, 1, 30]               0
            Linear-5                [-1, 1, 10]             310
              ReLU-6                [-1, 1, 10]               0
            Linear-7                [-1, 1, 60]           2,460
              ReLU-8                [-1, 1, 60]               0
            Linear-9                [-1, 1, 30]           1,830
             ReLU-10                [-1, 1, 30]               0
           Linear-11                [-1, 1, 18]             558
             ReLU-12                [-1, 1, 18]               0
Total params: 29,638
Trainable params: 29,638
Non-trainable params: 0
---------------------------------

In [13]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [15]:
# Model 2

# Training Loop

n_epochs = 10


for epoch in range(n_epochs):
    train_loss = []
    n_correcta = 0.
    n_total = 0.
    n_correctb = 0.
    for inputs, targets in train_loader:
        # Move data to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        
        # Reshape the data
        inputs = inputs.view(-1, 784)
        
        # Generate random numbers
        ri = torch.randint(0, 9, (inputs.shape[0],))
        r = torch.nn.functional.one_hot(ri, num_classes=10)

        # Concatenate the inputs
        data = torch.hstack((inputs,r))

        # Add image target and input number
        # ro = ri + targets
        # result = torch.nn.functional.one_hot(ro, num_classes=18)

        result = ri + targets

        # zero the gradient
        optimizer.zero_grad()
        
        # forward pass
        outputa, outputb = model(inputs, r)
        lossa = loss_fn(outputa, targets)
        lossb = loss_fn(outputb, targets)
        loss = lossa + lossb

        # get predictiona
        _, predictiona = torch.max(outputa, 1)
        _, predictionb = torch.max(outputb, 1)

        # update counts
        n_correcta += (predictiona == targets).sum().item()
        n_total += targets.shape[0]

        n_correctb += (predictionb == result).sum().item()
        
        # backward pass and optimize
        loss.backward()
        optimizer.step()
        
        train_loss.append(loss.item())
        
    train_loss = np.mean(train_loss)
    train_acc_a = n_correcta / n_total * 100
    train_acc_b = n_correctb / n_total * 100
    
    test_loss = []
    n_correct = 0.
    n_total = 0.
    
    
    
    # for inputs, targets in test_loader:
    #     # Move data to GPU
    #     inputs, targets = inputs.to(device), targets.to(device)
        
    #     # Reshape the data
    #     inputs = inputs.view(-1, 784)
        
    #     # Generate random numbers
    #     ri = torch.randint(0, 9, (inputs.shape[0])
    #     r = torch.nn.functional.one_hot(ri, num_classes=10)

    #     # Concatenate the inputs
    #     data = torch.hstack((inputs,r))

    #     # forward pass
    #     outputs_test = model(data)
    #     loss_test = loss_fn(outputs_test, targets)
        
    #     # get prediction
    #     _, predictions = torch.max(outputs_test, 1)
        
    #     # update counts
    #     n_correct += (predictions == targets).sum().item()
    #     n_total += targets.shape[0]
        
    #     test_loss.append(loss_test.item())
        
    # test_loss = np.mean(test_loss)
    # test_acc = n_correct / n_total * 100
    

    
    print(f'Epoch: {epoch+1}/{n_epochs}, Train Accuracy [MNIST]: {train_acc_a:.2f}%,  Train Accuracy [Sum]: {train_acc_b:.2f}%, Train Loss: {train_loss:.4f}')

Epoch: 1/10, Train Accuracy [MNIST]: 79.48%,  Train Accuracy [Sum]: 8.18%, Train Loss: 2.1042
Epoch: 2/10, Train Accuracy [MNIST]: 84.68%,  Train Accuracy [Sum]: 8.76%, Train Loss: 1.5060
Epoch: 3/10, Train Accuracy [MNIST]: 85.70%,  Train Accuracy [Sum]: 8.67%, Train Loss: 1.4153
Epoch: 4/10, Train Accuracy [MNIST]: 86.35%,  Train Accuracy [Sum]: 8.57%, Train Loss: 1.3711
Epoch: 5/10, Train Accuracy [MNIST]: 86.75%,  Train Accuracy [Sum]: 8.62%, Train Loss: 1.3400
Epoch: 6/10, Train Accuracy [MNIST]: 87.09%,  Train Accuracy [Sum]: 8.83%, Train Loss: 1.3176
Epoch: 7/10, Train Accuracy [MNIST]: 87.33%,  Train Accuracy [Sum]: 8.73%, Train Loss: 1.2990
Epoch: 8/10, Train Accuracy [MNIST]: 87.48%,  Train Accuracy [Sum]: 8.69%, Train Loss: 1.2838
Epoch: 9/10, Train Accuracy [MNIST]: 87.61%,  Train Accuracy [Sum]: 8.76%, Train Loss: 1.2727
Epoch: 10/10, Train Accuracy [MNIST]: 87.74%,  Train Accuracy [Sum]: 8.70%, Train Loss: 1.2635
