### Importing required libraries

In [25]:
import torch
from torch.utils.data import Dataset
import torchvision #provide access to datasets, models, transforms, utils, etc
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

In [None]:
EPOCH = 50
BATCH_SIZE = 32

In [14]:
mnist_train_set = torchvision.datasets.MNIST(
            root='./try1',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor()
            ])
        )

In [15]:
# trying the sum of 60000 random integers with labels of train dataset of mnist
sum_labels = mnist_train_set.targets + torch.randint(0, 9, (1, 60000)).squeeze()
sum_labels
# torch.stack((mnist_train_set.targets, sum_labels), dim=1)

tensor([11,  1, 11,  ...,  8, 12, 12])

In [56]:
sample = next(iter(mnist_train_set))
image, label = sample
image.dtype

torch.float32

In [59]:
a, b = mnist_train_set[0]
a

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [16]:
type(mnist_train_set[0])

tuple

In [17]:
len(mnist_train_set.data)

60000

In [18]:
rn = torch.randint(0, 9, (1, 60000)).squeeze()
rn[1]

tensor(8)

In [44]:
class MNISTWithNumbers(Dataset):

    def __init__(self):

        self.mnist_data = torchvision.datasets.MNIST(
            root='./data',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor()
            ])
        )

        self.random_numbers = torch.randint(0, 9, (1, len(self.mnist_data.targets))).squeeze()
        self.numbers = torch.zeros((len(self.mnist_data.targets), 10))
        for i in range(len(self.mnist_data.targets)):
            self.numbers[i][self.random_numbers[i]] = 1


    def __getitem__(self, index):

        sample = self.mnist_data.data[index]
        image = sample[0].unsqueeze(0)
        label = sample[1]
        number = self.numbers[index].unsqueeze(0)
        sum_label = label + self.random_numbers[index]

        return image, label, number, sum_label

    def __len__(self):
        return len(self.mnist_data.data)
    
    @property
    def train_labels(self):
        
        sum_labels = self.mnist_data.targets + self.random_numbers
        return self.mnist_data.targets, sum_labels


In [45]:
mnist_with_numbers = MNISTWithNumbers()

In [46]:
targets, sum_labels = mnist_with_numbers.train_labels
targets.shape, sum_labels.shape

(torch.Size([60000]), torch.Size([60000]))

In [47]:
sample = next(iter(mnist_with_numbers))
image, label, number, sum_label = sample
image.shape, label.shape, number.shape, sum_label.shape

(torch.Size([1, 28, 28]), torch.Size([]), torch.Size([1, 10]), torch.Size([]))

In [48]:
number, sum_label, label

(tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]), tensor(8), tensor(5))

In [11]:
train_data_loader = torch.utils.data.DataLoader(
    mnist_with_numbers,
    batch_size=BATCH_SIZE,
    shuffle=True
)

In [23]:
batch = next(iter(train_data_loader))
images, labels, numbers, sum_labels = batch
images.shape, labels.shape, numbers.shape, sum_labels.shape

(torch.Size([32, 1, 28, 28]),
 torch.Size([32]),
 torch.Size([32, 1]),
 torch.Size([32]))

In [49]:
class Network(nn.Module):
    def __init__(self,):
        super().__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3) 
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(in_features=1600, out_features=120)
        self.fc2 = nn.Linear(in_features=130, out_features=60)
        self.out1 = nn.Linear(in_features=60, out_features=10)
        self.fc3 = nn.Linear(in_features=60, out_features=30)
        self.out2 = nn.Linear(in_features=30, out_features=5)

    def forward(self, t1, t2):
        # input layer
        x1 = t1
        x2 = t2

        # conv1 layer
        x1 = self.conv1(x1) # 28 | 26
        x1 = F.relu(x1)

        # conv2 layer
        x1 = self.conv2(x1) # 26 | 24
        x1 = F.relu(x1)
        x1 = F.max_pool2d(x1, kernel_size=2, stride=2) # 24 | 12
        
        # conv3 layer
        x1 = self.conv3(x1) # 12 | 10
        x1 = F.relu(x1)
        x1 = F.max_pool2d(x1, kernel_size=2, stride=2) # 10 | 5

        # flattening the tensor till dimension 1 and keeping the batches
        x1 = x1.flatten(1) # 64 * 5 * 5 = 1600

        # fc1 layer
        x1 = self.fc1(x1) # 1600 | 120
        x1 = F.relu(x1)

        # concatenating the random number
        x = torch.cat((x1, x2), 1) # 120 + 10 

        # fc2 layer 
        x = self.fc2(x) # 130 | 60
        x = F.relu(x)

        #  out1 layer
        out1 = self.out1(x) # 60 | 10
        out1 = F.softmax(out1)

        # fc3 layer
        out2 = self.fc3(x) # 60 | 30
        out2 = F.relu(out2)

        # out2 layer

        out2 = self.out2(out2) # 30 | 5

        return out1, out2


In [50]:
network = Network()

In [54]:
image.dtype

torch.uint8

In [53]:
out1, out2 = network(image.unsqueeze(0), number.unsqueeze(0))
out1, out2

RuntimeError: expected scalar type Byte but found Float