In [1]:
import random 
import numpy as np
from tqdm.notebook import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
# normalize images
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
])

# mnist dataset
img_train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
img_test_dataset = datasets.MNIST('../data', train=False, transform=transform)

# mnist dataloader
img_train_loader = DataLoader(img_train_dataset, shuffle=True, batch_size=512)
img_test_loader = DataLoader(img_test_dataset, shuffle=False, batch_size=512)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw

Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
# # random number dataset
# class NumDataset(Dataset):
#     def __init__(self, num_digits):
#         # init random int tensor
#         self.num = torch.tensor([random.randint(0, 9) for x in range(num_digits)], dtype=torch.long).unsqueeze(1)

#     def __getitem__(self, idx):
#         return self.num[idx]
    
#     def __len__(self):
#         return len(self.num)


# num_train_dataset = NumDataset(num_digits=len(img_train_dataset))
# num_test_dataset = NumDataset(num_digits=len(img_test_dataset))

# num_train_loader = DataLoader(num_train_dataset, shuffle=False, batch_size=64)
# num_test_loader = DataLoader(num_train_dataset, shuffle=False, batch_size=64)

In [8]:
class ModelClassifier(nn.Module):
    """
    Model definisition.
    Input: 2 objects. A number and image.
    Output: Sum of number and image representations.
    """
    def __init__(self):
        super(ModelClassifier, self).__init__()
        
        self.conv1 = self.conv_block(c_in=1, c_out=64, kernel_size=3, stride=1, padding=1)
        self.conv2 = self.conv_block(c_in=64, c_out=128, kernel_size=3, stride=1)
        self.conv3 = self.conv_block(c_in=128, c_out=64, kernel_size=3, stride=1)

        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = self.lin_block(c_in=10 * 10 * 64, c_out=20)

        self.fc2 = self.lin_block(c_in=1, c_out=10)
        self.fc3 = self.lin_block(c_in=10, c_out=20)


    def forward(self, x_img, x_num):
        # x_img = 28 x 28 x 3 
        x_img = self.conv1(x_img) # 28 x 28 x 32
        x_img = self.maxpool(x_img) # 14 x 14 x 32
        x_img = self.conv2(x_img) # 12 x 12 x 64
        x_img = self.conv3(x_img) # 10 x 10 x 16
        x_img = x_img.flatten(1) # 1600
        x_img = self.fc1(x_img) # 10

        x_num = self.fc2(x_num.unsqueeze(1)) # 10
        x_num = self.fc3(x_num) # 20

        return x_img + x_num


    def conv_block(self, c_in, c_out, **kwargs):
        seq_block = nn.Sequential(
            nn.Conv2d(in_channels=c_in, out_channels=c_out, **kwargs),
            nn.BatchNorm2d(num_features=c_out),
            nn.ReLU()
        )
        
        return seq_block



    def lin_block(self, c_in, c_out):
        seq_block = nn.Sequential(
            nn.Linear(in_features=c_in, out_features=c_out),
            nn.BatchNorm1d(num_features=c_out),
            nn.ReLU()
        )
        
        return seq_block


# initialise model
model = ModelClassifier()

model.to(device)

# initialise loss function
criterion = nn.CrossEntropyLoss()

# initialise optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
# accuracy function
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)    
    
    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    acc = torch.round(acc * 100)
    
    return acc

In [16]:
def train_loop(
    model, epochs, optimizer, criterion, img_train_loader, device
):

    print("Begin training.")
    img_train_loader_iter = iter(img_train_loader)
    # num_train_loader_iter = iter(num_train_loader)

    # run mulitple epochs
    for e in range(epochs):
        train_epoch_loss = 0
        train_epoch_acc = 0

        # load images from train loader
        model.train()
        for x_train_img, y_train_img in img_train_loader:
            x_train_img, y_train_img = x_train_img.to(device), y_train_img.to(device)

            # generate a random integer tensor
            numbers = torch.randint(0, 9, y_train_img.shape).to(device)
            
            # generate output
            y_train_batch = numbers + y_train_img

            # zero out gradients for the batch
            optimizer.zero_grad()

            # generate predictions
            y_train_pred = model(x_train_img, numbers/10)

            # calculate loss and acc
            train_loss = criterion(y_train_pred, y_train_batch)
            train_acc = multi_acc(y_train_pred, y_train_batch)

            # gradients
            train_loss.backward()

            # backprop
            optimizer.step()

            # accumulate loss/acc per epoch
            train_epoch_loss += train_loss.item()
            train_epoch_acc += train_acc.item()

        # average loss/acc per epoch
        avg_train_epoch_loss = train_epoch_loss / len(img_train_loader)
        avg_train_epoch_acc = train_epoch_acc / len(img_train_loader)

        print(
            f"Epoch {e+0:02}/{epochs}: | Train Loss: {avg_train_epoch_loss:.5f} | Train Acc: {avg_train_epoch_acc:.3f}%"
        )
        
    return model

In [17]:
EPOCHS = 200
trained_model = train_loop(model, EPOCHS, optimizer, criterion, img_train_loader, device)

Begin training.
Epoch 00/200: | Train Loss: 2.63119 | Train Acc: 17.149%
Epoch 01/200: | Train Loss: 2.42525 | Train Acc: 19.847%
Epoch 02/200: | Train Loss: 2.34109 | Train Acc: 21.328%
Epoch 03/200: | Train Loss: 2.28273 | Train Acc: 22.387%
Epoch 04/200: | Train Loss: 2.21737 | Train Acc: 22.885%
Epoch 05/200: | Train Loss: 2.15720 | Train Acc: 24.111%
Epoch 06/200: | Train Loss: 2.10579 | Train Acc: 25.532%
Epoch 07/200: | Train Loss: 2.06823 | Train Acc: 26.813%
Epoch 08/200: | Train Loss: 2.03280 | Train Acc: 27.345%
Epoch 09/200: | Train Loss: 2.00797 | Train Acc: 28.349%
Epoch 10/200: | Train Loss: 1.98063 | Train Acc: 28.779%
Epoch 11/200: | Train Loss: 1.94783 | Train Acc: 29.455%
Epoch 12/200: | Train Loss: 1.92739 | Train Acc: 30.009%
Epoch 13/200: | Train Loss: 1.90695 | Train Acc: 30.081%
Epoch 14/200: | Train Loss: 1.88671 | Train Acc: 30.498%
Epoch 15/200: | Train Loss: 1.86636 | Train Acc: 31.157%
Epoch 16/200: | Train Loss: 1.84722 | Train Acc: 31.204%
Epoch 17/200: |

In [18]:
def test_loop(test_loader):
    y_pred_list = []
    y_true_list = []

    with torch.no_grad():
        # load batches from test loader
        for x_batch_img, y_batch_img in tqdm(test_loader):
            x_batch_img, y_batch_img = x_batch_img.to(device), y_batch_img.to(device)

            # generate random integers
            numbers = torch.randint(0, 9, y_batch_img.shape).to(device)

            y_true = numbers + y_batch_img
            
            # get prediction probs
            y_pred_probs = model(x_batch_img, numbers/10)

            # get indices of max prob
            _, y_pred = torch.max(y_pred_probs, dim = 1)

            # store predictions in a list
            y_pred_list.append(y_pred.cpu().numpy())
            y_true_list.append(y_true.cpu().numpy())

    return y_pred_list, y_true_list

In [19]:
# generate predictions
y_pred_list, y_true_list = test_loop(img_test_loader)

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))




In [20]:
# flatten lists
y_pred_list = np.concatenate(y_pred_list).ravel().tolist()
y_true_list = np.concatenate(y_true_list).ravel().tolist()

In [21]:
# classification report
print(classification_report(y_pred_list, y_true_list))

              precision    recall  f1-score   support

           0       0.99      0.96      0.98       110
           1       0.99      0.71      0.83       314
           2       0.05      0.07      0.06       257
           3       0.57      0.42      0.48       647
           4       0.58      0.54      0.56       636
           5       0.34      0.58      0.43       394
           6       0.63      0.47      0.53      1107
           7       0.53      0.49      0.51       956
           8       0.50      0.59      0.54       800
           9       0.42      0.66      0.51       655
          10       0.44      0.47      0.45       805
          11       0.42      0.43      0.42       747
          12       0.47      0.44      0.46       672
          13       0.45      0.44      0.44       556
          14       0.49      0.45      0.47       473
          15       0.70      0.50      0.59       439
          16       0.86      0.66      0.75       289
          17       0.86    

In [22]:
print(accuracy_score(y_pred_list, y_true_list))

0.5047


In [23]:
print(confusion_matrix(y_pred_list, y_true_list))

[[106   0   0   0   0   0   2   0   2   0   0   0   0   0   0   0   0   0]
 [  1 222  87   0   0   0   1   1   1   1   0   0   0   0   0   0   0   0]
 [  0   1  18 143  78   0   1  14   1   0   1   0   0   0   0   0   0   0]
 [  0   1 251 272  98  21   0   2   1   1   0   0   0   0   0   0   0   0]
 [  0   0  11  58 342 196  22   2   0   4   1   0   0   0   0   0   0   0]
 [  0   0   1   1  40 228 111  11   2   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0  25 197 516 286  70   9   0   3   1   0   0   0   0   0]
 [  0   0   1   1   2  16 165 473 256  39   2   0   1   0   0   0   0   0]
 [  0   0   0   0   1   4   4  90 476 194  20   3   5   3   0   0   0   0]
 [  0   0   0   0   1   0   1  10 105 432  95   8   1   2   0   0   0   0]
 [  0   0   0   0   0   2   0   0  31 264 376 118  11   1   2   0   0   0]
 [  0   0   0   0   0   0   0   1   0  58 297 318  61  10   1   1   0   0]
 [  0   0   0   0   0   0   0   0   1   1  54 260 298  41  11   4   2   0]
 [  0   0   0   0   0   0

In [None]:
# def train_loop(
#     model, epochs, optimizer, criterion, img_train_loader, num_train_loader, device
# ):

#     print("Begin training.")
#     img_train_loader_iter = iter(img_train_loader)
#     num_train_loader_iter = iter(num_train_loader)

#     for e in range(epochs):
#         train_epoch_loss = 0
#         train_epoch_acc = 0

#         model.train()
#         for b in range(len(img_train_loader)):
#             x_train_img, y_train_img = next(img_train_loader_iter)
#             x_train_num = next(num_train_loader_iter)

#             y_train_batch =  x_train_num.squeeze() + y_train_img

#             x_train_img, y_train_img = x_train_img.to(device), y_train_img.to(device)
#             x_train_num = x_train_num.to(device)

#             optimizer.zero_grad()

#             y_train_pred = model(x_train_img, x_train_num.squeeze()/10)

#             # print(y_train_pred.shape, y_train_batch.shape, max(y_train_batch), min(y_train_batch))
#             # print(y_train_pred.shape, y_train_pred)
#             # print(y_train_batch.shape, y_train_batch)

#             train_loss = criterion(y_train_pred, y_train_batch)
#             train_acc = multi_acc(y_train_pred, y_train_batch)

#             train_loss.backward()
#             optimizer.step()

#             train_epoch_loss += train_loss.item()
#             train_epoch_acc += train_acc.item()

#         avg_train_epoch_loss = train_epoch_loss / len(img_train_loader)
#         avg_train_epoch_acc = train_epoch_acc / len(img_train_loader)

#         print(
#             f"Epoch {e+0:02}/{epochs}: | Train Loss: {avg_train_epoch_loss:.5f} | Train Acc: {avg_train_epoch_acc:.3f}%"
#         )
        
#     return model