# Compare the accuracy of a CNN trained with a plain BCE loss against a CNN trained with an "advanced" (separately weighted) BCE loss on the MNIST dataset

In [None]:
import time
import copy
import random

import torch
import torchvision
import numpy as np

from tqdm import tqdm
import matplotlib.pyplot as plt

# Parameters

In [None]:
# Training hyper-parameters shared by both experiments.
EPOCHS = 5       # full passes over the training set
BATCH_SIZE = 16  # samples per optimizer step
LR = 0.001       # Adam learning rate

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
TARGET = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load train and test MNIST datasets

In [None]:
# Training split of MNIST, downloaded on first use into the current directory
# (root=''); each image goes through the ToTensor transform.
train = torchvision.datasets.MNIST(
    root='',
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ]),
)

# Held-out test split, prepared with the same transform.
test = torchvision.datasets.MNIST(
    root='',
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ]),
)

# Shuffled mini-batches for training, loaded by 4 worker processes.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# CNN (input - [batch_size, 1, 28, 28], output [batch_size, 10])

In [None]:
class Net(torch.nn.Module):
    """Small CNN: two 5x5 conv + 2x2 max-pool stages, then two linear layers.

    For a [batch_size, 1, 28, 28] input the output is [batch_size, 10], with
    every entry passed through a sigmoid (independent per-class scores).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)

        # Classifier head; 4*4*64 is the flattened size of the conv output
        # for a 28x28 input.
        self.fc1 = torch.nn.Linear(in_features=4 * 4 * 64, out_features=64)
        self.fc2 = torch.nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return per-class sigmoid scores for the image batch ``x``."""
        F = torch.nn.functional

        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        features = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=(2, 2))
        # Stage 2: conv -> ReLU -> 2x2 max-pool.
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=(2, 2))

        # Flatten to [N, 4*4*64] for the linear layers.
        flat = features.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))

        logits = self.fc2(hidden)
        return torch.sigmoid(logits)


print(Net())

# CNN train

In [None]:
# Train the baseline network with a plain BCE loss over one-hot targets and
# keep a copy of the network from the epoch with the lowest recorded loss.
net = Net().to(TARGET)
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
bce_loss = torch.nn.BCELoss()

best_net = Net().to(TARGET)
best_loss = torch.tensor(10e10)  # sentinel larger than any real loss

for epoch in range(EPOCHS):
    time.sleep(1)  # pause between epochs (kept from the original cell)
    for data in tqdm(train_dataloader):
        x_train, y_train = data

        # The dataset's ToTensor() transform already yields pixels in [0, 1].
        # The previous extra `x_train /= 255.0` shrank training inputs to
        # [0, 1/255] while the evaluation cells feed unscaled [0, 1] images,
        # so the network was tested on a different input range than it was
        # trained on. No further scaling is applied here.
        x_train = x_train.to(TARGET)
        y_train = y_train.to(TARGET)

        net.zero_grad()
        output = net(x_train)

        # One-hot targets with the same shape as the network output.
        y_train_ans = torch.nn.functional.one_hot(
            y_train, num_classes=output.size(1)
        ).float()

        loss = bce_loss(output, y_train_ans)

        loss.backward()
        optimizer.step()

    # NOTE: `loss` is the loss of the last mini-batch of the epoch, not an
    # epoch average; it is what gets reported and used for model selection.
    print('Loss', loss.item())

    if loss < best_loss:
        best_net = copy.deepcopy(net)
        # Detach so the stored value does not keep the autograd graph alive.
        best_loss = loss.detach()

time.sleep(0.1)
print('Best loss', best_loss.item())

In [None]:
# Spot-check the baseline network on one randomly chosen test image.
# randrange(len(test)) draws from 0 .. len(test) - 1; the previous
# randint(0, len(test)) included len(test) itself and could index past the
# end of the dataset.
i = random.randrange(len(test))
print('Item', i)

image, real = test[i]
net_result = best_net(image.unsqueeze(0).to(TARGET))  # add batch dimension

print('Predict', np.argmax(net_result.cpu().detach().numpy()))
print('Real', real)

plt.imshow(image.view(28,28))
plt.show()

# CNN train with advanced loss calculation

In [None]:
# Train a second network with the "advanced" loss: BCE is computed separately
# over the true-class entries and over all other entries of the output, and
# the two terms are combined with weights 1.0 and 3.0.
net_adv = Net().to(TARGET)
optimizer_adv = torch.optim.Adam(net_adv.parameters(), lr=LR)
bce_loss_adv = torch.nn.BCELoss()

best_net_adv = Net().to(TARGET)
best_loss_adv = torch.tensor(10e10)  # sentinel larger than any real loss

for epoch in range(EPOCHS):
    time.sleep(1)  # pause between epochs (kept from the original cell)
    for data in tqdm(train_dataloader):
        x_train, y_train = data

        # The dataset's ToTensor() transform already yields pixels in [0, 1].
        # The previous extra `x_train /= 255.0` shrank training inputs to
        # [0, 1/255] while the evaluation cells feed unscaled [0, 1] images,
        # so no further scaling is applied here.
        #
        # Use TARGET rather than a hard-coded .cuda() so the cell also runs
        # on machines without a GPU.
        x_train = x_train.to(TARGET)
        y_train = y_train.to(TARGET)

        net_adv.zero_grad()
        output = net_adv(x_train)

        # obj marks the true-class entry of every row; no_obj marks the rest.
        obj = torch.nn.functional.one_hot(
            y_train, num_classes=output.size(1)
        ).bool()
        no_obj = ~obj
        y_train_ans = obj.float()

        loss_obj = bce_loss_adv(output[obj], y_train_ans[obj])
        loss_no_obj = bce_loss_adv(output[no_obj], y_train_ans[no_obj])
        loss_adv = 1.0 * loss_obj + 3.0 * loss_no_obj

        loss_adv.backward()
        optimizer_adv.step()

    # NOTE: `loss_adv` is the loss of the last mini-batch of the epoch, not
    # an epoch average; it is what gets reported and used for model selection.
    print('Loss', loss_adv.item())

    if loss_adv < best_loss_adv:
        best_net_adv = copy.deepcopy(net_adv)
        # Detach so the stored value does not keep the autograd graph alive.
        best_loss_adv = loss_adv.detach()

time.sleep(0.1)
print('Best loss', best_loss_adv.item())

In [None]:
# Spot-check the network trained with the advanced loss on one random test
# image. randrange(len(test)) draws from 0 .. len(test) - 1; the previous
# randint(0, len(test)) included len(test) itself and could index past the
# end of the dataset.
i = random.randrange(len(test))
print('Item', i)

image, real = test[i]
adv_net_result = best_net_adv(image.unsqueeze(0).to(TARGET))  # add batch dimension

print('Predict', np.argmax(adv_net_result.cpu().detach().numpy()))
print('Real', real)

# Results

In [None]:
# Compare both networks on the same randomly chosen test image.
# randrange(len(test)) draws from 0 .. len(test) - 1; the previous
# randint(0, len(test)) included len(test) itself and could index past the
# end of the dataset.
i = random.randrange(len(test))
print('Item', i)

image, real = test[i]
batch = image.unsqueeze(0).to(TARGET)  # add batch dimension, move to device

net_result = best_net(batch)
adv_net_result = best_net_adv(batch)

print('Predict', np.argmax(net_result.cpu().detach().numpy()))
print('Predict (adv loss)', np.argmax(adv_net_result.cpu().detach().numpy()))
print('Real', real)

plt.imshow(image.view(28,28))
plt.show()

# Number of prediction errors for each network on the test set

In [None]:
# Count misclassified test images for both networks and report accuracy.
net_wr = 0      # errors of the baseline network
adv_net_wr = 0  # errors of the network trained with the advanced loss

# Pure inference: disable gradient tracking so no autograd graph is built
# for each of the test samples.
with torch.no_grad():
    for image, label in tqdm(test):
        # Add the batch dimension and move to the device once for both nets.
        batch = image.unsqueeze(0).to(TARGET)

        net_result = best_net(batch).argmax().item()
        adv_net_result = best_net_adv(batch).argmax().item()

        if net_result != label:
            net_wr += 1

        if adv_net_result != label:
            adv_net_wr += 1

time.sleep(0.1)
print('Predict errors', net_wr)
print('Predict errors (adv loss)', adv_net_wr)

print('Accuracy', (1 - net_wr / len(test)) * 100)
print('Accuracy (adv loss)', (1 - adv_net_wr / len(test)) * 100)