# Custom Loss Function

* Import
* Cross-entropy loss function
* MSE loss function
* BCE loss function 
* Implementing loss function

## Import

In [None]:
import torch

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as opt
from torch.autograd import Variable

## MSE Loss Function

In [None]:
# Mean squared error (MSE) loss as a plain function:
def mes_loss(y_hat, y):
    """Return the mean squared error between ``y_hat`` and ``y``.

    The squared difference is averaged over every element, which is the
    same reduction ``nn.MSELoss()`` applies by default.

    Args:
        y_hat: Tensor of predictions.
        y: Tensor of targets, broadcastable against ``y_hat``.

    Returns:
        A zero-dimensional tensor holding the mean squared error.
    """
    # The value must be returned; without `return` the function yields None.
    return torch.mean((y_hat - y) ** 2)

In [None]:
# MSE class here
class MyMESLoss(nn.Module):
    """Mean squared error criterion packaged as an ``nn.Module``.

    Calling an instance with ``(output, label)`` returns the average of
    the element-wise squared differences as a zero-dimensional tensor.
    """

    def __init__(self):
        super().__init__()

    def forward(self, output, label):
        """Compute the mean of ``(output - label) ** 2`` over all elements."""
        squared_error = (output - label).pow(2)
        return squared_error.mean()

In [None]:
# Toy batch: one sample with three values; only the first entry is off by 1,
# so the expected mean squared error is 1/3.
output = torch.zeros(1, 3)
targrt = torch.tensor([[1., 0., 0.]])

# Loss computed by the hand-written module.
my_mse = MyMESLoss()
loss = my_mse(output, targrt)
print('custom mse loss:', loss.numpy())

# Loss computed by PyTorch's built-in criterion, for comparison.
org_mes = nn.MSELoss()
loss = org_mes(output, targrt)
print('officail mse loss:', loss.numpy())

custom mse loss: 0.33333334
officail mse loss: 0.33333334


## BCE Loss Function
* Creates a criterion that measures the `Binary Cross Entropy` between the `target` and the `output`.

* $ BCE = -\frac{1}{N} \sum_{i=1}^{N} \left[ y_i \cdot \log(\widehat{y_i}) + (1-y_i) \cdot \log(1-\widehat{y_i}) \right] $

**Reference**

* BCELoss - Pytorch [Docs](https://pytorch.org/docs/stable/generated/torch.nn.BCELoss.html)


In [None]:
# BCE class here
class CustomBCE(nn.Module):
    """Binary cross-entropy with mean reduction, mirroring ``nn.BCELoss()``.

    For predictions ``p`` in [0, 1] and targets ``y`` the loss is
    ``-mean(y * log(p) + (1 - y) * log(1 - p))``, averaged over every
    element of the input.
    """

    def __init__(self):
        super(CustomBCE, self).__init__()

    def forward(self, outputs, labels):
        """Return the mean binary cross-entropy.

        Args:
            outputs: Tensor of predicted probabilities in [0, 1].
            labels: Tensor of targets with the same shape as ``outputs``.

        Returns:
            A zero-dimensional tensor holding the averaged loss.
        """
        # Clamp the logs at -100, as nn.BCELoss does, so that a prediction of
        # exactly 0 or 1 yields a finite loss instead of inf or NaN
        # (0 * log(0) would otherwise evaluate to NaN).
        log_p = torch.clamp(torch.log(outputs), min=-100)
        log_not_p = torch.clamp(torch.log(1 - outputs), min=-100)

        per_element = labels * log_p + (1 - labels) * log_not_p

        # Average over all elements rather than only the first dimension, so
        # multi-dimensional inputs agree with nn.BCELoss(reduction='mean').
        return -torch.mean(per_element)

In [None]:
# Sample predicted probabilities and their binary targets (each 0 or 1).
y_pred = torch.tensor([0.1580, 0.4137, 0.2285])
y_true = torch.tensor([0.0, 1.0, 0.0])

# Loss from the hand-written BCE module.
loss_func = CustomBCE()
loss = loss_func(y_pred, y_true)
print('custom bce loss:', loss.numpy())

# Loss from PyTorch's built-in BCE criterion, for comparison.
loss_func2 = nn.BCELoss()
loss2 = loss_func2(y_pred, y_true)
print('officail bce loss:', loss2.numpy())

custom bce loss: 0.43800268
officail bce loss: 0.4380027


## Cross-Entropy Loss Function

* This criterion combines `LogSoftmax` and `NLLLoss` in one single class.

**Reference**

* CrossEntropyLoss - Pytorch [Docs](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html)



In [None]:
# CrossEntropyLoss
class CustomCrossEntropyLoss(nn.Module):
    """Cross-entropy criterion built from ``LogSoftmax`` followed by ``NLLLoss``.

    This is the same composition ``nn.CrossEntropyLoss`` is documented to
    perform, using the default mean reduction of ``nn.NLLLoss``.
    """

    def __init__(self):
        super(CustomCrossEntropyLoss, self).__init__()
        # Log-probabilities are computed across dim 1 (the class dimension).
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.nll_loss = nn.NLLLoss()

    def forward(self, output, label):
        """Return the cross-entropy between raw scores and class indices.

        Args:
            output: Tensor of unnormalized scores with classes along dim 1.
            label: Tensor of target class indices accepted by ``nn.NLLLoss``.

        Returns:
            A zero-dimensional tensor holding the averaged loss.
        """
        # Compute log-softmax in one fused step. Taking softmax first and the
        # log afterwards underflows to log(0) = -inf for large logit gaps,
        # which makes the loss infinite.
        log_probs = self.log_softmax(output)
        return self.nll_loss(log_probs, label)

In [None]:
# Random scores for 3 samples over 3 classes, plus one class index per sample.
y_pred = torch.randn(3, 3)
y_true = torch.tensor([1, 2, 0])

# Reference value from PyTorch's built-in cross-entropy criterion.
loss_func = nn.CrossEntropyLoss()
loss = loss_func(y_pred, y_true)
print('officail cross-entropy loss:', loss.numpy())

# Value from the hand-written module on the same inputs.
loss_func2 = CustomCrossEntropyLoss()
loss2 = loss_func2(y_pred, y_true)
print('custom cross-entropy loss:', loss2.numpy())

officail cross-entropy loss: 2.4982007
custom cross-entropy loss: 2.4982007


##  Implementing Loss Function

* Set the device
* Dataset & DataLoader
* CNN Model
* Loss function & Optimizer
* Training Model
* Testing Model



### Set The Device

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Hyperparameters
# Batch sizes for the training and test DataLoaders.
# NOTE: `test_batch_szie` (sic) is referenced under this exact spelling by the
# test DataLoader, so the name is kept as-is.
train_batch_size, test_batch_szie = 100, 1000
# Step size handed to the optimizer.
learning_rate = 1e-3
# Number of full passes over the training set.
num_epochs = 5

### Dataset & DataLoader

In [None]:
# MNIST training split, downloaded into ./data if needed; each image is
# converted to a tensor by ToTensor.
train_dataset = MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# MNIST test split with the same storage location and transform.
test_dataset = MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Training loader: shuffled batches of `train_batch_size` samples.
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

# Test loader: fixed-order batches of `test_batch_szie` samples.
test_loader = DataLoader(test_dataset, batch_size=test_batch_szie, shuffle=False)

### CNN Model

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two 3x3 convolutions (padding 1, so spatial size is preserved) are
    followed by one 2x2 max-pool, giving a 128 x 14 x 14 feature map that
    feeds a two-layer fully connected head with 10 outputs.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(stride=2, kernel_size=2)
        )

        self.dense = nn.Sequential(
            nn.Linear(in_features=14*14*128, out_features=1024),
            nn.ReLU(),
            nn.Linear(1024, 10)
        )

    def forward(self, x):
        """Return raw class scores of shape ``(batch, 10)``.

        Args:
            x: Image batch of shape ``(batch, 1, 28, 28)``.

        Raises:
            RuntimeError: If the flattened feature size does not match the
                first linear layer (i.e. the input is not 28x28).
        """
        output = self.conv_layers(x)
        # Flatten each sample separately. `view(-1, 14*14*128)` would instead
        # fold a wrongly sized input into extra rows of the batch dimension
        # and silently return scores for the wrong number of samples.
        output = torch.flatten(output, start_dim=1)
        output = self.dense(output)
        return output

In [None]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

### Loss Function & Optimizer

In [None]:
# Criterion: the hand-written cross-entropy module.
loss_func = CustomCrossEntropyLoss()
# Adam over every model parameter, stepping with `learning_rate`.
optimizer = opt.Adam(params=model.parameters(), lr=learning_rate)

### Training Model

In [None]:
# Train for `num_epochs` passes over `train_loader`, logging the current
# batch loss every 200 batches.
model.train()  # make training mode explicit before optimizing
for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        # Tensors take part in autograd directly; the deprecated `Variable`
        # wrapper is not needed.
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        if (idx+1) % 200 == 0:
            # `.item()` extracts a Python float for logging.
            print("Epoch: %d, Batch: %d, Loss: %.4f" %(epoch+1, idx+1, loss.item()))

Epoch: 1, Batch: 200, Loss: 0.1488
Epoch: 1, Batch: 400, Loss: 0.0756
Epoch: 1, Batch: 600, Loss: 0.0576
Epoch: 2, Batch: 200, Loss: 0.0268
Epoch: 2, Batch: 400, Loss: 0.0353
Epoch: 2, Batch: 600, Loss: 0.0414
Epoch: 3, Batch: 200, Loss: 0.0082
Epoch: 3, Batch: 400, Loss: 0.0026
Epoch: 3, Batch: 600, Loss: 0.0391
Epoch: 4, Batch: 200, Loss: 0.0038
Epoch: 4, Batch: 400, Loss: 0.0009
Epoch: 4, Batch: 600, Loss: 0.0080
Epoch: 5, Batch: 200, Loss: 0.0008
Epoch: 5, Batch: 400, Loss: 0.0326
Epoch: 5, Batch: 600, Loss: 0.0028


### Testing Model

In [None]:
# Measure classification accuracy over `test_loader`.
correct = 0
total = 0
model.eval()  # switch layers to inference behavior
# Gradients are not needed for evaluation; disabling autograd avoids building
# a graph for every batch.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)

        # The index of the largest score along dim 1 is the predicted class.
        _, pred = torch.max(outputs, 1)

        # `.item()` keeps `correct` a plain Python int.
        correct += (pred == labels.to(device)).sum().item()
        total += labels.size(0)

print('Accuracy:%.3f%%' %(100.0 * float(correct)/float(total)))

Accuracy:98.970%
