[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)

# <font color='#4C5FDA'>**Cross Entropy Loss vs Negative Log Likelihood Loss**</font> <a name="tema1"></a>

In [3]:
# Original: https://www.youtube.com/watch?v=Ni1ViB1Ezjs&ab_channel=MakeesyAI
import torch # 2.2.1
from torch import nn

In [4]:
# Show the installed PyTorch version; the outputs in this notebook were produced with 2.2.1.
print(f"{torch.__version__}")

2.2.1


In [5]:
# Stand-in for a model's output: 3 samples (the batch) scored over 5 classes.
# requires_grad=True makes autograd track this tensor, mimicking a training setup.
prediction = torch.rand((3, 5), requires_grad=True)
print(prediction.shape, prediction.dtype)
print(prediction)

torch.Size([3, 5]) torch.float32
tensor([[0.4499, 0.8788, 0.5056, 0.1445, 0.0907],
        [0.4715, 0.6950, 0.8860, 0.1334, 0.2139],
        [0.1923, 0.5645, 0.9867, 0.4618, 0.1355]], requires_grad=True)


In [6]:
# Ground-truth class index for every sample in the batch:
# sample 0 -> class 0, sample 1 -> class 1, sample 2 -> class 4.
target = torch.tensor((0, 1, 4))
print(target.shape, target.dtype)
print(target)

torch.Size([3]) torch.int64
tensor([0, 1, 4])


In [7]:
# The two loss modules being compared.
loss_fn_ce = nn.CrossEntropyLoss()
loss_fn_nll = nn.NLLLoss()
# dim=-1 normalizes over the last dimension (the class scores of each sample),
# never across the batch dimension.
log_softmax = nn.LogSoftmax(dim=-1)

In [8]:
# NLLLoss expects log-probabilities, so LogSoftmax has to be applied to the raw scores first.
loss_nll = loss_fn_nll(log_softmax(prediction), target)
# CrossEntropyLoss takes the raw scores directly: it applies log-softmax followed by NLL internally.
loss_ce = loss_fn_ce(prediction, target)

# Backpropagate each loss, as a training step would.
# backward() *accumulates* into prediction.grad, so the gradient is cleared before
# each pass; otherwise .grad would hold the sum of both gradients (and keep growing
# every time this cell is re-run).
prediction.grad = None
loss_nll.backward()
grad_nll = prediction.grad.clone()

prediction.grad = None
loss_ce.backward()
grad_ce = prediction.grad.clone()

# Both formulations must produce the same loss value and the same gradient.
assert torch.allclose(loss_nll, loss_ce)
assert torch.allclose(grad_nll, grad_ce)

# Print the losses
print(loss_nll)
print(loss_ce)

# Interestingly, both losses report the same grad_fn (NllLossBackward0):
# they use the same method for error propagation.

tensor(1.6802, grad_fn=<NllLossBackward0>)
tensor(1.6802, grad_fn=<NllLossBackward0>)


# <font color='#4C5FDA'>**Extra: Binary Cross Entropy Loss vs Binary Cross Entropy With Logits Loss**</font> <a name="tema1"></a>

In [13]:
# Stand-in for a binary classifier's raw output: one score per sample, batch of 3.
# requires_grad=True makes autograd track this tensor, mimicking a training setup.
prediction = torch.rand((3, 1), requires_grad=True)
print(prediction.shape, prediction.dtype)
print(prediction)

torch.Size([3, 1]) torch.float32
tensor([[0.1320],
        [0.3074],
        [0.6341]], requires_grad=True)


In [14]:
# Binary labels (only two classes: 0 or 1) for the 3 samples, stored as float32
# with shape (3, 1).
target = torch.tensor([0, 1, 0], dtype=torch.float32).unsqueeze(1)
print(target.shape, target.dtype)
print(target)

torch.Size([3, 1]) torch.float32
tensor([[0.],
        [1.],
        [0.]])


In [15]:
# BCEWithLogitsLoss works on raw scores and applies the sigmoid internally.
loss_fn_bcewl = nn.BCEWithLogitsLoss()
# BCELoss works on probabilities, so it is paired with an explicit sigmoid.
loss_fn_bce = nn.BCELoss()
sigmoid = nn.Sigmoid()

In [16]:
# BCELoss expects probabilities, so the sigmoid has to be applied to the raw scores first.
loss_bce = loss_fn_bce(sigmoid(prediction), target)
# BCEWithLogitsLoss takes the raw scores directly and applies the sigmoid internally.
loss_bcewl = loss_fn_bcewl(prediction, target)

# Backpropagate each loss, as a training step would.
# backward() *accumulates* into prediction.grad, so the gradient is cleared before
# each pass; otherwise .grad would hold the sum of both gradients (and keep growing
# every time this cell is re-run).
prediction.grad = None
loss_bce.backward()
grad_bce = prediction.grad.clone()

prediction.grad = None
loss_bcewl.backward()
grad_bcewl = prediction.grad.clone()

# Both formulations must produce the same loss value and the same gradient.
assert torch.allclose(loss_bce, loss_bcewl)
assert torch.allclose(grad_bce, grad_bcewl)

# We print the loss
print(loss_bce)
print(loss_bcewl)

tensor(0.7907, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7907, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
