In [1]:
%config Completer.use_jedi = False

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. Binary cross-entropy loss: used for multi-label classification problems

For example: classify the animals present in a picture. A single picture can contain several animals (here we assume each image contains one or more of 4 possible animals).

In [4]:
# Stand-in for the raw outputs of a network's final layer:
# logits, i.e. inverse-sigmoid values.
torch.manual_seed(42)
n_samples = 10
n_classes = 4
x = torch.randn((n_samples, n_classes))
x

tensor([[ 1.9269,  1.4873,  0.9007, -2.1055],
        [ 0.6784, -1.2345, -0.0431, -1.6047],
        [-0.7521,  1.6487, -0.3925, -1.4036],
        [-0.7279, -0.5594, -0.7688,  0.7624],
        [ 1.6423, -0.1596, -0.4974,  0.4396],
        [-0.7581,  1.0783,  0.8008,  1.6806],
        [ 0.0349,  0.3211,  1.5736, -0.8455],
        [ 1.3123,  0.6872, -1.0892, -0.3553],
        [-1.4181,  0.8963,  0.0499,  2.2667],
        [ 1.1790, -0.4345, -1.3864, -1.2862]])

In [5]:
# Arbitrary ground-truth labels: every entry is 0 or 1.
# Despite the name, a row may contain several 1s (multi-label targets).
torch.manual_seed(42)
true_one_hot = torch.randint(2, size=(n_samples, n_classes), dtype=torch.float)
true_one_hot

tensor([[0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [1., 1., 1., 0.],
        [1., 0., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 0., 0.],
        [1., 1., 1., 0.],
        [1., 0., 0., 0.]])

In [6]:
def sigmoid(x: torch.Tensor) -> torch.Tensor:
    """Apply the logistic function 1 / (1 + exp(-x)) element-wise to ``x``."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

def binary_cross_entropy_loss(
    true: torch.Tensor,
    pred: torch.Tensor,
    eps: float = 1e-10,
) -> torch.Tensor:
    """
    Mean binary cross entropy between targets and predicted probabilities.

    Parameters
    ----------
    true:
        Target labels; expected to hold 0/1 indicators matching ``pred``.
    pred:
        Predicted probabilities of the positive label (not logits).
    eps:
        Small constant added inside both logarithms so that a probability
        of exactly 0 or 1 does not produce ``log(0)``.

    Returns
    -------
    Scalar tensor: the element-wise loss averaged over all entries.
    """
    # Contribution of entries whose label is 1.
    positive_term = true * torch.log(pred + eps)
    # Contribution of entries whose label is 0.
    negative_term = (1 - true) * torch.log(1 - pred + eps)
    return torch.mean(-(positive_term + negative_term))

In [7]:
# Note: sigmoid (not softmax) is applied here, so every logit is mapped to a
# probability on its own and the rows are not forced to sum to 1.
pred_prob = sigmoid(x) 
pred_prob

tensor([[0.8729, 0.8157, 0.7111, 0.1086],
        [0.6634, 0.2254, 0.4892, 0.1673],
        [0.3204, 0.8387, 0.4031, 0.1972],
        [0.3257, 0.3637, 0.3167, 0.6819],
        [0.8378, 0.4602, 0.3782, 0.6082],
        [0.3191, 0.7462, 0.6901, 0.8430],
        [0.5087, 0.5796, 0.8283, 0.3004],
        [0.7879, 0.6653, 0.2518, 0.4121],
        [0.1950, 0.7102, 0.5125, 0.9061],
        [0.7648, 0.3931, 0.2000, 0.2165]])

In [8]:
# NOTE(review): this displays only the return type, not the loss value; drop the
# type(...) wrapper to compare the number with F.binary_cross_entropy.
type(binary_cross_entropy_loss(true=true_one_hot, pred=pred_prob))

torch.Tensor

In [9]:
# Built-in reference: takes probabilities (here the sigmoid outputs), not logits.
F.binary_cross_entropy(input=pred_prob, target=true_one_hot)

tensor(0.6935)

In [10]:
def binary_cross_entropy_loss_with_logits(
    true: torch.Tensor, 
    pred: torch.Tensor,
    eps: float=1e-10) -> torch.Tensor:
    """
    Mean binary cross entropy between targets and output logits.

    The loss is computed directly from the logits instead of applying a
    sigmoid followed by a logarithm. With the sigmoid-then-log approach a
    saturated probability (exactly 0 or 1 in floating point) caps the loss
    at ``-log(eps)`` for confidently wrong predictions.

    Parameters
    ----------
    true:
        Target labels; expected to hold 0/1 indicators matching ``pred``.
    pred:
        Logits (inverse-sigmoid values) of the positive label.
    eps:
        Unused. Retained so that existing calls passing ``eps`` keep working;
        the logit-space formula below needs no stabilising constant.

    Returns
    -------
    Scalar tensor: the element-wise loss averaged over all entries.
    """
    # With z = pred and y = true, per element:
    #   -[y * log(sigmoid(z)) + (1 - y) * log(1 - sigmoid(z))]
    #     = max(z, 0) - z * y + log(1 + exp(-|z|))
    # exp() only ever sees a non-positive argument, so it cannot overflow.
    loss = (
        torch.clamp(pred, min=0)
        - pred * true
        + torch.log1p(torch.exp(-torch.abs(pred)))
    )
    return torch.mean(loss)

In [11]:
# Custom loss evaluated directly on the raw logits.
binary_cross_entropy_loss_with_logits(
    true=true_one_hot,
    pred=x,
)

tensor(0.6935)

In [12]:
# Note: the built-in function takes logits (inverse-sigmoid values) for every
# class, i.e. the raw scores `x` rather than probabilities.
F.binary_cross_entropy_with_logits(input=x, target=true_one_hot)

tensor(0.6935)

# 2. Categorical cross-entropy loss: used for multi-class classification, where every sample/observation belongs to exactly one class

In [13]:
# True labels as integer class indices in [0, n_classes), one per sample
# (these are class ids, not one-hot vectors).
torch.manual_seed(42)
true = torch.randint(n_classes, size=(n_samples,), dtype=torch.long)
true

tensor([2, 3, 0, 2, 2, 3, 0, 0, 2, 1])

In [14]:
# Not used afterwards; one-hot view of `true`, shown only for convenience.
# This rebinds the `true_one_hot` name used in the binary cross-entropy section.
true_one_hot = torch.zeros(n_samples, n_classes)
true_one_hot[torch.arange(n_samples), true] = 1
true_one_hot

tensor([[0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.]])

In [15]:
# Same seed and shape as the logits created for the binary cross-entropy
# section, so `x` is regenerated with identical values.
torch.manual_seed(42)
n_samples = 10
n_classes = 4
x = torch.randn((n_samples, n_classes))
x

tensor([[ 1.9269,  1.4873,  0.9007, -2.1055],
        [ 0.6784, -1.2345, -0.0431, -1.6047],
        [-0.7521,  1.6487, -0.3925, -1.4036],
        [-0.7279, -0.5594, -0.7688,  0.7624],
        [ 1.6423, -0.1596, -0.4974,  0.4396],
        [-0.7581,  1.0783,  0.8008,  1.6806],
        [ 0.0349,  0.3211,  1.5736, -0.8455],
        [ 1.3123,  0.6872, -1.0892, -0.3553],
        [-1.4181,  0.8963,  0.0499,  2.2667],
        [ 1.1790, -0.4345, -1.3864, -1.2862]])

In [16]:
# Not used afterwards, just for convenience: softmax over the class dimension.
# Note: each row sums to 1.
pred_prob = torch.softmax(x, dim=1)
pred_prob

tensor([[0.4950, 0.3189, 0.1774, 0.0088],
        [0.5762, 0.0851, 0.2800, 0.0588],
        [0.0715, 0.7888, 0.1024, 0.0373],
        [0.1319, 0.1561, 0.1266, 0.5854],
        [0.6317, 0.1042, 0.0743, 0.1897],
        [0.0426, 0.2671, 0.2024, 0.4879],
        [0.1351, 0.1798, 0.6291, 0.0560],
        [0.5511, 0.2950, 0.0499, 0.1040],
        [0.0181, 0.1830, 0.0785, 0.7204],
        [0.7347, 0.1464, 0.0565, 0.0624]])

In [17]:
def categorical_cross_entropy_loss(
    pred: torch.Tensor, 
    true: torch.Tensor,
    eps: float=1e-10) -> torch.Tensor:
    """
    Categorical cross entropy: mean negative log-probability of the true class.

    Parameters
    ----------
    pred: shape (n, c)
        n: number of samples

        c: number of classes

        Accepts raw unnormalized scores (logits); the softmax over the class
        dimension is applied internally in log space.

    true: shape (n, )
        Accepts integer target classes, used to index the class dimension.

    eps:
        Unused; kept so that existing calls passing ``eps`` keep working.

    Returns
    -------
    Scalar tensor: the per-sample loss averaged over the n samples.
    """
    # log_softmax stays finite where log(softmax(x)) would hit log(0) = -inf
    # because the softmax of a far-from-maximum logit underflows to zero.
    log_prob = torch.log_softmax(pred, dim=1)
    # Pick, for every row i, the log-probability of its true class true[i].
    sample_idx = torch.arange(log_prob.shape[0], device=log_prob.device)
    log_loss_across_true_labels = -log_prob[sample_idx, true]
    return torch.mean(log_loss_across_true_labels)

In [18]:
# Custom implementation applied to the raw scores and the integer class labels.
categorical_cross_entropy_loss(pred=x, true=true)

tensor(1.9650)

In [19]:
# Built-in categorical cross-entropy loss (module form).
# Note: it takes raw, unnormalized scores together with
# the true labels given as integer class ids.
nn.CrossEntropyLoss()(x, true)

tensor(1.9650)

In [20]:
# Functional form of the built-in loss, called with the same raw scores and integer labels.
F.cross_entropy(x, true)

tensor(1.9650)