In [278]:
import torch
from torch import nn

In [279]:
import numpy as np
def softmax(vec):
    return torch.exp(vec)/ torch.sum(torch.exp(vec))

# Cross Entropy Loss

Note : In pytorch > we represent 0,1,2 .. as : [1,0,0] , [0,1,0] , [0,0,1] <br>
[0,0,0] doesn't represent anything

## PART 1 : Cross entroy in simple batch * one_hot_dimension || number_of_classes 

In [283]:
## input for shape : Batch-Size * one-hot-out-dimension

## this is : Batch_size * seq_length * one-hot-out-dimension
# for calculating loss in many-to-many scenarios 
# say, LSTM outputting something for 2 time steps in one training example
# and we have batch of 2
inp = torch.tensor(
         [
          [1.0 , 0, 0], # this means 0
          [0.0  ,1, 0], # this means 1
         ]

)

inp.shape


label = torch.tensor(
        [0,1], ).long()

loss = nn.CrossEntropyLoss(reduction='sum')
output = loss(inp, label)

print(inp.shape, label.shape, output)
# NOTE : try changning labels . loss will only increase. this is the only case with minimum loss

torch.Size([2, 3]) torch.Size([2]) tensor(1.1029)


###  PART 1 manual check

In [284]:
# first take softmax in input
for i in range(2):
        inp[i] = softmax( inp[i] )
        

## for first batch : Expected Output > 1,0,0

loss_at_first_example = - ( np.log(0.5761) * 1 +  np.log(0.2119) * 0 + np.log(0.2119) * 0 )

## for second batch : Expected Output > 0,1,0
loss_at_second_example = - ( np.log(0.2119) * 0 +  np.log(0.5761) * 1 + np.log(0.2119) * 0 )

print( loss_at_first_example + loss_at_second_example   )

1.1029480444875994


In [285]:
## Hence same loss

## PART 2 :  Cross Entropy in RNN

In [286]:
# input of Batch_Size * Number_classes
inp = torch.tensor(
    [ 
        # First example : batch 1
        # list of no_of_class dimensional output, at each timestep
        [ [1,0,0],  [1,0,0],  [0,0,0],  [0,0,1]  ] , 
        
        # Similarly a second Example : batch 2
        [ [ 0, 1.0, 0  ],  [0,0,1] ,  [0,0,0] ,  [0,0,1]  ] 
     ]

)

# Convert to format pytorch expects : as per documentation
inp = torch.permute(inp ,(0, 2, 1) )
inp, inp.shape ## batch_size * num_of_classes * sequence_length

(tensor([[[1., 1., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 1.]],
 
         [[0., 0., 0., 0.],
          [1., 0., 0., 0.],
          [0., 1., 0., 1.]]]),
 torch.Size([2, 3, 4]))

In [287]:
# Batch_size * output_at_each_time_step ( where output between 0,1,2) in our case for now
label = torch.tensor(
        [[0,1,2,2],
         [1,1,0,0],
        ]
        ).long()

In [288]:
loss = nn.CrossEntropyLoss(reduction='sum')
output = loss(inp, label)

print(inp.shape, label.shape, output)

torch.Size([2, 3, 4]) torch.Size([2, 4]) tensor(8.5059)


###  Manual Check what's happening

In [289]:
b, n_c, t = inp.shape
print(f'Batch size : {b}')
print(f'Num Classes / output_vector_space  : {n_c}')
print(f'Time Steps : {t}')

Batch size : 2
Num Classes / output_vector_space  : 3
Time Steps : 4


In [294]:
total_loss_across_batch = 0

for b in range(2):
    print(f'batch : {b}')
    for t in range(4):
        
        label_exact = label[b][t].item()
        label_full = torch.eye(3)[label_exact]
        
        
        # calculate cross-entropy loss for one example
        arbit =  softmax(inp[b, :, t])
#         print(arbit)
        summed = 0
        for i in range(n_c):
            loss_intermediate = - ( torch.log( arbit[i] ) * label_full[i]  )
            summed += loss_intermediate
        
        total_loss_across_batch += summed
        
        print(f'\tInput : {inp[b, :, t]} Expected : {label_full} Loss : {summed}')
        

print('Total Loss across whole batch : ', total_loss_across_batch)

batch : 0
	Input : tensor([1., 0., 0.]) Expected : tensor([1., 0., 0.]) Loss : 0.5514447689056396
	Input : tensor([1., 0., 0.]) Expected : tensor([0., 1., 0.]) Loss : 1.5514447689056396
	Input : tensor([0., 0., 0.]) Expected : tensor([0., 0., 1.]) Loss : 1.0986123085021973
	Input : tensor([0., 0., 1.]) Expected : tensor([0., 0., 1.]) Loss : 0.5514447689056396
batch : 1
	Input : tensor([0., 1., 0.]) Expected : tensor([0., 1., 0.]) Loss : 0.5514447689056396
	Input : tensor([0., 0., 1.]) Expected : tensor([0., 1., 0.]) Loss : 1.5514447689056396
	Input : tensor([0., 0., 0.]) Expected : tensor([1., 0., 0.]) Loss : 1.0986123085021973
	Input : tensor([0., 0., 1.]) Expected : tensor([1., 0., 0.]) Loss : 1.5514447689056396
Total Loss across whole batch :  tensor(8.5059)


## good example for 3d input loss calculation in torch

In [296]:
criterion = nn.CrossEntropyLoss()
batch_size = 2
max_len = 5
num_classes = 4
pred = torch.randn([batch_size, num_classes, max_len ])
label = torch.randint(0, num_classes,[batch_size, max_len])
pred = nn.Softmax(dim = 2)(pred)
criterion(pred, label)

tensor(1.4105)