In [2]:
# https://pytorch.org/docs/stable/generated/torch.nn.NLLLoss.html
# https://zhuanlan.zhihu.com/p/659288660
import torch
import torch.nn as nn
import torch.nn.functional as F

In [22]:
# Single-sample batch (N = 1): NLLLoss applied directly to hand-written scores.
# The class index to pick for the one sample in the batch.
target = torch.tensor([2])

# Scores are laid out as N x C = 1 x 3 (one row per sample, one column per class).
input = torch.tensor(
    [[0.2, 0.3, 0.5]],
    requires_grad=True,
)

# With the default reduction="mean" the criterion evaluates
#   -input[0][target[0]] / N  =  -0.5 / 1
loss = nn.NLLLoss()
output = loss(input, target)
print("output: ", output)


output:  tensor(-0.5000, grad_fn=<NllLossBackward0>)


In [30]:
# Single-sample batch (N = 1), this time passing log-probabilities to NLLLoss.
# Raw scores are N x C = 1 x 3 and the target selects class 2.
input = torch.tensor(
    [[0.2, 0.3, 0.5]],
    requires_grad=True,
)
target = torch.tensor([2])

# Log-softmax over the class dimension. With Z = exp(0.2) + exp(0.3) + exp(0.5):
#   log(exp(0.2) / Z) = -1.23983106084446
#   log(exp(0.3) / Z) = -1.13983106084446
#   log(exp(0.5) / Z) = -0.93983106084446
m = nn.LogSoftmax(dim=1)
sftmax = m(input)  # holds log-probabilities, despite the name
print("sftmax: ", sftmax)  # [[-1.2398, -1.1398, -0.9398]]

# Mean-reduced negative log-likelihood:
#   -sftmax[0][target[0]] / N  =  0.9398
loss = nn.NLLLoss()
output = loss(sftmax, target)
print("output: ", output)

sftmax:  tensor([[-1.2398, -1.1398, -0.9398]], grad_fn=<LogSoftmaxBackward0>)
output:  tensor(0.9398, grad_fn=<NllLossBackward0>)


In [25]:
# Batch = 5
# NOTE: no LogSoftmax is applied in this cell; the raw scores go straight into
# NLLLoss, so the result is simply the negated mean of the entries selected by
# `target`. In real use NLLLoss expects log-probabilities as its input.
loss = nn.NLLLoss()

# input is of size N x C = 5 x 3
input = torch.tensor([[0.2, 0.3, 0.5],
                      [0.7, 0.2, 0.1],
                      [0.3, 0.4, 0.3],
                      [0.1, 0.8, 0.1],
                      [0.3, 0.1, 0.6]], requires_grad=True)

# each element in target has to have 0 <= value < C
target = torch.tensor([2, 0, 0, 1, 2])

# Show the operands so a fresh run reproduces the recorded cell output.
print("input: ", input)
print("target: ", target)

# Equation (default reduction="mean"):
#   -(input[0][2] + input[1][0] + input[2][0] + input[3][1] + input[4][2]) / 5
#   = -(0.5 + 0.7 + 0.3 + 0.8 + 0.6) / 5
#   = -0.58
output = loss(input, target)
print("output: ", output)

input:  tensor([[0.2000, 0.3000, 0.5000],
        [0.7000, 0.2000, 0.1000],
        [0.3000, 0.4000, 0.3000],
        [0.1000, 0.8000, 0.1000],
        [0.3000, 0.1000, 0.6000]], requires_grad=True)
target:  tensor([2, 0, 0, 1, 2])
output:  tensor(-0.5800, grad_fn=<NllLossBackward0>)


In [None]:
# 2D loss example (e.g. per-pixel classification on image-like inputs)
N, C = 5, 4

# Random feature maps of size N x 16 x height x width = 5 x 16 x 10 x 10.
data = torch.randn(N, 16, 10, 10)

# Unpadded 3x3 convolution mapping 16 channels to C class channels; with the
# default stride/padding each spatial side shrinks from 10 to 10 - 3 + 1 = 8.
conv = nn.Conv2d(16, C, (3, 3))

# Log-softmax over dim=1, i.e. across the C class channels.
m = nn.LogSoftmax(dim=1)

# One class index per output pixel (N x 8 x 8); each must satisfy 0 <= value < C.
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)

# Score the log-probabilities against the targets, then backpropagate.
loss = nn.NLLLoss()
log_probs = m(conv(data))
output = loss(log_probs, target)
output.backward()