In [23]:
# https://stackoverflow.com/questions/55681502/label-smoothing-in-pytorch

import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn import CrossEntropyLoss
from torch.nn.modules.loss import _WeightedLoss

"""
Kiyoon implementation taken from https://stackoverflow.com/questions/55681502/label-smoothing-in-pytorch
Edits:
    1. Apply the official label smoothing formula. With smoothing=a and num_classes=K, y^LS = y(1-a) + a/K. The true label becomes e.g. 0.933 when a=0.1 and K=3; the exact value depends on the number of classes.
        a. The original code implements it differently: y^LS = y(1-a) + (1-y)*a/(K-1). There the true label becomes 0.9 when a=0.1, regardless of K.
    2. Accept custom smoothed (per-class) labels instead of a 1D tensor of class indices (OneHotCrossEntropyLoss).
"""
class OneHotCrossEntropyLoss(_WeightedLoss):
    """Cross-entropy loss for targets given as per-class values.

    Instead of integer class indices, ``targets`` carries one value per class
    along the last dimension (one-hot rows, smoothed labels, ...), matching
    the class dimension of ``inputs``.

    Args:
        weight: Optional per-class rescaling tensor; it is multiplied into the
            log-probabilities along the last dimension.
        reduction: ``'mean'`` averages the per-sample losses, ``'sum'`` adds
            them up, and any other value returns them unreduced.
    """

    def __init__(self, weight=None, reduction='mean'):
        super().__init__(weight=weight, reduction=reduction)
        # Explicitly (re)assign both constructor arguments on the instance.
        self.reduction = reduction
        self.weight = weight

    def reduce_loss(self, loss):
        """Collapse per-sample losses according to ``self.reduction``."""
        mode = self.reduction
        if mode == 'mean':
            return loss.mean()
        if mode == 'sum':
            return loss.sum()
        # Any other mode (e.g. 'none') leaves the losses untouched.
        return loss

    def forward(self, inputs, targets):
        """Return the (reduced) cross-entropy between ``inputs`` and ``targets``.

        Args:
            inputs: Raw scores (logits); softmax is taken over the last dim.
            targets: Per-class target values, broadcastable against ``inputs``.
        """
        log_probs = F.log_softmax(inputs, dim=-1)

        class_weights = self.weight
        if class_weights is not None:
            # Leading singleton dim lets the weights broadcast across samples.
            log_probs = log_probs * class_weights.unsqueeze(0)

        per_sample = -(targets * log_probs).sum(dim=-1)
        return self.reduce_loss(per_sample)


class LabelSmoothCrossEntropyLoss(OneHotCrossEntropyLoss):
    """Cross-entropy loss over integer class labels with label smoothing.

    With smoothing ``a`` and ``K`` classes, every class receives ``a / K`` and
    the true class receives ``1 - a + a / K``, so each target row sums to 1.

    Args:
        weight: Optional per-class rescaling tensor, forwarded to the parent.
        reduction: Reduction mode, forwarded to the parent.
        smoothing: Smoothing factor ``a``; must satisfy ``0 <= a < 1`` when
            ``forward`` is called.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing

    def k_one_hot(self, targets: torch.Tensor, n_classes: int, smoothing=0.0):
        """Build smoothed one-hot targets from integer class indices.

        Args:
            targets: Integer (int64) tensor of class indices of any shape
                ``(...)``; the common case is a 1D batch ``(N,)``.
            n_classes: Number of classes ``K``.
            smoothing: Smoothing factor ``a``.

        Returns:
            Tensor of shape ``(..., n_classes)`` in the default floating dtype
            on the same device as ``targets``. Entries are ``a / K`` except at
            the target index, where they are ``1 - a + a / K``.
        """
        off_value = smoothing / n_classes
        on_value = 1. - smoothing + off_value
        with torch.no_grad():
            smoothed = torch.full(
                (*targets.shape, n_classes),
                off_value,
                dtype=torch.get_default_dtype(),
                device=targets.device,
            )
            # Scatter along the trailing class dim so that targets with extra
            # leading dimensions (not only 1D batches) are handled as well.
            smoothed.scatter_(-1, targets.unsqueeze(-1), on_value)
        return smoothed

    def forward(self, inputs, targets):
        """Return the label-smoothed cross-entropy.

        Args:
            inputs: Raw scores (logits) with the class dimension last.
            targets: Integer class indices matching the leading dimensions of
                ``inputs``.

        Raises:
            ValueError: If ``self.smoothing`` is outside ``[0, 1)``.
        """
        smoothing = self.smoothing
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not 0 <= smoothing < 1:
            raise ValueError(
                f"smoothing must satisfy 0 <= smoothing < 1, got {smoothing!r}"
            )

        smoothed_targets = self.k_one_hot(targets, inputs.size(-1), smoothing)
        return super().forward(inputs, smoothed_targets)



# Sample logits (3 samples x 5 classes) with matching class-index and one-hot
# labels. The original notebook heads this section "1. Devin Yang".
# Plain tensors are used throughout: the `Variable` wrapper is deprecated and
# tensors can be passed to loss modules directly.
predict = torch.tensor([[0, 0.2, 0.7, 0.1, 0],
                        [0, 0.9, 0.2, 0.2, 1],
                        [1, 0.2, 0.7, 0.9, 1]], dtype=torch.float32)
label = torch.tensor([2, 1, 0], dtype=torch.long)
onehot_label = torch.tensor([[0., 0., 1., 0., 0.],
                             [0., 1., 0., 0., 0.],
                             [1., 0., 0., 0., 0.]], dtype=torch.float32)

# Official PyTorch CrossEntropyLoss test with 1D tensor labels.
crit = CrossEntropyLoss()
v = crit(predict, label)
print(v)

# OneHotCrossEntropyLoss test with one-hot labels; should print the same value
# as the official loss above.
crit = OneHotCrossEntropyLoss()
v = crit(predict, onehot_label)
print(v)

# Build smoothed labels by hand (smoothing=0.3, 5 classes) and show them.
smooth_crit = LabelSmoothCrossEntropyLoss(smoothing=0.3)
smooth_label = smooth_crit.k_one_hot(label, 5, smoothing=0.3)
print(smooth_label)

# OneHotCrossEntropyLoss test with custom applied smooth labels
v = crit(predict, smooth_label)
print(v)

# LabelSmoothCrossEntropyLoss test; should match the manually smoothed case.
v = smooth_crit(predict, label)
print(v)


tensor(1.2703)
tensor(1.2703)
tensor([[0.0600, 0.0600, 0.7600, 0.0600, 0.0600],
        [0.0600, 0.7600, 0.0600, 0.0600, 0.0600],
        [0.7600, 0.0600, 0.0600, 0.0600, 0.0600]])
tensor(1.3883)
tensor(1.3883)
