使用 Label Smoothing 损失函数后，在训练阶段预测正确时 loss 不会下降得太快，
预测错误时 loss 也不会惩罚得太多，使模型不容易陷入局部最优点，这在一定程度上可以抑制网络过拟合的现象。

In [None]:
# Built-in cross-entropy loss with a label-smoothing factor of 0.1.
loss_function = nn.CrossEntropyLoss(label_smoothing=0.1)

#### 自定义使用标签平滑的CrossEntropyLoss方法

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy loss with label smoothing, averaged over all samples.

    Hand-written counterpart of ``nn.CrossEntropyLoss(label_smoothing=...)``.
    The per-sample loss is a weighted sum of two terms::

        (1 - smoothing) * nll + smoothing * uniform

    where ``nll`` is the negative log-probability of the target class and
    ``uniform`` is the mean negative log-probability over all classes.

    Args:
        smoothing: Weight of the uniform term; must lie in ``[0, 1]``.
            Defaults to ``0.0``, which puts all weight on the ``nll`` term.

    Raises:
        ValueError: If ``smoothing`` is outside ``[0, 1]``.
    """

    def __init__(self, smoothing: float = 0.0) -> None:
        super().__init__()
        # Outside [0, 1] one of the two terms gets a negative weight and the
        # result is no longer a meaningful loss, so reject it up front.
        # (The negated comparison also rejects NaN.)
        if not 0.0 <= smoothing <= 1.0:
            raise ValueError(
                f"smoothing must be in [0, 1], got {smoothing!r}"
            )
        self.smoothing = smoothing

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the mean label-smoothed cross-entropy.

        Args:
            input: Unnormalized scores (logits); classes are on the last
                dimension.
            target: Integer class indices with one fewer dimension than
                ``input`` (it is expanded with a trailing axis before being
                used as a ``gather`` index).

        Returns:
            A scalar tensor: the loss averaged over every sample.
        """
        log_prob = F.log_softmax(input, dim=-1)
        # Negative log-probability of the true class for each sample.
        nll_loss = -log_prob.gather(dim=-1, index=target.unsqueeze(-1))
        nll_loss = nll_loss.squeeze(-1)
        # Cross-entropy against a uniform distribution over the classes.
        smooth_loss = -log_prob.mean(dim=-1)
        loss = (1.0 - self.smoothing) * nll_loss + self.smoothing * smooth_loss
        return loss.mean()

# Example usage: build the custom loss with a smoothing factor of 0.1.
loss_function = LabelSmoothingCrossEntropy(smoothing=0.1)


![jupyter](./file/loss.png)

![jupyter](./file/acc.png)