In [10]:
import torch
import numpy as np
import torch.nn as nn

# Raw class scores (logits): one row per sample, one column per class.
output = torch.tensor(
    [
        [4.0, 5.0, 10.0],
        [1.0, 5.0, 4.0],
        [1.0, 15.0, 4.0],
    ]
)
# Ground-truth class index for each of the three samples.
label = torch.tensor([2, 1, 1], dtype=torch.long)

# Baseline: the built-in cross-entropy loss (no label smoothing), averaged over the batch.
criterion = nn.CrossEntropyLoss()
loss = criterion(output, label)

print(f"CrossEntropy:{loss}")



CrossEntropy:0.11191823333501816


## 标签平滑后的损失函数公式
<img src="./imgs/lsr_formula1.png"  width="500" height="300" align="bottom" />

其中q是one-hot向量，eg： q = （0， 0， 1）    
u是均匀分布， eg：u = (1/3, 1/3, 1/3)

## 交叉熵计算公式
 $H(q, p) = - \sum_{k=1}^{K} q_k \log p_k = -(0 \cdot \log p_1 + 0 \cdot \log p_2 + 1 \cdot \log p_3) = -\log p_3$    (假设 q = (0, 0, 1))      

 $H(u, p) = - \sum_{k=1}^{K} u_k \log p_k = - \sum_{k=1}^{K} \frac{1}{K} \log p_k$ 
 $= - \frac{1}{K}\sum_{k=1}^{K} \log p_k = - \frac{\log p_1 + \log p_2 + \log p_3}{3}$   (假设 K=3, u = (1/3, 1/3, 1/3)) 

In [11]:
import torch
import numpy as np
import torch.nn as nn

class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy loss with label smoothing.

    The one-hot target distribution q is mixed with the uniform distribution u
    over the K classes, which gives the per-sample loss

        loss = (1 - eps) * H(q, p) + eps * H(u, p)

    where p = softmax(x) along the last dimension.

    Args:
        eps: Smoothing factor in [0, 1]. 0 yields the plain cross-entropy.
        reduction: "mean" (default) averages the per-sample losses, "sum" adds
            them up, "none" returns them unreduced with the shape of `target`.

    Raises:
        ValueError: If `eps` is outside [0, 1] or `reduction` is not one of
            "mean", "sum", "none".
    """

    def __init__(self, eps=0.001, reduction="mean"):
        super(LabelSmoothingCrossEntropy, self).__init__()
        if not 0.0 <= eps <= 1.0:
            raise ValueError("eps must be in [0, 1], got {}".format(eps))
        if reduction not in ("mean", "sum", "none"):
            raise ValueError(
                "reduction must be 'mean', 'sum' or 'none', got {!r}".format(reduction)
            )
        self.eps = eps
        self.reduction = reduction

    def forward(self, x, target):
        """Compute the label-smoothed cross-entropy.

        Args:
            x: Logits whose last dimension indexes the classes.
            target: Integer class indices with the shape of `x` minus its last
                dimension.

        Returns:
            A scalar tensor for "mean"/"sum" reduction, otherwise a tensor of
            per-sample losses shaped like `target`.
        """
        # log(p_k) for every class, computed in a numerically stable way.
        log_probs = torch.nn.functional.log_softmax(x, dim=-1)

        # H(q, p): q is one-hot, so only the log-probability of the true class
        # survives. The index is expanded on the LAST axis so that it lines up
        # with gather(dim=-1) for inputs of any rank, not just 2-D.
        index = target.unsqueeze(-1)
        H_qp = -log_probs.gather(index=index, dim=-1)
        # Tutorial trace of the intermediate tensors.
        print("index:" , index, index.shape,)
        print("log_probs: ", log_probs, log_probs.shape)
        print("H_qp: ", H_qp, H_qp.shape)
        H_qp = H_qp.squeeze(-1)

        # H(u, p): u is uniform, so this is the mean of -log(p_k) over the
        # classes of each sample (kept per-sample so that reduction="none"
        # is meaningful).
        H_up = -log_probs.mean(dim=-1)

        loss = (1 - self.eps) * H_qp + self.eps * H_up

        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss

    
# Evaluate the label-smoothed loss on the same logits/labels as the baseline
# and print both values side by side (`loss` comes from the earlier cell).
criterion = LabelSmoothingCrossEntropy(eps=0.001)
loss_ls = criterion(output, label)

print(f"CrossEntropy:{loss}")
print(f"LableSmoothingCrossEntropy:{loss_ls}")

index: tensor([[2],
        [1],
        [1]]) torch.Size([3, 1])
log_probs:  tensor([[-6.0092e+00, -5.0092e+00, -9.1745e-03],
        [-4.3266e+00, -3.2656e-01, -1.3266e+00],
        [-1.4000e+01, -1.7524e-05, -1.1000e+01]]) torch.Size([3, 3])
H_qp:  tensor([[9.1745e-03],
        [3.2656e-01],
        [1.7524e-05]]) torch.Size([3, 1])
CrossEntropy:0.11191823333501816
LableSmoothingCrossEntropy:0.11647378653287888


### 函数1：torch.nn.functional.log_softmax
### torch.nn.functional.log_softmax(input, dim=None, _stacklevel=3, dtype=None)
功能：先进行softmax激活函数，再取对数
<img src="./imgs/log_softmax.png" width="500" height="300" align="bottom">

### 函数2：torch.gather
### torch.gather(input, dim, index, out=None, sparse_grad=False) → Tensor
按给定的轴，根据index在input上收集数据
```python
out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0    
out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1    
out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2    
```


In [12]:
import torch
# Small 2x2 tensor used to illustrate torch.gather.
inputs = torch.tensor([[1, 2], [3, 4]])
# With dim=1: out[i][j] = inputs[i][index[i][j]], i.e. each row picks its own columns.
inputs.gather(dim=1, index=torch.tensor([[0, 0], [1, 0]]))

tensor([[1, 1],
        [4, 3]])