In [1]:
import numpy as np
import torch
import torch.nn as nn

torch.__version__

'1.4.0'

## L1Loss

In [2]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [3]:
x

tensor([[-0.2041, -1.2018, -1.1587],
        [ 0.2358, -0.7864,  0.8396]])

In [4]:
y

tensor([[-0.7923, -1.2154,  0.6392],
        [ 0.6671, -0.3902,  1.6788]])

In [5]:
nn.L1Loss()(x, y)

tensor(0.6777)

In [6]:
nn.L1Loss(reduce=False)(x, y)

tensor([[0.5882, 0.0136, 1.7978],
        [0.4313, 0.3962, 0.8392]])

In [7]:
abs(x.numpy() - y.numpy())

array([[0.58823496, 0.01364124, 1.7978334 ],
       [0.43127662, 0.39623898, 0.8391947 ]], dtype=float32)

In [8]:
abs(x.numpy() - y.numpy()).mean()

0.6777367

## MSELoss

In [9]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

x

tensor([[ 0.0960, -1.9526, -0.4237],
        [ 0.4241,  0.9052, -0.0403]])

In [10]:
y

tensor([[-0.0755, -0.0398,  0.3776],
        [ 0.1513, -0.6123,  0.5238]])

In [11]:
nn.MSELoss(reduce=False)(x, y)

tensor([[0.0294, 3.6588, 0.6422],
        [0.0744, 2.3028, 0.3182]])

In [12]:
nn.MSELoss()(x, y)

tensor(1.1710)

In [13]:
(x.numpy() - y.numpy())**2

array([[0.02940254, 3.6588213 , 0.64216805],
       [0.07438623, 2.3027995 , 0.31816533]], dtype=float32)

In [14]:
((x.numpy() - y.numpy())**2).mean()

1.1709572

## CrossEntropyLoss

In [15]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[ 0.5404, -0.7130,  0.6084, -0.7554],
        [ 0.2860, -0.1253, -0.0767, -1.0518]])

In [16]:
y

tensor([0, 1])

In [17]:
nn.CrossEntropyLoss(reduce=False)(x, y), nn.CrossEntropyLoss()(x, y)

(tensor([0.9668, 1.3749]), tensor(1.1709))

In [18]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    lst.append(-np.log(np.exp(x[k][y[k]]) / np.exp(x[k]).sum()))
lst, np.mean(lst)

([0.96684796, 1.374885], 1.1708665)

## NLLLoss

LogSoftmax

In [19]:
x = torch.randn(2, 4)
x

tensor([[ 0.8628, -0.6149,  0.9169, -0.7378],
        [ 0.6770,  2.8620,  1.4026, -0.3766]])

In [20]:
y = nn.LogSoftmax(dim=1)(x)
y

tensor([[-0.9105, -2.3882, -0.8564, -2.5111],
        [-2.5100, -0.3250, -1.7845, -3.5637]])

In [21]:
x = x.numpy()
lst = []
for k in range(len(x)):
    lst.append(np.log( np.exp(x[k]) / np.exp(x[k]).sum()))
lst

[array([-0.91048443, -2.3881953 , -0.8563811 , -2.5110943 ], dtype=float32),
 array([-2.5100117 , -0.32501256, -1.7845074 , -3.563671  ], dtype=float32)]

NLLLoss

In [22]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x

(tensor([[-0.6890,  0.1038,  2.3317,  0.8312],
         [ 1.2110, -0.4997,  1.7207,  0.2459],
         [-0.7878,  0.4338,  0.7650, -1.3290]]),
 tensor([[-3.3425, -2.5496, -0.3217, -1.8223],
         [-1.1714, -2.8821, -0.6617, -2.1365],
         [-2.2721, -1.0505, -0.7192, -2.8133]]))

In [23]:
y = torch.LongTensor(3).random_(4)
y

tensor([3, 2, 2])

In [24]:
nn.NLLLoss()(x, y)

tensor(1.0677)

In [25]:
nn.NLLLoss(reduce=False)(x, y)

tensor([1.8223, 0.6617, 0.7192])

In [26]:
x = x.numpy()
y = y.numpy()

In [27]:
lst = []
for k in range(len(x)):
    lst.append(-x[k][y[k]])

lst, np.mean(lst)

([1.8222755, 0.6617119, 0.7192234], 1.067737)

## PoissonNLLLoss

In [28]:
x = torch.randn(2, 4)
x

tensor([[ 0.5132,  0.7255,  0.2304, -0.0817],
        [-0.1976,  1.9424, -0.4805,  1.5400]])

In [29]:
y = torch.randn(2, 4)
y

tensor([[-0.1327, -0.2685, -1.5387,  1.3353],
        [ 0.4708, -1.8417,  0.5693, -0.6601]])

In [30]:
nn.PoissonNLLLoss()(x, y)

tensor(3.0854)

In [31]:
nn.PoissonNLLLoss(reduce=False)(x, y)

tensor([[ 1.7387,  2.2606,  1.6136,  1.0306],
        [ 0.9137, 10.5530,  0.8920,  5.6811]])

In [32]:
x = x.numpy()
y = y.numpy()

In [33]:
# target∗log(target)−target+0.5∗log(2πtarget)
def sterling_approx(y):
    return y*np.log(y) - y + 0.5*np.log(np.pi*y)

In [34]:
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [35]:
np.array(lst)

array([[ 1.73869157,  2.26059008,  1.61359549,  0.79841203],
       [ 0.91372633, 10.55300903,  0.89201242,  5.68109369]])

In [36]:
np.mean(lst)

3.0563913298259497

## KLDivLoss

In [37]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
x

tensor([[0.1263, 0.7623, 0.1805],
        [0.5087, 0.9412, 0.7618]])

In [38]:
xlog = torch.log(x)
xlog

tensor([[-2.0691, -0.2715, -1.7122],
        [-0.6758, -0.0606, -0.2721]])

In [39]:
y

tensor([[0.6995, 0.3045, 0.8719],
        [0.3052, 0.2629, 0.7163]])

In [40]:
nn.KLDivLoss()(xlog, y)

tensor(0.2927)

In [41]:
nn.KLDivLoss(reduce=False)(xlog, y)

tensor([[ 1.1973, -0.2794,  1.3734],
        [-0.1559, -0.3353, -0.0441]])

In [42]:
x = x.numpy()
xlog = np.log(x)
y = y.numpy()

In [43]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        # xi is already log 
        lsti.append(y[i][j] * (np.log(y[i][j]) - xlog[i][j]))
    lst.append(lsti)
np.array(lst)

array([[ 1.1973273 , -0.27941543,  1.3733681 ],
       [-0.15593857, -0.3352907 , -0.04413316]], dtype=float32)

In [44]:
np.mean(lst)

0.29265293

## BCELoss

Sigmoid

In [45]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

tensor([[-1.7858,  1.3834,  0.9291,  1.2445],
        [ 1.0132, -1.5572,  0.6768,  0.3367]])

In [46]:
y

tensor([[0.1436, 0.7995, 0.7169, 0.7763],
        [0.7336, 0.1740, 0.6630, 0.5834]])

In [47]:
x = x.numpy()

In [48]:
1 / (1 + np.exp(-x))

array([[0.14358416, 0.7995305 , 0.716883  , 0.7763487 ],
       [0.7336397 , 0.17404221, 0.66303307, 0.58339965]], dtype=float32)

### single label

In [49]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

tensor([0.5012, 0.1091, 0.5466])

In [50]:
y = torch.FloatTensor(3).random_(2)
y

tensor([0., 1., 1.])

In [51]:
nn.BCELoss()(x, y)

tensor(1.1717)

In [52]:
nn.BCELoss(reduce=False)(x, y)

tensor([0.6955, 2.2156, 0.6040])

In [53]:
loss = nn.BCELoss(size_average=False)
lss = loss(x, y)
lss

tensor(3.5152)

In [54]:
x = x.numpy()
y = y.numpy()

In [55]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

([0.6955276408888791, 2.2156477, 0.60402507], 1.171733468078835)

Equivalently

In [56]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

([0.6955276408888791, 2.2156476974487305, 0.6040250658988953],
 1.171733468078835)

### multilabel

In [57]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

tensor([[0.8444, 0.4619],
        [0.8719, 0.5789],
        [0.2051, 0.4613]])

In [58]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[0., 0.],
        [0., 1.],
        [1., 1.]])

In [59]:
nn.BCELoss()(x, y)

tensor(1.2399)

In [60]:
nn.BCELoss(reduce=False)(x, y)

tensor([[1.8602, 0.6197],
        [2.0548, 0.5467],
        [1.5844, 0.7738]])

In [61]:
x = x.numpy()
y = y.numpy()

In [62]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[1.86017666, 0.61970138],
        [2.05476   , 0.54665035],
        [1.58439839, 0.77379549]]),
 1.2399137111671308)

Equivalently

In [63]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

(array([[1.8601767 , 0.6197013 ],
        [2.05476   , 0.54665035],
        [1.5843984 , 0.7737955 ]], dtype=float32),
 1.2399137)

## BCEWithLogitsLoss

This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [64]:
x = torch.randn(3)
x

tensor([ 1.0295, -0.1577,  0.8108])

In [65]:
xs = nn.Sigmoid()(x)
xs

tensor([0.7368, 0.4607, 0.6923])

In [66]:
y = torch.FloatTensor(3).random_(2)
y

tensor([1., 0., 1.])

In [67]:
nn.BCELoss()(xs, y)

tensor(0.4302)

In [68]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.4302)

### multilabel

In [69]:
x = torch.randn(3, 2)
x

tensor([[-0.3987,  1.8488],
        [ 0.6951, -0.2117],
        [ 0.8462, -0.2805]])

In [70]:
xs = nn.Sigmoid()(x)
xs

tensor([[0.4016, 0.8640],
        [0.6671, 0.4473],
        [0.6998, 0.4303]])

In [71]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[0., 0.],
        [0., 1.],
        [1., 1.]])

In [72]:
nn.BCELoss()(xs, y)

tensor(0.9355)

In [73]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.9355)

## MarginRankingLoss

In [74]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

(tensor([1.7691, 0.9587, 1.5972]),
 tensor([ 1.8700, -1.4297, -1.4043]),
 tensor([ 1.,  1., -1.]))

In [75]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

tensor(1.1008)

In [76]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [77]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

([0.20093975067138672, 0, 3.1014212131500245], 1.1007869879404704)

## HingeEmbeddingLoss

In [78]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

tensor([[ 1.2138, -0.4005,  1.2856],
        [-0.0840,  0.1636, -0.3800]])

In [79]:
y

tensor([[ 1., -1., -1.],
        [-1., -1.,  1.]])

In [80]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

tensor(0.6925)

In [81]:
x = x.numpy()
y = y.numpy()
margin=1

In [82]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

array([[ 1.21377623,  1.40050983,  0.        ],
       [ 1.08403394,  0.83642654, -0.38003713]])

In [83]:
np.mean(lst)

0.6924515701830387

## MultiLabelMarginLoss

This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [84]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

tensor([[-0.7918, -0.6320, -0.4217, -0.7662]])

In [85]:
y

tensor([[-1,  1,  2,  2]])

In [86]:
nn.MultiLabelMarginLoss()(x, y)

tensor(0.)

In [87]:
x = x.numpy()
y = y.numpy()

In [88]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

In [89]:
lst, np.mean(lst)

([0.0], 0.0)

### multi-sample example

In [90]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

tensor([[ 1.1823,  0.2702,  0.1555, -0.7384],
        [ 0.3079, -1.6610,  0.4438, -0.0463],
        [-0.9901, -0.1242, -0.0226,  0.0932]])

In [91]:
y

tensor([[ 2,  3,  3, -1],
        [-1,  3,  1, -1],
        [-1,  3,  3, -1]])

In [92]:
nn.MultiLabelMarginLoss()(x, y)

tensor(1.0833)

In [93]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([3.2500042095780373, 0.0, 0.0], 1.0833347365260124)

## SmoothL1Loss

In [94]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [95]:
nn.SmoothL1Loss()(x, y)

tensor(0.8630)

In [96]:
nn.SmoothL1Loss(reduce=False)(x, y)

tensor([[1.2647, 0.0022, 0.4704],
        [1.0595, 0.7187, 1.6626]])

In [97]:
x = x.numpy() 
y = y.numpy()

In [98]:
def smoothl1loss(x, y):
    if abs(x-y)<1: return 1/2*(x-y)**2
    else: return abs(x-y)-1/2

In [99]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[1.26465571, 0.00220146, 0.47039302],
        [1.05950046, 0.71873915, 1.66257   ]]),
 0.863009967224146)

## SoftMarginLoss

In [100]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

tensor([[-1.0040, -1.3977, -0.5483,  0.3972],
        [ 0.7599,  0.4027, -2.2873, -0.4653]])

In [101]:
y

tensor([[-1.,  1.,  1.,  1.],
        [-1., -1.,  1.,  1.]])

In [102]:
nn.SoftMarginLoss()(x, y)

tensor(1.1055)

In [103]:
x = x.numpy()
y = y.numpy()

In [104]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.8623322329591194, 1.348717247500254], 1.1055247402296868)

## MultiLabelSoftMarginLoss

In [105]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

tensor([[ 0.1749, -1.9080,  0.2874,  0.6147],
        [ 0.0044,  0.4795,  1.5634, -1.2525]])

In [106]:
y

tensor([[0., 1., 0., 1.],
        [0., 1., 1., 1.]])

In [107]:
nn.MultiLabelSoftMarginLoss()(x, y)

tensor(0.8727)

In [108]:
x = x.numpy()
y = y.numpy()

In [109]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([1.027536825933876, 0.7177999670433679], 0.8726683964886219)

## CosineEmbeddingLoss

In [110]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

tensor([[-0.8123, -0.5928,  0.4364],
        [-0.1909,  0.4907,  1.0158]])

In [111]:
x2

tensor([[-0.4588, -0.3070,  0.7637],
        [ 0.3335, -0.4622, -1.1128]])

In [112]:
y

tensor([-1.,  1.])

In [113]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

tensor(1.3764)

In [114]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [115]:
from scipy.spatial.distance import cosine

def cos(x, y): return 1-cosine(x, y)

In [116]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

([0.7596048951148987, 1.9932622909545898], 1.3764335930347442)

## MultiMarginLoss

In [117]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[-1.1763, -0.9614, -0.9696,  0.8811],
        [ 0.2756,  1.8158, -0.6432,  0.1407]])

In [118]:
y

tensor([0, 1])

In [119]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

tensor(1.4017)

In [120]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [121]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([2.80338842918383, 0.8602149875435743], 1.831801708363702)

## TripletMarginLoss

In [122]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

tensor([[-0.4889, -0.1693,  0.7547],
        [ 0.8199,  0.3975, -0.9134]])

In [123]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

tensor(0.5850)

In [124]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [125]:
def d(x1, x2, p):
    return sum((x1-x2)**p)**(1/p)

In [126]:
lst = []
for k in range(len(x1)):
    sm = 0
    for i in range(len(x1[k])):
        sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) 
    lst.append(sm/len(x1[k]))

lst, np.mean(lst)

([1.1700361424856895, 0.0], 0.5850180712428448)

## References

- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152