In [1]:
import numpy as np
import torch
import torch.nn as nn

torch.__version__

'1.0.0.dev20181113'

## L1Loss

In [2]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

In [4]:
y

tensor([[-1.4317,  2.1610,  0.3035],
        [-0.1670,  1.3160, -0.2527]])

In [5]:
nn.L1Loss()(x, y)

tensor(1.1507)

In [7]:
nn.L1Loss(reduction="none")(x, y)

tensor([[0.0126, 2.9397, 0.0566],
        [1.2657, 1.7187, 0.9105]])

In [8]:
abs(x.numpy() - y.numpy())

array([[0.01260424, 2.9397056 , 0.05664884],
       [1.2657261 , 1.7187158 , 0.9105016 ]], dtype=float32)

In [9]:
abs(x.numpy() - y.numpy()).mean()

1.1506504

## MSELoss

In [19]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

tensor([[ 0.3124, -0.4303,  0.0836],
        [-0.2462,  0.2972, -0.1569]])

In [11]:
y

tensor([[-0.5254, -0.7966, -0.6927],
        [-0.0592, -0.3262,  0.9219]])

In [13]:
nn.MSELoss(reduction="none")(x, y)

tensor([[1.4002, 5.6431, 0.1964],
        [2.4432, 0.0090, 0.8011]])

In [14]:
nn.MSELoss()(x, y)

tensor(1.7488)

In [15]:
(x.numpy() - y.numpy())**2

array([[1.4002407 , 5.64311   , 0.19636004],
       [2.443164  , 0.00899746, 0.80106455]], dtype=float32)

In [16]:
((x.numpy() - y.numpy())**2).mean()

1.7488228

# LogSoftmax

In [85]:
x = torch.randn(2, 4)
x

tensor([[-0.2231,  2.3055, -0.0675,  0.0441],
        [ 0.0789, -1.8668,  0.3625,  0.4843]])

In [86]:
y = nn.LogSoftmax(dim=1)(x)
y

tensor([[-2.7733, -0.2446, -2.6177, -2.5060],
        [-1.3789, -3.3246, -1.0954, -0.9735]])

In [105]:
def logSoftmax(x,dim=0):
    e_xn = np.exp(x)
    return np.log( e_xn / e_xn.sum(dim=dim) ) 

In [106]:
xn = x.numpy()
lst = []
for k in range(len(x)):
    e_xn = np.exp(x[k])
    print(f"\nk                                     :{k}")
    print(f"x[k]                                    :{(xn[k])}")
    print(f"np.exp(x[k])                            :{e_xn}")
    print(f"e_xn / e_xn.sum()                       :{e_xn / e_xn.sum()} sum:{(e_xn / e_xn.sum()).sum()}")
    print(f"np.log( np.exp(x[k])/ np.exp(x[k]).sum():{np.log( e_xn / e_xn.sum() )}" )
    print(f"logSoftmax(x[k])                        :{logSoftmax(x[k])}")
    lst.append( np.log( np.exp(xn[k]) / np.exp(xn[k]).sum() ) )
lst


k                                     :0
x[k]                                    :[-0.22309178  2.3055398  -0.06754822  0.04414958]
np.exp(x[k])                            :tensor([ 0.8000, 10.0296,  0.9347,  1.0451])
e_xn / e_xn.sum()                       :tensor([0.0625, 0.7830, 0.0730, 0.0816]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-2.7733, -0.2446, -2.6177, -2.5060])
logSoftmax(x[k])                        :tensor([-2.7733, -0.2446, -2.6177, -2.5060])

k                                     :1
x[k]                                    :[ 0.07892732 -1.8667957   0.3624572   0.48429057]
np.exp(x[k])                            :tensor([1.0821, 0.1546, 1.4369, 1.6230])
e_xn / e_xn.sum()                       :tensor([0.2519, 0.0360, 0.3344, 0.3777]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-1.3789, -3.3246, -1.0954, -0.9735])
logSoftmax(x[k])                        :tensor([-1.3789, -3.3246, -1.0954, -0.9735])


[array([-2.7732751, -0.2446436, -2.6177316, -2.506034 ], dtype=float32),
 array([-1.3789021 , -3.324625  , -1.0953721 , -0.97353876], dtype=float32)]

# NLLLoss

In [107]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x
logSoftmax(x0,dim=1)

RuntimeError: The size of tensor a (4) must match the size of tensor b (3) at non-singleton dimension 1

In [77]:
y = torch.LongTensor(3).random_(4)
y

tensor([2, 2, 3])

In [78]:
nn.NLLLoss()(x, y)

tensor(1.6840)

In [79]:
nn.NLLLoss(reduction="none")(x, y)

tensor([1.6931, 2.8134, 0.5453])

In [80]:
xn = x.numpy()
yn = y.numpy()

In [81]:
lst = []
for k in range(len(yn)):
    lst.append(-xn[k,yn[k]])

lst, np.mean(lst)

([1.6931158, 2.8134205, 0.5453279], 1.6839548)

## CrossEntropyLoss

In [33]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[ 9.9237e-01, -2.5124e-01,  2.2127e-03, -1.4842e-01],
        [-2.5225e+00, -2.7517e-02, -1.5043e+00,  3.5565e-01]])

In [34]:
y

tensor([3, 2])

In [35]:
nn.CrossEntropyLoss(reduction="none")(x, y)

tensor([1.8236, 2.4984])

In [36]:
nn.CrossEntropyLoss()(x, y) 

tensor(2.1610)

In [40]:
xn = x.numpy()
yn = y.numpy()

lst = []
for k in range(len(yn)):
    print(f"k:{k} x[k] {xn[k]} y[k]:{y[k]} x[k,y[k]:{xn[k,yn[k]]}")
    lst.append(-np.log( np.exp(xn[k,yn[k]]) / np.exp(xn[k] ).sum() ) )
lst, np.mean(lst)

k:0 x[k] [ 0.99236536 -0.25124344  0.0022127  -0.14841606] y[k]:3 x[k]y[k]:-0.14841605722904205
k:1 x[k] [-2.522484   -0.0275165  -1.5042632   0.35564595] y[k]:2 x[k]y[k]:-1.504263162612915


([1.8235909, 2.498404], 2.1609974)

## PoissonNLLLoss

In [130]:
x = torch.randn(2, 4)
x

tensor([[ 0.6711,  1.5167,  1.4041,  0.2249],
        [-0.5825, -1.2273, -1.5340,  0.8917]])

In [131]:
y = torch.randn(2, 4)
y

tensor([[-0.4753,  0.1411,  0.3186,  0.1708],
        [ 0.2095, -0.6558, -0.7816,  0.5834]])

In [132]:
nn.PoissonNLLLoss()(x, y)

tensor(1.5702)

In [133]:
nn.PoissonNLLLoss(reduce=False)(x, y)

tensor([[ 2.2753,  4.3434,  3.6244,  1.2137],
        [ 0.6806, -0.5119, -0.9833,  1.9191]])

In [134]:
x = x.numpy()
y = y.numpy()

In [142]:
# target∗log(target)−target+0.5∗log(2πtarget)
def sterling_approx(y):
    return y*np.log(y) - y + 0.5*np.log(np.pi*y)

In [143]:
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [146]:
np.array(lst)

array([[ 2.27534461,  4.34339952,  3.62439346,  1.21373343],
       [ 0.68055761, -0.51185942, -0.9832679 ,  1.91914582]])

In [147]:
np.mean(lst)

1.570180892944336

## KLDivLoss

In [149]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
x

tensor([[ 0.7407,  0.8043,  0.7067],
        [ 0.9125,  0.4086,  0.9810]])

In [150]:
y

tensor([[ 0.9362,  0.4141,  0.0179],
        [ 0.6564,  0.2923,  0.5169]])

In [151]:
nn.KLDivLoss()(x, y)

tensor(-0.6234)

In [154]:
nn.KLDivLoss(reduce=False)(x, y)

tensor([[-0.7552, -0.6981, -0.0847],
        [-0.8753, -0.4789, -0.8482]])

In [156]:
x = x.numpy()
y = y.numpy()

In [160]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        # xi is already log 
        lsti.append(y[i][j] * (np.log(y[i][j]) - x[i][j]))
    lst.append(lsti)
np.array(lst)

array([[-0.7551561 , -0.6981464 , -0.08470216],
       [-0.875315  , -0.47894648, -0.84821874]], dtype=float32)

In [161]:
np.mean(lst)

-0.6234141

## BCELoss

Sigmoid

In [162]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

tensor([[ 0.3220, -0.8002, -0.5503, -2.0414],
        [ 1.0465, -0.2785, -1.6423,  1.1158]])

In [163]:
y

tensor([[ 0.5798,  0.3100,  0.3658,  0.1149],
        [ 0.7401,  0.4308,  0.1621,  0.7532]])

In [164]:
x = x.numpy()

In [165]:
1 / (1 + np.exp(-x))

array([[0.5798062 , 0.30998793, 0.36578804, 0.11492275],
       [0.7401055 , 0.43081176, 0.1621461 , 0.7531997 ]], dtype=float32)

### single label

In [174]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

tensor([ 0.5213,  0.5932,  0.5333])

In [175]:
y = torch.FloatTensor(3).random_(2)
y

tensor([ 1.,  1.,  1.])

In [176]:
nn.BCELoss()(x, y)

tensor(0.6008)

In [177]:
nn.BCELoss(reduce=False)(x, y)

tensor([ 0.6514,  0.5222,  0.6287])

In [178]:
loss = nn.BCELoss(size_average=False)
lss = loss(x, y)
lss

tensor(1.8024)

In [179]:
x = x.numpy()
y = y.numpy()

In [180]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

([0.65144944, 0.52221346, 0.6287041], 0.600789)

Equivalently

In [184]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

([0.65144944190979, 0.5222134590148926, 0.6287040710449219],
 0.6007889906565348)

### multilabel

In [189]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

tensor([[ 0.2400,  0.4380],
        [ 0.2651,  0.4915],
        [ 0.3721,  0.5370]])

In [190]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[ 1.,  1.],
        [ 1.,  1.],
        [ 1.,  0.]])

In [191]:
nn.BCELoss()(x, y)

tensor(1.0082)

In [192]:
nn.BCELoss(reduce=False)(x, y)

tensor([[ 1.4272,  0.8255],
        [ 1.3278,  0.7102],
        [ 0.9886,  0.7700]])

In [193]:
x = x.numpy()
y = y.numpy()

In [196]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[1.42715609, 0.82551563],
        [1.32778549, 0.71021408],
        [0.9886421 , 0.76996785]]), 1.0082135393626286)

Equivalently

In [198]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

(array([[1.4271561 , 0.8255156 ],
        [1.3277855 , 0.7102141 ],
        [0.9886421 , 0.76996785]], dtype=float32), 1.0082136)

## BCEWithLogitsLoss

This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [206]:
x = torch.randn(3)
x

tensor([-0.1104,  0.2577, -0.5487])

In [207]:
xs = nn.Sigmoid()(x)
xs

tensor([ 0.4724,  0.5641,  0.3662])

In [208]:
y = torch.FloatTensor(3).random_(2)
y

tensor([ 0.,  0.,  0.])

In [209]:
nn.BCELoss()(xs, y)

tensor(0.6419)

In [210]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.6419)

### multilabel

In [211]:
x = torch.randn(3, 2)
x

tensor([[-1.4298,  0.1712],
        [ 0.7382, -1.8834],
        [-1.7065,  1.1530]])

In [212]:
xs = nn.Sigmoid()(x)
xs

tensor([[ 0.1931,  0.5427],
        [ 0.6766,  0.1320],
        [ 0.1536,  0.7601]])

In [213]:
y = torch.FloatTensor(3, 2).random_(2)
y

tensor([[ 1.,  1.],
        [ 1.,  0.],
        [ 1.,  1.]])

In [214]:
nn.BCELoss()(xs, y)

tensor(0.8226)

In [216]:
nn.BCEWithLogitsLoss()(x, y)

tensor(0.8226)

## MarginRankingLoss

In [221]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

(tensor([-0.5451,  0.1447, -0.3011]),
 tensor([ 0.1900,  0.6117,  1.5479]),
 tensor([ 1.,  1., -1.]))

In [222]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

tensor(0.4674)

In [223]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [224]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

([0.835101580619812, 0.5670205116271972, 0], 0.4673740307490031)

## HingeEmbeddingLoss

In [235]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

tensor([[ 0.4457, -1.5535, -0.1648],
        [ 0.7037,  0.2432,  0.3004]])

In [236]:
y

tensor([[-1., -1.,  1.],
        [ 1.,  1.,  1.]])

In [237]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

tensor(0.6984)

In [238]:
x = x.numpy()
y = y.numpy()
margin=1

In [239]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

array([[ 0.55430901,  2.55346417, -0.16479899],
       [ 0.70371646,  0.24319194,  0.30040452]])

In [240]:
np.mean(lst)

0.6983811855316162

## MultiLabelMarginLoss

This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [244]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

tensor([[ 0.3257, -1.2182,  1.4421,  0.2452]])

In [245]:
y

tensor([[ 3, -1, -1, -1]])

In [246]:
nn.MultiLabelMarginLoss()(x, y)

tensor(0.8194)

In [248]:
x = x.numpy()
y = y.numpy()

In [249]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

0 3
1 3
2 3


In [250]:
lst, np.mean(lst)

([0.8193658106029034], 0.8193658106029034)

### multi-sample example

In [252]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

tensor([[-1.3972, -0.5922, -0.7198,  0.8985],
        [-0.6777,  0.3352, -0.1973, -0.0305],
        [ 0.4067,  0.2513, -1.0973, -0.1837]])

In [253]:
y

tensor([[ 3,  1,  0,  1],
        [ 2,  0,  0, -1],
        [ 1, -1,  2,  1]])

In [254]:
nn.MultiLabelMarginLoss()(x, y)

tensor(1.2635)

In [255]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.8555163443088531, 2.5048549212515354, 0.4300655126571655],
 1.263478926072518)

## SmoothL1Loss

In [257]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [258]:
nn.SmoothL1Loss()(x, y)

tensor(0.5490)

In [259]:
nn.SmoothL1Loss(reduce=False)(x, y)

tensor([[ 0.6491,  0.0651,  1.2454],
        [ 0.3355,  0.9598,  0.0390]])

In [260]:
x = x.numpy() 
y = y.numpy()

In [261]:
def smoothl1loss(x, y):
    if abs(x-y)<1: return 1/2*(x-y)**2
    else: return abs(x-y)-1/2

In [263]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

(array([[0.64909291, 0.06508577, 1.24535966],
        [0.33547111, 0.95977783, 0.03898569]]), 0.5489621638637431)

## SoftMarginLoss

In [264]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

tensor([[-0.8887, -0.3107, -0.6408, -2.5345],
        [ 0.2605, -0.1133,  0.2433,  0.3387]])

In [265]:
y

tensor([[ 1.,  1.,  1., -1.],
        [ 1.,  1.,  1.,  1.]])

In [266]:
nn.SoftMarginLoss()(x, y)

tensor(0.7092)

In [267]:
x = x.numpy()
y = y.numpy()

In [268]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.8084959688801056, 0.6099205543628277], 0.7092082616214666)

## MultiLabelSoftMarginLoss

In [269]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

tensor([[-1.9676, -1.4290, -0.1576,  0.6041],
        [ 0.0584, -1.0734, -0.4568,  0.4787]])

In [270]:
y

tensor([[ 0.,  1.,  1.,  1.],
        [ 1.,  0.,  1.,  0.]])

In [271]:
nn.MultiLabelSoftMarginLoss()(x, y)

tensor(0.7315)

In [272]:
x = x.numpy()
y = y.numpy()

In [274]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([0.7464252382614533, 0.7166620319227274], 0.7315436350920903)

## CosineEmbeddingLoss

In [276]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

tensor([[ 0.1394, -0.9875, -0.4814],
        [ 0.4167,  0.9489, -0.2292]])

In [277]:
x2

tensor([[ 0.3618,  1.1291, -1.3030],
        [ 0.9274,  0.7760, -1.6018]])

In [278]:
y

tensor([-1.,  1.])

In [279]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

tensor(0.1503)

In [280]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [281]:
from scipy.spatial.distance import cosine

def cos(x, y): return 1-cosine(x, y)

In [282]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

([0, 0.3005916476249695], 0.15029582381248474)

## MultiMarginLoss

In [283]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

tensor([[-0.0025, -0.6019,  1.9809, -1.2663],
        [ 0.6060, -0.0002,  0.9110, -0.6320]])

In [284]:
y

tensor([ 1,  3])

In [285]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

tensor(3.4149)

In [286]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [287]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            sm += max(0, (margin - x[k, y[k]] + x[k, i])**p)
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

([3.6083879542856043, 3.2214048583725967], 3.4148964063291007)

## TripletMarginLoss

In [288]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

tensor([[ 2.0001,  1.2658, -1.1397],
        [ 0.9793, -0.3433, -0.0746]])

In [289]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

tensor(0.4055)

In [290]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [291]:
def d(x1, x2, p):
    return sum((x1-x2)**p)**(1/p)

In [292]:
lst = []
for k in range(len(x1)):
    sm = 0
    for i in range(len(x1[k])):
        sm += max(d(x1[k], x2[k], p)-d(x1[k], x3[k], p)+margin, 0) 
    lst.append(sm/len(x1[k]))

lst, np.mean(lst)

([0.0, 0.8110052643651849], 0.40550263218259247)

## References

- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152