In [1]:
import numpy as np
import torch
import torch.nn as nn

torch.__version__

'1.0.0.dev20181014'

# Select and assign

In [2]:
x = torch.LongTensor(2,4).random_(0, 10)
x

tensor([[2, 9, 3, 4],
        [6, 8, 1, 6]])

In [3]:
x[x==5] = 0
x

tensor([[2, 9, 3, 4],
        [6, 8, 1, 6]])

In [4]:
x[x==0] = 1
x

tensor([[2, 9, 3, 4],
        [6, 8, 1, 6]])

## L1Loss

In [5]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

tensor([[ 0.4013, -0.7200, -0.5870],
        [-0.6927,  0.3350,  0.4485]])

In [6]:
y

tensor([[-0.5541,  1.1629, -1.2743],
        [ 1.9236, -1.5449,  2.4489]])

In [7]:
nn.L1Loss()(x, y)

tensor(1.6704)

In [8]:
nn.L1Loss(reduction="none")(x, y)

tensor([[0.9553, 1.8829, 0.6872],
        [2.6163, 1.8799, 2.0004]])

In [9]:
abs(x.numpy() - y.numpy())

array([[0.9553361, 1.8829002, 0.6872454],
       [2.6163142, 1.8799164, 2.000426 ]], dtype=float32)

In [10]:
abs(x.numpy() - y.numpy()).mean()

1.6703564

## MSELoss

In [11]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

tensor([[-1.9234, -0.8440, -0.2588],
        [ 0.4757, -2.3456, -0.0923]])

In [12]:
y

tensor([[ 0.0508, -0.2656, -0.7649],
        [ 0.2930,  0.0704,  0.7641]])

In [13]:
nn.MSELoss(reduction="none")(x, y)

tensor([[3.8973, 0.3346, 0.2561],
        [0.0334, 5.8372, 0.7335]])

In [14]:
nn.MSELoss()(x, y)

tensor(1.8487)

In [15]:
(x.numpy() - y.numpy())**2

array([[3.8973296 , 0.33457208, 0.25605854],
       [0.03337498, 5.8371625 , 0.73348194]], dtype=float32)

In [16]:
((x.numpy() - y.numpy())**2).mean()

1.8486633

# LogSoftmax

In [17]:
def logSoftmax(x,dim=0):
    """Log-softmax of `x` along axis `dim`, computed with NumPy.

    Args:
        x: array-like of scores (anything `np.asarray` accepts).
        dim: axis along which the softmax is normalised.

    Returns:
        A NumPy array with the same shape as `x` holding
        `x - log(sum(exp(x)))` taken along `dim`.
    """
    x = np.asarray(x)
    # Shift by the per-slice maximum so exp() cannot overflow (or underflow to
    # an all-zero denominator); the shift cancels out in the final result.
    shifted = x - x.max(axis=dim, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))


In [18]:
xn = x.numpy()
lst = []
for k in range(len(x)):
    e_xn = np.exp(x[k])
    print(f"\nk                                     :{k}")
    print(f"x[k]                                    :{(xn[k])}")
    print(f"np.exp(x[k])                            :{e_xn}")
    print(f"e_xn / e_xn.sum()                       :{e_xn / e_xn.sum()} sum:{(e_xn / e_xn.sum()).sum()}")
    print(f"np.log( np.exp(x[k])/ np.exp(x[k]).sum():{np.log( e_xn / e_xn.sum() )}" )
    print(f"logSoftmax(x[k])                        :{logSoftmax(x[k])}")
    lst.append( np.log( np.exp(xn[k]) / np.exp(xn[k]).sum() ) )
lst


k                                     :0
x[k]                                    :[-1.9233922  -0.84403884 -0.25883836]
np.exp(x[k])                            :tensor([0.1461, 0.4300, 0.7719])
e_xn / e_xn.sum()                       :tensor([0.1084, 0.3190, 0.5726]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-2.2220, -1.1427, -0.5575])
logSoftmax(x[k])                        :[-2.2220354 -1.1426822 -0.5574816]

k                                     :1
x[k]                                    :[ 0.4757096  -2.345644   -0.09231232]
np.exp(x[k])                            :tensor([1.6092, 0.0958, 0.9118])
e_xn / e_xn.sum()                       :tensor([0.6149, 0.0366, 0.3485]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-0.4862, -3.3076, -1.0542])
logSoftmax(x[k])                        :[-0.48622787 -3.3075814  -1.0542498 ]


[array([-2.2220354, -1.1426822, -0.5574816], dtype=float32),
 array([-0.48622787, -3.3075814 , -1.0542498 ], dtype=float32)]

# NLLLoss

In [19]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x

(tensor([[-1.6269,  0.7765,  0.2614,  0.2674],
         [ 0.5697,  0.6802,  2.4649,  0.2054],
         [-1.1777,  0.3451,  0.5829, -1.7882]]),
 tensor([[-3.2315, -0.8281, -1.3432, -1.3371],
         [-2.2476, -2.1371, -0.3525, -2.6119],
         [-2.4802, -0.9575, -0.7196, -3.0907]]))

In [20]:
logSoftmax(x0,dim=1)

array([[-3.2314594 , -0.8280719 , -1.3431978 , -1.3371246 ],
       [-2.2476273 , -2.137111  , -0.35245374, -2.6119134 ],
       [-2.4802296 , -0.9574655 , -0.7196103 , -3.0907147 ]],
      dtype=float32)

In [21]:
y = torch.LongTensor(3).random_(4)
y

tensor([3, 1, 2])

In [22]:
nn.NLLLoss()(x, y)

tensor(1.3979)

In [23]:
nn.NLLLoss(reduction="none")(x, y)

tensor([1.3371, 2.1371, 0.7196])

In [24]:
def nllloss(x,y,w=None):
    """Negative log-likelihood of the target classes.

    `x` is passed through `logSoftmax(..., dim=1)` first; if `w` is given the
    log-probabilities are multiplied by it (broadcast against the class axis)
    before the entry of each row's target class `y[i]` is picked out.

    Returns:
        (per-sample losses, plain mean of the per-sample losses).
    """
    log_probs = logSoftmax(x,dim=1)
    if w is not None:
        log_probs = log_probs*w
    rows   = np.arange(len(y))
    picked = log_probs[rows,y[rows]]
    per_sample = -picked
    return per_sample, -picked.mean()


In [25]:
xn = x.numpy()
yn = y.numpy()

In [26]:
lst = []
for k in range(len(yn)):
    lst.append(-xn[k,yn[k]])

print(f"nllloss:{nllloss(x,y)}")
lst, np.mean(lst)

nllloss:(array([1.3371245, 2.1371107, 0.7196103], dtype=float32), 1.3979484)


([1.3371245, 2.1371107, 0.7196103], 1.3979484)

# Experiment to reproduce pytorch crossentropy

In [27]:
#Example
#Let bs, c, width, height be the batch size, number of classes, width and height of the image.
bs,c,width,height =   4,  6 ,2    , 5
x = torch.randn(      bs, c, width, height) 
y = torch.randint(c, (bs,    width, height) ) 
w = torch.rand(c)
w = w/w.sum() #normalize

#x: predictions for all images and classes.
#y: the ground truth is a mask holding the class of each pixel (i.e. a compact representation of a one-hot encoding)
#w: the weight of each class in the loss function

print(f"Size of x, y, w: {x.size()}, {y.size()}, {w.size()}")

Size of x, y, w: torch.Size([4, 6, 2, 5]), torch.Size([4, 2, 5]), torch.Size([6])


In [28]:
def printWithDec(v,title=None,d=2):
    """Print tensor `v` as a NumPy array with `d` decimals, optionally prefixed by `title:`.

    Scientific notation is suppressed while printing.
    """
    prefix = () if title is None else (f"{title}:",)
    with np.printoptions(precision=d, suppress=True):
        print(*prefix, v.numpy())

In [29]:
y

tensor([[[5, 0, 4, 3, 4],
         [1, 2, 3, 5, 3]],

        [[5, 0, 4, 4, 3],
         [1, 5, 0, 4, 4]],

        [[1, 3, 2, 2, 1],
         [0, 0, 0, 1, 0]],

        [[2, 4, 3, 4, 2],
         [5, 0, 5, 5, 1]]])

In [66]:
w

tensor([0.1092, 0.1620, 0.2945, 0.0014, 0.1703, 0.2626])

In [69]:
criterion = nn.CrossEntropyLoss(weight=w,reduction="none")
loss = criterion(x,y)
print("loss.size()", loss.size() )
printWithDec(loss,"crossentropy loss with weight\n",3)
printWithDec(nn.CrossEntropyLoss(weight=w)(x,y),"crossentropy loss with weight and reduction=elementwise_mean",2)




loss.size() torch.Size([4, 2, 5])
crossentropy loss with weight
: [[[0.287 0.278 0.255 0.003 0.17 ]
  [0.417 0.957 0.004 0.443 0.003]]

 [[0.248 0.296 0.367 0.388 0.002]
  [0.702 0.851 0.072 0.623 0.255]]

 [[0.256 0.004 0.753 0.729 0.332]
  [0.393 0.169 0.129 0.146 0.454]]

 [[0.263 0.361 0.001 0.339 0.665]
  [0.186 0.163 0.697 0.854 0.375]]]
crossentropy loss with weight and reduction=elementwise_mean: 2.128388


In [32]:
print("y:\n",y.numpy())
printWithDec(loss,"loss",2)

y:
 [[[5 0 4 3 4]
  [1 2 3 5 3]]

 [[5 0 4 4 3]
  [1 5 0 4 4]]

 [[1 3 2 2 1]
  [0 0 0 1 0]]

 [[2 4 3 4 2]
  [5 0 5 5 1]]]
loss: [[[0.29 0.28 0.26 0.   0.17]
  [0.42 0.96 0.   0.44 0.  ]]

 [[0.25 0.3  0.37 0.39 0.  ]
  [0.7  0.85 0.07 0.62 0.25]]

 [[0.26 0.   0.75 0.73 0.33]
  [0.39 0.17 0.13 0.15 0.45]]

 [[0.26 0.36 0.   0.34 0.66]
  [0.19 0.16 0.7  0.85 0.37]]]


In [33]:
print("Loss for pixels with class index:")
for i in range(c):
    printWithDec(loss[y==i], f"class {i} with mean {loss[y==i].mean():.2f} and sum {loss[y==i].sum():.2f}" ) 
    
print("\nMean loss pr prediction of batch images:")
for i in range(len(loss)):
    printWithDec( loss[i].mean(), f"batch image {i}:", d=2)

    

Loss for pixels with class index:
class 0 with mean 0.24 and sum 1.95: [0.28 0.3  0.07 0.39 0.17 0.13 0.45 0.16]
class 1 with mean 0.37 and sum 2.23: [0.42 0.7  0.26 0.33 0.15 0.37]
class 2 with mean 0.67 and sum 3.37: [0.96 0.75 0.73 0.26 0.66]
class 3 with mean 0.00 and sum 0.02: [0. 0. 0. 0. 0. 0.]
class 4 with mean 0.34 and sum 2.76: [0.26 0.17 0.37 0.39 0.62 0.25 0.36 0.34]
class 5 with mean 0.51 and sum 3.57: [0.29 0.44 0.25 0.85 0.19 0.7  0.85]

Mean loss pr prediction of batch images:
batch image 0:: 0.28173932
batch image 1:: 0.38041538
batch image 2:: 0.33649737
batch image 3:: 0.3905453


In [65]:
def apply_along_axis(tensor, func, axis=0):
    """Apply `func` to every slice obtained by unbinding `tensor` along `axis`.

    The results are stacked back together along the same `axis`. Note that,
    unlike `np.apply_along_axis`, `func` receives the slices *indexed by*
    `axis`, not the 1-D vectors running along it.
    """
    slices = torch.unbind(tensor, dim=axis)
    mapped = list(map(func, slices))
    return torch.stack(mapped, dim=axis)

def logSoftmax_1d(x,dim):
    """Log-softmax of tensor `x` along dimension `dim`.

    Args:
        x: torch tensor of scores.
        dim: dimension over which the softmax is normalised.

    Returns:
        A new tensor with the same shape as `x`; `x` itself is not modified.
    """
    # log(exp(x) / sum(exp(x))) == x - logsumexp(x). keepdim=True lets the
    # normaliser broadcast for any `dim` (the previous per-row loop was only
    # correct for dim=1), and logsumexp avoids overflow for large scores.
    return x - torch.logsumexp(x, dim=dim, keepdim=True)

    #return e_xn
#    return ( e_xn / e_xn.sum(0,keepdim=True) ).log()
#logSoftmax_1d(x,dim=1)

#xa = apply_along_axis( x, logSoftmax_1d, axis=1 )
#print("x:\n", x.exp())
#print("x:\n", xa[0])
#x[0,0].exp()
#xa[0,0]

#apply_along_axis(loss, lambda x:x.mean())
ex = x[0,:,0,0].exp()
#print(ex/ex.sum()), 
print("-logsoftmax for 1-hot at pixel at 0,0:", -(ex/ex.sum()).log() ), 
#print(ex.sum())
#print( f"exp of x for first pixel for class 0:", x[0,:,0,0].exp().sum() )
#xa = apply_along_axis(x, logSoftmax_1d, axis=1)
#torch.unbind(x, dim=1)
0.2703+1.7679 + 0.8170 + 4.9955 + 1.2597 + 4.0727
lsm = logSoftmax_1d(x,dim=1)
print("-logsoftmax for pixel 0,0:            ", -lsm[0,:,0,0] )

-logsoftmax for 1-hot at pixel at 0,0: tensor([2.2710, 1.1528, 3.2058, 1.7625, 3.4082, 1.0913])
-logsoftmax for pixel 0,0:             tensor([2.2710, 1.1528, 3.2058, 1.7625, 3.4082, 1.0913])


In [72]:
loss = nn.CrossEntropyLoss(reduction="none")(x,y)
print("loss no weight:\n", loss )


loss no weight:
 tensor([[[1.0913, 2.5444, 1.5001, 2.1175, 0.9995],
         [2.5732, 3.2506, 2.7228, 1.6884, 1.9917]],

        [[0.9448, 2.7134, 2.1557, 2.2805, 1.5269],
         [4.3361, 3.2389, 0.6568, 3.6563, 1.4973]],

        [[1.5805, 2.5311, 2.5561, 2.4766, 2.0499],
         [3.6022, 1.5479, 1.1826, 0.8993, 4.1591]],

        [[0.8945, 2.1219, 0.5442, 1.9889, 2.2575],
         [0.7094, 1.4966, 2.6549, 3.2540, 2.3149]]])


In [83]:
print("y:\n",y.numpy())
print("Loss for pixels with class index:")
mean_of_classmeans = 0
loss = nn.CrossEntropyLoss( weight=w, reduction="none")(x,y)
for i in range(c):
    # Select the per-pixel losses whose ground-truth class is i (computed once per class).
    loss_yi = loss[y==i]
    printWithDec(loss_yi, f"class {i} with mean {loss_yi.mean():.2f} and sum {loss_yi.sum():.2f}" )
    mean_of_classmeans +=  loss_yi.mean()
# Average over the actual number of classes instead of a hard-coded 6.
mean_of_classmeans /= c
print("mean of class means:", mean_of_classmeans)
print("mean loss:", loss.mean())
# nn.CrossEntropyLoss() uses the default mean reduction and no class weights.
print("crossentropy mean (no weight):", nn.CrossEntropyLoss()(x,y))


y:
 [[[5 0 4 3 4]
  [1 2 3 5 3]]

 [[5 0 4 4 3]
  [1 5 0 4 4]]

 [[1 3 2 2 1]
  [0 0 0 1 0]]

 [[2 4 3 4 2]
  [5 0 5 5 1]]]
Loss for pixels with class index:
class 0 with mean 2.24 and sum 17.90: [2.54 2.71 0.66 3.6  1.55 1.18 4.16 1.5 ]
class 1 with mean 2.29 and sum 13.75: [2.57 4.34 1.58 2.05 0.9  2.31]
class 2 with mean 2.29 and sum 11.44: [3.25 2.56 2.48 0.89 2.26]
class 3 with mean 1.91 and sum 11.43: [2.12 2.72 1.99 1.53 2.53 0.54]
class 4 with mean 2.03 and sum 16.20: [1.5  1.   2.16 2.28 3.66 1.5  2.12 1.99]
class 5 with mean 1.94 and sum 13.58: [1.09 1.69 0.94 3.24 0.71 2.65 3.25]
mean of class means: tensor(2.1147)
mean loss: tensor(2.1077)
crossentropy sum: tensor(2.1077)


In [113]:
print(nn.CrossEntropyLoss( weight=w, reduction="none")(x,y).sum() )
print(nn.CrossEntropyLoss( weight=w, reduction="sum")(x,y))
print(nn.CrossEntropyLoss( weight=w)(x,y))
print(nn.CrossEntropyLoss()(x,y))



tensor(13.8920)
tensor(13.8920)
tensor(2.1284)
tensor(2.1077)


In [None]:
def logSoftmax(x,dim=0):
    """Log-softmax of `x` along `dim` for a torch tensor or a NumPy array.

    Torch tensors stay torch tensors; anything else is converted with
    `np.asarray` and a NumPy array is returned. Both paths use the
    log-sum-exp form so that large scores do not overflow.
    """
    if torch.is_tensor(x):
        return x - torch.logsumexp(x, dim=dim, keepdim=True)
    x = np.asarray(x)
    shifted = x - x.max(axis=dim, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))

def nllloss(x,y,w=None):
    """Per-element negative log-likelihood of the target classes.

    Args:
        x: raw scores with the class axis at position 1, e.g. (N, C) or
           (N, C, H, W); torch tensor or NumPy array.
        y: integer class indices shaped like `x` without the class axis,
           e.g. (N,) or (N, H, W).
        w: optional per-class weights of length C.

    Returns:
        (loss, mean): `loss` has the shape of `y`; `mean` is the plain
        (unweighted) mean of `loss`.
    """
    x_lsm = logSoftmax(x,dim=1)

    if torch.is_tensor(x_lsm):
        y = torch.as_tensor(y).long()
        # Pick the log-probability of the target class along the class axis.
        picked = x_lsm.gather(1, y.unsqueeze(1)).squeeze(1)
        if w is not None:
            # Weight each element by the weight of its own target class.
            picked = picked * torch.as_tensor(w, dtype=picked.dtype)[y]
    else:
        y = np.asarray(y)
        picked = np.take_along_axis(x_lsm, np.expand_dims(y, 1), axis=1).squeeze(1)
        if w is not None:
            picked = picked * np.asarray(w)[y]
    return -picked, -picked.mean()

def crossentropy( x, y, w=None ):
    """Cross-entropy on raw scores: log-softmax followed by `nllloss`."""
    return nllloss(x,y,w)

crossentropy(x,y,w)

## CrossEntropyLoss

In [None]:
def crossentropy( x, y, w=None ):
    """Cross-entropy on raw scores; simply forwards to `nllloss(x, y, w)`."""
    result = nllloss(x,y,w)
    return result

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

In [None]:
y

In [None]:
nn.CrossEntropyLoss(reduction="none")(x, y), nn.CrossEntropyLoss()(x, y) 

In [None]:
xn = x.numpy()
yn = y.numpy()

lst = []
for k in range(len(yn)):
    print(f"k:{k} x[k] {xn[k]} y[k]:{y[k]} x[k,y[k]:{xn[k,yn[k]]}")
    lst.append(-np.log( np.exp(xn[k,yn[k]]) / np.exp(xn[k] ).sum() ) )
lst, np.mean(lst)

In [None]:
nllloss(xn,yn)

## CrossEntropyLoss with weights

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
w = torch.rand(4)
x,y,w

In [None]:
w = torch.rand(4)
w

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="sum")(x, y) 

In [None]:
crossentropy(x.numpy(),y.numpy(),w.numpy())

In [None]:
# The mean values do not add up: with a class `weight`, PyTorch's mean reduction
# (called "elementwise_mean" in older releases) divides the weighted sum by the sum of
# the weights of the targets, whereas crossentropy() here takes a plain mean over the
# samples. When the weights are all 1 the two methods agree.
w = torch.rand(4)*0+1

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="mean")(x, y) 

In [None]:
crossentropy(x.numpy(),y.numpy(),w.numpy())

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="sum")(x, y) 

In [None]:
(1.2285+2.2941)/2

# Dice loss: simple multiclass

In [None]:
def dice_loss(input, target, class_weights=None, smooth=1.):
    """Weighted multiclass soft Dice loss.

    Args:
        input: predictions with the class axis at position 1, e.g. (N, C, ...).
        target: one-hot ground truth with the same shape as `input`.
        class_weights: optional sequence with one weight per class; defaults
            to a weight of 1 for every class.
        smooth: constant added to numerator and denominator so that a class
            absent from both `input` and `target` contributes zero loss.

    Returns:
        Sum over classes of `w_c * (1 - dice_c)`.
    """
    # The number of classes is read from the input instead of an undefined global.
    n_classes = input.shape[1]
    if class_weights is None:
        class_weights = [1.] * n_classes
    loss = 0.
    for cls in range(n_classes):
        # reshape (not view): the class slice of an N-D tensor is non-contiguous.
        iflat = input[:, cls].reshape(-1)
        tflat = target[:, cls].reshape(-1)
        intersection = (iflat * tflat).sum()

        dice = (2. * intersection + smooth) / (iflat.sum() + tflat.sum() + smooth)
        loss += class_weights[cls] * (1 - dice)
    return loss

In [None]:
def dice_loss(input, label, epsilon=0.00001):
    """
    Dice loss for comparing the similarity of two batches of data,
    usually used for binary image segmentation, i.e. the labels are binary.
    The dice loss can be defined by the equation below:
    .. math::
        dice\\_loss &= 1 - \\frac{2 * intersection\\_area}{total\\_area} \\\\
                  &= \\frac{(total\\_area - intersection\\_area) - intersection\\_area}{total\\_area} \\\\
                  &= \\frac{(union\\_area - intersection\\_area)}{total\\_area}
    Args:
        input (Variable): The predictions with rank>=2. The first dimension is batch size,
                          and the last dimension is class number.
        label (Variable): The ground truth with the same rank as input. The first dimension
                          is batch size, and the last dimension is 1.
        epsilon (float): Added to the denominator only, which avoids a division by zero.
                         If both input and label are empty the returned value is 1.
                         Default: 0.00001
    Returns:
        dice_loss (Variable): The mean over the batch of the per-sample dice loss.
    Examples:
        .. code-block:: python
            predictions = fluid.layers.softmax(x)
            loss = fluid.layers.dice_loss(input=predictions, label=label)

    NOTE(review): one_hot, reduce_sum and reduce_mean are not defined or imported
    anywhere in the visible notebook (they look like PaddlePaddle fluid layers), and
    this definition shadows the dice_loss defined in the previous cell -- confirm.
    """
    # Expand the class indices into a one-hot encoding with as many classes as input's last axis.
    label = one_hot(label, depth=input.shape[-1])
    # Reduce over every axis except the batch axis.
    reduce_dim = list(range(1, len(input.shape)))
    inse = reduce_sum(input * label, dim=reduce_dim)
    dice_denominator = reduce_sum(
        input, dim=reduce_dim) + reduce_sum(
            label, dim=reduce_dim)
    # Despite the name, this is already the loss (1 - dice coefficient).
    dice_score = 1 - inse * 2 / (dice_denominator + epsilon)
    return reduce_mean(dice_score)

## PoissonNLLLoss

In [None]:
x = torch.randn(2, 4)
x

In [None]:
y = torch.randn(2, 4)
y

In [None]:
nn.PoissonNLLLoss()(x, y)

In [None]:
# `reduce=False` is deprecated; reduction="none" is the equivalent used in the earlier cells.
nn.PoissonNLLLoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
# target*log(target) - target + 0.5*log(2*pi*target)
def sterling_approx(y):
    """Stirling approximation of log(y!).

    Args:
        y: positive number or NumPy array of positive numbers.

    Returns:
        y*log(y) - y + 0.5*log(2*pi*y), element-wise.
    """
    # The factor 2 inside the last logarithm was missing before.
    return y*np.log(y) - y + 0.5*np.log(2*np.pi*y)

In [None]:
# Manual element-wise Poisson NLL for log-space predictions x and targets y:
# exp(x) - y*x, plus the Stirling term for targets greater than 1.
# NOTE(review): PyTorch only adds the Stirling term when PoissonNLLLoss is built with
# full=True; the nn.PoissonNLLLoss() calls above use the default, so the results only
# coincide when no target exceeds 1 -- confirm which variant is intended.
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [None]:
np.array(lst)

In [None]:
np.mean(lst)

## KLDivLoss

In [None]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
x

In [None]:
y

In [None]:
nn.KLDivLoss()(x, y)

In [None]:
# `reduce=False` is deprecated; reduction="none" returns the element-wise losses.
nn.KLDivLoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        # xi is already log 
        lsti.append(y[i][j] * (np.log(y[i][j]) - x[i][j]))
    lst.append(lsti)
np.array(lst)

In [None]:
np.mean(lst)

## BCELoss

Sigmoid

In [None]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

In [None]:
y

In [None]:
x = x.numpy()

In [None]:
1 / (1 + np.exp(-x))

### single label

In [None]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

In [None]:
y = torch.FloatTensor(3).random_(2)
y

In [None]:
nn.BCELoss()(x, y)

In [None]:
# `reduce=False` is deprecated; reduction="none" returns the element-wise losses.
nn.BCELoss(reduction="none")(x, y)

In [None]:
# `size_average=False` is deprecated; reduction="sum" is the equivalent.
loss = nn.BCELoss(reduction="sum")
lss = loss(x, y)
lss

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

Equivalently

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

### multilabel

In [None]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

In [None]:
y = torch.FloatTensor(3, 2).random_(2)
y

In [None]:
nn.BCELoss()(x, y)

In [None]:
# `reduce=False` is deprecated; reduction="none" returns the element-wise losses.
nn.BCELoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

Equivalently

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

## BCEWithLogitsLoss

This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [None]:
x = torch.randn(3)
x

In [None]:
xs = nn.Sigmoid()(x)
xs

In [None]:
y = torch.FloatTensor(3).random_(2)
y

In [None]:
nn.BCELoss()(xs, y)

In [None]:
nn.BCEWithLogitsLoss()(x, y)

### multilabel

In [None]:
x = torch.randn(3, 2)
x

In [None]:
xs = nn.Sigmoid()(x)
xs

In [None]:
y = torch.FloatTensor(3, 2).random_(2)
y

In [None]:
nn.BCELoss()(xs, y)

In [None]:
nn.BCEWithLogitsLoss()(x, y)

## MarginRankingLoss

In [None]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

In [None]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [None]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

## HingeEmbeddingLoss

In [None]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

In [None]:
y

In [None]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

In [None]:
x = x.numpy()
y = y.numpy()
margin=1

In [None]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

In [None]:
np.mean(lst)

## MultiLabelMarginLoss

This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [None]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

In [None]:
y

In [None]:
nn.MultiLabelMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

In [None]:
lst, np.mean(lst)

### multi-sample example

In [None]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

In [None]:
y

In [None]:
nn.MultiLabelMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## SmoothL1Loss

In [None]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [None]:
nn.SmoothL1Loss()(x, y)

In [None]:
# `reduce=False` is deprecated; reduction="none" returns the element-wise losses.
nn.SmoothL1Loss(reduction="none")(x, y)

In [None]:
x = x.numpy() 
y = y.numpy()

In [None]:
def smoothl1loss(x, y):
    """Smooth L1 (Huber-like) loss for a single pair of scalars.

    Quadratic, 0.5*(x-y)**2, while |x-y| < 1; linear, |x-y| - 0.5, otherwise.
    """
    gap = abs(x-y)
    return 1/2*(x-y)**2 if gap < 1 else gap - 1/2

In [None]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

## SoftMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

In [None]:
y

In [None]:
nn.SoftMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## MultiLabelSoftMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

In [None]:
y

In [None]:
nn.MultiLabelSoftMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## CosineEmbeddingLoss

In [None]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

In [None]:
x2

In [None]:
y

In [None]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [None]:
from scipy.spatial.distance import cosine

def cos(x, y):
    """Cosine similarity of two vectors, i.e. one minus SciPy's cosine distance."""
    distance = cosine(x, y)
    return 1 - distance

In [None]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

## MultiMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

In [None]:
y

In [None]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

In [None]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [None]:
# Manual MultiMarginLoss: for each sample, sum max(0, margin - x[y] + x[i])**p over
# the non-target classes i and divide by the number of classes.
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            # Clamp at zero BEFORE raising to the power p; otherwise an even p turns
            # negative margins into positive contributions.
            sm += max(0, margin - x[k, y[k]] + x[k, i])**p
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## TripletMarginLoss

In [None]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

In [None]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [None]:
def d(x1, x2, p):
    """p-norm distance between two 1-D vectors: (sum_i |x1_i - x2_i|**p) ** (1/p).

    NOTE(review): PyTorch's pairwise distance also adds a small eps to the
    difference; that is not reproduced here, so results may differ slightly.
    """
    # The absolute value matters for odd p, where signed differences would cancel.
    return sum(abs(x1-x2)**p)**(1/p)

In [None]:
# Manual TripletMarginLoss: one hinge value per sample,
# max(d(anchor, positive) - d(anchor, negative) + margin, 0).
# The value does not depend on the feature index, so no inner loop is needed.
lst = [
    max(d(x1[k], x2[k], p) - d(x1[k], x3[k], p) + margin, 0)
    for k in range(len(x1))
]

lst, np.mean(lst)

## References

- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152

In [None]:
class CustomCrossEntropy(torch.nn.Module):
    """Class-weighted cross-entropy loss as a torch module.

    `forward` applies log-softmax over dim 1, picks the log-probability of each
    target class, weights it by that class' weight and returns the weighted mean
    (weighted sum divided by the sum of the target weights).

    The static methods are NumPy reference implementations kept for comparison.
    """

    def __init__(self, class_weights):
        """
        Args:
            class_weights: array-like with one weight per class.
        """
        super(CustomCrossEntropy,self).__init__()
        class_weights = np.asarray(class_weights)
        # Buffers follow the module on .to(device)/.cuda(), so no device is forced here
        # (a hard-coded .cuda() fails on machines without a GPU).
        self.register_buffer("class_weights",  torch.from_numpy( class_weights) )
        # 1.0 for classes with a positive weight, 0.0 otherwise.
        self.register_buffer("ix_class_weights", torch.from_numpy( (class_weights>0).flatten().astype(np.float32) ) )

    @staticmethod
    def logSoftmax(x,dim=0):
        """NumPy log-softmax of `x` along axis `dim` (max-shifted for stability)."""
        x = np.asarray(x)
        shifted = x - x.max(axis=dim, keepdims=True)
        return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))

    @staticmethod
    def nllloss(x,y,w=None):
        """NumPy negative log-likelihood for 2-D scores `x` and 1-D targets `y`.

        Returns:
            (per-sample losses, plain mean of the per-sample losses).
        """
        # Static methods have no implicit scope for siblings: qualify with the class name.
        x_lsm = CustomCrossEntropy.logSoftmax(x,dim=1)
        if w is not None :
            x_lsm = x_lsm*w
        ix    = np.arange(len(y))
        loss  = x_lsm[ix,y[ix]]
        return -loss, -loss.mean()

    @staticmethod
    def crossentropy( x, y, w=None ):
        """NumPy cross-entropy on raw scores; forwards to `nllloss`."""
        return CustomCrossEntropy.nllloss(x,y,w)

    def forward(self,input,target):
        """
        Args:
            input: raw scores with the class axis at position 1, e.g. (N, C) or (N, C, H, W).
            target: integer class indices shaped like `input` without the class axis.

        Returns:
            Scalar tensor with the weighted mean loss.
        """
        log_probs = torch.nn.LogSoftmax(dim=1)(input)
        # Log-probability of the target class for every element.
        picked = log_probs.gather(1, target.unsqueeze(1)).squeeze(1)
        # Weight of each element's own target class, in the dtype of the predictions.
        weights = self.class_weights.reshape(-1).to(log_probs.dtype)[target]
        loss = -(weights * picked).sum() / weights.sum()
        return loss
    
"""
weight = torch.ones(vocab_size)
weight[pad_idx] = 0.0
crit = nn.CrossEntropyLoss(weight=weight)
crit(output, targets)
"""