In [1]:
import numpy as np
import torch
import torch.nn as nn
from pathlib import *

torch.__version__

'1.0.0.dev20181108'

# Select and assign

In [2]:
x = torch.LongTensor(2,4).random_(0, 10)
x

tensor([[8, 4, 7, 9],
        [0, 4, 6, 3]])

In [3]:
x[x==5] = 0
x

tensor([[8, 4, 7, 9],
        [0, 4, 6, 3]])

In [4]:
x[x==0] = 1
x

tensor([[8, 4, 7, 9],
        [1, 4, 6, 3]])

## L1Loss

In [5]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

tensor([[1.1693, 0.2476, 0.8034],
        [1.6055, 0.1212, 0.6313]])

In [6]:
y

tensor([[ 0.3543,  0.0859,  1.1909],
        [-0.5183,  1.3345,  0.1028]])

In [7]:
nn.L1Loss()(x, y)

tensor(0.8716)

In [8]:
nn.L1Loss(reduction="none")(x, y)

tensor([[0.8150, 0.1617, 0.3876],
        [2.1239, 1.2133, 0.5285]])

In [9]:
abs(x.numpy() - y.numpy())

array([[0.8149524 , 0.16171369, 0.38757598],
       [2.1238647 , 1.2133046 , 0.5284501 ]], dtype=float32)

In [10]:
abs(x.numpy() - y.numpy()).mean()

0.87164354

## MSELoss

In [11]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)
x

tensor([[-1.4791e+00, -1.9164e+00,  2.6991e-01],
        [-4.3319e-01, -1.2599e+00,  5.6285e-04]])

In [12]:
y

tensor([[-0.1659,  1.1100,  0.1855],
        [-2.1571, -1.4729,  0.9606]])

In [13]:
nn.MSELoss(reduction="none")(x, y)

tensor([[1.7244e+00, 9.1587e+00, 7.1161e-03],
        [2.9719e+00, 4.5388e-02, 9.2171e-01]])

In [14]:
nn.MSELoss()(x, y)

tensor(2.4715)

In [15]:
(x.numpy() - y.numpy())**2

array([[1.7244132e+00, 9.1586561e+00, 7.1160896e-03],
       [2.9719412e+00, 4.5388479e-02, 9.2170686e-01]], dtype=float32)

In [16]:
((x.numpy() - y.numpy())**2).mean()

2.4715369

# LogSoftmax

In [17]:
def logSoftmax(x,dim=0):
    """Log-softmax of ``x`` along axis ``dim``, computed with NumPy.

    Args:
        x: array-like of scores (anything ``np.asarray`` accepts).
        dim: axis along which the softmax is normalised. Default: 0.

    Returns:
        np.ndarray with the same shape as ``x`` holding ``log(softmax(x))``.
    """
    x = np.asarray(x)
    # Subtracting the per-slice maximum does not change the result mathematically,
    # but keeps exp() from overflowing to inf (which would turn the output into NaN).
    shifted = x - x.max(axis=dim, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))


In [18]:
xn = x.numpy()
lst = []
for k in range(len(x)):
    e_xn = np.exp(x[k])
    print(f"\nk                                     :{k}")
    print(f"x[k]                                    :{(xn[k])}")
    print(f"np.exp(x[k])                            :{e_xn}")
    print(f"e_xn / e_xn.sum()                       :{e_xn / e_xn.sum()} sum:{(e_xn / e_xn.sum()).sum()}")
    print(f"np.log( np.exp(x[k])/ np.exp(x[k]).sum():{np.log( e_xn / e_xn.sum() )}" )
    print(f"logSoftmax(x[k])                        :{logSoftmax(x[k])}")
    lst.append( np.log( np.exp(xn[k]) / np.exp(xn[k]).sum() ) )
lst


k                                     :0
x[k]                                    :[-1.479074   -1.9163687   0.26990512]
np.exp(x[k])                            :tensor([0.2278, 0.1471, 1.3098])
e_xn / e_xn.sum()                       :tensor([0.1352, 0.0873, 0.7774]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-2.0007, -2.4380, -0.2518])
logSoftmax(x[k])                        :[-2.0007381 -2.4380329 -0.2517589]

k                                     :1
x[k]                                    :[-4.3318903e-01 -1.2598501e+00  5.6284800e-04]
np.exp(x[k])                            :tensor([0.6484, 0.2837, 1.0006])
e_xn / e_xn.sum()                       :tensor([0.3355, 0.1468, 0.5177]) sum:1.0
np.log( np.exp(x[k])/ np.exp(x[k]).sum():tensor([-1.0921, -1.9188, -0.6584])
logSoftmax(x[k])                        :[-1.0921057 -1.918767  -0.6583538]


[array([-2.0007381, -2.4380329, -0.2517589], dtype=float32),
 array([-1.0921057, -1.918767 , -0.6583538], dtype=float32)]

# NLLLoss

In [19]:
x0 = torch.randn(3, 4)
x = nn.LogSoftmax(dim=1)(x0)
x0, x

(tensor([[-0.0667, -0.5184, -0.6454, -0.5125],
         [ 0.1278, -0.3439,  0.0251,  0.7741],
         [-0.6585,  0.7634, -0.5677, -0.5173]]),
 tensor([[-1.0429, -1.4946, -1.6216, -1.4887],
         [-1.4895, -1.9611, -1.5922, -0.8432],
         [-2.0004, -0.5784, -1.9096, -1.8591]]))

In [20]:
logSoftmax(x0,dim=1)

array([[-1.0429392 , -1.4946206 , -1.6215965 , -1.4886994 ],
       [-1.4894848 , -1.9611334 , -1.5921797 , -0.84320116],
       [-2.0004194 , -0.5784316 , -1.9096116 , -1.859141  ]],
      dtype=float32)

In [21]:
y = torch.LongTensor(3).random_(4)
y

tensor([2, 0, 3])

In [22]:
nn.NLLLoss()(x, y)

tensor(1.6567)

In [23]:
nn.NLLLoss(reduction="none")(x, y)

tensor([1.6216, 1.4895, 1.8591])

In [24]:
def nllloss(x,y,w=None):
    """Negative log-likelihood of the target classes, NumPy reference version.

    Applies ``logSoftmax`` along axis 1, optionally scales the log-probabilities
    by ``w``, and picks the entry ``[row, y[row]]`` for every row.

    Args:
        x: scores, indexed as (row, class).
        y: integer class index for every row.
        w: optional per-class weights multiplied onto the log-probabilities.

    Returns:
        Tuple ``(per_row_loss, mean_loss)``; the mean is a plain average of the
        per-row values.
    """
    log_probs = logSoftmax(x, dim=1)
    if w is not None:
        log_probs = log_probs * w
    rows = np.arange(len(y))
    picked = log_probs[rows, y[rows]]
    return -picked, -picked.mean()


In [25]:
xn = x.numpy()
yn = y.numpy()

In [26]:
lst = []
for k in range(len(yn)):
    lst.append(-xn[k,yn[k]])

print(f"nllloss:{nllloss(x,y)}")
lst, np.mean(lst)

nllloss:(array([1.6215966, 1.4894848, 1.8591411], dtype=float32), 1.6567408)


([1.6215966, 1.4894847, 1.8591411], 1.6567408)

In [27]:
def printWithDec(v,title=None,d=2):
    """Print ``v.numpy()`` with ``d`` decimals and without scientific notation.

    Args:
        v: object exposing ``.numpy()`` (e.g. a CPU torch tensor).
        title: optional label; when given the output is prefixed with ``"<title>:"``.
        d: number of decimals passed to ``np.printoptions(precision=...)``.
    """
    prefix = () if title is None else (f"{title}:",)
    with np.printoptions(precision=d, suppress=True):
        print(*prefix, v.numpy())

# Experiment to reproduce pytorch crossentropy

In [28]:
#Example
# Let bs, c, width, height be the batch size, number of classes, width and height of the image.
bs,c,width,height =   4,  6 ,2    , 5
x = torch.randn(      bs, c, width, height) 
y = torch.randint(c, (bs,    width, height) ) 
w = torch.rand(c)
w = w/w.sum() #normalize

# x: predictions for all images and classes.
# y: the ground truth, a mask holding the class of each pixel (i.e. a compact representation of a one-hot encoding).
# w: the weight of each class in the loss function.

print(f"Size of x, y, w: {x.size()}, {y.size()}, {w.size()}")

Size of x, y, w: torch.Size([4, 6, 2, 5]), torch.Size([4, 2, 5]), torch.Size([6])


In [29]:
path  = Path('../../data/mammography-data/mammography-dogscats-match-equalization-BINS-CHX/tiles') 
#print(list(path.iterdir()))
x  = torch.from_numpy( np.load( path/"input.npy" ) )
y  = torch.from_numpy(np.load( path/"target.npy" ) )
w  = torch.from_numpy(np.asarray([0.448717, 0.003049, 0.168137, 0.10334 , 0.119562, 0.157196]).astype(np.float32) )
tl = torch.from_numpy(np.load( path/"loss.npy" ) )
bs,c,width,height = x.shape

printWithDec(tl[0], "loss",2)
print(f"Size of x, y, w: {x.size()}, {y.size()}, {w.size()}")

loss: [[1.66 2.11 4.38 ... 1.53 1.51 1.53]
 [1.95 1.74 5.12 ... 1.45 1.48 1.46]
 [3.98 5.12 8.93 ... 1.55 1.51 1.54]
 ...
 [0.01 0.02 0.   ... 1.65 1.64 1.6 ]
 [0.82 0.46 0.04 ... 1.66 1.61 1.63]
 [0.49 0.67 0.02 ... 1.59 1.6  1.56]]
Size of x, y, w: torch.Size([16, 6, 224, 224]), torch.Size([16, 224, 224]), torch.Size([6])


In [30]:
printWithDec(x[0], "x[0]",2)

x[0]: [[[ 1.51  2.08  4.73 ... -3.39 -3.53 -3.45]
  [ 2.17  1.74  5.62 ... -3.3  -3.25 -3.33]
  [ 4.34  5.19  9.09 ... -3.52 -3.59 -3.5 ]
  ...
  [ 5.52  4.84 10.68 ... -3.56 -3.69 -3.64]
  [ 1.31  1.9   4.31 ... -3.83 -3.94 -3.88]
  [ 2.02  1.58  5.24 ... -3.65 -3.73 -3.71]]

 [[ 0.55  0.35  0.38 ...  0.81  0.83  0.81]
  [ 0.65  0.59  0.51 ...  0.9   0.88  0.89]
  [ 0.41  0.09  0.16 ...  0.81  0.84  0.81]
  ...
  [ 0.55  0.42  0.35 ...  0.79  0.8   0.83]
  [ 0.57  0.36  0.42 ...  0.77  0.8   0.8 ]
  [ 0.66  0.59  0.54 ...  0.85  0.84  0.87]]

 [[-0.24 -0.51 -0.68 ...  0.49  0.51  0.49]
  [-0.43 -0.36 -0.97 ...  0.49  0.5   0.47]
  [-0.61 -1.04 -1.25 ...  0.5   0.52  0.5 ]
  ...
  [-0.91 -0.79 -1.71 ...  0.66  0.63  0.61]
  [-0.16 -0.43 -0.55 ...  0.65  0.6   0.62]
  [-0.35 -0.29 -0.85 ...  0.6   0.6   0.57]]

 [[-0.03 -0.48 -0.91 ...  1.42  1.43  1.41]
  [-0.27 -0.19 -1.3  ...  1.43  1.41  1.42]
  [-0.8  -1.51 -2.1  ...  1.46  1.46  1.44]
  ...
  [-1.24 -1.13 -2.79 ...  1.61  1.6   1.

In [31]:
y[0]

tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1]])

In [32]:
#loss = nn.CrossEntropyLoss(weight=w,reduction="none")(x,y)
#print("loss.size()", loss.size() )
#printWithDec(loss,"crossentropy loss with weight and reduction=none\n",2)
#printWithDec(nn.CrossEntropyLoss(weight=w)(x,y),"crossentropy loss with weight and reduction=elementwise_mean",2)
loss = nn.CrossEntropyLoss(reduction="none")(x,y)
print("loss.size()", loss.size() )
printWithDec(loss,"crossentropy loss with reduction=none\n",2)


loss.size() torch.Size([16, 224, 224])
crossentropy loss with reduction=none
: [[[ 1.66  2.11  4.38 ...  1.53  1.51  1.53]
  [ 1.95  1.74  5.12 ...  1.45  1.48  1.46]
  [ 3.98  5.12  8.93 ...  1.55  1.51  1.54]
  ...
  [ 0.01  0.02  0.   ...  1.65  1.64  1.6 ]
  [ 0.82  0.46  0.04 ...  1.66  1.61  1.63]
  [ 0.49  0.67  0.02 ...  1.59  1.6   1.56]]

 [[ 1.75  1.76  1.85 ...  1.84  1.72  1.73]
  [ 1.72  1.69  1.81 ...  1.77  1.7   1.66]
  [ 1.86  1.88  2.03 ...  2.    1.8   1.82]
  ...
  [ 1.64  1.6   1.69 ...  1.6   1.6   1.57]
  [ 1.63  1.64  1.65 ...  1.64  1.6   1.62]
  [ 1.62  1.58  1.65 ...  1.56  1.58  1.55]]

 [[ 0.02  0.01  0.   ...  1.66  1.62  1.64]
  [ 0.01  0.02  0.   ...  1.6   1.61  1.57]
  [ 0.    0.   -0.   ...  1.7   1.63  1.66]
  ...
  [ 1.77  1.75  1.88 ...  1.58  1.58  1.55]
  [ 1.72  1.73  1.79 ...  1.63  1.59  1.61]
  [ 1.7   1.66  1.76 ...  1.55  1.57  1.54]]

 ...

 [[ 1.67  1.69  1.72 ...  1.8   1.68  1.71]
  [ 1.66  1.63  1.71 ...  1.72  1.68  1.64]
  [ 1.72  1

In [33]:
print("Loss for pixels with class index:")
mean_of_classmeans = 0
for i in range(c):
    printWithDec(loss[y==i], f"class {i} with mean {loss[y==i].mean():.2f} and sum {loss[y==i].sum():.2f}" ) 
    mean_of_classmeans +=  loss[y==i].mean()
mean_of_classmeans /= 6    
print("mean of class means:", mean_of_classmeans)
print("mean loss:", loss.mean())
    
#print("\nMean loss pr prediction of batch images:")
#for i in range(len(loss)):printWithDec( loss[i].mean(), f"batch image {i}:", d=2)

Loss for pixels with class index:
class 0 with mean 0.01 and sum 19.75: [0. 0. 0. ... 0. 0. 0.]
class 1 with mean 1.89 and sum 1427408.00: [1.66 2.11 4.38 ... 1.64 1.64 1.59]
class 2 with mean 1.68 and sum 34482.50: [1.5  1.66 1.51 ... 1.65 1.67 1.65]
class 3 with mean nan and sum 0.00: []
class 4 with mean 1.97 and sum 15385.13: [2.36 2.32 2.21 ... 2.02 2.1  2.13]
class 5 with mean 1.91 and sum 28919.49: [2.64 2.51 2.48 ... 2.26 2.21 2.26]
mean of class means: tensor(nan)
mean loss: tensor(1.8762)


In [34]:
#np.unique(y[0])
np.argmax(x[0,0])

tensor(41669)

In [35]:
def apply_along_axis(tensor, func, axis=0):
    """Apply ``func`` to every slice of ``tensor`` taken along ``axis`` and re-stack.

    The tensor is split with ``torch.unbind`` (each slice has ``axis`` removed),
    ``func`` is called on each slice, and the results are stacked back on ``axis``.
    Note the argument order differs from ``np.apply_along_axis``.

    Args:
        tensor: input torch tensor.
        func: callable taking one slice and returning a tensor.
        axis: dimension to unbind and to stack on. Default: 0.

    Returns:
        The stacked tensor of per-slice results.
    """
    slices = torch.unbind(tensor, dim=axis)
    return torch.stack(list(map(func, slices)), dim=axis)

def logSoftmax_1d(x,dim):
    
    e_xn     = x.exp()
    e_xn_sum = e_xn.sum(dim)
    for i in range(len(x)):
        e_xn[i] = (e_xn[i] / e_xn_sum[i]).log()
    return e_xn

    #return e_xn
#    return ( e_xn / e_xn.sum(0,keepdim=True) ).log()

#apply_along_axis(loss, lambda x:x.mean())
ex = x[0,:,0,0].exp()
#print(ex/ex.sum()), 
print("-logsoftmax for 1-hot at pixel at 0,0:", -(ex/ex.sum()).log() ), 
lsm = logSoftmax_1d(x,dim=1)
print("-logsoftmax for pixel 0,0:            ", -lsm[0,:,0,0] )
print(lsm.size())
print(y.size())

-logsoftmax for 1-hot at pixel at 0,0: tensor([0.6950, 1.6604, 2.4472, 2.2312, 2.7893, 2.8918])
-logsoftmax for pixel 0,0:             tensor([0.6950, 1.6604, 2.4472, 2.2312, 2.7893, 2.8918])
torch.Size([16, 6, 224, 224])
torch.Size([16, 224, 224])


In [36]:
y0=y[0]
l0=lsm[0]
print(y0.size())
print(l0.size())

torch.Size([224, 224])
torch.Size([6, 224, 224])


In [37]:
print(f"Size of x, y, w: {x.size()}, {y.size()}, {w.size()}")

Size of x, y, w: torch.Size([16, 6, 224, 224]), torch.Size([16, 224, 224]), torch.Size([6])


In [38]:
bs, nc, cols, rows = x.size()
xp = x.permute(0, 2, 3, 1)
xp = xp.contiguous().view(-1, nc)
yp = y.view(-1)
print(f"Size of xp, yp: {xp.size()}, {yp.size()}")
l  = torch.nn.functional.cross_entropy(xp, yp, reduction="none")
l

Size of xp, yp: torch.Size([802816, 6]), torch.Size([802816])


tensor([1.6604, 2.1135, 4.3762,  ..., 1.6388, 1.6360, 1.5908])

In [39]:
def crossEntropy(x,y):
    """Unreduced cross-entropy between class scores and integer targets.

    Equivalent to ``-log_softmax(x, dim=1)`` evaluated at the target class of
    every position. The computation is fully vectorised (no per-pixel Python
    loop), uses ``logsumexp`` so large scores do not overflow, and never writes
    into ``x``.

    Args:
        x: scores with the class dimension at index 1, e.g. (bs, nc, cols, rows).
        y: int64 class indices with the shape of ``x`` minus the class dimension,
           e.g. (bs, cols, rows).

    Returns:
        Tensor shaped like ``y`` holding the loss of every position.
    """
    log_probs = x - torch.logsumexp(x, dim=1, keepdim=True)
    # gather picks, for every position, the log-probability of its target class
    picked = log_probs.gather(1, y.unsqueeze(1)).squeeze(1)
    return -picked
l=crossEntropy(x,y)
printWithDec(l,"crossentropy loss\n",2)
print("l.shape",l.shape)


crossentropy loss
: [[[ 1.66  2.11  4.38 ...  1.53  1.51  1.53]
  [ 1.95  1.74  5.12 ...  1.45  1.48  1.46]
  [ 3.98  5.12  8.93 ...  1.55  1.51  1.54]
  ...
  [ 0.01  0.02  0.   ...  1.65  1.64  1.6 ]
  [ 0.82  0.46  0.04 ...  1.66  1.61  1.63]
  [ 0.49  0.67  0.02 ...  1.59  1.6   1.56]]

 [[ 1.75  1.76  1.85 ...  1.84  1.72  1.73]
  [ 1.72  1.69  1.81 ...  1.77  1.7   1.66]
  [ 1.86  1.88  2.03 ...  2.    1.8   1.82]
  ...
  [ 1.64  1.6   1.69 ...  1.6   1.6   1.57]
  [ 1.63  1.64  1.65 ...  1.64  1.6   1.62]
  [ 1.62  1.58  1.65 ...  1.56  1.58  1.55]]

 [[ 0.02  0.01  0.   ...  1.66  1.62  1.64]
  [ 0.01  0.02  0.   ...  1.6   1.61  1.57]
  [ 0.    0.   -0.   ...  1.7   1.63  1.66]
  ...
  [ 1.77  1.75  1.88 ...  1.58  1.58  1.55]
  [ 1.72  1.73  1.79 ...  1.63  1.59  1.61]
  [ 1.7   1.66  1.76 ...  1.55  1.57  1.54]]

 ...

 [[ 1.67  1.69  1.72 ...  1.8   1.68  1.71]
  [ 1.66  1.63  1.71 ...  1.72  1.68  1.64]
  [ 1.72  1.75  1.79 ...  1.93  1.75  1.78]
  ...
  [ 1.62  1.58  1.65

In [40]:
l[-1]

tensor([[1.6378, 1.6409, 1.6709,  ..., 1.7157, 1.6469, 1.6643],
        [1.6229, 1.5769, 1.6539,  ..., 1.6469, 1.6424, 1.5963],
        [1.6721, 1.6741, 1.7281,  ..., 1.7906, 1.6880, 1.7123],
        ...,
        [1.6498, 1.6053, 1.7061,  ..., 1.7065, 1.6720, 1.6273],
        [1.6248, 1.6376, 1.6571,  ..., 1.7136, 1.6372, 1.6628],
        [1.6152, 1.5701, 1.6484,  ..., 1.6388, 1.6360, 1.5908]])

In [41]:
nn.CrossEntropyLoss(reduction="none")(x,y)[-1]

tensor([[1.6378, 1.6409, 1.6709,  ..., 1.7157, 1.6469, 1.6643],
        [1.6229, 1.5769, 1.6539,  ..., 1.6469, 1.6424, 1.5963],
        [1.6721, 1.6741, 1.7281,  ..., 1.7906, 1.6880, 1.7123],
        ...,
        [1.6498, 1.6053, 1.7061,  ..., 1.7065, 1.6720, 1.6273],
        [1.6248, 1.6376, 1.6571,  ..., 1.7136, 1.6372, 1.6628],
        [1.6152, 1.5701, 1.6484,  ..., 1.6388, 1.6360, 1.5908]])

In [42]:
def meanLoss(l,target,n_classes=None):
    """Average, over the classes present in ``target``, of the squared mean loss per class.

    For every class ``i`` that occurs in ``target`` the mean of ``l[target == i]``
    is squared; the result is the average of those squared means.

    Args:
        l: tensor of per-element losses.
        target: tensor of class indices with the same shape as ``l``.
        n_classes: optional number of classes; when given, classes
            ``0 .. n_classes-1`` are examined. When omitted, the distinct values
            found in ``target`` are used.

    Returns:
        Scalar tensor; zero when no examined class occurs in ``target``.
    """
    if n_classes is None:
        class_ids = torch.unique(target).tolist()
    else:
        class_ids = range(n_classes)

    batch_loss = 0.
    batch_loss_n = 0
    for i in class_ids:
        ix = target==i
        if ix.any():
            # use the losses passed in as `l`, not a notebook-level global
            batch_loss   += l[ix].mean().pow(2)
            batch_loss_n += 1
    if batch_loss_n == 0:
        # nothing to average: avoid a division by zero
        return l.new_zeros(())
    return batch_loss/batch_loss_n
ml = meanLoss(l,y)
ml

tensor(2.7859)

In [43]:
w.argmax()

tensor(0)

In [44]:
def crossEntropyFocalArgMax(x,y):
    """Cross-entropy restricted to mis-classified pixels, averaged per class.

    Steps:
      1. flatten ``x`` to (bs*cols*rows, nc) and ``y`` to (bs*cols*rows,);
      2. keep only positions whose arg-max prediction differs from the target;
      3. average their cross-entropy per target class, then average those class
         means over the classes that have at least one error;
      4. scale by the fraction of mis-classified positions.

    Prints the fraction of mis-classified positions as a side effect.

    Args:
        x: scores shaped (bs, nc, cols, rows).
        y: int64 targets shaped (bs, cols, rows).

    Returns:
        Scalar tensor, or the float ``0.0`` when every position is classified correctly.
    """
    bs, nc, cols, rows = x.size()
    # (bs, nc, cols, rows) -> (bs, cols, rows, nc) -> (bs*cols*rows, nc)
    xv = x.permute(0, 2, 3, 1).contiguous().view(-1, nc)
    yv = y.view(-1)

    # mask of positions where the predicted class is wrong
    ix_error = xv.argmax(dim=1).ne(yv)

    xv_error = torch.nn.functional.cross_entropy(xv, yv, reduction = "none" )[ix_error]
    yg_error = yv[ix_error]

    print("len(yg_error)/len(yv)", len(yg_error)/len(yv))

    batch_loss = batch_loss_n = 0.
    # iterate over this input's own class count (nc), not a notebook-level global
    for i in range(nc):
        ix = yg_error==i
        if ix.sum() >= 1:
            batch_loss   += xv_error[ix].mean()
            batch_loss_n += 1

    if batch_loss > 0:
        batch_loss /= batch_loss_n

    return batch_loss  * len(yg_error)/ len(yv)
p=crossEntropyFocalArgMax(x.clone(),y.clone())
print(p)
#print("p.shape:",p.shape)
#printWithDec(p,"crossentropy loss\n",2)


len(yg_error)/len(yv) 1.0
tensor(1.4915)


In [None]:
len(yg_error)/len(yv) 0.9931740274234694
tensor(1.8514)

In [None]:
2.7859                2.8/7*9

torch.nn.functional.cross_entropy(x.view(bs,nc,-1), y.view(bs,-1), reduction="none")

In [None]:
torch.nn.functional.cross_entropy(x, y, reduction="none")

In [None]:
loss = nn.CrossEntropyLoss(reduction="none")(x,y).flatten()
print("loss no weight:\n", loss)


In [None]:
y0v = y0.view(1,-1).squeeze()
l0v = l0.view(l0.size(0),-1).squeeze()
printWithDec( y0v, f"y0:\n", d=2) 
printWithDec( l0v, f"l0:\n", d=2) 
print(f"l0v.size()", l0v.size())

for i in range(y0v.size(0)):
    print(i," - ", y0v[i].item(), " - ", l0v[y0v[i], i] )
[l0v[y0v[i],i] for i in range(len(y0v))]
            


In [None]:

# outputs.shape =(batch_size, n_classes, img_cols, img_rows) 
lsmp = lsm.permute(0, 2, 3, 1)
# outputs.shape =(batch_size, img_cols, img_rows, n_classes) 
outputs = outputs.resize(batch_size*img_cols*img_rows, n_classes)
labels = labels.resize(batch_size*img_cols*img_rows)
loss = F.cross_entropy(outputs, labels)


In [None]:
def logSoftmax(x,dim=0):
    """Log-softmax along ``dim`` for either a torch tensor or a NumPy array.

    Both branches use the log-sum-exp form so large scores do not overflow.

    Args:
        x: torch tensor, or anything ``np.asarray`` accepts.
        dim: axis along which the probabilities sum to one. Default: 0.

    Returns:
        torch.Tensor when ``x`` is a tensor, otherwise np.ndarray; same shape as ``x``.
    """
    if isinstance(x, torch.Tensor):
        return x - torch.logsumexp(x, dim=dim, keepdim=True)

    # NumPy path: keeps the later cells that pass x.numpy() working
    x = np.asarray(x)
    shifted = x - x.max(axis=dim, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))

def nllloss(x,y,w=None):
    """Negative log-likelihood of the target classes.

    Applies ``logSoftmax`` along axis 1, optionally scales the log-probabilities
    by ``w``, and picks the entry ``[row, y[row]]`` for every row.

    Args:
        x: scores, indexed as (row, class).
        y: integer class index for every row.
        w: optional per-class weights multiplied onto the log-probabilities.

    Returns:
        Tuple ``(per_row_loss, mean_loss)``; the mean is a plain average of the
        per-row values.
    """
    log_probs = logSoftmax(x, dim=1)
    if w is not None:
        log_probs = log_probs * w
    rows = np.arange(len(y))
    picked = log_probs[rows, y[rows]]
    return -picked, -picked.mean()

def crossentropy( x, y, w=None ):
    """Cross-entropy of raw scores ``x`` against targets ``y``; delegates to ``nllloss``.

    Returns whatever ``nllloss(x, y, w)`` returns.
    """
    result = nllloss(x, y, w)
    return result

crossentropy(x,y,w)

## CrossEntropyLoss

In [None]:
def crossentropy( x, y, w=None ):
    """Cross-entropy of raw scores ``x`` against targets ``y``; delegates to ``nllloss``.

    Returns whatever ``nllloss(x, y, w)`` returns.
    """
    result = nllloss(x, y, w)
    return result

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

In [None]:
y

In [None]:
nn.CrossEntropyLoss(reduction="none")(x, y), nn.CrossEntropyLoss()(x, y) 

In [None]:
xn = x.numpy()
yn = y.numpy()

lst = []
for k in range(len(yn)):
    print(f"k:{k} x[k] {xn[k]} y[k]:{y[k]} x[k,y[k]:{xn[k,yn[k]]}")
    lst.append(-np.log( np.exp(xn[k,yn[k]]) / np.exp(xn[k] ).sum() ) )
lst, np.mean(lst)

In [None]:
nllloss(xn,yn)

## CrossEntropyLoss with weights

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
w = torch.rand(4)
x,y,w

In [None]:
w = torch.rand(4)
w

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="sum")(x, y) 

In [None]:
crossentropy(x.numpy(),y.numpy(),w.numpy())

In [None]:
# The mean values do not match because PyTorch's weighted mean reduction
# (reduction="mean", formerly "elementwise_mean") divides the weighted sum by the sum of the
# target-class weights rather than by the batch size. When all weights are 1 the two methods agree.
w = torch.rand(4)*0+1

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="mean")(x, y) 

In [None]:
crossentropy(x.numpy(),y.numpy(),w.numpy())

In [None]:
nn.CrossEntropyLoss(weight=w, reduction="none")(x, y), nn.CrossEntropyLoss(weight=w,reduction="sum")(x, y) 

In [None]:
(1.2285+2.2941)/2

# DICE loss: simple multiclass

In [None]:
def dice_loss(input, target, class_weights=None, n_classes=None):
    """Weighted sum over classes of the smoothed Dice loss.

    For every class ``c`` the loss term is
    ``1 - (2*intersection + smooth) / (input_c.sum() + target_c.sum() + smooth)``
    with ``smooth = 1``, where ``input_c = input[:, c]`` and ``target_c = target[:, c]``.

    Args:
        input: predictions with the class dimension at index 1.
        target: tensor indexed the same way as ``input`` (class dimension at index 1).
        class_weights: optional sequence with one weight per class; every class
            weighs 1 when omitted.
        n_classes: optional number of classes to sum over; defaults to
            ``input.shape[1]``.

    Returns:
        Scalar tensor with the weighted sum of the per-class Dice losses.
    """
    smooth = 1.
    if n_classes is None:
        n_classes = input.shape[1]

    loss = 0.
    for c in range(n_classes):
        # Reduce the class slice directly: input[:, c] is generally non-contiguous,
        # so flattening it with .view(-1) would raise.
        pred_c = input[:, c]
        true_c = target[:, c]
        intersection = (pred_c * true_c).sum()

        w = 1. if class_weights is None else class_weights[c]
        loss += w*(1 - ((2. * intersection + smooth) /
                          (pred_c.sum() + true_c.sum() + smooth)))
    return loss

In [None]:
def dice_loss(input, label, epsilon=0.00001):
    r"""
    Dice loss for comparing the similarity of two batches of data,
    usually used for binary image segmentation, i.e. labels are binary.
    The dice loss can be defined by the equation below:

    .. math::
        dice\_loss &= 1 - \frac{2 * intersection\_area}{total\_area} \\
                  &= \frac{(total\_area - intersection\_area) - intersection\_area}{total\_area} \\
                  &= \frac{(union\_area - intersection\_area)}{total\_area}

    Implemented with torch tensor operations (the class indices are one-hot
    encoded along the last axis with ``scatter_``).

    Args:
        input (Tensor): The predictions with rank>=2. The first dimension is batch size,
                        and the last dimension is class number.
        label (Tensor): The ground truth class indices with the same rank as input. The first
                        dimension is batch size, and the last dimension is 1. A label without
                        the trailing size-1 dimension is also accepted.
        epsilon (float): Added to the denominator to avoid a division by zero
                         when both input and label are empty.
                         Default: 0.00001
    Returns:
        dice_loss (Tensor): Scalar tensor, the mean dice loss over the batch.
    Examples:
        .. code-block:: python
            predictions = torch.softmax(x, dim=-1)
            loss = dice_loss(input=predictions, label=label)
    """
    if label.dim() == input.dim() - 1:
        label = label.unsqueeze(-1)
    # one-hot encode the class indices along the last (class) axis
    one_hot_label = torch.zeros_like(input).scatter_(-1, label.long(), 1)
    # reduce over everything except the batch dimension
    reduce_dim = list(range(1, input.dim()))
    inse = (input * one_hot_label).sum(dim=reduce_dim)
    dice_denominator = input.sum(dim=reduce_dim) + one_hot_label.sum(dim=reduce_dim)
    dice_score = 1 - inse * 2 / (dice_denominator + epsilon)
    return dice_score.mean()

## PoissonNLLLoss

In [None]:
x = torch.randn(2, 4)
x

In [None]:
y = torch.randn(2, 4)
y

In [None]:
nn.PoissonNLLLoss()(x, y)

In [None]:
# reduction="none" returns the element-wise loss (replaces the deprecated reduce=False)
nn.PoissonNLLLoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
# target∗log(target)−target+0.5∗log(2πtarget)
def sterling_approx(y):
    """Stirling approximation of ``log(y!)``: ``y*log(y) - y + 0.5*log(2*pi*y)``.

    Args:
        y: positive number or NumPy array.

    Returns:
        The approximation, with the same shape as ``y``.
    """
    return y*np.log(y) - y + 0.5*np.log(2*np.pi*y)

In [None]:
lst = []
for k in range(len(x)):
    lsti = []
    for i in range(len(x[k])):
        lss = np.exp(x[k,i])-y[k,i]*x[k,i] + (sterling_approx(y[k,i]) if y[k,i]>1 else 0)
        lsti.append(lss)
    lst.append(lsti)

In [None]:
np.array(lst)

In [None]:
np.mean(lst)

## KLDivLoss

In [None]:
x = torch.rand(2, 3)
y = torch.rand(2, 3)
x

In [None]:
y

In [None]:
nn.KLDivLoss()(x, y)

In [None]:
# reduction="none" returns the element-wise loss (replaces the deprecated reduce=False)
nn.KLDivLoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        # xi is already log 
        lsti.append(y[i][j] * (np.log(y[i][j]) - x[i][j]))
    lst.append(lsti)
np.array(lst)

In [None]:
np.mean(lst)

## BCELoss

Sigmoid

In [None]:
x = torch.randn(2, 4)
y = nn.Sigmoid()(x)
x

In [None]:
y

In [None]:
x = x.numpy()

In [None]:
1 / (1 + np.exp(-x))

### single label

In [None]:
x0 = torch.randn(3)
x = nn.Sigmoid()(x0)
x

In [None]:
y = torch.FloatTensor(3).random_(2)
y

In [None]:
nn.BCELoss()(x, y)

In [None]:
# reduction="none" returns the element-wise loss (replaces the deprecated reduce=False)
nn.BCELoss(reduction="none")(x, y)

In [None]:
# reduction="sum" adds up the element-wise losses (replaces the deprecated size_average=False)
loss = nn.BCELoss(reduction="sum")
lss = loss(x, y)
lss

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i]) if y[i]==1 else -np.log(1-x[i]))
lst, np.mean(lst)

Equivalently

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
lst, np.mean(lst)

### multilabel

In [None]:
x0 = torch.randn(3, 2)
x = nn.Sigmoid()(x0)
x

In [None]:
y = torch.FloatTensor(3, 2).random_(2)
y

In [None]:
nn.BCELoss()(x, y)

In [None]:
# reduction="none" returns the element-wise loss (replaces the deprecated reduce=False)
nn.BCELoss(reduction="none")(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        lsti.append(-np.log(x[i][j]) if y[i][j]==1 else -np.log(1-x[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

Equivalently

In [None]:
lst = []
for i in range(len(x)):
    lst.append(-np.log(x[i])*y[i] + -np.log(1-x[i])*(1-y[i]))
np.array(lst), np.mean(lst)

## BCEWithLogitsLoss

This is just simply adding a sigmoid in front of BCELoss above.

### single label

In [None]:
x = torch.randn(3)
x

In [None]:
xs = nn.Sigmoid()(x)
xs

In [None]:
y = torch.FloatTensor(3).random_(2)
y

In [None]:
nn.BCELoss()(xs, y)

In [None]:
nn.BCEWithLogitsLoss()(x, y)

### multilabel

In [None]:
x = torch.randn(3, 2)
x

In [None]:
xs = nn.Sigmoid()(x)
xs

In [None]:
y = torch.FloatTensor(3, 2).random_(2)
y

In [None]:
nn.BCELoss()(xs, y)

In [None]:
nn.BCEWithLogitsLoss()(x, y)

## MarginRankingLoss

In [None]:
x1 = torch.randn(3)
x2 = torch.randn(3)
y = torch.FloatTensor(np.random.choice([1, -1], 3))

x1, x2, y

In [None]:
nn.MarginRankingLoss(margin=0.1)(x1, x2, y)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [None]:
lst = []
for i in range(len(x1)):
    lst.append(max(0, -y[i]*(x1[i]-x2[i]) + margin))

lst, np.mean(lst)

## HingeEmbeddingLoss

In [None]:
x = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))

x

In [None]:
y

In [None]:
nn.HingeEmbeddingLoss(margin=1)(x, y)

In [None]:
x = x.numpy()
y = y.numpy()
margin=1

In [None]:
lst=[]

for i in range(len(x)):
    lsti = []
    for j in range(len(x[i])):
        if y[i][j]==1:
            lsti.append(x[i][j])
        else:
            lsti.append(max(0, margin-x[i][j]))
    lst.append(lsti)
np.array(lst)

In [None]:
np.mean(lst)

## MultiLabelMarginLoss

This is a very confusing class. Great reference here: https://blog.csdn.net/zhangxb35/article/details/72464152

### one-sample example

In [None]:
x = torch.randn(1, 4)
y = torch.LongTensor(1, 4).random_(-1, 4)
x

In [None]:
y

In [None]:
nn.MultiLabelMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                print(i, j)
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

In [None]:
lst, np.mean(lst)

### multi-sample example

In [None]:
x = torch.randn(3, 4)
y = torch.LongTensor(3, 4).random_(-1, 4)
x

In [None]:
y

In [None]:
nn.MultiLabelMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

lst = []
for k in range(len(x)):
    sm = 0
    js = []
    for j in range(len(y[k])):
        if y[k][j]<0: break 
        js.append(y[k][j])
    for i in range(len(x[k])):
        for j in js:
            if (i not in js) and (i!=j):
                sm += max(0, 1-(x[k][j] - x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## SmoothL1Loss

In [None]:
x = torch.randn(2, 3)
y = torch.randn(2, 3)

In [None]:
nn.SmoothL1Loss()(x, y)

In [None]:
# reduction="none" returns the element-wise loss (replaces the deprecated reduce=False)
nn.SmoothL1Loss(reduction="none")(x, y)

In [None]:
x = x.numpy() 
y = y.numpy()

In [None]:
def smoothl1loss(x, y):
    """Smooth L1 loss of a single pair of scalars.

    Quadratic (``0.5 * (x - y)**2``) while ``|x - y| < 1``, linear
    (``|x - y| - 0.5``) otherwise.
    """
    diff = x - y
    gap = abs(diff)
    return 1/2 * diff**2 if gap < 1 else gap - 1/2

In [None]:
lst = []
for i in range(len(x)):
    lsti=[]
    for j in range(len(x[i])):
        lsti.append(smoothl1loss(x[i][j], y[i][j]))
    lst.append(lsti)
np.array(lst), np.mean(lst)

## SoftMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))
x

In [None]:
y

In [None]:
nn.SoftMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm += np.log(1 + np.exp(-y[k][i]*x[k][i]))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## MultiLabelSoftMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.FloatTensor(2, 4).random_(2)
x

In [None]:
y

In [None]:
nn.MultiLabelSoftMarginLoss()(x, y)

In [None]:
x = x.numpy()
y = y.numpy()

In [None]:
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        sm -= y[k, i]*np.log(np.exp(x[k, i])/(1+np.exp(x[k, i]))) +\
            (1-y[k, i])*np.log(1/(1+np.exp(x[k, i])))
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## CosineEmbeddingLoss

In [None]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
y = torch.FloatTensor(np.random.choice([1, -1], 2))

x1

In [None]:
x2

In [None]:
y

In [None]:
nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
y = y.numpy()
margin=0.1

In [None]:
from scipy.spatial.distance import cosine

def cos(x, y):
    """Cosine similarity of two vectors, derived from SciPy's cosine *distance*."""
    distance = cosine(x, y)
    return 1 - distance

In [None]:
lst = []
for k in range(len(x1)):
    if y[k] == 1: lst.append(1-cos(x1[k], x2[k]))
    elif y[k] == -1: lst.append(max(0, cos(x1[k], x2[k])-margin))
lst, np.mean(lst)

## MultiMarginLoss

In [None]:
x = torch.randn(2, 4)
y = torch.LongTensor(2).random_(4)
x

In [None]:
y

In [None]:
nn.MultiMarginLoss(margin=0.9, p=2)(x, y)

In [None]:
x = x.numpy()
y = y.numpy()
p=2
margin=0.9

In [None]:
# Manual MultiMarginLoss: sum_{i != y} max(0, margin - x[y] + x[i])**p / n_classes.
# The hinge is clipped at zero BEFORE raising to the power p; raising first would
# make every term non-negative for even p and the max() would never clip.
lst = []
for k in range(len(x)):
    sm = 0
    for i in range(len(x[k])):
        if i!= y[k]:
            sm += max(0, margin - x[k, y[k]] + x[k, i])**p
    lst.append(sm/len(x[k]))

lst, np.mean(lst)

## TripletMarginLoss

In [None]:
x1 = torch.randn(2, 3)
x2 = torch.randn(2, 3)
x3 = torch.randn(2, 3)
margin = 0.9
p = 2

x1

In [None]:
nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)

In [None]:
x1 = x1.numpy()
x2 = x2.numpy()
x3 = x3.numpy()

In [None]:
def d(x1, x2, p):
    """p-norm (Minkowski) distance between two 1-D NumPy vectors.

    The absolute value of the difference is taken before raising to ``p`` so the
    result is a valid distance for odd ``p`` as well.

    Note: ``nn.TripletMarginLoss`` additionally adds a small ``eps`` (default 1e-6)
    inside the norm, so comparisons with it agree only up to about that tolerance.
    """
    return np.sum(np.abs(x1 - x2) ** p) ** (1 / p)

In [None]:
# Manual TripletMarginLoss: one hinge value per sample,
# max(d(anchor, positive) - d(anchor, negative) + margin, 0).
# The value does not depend on the feature index, so no inner loop is needed.
lst = [
    max(d(x1[k], x2[k], p) - d(x1[k], x3[k], p) + margin, 0.0)
    for k in range(len(x1))
]

lst, np.mean(lst)

## References

- https://pytorch.org/docs/0.4.0/nn.html#loss-functions
- https://blog.csdn.net/zhangxb35/article/details/72464152

In [None]:
class CustomCrossEntropy(torch.nn.Module):
    """Class-weighted cross-entropy loss module with NumPy reference helpers.

    ``forward`` computes ``log_softmax`` over dimension 1 followed by a weighted
    negative log-likelihood (mean reduction), i.e. the same quantity as
    ``nn.CrossEntropyLoss(weight=class_weights)``.

    Buffers:
        class_weights: the per-class weights passed to the constructor.
        ix_class_weights: float32 mask, 1.0 where the class weight is > 0, else 0.0.

    Args:
        class_weights (np.ndarray): one weight per class.
    """

    def __init__(self, class_weights):
        super(CustomCrossEntropy,self).__init__()
        class_weights = np.asarray(class_weights)
        # Registered buffers follow the module through .to(device) / .cuda(),
        # so no device is hard-coded here and the module also works on CPU-only machines.
        self.register_buffer("class_weights",  torch.from_numpy( class_weights ) )
        self.register_buffer("ix_class_weights", torch.from_numpy( (class_weights>0).flatten().astype(np.float32) ) )

    @staticmethod
    def logSoftmax(x,dim=0):
        """NumPy log-softmax of ``x`` along axis ``dim`` (max-shifted to avoid overflow)."""
        x = np.asarray(x)
        shifted = x - x.max(axis=dim, keepdims=True)
        return shifted - np.log(np.exp(shifted).sum(axis=dim, keepdims=True))

    @staticmethod
    def nllloss(x,y,w=None):
        """NumPy negative log-likelihood of targets ``y`` for scores ``x`` indexed (row, class).

        Returns ``(per_row_loss, mean_loss)``; ``w`` optionally scales the
        log-probabilities per class before the target entries are picked.
        """
        x_lsm = CustomCrossEntropy.logSoftmax(x,dim=1)
        if w is not None :
            x_lsm = x_lsm*w
        ix    = np.arange(len(y))
        loss  = x_lsm[ix,y[ix]]
        return -loss, -loss.mean()

    @staticmethod
    def crossentropy( x, y, w=None ):
        """NumPy cross-entropy of raw scores; same return value as ``nllloss``."""
        return CustomCrossEntropy.nllloss(x,y,w)

    def forward(self,input,target):
        """Weighted cross-entropy of ``input`` scores against ``target`` class indices.

        Args:
            input: scores with the class dimension at index 1.
            target: int64 class indices.

        Returns:
            Scalar tensor with the weighted mean loss.
        """
        # match the weights to the input so float64 NumPy weights or a module
        # left on another device do not raise
        weight    = self.class_weights.to(device=input.device, dtype=input.dtype)
        log_probs = nn.functional.log_softmax(input, dim=1)
        return nn.functional.nll_loss(log_probs, target, weight=weight)
    
"""
weight = torch.ones(vocab_size)
weight[pad_idx] = 0.0
crit = nn.CrossEntropyLoss(weight=weight)
crit(output, targets)
"""