In [1]:
import numpy as np
from scipy.spatial.distance import cosine
import torch
import torch.nn as nn

torch.__version__

'1.5.1'

##### torch.rand() - samples from a uniform distribution on the interval [0, 1)
##### torch.randn() - samples from a normal distribution with mean=0 and sigma=1
##### torch.randint(low, high, size) - samples integers randomly from low to high-1

## Cross Entropy & NLL Losses

In [2]:
x = torch.randn(4, 4)
y = torch.randint(0, 4, (4,))

print('X:', x, '\nY:', y)

X: tensor([[ 1.0122, -0.6784, -0.4772,  0.2061],
        [ 0.8358,  0.0491, -1.2866, -0.8148],
        [ 2.0391, -0.1507, -0.6823,  0.3083],
        [ 0.0897,  0.9958, -0.7984, -0.6539]]) 
Y: tensor([2, 3, 0, 3])


In [3]:
# torch implementation
print('CCE:')
print('\nLoss per Element:', nn.CrossEntropyLoss(reduction='none')(x, y))
print('Mean Loss:', nn.CrossEntropyLoss()(x, y))

print('\n\nNLL:')
print('\nLoss per Element:', nn.NLLLoss(reduction='none')(nn.LogSoftmax(dim=1)(x), y))
print('Mean Loss:', nn.NLLLoss()(nn.LogSoftmax(dim=1)(x), y))

CCE:

Loss per Element: tensor([2.1082, 2.2198, 0.3037, 2.2165])
Mean Loss: tensor(1.7120)


NLL:

Loss per Element: tensor([2.1082, 2.2198, 0.3037, 2.2165])
Mean Loss: tensor(1.7120)


In [4]:
# numpy implementation
cce = np.zeros(x.shape[0])
for i in range(x.shape[0]):
    cce[i] = -np.log(np.exp(x[i][y[i]])/np.exp(x[i]).sum())
print('Loss per Element:', cce)
print('Mean Loss:', cce.mean())

Loss per Element: [2.10816598 2.21984363 0.30370933 2.21646667]
Mean Loss: 1.7120463997125626


## Poisson-NLL-Loss

In [5]:
target = torch.randn(4, 4)
print('Y:', target)

# torch implementation
print('\nWithout Stirling approximation:')
print('Loss per Element:', nn.PoissonNLLLoss(log_input=True, reduction='none')(x, target))
print('Mean Loss:', nn.PoissonNLLLoss(log_input=True)(x, target))

print('\nWith Stirling approximation:')
print('Loss per Element:', nn.PoissonNLLLoss(log_input=True, reduction='none', full=True)(x, target))
print('Mean Loss:', nn.PoissonNLLLoss(log_input=True, full=True)(x, target))

Y: tensor([[-0.5860,  0.6840, -1.5116,  1.7220],
        [-0.2024, -0.2763,  1.0518,  0.4510],
        [ 0.2592, -1.1736, -0.9690,  0.1357],
        [-0.5040,  0.4342,  1.3743,  1.5051]])

Without Stirling approximation:
Loss per Element: tensor([[ 3.3449,  0.9715, -0.1009,  0.8740],
        [ 2.4757,  1.0639,  1.6295,  0.8102],
        [ 7.1549,  0.6833, -0.1558,  1.3193],
        [ 1.1391,  2.2746,  1.5472,  1.5042]])
Mean Loss: tensor(1.6585)

With Stirling approximation:
Loss per Element: tensor([[ 3.3449,  0.9715, -0.1009,  1.2785],
        [ 2.4757,  1.0639,  1.5750,  0.8102],
        [ 7.1549,  0.6833, -0.1558,  1.3193],
        [ 1.1391,  2.2746,  1.6878,  1.7379]])
Mean Loss: tensor(1.7037)


In [6]:
# numpy implementation
poisson = np.exp(x) - x*target
print('\nWithout Stirling approximation:')
print('Loss per Element:', poisson)
print('Mean Loss:', poisson.mean())

poisson[target>1] += (target * np.log(target) - target + 0.5*np.log(2*np.pi*target))[target>1]
print('\nWith Stirling approximation:')
print('Loss per Element:', poisson)
print('Mean Loss:', poisson.mean())


Without Stirling approximation:
Loss per Element: tensor([[ 3.3449,  0.9715, -0.1009,  0.8740],
        [ 2.4757,  1.0639,  1.6295,  0.8102],
        [ 7.1549,  0.6833, -0.1558,  1.3193],
        [ 1.1391,  2.2746,  1.5472,  1.5042]])
Mean Loss: tensor(1.6585)

With Stirling approximation:
Loss per Element: tensor([[ 3.3449,  0.9715, -0.1009,  1.2785],
        [ 2.4757,  1.0639,  1.5750,  0.8102],
        [ 7.1549,  0.6833, -0.1558,  1.3193],
        [ 1.1391,  2.2746,  1.6878,  1.7379]])
Mean Loss: tensor(1.7037)


  poisson[target>1] += (target * np.log(target) - target + 0.5*np.log(2*np.pi*target))[target>1]


## KLDivergence Loss

In [7]:
x = torch.rand(4, 4)
y = torch.rand(4, 4)

print('X:', x, '\nY:', y)

# As with NLLLoss, the input given is expected to contain log-probabilities
xlog = np.log(x)

# torch implementation
print('\nLoss per Element:', nn.KLDivLoss(reduction='none')(xlog, y))
print('Mean Loss:', nn.KLDivLoss(reduction='batchmean')(xlog, y))

X: tensor([[0.6813, 0.0164, 0.6615, 0.9512],
        [0.5952, 0.0906, 0.0625, 0.1622],
        [0.7820, 0.8275, 0.8201, 0.6805],
        [0.5325, 0.3463, 0.7606, 0.9412]]) 
Y: tensor([[0.6856, 0.1691, 0.8089, 0.0290],
        [0.6198, 0.3843, 0.4747, 0.5940],
        [0.5110, 0.6849, 0.4696, 0.6748],
        [0.5274, 0.5670, 0.1215, 0.0474]])

Loss per Element: tensor([[ 0.0043,  0.3947,  0.1627, -0.1013],
        [ 0.0252,  0.5551,  0.9624,  0.7709],
        [-0.2174, -0.1295, -0.2618, -0.0057],
        [-0.0051,  0.2796, -0.2229, -0.1417]])
Mean Loss: tensor(0.5174)


In [8]:
# numpy implementation
kld = y*(np.log(y)-xlog)
print('Loss per Element:', kld)
print('Mean Loss:', kld.sum()/kld.shape[0])

Loss per Element: tensor([[ 0.0043,  0.3947,  0.1627, -0.1013],
        [ 0.0252,  0.5551,  0.9624,  0.7709],
        [-0.2174, -0.1295, -0.2618, -0.0057],
        [-0.0051,  0.2796, -0.2229, -0.1417]])
Mean Loss: tensor(0.5174)


## BCELoss & BCEWithLogitsClass

### single label

In [9]:
x = torch.randn(4)
xsig = nn.Sigmoid()(x)
y_single_label = torch.FloatTensor(4).random_(2)
sample_wts = torch.rand(4)

print('X:', xsig, '\nY:', y_single_label)

X: tensor([0.4431, 0.3669, 0.5999, 0.4177]) 
Y: tensor([0., 1., 1., 1.])


In [10]:
# torch implementation
print('\nBCELoss')
print('\nLoss per Element:', nn.BCELoss(reduction='none', weight=sample_wts)(xsig, y_single_label))
print('Mean Loss:', nn.BCELoss(weight=sample_wts)(xsig, y_single_label))

print('\nBCEWithLogitsLoss')
print('\nLoss per Element:', nn.BCEWithLogitsLoss(reduction='none', weight=sample_wts)(x, y_single_label))
print('Mean Loss:', nn.BCEWithLogitsLoss(weight=sample_wts)(x, y_single_label))


BCELoss

Loss per Element: tensor([0.1490, 0.2209, 0.4339, 0.6489])
Mean Loss: tensor(0.3632)

BCEWithLogitsLoss

Loss per Element: tensor([0.1490, 0.2209, 0.4339, 0.6489])
Mean Loss: tensor(0.3632)


In [11]:
# numpy implementation
bce = -sample_wts*(y_single_label*np.log(xsig) + (1-y_single_label)*np.log(1-xsig))
print('Loss per Element:', bce)
print('Mean Loss:', bce.mean())

Loss per Element: tensor([0.1490, 0.2209, 0.4339, 0.6489])
Mean Loss: tensor(0.3632)


### multi-label w/o unbalanced class weighting

In [12]:
x = torch.randn(4, 4)
xsig = nn.Sigmoid()(x)
y_multi_label = torch.FloatTensor(4, 4).random_(2)

sample_wts = torch.rand(4)

print('X:', xsig, '\nY:', y_multi_label)

X: tensor([[0.7011, 0.4829, 0.7022, 0.1409],
        [0.3880, 0.3372, 0.2545, 0.7500],
        [0.2334, 0.6017, 0.7131, 0.5574],
        [0.1010, 0.3448, 0.6275, 0.9657]]) 
Y: tensor([[1., 0., 0., 0.],
        [0., 0., 1., 1.],
        [1., 0., 1., 1.],
        [0., 1., 1., 1.]])


In [13]:
# torch implementation
print('\n\nBCELoss')
print('\nLoss per Element:', nn.BCELoss(reduction='none', weight=sample_wts)(xsig, y_multi_label))
print('Mean Loss:', nn.BCELoss(weight=sample_wts)(xsig, y_multi_label))

print('\n\nBCEWithLogitsLoss')
print('\nLoss per Element:', nn.BCEWithLogitsLoss(reduction='none', weight=sample_wts)(x, y_multi_label))
print('Mean Loss:', nn.BCEWithLogitsLoss(weight=sample_wts)(x, y_multi_label))



BCELoss

Loss per Element: tensor([[0.3453, 0.5791, 0.5197, 0.1452],
        [0.4775, 0.3612, 0.5870, 0.2752],
        [1.4145, 0.8084, 0.1451, 0.5591],
        [0.1035, 0.9350, 0.1999, 0.0333]])
Mean Loss: tensor(0.4681)


BCEWithLogitsLoss

Loss per Element: tensor([[0.3453, 0.5791, 0.5197, 0.1452],
        [0.4775, 0.3612, 0.5870, 0.2752],
        [1.4145, 0.8084, 0.1451, 0.5591],
        [0.1035, 0.9350, 0.1999, 0.0333]])
Mean Loss: tensor(0.4681)


In [14]:
# numpy implementation
bce = -sample_wts*(y_multi_label*np.log(xsig) + (1-y_multi_label)*np.log(1-xsig))
print('Loss per Element:', bce)
print('Mean Loss:', bce.mean())

Loss per Element: tensor([[0.3453, 0.5791, 0.5197, 0.1452],
        [0.4775, 0.3612, 0.5870, 0.2752],
        [1.4145, 0.8084, 0.1451, 0.5591],
        [0.1035, 0.9350, 0.1999, 0.0333]])
Mean Loss: tensor(0.4681)


### multi-label with unbalanced class weighting

In [15]:
pos_wts = torch.rand(4)

# Not possible with BCELoss

# torch implementation
print('\nBCEWithLogitsLoss')
print('\nLoss per Element:', nn.BCEWithLogitsLoss(reduction='none', weight=sample_wts, pos_weight=pos_wts)(x, y_multi_label))
print('Mean Loss:', nn.BCEWithLogitsLoss(weight=sample_wts, pos_weight=pos_wts)(x, y_multi_label))


BCEWithLogitsLoss

Loss per Element: tensor([[0.2677, 0.5791, 0.5197, 0.1452],
        [0.4775, 0.3612, 0.3448, 0.0459],
        [1.0968, 0.8084, 0.0852, 0.0933],
        [0.1035, 0.6172, 0.1174, 0.0056]])
Mean Loss: tensor(0.3543)


In [16]:
# numpy implementation
bce = -sample_wts*(pos_wts*y_multi_label*np.log(xsig) + (1-y_multi_label)*np.log(1-xsig))
print('Loss per Element:', bce)
print('Mean Loss:', bce.mean())

Loss per Element: tensor([[0.2677, 0.5791, 0.5197, 0.1452],
        [0.4775, 0.3612, 0.3448, 0.0459],
        [1.0968, 0.8084, 0.0852, 0.0933],
        [0.1035, 0.6172, 0.1174, 0.0056]])
Mean Loss: tensor(0.3543)


## MultiLabelMarginLoss

In [17]:
x = torch.randn(6, 4)
# y = torch.LongTensor(6, 4).random_(-1, 4)
y_choices = torch.Tensor(range(-1, x.shape[1]))
y = torch.Tensor([np.random.choice(y_choices, (4, ), replace=False) for _ in range(x.shape[0])]).long()

print('X:', x, '\nY:', y)

# torch implementation
print('\nLoss per Element:', nn.MultiLabelMarginLoss(reduction='none')(x, y))
print('Mean Loss:', nn.MultiLabelMarginLoss()(x, y))

X: tensor([[-1.0196,  0.8123, -1.1416, -1.8985],
        [ 0.6518, -0.6446,  0.1760,  1.0418],
        [-0.7584,  1.0687, -0.0389,  1.4692],
        [-1.6708, -0.8434,  0.2748,  0.0430],
        [ 0.5977,  1.5124, -1.1130, -0.8811],
        [ 1.4084, -1.7454, -0.6006, -0.0891]]) 
Y: tensor([[ 0,  2, -1,  3],
        [ 3,  1,  0,  2],
        [ 1,  2, -1,  3],
        [ 2,  1, -1,  3],
        [ 1,  2,  0,  3],
        [ 1,  0,  2,  3]])

Loss per Element: tensor([1.5375, 0.0000, 1.0472, 0.7068, 0.0000, 0.0000])
Mean Loss: tensor(0.5486)


##### Found the documentation pretty confusing for this one - https://pytorch.org/docs/1.5.1/nn.html#multilabelmarginloss. My explanation below:
##### x and y in the doc are 1D tensors though the function works for 2D as well.
##### i indexes over x, j indexes over y: same range as y and x must have the same size.
##### j stops when y<0 (or ==-1) is encountered.
##### i!=y[j] for all i, j; that's captured in the variable cont_non_neg_targets below.
##### k in code below indexes over N, the number of samples; so x[k], y[k] correspond to x and y in the doc

In [18]:
# numpy implementation
mlm_l = np.zeros(x.shape[0], dtype=np.float32)
for k in range(x.shape[0]):
    loss_ele = 0
    cont_non_neg_targets = []
    
    for j in range(len(y[k])):
        if y[k, j]<0: break
        cont_non_neg_targets.append(y[k, j])
    
    for i in range(len(x[k])):
        if len(cont_non_neg_targets)>0:
            for j in cont_non_neg_targets:
                if i not in cont_non_neg_targets:
                    loss_ele += max(0, 1 - (x[k, j] - x[k, i]))
    
    mlm_l[k] = loss_ele/len(x[k])

print('Loss per Element:', mlm_l)
print('Mean Loss:', mlm_l.mean())

Loss per Element: [1.537469   0.         1.0472248  0.70677024 0.         0.        ]
Mean Loss: 0.54857737


## MarginRankingLoss

In [19]:
x1 = torch.rand(6)
x2 = torch.rand(6)
y_choices = torch.Tensor([1, -1])
y = y_choices[torch.randint(len(y_choices), (len(x1),))]
margin = 0.5

print('x1:', x1, '\nx2:', x2, '\ny:', y)

# torch implementation
print('\nLoss per Element:', nn.MarginRankingLoss(margin=margin, reduction='none')(x1, x2, y))
print('Mean Loss:', nn.MarginRankingLoss(margin=margin)(x1, x2, y))

x1: tensor([0.8117, 0.9745, 0.3043, 0.8968, 0.6093, 0.2207]) 
x2: tensor([0.4264, 0.9011, 0.8347, 0.0628, 0.1673, 0.3796]) 
y: tensor([ 1., -1.,  1., -1.,  1.,  1.])

Loss per Element: tensor([0.1147, 0.5735, 1.0303, 1.3340, 0.0580, 0.6589])
Mean Loss: tensor(0.6282)


In [20]:
# numpy implementation
ranking_l = np.clip(-y*(x1-x2) + margin, a_min=0, a_max=None)
print('Loss per Element:', ranking_l)
print('Mean Loss:', ranking_l.mean())

Loss per Element: tensor([0.1147, 0.5735, 1.0303, 1.3340, 0.0580, 0.6589])
Mean Loss: tensor(0.6282)


## HingeEmbeddingLoss

In [21]:
# torch implementation
print('\nLoss per Element:', nn.HingeEmbeddingLoss(margin=margin, reduction='none')(x1, y))
print('Mean Loss:', nn.HingeEmbeddingLoss(margin=margin)(x1, y))


Loss per Element: tensor([0.8117, 0.0000, 0.3043, 0.0000, 0.6093, 0.2207])
Mean Loss: tensor(0.3243)


In [22]:
# numpy implementation
hinge = x1
hinge[y==-1] = (np.clip(margin-x1, a_min=0, a_max=None))[y==-1]
print('Loss per Element:', hinge)
print('Mean Loss:', hinge.mean())

Loss per Element: tensor([0.8117, 0.0000, 0.3043, 0.0000, 0.6093, 0.2207])
Mean Loss: tensor(0.3243)


## MultiLabelSoftMarginLoss

In [23]:
x = torch.randn(6, 4)
y = torch.FloatTensor(6, 4).random_(2)
print('X:', x, '\nY:', y)

# torch implementation
print('\nLoss per Element:', nn.MultiLabelSoftMarginLoss(reduction='none')(x, y))
print('Mean Loss:', nn.MultiLabelSoftMarginLoss()(x, y))

X: tensor([[-0.3800,  1.0549, -1.0730, -0.1902],
        [ 0.0697,  0.5335, -0.3867,  0.3889],
        [-0.4021, -1.9058,  0.3312,  0.4127],
        [ 0.2506,  0.1345, -0.0142, -0.2597],
        [ 1.3403,  2.2513, -0.8659, -0.0038],
        [ 1.1053,  1.2902,  0.9825, -0.1717]]) 
Y: tensor([[0., 1., 1., 0.],
        [1., 1., 1., 1.],
        [0., 1., 0., 0.],
        [1., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

Loss per Element: tensor([0.6974, 0.6358, 1.0874, 0.6154, 1.4581, 1.2520])
Mean Loss: tensor(0.9577)


In [24]:
# numpy implementation
mlsm_l = (y*np.log(1/(1 + np.exp(-x))) + (1-y)*np.log(np.exp(-x)/(1 + np.exp(-x))))/(-x.shape[1])
mlsm_l = mlsm_l.sum(axis=1)
print('Loss per Element:', mlsm_l)
print('Mean Loss:', mlsm_l.mean())

Loss per Element: tensor([0.6974, 0.6358, 1.0874, 0.6154, 1.4581, 1.2520])
Mean Loss: tensor(0.9577)


## SoftMarginLoss

In [25]:
x = torch.rand(4, 6)
y_choices = torch.Tensor([1, -1])
y = y_choices[torch.randint(len(y_choices), (x.shape[0], x.shape[1]))]

print('X:', x, '\nY:', y)

# torch implementation
print('\nLoss per Element:', nn.SoftMarginLoss(reduction='none')(x, y))
print('Mean Loss:', nn.SoftMarginLoss()(x, y))

X: tensor([[0.2943, 0.8297, 0.3987, 0.5874, 0.8499, 0.7911],
        [0.2872, 0.5321, 0.3106, 0.9681, 0.0945, 0.5349],
        [0.8597, 0.5731, 0.8170, 0.6084, 0.5536, 0.5601],
        [0.1298, 0.5443, 0.2394, 0.8772, 0.0049, 0.0089]]) 
Y: tensor([[-1., -1.,  1., -1.,  1.,  1.],
        [ 1.,  1., -1., -1., -1.,  1.],
        [ 1., -1., -1.,  1.,  1.,  1.],
        [-1., -1., -1., -1., -1.,  1.]])

Loss per Element: tensor([[0.8511, 1.1917, 0.5135, 1.0294, 0.3559, 0.3739],
        [0.5598, 0.4621, 0.8605, 1.2900, 0.7415, 0.4611],
        [0.3530, 1.0202, 1.1828, 0.4345, 0.4542, 0.4518],
        [0.7602, 1.0019, 0.8200, 1.2250, 0.6956, 0.6887]])
Mean Loss: tensor(0.7408)


In [26]:
# numpy implementation
smloss = np.log(1 + np.exp(-y*x))
print('Loss per Element:', smloss)
print('Mean Loss:', smloss.mean())

Loss per Element: tensor([[0.8511, 1.1917, 0.5135, 1.0294, 0.3559, 0.3739],
        [0.5598, 0.4621, 0.8605, 1.2900, 0.7415, 0.4611],
        [0.3530, 1.0202, 1.1828, 0.4345, 0.4542, 0.4518],
        [0.7602, 1.0019, 0.8200, 1.2250, 0.6956, 0.6887]])
Mean Loss: tensor(0.7408)


## CosineEmbeddingLoss

In [27]:
x1 = torch.rand(6, 3)
x2 = torch.rand(6, 3)
y_choices = torch.Tensor([1, -1])
y = y_choices[torch.randint(len(y_choices), (len(x1),))]
margin = 0.2

print('x1:', x1, '\nx2:', x2, '\ny:', y)

# torch implementation
print('\nLoss per Element:', nn.CosineEmbeddingLoss(margin=margin, reduction='none')(x1, x2, y))
print('Mean Loss:', nn.CosineEmbeddingLoss(margin=margin)(x1, x2, y))

x1: tensor([[0.6965, 0.2198, 0.9593],
        [0.4978, 0.7715, 0.9826],
        [0.0248, 0.8703, 0.3042],
        [0.4056, 0.8449, 0.4021],
        [0.1218, 0.6030, 0.0039],
        [0.8904, 0.7904, 0.5569]]) 
x2: tensor([[0.1880, 0.5525, 0.5761],
        [0.8281, 0.0927, 0.4129],
        [0.8989, 0.6844, 0.4363],
        [0.5131, 0.6720, 0.9893],
        [0.7900, 0.7059, 0.3628],
        [0.9239, 0.3779, 0.1682]]) 
y: tensor([-1., -1., -1.,  1., -1., -1.])

Loss per Element: tensor([0.6142, 0.5112, 0.4720, 0.1157, 0.5596, 0.7132])
Mean Loss: tensor(0.4977)


In [28]:
# numpy implementation
cos_emb_l = np.zeros(x1.shape[0])
for i in range(cos_emb_l.shape[0]):
    cos_emb_l[i] = cosine(x1[i].numpy(), x2[i].numpy()) if y[i]==1 else max(0, 1-cosine(x1[i].numpy(), x2[i].numpy())-margin)

print('Loss per Element:', cos_emb_l)
print('Mean Loss:', cos_emb_l.mean())

Loss per Element: [0.61423074 0.51118069 0.47203498 0.11568004 0.55964606 0.71316324]
Mean Loss: 0.4976559579372406


## MultiMarginLoss

In [29]:
x = torch.randn(8, 4)
y = torch.randint(0, 4, (8,))

margin = 1
p = 2
class_wts = torch.ones(4)

print('X:', x, '\nY:', y)

# torch implementation
print('\nLoss per Element:', nn.MultiMarginLoss(reduction='none', margin=margin, p=p, weight=class_wts)(x, y))
print('Mean Loss:', nn.MultiMarginLoss(margin=margin, p=p, weight=class_wts)(x, y))

X: tensor([[ 0.3148,  0.7408, -0.2145,  1.0087],
        [ 1.9221,  0.0741, -0.6381, -0.3867],
        [-0.6045,  0.8229,  0.2759,  0.2721],
        [-0.7059, -0.2502,  0.8719,  1.1585],
        [-1.1585, -0.4801,  1.6348, -1.8812],
        [-0.9168, -1.4287,  0.9173,  1.4184],
        [-0.2423,  0.1271, -3.1706, -1.2293],
        [ 0.3333,  1.2549,  0.7563, -0.8989]]) 
Y: tensor([0, 3, 2, 2, 1, 3, 2, 2])

Loss per Element: tensor([ 1.2811,  3.4105,  0.8499,  0.4138,  2.4514,  0.0622, 10.6381,  0.6447])
Mean Loss: tensor(2.4690)


In [30]:
# numpy implementation
mml = np.zeros(x.shape[0], dtype=np.float32)

for i in range(x.shape[0]):
    loss_ele = 0
    for j in range(x.shape[1]):
        if j!=y[i]:
            loss_ele += max(0, class_wts[y[i]]*(margin - x[i, y[i]] + x[i, j]))**p
    mml[i] = loss_ele/x.shape[1]

print(mml)
print(mml.mean())

[ 1.2810552   3.4104855   0.849915    0.4138348   2.4514422   0.06221414
 10.638116    0.64472604]
2.4689736


## TripletMarginLoss

In [31]:
anchor = torch.randn(8, 4)
positive = torch.randn(8, 4)
negative = torch.randn(8, 4)

margin = 1
p = 2

# torch implementation
print('\nLoss per Element:', nn.TripletMarginLoss(reduction='none', margin=margin, p=p)(anchor, positive, negative))
print('Mean Loss:', nn.TripletMarginLoss(margin=margin, p=p)(anchor, positive, negative))


Loss per Element: tensor([0.9923, 0.0924, 3.0348, 0.0000, 0.8075, 1.6179, 0.8449, 2.4973])
Mean Loss: tensor(1.2359)


In [32]:
def p_norm_dist(x1, x2, p):
    d = ((x1-x2)**p).sum(axis=1)
    return d**(1/p)

# numpy implementation
triplet_l = np.clip(p_norm_dist(anchor, positive, p) - p_norm_dist(anchor, negative, p) + margin, a_min=0, a_max=None)
print('Loss per Element:', triplet_l)
print('Mean Loss:', triplet_l.mean())

Loss per Element: tensor([0.9923, 0.0924, 3.0348, 0.0000, 0.8075, 1.6179, 0.8449, 2.4973])
Mean Loss: tensor(1.2359)
