In [1]:
import torch
import torch.nn as nn

### 

In [2]:
# 모델은 Q개의 label을 출력
# 논문 상에서 activate function은 tanh
class Model(nn.Module):
    
    def __init__(self, num_features, hid_dim, Q):
        super().__init__()
        self.layer1 = nn.Linear(num_features, hid_dim, )
        self.layer2 = nn.Linear(hid_dim, Q)
        
    def forward(self, x):
        out = torch.tanh(self.layer1(x))
        return torch.tanh(self.layer2(out))

In [8]:
bsz = 32
model = Model(784,100,5)

### Sample data
loss 구현을 위한 sample data 생성

In [11]:
# pred는 -1~1사이의 실수(tanh이므로)
pred = model(torch.randn(32, 784))
pred.size()

In [13]:
# target은 1과 -1로 이루어짐
target = torch.LongTensor(32, 5).random_(2)
target[target == 0] = -1

In [12]:
pred[0]

tensor([-2.2813e-01, -8.6457e-02, -2.6944e-04,  4.8055e-01,  1.1362e-03],
       grad_fn=<SelectBackward>)

In [21]:
target[0]

tensor([-1, -1,  1, -1,  1])

In [22]:
(target == 1).size()

torch.Size([32, 5])

## BP-MLL Loss  
$E = \sum_{i=1}^{m}{\frac{1}{|Y_i||{\bar Y}_i|}} $
$\sum_{(k,l)∈Y_i×\bar Y_i}{exp(−(c_k^i − c_l^i))}$
 

### 하나의 인스턴스로 일단 수식을 구현

In [23]:
# 첫번째 인스턴스
pred[0]

tensor([-2.2813e-01, -8.6457e-02, -2.6944e-04,  4.8055e-01,  1.1362e-03],
       grad_fn=<SelectBackward>)

In [24]:
target[0]

tensor([-1, -1,  1, -1,  1])

In [25]:
# Y는 target이 1인 것
Y = pred[0][target[0] == 1]
Y

tensor([-0.0003,  0.0011], grad_fn=<IndexBackward>)

In [26]:
# Y_bar는 target이 -1인 것 
Y_bar = pred[0][target[0] != 1]
Y_bar

tensor([-0.2281, -0.0865,  0.4805], grad_fn=<IndexBackward>)

In [27]:
# cartesian_product -> (ck,cl) 쌍을 생성 
torch.cartesian_prod(Y, Y_bar)

tensor([[-2.6944e-04, -2.2813e-01],
        [-2.6944e-04, -8.6457e-02],
        [-2.6944e-04,  4.8055e-01],
        [ 1.1362e-03, -2.2813e-01],
        [ 1.1362e-03, -8.6457e-02],
        [ 1.1362e-03,  4.8055e-01]], grad_fn=<StackBackward>)

In [31]:
# Sum(exp(-(ck-cl)))
sum(torch.exp(-torch.cartesian_prod(Y, Y_bar).sum(dim=-1)))

tensor(5.9274, grad_fn=<AddBackward0>)

In [35]:
# BP-MLL loss 
sum(torch.exp(torch.cartesian_prod(Y, -Y_bar).sum(dim=-1)))/len(torch.cartesian_prod(Y, Y_bar)) 
                                                                # 카르테지안의 경우의수가 |Y||Y_bar|

tensor(0.9888, grad_fn=<DivBackward0>)

### 전체 인스턴스를 계산할 수 있도록 loss 구현

In [37]:
pred = model(torch.randn(32, 784))

In [38]:
target = torch.LongTensor(32, 5).random_(2)
target[target == 0] = -1

# division zero 해결을 위한 전처리 offset. 강제로 각 인스턴스의 마지막에 -1과 1을 포함하도록 만듬..
target[:, -2] = 1      
target[:, -1] = -1

In [40]:
target

tensor([[-1,  1, -1,  1, -1],
        [ 1, -1, -1,  1, -1],
        [-1,  1, -1,  1, -1],
        [ 1,  1, -1,  1, -1],
        [-1, -1,  1,  1, -1],
        [ 1,  1,  1,  1, -1],
        [ 1, -1,  1,  1, -1],
        [ 1, -1, -1,  1, -1],
        [ 1,  1, -1,  1, -1],
        [ 1, -1,  1,  1, -1],
        [ 1,  1,  1,  1, -1],
        [ 1, -1, -1,  1, -1],
        [-1, -1,  1,  1, -1],
        [ 1,  1,  1,  1, -1],
        [-1, -1, -1,  1, -1],
        [-1,  1,  1,  1, -1],
        [-1,  1, -1,  1, -1],
        [-1,  1, -1,  1, -1],
        [ 1, -1, -1,  1, -1],
        [ 1, -1,  1,  1, -1],
        [ 1, -1,  1,  1, -1],
        [ 1,  1,  1,  1, -1],
        [-1, -1, -1,  1, -1],
        [-1,  1, -1,  1, -1],
        [-1, -1, -1,  1, -1],
        [-1,  1, -1,  1, -1],
        [ 1,  1,  1,  1, -1],
        [-1, -1, -1,  1, -1],
        [ 1, -1,  1,  1, -1],
        [-1,  1, -1,  1, -1],
        [-1,  1,  1,  1, -1],
        [ 1,  1,  1,  1, -1]])

In [41]:
# BP_MLL loss 
loss_sum = 0
for i in range(len(target)) : 
    Y = pred[i][target[i] == 1]
    Y_bar = pred[i][target[i] != 1]
    loss = sum(torch.exp(torch.cartesian_prod(Y, -Y_bar).sum(dim=-1)))/ len(torch.cartesian_prod(Y, Y_bar))
    loss_sum += loss
    print(loss)
print('BP_MLL loss : ',loss_sum)

tensor(1.1303, grad_fn=<DivBackward0>)
tensor(0.7542, grad_fn=<DivBackward0>)
tensor(1.0558, grad_fn=<DivBackward0>)
tensor(1.2187, grad_fn=<DivBackward0>)
tensor(0.6552, grad_fn=<DivBackward0>)
tensor(0.8923, grad_fn=<DivBackward0>)
tensor(1.1526, grad_fn=<DivBackward0>)
tensor(0.7676, grad_fn=<DivBackward0>)
tensor(0.7911, grad_fn=<DivBackward0>)
tensor(1.1053, grad_fn=<DivBackward0>)
tensor(1.1520, grad_fn=<DivBackward0>)
tensor(0.8414, grad_fn=<DivBackward0>)
tensor(1.2720, grad_fn=<DivBackward0>)
tensor(1.0303, grad_fn=<DivBackward0>)
tensor(1.3211, grad_fn=<DivBackward0>)
tensor(1.1845, grad_fn=<DivBackward0>)
tensor(1.0706, grad_fn=<DivBackward0>)
tensor(0.6839, grad_fn=<DivBackward0>)
tensor(1.0162, grad_fn=<DivBackward0>)
tensor(1.0522, grad_fn=<DivBackward0>)
tensor(0.7982, grad_fn=<DivBackward0>)
tensor(0.5337, grad_fn=<DivBackward0>)
tensor(1.1568, grad_fn=<DivBackward0>)
tensor(1.3675, grad_fn=<DivBackward0>)
tensor(1.3487, grad_fn=<DivBackward0>)
tensor(1.0400, grad_fn=<D

### loss함수로 사용할 수 있도록 구현 

In [42]:
class BPMLLLoss(nn.Module):

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super().__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction

    def forward(self, input, target):
        # https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/tkde06a.pdf
        # input's shape is (bsz, Q)
        # target's shape is (bsz, Q)
        # target's unique value is in [-1, 1]
    
        loss_sum = 0
        for i in range(len(target)) : 
            Y = input[i][target[i] == 1]
            Y_bar = input[i][target[i] != 1]
            loss = sum(torch.exp(-torch.cartesian_prod(Y, -Y_bar).sum(dim=-1)))/ len(torch.cartesian_prod(Y, Y_bar))
            loss_sum += loss
        return loss_sum
        

In [43]:
criterion = BPMLLLoss()

In [45]:
criterion(pred, target)

tensor(35.0952, grad_fn=<AddBackward0>)