In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import logging
import math
def configure_logging(level=logging.INFO):
    """Set up logging for the notebook through ``logging.basicConfig``.

    Each record is rendered as
    ``<timestamp> <file>:<line> <LEVEL>] <message>`` with a
    ``YYYY-mm-dd HH:MM:SS`` timestamp.

    Args:
        level: Threshold handed to ``logging.basicConfig``; defaults to
            ``logging.INFO``.
    """
    logging.basicConfig(
        level=level,
        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )


configure_logging()

### Embedding

Embedding层训练后可以得到词向量。需要注意：当设置 `sparse=True`（使用稀疏梯度）时，目前只有3种优化器支持更新embedding层的权重；使用默认的稠密梯度（`sparse=False`）时，任意优化器都可以训练embedding层。

Keep in mind that only a limited number of optimizers support sparse gradients: currently it’s optim.SGD (CUDA and CPU), optim.SparseAdam (CUDA and CPU) and optim.Adagrad (CPU)

另外from_pretrained方法可以直接加载预训练的词向量。

With padding_idx set, the embedding vector at padding_idx is initialized to all zeros. However, note that this vector can be modified afterwards, e.g., using a customized initialization method, and thus changing the vector used to pad the output. The gradient for this vector from Embedding is always zero.

In [1]:
import torch
import torch.nn as nn
# Toy vocabulary: each word maps to its row index in the embedding table.
word_to_ix = {"hello": 0, "world": 1}

# 2 words in vocab, 5-dimensional embeddings.
embeds = nn.Embedding(num_embeddings=2, embedding_dim=5)

# Indices of both words, in the order "hello", "world".
lookup_tensor = torch.tensor(
    [word_to_ix["hello"], word_to_ix["world"]],
    dtype=torch.long,
)

# NOTE: despite its name, `hello_embed` holds the vectors of BOTH looked-up
# words (row 0 -> "hello", row 1 -> "world").
hello_embed = embeds(lookup_tensor)
print(hello_embed)

tensor([[-0.1860, -0.7426, -0.8872,  0.1476, -0.2419],
        [ 0.8223,  1.1087,  0.2378,  0.3572, -0.7053]],
       grad_fn=<EmbeddingBackward>)


### 分类的几种损失函数相关例子

In [7]:
# Simulated final-layer outputs and targets: [batch_size=3, num_labels=5].
# Suited to binary or multi-class classification where the classes are
# mutually exclusive (each sample carries exactly one label).
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
logging.info(input)
logging.info(target)

# Compare CrossEntropyLoss with NLLLoss: CrossEntropyLoss on the raw scores
# should match NLLLoss applied to LogSoftmax of the same scores.
loss1 = nn.CrossEntropyLoss()
loss2 = nn.NLLLoss()

l1 = loss1(input, target)
m = nn.LogSoftmax(dim=1)
l2 = loss2(m(input), target)
logging.info('{},{}'.format(l1, l2))

# Reproduce the NLLLoss computation by hand.
# NOTE: despite its name, `input_log_softmax` holds the NEGATIVE log-softmax.
# `dim=1` (the class axis) is passed explicitly because the implicit-dim form
# of softmax is deprecated and warns; log_softmax is used instead of
# log(softmax(x)) because it is the numerically stable formulation.
input_log_softmax = -F.log_softmax(input, dim=1)
logging.info(input_log_softmax)
target_one_hot = F.one_hot(target, num_classes=5)
logging.info(target_one_hot)
# Keep only the negative log-probability of each sample's true class.
a = input_log_softmax * target_one_hot.float()
logging.info(a)
# Sum over classes, then average over the batch.
b = torch.mean(torch.sum(a, dim=1))
logging.info(b)
logging.info(l2)
# The hand-rolled value must agree with NLLLoss (up to float rounding).
assert torch.allclose(b, l2), 'manual NLL does not match nn.NLLLoss'

2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:5 INFO] tensor([[ 1.3423,  0.2123, -0.1786,  1.0153, -0.0546],
        [-0.4240, -0.8279,  0.3512,  0.3944,  0.6943],
        [-0.3604, -0.2497, -1.0211, -0.2197,  0.8163]], requires_grad=True)
2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:6 INFO] tensor([0, 0, 1])
2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:16 INFO] 1.6583219766616821,1.6583219766616821
2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:21 INFO] tensor([[0.9203, 2.0503, 2.4412, 1.2473, 2.3172],
        [2.2154, 2.6193, 1.4402, 1.3970, 1.0971],
        [1.9499, 1.8393, 2.6106, 1.8092, 0.7733]], grad_fn=<NegBackward>)
2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:23 INFO] tensor([[1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0]])
2020-09-15 17:50:56 <ipython-input-7-427e367640bc>:25 INFO] tensor([[0.9203, 0.0000, 0.0000, 0.0000, 0.0000],
        [2.2154, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 1.8393, 0.0000, 0.0000, 0.0000]],

In [8]:
"""
模拟BCELoss的计算过程，3个样本3个类别，target代表每个样本属于哪些类别
适用每个样本多标签分类（1个样本可能属于多个类别）
"""
# Hand-computed BCELoss walkthrough: 3 samples x 3 classes. Each row of
# `target` is a multi-hot vector marking the classes that sample belongs to
# (multi-label classification: one sample may belong to several classes).
input = torch.randn(3, 3, requires_grad=True)
target = torch.tensor([[0, 1, 1], [0, 0, 1], [1, 0, 1]], dtype=torch.float)
logging.info(input)
logging.info(target)

m = nn.Sigmoid()
# Per-class probabilities; computed once and reused below.
a = m(input)
logging.info(a)

loss = nn.BCELoss()
loss2 = nn.BCEWithLogitsLoss()
logging.info(loss(a, target))

# Element-wise log-likelihood: y * log(p) + (1 - y) * log(1 - p).
b = target * torch.log(a) + (1 - target) * torch.log(1 - a)
logging.info(b)

# BCE is the negated mean of the element-wise log-likelihood.
c = torch.mean(b)
logging.info(-c)
# Compare with a tolerance: the two values come from different code paths,
# so exact float equality (torch.equal) is not guaranteed.
if torch.allclose(loss(a, target), -c):
    logging.info('BCELoss compute example')

# BCEWithLogitsLoss on the raw scores should match BCELoss on the sigmoid
# outputs.
logging.info(loss2(input, target))
logging.info(loss(a, target))

2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:7 INFO] tensor([[-0.0304,  0.9403, -0.4497],
        [-1.9937, -0.8510,  0.8463],
        [-0.2669,  1.4899, -0.4149]], requires_grad=True)
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:8 INFO] tensor([[0., 1., 1.],
        [0., 0., 1.],
        [1., 0., 1.]])
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:12 INFO] tensor([[0.4924, 0.7192, 0.3894],
        [0.1199, 0.2992, 0.6998],
        [0.4337, 0.8161, 0.3977]], grad_fn=<SigmoidBackward>)
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:16 INFO] tensor(0.6935, grad_fn=<BinaryCrossEntropyBackward>)
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:19 INFO] tensor([[-0.6781, -0.3297, -0.9431],
        [-0.1277, -0.3556, -0.3570],
        [-0.8355, -1.6932, -0.9220]], grad_fn=<AddBackward0>)
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:22 INFO] tensor(0.6935, grad_fn=<NegBackward>)
2020-09-15 18:00:09 <ipython-input-8-cb80ddf15354>:24 INFO] BCELoss compute example
20

In [13]:
import torch.nn.functional as F

# Random scores for 3 samples x 3 labels, with fixed multi-hot float targets.
input = torch.randn(3, 3)
target = torch.tensor(
    [[0, 1, 0], [1, 0, 0], [1, 1, 1]],
    dtype=torch.float32,
)

# Criteria under comparison: BCELoss is fed sigmoid outputs, while
# BCEWithLogitsLoss is fed the raw scores directly.
m = torch.nn.Sigmoid()
loss = torch.nn.BCELoss()
loss1 = torch.nn.BCEWithLogitsLoss()

# Print, one per line: BCELoss on sigmoid(input), BCEWithLogitsLoss on input,
# and the functional binary_cross_entropy_with_logits on input.
print(
    loss(m(input), target),
    loss1(input, target),
    F.binary_cross_entropy_with_logits(input, target),
    sep="\n",
)

tensor(0.7109)
tensor(0.7109)
tensor(0.7109)
