In [39]:
from torch.nn import RNN
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Elman RNN mapping 4 input features to a 5-dimensional hidden state.
# batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
rnn = RNN(input_size=4, hidden_size=5, batch_first=True)

In [3]:
# Random batch for the RNN: 2 sequences, 3 time steps each, 4 features per step.
inputs = torch.rand(2, 3, 4)

In [4]:
# outputs: hidden state at every time step; hn: hidden state after the last step
# (the last row of each sequence in `outputs` equals the matching row of `hn`).
outputs, hn = rnn(inputs)

In [5]:
outputs  # shape: batch size (2) x sequence length (3) x hidden size (5)

tensor([[[ 0.1379,  0.7494,  0.3985, -0.3255,  0.0272],
         [ 0.3392,  0.6557, -0.2550, -0.4567, -0.2601],
         [ 0.5708,  0.7729, -0.0370, -0.4537,  0.1507]],

        [[ 0.3325,  0.7780, -0.0464, -0.3506, -0.2138],
         [ 0.4590,  0.7016, -0.2452, -0.4165, -0.0343],
         [ 0.5885,  0.7117, -0.3927, -0.4772,  0.0844]]],
       grad_fn=<TransposeBackward1>)

In [6]:
hn  # shape: 1 x batch size x hidden size

tensor([[[ 0.5708,  0.7729, -0.0370, -0.4537,  0.1507],
         [ 0.5885,  0.7117, -0.3927, -0.4772,  0.0844]]],
       grad_fn=<StackBackward0>)

### transformer demo

In [7]:
# A single Transformer encoder layer (self-attention + feed-forward) with model width 4 and 2 heads.
# batch_first is left at its default (False), so the layer reads src as (seq_len, batch, d_model),
# i.e. 2 positions x 3 sequences here.
# NOTE(review): the RNN demo earlier in this notebook used batch_first=True — confirm this layout is intended.
encoder_layer = nn.TransformerEncoderLayer(d_model=4, nhead=2)
src = torch.rand(2, 3, 4)
out = encoder_layer(src)

In [8]:
out  # same shape as src: 2 x 3 x 4

tensor([[[ 0.8745,  1.1060, -0.8307, -1.1498],
         [-1.6010,  0.1219,  1.1444,  0.3347],
         [ 0.3442,  1.4746, -0.7462, -1.0726]],

        [[-0.5222,  1.6971, -0.8677, -0.3071],
         [-0.0512,  0.9340,  0.7281, -1.6109],
         [ 1.5189, -0.6427,  0.2313, -1.1075]]],
       grad_fn=<NativeLayerNormBackward0>)

In [9]:
# Full encoder: a stack of 6 layers built from encoder_layer, applied to the same src.
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
out = transformer_encoder(src)
print(out)

tensor([[[ 0.7188,  0.8874, -1.6415,  0.0354],
         [-1.3742,  0.1996,  1.4185, -0.2438],
         [ 0.0299,  1.3304,  0.1274, -1.4876]],

        [[ 0.0065,  1.3204, -1.4930,  0.1661],
         [ 0.8215,  0.9743, -0.2800, -1.5157],
         [ 0.8588, -0.2737,  0.9355, -1.5206]]],
       grad_fn=<NativeLayerNormBackward0>)


In [10]:
# The encoder output is the "memory" that the decoder attends to.
memory = transformer_encoder(src)
decoder_layer = nn.TransformerDecoderLayer(d_model=4, nhead=2)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
# Random stand-in for the target-side input; same 2 x 3 x 4 shape as src.
out_part = torch.rand(2, 3, 4)
out = transformer_decoder(out_part, memory)
print(out)

tensor([[[ 0.4187,  1.3972, -0.5734, -1.2425],
         [-1.6286,  0.4788,  0.0967,  1.0531],
         [-1.6675,  0.8310,  0.1193,  0.7173]],

        [[-1.5641, -0.1679,  0.7533,  0.9787],
         [-0.6510, -0.5150, -0.5641,  1.7300],
         [ 0.0886,  1.5920, -0.9908, -0.6899]]],
       grad_fn=<NativeLayerNormBackward0>)


### 情感分析

In [12]:
# Lookup table with 8 rows (one per token id) of 3-dimensional vectors.
embedding = nn.Embedding(8, 3)
# Two sequences of 4 token ids each.
inpt = torch.tensor([[0, 1, 2, 1], [4, 6, 6, 7]], dtype=torch.long)
# Every id is replaced by its table row, giving a 2 x 4 x 3 tensor.
output = embedding(inpt)
print(output)

tensor([[[ 0.6923, -0.1346, -0.1242],
         [-1.4251,  0.0223, -0.2153],
         [ 0.7121,  1.3631,  0.8899],
         [-1.4251,  0.0223, -0.2153]],

        [[-1.4206, -1.9937, -0.6145],
         [ 0.9015, -1.0612, -2.2288],
         [ 0.9015, -1.0612, -2.2288],
         [-1.2294,  1.3970,  0.0421]]], grad_fn=<EmbeddingBackward0>)


In [13]:
output.mean(dim=1)  # average over the 4 positions -> one 3-d vector per sequence (2 x 3)

tensor([[-0.3614,  0.3183,  0.0838],
        [-0.2117, -0.6797, -1.2575]], grad_fn=<MeanBackward1>)

In [15]:
# Four token-id sequences of different lengths (4, 5, 3 and 6).
input1 = torch.tensor([0, 1, 2, 1], dtype=torch.long)
input2 = torch.tensor([2, 1, 3, 7, 5], dtype=torch.long)
input3 = torch.tensor([6, 4, 2], dtype=torch.long)
input4 = torch.tensor([1, 3, 4, 3, 5, 7], dtype=torch.long)
# NOTE(review): `inputs` held the RNN input tensor earlier in the notebook; the name is reused here for a list.
inputs = [input1, input2, input3, input4]

In [16]:
inputs  # list of four 1-D LongTensors

[tensor([0, 1, 2, 1]),
 tensor([2, 1, 3, 7, 5]),
 tensor([6, 4, 2]),
 tensor([1, 3, 4, 3, 5, 7])]

In [21]:
# A leading 0 followed by the length of each sequence; converted to start offsets in a later cell.
offsets = [0] + [i.shape[0] for i in inputs]

In [24]:
offsets  # [0, len(input1), len(input2), len(input3), len(input4)]

[0, 4, 5, 3, 6]

In [25]:
# Drop the last length and take the running sum: entry k is the index at which
# sequence k starts once all sequences are concatenated.
# NOTE(review): `offsets` is rebound from list to tensor, so this cell is not idempotent —
# re-running it operates on the tensor instead of the original list.
offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)

In [26]:
offsets  # start index of each of the four sequences

tensor([ 0,  4,  9, 12])

In [27]:
# Flatten the four sequences into a single 1-D tensor of ids.
# NOTE(review): `inputs` is rebound from list to tensor, so this cell cannot be re-run
# without first re-creating the list.
inputs = torch.cat(inputs)

In [28]:
inputs  # 4 + 5 + 3 + 6 = 18 ids

tensor([0, 1, 2, 1, 2, 1, 3, 7, 5, 6, 4, 2, 1, 3, 4, 3, 5, 7])

In [30]:
# EmbeddingBag looks up every id and reduces each bag to a single vector
# (no mode is passed, so the library default reduction applies — "mean" per the PyTorch docs).
embeddingbag = nn.EmbeddingBag(num_embeddings=8, embedding_dim=3)
# offsets[k] is where bag k starts inside the flat `inputs`; four bags -> 4 x 3 output.
embeddings = embeddingbag(inputs, offsets)
print(embeddings)

tensor([[-0.3425,  0.2018,  0.3092],
        [-0.6064,  0.4776,  0.2644],
        [ 0.2542,  0.5554,  0.2555],
        [-0.4282,  0.8517,  0.2413]], grad_fn=<EmbeddingBagBackward0>)


In [25]:
from torch.nn.utils.rnn import pad_sequence

# Two token-id sequences of different lengths (7 and 4).
sequences = [torch.tensor([1, 23, 2, 32, 6, 3, 2]), torch.tensor([2, 3, 6, 86])]
# Right-pad the shorter sequence with 0 (pad_sequence's default padding value) so both
# fit in one (batch, max_len) tensor. The list keeps its own name so this cell can be
# re-run safely; the original referenced an undefined `inpts2` and failed on a fresh kernel.
inpts = pad_sequence(sequences, batch_first=True)

In [26]:
inpts  # 2 x 7; the shorter sequence is right-padded with 0

tensor([[ 1, 23,  2, 32,  6,  3,  2],
        [ 2,  3,  6, 86,  0,  0,  0]])

In [29]:
embedding_size = 4

# Lookup table for 1000 token ids, each mapped to an embedding_size-dimensional vector.
embedding = nn.Embedding(1000, embedding_size)
# NOTE(review): padding_idx is not set, so the pad id 0 receives an ordinary vector
# (visible as the repeated rows in the output) — confirm this is intended.
embeded = embedding(inpts)
embeded  # 2 x 7 x 4

tensor([[[-1.3715,  0.6363, -0.5548, -0.0558],
         [ 1.2211,  2.4912, -0.2137,  0.4861],
         [ 0.9996,  0.3148, -1.3507,  0.6074],
         [ 0.3628,  1.2994, -0.3457,  0.0648],
         [ 1.7582, -1.3414,  0.9058,  1.2536],
         [ 0.4227,  0.3319,  0.0122,  0.2849],
         [ 0.9996,  0.3148, -1.3507,  0.6074]],

        [[ 0.9996,  0.3148, -1.3507,  0.6074],
         [ 0.4227,  0.3319,  0.0122,  0.2849],
         [ 1.7582, -1.3414,  0.9058,  1.2536],
         [ 0.2788,  0.7897,  0.0026,  1.2993],
         [-0.9469,  0.6242,  0.6587, -0.0258],
         [-0.9469,  0.6242,  0.6587, -0.0258],
         [-0.9469,  0.6242,  0.6587, -0.0258]]], grad_fn=<EmbeddingBackward0>)

In [28]:
# Conv1d expects (batch, channels, length): move the embedding dimension to axis 1 -> 2 x 4 x 7.
embeded_permute = embeded.permute(0, 2, 1)
embeded_permute

tensor([[[-1.3296, -0.6447,  0.9056,  1.3039,  1.2000, -1.1855,  0.9056],
         [-0.4849, -1.1315, -2.0710,  0.6838,  1.4062, -0.1139, -2.0710],
         [-0.9392, -0.4895, -0.0487,  0.4222,  2.1146, -0.1432, -0.0487],
         [-2.6767, -0.4440,  2.7525,  0.3903, -0.2755,  0.5894,  2.7525]],

        [[ 0.9056, -1.1855,  1.2000, -0.2737,  0.1550,  0.1550,  0.1550],
         [-2.0710, -0.1139,  1.4062,  0.8133, -3.4000, -3.4000, -3.4000],
         [-0.0487, -0.1432,  2.1146,  1.3023, -1.0608, -1.0608, -1.0608],
         [ 2.7525,  0.5894, -0.2755,  0.3600, -0.8101, -0.8101, -0.8101]]],
       grad_fn=<PermuteBackward0>)

In [32]:
num_filter = 2  # number of convolution output channels
filter_size = 3  # kernel width, in tokens

# padding=1 with a width-3 kernel keeps the sequence length unchanged (7 in, 7 out).
conv = nn.Conv1d(embedding_size, num_filter, filter_size, padding=1)
conv(embeded_permute)  # 2 x num_filter x 7

tensor([[[ 0.8028, -0.5301, -0.6558,  0.3391,  0.2585, -1.4390, -0.7452],
         [ 1.5670, -0.9241, -1.2493, -0.2487,  0.2659, -0.9733, -0.4585]],

        [[-0.8408,  0.4677,  1.0985, -0.5779, -1.0557, -0.3508,  0.4763],
         [-0.4814,  0.0597, -0.1091,  0.5664,  1.1982,  1.4356,  0.6557]]],
       grad_fn=<ConvolutionBackward0>)

In [60]:
class CNN(nn.Module):
    """Text classifier: embedding -> 1-D convolution -> ReLU -> max-over-time pooling -> linear.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token vector (input channels of the convolution).
        filter_size: convolution kernel width, in tokens.
        num_filter: number of convolution output channels.
        num_class: number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, filter_size, num_filter, num_class):
        super().__init__()

        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.conv1d = nn.Conv1d(
            in_channels=embedding_dim,
            out_channels=num_filter,
            kernel_size=filter_size,
            padding=1,
        )
        self.activate = F.relu
        self.linear = nn.Linear(in_features=num_filter, out_features=num_class)

    def forward(self, inputs):
        """Return per-class log-probabilities for a (batch, seq_len) tensor of token ids.

        Every intermediate tensor is printed on each call.
        """
        embedding = self.embedding(inputs)
        print(f'embedding={embedding}')

        # Conv1d treats axis 1 as channels and convolves along the last axis,
        # so the embedding dimension is swapped in front of the sequence axis.
        channels_first = embedding.transpose(1, 2)
        convolution = self.activate(self.conv1d(channels_first))
        print(f'convolution={convolution}')

        # A single pooling window spanning the whole sequence keeps the max per filter.
        seq_len = convolution.size(2)
        pooling = F.max_pool1d(convolution, kernel_size=seq_len)
        print(f'pooling={pooling.squeeze(dim=2)}')

        outputs = self.linear(pooling.squeeze(dim=2))
        print(f'outputs={outputs}')

        return F.log_softmax(outputs, dim=1)

In [61]:
print(f'inputs: {inpts}')

# Vocabulary of 1000 ids and 2 output classes. No seed is set, so the randomly
# initialised weights (and therefore the printed values) change from run to run.
cnn = CNN(1000, embedding_size, filter_size, num_filter, 2)
cnn(inpts)  # 2 x 2 log-probabilities; forward() also prints its intermediate tensors

inputs: tensor([[ 1, 23,  2, 32,  6,  3,  2],
        [ 2,  3,  6, 86,  0,  0,  0]])
embedding=tensor([[[ 0.2413,  0.0720, -0.7477,  1.8785],
         [-1.7803, -0.0543, -0.1390,  0.3807],
         [ 1.0015, -0.7007,  0.1363, -0.3353],
         [ 0.9456,  0.7800,  0.4074, -1.4046],
         [-0.3521, -0.1994, -0.7078, -1.0127],
         [-1.7384,  0.0944,  0.7324, -1.1386],
         [ 1.0015, -0.7007,  0.1363, -0.3353]],

        [[ 1.0015, -0.7007,  0.1363, -0.3353],
         [-1.7384,  0.0944,  0.7324, -1.1386],
         [-0.3521, -0.1994, -0.7078, -1.0127],
         [ 0.8916, -1.0496, -1.5470,  0.4027],
         [ 1.5845,  1.9865, -1.2389,  2.6552],
         [ 1.5845,  1.9865, -1.2389,  2.6552],
         [ 1.5845,  1.9865, -1.2389,  2.6552]]], grad_fn=<EmbeddingBackward0>)
convolution=tensor([[[0.7875, 0.2760, 0.3373, 0.0000, 0.7290, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0981, 0.0000, 0.0000, 0.0000]],

        [[0.7420, 0.1674, 0.0000, 0.0000, 0.0000, 0.4017, 1.0699]

tensor([[-0.5687, -0.8353],
        [-0.5540, -0.8548]], grad_fn=<LogSoftmaxBackward0>)