In [1]:
from torch.nn import RNN
import torch
import torch.nn as nn

In [2]:
# Single-layer Elman RNN: 4 input features per time step, 5 hidden units.
# batch_first=True makes the module take and return tensors laid out as
# (batch, seq_len, feature) rather than PyTorch's default (seq_len, batch, feature).
rnn = RNN(
    input_size=4,
    hidden_size=5,
    batch_first=True,
)

In [3]:
# Random demo batch for the batch_first RNN: 2 sequences, 3 time steps each,
# 4 features per step. Values are uniform in [0, 1) and differ on every run
# because no seed is set in this notebook.
inputs = torch.rand((2, 3, 4))

In [4]:
# Run the whole batch through the RNN (no initial hidden state is passed, so
# PyTorch starts from zeros). `outputs` holds the hidden state at every time
# step; `hn` holds only the final hidden state of each sequence.
outputs, hn = rnn(inputs)

In [5]:
# Per-step hidden states, laid out as (batch=2, seq_len=3, hidden_size=5).
outputs

tensor([[[ 0.1379,  0.7494,  0.3985, -0.3255,  0.0272],
         [ 0.3392,  0.6557, -0.2550, -0.4567, -0.2601],
         [ 0.5708,  0.7729, -0.0370, -0.4537,  0.1507]],

        [[ 0.3325,  0.7780, -0.0464, -0.3506, -0.2138],
         [ 0.4590,  0.7016, -0.2452, -0.4165, -0.0343],
         [ 0.5885,  0.7117, -0.3927, -0.4772,  0.0844]]],
       grad_fn=<TransposeBackward1>)

In [6]:
# Final hidden state; for each sequence it equals the last time step of `outputs`.
hn  # shape: 1 x batch size x hidden size

tensor([[[ 0.5708,  0.7729, -0.0370, -0.4537,  0.1507],
         [ 0.5885,  0.7117, -0.3927, -0.4772,  0.0844]]],
       grad_fn=<StackBackward0>)

### Transformer demo

In [7]:
# One Transformer encoder block (self-attention + feed-forward) with model
# width 4 split across 2 attention heads.
encoder_layer = nn.TransformerEncoderLayer(
    d_model=4,
    nhead=2,
)
# batch_first is left at its default (False), so this tensor is interpreted as
# (seq_len=2, batch=3, d_model=4) -- the opposite layout to the batch_first
# RNN demo earlier in the notebook, even though the shape is the same.
src = torch.rand((2, 3, 4))
# The freshly built layer is in training mode, so its default dropout is
# active: repeated calls on the same `src` return different values.
out = encoder_layer(src)

In [8]:
# Same shape as `src`; every length-4 vector has passed through the layer's
# final LayerNorm (hence the NativeLayerNormBackward0 grad_fn).
out

tensor([[[ 0.8745,  1.1060, -0.8307, -1.1498],
         [-1.6010,  0.1219,  1.1444,  0.3347],
         [ 0.3442,  1.4746, -0.7462, -1.0726]],

        [[-0.5222,  1.6971, -0.8677, -0.3071],
         [-0.0512,  0.9340,  0.7281, -1.6109],
         [ 1.5189, -0.6427,  0.2313, -1.1075]]],
       grad_fn=<NativeLayerNormBackward0>)

In [9]:
# Stack 6 encoder layers. nn.TransformerEncoder deep-copies `encoder_layer`
# for every position in the stack, so the 6 layers have separate parameters
# that merely start from the same initial values.
transformer_encoder = nn.TransformerEncoder(
    encoder_layer=encoder_layer,
    num_layers=6,
)
# Output keeps the shape of `src`; this rebinds `out` from the single-layer demo.
out = transformer_encoder(src)
print(out)

tensor([[[ 0.7188,  0.8874, -1.6415,  0.0354],
         [-1.3742,  0.1996,  1.4185, -0.2438],
         [ 0.0299,  1.3304,  0.1274, -1.4876]],

        [[ 0.0065,  1.3204, -1.4930,  0.1661],
         [ 0.8215,  0.9743, -0.2800, -1.5157],
         [ 0.8588, -0.2737,  0.9355, -1.5206]]],
       grad_fn=<NativeLayerNormBackward0>)


In [10]:
# Encoder output that the decoder cross-attends to. It is recomputed here
# rather than reusing `out`; with dropout active (training mode) the values
# differ from the ones printed by the previous cell.
memory = transformer_encoder(src)
decoder_layer = nn.TransformerDecoderLayer(
    d_model=4,
    nhead=2,
)
transformer_decoder = nn.TransformerDecoder(
    decoder_layer=decoder_layer,
    num_layers=6,
)
# Random stand-in for the decoder input -- presumably the already generated
# part of the output sequence (TODO confirm intent). Same layout as `src`.
out_part = torch.rand((2, 3, 4))
# NOTE: no tgt_mask is passed, so decoder self-attention is not causally
# masked and every target position can attend to every other one.
out = transformer_decoder(tgt=out_part, memory=memory)
print(out)

tensor([[[ 0.4187,  1.3972, -0.5734, -1.2425],
         [-1.6286,  0.4788,  0.0967,  1.0531],
         [-1.6675,  0.8310,  0.1193,  0.7173]],

        [[-1.5641, -0.1679,  0.7533,  0.9787],
         [-0.6510, -0.5150, -0.5641,  1.7300],
         [ 0.0886,  1.5920, -0.9908, -0.6899]]],
       grad_fn=<NativeLayerNormBackward0>)


### 情感分析

In [12]:
# Lookup table with 8 rows (the vocabulary size) of 3-dimensional vectors.
embedding = nn.Embedding(num_embeddings=8, embedding_dim=3)
# Two sequences of four token ids each; every id must lie in [0, 8).
inpt = torch.tensor(
    [
        [0, 1, 2, 1],
        [4, 6, 6, 7],
    ],
    dtype=torch.long,
)
# Each id is replaced by its row of the table -> shape (2, 4, 3).
# Repeated ids (1 in the first row, 6 in the second) yield identical vectors.
output = embedding(inpt)
print(output)

tensor([[[ 0.6923, -0.1346, -0.1242],
         [-1.4251,  0.0223, -0.2153],
         [ 0.7121,  1.3631,  0.8899],
         [-1.4251,  0.0223, -0.2153]],

        [[-1.4206, -1.9937, -0.6145],
         [ 0.9015, -1.0612, -2.2288],
         [ 0.9015, -1.0612, -2.2288],
         [-1.2294,  1.3970,  0.0421]]], grad_fn=<EmbeddingBackward0>)


In [13]:
# Average the token vectors along the sequence axis (dim=1): one
# 3-dimensional vector per input sequence, i.e. shape (2, 3).
torch.mean(output, dim=1)

tensor([[-0.3614,  0.3183,  0.0838],
        [-0.2117, -0.6797, -1.2575]], grad_fn=<MeanBackward1>)

In [15]:
# Four token-id sequences of different lengths (4, 5, 3 and 6 ids). Because the
# lengths differ they cannot be stacked into one rectangular tensor.
# NOTE: this rebinds `inputs`, which held the RNN demo batch earlier on.
inputs = [
    torch.tensor(token_ids, dtype=torch.long)
    for token_ids in (
        [0, 1, 2, 1],
        [2, 1, 3, 7, 5],
        [6, 4, 2],
        [1, 3, 4, 3, 5, 7],
    )
]
input1, input2, input3, input4 = inputs

In [16]:
# At this point `inputs` is still a Python list of 1-D id tensors.
inputs

[tensor([0, 1, 2, 1]),
 tensor([2, 1, 3, 7, 5]),
 tensor([6, 4, 2]),
 tensor([1, 3, 4, 3, 5, 7])]

In [21]:
# Plain list: a leading 0 followed by the length of every sequence.
# A later cell drops the last length and takes the cumulative sum, which turns
# this into the start index of each sequence in the concatenated id tensor.
offsets = [0, *(seq.shape[0] for seq in inputs)]

In [24]:
# Still a plain Python list here: a leading 0, then the four sequence lengths.
offsets

[0, 4, 5, 3, 6]

In [25]:
# Drop the last length (nothing starts after the final sequence) and take the
# running sum, giving the start index of each sequence in the flat id tensor.
# WARNING: this rebinds `offsets` from a list of lengths to a tensor of start
# positions, so the cell is not idempotent -- running it a second time would
# slice and cumulate the already-cumulated values.
offsets = torch.cumsum(torch.tensor(offsets[:-1]), dim=0)

In [26]:
# Start index of each of the four sequences within the concatenated id tensor.
offsets

tensor([ 0,  4,  9, 12])

In [27]:
# Flatten the list of variable-length sequences into one 1-D tensor of ids;
# the sequence boundaries are carried by `offsets`.
# WARNING: this rebinds `inputs` from a list of tensors to a single tensor, so
# the cell cannot be re-run without first re-running the cell that builds the list.
inputs = torch.cat(inputs, dim=0)

In [28]:
# Flat 1-D tensor holding all 18 token ids of the four sequences back to back.
inputs

tensor([0, 1, 2, 1, 2, 1, 3, 7, 5, 6, 4, 2, 1, 3, 4, 3, 5, 7])

In [30]:
# EmbeddingBag looks up and pools in one step. With the default mode ("mean")
# each output row is the average embedding of one bag, where bag k spans
# inputs[offsets[k]:offsets[k + 1]] and the last bag runs to the end.
# This table is freshly initialised and shares no weights with `embedding`.
embeddingbag = nn.EmbeddingBag(num_embeddings=8, embedding_dim=3)
# One pooled 3-dimensional vector per sequence -> shape (4, 3).
embeddings = embeddingbag(input=inputs, offsets=offsets)
print(embeddings)

tensor([[-0.3425,  0.2018,  0.3092],
        [-0.6064,  0.4776,  0.2644],
        [ 0.2542,  0.5554,  0.2555],
        [-0.4282,  0.8517,  0.2413]], grad_fn=<EmbeddingBagBackward0>)
