In [37]:
import collections
import math
import torch
from torch import nn
from d2l import torch as d2l

class Seq2SeqEncoder(d2l.Encoder):
    def __init__(self,vocab_size, embed_size, num_hiddens, num_layers, dropout=0, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.GRU(embed_size, num_hiddens, num_layers, dropout=dropout)

    def forward(self, X, *args):
        X = self.embedding(X) 
        X = X.permute(1,0,2)
        output, state = self.rnn(X)
        return output, state



class Seq2SeqDecoder(d2l.Decoder):
    """用于序列到序列学习的循环神经网络解码器"""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.GRU(embed_size + num_hiddens, num_hiddens, num_layers,
                          dropout=dropout)
        self.dense = nn.Linear(num_hiddens, vocab_size)

    def init_state(self, enc_outputs, *args):
        return enc_outputs[1]

    def forward(self, X, state):
        # 输出'X'的形状：(batch_size,num_steps,embed_size)
        X = self.embedding(X).permute(1, 0, 2)
        # 广播context，使其具有与X相同的num_steps
        context = state[-1].repeat(X.shape[0], 1, 1)
        X_and_context = torch.cat((X, context), 2)
        output, state = self.rnn(X_and_context, state)
        output = self.dense(output).permute(1, 0, 2)
        # output的形状:(batch_size,num_steps,vocab_size)
        # state的形状:(num_layers,batch_size,num_hiddens)
        return output, state



In [38]:
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=16, num_layers=2)
encoder.eval()

X = torch.zeros((4,7), dtype=torch.long)
decoder = Seq2SeqDecoder(vocab_size=10, embed_size=8, num_hiddens=16,
                         num_layers=2)
decoder.eval()
state = decoder.init_state(encoder(X))
output, state = decoder(X, state)
output.shape, state.shape


(torch.Size([4, 7, 10]), torch.Size([2, 4, 16]))

In [33]:
output.shape 
state.shape 

torch.Size([3, 7, 16])

In [26]:
output.shape 

torch.Size([4, 7, 16])

In [18]:
a = nn.Embedding(10,8)

b = torch.arange(28).reshape(4,7).to(torch.long)
print(b.shape)
b.type()

torch.Size([4, 7])


'torch.LongTensor'

In [15]:
a(b)

IndexError: index out of range in self

In [3]:
import torch  
import torch.nn as nn 

x = torch.LongTensor([[1,2,4],[4,3,2]], padding_idx=4)

embeddings = nn.Embedding(5,6)
print(embeddings(x))

TypeError: new() received an invalid combination of arguments - got (list, list, padding_idx=int), but expected one of:
 * (*, torch.device device)
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, *, torch.device device)
      didn't match because some of the keywords were incorrect: padding_idx
 * (object data, *, torch.device device)
      didn't match because some of the keywords were incorrect: padding_idx


In [6]:
import torch  
import torch.nn as nn 

x = torch.LongTensor([1,2,4],[4,3,2])

embeddings = nn.Embedding(5,6)
print(embeddings.weight)

TypeError: new() received an invalid combination of arguments - got (list, list), but expected one of:
 * (*, torch.device device)
      didn't match because some of the arguments have invalid types: (!list!, !list!)
 * (torch.Storage storage)
 * (Tensor other)
 * (tuple of ints size, *, torch.device device)
 * (object data, *, torch.device device)


In [9]:
import torch  
import torch.nn as nn 

x = torch.LongTensor([[1,2,4],[4,3,2]])

In [17]:
print(x)
print(x.type())
print(x.shape)
embeddings = nn.Embedding(5,6)
embeddings(x).shape 

tensor([[1, 2, 4],
        [4, 3, 2]])
torch.LongTensor
torch.Size([2, 3])


torch.Size([2, 3, 6])

In [13]:
a(b)

IndexError: index out of range in self

In [4]:
print(a) 

Embedding(2, 2)
