In [4]:
%matplotlib inline
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from IPython.core.debugger import set_trace

# Seed every random number generator used in this notebook (NumPy, PyTorch CPU,
# PyTorch CUDA) with the same value so that a Restart & Run All gives the same numbers.
seed = 42
for seed_rng in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_rng(seed)
del seed_rng  # do not leave the loop variable behind in the notebook namespace

In [5]:
class EncoderDecoder(nn.Module):
    """
    Container that wires an encoder and a decoder together.

    It stores the encoder, the decoder, the source/target embedding callables
    and the generator. The generator is only kept as an attribute here; none
    of the methods of this class call it.
    """

    def __init__(self, encoder, decoder, src_embed, trg_embed, generator):
        super().__init__()
        # Assigned left to right, so sub-modules are registered in the order
        # encoder, decoder, src_embed, trg_embed, generator.
        self.encoder, self.decoder = encoder, decoder
        self.src_embed, self.trg_embed = src_embed, trg_embed
        self.generator = generator

    def forward(self, src, trg, src_mask, trg_mask, src_lengths, trg_lengths):
        """
        Take in and process the masked source and target sequences.

        The source is encoded first; the encoder result (per-step states and
        final state) is then handed to the decoder together with the target.
        `trg_lengths` is accepted but not used by this method.
        """
        states, last_state = self.encode(src, src_mask, src_lengths)
        return self.decode(states, last_state, src_mask, trg, trg_mask)

    def encode(self, src, src_mask, src_lengths):
        """Embed `src` and run the encoder on it; returns whatever the encoder returns."""
        embedded_src = self.src_embed(src)
        return self.encoder(embedded_src, src_mask, src_lengths)

    def decode(self, encoder_hidden, encoder_final, src_mask, trg, trg_mask, decoder_hidden=None):
        """
        Embed `trg` and run the decoder on it.

        `decoder_hidden` is forwarded to the decoder as its `hidden` keyword
        argument (None by default).
        """
        embedded_trg = self.trg_embed(trg)
        return self.decoder(
            embedded_trg,
            encoder_hidden,
            encoder_final,
            src_mask,
            trg_mask,
            hidden=decoder_hidden,
        )
    

In [6]:
class Generator(nn.Module):
    """
    Linear projection followed by a log-softmax.

    The last dimension of the output has size `vocab_size` (the target
    vocabulary size). The projection has no bias term.
    """

    def __init__(self, hidden_size, vocab_size):
        super().__init__()
        self.proj = nn.Linear(hidden_size, vocab_size, bias=False)

    def forward(self, x):
        """Project `x` to vocabulary logits and return log-probabilities over the last dim."""
        logits = self.proj(x)
        return F.log_softmax(logits, dim=-1)

In [7]:
class Encoder(nn.Module):
    """
    Bidirectional GRU encoder over a padded, length-sorted mini-batch.

    Args:
        input_size: size of each input embedding vector.
        hidden_size: hidden size of the GRU, per direction.
        num_layers: number of stacked GRU layers.
        dropout: dropout probability passed to nn.GRU.
    """

    def __init__(self, input_size, hidden_size, num_layers=1, dropout=0.):
        super(Encoder, self).__init__()
        self.num_layers = num_layers
        # Fixes: the keyword was misspelled `dopout` (TypeError at construction),
        # and `bidirectional=True` was missing although forward() splits the
        # final state into interleaved forward/backward slices.
        self.rnn = nn.GRU(input_size, hidden_size, num_layers,
                          batch_first=True, bidirectional=True, dropout=dropout)

    def forward(self, x, mask, lengths):
        """
        Apply the bidirectional GRU to the embeddings x.

        The input mini-batch x must be sorted by length (longest first), because
        pack_padded_sequence is called with its default enforce_sorted=True.
        x has shape [batch, time, dim].

        In PyTorch the two functions pack_padded_sequence and
        pad_packed_sequence take care of masking and padding, so `mask` is
        accepted but not used here.

        Returns:
            output: [batch, time, 2*hidden_size] per-step states, re-padded.
            final:  [num_layers, batch, 2*hidden_size] final states, with the
                    forward and backward directions concatenated.
        """
        packed = pack_padded_sequence(x, lengths, batch_first=True)
        output, final = self.rnn(packed)
        output, _ = pad_packed_sequence(output, batch_first=True)

        # Concatenate the final hidden states of both directions.
        # nn.GRU stacks them as [layer0_fwd, layer0_bwd, layer1_fwd, ...], so
        # even indices are the forward states and odd indices the backward ones.
        fwd_final = final[0:final.size(0):2]
        bwd_final = final[1:final.size(0):2]
        final = torch.cat([fwd_final, bwd_final], dim=2)  # [num_layers, batch, 2*dim]
        return output, final

In [17]:
# Toy check of the stride-2 slicing that the encoder uses to separate
# interleaved states: b receives the even rows of a, c the odd rows.
a = torch.randn(6, 6)
b = a[::2]
c = a[1::2]
a

tensor([[-1.4790,  0.4323, -0.1250,  0.7821,  0.5635,  1.8582],
        [ 1.0441, -0.8638,  1.3059,  0.2466, -1.9776,  0.0179],
        [-1.4129, -1.8791, -0.1798,  0.7904, -0.1222, -0.7470],
        [ 1.7093,  0.0579,  0.5230,  0.9717, -0.2779, -0.6116],
        [-0.5572, -0.9683,  0.8713, -0.0956,  0.3463, -0.5402],
        [ 0.8569, -0.6721,  1.0682, -0.2527, -0.1882, -0.7712]])

In [18]:
b

tensor([[-1.4790,  0.4323, -0.1250,  0.7821,  0.5635,  1.8582],
        [-1.4129, -1.8791, -0.1798,  0.7904, -0.1222, -0.7470],
        [-0.5572, -0.9683,  0.8713, -0.0956,  0.3463, -0.5402]])

In [19]:
c

tensor([[ 1.0441, -0.8638,  1.3059,  0.2466, -1.9776,  0.0179],
        [ 1.7093,  0.0579,  0.5230,  0.9717, -0.2779, -0.6116],
        [ 0.8569, -0.6721,  1.0682, -0.2527, -0.1882, -0.7712]])