## Encoder-Decoder model with training

In [None]:
class EncoderRNN2(nn.Module):
    """Bidirectional GRU encoder over padded, length-sorted batches.

    The GRU is built with ``hidden_size`` as its input size, so the supplied
    ``embedding`` module must produce vectors of that width.
    """

    def __init__(self, embedding, hidden_size, n_layers, dropout=0):
        """Store the embedding and build the bidirectional GRU.

        Args:
            embedding: module mapping token indices to ``hidden_size`` vectors.
            hidden_size: width of the GRU input and of each direction's state.
            n_layers: number of stacked GRU layers.
            dropout: dropout between GRU layers; forced to 0 for one layer.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = embedding
        # Inter-layer dropout is only applied when there is more than one layer.
        inter_layer_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            dropout=inter_layer_dropout,
            bidirectional=True,
        )

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch.

        Args:
            input_seq: token indices, time-major (the packing helpers are
                called with their default ``batch_first=False``).
            input_lengths: true length of every sequence in the batch, in
                the order required by ``pack_padded_sequence``.
            hidden: optional initial GRU state.

        Returns:
            ``(outputs, hidden)`` where ``outputs`` is the sum of the forward
            and backward direction outputs and ``hidden`` is the final GRU
            state.
        """
        packed_input = nn.utils.rnn.pack_padded_sequence(
            self.embedding(input_seq), input_lengths
        )
        packed_output, hidden = self.gru(packed_input, hidden)
        padded, _lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
        # The last axis holds [forward | backward] features; add the halves.
        forward_half = padded[:, :, :self.hidden_size]
        backward_half = padded[:, :, self.hidden_size:]
        return forward_half + backward_half, hidden

In [None]:
class DecoderRNN2(nn.Module):
    """Plain (attention-free) GRU decoder that emits one step at a time.

    ``forward`` returns a probability distribution over the ``output_size``
    vocabulary entries (softmax output, not logits).
    """

    def __init__(self, embedding, hidden_size, output_size, n_layers, dropout):
        """Build the decoder layers.

        Args:
            embedding: module mapping token indices to ``hidden_size``
                vectors. If ``None``, a fresh ``nn.Embedding(output_size,
                hidden_size)`` is created instead.
            hidden_size: width of the GRU input and hidden state.
            output_size: number of output classes (vocabulary size).
            n_layers: number of stacked GRU layers.
            dropout: dropout applied to the embedded input and, when there
                is more than one layer, between GRU layers.
        """
        super(DecoderRNN2, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # Use the embedding handed in by the caller; the argument used to be
        # ignored in favour of a new, unshared embedding table.
        if embedding is None:
            embedding = nn.Embedding(output_size, hidden_size)
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input_sequence, hidden):
        """Run the decoder on one batch of input tokens.

        Args:
            input_sequence: token indices laid out time-major, normally one
                time step for the whole batch. A tensor with fewer than two
                dimensions is treated as a single time step.
            hidden: GRU state to start from.

        Returns:
            ``(output, hidden)``: ``output`` holds softmax probabilities over
            the vocabulary (the leading time axis is dropped when it has
            length 1) and ``hidden`` is the updated GRU state.
        """
        if input_sequence.dim() < 2:
            # Restore the leading time axis that callers may have squeezed.
            input_sequence = input_sequence.reshape(1, -1)
        embedded = self.embedding_dropout(self.embedding(input_sequence))
        rnn_output, hidden = self.gru(embedded, hidden)
        # Project to vocabulary size, then normalise over the vocabulary
        # axis (the last one) rather than over the batch axis.
        output = F.softmax(self.out(rnn_output), dim=-1)
        output = output.squeeze(0)
        return output, hidden

In [None]:
def train2(input_variable, input_lengths, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, mask, batch_size, max_length=MAX_LENGTH):
    """Run one optimisation step on a single batch and return its mean loss.

    The decoder is unrolled for ``target_variable.size(0)`` steps and is
    always fed its own most likely prediction from the previous step.

    Args:
        input_variable: padded input batch passed to ``encoder``.
        input_lengths: sequence lengths passed to ``encoder``.
        target_variable: target tokens, indexed by time step on axis 0.
        encoder: encoder module; called as ``encoder(input, lengths)``.
        decoder: decoder module exposing ``n_layers``; called as
            ``decoder(decoder_input, decoder_hidden)``.
        encoder_optimizer: optimizer stepping the encoder parameters.
        decoder_optimizer: optimizer stepping the decoder parameters.
        mask: per-step mask handed to ``maskNLLLoss`` together with the
            matching target step.
        batch_size: number of sequences in the batch.
        max_length: unused; kept for interface compatibility.

    Returns:
        The sum of per-step losses weighted by their element counts,
        divided by the total element count.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    loss = 0
    n_totals = 0
    all_loss = []
    target_length = target_variable.size(0)
    _, encoder_hidden = encoder(input_variable, input_lengths)

    # Start every sequence with SOS, on the same device as the inputs.
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]]).to(input_variable.device) # this line from tutorial
    # Seed the decoder with the first ``decoder.n_layers`` encoder states.
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    for t in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        _, topi = decoder_output.topk(1)
        # Keep the (1, batch) layout for the next step; a bare squeeze()
        # would drop the time axis.
        decoder_input = topi.reshape(1, -1).detach()
        mask_loss, n_total = maskNLLLoss(decoder_output, target_variable[t], mask[t]) # this line from tutorial
        loss += mask_loss
        n_totals += n_total
        all_loss.append(mask_loss.item() * n_total)

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(all_loss) / n_totals

In [None]:
def trainIters2(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, 
               embedding, encoder_n_layers, decoder_n_layers, n_iteration, batch_size):
    """Train for ``n_iteration`` randomly sampled batches, printing each loss.

    Args:
        model_name: unused here; kept for interface compatibility.
        voc: vocabulary object handed to ``batch2TrainData``.
        pairs: sequence of training pairs to sample batches from.
        encoder: encoder module passed through to ``train2``.
        decoder: decoder module passed through to ``train2``.
        encoder_optimizer: optimizer for the encoder.
        decoder_optimizer: optimizer for the decoder.
        embedding: unused here; kept for interface compatibility.
        encoder_n_layers: unused here; kept for interface compatibility.
        decoder_n_layers: unused here; kept for interface compatibility.
        n_iteration: number of training batches to run.
        batch_size: number of pairs sampled (with replacement) per batch.
    """
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)] # from tutorial

    for iteration, training_batch in enumerate(training_batches):
        # The fourth element is the target mask that train2 needs for the
        # masked loss; it must be forwarded, not discarded.
        input_variable, input_lengths, target_variable, mask, max_target_len = training_batch
        loss = train2(input_variable, input_lengths, target_variable,
                      encoder, decoder, encoder_optimizer, decoder_optimizer,
                      mask, batch_size, max_target_len)
        print("Iteration: {}; Average loss: {:.4f}".format(iteration, loss))