# "Sequence to Sequence Learning with Neural Networks" paper implementation - https://arxiv.org/pdf/1409.3215.pdf

In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import os

import torch
import torch.utils.data
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from data_loader import Dataset
import argparse

In [2]:
# Reserved vocabulary indices.
# NOTE(review): presumably the start-of-sequence / end-of-sequence markers used
# by the vocabulary built in data_loader -- confirm they match that module.
SOS_token = 1
EOS_token = 2

# Central run configuration for the notebook.
# NOTE(review): both 'num_epochs' (10) and 'num_epoch' (600) are defined; only
# one of them is likely intended -- check which key the training loop reads
# before removing either. Both are kept here so no reader of `args` breaks.
args = {
    # Optimisation settings.
    'lr': 0.01,
    'momentum': 0.9,
    'weight_decay': 5e-4,
    'gamma': 0.1,
    'epochs_per_lr_drop': 450,
    'num_epochs': 10,
    'batch_size': 32,
    'num_workers': 8,
    'num_epoch': 600,
    # Runtime / bookkeeping settings.
    'cuda': True,
    'save_folder': os.path.expanduser('~/weights'),
    'epochs_per_save': 10,
    'batch_per_log': 10,
    # Data settings.
    'auto_encoder': True,
    'MAX_LENGTH': 10,
    # Model settings.
    'bidirectional': False,
    'hidden_size_decoder': 256,
    'num_layer_decoder': 1,
    'hidden_size_encoder': 256,
    'num_layer_encoder': 1,
    'teacher_forcing': False
}

# Use the GPU only when it is both requested in the config and actually present;
# otherwise fall back to the CPU.
device = torch.device("cuda" if args['cuda'] and torch.cuda.is_available() else "cpu")

# The sentence-length limit is taken from the config instead of repeating the
# literal 10, so changing args['MAX_LENGTH'] also changes what the datasets keep.
trainset = Dataset(phase='train', max_input_length=args['MAX_LENGTH'],
                   auto_encoder=args['auto_encoder'])

input_lang, output_lang = trainset.langs()

trainloader = torch.utils.data.DataLoader(trainset, batch_size=args['batch_size'],
                                          shuffle=True, num_workers=args['num_workers'], pin_memory=False, drop_last=True)
# Iterator over the training loader, created once here for the cells that follow.
dataiter = iter(trainloader)

testset = Dataset(phase='test', max_input_length=args['MAX_LENGTH'],
                  auto_encoder=args['auto_encoder'])
# The test loader yields one (shuffled) example per batch.
testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                          shuffle=True, num_workers=1, pin_memory=False, drop_last=True)

Reading lines...
Read 135842 sentence pairs
Trimmed to 11793 sentence pairs
Counting words...
Counted words:
eng 3117
eng 3117
['you re disobeying orders ', 'you re disobeying orders ']
Reading lines...
Read 135842 sentence pairs
Trimmed to 11793 sentence pairs
Counting words...
Counted words:
eng 3117
eng 3117
['he s an aristocrat ', 'he s an aristocrat ']


In [4]:
class EncoderRNN(nn.Module):
    """LSTM encoder that consumes one token index per ``forward`` call.

    The token is embedded into a ``hidden_size``-dimensional vector, reshaped
    to ``(1, 1, -1)`` and fed through the LSTM together with the running
    ``(h, c)`` state.

    When ``bidirectional`` is true the module holds two independent LSTMs
    (``lstm_forward`` and ``lstm_backward``) that share one embedding table;
    the caller supplies one token and one state per direction.

    Args:
        hidden_size: Size of the embedding vectors and of the LSTM state.
        input_size: Number of rows in the embedding table.
        batch_size: Stored on the instance; the forward pass itself reshapes
            its input to ``(1, 1, -1)`` and does not read this value.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Selects the two-LSTM mode described above.
    """

    def __init__(self, hidden_size, input_size, batch_size, num_layers=1, bidirectional=False):
        super(EncoderRNN, self).__init__()
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embedding_dim=hidden_size)

        # Branch on the constructor argument (not on a notebook-global config)
        # so that __init__, forward and init_hidden always agree on the mode.
        if self.bidirectional:
            self.lstm_forward = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=num_layers)
            self.lstm_backward = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=num_layers)
        else:
            self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=num_layers)

    def forward(self, input, hidden):
        """Advance the encoder by one token.

        Unidirectional mode:
            ``input`` is a single token index tensor and ``hidden`` an
            ``(h, c)`` pair. Returns ``(output, (h_n, c_n))``.

        Bidirectional mode:
            ``input`` is a ``(forward_token, backward_token)`` pair and
            ``hidden`` is either a ``(forward_state, backward_state)`` pair or
            the ``{"forward": ..., "backward": ...}`` dict produced by
            ``init_hidden``. Returns
            ``((h_n_forward, c_n_forward), (h_n_backward, c_n_backward))``;
            the per-step LSTM outputs are not returned in this mode.
        """
        if self.bidirectional:
            input_forward, input_backward = input
            # Unpacking a dict would yield its string keys, so translate the
            # init_hidden() dict into the positional pair expected below.
            if isinstance(hidden, dict):
                hidden = (hidden["forward"], hidden["backward"])
            hidden_forward, hidden_backward = hidden

            input_forward = self.embedding(input_forward).view(1, 1, -1)
            input_backward = self.embedding(input_backward).view(1, 1, -1)

            _, (h_n_forward, c_n_forward) = self.lstm_forward(input_forward, hidden_forward)
            _, (h_n_backward, c_n_backward) = self.lstm_backward(input_backward, hidden_backward)

            forward_state = (h_n_forward, c_n_forward)
            backward_state = (h_n_backward, c_n_backward)
            return (forward_state, backward_state)

        embedded = self.embedding(input).view(1, 1, -1)
        output, (h_n, c_n) = self.lstm(embedded, hidden)
        return output, (h_n, c_n)

    def _zero_state(self):
        """Return a fresh ``[h_0, c_0]`` list of zeros shaped ``(num_layers, 1, hidden_size)``."""
        # Allocate next to the module's own parameters so the state follows the
        # module wherever it has been moved.
        state_device = self.embedding.weight.device
        return [torch.zeros(self.num_layers, 1, self.hidden_size, device=state_device),
                torch.zeros(self.num_layers, 1, self.hidden_size, device=state_device)]

    def init_hidden(self):
        """Create the all-zero initial state.

        Returns:
            Unidirectional mode: a ``[h_0, c_0]`` list.
            Bidirectional mode: ``{"forward": [h_0, c_0], "backward": [h_0, c_0]}``
            where each direction owns its own tensors.
        """
        if self.bidirectional:
            return {"forward": self._zero_state(), "backward": self._zero_state()}
        return self._zero_state()

In [None]:
class DecoderRNN(nn.Module):
    """LSTM decoder that emits vocabulary scores for one token per ``forward`` call.

    Args:
        hidden_size: Size of the embedding vectors and of the LSTM state.
        output_size: Number of rows in the embedding table and width of the
            output projection.
        batch_size: Stored on the instance; the forward pass itself reshapes
            its input to ``(1, 1, -1)`` and does not read this value.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, hidden_size, output_size, batch_size, num_layers=1):
        super(DecoderRNN, self).__init__()
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        # Build the LSTM with the requested depth so it matches the state that
        # initHidden() allocates with self.num_layers.
        self.lstm = nn.LSTM(input_size=hidden_size,
                            hidden_size=hidden_size, num_layers=num_layers)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run one decoding step.

        Args:
            input: Token index tensor; its embedding is reshaped to ``(1, 1, -1)``.
            hidden: ``(h, c)`` LSTM state pair.

        Returns:
            ``(scores, (h_n, c_n))`` where ``scores`` is the output of the
            linear projection applied to the first (only) time step, i.e. a
            ``(1, output_size)`` tensor. No softmax is applied here.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        lstm_out, (h_n, c_n) = self.lstm(embedded, hidden)
        scores = self.out(lstm_out[0])
        return scores, (h_n, c_n)

    def initHidden(self):
        """Return an all-zero ``[h_0, c_0]`` list shaped ``(num_layers, 1, hidden_size)``."""
        # Allocate next to the module's own parameters so the state follows the
        # module wherever it has been moved.
        state_device = self.embedding.weight.device
        return [torch.zeros(self.num_layers, 1, self.hidden_size, device=state_device),
                torch.zeros(self.num_layers, 1, self.hidden_size, device=state_device)]


class Linear(nn.Module):
    """Bridge between the encoder state and the decoder state.

    The encoder side is ``num_directions * hidden_size_encoder`` wide, with
    ``num_directions = int(bidirectional) + 1``. If that width already equals
    ``hidden_size_decoder`` the input is passed through untouched; otherwise
    it is mapped with a learned linear projection.

    Args:
        bidirectional: Whether the encoder contributes two directions.
        hidden_size_encoder: Per-direction encoder state size.
        hidden_size_decoder: Decoder state size.
    """

    def __init__(self, bidirectional, hidden_size_encoder, hidden_size_decoder):
        super(Linear, self).__init__()
        self.bidirectional = bidirectional
        encoder_width = (int(bidirectional) + 1) * hidden_size_encoder
        # The projection is always created (even when it ends up unused) so
        # the module's parameter set does not depend on the sizes.
        self.linear_connection_op = nn.Linear(encoder_width, hidden_size_decoder)
        # True when encoder and decoder widths line up and no projection is needed.
        self.connection_possibility_status = encoder_width == hidden_size_decoder

    def forward(self, input):
        """Return ``input`` itself when the widths match, else its linear projection."""
        if not self.connection_possibility_status:
            return self.linear_connection_op(input)
        return input
