# Sequence To Sequence (Seq2seq)

In [1]:
import numpy as np
from random import randint

In [2]:
import torch
import torch.nn as nn

## Set Helpers

In [3]:
# generate a sequence of random integers
def generate_sequence(length, n_unique):
    """Return a list of `length` random integers, each in [0, n_unique - 1].

    Values are drawn independently with `random.randint`, so repeats are
    possible.
    """
    values = []
    for _ in range(length):
        values.append(randint(0, n_unique - 1))
    return values

In [4]:
# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    """One hot encode an iterable of integer class indices.

    Args:
        sequence: iterable of integers, each a valid index into a row of
            width `n_unique`.
        n_unique: number of distinct classes, i.e. the width of each row.

    Returns:
        A float64 array of shape (len(sequence), n_unique) with a single 1.
        per row. An empty `sequence` yields shape (0, n_unique), so callers
        can always rely on the result being 2-D.

    Raises:
        IndexError: if a value is out of bounds for `n_unique`.
    """
    # list() first so generators and other one-shot iterables are accepted
    indices = np.asarray(list(sequence), dtype=np.intp)
    encoding = np.zeros((indices.size, n_unique), dtype=np.float64)
    # set one cell per row: row i gets a 1. in column indices[i]
    encoding[np.arange(indices.size), indices] = 1.0
    return encoding

In [5]:
# decode a one hot encoded string
def one_hot_decode(encoded_sequence):
    """Return, for each row of `encoded_sequence`, the index of its largest entry.

    For a one hot row this is the position of the 1., i.e. the original
    integer value.
    """
    decoded = []
    for row in encoded_sequence:
        decoded.append(np.argmax(row, axis=0))
    return decoded

## Try It!

In [6]:
# generate a random sequence of 5 integers, each between 0 and 49 inclusive
sequence = generate_sequence(5, 50)
sequence

[23, 37, 35, 44, 27]

In [7]:
# one hot encode the sequence: one row per value, 50 columns per row
encoded = one_hot_encode(sequence, 50)
encoded

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0.],
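       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]])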

In [8]:
# decode the one hot rows back to integers; this should reproduce `sequence`
decoded = one_hot_decode(encoded)
decoded

[23, 37, 35, 44, 27]

## Prepare Data

In [9]:
# prepare data for the LSTM
def get_pair_of_sequence(n_input, n_output, n_unique):
    """Build one random (X, y) pair of one hot encoded tensors.

    The input is `n_input` random integers below `n_unique`. The target is
    the first `n_output` of those integers, followed by zeros so that it has
    the same length as the input.

    Returns:
        (X, y): two tensors, each of shape (1, sequence length, n_unique).
    """
    def to_batched_tensor(values):
        # one hot encode, add a leading axis of size 1, then wrap as a tensor
        encoded = one_hot_encode(values, n_unique)
        encoded = encoded.reshape((1, encoded.shape[0], encoded.shape[1]))
        return torch.from_numpy(encoded)

    # random source sequence
    source = generate_sequence(n_input, n_unique)

    # target: leading values of the source, zero-padded to the source length
    padding = [0] * (n_input - n_output)
    target = source[:n_output] + padding

    X = to_batched_tensor(source)
    y = to_batched_tensor(target)

    return X, y

In [10]:
# generate one (X, y) pair: 5 input values, the first 2 echoed in y, 50 possible values
X, y = get_pair_of_sequence(5, 2, 50)
# print the tensor shapes, then the integer sequences recovered from the one hot rows
print('X shape = %s, y shape = %s' % (X.numpy().shape, y.numpy().shape))
print('X = %s, y = %s' % (one_hot_decode(X.numpy()[0]), one_hot_decode(y.numpy()[0])))

X shape = (1, 5, 50), y shape = (1, 5, 50)
X = [40, 38, 22, 10, 10], y = [40, 38, 0, 0, 0]


## Set Configs

In [11]:
# configure problem
# NOTE(review): 5 and 2 mirror the first two arguments passed to
# get_pair_of_sequence above — presumably the input sequence length and the
# number of leading values echoed in the target; confirm against the training code
n_input = 5
n_output = 2
# passed as hidden_dim to both Encoder and Decoder below
hidden_dim = 8

In [12]:
# run on the GPU when CUDA is available, otherwise fall back to the CPU
is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')

## Build Seq2seq Network

In [13]:
class Encoder(nn.Module):
    """Single-layer GRU encoder.

    The GRU is built with PyTorch's default layout, so `forward` expects
    input of shape (seq_len, batch, input_size) and a hidden state of shape
    (1, batch, hidden_dim).

    Args:
        input_size: number of features per time step fed to the GRU.
        hidden_dim: size of the GRU hidden state.
    """

    def __init__(self, input_size, hidden_dim):
        super(Encoder, self).__init__()

        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.gru_layer = nn.GRU(self.input_size, self.hidden_dim)

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial hidden state on the module's device.

        Args:
            batch_size: size of the batch axis. When omitted, the legacy shape
                (1, hidden_dim, hidden_dim) is returned so existing callers
                see no change; pass the real batch size so the state matches
                the input given to `forward`.

        Returns:
            A float32 tensor of shape (1, batch_size, hidden_dim).
        """
        if batch_size is None:
            batch_size = self.hidden_dim
        # allocate next to the weights so the state works after .to(device)
        device = next(self.parameters()).device
        return torch.zeros(1, batch_size, self.hidden_dim, device=device)

    def forward(self, x, hidden):
        """Run the GRU over `x` starting from `hidden`.

        Returns:
            (gru_out, hidden): the per-step GRU outputs and the final hidden
            state.
        """
        # nn.GRU weights are float32; integer or float64 input is rejected,
        # so cast to float rather than long
        x = x.float()
        gru_out, hidden = self.gru_layer(x, hidden)

        return gru_out, hidden

In [14]:
class Decoder(nn.Module):
    """Single-layer GRU decoder followed by a linear layer and log-softmax.

    The GRU is built with PyTorch's default layout, so `forward` expects
    input of shape (seq_len, batch, hidden_dim) and a hidden state of shape
    (1, batch, hidden_dim).

    Args:
        hidden_dim: size of both the GRU input features and its hidden state.
        output_size: number of scores produced per batch element.
    """

    def __init__(self, hidden_dim, output_size):
        super(Decoder, self).__init__()

        self.output_size = output_size
        self.hidden_dim = hidden_dim
        self.gru_layer = nn.GRU(self.hidden_dim, self.hidden_dim)
        self.fc_layer = nn.Linear(self.hidden_dim, self.output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial hidden state on the module's device.

        Args:
            batch_size: size of the batch axis. When omitted, the legacy shape
                (1, hidden_dim, hidden_dim) is returned so existing callers
                see no change; pass the real batch size so the state matches
                the input given to `forward`.

        Returns:
            A float32 tensor of shape (1, batch_size, hidden_dim).
        """
        if batch_size is None:
            batch_size = self.hidden_dim
        # allocate next to the weights so the state works after .to(device)
        device = next(self.parameters()).device
        return torch.zeros(1, batch_size, self.hidden_dim, device=device)

    def forward(self, x, hidden):
        """Decode one step from `x` and `hidden`.

        Returns:
            (log_probs, hidden): log-probabilities of shape
            (batch, output_size) computed from the first time step of the GRU
            output, and the final GRU hidden state.
        """
        # nn.GRU weights are float32, so cast to float rather than long;
        # torch.relu avoids depending on an `F` alias this file never imports
        x = torch.relu(x.float())
        gru_out, hidden = self.gru_layer(x, hidden)
        # only the first time step is projected; softmax runs over dim=1 (classes)
        gru_out = self.softmax(self.fc_layer(gru_out[0]))

        return gru_out, hidden

#### Initialize Seq2seq Network

In [15]:
# NOTE(review): Encoder's first argument becomes the GRU input feature size; the
# sample X printed earlier has 50 features per step while n_input is 5 — confirm
# this is intended
encoder = Encoder(n_input, hidden_dim)
encoder.to(device)

Encoder(
  (gru_layer): GRU(5, 8)
)

In [16]:
# NOTE(review): Decoder's second argument is the width of its final Linear layer;
# the one hot targets printed earlier are 50 wide while n_output is 2 — confirm
# this is intended
decoder = Decoder(hidden_dim, n_output)
decoder.to(device)

Decoder(
  (gru_layer): GRU(8, 8)
  (fc_layer): Linear(in_features=8, out_features=2, bias=True)
  (softmax): LogSoftmax()
)

## Train Seq2seq Network

---