# 6. Sequence to Sequence Models

### About this notebook

This notebook was used in the 50.039 Deep Learning course at the Singapore University of Technology and Design.

**Author:** Matthieu DE MARI (matthieu_demari@sutd.edu.sg)

**Version:** 1.0 (27/12/2022)

**Requirements:**
- Python 3 (tested on v3.9.6)
- 

### Imports and CUDA

In [1]:
# Torch
import torch
import torch.nn as nn

In [2]:
# Use GPU if available, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
# Define the dataset
data = torch.randint(0, 10, (1000, 2)).float()
target = torch.cat((torch.zeros(1000, 1), torch.sum(data, dim = 1, keepdim = True)), dim = 1).float()

# Create a DataLoader
batch_size = 64
train_data = torch.utils.data.TensorDataset(data, target)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [4]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, inputs):
        encoding_vec = self.encoder(inputs)
        out = self.decoder(encoding_vec)
        return out

In [5]:
class Encoder(nn.Module):
    def __init__(self, input_dim, hid_dim):
        super(Encoder, self).__init__()
        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.enc = nn.Linear(input_dim, hid_dim)

    def forward(self, x):
        v = self.enc(x)
        return v

In [6]:
class Decoder(nn.Module):
    def __init__(self, output_dim, hid_dim):
        super(Decoder, self).__init__()
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.dec = nn.Linear(hid_dim, output_dim)

    def forward(self, v):
        y = self.dec(v)
        return y

In [7]:
# Define the model encoder
input_dim = 2
output_dim = 2
hid_dim = 8
enc = Encoder(input_dim, hid_dim)
print(enc)

Encoder(
  (enc): Linear(in_features=2, out_features=8, bias=True)
)


In [8]:
# Define the model decoder
dec = Decoder(output_dim, hid_dim)
print(dec)

Decoder(
  (dec): Linear(in_features=8, out_features=2, bias=True)
)


In [9]:
# Define the model assembling encoder and decoder
model = Seq2Seq(enc, dec)
print(model)

Seq2Seq(
  (encoder): Encoder(
    (enc): Linear(in_features=2, out_features=8, bias=True)
  )
  (decoder): Decoder(
    (dec): Linear(in_features=8, out_features=2, bias=True)
  )
)


In [14]:
# Define the model
input_dim = 2
output_dim = 2
hid_dim = 8
torch.manual_seed(12)
enc = Encoder(input_dim, hid_dim)
dec = Decoder(output_dim, hid_dim)
model = Seq2Seq(enc, dec)

# Define the loss function and the optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

# Define the training loop
num_epochs = 50
for epoch in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch + 1} Loss: {loss.item():.8f}')

Epoch 1 Loss: 8.69502068
Epoch 2 Loss: 0.76978475
Epoch 3 Loss: 0.16806419
Epoch 4 Loss: 0.11615036
Epoch 5 Loss: 0.07884483
Epoch 6 Loss: 0.05809250
Epoch 7 Loss: 0.04653561
Epoch 8 Loss: 0.03641570
Epoch 9 Loss: 0.04140667
Epoch 10 Loss: 0.03090168
Epoch 11 Loss: 0.02510375
Epoch 12 Loss: 0.02093919
Epoch 13 Loss: 0.01640337
Epoch 14 Loss: 0.01191575
Epoch 15 Loss: 0.01451499
Epoch 16 Loss: 0.01022676
Epoch 17 Loss: 0.00920647
Epoch 18 Loss: 0.00517963
Epoch 19 Loss: 0.00619758
Epoch 20 Loss: 0.00303210
Epoch 21 Loss: 0.00292261
Epoch 22 Loss: 0.00155205
Epoch 23 Loss: 0.00126476
Epoch 24 Loss: 0.00108019
Epoch 25 Loss: 0.00077979
Epoch 26 Loss: 0.00059092
Epoch 27 Loss: 0.00043995
Epoch 28 Loss: 0.00023037
Epoch 29 Loss: 0.00016451
Epoch 30 Loss: 0.00014088
Epoch 31 Loss: 0.00008299
Epoch 32 Loss: 0.00005941
Epoch 33 Loss: 0.00003514
Epoch 34 Loss: 0.00002514
Epoch 35 Loss: 0.00001674
Epoch 36 Loss: 0.00000744
Epoch 37 Loss: 0.00000788
Epoch 38 Loss: 0.00000471
Epoch 39 Loss: 0.0000

In [19]:
# Test model
test_x = torch.Tensor([[1, 2], [10, 2], [0, 5]])
test_y = torch.Tensor([[0, 3], [0, 12], [0, 5]])
output = model(test_x)
print("Prediction: \n", output)
print("Expected output: \n", test_y)

Prediction: 
 tensor([[-2.8859e-07,  3.0002e+00],
        [-2.5879e-07,  1.2000e+01],
        [-3.1840e-07,  5.0002e+00]], grad_fn=<AddmmBackward0>)
Expected output: 
 tensor([[ 0.,  3.],
        [ 0., 12.],
        [ 0.,  5.]])


In [21]:
# Extracting a submodel
encoder_trained = model.encoder
encoding_vector = encoder_trained(test_x)
print("Encoding vector: \n", encoding_vector)

Encoding vector: 
 tensor([[-0.8454,  0.0829, -2.4598, -1.3727,  1.8971, -1.2504, -0.8168,  0.3137],
        [-4.0618, -2.6302, -6.3626, -2.2877,  7.6828, -7.7594, -1.8075,  2.8787],
        [-2.2951,  0.2047, -4.2821, -3.0378,  2.8706, -0.3742, -0.7672, -0.9402]],
       grad_fn=<AddmmBackward0>)


### What's next?

...