# Minimal Example: Feeding 2 Sentences to an LSTM

In [1]:
import numpy as np

import torch
import torch.nn as nn

### Create Example Data

In [2]:
sentences = ['i like fresh bread', 'i hate stale bread']

### Create Vocabulary

In [3]:
vocabulary = set()
for sent in sentences:
    for word in sent.split():
        vocabulary.add(word)

print(vocabulary)

{'stale', 'like', 'bread', 'i', 'hate', 'fresh'}


### Create Indices to Vectorize Words

In [4]:
# Most NLP tasks require some basic tokens, e.g., for padding, unknown words, start of sequence, end of sequence
word2index = {'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3}

for word in vocabulary:
    word2index[word] = len(word2index)
    
print(word2index)

{'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3, 'stale': 4, 'like': 5, 'bread': 6, 'i': 7, 'hate': 8, 'fresh': 9}


### Vectorize Sentences

In [5]:
batch = [ [ word2index[word] for word in sent.split() ] for sent in sentences]

print(batch)
print()

# Let's make a numpy array out of it
batch = np.array(batch)

print(batch)
print('The shape of batch is:', batch.shape)
print()

# Let's make a PyTorch tensor out of it
batch = torch.tensor(batch, dtype=torch.long)
print(batch)
print('The shape of batch is:', batch.shape)

[[7, 5, 9, 6], [7, 8, 4, 6]]

[[7 5 9 6]
 [7 8 4 6]]
The shape of batch is: (2, 4)

tensor([[7, 5, 9, 6],
        [7, 8, 4, 6]])
The shape of batch is: torch.Size([2, 4])


Now we have the first step of having our data in the shape `(batch_size, seq_len)`. to feed it into an LSTM, we still need `input_size`.

### Embedding Layer

#### Define layer

In [6]:
vocab_size = len(word2index)   # vocab_size reflects all known words in the index
embed_dim = 10                 # Let's assume the word embeddings are of size 10 to keep it simple

word_embedding_layer = nn.Embedding(vocab_size, embed_dim)

#### Push batch through layer

In [7]:
batch = word_embedding_layer(batch)

print('The shape of batch is:', batch.shape)
print()
print(batch)

The shape of batch is: torch.Size([2, 4, 10])

tensor([[[-0.3179,  0.5848, -0.0688, -0.6153, -0.5866, -0.2300, -0.0115,
           1.6450,  2.4355,  0.5729],
         [-0.3548,  0.6907, -2.0556, -1.5537,  0.3275, -1.2800,  0.6300,
          -0.5206,  1.8570, -0.3607],
         [-0.2171, -0.4977, -0.5973, -1.4402, -0.8320, -0.4389, -1.2304,
          -0.2994,  0.2303, -0.4284],
         [ 1.9364, -1.8846, -0.4562, -0.1178,  0.8705, -0.1335,  0.1754,
          -1.7738, -1.0396,  0.1272]],

        [[-0.3179,  0.5848, -0.0688, -0.6153, -0.5866, -0.2300, -0.0115,
           1.6450,  2.4355,  0.5729],
         [-1.5821,  0.2974, -1.3868, -0.3796,  1.5824, -0.6330, -0.0880,
           0.2109, -1.1064, -0.6549],
         [-1.5234, -0.3309, -0.3816,  0.9839,  1.6590, -1.2978, -0.1062,
           0.0335, -1.5911, -1.8855],
         [ 1.9364, -1.8846, -0.4562, -0.1178,  0.8705, -0.1335,  0.1754,
          -1.7738, -1.0396,  0.1272]]], grad_fn=<EmbeddingBackward>)


Now we have want we want: `(batch_size, seq_len, input_size)`

### LSTM Layer (`batch_first=True`)

#### Define LSTM layer

In [8]:
hidden_dim = 32 # Let's set the size if the hidden dimension to 32

lstm  = nn.LSTM(embed_dim, hidden_dim, batch_first=True)

#### Push batch through LSTM layer

In [9]:
# Initialize hidden state (here with zeros)
batch_size = batch.shape[0]
(h, c) =  (torch.zeros(1, batch_size, hidden_dim), torch.zeros(1, batch_size, hidden_dim)) 

lstm_out, (h, c) = lstm(batch, (h, c))

print('The shape of lstm_out is:', lstm_out.shape) # (batch_size, seq_len, hidden_dim)
print('The shape of h is:', h.shape) # (num_layers*num_directions, batch_size, hidden_dim)

The shape of lstm_out is: torch.Size([2, 4, 32])
The shape of h is: torch.Size([1, 2, 32])


### LSTM Layer (`batch_first=False`)

#### Define LSTM layer

In [10]:
hidden_dim = 32 # Let's set the size if the hidden dimension to 32

lstm  = nn.LSTM(embed_dim, hidden_dim, batch_first=False)

#### Push batch through LSTM layer

In [11]:
# Initialize hidden state (here with zeros)
batch_size = batch.shape[0]
(h, c) =  (torch.zeros(1, batch_size, hidden_dim), torch.zeros(1, batch_size, hidden_dim)) 

# We need to reshape the batch from (batch_size, seq_len, input_size) to (seq_len, batch_size, input_size)
batch = batch.transpose(0,1)

lstm_out, (h, c) = lstm(batch, (h, c))

print('The shape of lstm_out is:', lstm_out.shape) # (seq_len, batch_size, hidden_dim)
print('The shape of h is:', h.shape) # (num_layers*num_directions, batch_size, hidden_dim)

The shape of lstm_out is: torch.Size([4, 2, 32])
The shape of h is: torch.Size([1, 2, 32])
