### SEQUENCE MODELS AND LONG SHORT-TERM MEMORY NETWORKS

https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

In [1]:
# Author: Robert Guthrie

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that the random tensors
# and randomly initialised LSTM weights created later are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x10c370250>

In [23]:
# Draw a 2x3 tensor of standard-normal samples; the bare name on the last
# line makes the notebook echo it.
a = torch.randn((2, 3))
a

tensor([[-0.6245, -0.7920,  1.2385],
        [ 0.8845, -0.4387, -0.9479]])

In [30]:
# Flatten `a` into a single column with view(-1, 1), then drop the size-1
# dimension with squeeze(). The result is not assigned, and because this is
# not the last expression in the cell the notebook does not display it.
a.view(-1,1).squeeze()
# `a` was created as a 2x3 tensor, so it has no size-1 dimensions and
# squeeze() returns it with its shape unchanged (this is what gets echoed).
a.squeeze()

tensor([[-0.6245, -0.7920,  1.2385],
        [ 0.8845, -0.4387, -0.9479]])

In [32]:
# LSTM that takes 3 input features per time step and keeps a hidden state
# of size 3.
lstm = nn.LSTM(input_size=3, hidden_size=3)

# A length-5 sequence, kept as a Python list of (1, 3) tensors (one per step).
inputs = [torch.randn(1, 3) for _ in range(5)]

# Random initial state pair for the LSTM; both tensors have shape (1, 1, 3).
hidden = tuple(torch.randn(1, 1, 3) for _ in range(2))
inputs

[tensor([[1.7899, 0.9227, 1.4088]]),
 tensor([[-0.4567,  0.1100, -0.2419]]),
 tensor([[ 0.2401,  1.4360, -0.0668]]),
 tensor([[ 0.8720,  0.6791, -0.4543]]),
 tensor([[-0.2149, -1.8579, -0.4278]])]

In [34]:

# Feed the sequence to the LSTM one element at a time, passing the state
# returned by each call into the next call.
for i in inputs:
    # Each element is reshaped to (1, 1, -1), i.e. a length-1 sequence with a
    # batch of one (nn.LSTM's default layout is (seq_len, batch, features)).
    # After the call, `hidden` holds the updated state tuple.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
# Only the values from the final step survive the loop: `out` is the output
# for the last element and `hidden` is the state after the whole sequence.
print(out)
print(hidden)

tensor([[[ 0.1911, -0.3173, -0.1396]]], grad_fn=<StackBackward>)
(tensor([[[ 0.1911, -0.3173, -0.1396]]], grad_fn=<StackBackward>), tensor([[[ 0.7261, -0.4811, -0.2850]]], grad_fn=<StackBackward>))


In [2]:


# Alternatively, we can process the entire sequence in a single call.
# The first value returned by the LSTM holds the hidden state for every step
# of the sequence. The second is the state after the final step, returned as
# a tuple of two tensors.
# (Compare the last slice of "out" with the first tensor of "hidden" below:
# they are the same.)
# The reason for returning both is that:
# "out" gives you access to all hidden states in the sequence;
# "hidden" lets you continue the sequence and backpropagate,
# by passing it as an argument to the lstm at a later time.
#
# Concatenate the list of (1, 3) tensors and reshape to (len(inputs), 1, -1),
# which inserts the extra middle (batch) dimension.
# NOTE(review): this rebinds `inputs` from a list of tensors to a single
# tensor, so re-running this cell without first re-creating the list will
# likely fail inside torch.cat — confirm.
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # start from a fresh random state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)

tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward>))
