In [2]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [8]:
# Seed the global RNG first: the LSTM weights, the toy inputs and the initial
# state below are all drawn from it, so without a seed every
# Restart & Run All would print different numbers.
SEED = 1
torch.manual_seed(SEED)

lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# Initial (hidden state, cell state) pair handed to the first LSTM call;
# each tensor is shaped (1, 1, 3).
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))

In [9]:
# Display the module's repr to confirm how it was configured.
lstm

LSTM(3, 3)

In [10]:
# Feed the sequence to the LSTM one element at a time, threading the
# (hidden, cell) state returned by each call into the next one.
for step_input in inputs:
    # Each element is (1, 3); reshape it to the 3-D (1, 1, 3) form used here.
    step_input = step_input.view(1, 1, -1)
    out, hidden = lstm(step_input, hidden)
    print("out: ", out)
    print("hidden: ", hidden)

# Alternatively, the entire sequence can be processed in a single call.
# The first value returned by the LSTM holds the hidden state for every step
# of the sequence; the second is just the most recent state (compare the
# last slice of "out" with the first tensor in "hidden" below -- they are
# the same).
# Why both are returned:
#   * "out" gives access to all hidden states in the sequence;
#   * "hidden" lets you continue the sequence and backpropagate, by passing
#     it back to the lstm as an argument at a later time.
# To do that, the inputs first need an extra 2nd dimension added.

out:  tensor([[[-0.0579,  0.2796, -0.0890]]], grad_fn=<CatBackward>)
hidden:  (tensor([[[-0.0579,  0.2796, -0.0890]]], grad_fn=<ViewBackward>), tensor([[[-0.2656,  0.3973, -0.4105]]], grad_fn=<ViewBackward>))
out:  tensor([[[-0.2075, -0.0041, -0.0028]]], grad_fn=<CatBackward>)
hidden:  (tensor([[[-0.2075, -0.0041, -0.0028]]], grad_fn=<ViewBackward>), tensor([[[-0.5567, -0.0086, -0.0059]]], grad_fn=<ViewBackward>))
out:  tensor([[[-0.3246, -0.1086, -0.0493]]], grad_fn=<CatBackward>)
hidden:  (tensor([[[-0.3246, -0.1086, -0.0493]]], grad_fn=<ViewBackward>), tensor([[[-0.7477, -0.1442, -0.1625]]], grad_fn=<ViewBackward>))
out:  tensor([[[-0.2259, -0.0476, -0.0552]]], grad_fn=<CatBackward>)
hidden:  (tensor([[[-0.2259, -0.0476, -0.0552]]], grad_fn=<ViewBackward>), tensor([[[-0.8860, -0.0926, -0.1202]]], grad_fn=<ViewBackward>))
out:  tensor([[[-0.2878, -0.1108,  0.1416]]], grad_fn=<CatBackward>)
hidden:  (tensor([[[-0.2878, -0.1108,  0.1416]]], grad_fn=<ViewBackward>), tensor([[[-0.9335, -

In [11]:
# Stack the per-step (1, 3) tensors into one (seq_len, 1, 3) tensor so the
# whole sequence can be handed to the LSTM in a single call.
# `inputs` is rebound from a list to a tensor here, so guard the conversion:
# torch.cat only accepts a sequence of tensors, and re-running this cell
# on the already-stacked tensor would otherwise raise.
if not isinstance(inputs, torch.Tensor):
    inputs = torch.cat(inputs).view(len(inputs), 1, -1)

In [13]:
# Sanity check on the stacked tensor: 5 steps, each holding a single
# 3-feature vector, i.e. torch.Size([5, 1, 3]).
inputs.shape

torch.Size([5, 1, 3])

In [14]:
# Start from a fresh random (hidden state, cell state) pair so nothing carries
# over from the step-by-step loop.
hidden = tuple(torch.randn(1, 1, 3) for _ in range(2))

# One call over the whole stacked sequence: `out` collects the hidden state of
# every step, `hidden` is the state after the last step.
out, hidden = lstm(inputs, hidden)
print(out, hidden, sep="\n")

tensor([[[-0.3282,  0.0192, -0.0195]],

        [[-0.2797, -0.0302,  0.0073]],

        [[-0.3952, -0.1122, -0.0519]],

        [[-0.2514, -0.0444, -0.0612]],

        [[-0.3150, -0.1087,  0.1378]]], grad_fn=<CatBackward>)
(tensor([[[-0.3150, -0.1087,  0.1378]]], grad_fn=<ViewBackward>), tensor([[[-1.1161, -0.2957,  0.2149]]], grad_fn=<ViewBackward>))


In [None]:
# Builds a negative-log-likelihood loss module and displays its repr; the
# result is not bound to a name, so nothing later can use it.
# NOTE(review): this cell was never executed (In [None]) and the notebook
# uses nn.CrossEntropyLoss further down -- looks like leftover scratch work;
# confirm and remove.
nn.NLLLoss()

In [15]:
# Toy five-character sequence.
# NOTE(review): `s` is never read in the visible cells; the one-hot tensor
# built in the next cell presumably encodes it by hand with the indices
# H=0, e=1, l=2, o=3 -- confirm.
s = 'Hello'

In [36]:
# One-hot encode a 5-step sequence over 4 classes as a (5, 1, 4) long tensor:
# step k gets a single 1 in the column given by the k-th class index.
inp = torch.zeros((5, 1, 4), dtype=torch.long)
for step, class_index in enumerate((0, 1, 2, 2, 3)):
    inp[step, 0, class_index] = 1

In [37]:
# Rebind `lstm` to a new model sized for the 4-wide one-hot vectors.
lstm = nn.LSTM(input_size=4, hidden_size=4)  # Input dim is 4, output dim is 4

# Random initial (hidden state, cell state) pair, each shaped (1, 1, 4).
hidden = tuple(torch.randn(1, 1, 4) for _ in range(2))

In [40]:
# Run the whole one-hot sequence through the LSTM in one call. `inp` was
# created with dtype long, so it is cast to float before being fed in.
# `hidden` is overwritten with the returned state, so re-running this cell
# starts from the previous run's final state rather than the initial one.
out, hidden = lstm(inp.float(), hidden)

In [41]:
# Cross-entropy loss: takes per-class scores plus integer class indices as
# targets.
criterion = nn.CrossEntropyLoss()

In [51]:
# Flatten the LSTM output to one row of 4 class scores per step.
logits = out.reshape(5, 4)
# Recover each step's class index from its one-hot row, then drop the
# singleton middle axis to get a 1-D tensor of 5 targets.
targets = inp.argmax(2).squeeze(1)
loss = criterion(logits, targets)

In [52]:
# Show the scalar loss value.
loss

tensor(1.3652, grad_fn=<NllLossBackward>)

In [54]:
# Back-propagate the loss through the LSTM, filling in `.grad` on its
# parameters. Gradients accumulate across calls, and by default the graph is
# freed afterwards, so `loss` must be recomputed before calling this again.
loss.backward()

In [59]:
# First tensor in the first layer's weight list; the saved output is a
# 16 x 4 matrix.
# NOTE(review): the saved values are all within about +/-0.06, which looks
# more like gradients than initial weights -- confirm whether
# `lstm.all_weights[0][0].grad` was what this cell was meant to show.
lstm.all_weights[0][0]

tensor([[ 0.0096, -0.0018, -0.0053, -0.0020],
        [ 0.0034, -0.0090, -0.0023,  0.0008],
        [-0.0007,  0.0074,  0.0100, -0.0022],
        [-0.0037,  0.0036,  0.0027, -0.0068],
        [-0.0005, -0.0037, -0.0077, -0.0017],
        [ 0.0022,  0.0112, -0.0024, -0.0007],
        [ 0.0010,  0.0031,  0.0104, -0.0014],
        [-0.0044, -0.0033,  0.0022, -0.0051],
        [-0.0099,  0.0156,  0.0264,  0.0018],
        [-0.0065, -0.0243,  0.0415,  0.0102],
        [-0.0004, -0.0114, -0.0606,  0.0030],
        [ 0.0069,  0.0108,  0.0182, -0.0246],
        [ 0.0227, -0.0027, -0.0081, -0.0063],
        [-0.0062,  0.0038, -0.0015, -0.0001],
        [-0.0025, -0.0006,  0.0323, -0.0058],
        [-0.0050,  0.0023,  0.0113, -0.0153]])