In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
# Dimensions reused by the cells below.
input_size = 9    # size of each input vector fed to the recurrent layer
hidden_size = 16  # size of the hidden state
num_layers = 2    # number of stacked recurrent layers

# Keyword arguments make the constructor call self-describing.
lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
lstm

LSTM(9, 16, num_layers=2)

In [3]:
# IPython-only introspection: `??` displays the signature and source of nn.LSTM.
# NOTE(review): this is not plain Python and looks like a leftover exploration aid;
# consider removing it (and its long output) from the finished notebook.
??nn.LSTM

Init signature: nn.LSTM(*args, **kwargs)
Source:
class LSTM(RNNBase):
    r"""__init__(input_size,hidden_size,num_layers=1,bias=True,batch_first=False,dropout=0.0,bidirectional=False,proj_size=0,device=None,dtype=None)

    Apply a multi-layer long short-term memory (LSTM) RNN to an input sequence.
    For each element in the input sequence, each layer computes the following
    function:

    .. math::
        \begin{array}{ll} \\
            i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
            f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
            g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
            ... (output truncated)

In [5]:
seq_length = 5
batch_size = 2

# Random input laid out as (seq_length, batch_size, input_size).
x = torch.randn(seq_length, batch_size, input_size)

# Explicit all-zero initial hidden (H) and cell (C) states, one slice per layer.
H = torch.zeros(num_layers, batch_size, hidden_size)
C = torch.zeros_like(H)
hidden_inputs = (H, C)

# `h` is the (hidden, cell) pair returned alongside the per-step output `y`.
y, h = lstm(x, hidden_inputs)

print(f"Input shape: {list(x.shape)}")
print(f"Hidden shape: {list(h[0].shape)}")
print(f"Cell shape: {list(h[1].shape)}")
print(f"Output shape: {list(y.shape)}")

Input shape: [5, 2, 9]
Hidden shape: [2, 2, 16]
Cell shape: [2, 2, 16]
Output shape: [5, 2, 16]


In [6]:
# Report the shape of every parameter whose name contains "weight";
# all other parameters are skipped.
for name, param in lstm.named_parameters():
    if "weight" not in name:
        continue
    print(f"{name} has size {param.shape}")

weight_ih_l0 has size torch.Size([64, 9])
weight_hh_l0 has size torch.Size([64, 16])
weight_ih_l1 has size torch.Size([64, 16])
weight_hh_l1 has size torch.Size([64, 16])


In [7]:
class LSTMNet(nn.Module):
    """Stacked LSTM followed by a linear layer that maps each output step to one value."""

    def __init__(self, input_size, hidden_size, num_layers):
        """Build the recurrent layer and the linear head.

        Args:
            input_size: Passed to ``nn.LSTM`` as its input size.
            hidden_size: Hidden size of the LSTM and input width of the linear head.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        # Keep the configuration on the instance for later inspection.
        self.input_size, self.hidden_size, self.num_layers = (
            input_size,
            hidden_size,
            num_layers,
        )

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x, debug=False):
        """Run the LSTM and apply the linear head to its output.

        Args:
            x: Input tensor handed straight to ``self.lstm``.
            debug: When true, print the shape of each intermediate tensor.

        Returns:
            A pair ``(prediction, state)``: the linear head's output and the
            ``(hidden, cell)`` tuple returned by the LSTM.
        """
        if debug:
            print(f"Input Size: {x.shape}")

        # No initial state is supplied, so the LSTM uses its default.
        lstm_out, state = self.lstm(x)
        if debug:
            hidden, cell = state
            print(f"Hidden shape: {list(hidden.shape)}")
            print(f"Cell shape: {list(cell.shape)}")
            print(f"LSTMout shape: {list(lstm_out.shape)}")

        prediction = self.out(lstm_out)
        if debug:
            print(f"FinalOut shape: {list(prediction.shape)}")
        return prediction, state

In [8]:
# Wrap the LSTM in the model with a linear head.
# NOTE: this rebinds `lstm`, which earlier in the notebook named the bare nn.LSTM layer.
lstm = LSTMNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
lstm

LSTMNet(
  (lstm): LSTM(9, 16, num_layers=2)
  (out): Linear(in_features=16, out_features=1, bias=True)
)

In [None]:
x = torch.randn(seq_length, batch_size, input_size)
y_pred, h = lstm(x)
# One line per tensor: prediction, then the two elements of the returned state.
print(y_pred.shape, h[0].shape, h[1].shape, sep="\n")

# Random targets shaped like the prediction, only to exercise the loss.
lossfunc = nn.MSELoss()
target = torch.randn(seq_length, batch_size, 1)
loss = lossfunc(y_pred, target)
# Scoring only the last step is also valid:
#   loss = lossfunc(y_pred[-1], torch.randn(batch_size, 1))
loss


torch.Size([5, 2, 1])
torch.Size([2, 2, 16])
torch.Size([2, 2, 16])


tensor(0.2710, grad_fn=<MseLossBackward0>)

In [14]:
class GruNet(nn.Module):
    """Stacked GRU followed by a linear layer that maps each output step to one value."""

    def __init__(self, input_size, hidden_size, num_layers):
        """Build the recurrent layer and the linear head.

        Args:
            input_size: Passed to ``nn.GRU`` as its input size.
            hidden_size: Hidden size of the GRU and input width of the linear head.
            num_layers: Number of stacked GRU layers.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers)
        self.out = nn.Linear(hidden_size, 1)

    def forward(self, x, debug=False):
        """Run the GRU and apply the linear head to its output.

        Args:
            x: Input tensor handed straight to ``self.gru``.
            debug: When true, print the shape of each intermediate tensor.

        Returns:
            A pair ``(prediction, h)``: the linear head's output and the hidden
            state tensor returned by the GRU.
        """
        if debug:
            print(f"Input Size: {x.shape}")
        x, h = self.gru(x)
        if debug:
            # Unlike nn.LSTM, nn.GRU returns a single hidden-state tensor and has
            # no cell state. Indexing h[0] / h[1] would select individual layers
            # (and h[1] raises IndexError for a one-layer GRU), so report the
            # shape of the whole tensor instead.
            print(f"Hidden shape: {list(h.shape)}")
            print(f"GruOut shape: {list(x.shape)}")
        x = self.out(x)
        if debug:
            print(f"FinalOut shape: {list(x.shape)}")
        return x, h

In [15]:
gru = GruNet(input_size, hidden_size, num_layers)
x = torch.randn(seq_length, batch_size, input_size)
y_pred, h = gru(x)
print(y_pred.shape)
# A GRU has no cell state: `h` is one tensor whose first axis indexes the layers.
# Iterate over it instead of hard-coding h[0] / h[1], which raises IndexError
# when num_layers == 1. With two layers this prints the same two lines as before.
for layer_hidden in h:
    print(layer_hidden.shape)

# Random targets shaped like the prediction, only to exercise the loss.
lossfunc = nn.MSELoss()
loss = lossfunc(y_pred, torch.randn(seq_length, batch_size, 1))
# Scoring only the last step is also valid:
#   loss = lossfunc(y_pred[-1], torch.randn(batch_size, 1))
loss


torch.Size([5, 2, 1])
torch.Size([2, 16])
torch.Size([2, 16])


tensor(1.5894, grad_fn=<MseLossBackward0>)

In [16]:
# Report the shape of every parameter of the GRU model whose name contains
# "weight" (this includes the linear head's weight).
for name, param in gru.named_parameters():
    if "weight" in name:
        print(f"{name} has size {param.shape}")

gru.weight_ih_l0 has size torch.Size([48, 9])
gru.weight_hh_l0 has size torch.Size([48, 16])
gru.weight_ih_l1 has size torch.Size([48, 16])
gru.weight_hh_l1 has size torch.Size([48, 16])
out.weight has size torch.Size([1, 16])
