In [1]:
import torch
import torch.nn as nn

Constructor arguments of `nn.LSTM`:
- input_size: The number of expected features in the input `x`
- hidden_size: The number of features in the hidden state `h`
- num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
        would mean stacking two LSTMs together to form a `stacked LSTM`,
        with the second LSTM taking in outputs of the first LSTM and
        computing the final results. Default: 1
- bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
        Default: ``True``
- batch_first: If ``True``, then the input and output tensors are provided
        as (batch, seq, feature). Default: ``False``
- dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
        LSTM layer except the last layer, with dropout probability equal to
        `dropout`. Default: 0
- bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``

In [2]:
# Two stacked LSTM layers: 10 input features per step, 20 hidden features per layer.
rnn = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)

In [3]:
# Echo the module so the notebook shows its repr and the configured sizes can be checked.
rnn

LSTM(10, 20, num_layers=2)

In [6]:
# Random example batch for the LSTM. With `batch_first` left at its default,
# nn.LSTM reads the three axes as (seq_len=5, batch=3, input_size=10).
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# the forward-pass cell refers to the tensor by this name.
input = torch.randn((5, 3, 10))
input

tensor([[[ 0.5638, -1.9552,  0.4559,  1.4515,  0.4724, -0.3450, -0.1131,
          -0.1658, -0.0179,  0.3788],
         [ 0.0867, -0.9159, -0.9350,  0.6540,  0.6091,  1.1622, -0.1762,
          -0.0762, -2.1139, -0.4871],
         [-1.1072,  0.3773, -0.3664, -0.9821, -1.7027, -0.7514, -2.0770,
          -1.1149,  0.3212,  1.3777]],

        [[ 0.9755, -0.1942,  1.8847, -0.1119, -1.4359,  0.3637, -0.3443,
           0.1174,  1.1738,  0.6225],
         [ 1.6132, -0.3473,  0.7501, -0.2116, -1.2292, -0.3324, -1.7711,
           0.0913, -0.4354,  0.7766],
         [-0.1884, -1.1890, -0.8953,  0.3908,  0.5672, -0.6490, -0.6420,
           0.2471,  0.4966, -0.0663]],

        [[-1.5825,  1.3707,  0.0850,  1.8917, -0.0940,  1.4641,  1.1842,
          -0.1550, -0.7120, -2.2068],
         [ 1.0723, -0.8320, -1.3720,  1.6562, -0.3811, -0.8923,  0.5598,
           2.5367, -0.2664,  0.1786],
         [-1.2683, -2.2597, -1.4429,  1.0103, -0.9522,  1.5153,  1.0032,
          -0.5775,  0.2372,  0.7514

In [7]:
# Random initial hidden state (h0) and cell state (c0) for the LSTM.
# Each is laid out as (num_layers=2, batch=3, hidden_size=20); h0 is drawn first.
h0, c0 = torch.randn(2, 3, 20), torch.randn(2, 3, 20)

In [9]:
# Run the whole sequence through the LSTM, starting from the (h0, c0) state pair.
# The call returns the outputs together with the resulting (hidden, cell) state pair.
output, (h1, c1) = rnn(input, hx=(h0, c0))

In [10]:
# Display the tensor returned as `output` by the LSTM call.
output

tensor([[[ 4.0121e-01, -1.1273e-01, -3.7310e-01,  4.3318e-01, -1.1693e-01,
           7.2703e-02,  2.8580e-03,  4.5770e-02,  9.8060e-02, -6.5252e-02,
           9.5057e-02, -4.5540e-01, -6.9190e-02, -4.2195e-02,  4.5454e-02,
           4.2997e-01, -3.0321e-02, -2.5476e-01,  1.6888e-01, -1.7168e-01],
         [ 2.8788e-01,  1.2746e-01, -7.6260e-02, -1.5571e-01, -2.1772e-01,
          -6.0782e-02, -3.0601e-02,  2.2182e-01,  1.1036e-02,  6.9470e-03,
          -2.5087e-01,  1.3426e-01, -1.8257e-02, -9.9165e-03,  5.4043e-02,
           1.2960e-01, -1.2753e-01,  3.3772e-01, -5.1832e-01,  7.7530e-02],
         [ 9.0554e-02,  1.8734e-01, -5.9285e-01, -2.2838e-01, -3.7391e-01,
          -4.5800e-02,  2.7236e-02,  2.8070e-01, -1.5754e-01, -5.2292e-02,
          -9.1672e-02, -4.3063e-02,  3.2151e-01,  5.8067e-01,  1.4132e-02,
           2.9767e-01,  4.0520e-01, -2.1284e-01,  4.9562e-01, -6.8515e-02]],

        [[ 2.5021e-01, -2.3220e-02, -3.0189e-01,  2.2795e-01, -6.7515e-02,
          -3.1047e-0

In [11]:
# Display h1, the hidden-state half of the state pair returned by the LSTM call.
h1

tensor([[[ 0.0653,  0.1257, -0.0392,  0.0004, -0.0941,  0.0307,  0.0129,
           0.0572,  0.0657,  0.0030,  0.0433,  0.0276, -0.1410,  0.0061,
          -0.0998,  0.0803,  0.1194,  0.1083,  0.0677, -0.0029],
         [ 0.1238,  0.0690, -0.0298, -0.0156, -0.1310,  0.2627, -0.0213,
           0.0037,  0.0467,  0.0448,  0.0755, -0.0367,  0.0616, -0.0096,
          -0.1425,  0.0239,  0.0799,  0.1759, -0.0278,  0.0744],
         [ 0.0666,  0.0046, -0.0989, -0.0302,  0.1288, -0.2636,  0.0589,
           0.1042,  0.0900,  0.1239, -0.0833,  0.2482, -0.1705, -0.0661,
           0.0044, -0.1447,  0.1075,  0.0914,  0.0912,  0.1658]],

        [[ 0.0763, -0.0590, -0.1457,  0.0096, -0.0919, -0.0185, -0.0195,
           0.0548,  0.0462, -0.0310,  0.0492, -0.1220,  0.0535, -0.0552,
           0.0159, -0.0112,  0.0422,  0.1195, -0.1253, -0.0730],
         [ 0.0446, -0.0455, -0.1379, -0.0376, -0.1096, -0.0439, -0.0391,
           0.0562,  0.1108, -0.0642,  0.0147, -0.0084,  0.1174, -0.0590,
        

In [12]:
# Display c1, the cell-state half of the state pair returned by the LSTM call.
c1

tensor([[[ 0.0987,  0.2887, -0.0777,  0.0009, -0.1392,  0.0444,  0.0191,
           0.1594,  0.1131,  0.0081,  0.0900,  0.0552, -0.2594,  0.0144,
          -0.2387,  0.1620,  0.3070,  0.2540,  0.2806, -0.0085],
         [ 0.2337,  0.1770, -0.0509, -0.0599, -0.1818,  0.4448, -0.0470,
           0.0088,  0.0848,  0.1090,  0.1946, -0.0798,  0.1565, -0.0187,
          -0.2462,  0.0719,  0.1838,  0.3261, -0.0621,  0.2670],
         [ 0.1118,  0.0099, -0.1966, -0.0678,  0.2084, -0.6087,  0.1325,
           0.1992,  0.1666,  0.2112, -0.1685,  0.3823, -0.3688, -0.1568,
           0.0116, -0.2444,  0.2207,  0.2693,  0.2321,  0.3826]],

        [[ 0.1543, -0.1244, -0.2642,  0.0180, -0.1666, -0.0404, -0.0369,
           0.0925,  0.1090, -0.0695,  0.0950, -0.2538,  0.1115, -0.1292,
           0.0330, -0.0215,  0.0773,  0.2713, -0.2555, -0.1600],
         [ 0.0961, -0.1026, -0.2463, -0.0691, -0.1882, -0.0938, -0.0717,
           0.0970,  0.2623, -0.1475,  0.0275, -0.0176,  0.2541, -0.1443,
        

In [13]:
# Shape of the LSTM output; the last axis should equal hidden_size (20).
output.shape

torch.Size([5, 3, 20])