In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

#One cell RNN input_dim (4) -> output_dim (2). sequence: 5
cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)

# (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
hidden = Variable(torch.randn(1,1,2))

print(hidden.data)

tensor([[[-0.0428,  0.2412]]])


In [3]:
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
inputs = Variable(torch.Tensor([h,e,l,l,o]))
print(inputs)

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])


In [4]:
for one in inputs:
    one = one.view(1,1,-1)
    print(one)
    out,hidden = cell(one,hidden)
    print("one input size ", one.size(), "\nout size: ",out.size())
    print("out: ",out)
    print("hidden: ",hidden)

tensor([[[1., 0., 0., 0.]]])
one input size  torch.Size([1, 1, 4]) 
out size:  torch.Size([1, 1, 2])
out:  tensor([[[0.4459, 0.6955]]], grad_fn=<TransposeBackward0>)
hidden:  tensor([[[0.4459, 0.6955]]], grad_fn=<StackBackward>)
tensor([[[0., 1., 0., 0.]]])
one input size  torch.Size([1, 1, 4]) 
out size:  torch.Size([1, 1, 2])
out:  tensor([[[0.8065, 0.4371]]], grad_fn=<TransposeBackward0>)
hidden:  tensor([[[0.8065, 0.4371]]], grad_fn=<StackBackward>)
tensor([[[0., 0., 1., 0.]]])
one input size  torch.Size([1, 1, 4]) 
out size:  torch.Size([1, 1, 2])
out:  tensor([[[0.9688, 0.7976]]], grad_fn=<TransposeBackward0>)
hidden:  tensor([[[0.9688, 0.7976]]], grad_fn=<StackBackward>)
tensor([[[0., 0., 1., 0.]]])
one input size  torch.Size([1, 1, 4]) 
out size:  torch.Size([1, 1, 2])
out:  tensor([[[0.9844, 0.7846]]], grad_fn=<TransposeBackward0>)
hidden:  tensor([[[0.9844, 0.7846]]], grad_fn=<StackBackward>)
tensor([[[0., 0., 0., 1.]]])
one input size  torch.Size([1, 1, 4]) 
out size:  torch

In [5]:
# We can do the whole at once
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True

inputs = inputs.view(1,5,-1)
out,hidden = cell(one,hidden)
print("sequence input size", inputs.size(), "out size", out.size())
print("out: ",out)
print("hidden: ",hidden)

sequence input size torch.Size([1, 5, 4]) out size torch.Size([1, 1, 2])
out:  tensor([[[0.9783, 0.7415]]], grad_fn=<TransposeBackward0>)
hidden:  tensor([[[0.9783, 0.7415]]], grad_fn=<StackBackward>)


In [6]:
# hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
hidden = Variable(torch.randn(1, 3, 2))
print(hidden)

tensor([[[-0.9220,  0.1620],
         [ 0.3504,  0.1072],
         [-0.7518, -0.3606]]])


In [7]:
inputs = Variable(torch.Tensor([[h,e,l,l,o],
                                [e,o,l,l,l],
                                [l,l,e,e,l]
                               ]))
print(inputs)
# Propagate input through RNN
# Input: (batch, seq_len, input_size) when batch_first=True
# B x S x I
out, hidden = cell(inputs, hidden)
print(hidden)
print("batch input size", inputs.size(), "out size", out.size())
print("out: ",out)
print("hidden: ",hidden)

tensor([[[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],

        [[0., 1., 0., 0.],
         [0., 0., 0., 1.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]],

        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]]])
tensor([[[0.9797, 0.7410],
         [0.9844, 0.7835],
         [0.9661, 0.7967]]], grad_fn=<StackBackward>)
batch input size torch.Size([3, 5, 4]) out size torch.Size([3, 5, 2])
out:  tensor([[[-0.1200,  0.7755],
         [ 0.6759,  0.5231],
         [ 0.9675,  0.8067],
         [ 0.9846,  0.7847],
         [ 0.9797,  0.7410]],

        [[ 0.5712,  0.4555],
         [ 0.9474,  0.7767],
         [ 0.9835,  0.7863],
         [ 0.9845,  0.7835],
         [ 0.9844,  0.7835]],

        [[ 0.5026,  0.8869],
         [ 0.9756,  0.8175],
         [ 0.9104,  0.3475],
         [ 0.8221,  0.36

In [8]:
# One cell RNN input_dim (4) -> output_dim (2)
cell = nn.RNN(input_size=4, hidden_size=2)

# The given dimensions dim0 and dim1 are swapped.
inputs = inputs.transpose(dim0=0, dim1=1)
print(inputs)
# Propagate input through RNN
# Input: (seq_len, batch_size, input_size) when batch_first=False (default)
# S x B x I
out, hidden = cell(inputs, hidden)
print("batch input size", inputs.size(), "out size", out.size())
print("out: ",out)
print("hidden: ",hidden)

tensor([[[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]],

        [[0., 1., 0., 0.],
         [0., 0., 0., 1.],
         [0., 0., 1., 0.]],

        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],

        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],

        [[0., 0., 0., 1.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]]])
batch input size torch.Size([5, 3, 4]) out size torch.Size([5, 3, 2])
out:  tensor([[[-0.0198,  0.7311],
         [-0.4763,  0.5800],
         [ 0.4422,  0.4770]],

        [[-0.5306,  0.1620],
         [-0.0765,  0.0337],
         [ 0.3103,  0.0754]],

        [[ 0.1444, -0.5178],
         [ 0.1174, -0.4162],
         [-0.6828, -0.0940]],

        [[-0.0894, -0.5955],
         [-0.0506, -0.5614],
         [-0.7412, -0.5793]],

        [[-0.4804, -0.4803],
         [-0.1165, -0.6662],
         [-0.1585, -0.8129]]], grad_fn=<StackBackward>)
hidden:  tensor([[[-0.4804, -0.4803],
 