In [105]:
import torch as tc
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

**Series** of input -> **Series** of output

memories previous data to predict next output.

RNN Applications: **Series** of data.

1. Time series prediction: predict stock prices etc.
2. Language modelling (text generation): understand the current sentence from the previous context.
3. Text sentiment analysis: 
4. Named entity recognition
5. Translation
6. Speech recognition
7. Music composition
...

We need to know
1. the number of classes (RNN problem is multiclass classification (i.e., predict one out of five) --> cross entropy.
2. input size
3. hidden size
4. batch size
5. sequence length
6. number of layer of rnn

Use one-hot vectors to encode letters.

In [106]:
h = [1,0,0,0]
e = [0,1,0,0]
l = [0,0,1,0]
o = [0,0,0,1]

In [107]:
cell = tc.nn.RNN(4, 2)
inputs = Variable(tc.Tensor([[h]]))
hidden = Variable(tc.randn(1,1,2))

out, hidden = cell(inputs, hidden)
print("INPUT: ", inputs)
print("OUT: ", out, "HIDDEN: ", hidden)

INPUT:  Variable containing:
(0 ,.,.) = 
  1  0  0  0
[torch.FloatTensor of size 1x1x4]

OUT:  Variable containing:
(0 ,.,.) = 
  0.5847  0.5382
[torch.FloatTensor of size 1x1x2]
 HIDDEN:  Variable containing:
(0 ,.,.) = 
  0.5847  0.5382
[torch.FloatTensor of size 1x1x2]



**Feed sequence of letters,** not character by character.

In [108]:
cell = tc.nn.RNN(input_size=4, hidden_size=2, batch_first=True)
# By setting `batch_first=True`, the first element of shape becomes the batch size

inputs = Variable(tc.Tensor([[h, e, l, l, o]]))
hidden = Variable(tc.randn(1,1,2))
out, hidden = cell(inputs, hidden)
print("INPUT: ", inputs) # input size: 1X5X4. 1: batch size, 5: sequence size (h,e,l,l,o), 4: input dimension
print("OUT: ", out, "HIDDEN: ", hidden)

INPUT:  Variable containing:
(0 ,.,.) = 
  1  0  0  0
  0  1  0  0
  0  0  1  0
  0  0  1  0
  0  0  0  1
[torch.FloatTensor of size 1x5x4]

OUT:  Variable containing:
(0 ,.,.) = 
  0.8503 -0.2171
  0.6434 -0.8183
  0.4419 -0.1800
  0.5729 -0.3833
  0.7308  0.0717
[torch.FloatTensor of size 1x5x2]
 HIDDEN:  Variable containing:
(0 ,.,.) = 
  0.7308  0.0717
[torch.FloatTensor of size 1x1x2]



In [109]:
tc.manual_seed(1)
cell = tc.nn.RNN(input_size=4, hidden_size=2, batch_first=True)
# batch_first: If ``True``, then the input and output tensors are provided as (batch, seq, feature)

# Three batches: hello, eolll, lleel
inputs = Variable(tc.Tensor([[h,e,l,l,o],
                            [e,o,l,l,l],
                            [l,l,e,e,l]]))
# input_size: torch.Size([3,5,4]): Batch size, sequence length, input dimension (one-hot size)

hidden = Variable(tc.randn(1,3,2)) # num_layers*num_direction, num_batches, hidden_size
# hidden = Variable(tc.randn(1,1,2)) works as well. WHY???????????????????????????????????????????????
out, hidden = cell(inputs, hidden)
print("INPUT: ", inputs) # input size: 3X5X4. 3: batch size, 5: sequence size (h,e,l,l,o), 4: input dimension
print("OUT: ", out, "HIDDEN: ", hidden) # input size: 3X5X4. 3: batch size, 5: sequence size (h,e,l,l,o), 4: hidden dimension

INPUT:  Variable containing:
(0 ,.,.) = 
  1  0  0  0
  0  1  0  0
  0  0  1  0
  0  0  1  0
  0  0  0  1

(1 ,.,.) = 
  0  1  0  0
  0  0  0  1
  0  0  1  0
  0  0  1  0
  0  0  1  0

(2 ,.,.) = 
  0  0  1  0
  0  0  1  0
  0  1  0  0
  0  1  0  0
  0  0  1  0
[torch.FloatTensor of size 3x5x4]

OUT:  Variable containing:
(0 ,.,.) = 
  0.4768 -0.8414
  0.0143  0.1479
  0.0582 -0.4436
  0.1133 -0.4532
  0.5292  0.0269

(1 ,.,.) = 
 -0.1859 -0.0919
  0.4841 -0.0191
  0.1183 -0.3717
  0.1129 -0.4418
  0.1184 -0.4446

(2 ,.,.) = 
  0.1332 -0.5055
  0.1258 -0.4432
 -0.0546  0.0938
 -0.1183  0.0773
  0.0513 -0.4662
[torch.FloatTensor of size 3x5x2]
 HIDDEN:  Variable containing:
(0 ,.,.) = 
  0.5292  0.0269
  0.1184 -0.4446
  0.0513 -0.4662
[torch.FloatTensor of size 1x3x2]



We want to train our model to say "hi hello".

The RNN model predicts which letter follows which letter.

And we need to use **multi-label classification** because we determine which letter out of five (h,i,e,l,o) will follow the given letter..

In [140]:
h = [1,0,0,0,0]
i = [0,1,0,0,0]
e = [0,0,1,0,0]
l = [0,0,0,1,0]
o = [0,0,0,0,1]
letters = [h,i,e,l,o]

letter_to_idx = {letter: idx for letter, idx in enumerate(letters)}

In [187]:
idx2char = ['h', 'i', 'e', 'l', 'o']
x_data = [0,1,0,2,3,3]
one_hot_lookup = [[1,0,0,0,0],
                 [0,1,0,0,0],
                 [0,0,1,0,0],
                 [0,0,0,1,0],
                 [0,0,0,0,1],]

y_data = [1,0,2,3,3,4]
x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = Variable(tc.Tensor(x_one_hot))
labels = Variable(tc.LongTensor(y_data))

In [188]:
# input_dim = 5: one-hot size ( == number of classes in this example)
# sequence_length = 1
# num_layers = 1: one-layer rnn
# batch_size = 1: one sentence
# hidden_dim = 5: output directly to a one-hot vector
class Model(tc.nn.Module):
    def __init__(self, input_dim, sequence_length, num_layers, batch_size, hidden_dim):
        super(Model, self).__init__()
        self.input_dim = input_dim
        self.sequence_length = sequence_length
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        
        self.rnn = tc.nn.RNN(input_size=input_dim, hidden_size=self.hidden_dim, batch_first=True)
        
    def forward(self, inputs, hidden):
        inputs = inputs.view(self.batch_size, self.sequence_length, self.input_dim)
        out, hidden = self.rnn(inputs)
        out = out.view(-1, self.input_dim)
        return out, hidden
    
    
    def initialize_hidden(self):
        return Variable(tc.zeros(self.num_layers, self.batch_size, self.hidden_dim))

In [223]:
idx2char = ['h', 'i', 'e', 'l', 'o']
x_data = [0,1,0,2,3,3]
one_hot_lookup = [[1,0,0,0,0],
                 [0,1,0,0,0],
                 [0,0,1,0,0],
                 [0,0,0,1,0],
                 [0,0,0,0,1],]

y_data = [1,0,2,3,3,4]
x_one_hot = [one_hot_lookup[x] for x in x_data]

inputs = Variable(tc.Tensor(x_one_hot))
labels = Variable(tc.LongTensor(y_data))

In [224]:
model = Model(5, 1, 1, 1, 5)
criterion = tc.nn.CrossEntropyLoss()
optimizer = tc.optim.SGD(model.parameters(), lr=0.01)

In [225]:
tc.manual_seed(1)
for epoch in range(100):
    optimizer.zero_grad()
    hidden = model.initialize_hidden()
    total_loss = 0
    for inp, lab in zip(inputs, labels):
        out, hidden = model(inp, hidden)
        val, idx = out.max(1)
        total_loss += criterion(out, lab)

In [226]:
total_loss

Variable containing:
 9.5889
[torch.FloatTensor of size 1]

In [239]:
model = Model(5, 1, 1, 1, 5)
criterion = tc.nn.CrossEntropyLoss()
optimizer = tc.optim.SGD(model.parameters(), lr=0.01)

In [243]:
# tc.manual_seed(1)
for epoch in range(100):
    optimizer.zero_grad()
    hidden = model.initialize_hidden()
    out, hidden = model(inputs, hidden)
    loss = criterion(out, labels)

RuntimeError: invalid argument 2: size '[1 x 1 x 5]' is invalid for input with 30 elements at /opt/conda/conda-bld/pytorch_1512387374934/work/torch/lib/TH/THStorage.c:41

In [241]:
loss

Variable containing:
 1.7382
[torch.FloatTensor of size 1]

In [162]:
labels

Variable containing:
 1
 0
 2
 3
 3
 4
[torch.LongTensor of size 6]