In [58]:
import torch
from torch import nn, optim
from torch.autograd import Variable
import numpy as np
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as Data

In [59]:
# Data creation
# Toy next-character task: the network reads "hihell" and must emit "ihello".
idx2char = ['h', 'i', 'e', 'l', 'o']  # class index -> character
x_data = [[0, 1, 0, 2, 3, 3]]  # hihell

# The one-hot rows are derived from x_data instead of being typed by hand,
# so the index sequence and its encoding can never drift apart.
x_one_hot = [[[1 if k == idx else 0 for k in range(len(idx2char))]
              for idx in seq]
             for seq in x_data]

y_data = [1, 0, 2, 3, 3, 4]  # ihello


# torch.autograd.Variable is a deprecated wrapper that simply returns a
# tensor; build the tensors directly with the dtype the model needs.
X = torch.tensor(x_one_hot, dtype=torch.float32)  # shape (1, 6, 5)
Y = torch.tensor(y_data)  # integer class index per time step

In [60]:
class RNN(nn.Module):
    """Vanilla RNN followed by a linear layer that classifies every time step.

    The module owns its loss function and optimizer so that a single call to
    ``train_`` performs one complete optimisation step.

    Args:
        hidden_size: number of features in the RNN hidden state.
        input_dim: number of features of each input time step.
        num_classes: number of output classes predicted per time step.
        sequence_length: number of time steps in every input sequence.
        num_layers: number of stacked recurrent layers.
        lr: learning rate handed to the Adam optimizer (default 0.1).
    """

    def __init__(self, hidden_size, input_dim, num_classes, sequence_length, num_layers, lr=0.1):
        super(RNN, self).__init__()
        # RNN parameters
        self.hidden_size = hidden_size
        self.input_dim = input_dim
        self.num_classes = num_classes
        self.sequence_length = sequence_length
        self.num_layers = num_layers
        self.lr = lr
        self._build_net()

    def _build_net(self):
        """Create the layers, the loss function and the optimizer."""
        # batch_first=True: the input data shape is (batch_size, seq_length, input_dim);
        # otherwise it would be (seq_length, batch_size, input_dim).
        self.rnn1 = nn.RNN(input_size=self.input_dim, hidden_size=self.hidden_size,
                           num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.num_classes)

        # Cost definition & optimizer. The optimizer must be created after the
        # layers above so that self.parameters() already contains them.
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return per-time-step class scores.

        Args:
            x: tensor holding ``batch * sequence_length * input_dim`` values
                with the batch as its first dimension, e.g.
                ``(batch, sequence_length, input_dim)`` or the flattened
                ``(batch, sequence_length * input_dim)``.

        Returns:
            Tensor of shape ``(batch * sequence_length, num_classes)``.
        """
        # Inputs for nn.RNN: input_data & h_0.
        # h_0 (the initial hidden state for each element in the batch) is not
        # passed here, so nn.RNN uses its default of zeros.
        # Reshaping is not in-place: the result has to be re-bound to x,
        # otherwise the input reaches the RNN in its original shape.
        x = x.reshape(x.size(0), self.sequence_length, self.input_dim)
        outputs, _ = self.rnn1(x)
        # Reshape output from (batch_size, seq_length, hidden_size) to
        # (batch_size * seq_length, hidden_size). reshape() is used instead of
        # view() because the batch_first output may be non-contiguous when
        # batch_size > 1.
        outputs = outputs.reshape(-1, self.hidden_size)
        outputs = self.fc(outputs)
        return outputs

    def train_(self, train_X, train_Y):
        """Run one optimisation step on a single batch.

        Args:
            train_X: input batch accepted by ``forward``.
            train_Y: target class indices, one per row of the ``forward`` output.

        Returns:
            Tuple ``(cost, outputs)``: the loss tensor of this step (also
            stored on ``self.cost``) and the scores returned by ``forward``.
        """
        self.train()
        self.optimizer.zero_grad()
        outputs = self.forward(train_X)
        self.cost = self.criterion(outputs, train_Y)
        self.cost.backward()
        self.optimizer.step()
        return self.cost, outputs




In [61]:
# RNN parameters
hidden_size = 5       # size of the RNN hidden state
input_dim = 5         # width of one one-hot encoded input step
batch_size = 1        # one sentence
sequence_length = 6   # time steps per sentence
num_classes = 5       # number of characters the model chooses between

# Seed the global RNG before the model is built so that the randomly
# initialised weights (and therefore the training log) are the same after
# every "Restart Kernel -> Run All".
torch.manual_seed(0)

model = RNN(hidden_size, input_dim, num_classes, sequence_length, 1)

epochs = 15  # number of optimisation steps performed by the training loop


In [63]:
# One optimisation step per epoch on the single training sequence; after each
# step report the loss and the string obtained by greedy decoding.
for epoch in range(1, epochs + 1):
    cost, outputs = model.train_(X, Y)
    # Despite its name this holds class indices: the highest-scoring class
    # of every time step.
    one_hot = torch.argmax(outputs, dim=1)
    result_str = list(map(lambda c: idx2char[c], one_hot))
    loss_line = 'epoch: {}, loss: {}'.format(epoch, cost)
    print(loss_line)
    print('Predicted String: ', ''.join(result_str))

epoch: 1, loss: 0.05341649055480957
Predicted String:  ihello
epoch: 2, loss: 0.041533470153808594
Predicted String:  ihello
epoch: 3, loss: 0.03344957157969475
Predicted String:  ihello
epoch: 4, loss: 0.027612289413809776
Predicted String:  ihello
epoch: 5, loss: 0.02317102812230587
Predicted String:  ihello
epoch: 6, loss: 0.019658884033560753
Predicted String:  ihello
epoch: 7, loss: 0.016809701919555664
Predicted String:  ihello
epoch: 8, loss: 0.014463345520198345
Predicted String:  ihello
epoch: 9, loss: 0.012513796798884869
Predicted String:  ihello
epoch: 10, loss: 0.010885079391300678
Predicted String:  ihello
epoch: 11, loss: 0.009519259445369244
Predicted String:  ihello
epoch: 12, loss: 0.008370240218937397
Predicted String:  ihello
epoch: 13, loss: 0.0074005126953125
Predicted String:  ihello
epoch: 14, loss: 0.00657963752746582
Predicted String:  ihello
epoch: 15, loss: 0.005882898811250925
Predicted String:  ihello
