In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

# Seed torch's global random number generator with a fixed value so that
# torch-generated random numbers are repeatable between runs.
torch.manual_seed(777) # reproducibility

<torch._C.Generator at 0x7f6c540ea630>

In [2]:
# Training corpus: one sentence. The samples built later in the notebook are
# fixed-length character windows sliced out of this string.
sentence = ("if you want to build a ship, don't drum up people together to "
  "collect wood and don't assign them tasks and work, but rather "
  "teach them to long for the endless immensity of the sea.")

In [3]:
# Vocabulary: the distinct characters of the sentence, in sorted order.
# Sorting matters: the iteration order of a set of strings changes between
# interpreter sessions (string hashes are randomized), which would give every
# character a different index on each run and make results irreproducible.
char_set = sorted(set(sentence))
# Reverse lookup: character -> its index in char_set.
char_dic = {w: i for i, w in enumerate(char_set)}

In [4]:
# Hyperparameters. The three sizes below are all tied to the vocabulary size
# because inputs are one-hot characters and outputs are scores per character.
learning_rate = 0.1 # step size handed to the Adam optimizer
num_epochs = 500 # number of passes of the training loop
input_size = len(char_set) # RNN input size (width of a one-hot vector)
hidden_size = len(char_set) # RNN hidden/output size per time step
num_classes = len(char_set) # final output size (one score per character)
sequence_length = 10 # length of each character window; an arbitrary choice
num_layers = 2 # number of stacked layers in the RNN

In [5]:
# Slide a window of `sequence_length` characters across the sentence. Each
# window is one input sample; the same window shifted one character to the
# right is its target, so the model learns to predict the next character.
dataX, dataY = [], []

for i in range(len(sentence) - sequence_length):
  x_str = sentence[i:i + sequence_length]
  y_str = sentence[i + 1:i + sequence_length + 1]
  print(i, x_str, '->', y_str)

  # Store both strings as lists of vocabulary indices.
  dataX.append([char_dic[c] for c in x_str])
  dataY.append([char_dic[c] for c in y_str])

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [6]:
# Number of (input, target) windows in the dataset.
# NOTE(review): not referenced by the code visible in this part of the notebook.
batch_size = len(dataX)

In [7]:
# Inputs are stored with torch.Tensor (the default floating-point tensor type);
# one_hot() casts them back to integer indices with .long().
x_data = torch.Tensor(dataX)
# Targets are class indices, kept as a LongTensor for CrossEntropyLoss.
y_data = torch.LongTensor(dataY)

In [8]:
# one hot encoding
def one_hot(x, num_classes):
  """One-hot encode a tensor of class indices along a new trailing dimension.

  Args:
    x: tensor of class indices in [0, num_classes), of any shape. Values are
      cast with .long(), so float-typed indices are accepted.
    num_classes: width of the one-hot vectors.

  Returns:
    A tensor of shape x.shape + (num_classes,) in torch's default floating
    dtype, on the same device as x, holding 1 at each index and 0 elsewhere.
  """
  # contiguous() keeps view() valid even when x is a non-contiguous slice.
  idx = x.long().contiguous().view(-1, 1)
  # One row per element of x, regardless of how many dimensions x has.
  flat = torch.zeros(idx.size(0), num_classes, device=x.device)
  flat.scatter_(1, idx, 1)
  return flat.view(*x.size(), num_classes)

# Expand every character index into a one-hot vector of width num_classes,
# giving a 3-D tensor: (number of windows, window length, num_classes).
x_one_hot = one_hot(x_data, num_classes)

In [9]:
# Wrap the training tensors for autograd.
# NOTE(review): since PyTorch 0.4 Variable is deprecated and simply returns a
# tensor, so on current versions these wrappers are not needed - confirm the
# minimum PyTorch version before removing them.
inputs = Variable(x_one_hot)
labels = Variable(y_data)

In [10]:
class LSTM(nn.Module):
  """Stacked LSTM followed by a linear layer applied at every time step.

  Args:
    num_classes: size of the final output (one score per class).
    input_size: number of features per time step.
    hidden_size: number of hidden units in each LSTM layer.
    num_layers: number of stacked LSTM layers.
    sequence_length: optional value that is only stored on the instance;
      forward() does not use it. When omitted, a module-level
      `sequence_length` is used if one exists (the previous behaviour),
      otherwise the attribute is None.
  """

  def __init__(self, num_classes, input_size, hidden_size, num_layers,
               sequence_length=None):
    super(LSTM, self).__init__()
    self.num_classes = num_classes
    self.num_layers = num_layers
    self.input_size = input_size
    self.hidden_size = hidden_size
    # Previously this read a global unconditionally, so the class raised
    # NameError wherever that global was missing. Keep the old value when it
    # is available, but no longer require it.
    if sequence_length is None:
      sequence_length = globals().get('sequence_length')
    self.sequence_length = sequence_length
    # Set parameters for RNN block
    # Note: batch_first=False by default.
    # When true, inputs are (batch_size, sequence_length, input_dimension)
    # instead of (sequence_length, batch_size, input_dimension)
    self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size,
                        num_layers = num_layers, batch_first = True)
    # Fully connected layer mapping each hidden state to class scores
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Run the LSTM over a batch and score every time step.

    Args:
      x: input of shape (batch, seq_len, input_size).

    Returns:
      Tensor of shape (batch * seq_len, num_classes) with unnormalized scores,
      one row per time step of every sequence.
    """
    # Zero initial hidden and cell states, created from x so they always share
    # its dtype and device (plain torch.zeros would stay float32 on the CPU).
    state_shape = (self.num_layers, x.size(0), self.hidden_size)
    h_0 = x.new_zeros(state_shape)
    c_0 = x.new_zeros(state_shape)

    # Propagate input through LSTM; the final (h_n, c_n) states are not needed.
    out, _ = self.lstm(x, (h_0, c_0))
    # view() needs contiguous memory, so call contiguous() before flattening
    # the batch and time dimensions into one.
    out = out.contiguous().view(-1, self.hidden_size)
    # Return outputs applied to fully connected layer
    return self.fc(out)

In [11]:
# Instantiate the model: a stacked LSTM plus a linear output layer, sized from
# the hyperparameters defined earlier in the notebook.
lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

In [12]:
# Set loss and optimizer function.
# CrossEntropyLoss takes raw scores and applies softmax internally, which is
# why the model's forward() ends with a plain linear layer.
criterion = torch.nn.CrossEntropyLoss() # Softmax is internally computed.
# Adam updates every parameter of the model at the configured learning rate.
optimizer = torch.optim.Adam(lstm.parameters(), lr = learning_rate)

In [13]:
# Train the model: every epoch is a single full-batch forward/backward pass.
for epoch in range(num_epochs):
  # Clear gradients left over from the previous step before computing new ones.
  optimizer.zero_grad()
  outputs = lstm(inputs)
  # The model emits one row per time step, so flatten the targets to match.
  loss = criterion(outputs, labels.view(-1))
  loss.backward()
  optimizer.step()

  # Best-scoring class for every time step, regrouped into one row per window.
  idx = outputs.max(1)[1].data.numpy().reshape(-1, sequence_length)
  # Decode only the last window as a quick progress indicator.
  result_str = [char_set[c] for c in idx[-1]]
  print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data))
  print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

epoch: 1, loss: 3.199
Predicted string:  aaaassssss
epoch: 2, loss: 2.946
Predicted string:            
epoch: 3, loss: 3.081
Predicted string:  e leeeeeee
epoch: 4, loss: 2.963
Predicted string:  eee       
epoch: 5, loss: 2.977
Predicted string:            
epoch: 6, loss: 2.885
Predicted string:            
epoch: 7, loss: 2.821
Predicted string:            
epoch: 8, loss: 2.793
Predicted string:        t   
epoch: 9, loss: 2.761
Predicted string:    t   t  t
epoch: 10, loss: 2.724
Predicted string:    t  o   t
epoch: 11, loss: 2.670
Predicted string:    t      o
epoch: 12, loss: 2.621
Predicted string:    t      o
epoch: 13, loss: 2.547
Predicted string:    t  t   t
epoch: 14, loss: 2.459
Predicted string:    t  t   t
epoch: 15, loss: 2.360
Predicted string:    t  o h t
epoch: 16, loss: 2.269
Predicted string:    theo h t
epoch: 17, loss: 2.181
Predicted string:    the  h t
epoch: 18, loss: 2.093
Predicted string:    the th t
epoch: 19, loss: 2.027
Predicted string:    theot  t
ep