In [220]:
import torch
from torch import nn, optim
from torch.autograd import Variable
import numpy as np
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as Data

In [221]:
sample = 'if you want you'

# Vocabulary. Sorting makes the character <-> index mapping identical on every
# kernel run; iterating a bare set of strings depends on hash randomization,
# so list(set(...)) can yield a different order each time.
idx2char = sorted(set(sample))
char2idx = {c: i for i, c in enumerate(idx2char)}

# Next-character prediction: the input is the sample without its last
# character, the target is the sample without its first character.
sample_idx = [char2idx[c] for c in sample]
x_data = [sample_idx[:-1]]
y_data = [sample_idx[1:]]

# hyper parameters
epochs = 50
sequence_length = len(sample) - 1
num_classes = len(idx2char)
input_size = len(char2idx)
hidden_size = len(char2idx)
num_layers = 1
batch_size = 1

In [222]:
# Index tensors built from the nested Python lists of character indices.
x = torch.tensor(x_data)
y = torch.tensor(y_data)

# one_hot encoding
# Size the zero buffer from the index tensor itself (one row per character)
# instead of from the global sequence_length, so this also works when x holds
# more than one sequence.
flat_idx = x.view(-1, 1)
x_one_hot = torch.zeros(flat_idx.size(0), num_classes).scatter(1, flat_idx, 1)
x_one_hot = x_one_hot.view(x.size(0), x.size(1), num_classes)

# Plain tensors are used directly; the Variable wrapper is deprecated and
# adds nothing here.
X = x_one_hot
Y = y

In [223]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer, bundled with its own loss and optimizer.

    Args:
        num_classes: Output width of the final linear layer.
        input_size: Number of features per time step fed to ``nn.LSTM``.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_len: Time steps per sequence; used to reshape the input in ``forward``.
        lr: Learning rate handed to Adam (defaults to 0.1, the previous fixed value).
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_len, lr=0.1):
        super(LSTM, self).__init__()
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.lr = lr

        # nn.LSTM: applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
        # batch_first=True: inputs are (batch_size, sequence_length, input_dimension).
        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size,
                            num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.num_classes)
        self.criterion = nn.CrossEntropyLoss()
        # Created after the layers so self.parameters() already contains them.
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return per-step outputs of shape (batch, seq_len, num_classes).

        ``x`` is reshaped to (batch, seq_len, input_size) before entering the LSTM.
        """
        # view() returns a new tensor; the result has to be kept for the
        # reshape to have any effect.
        x = x.view(x.size(0), self.seq_len, self.input_size)

        # No (h_0, c_0) is passed: nn.LSTM then starts from zero states.
        out, _ = self.lstm(x)

        # nn.Linear acts on the last dimension, so the 3-D layout is kept;
        # callers index the class axis as dimension 2.
        out = self.fc(out)
        return out

    def train_(self, train_X, train_Y):
        """Run one optimization step on the given batch.

        Returns:
            A ``(cost, outputs)`` tuple: the cross-entropy loss tensor (also
            stored on ``self.cost``) and the outputs from the forward pass.
        """
        self.train()
        self.optimizer.zero_grad()
        outputs = self(train_X)
        # CrossEntropyLoss is fed one row of scores per time step and a flat
        # vector of target indices.
        self.cost = self.criterion(outputs.view(-1, self.num_classes), train_Y.view(-1))
        self.cost.backward()
        self.optimizer.step()
        return self.cost, outputs
        

In [224]:
# Build the model for the short sample using the hyper-parameters defined above.
model = LSTM(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_len=sequence_length,
)

In [225]:
# Train on the single sample and, after every step, decode the most likely
# character at each position to watch the prediction converge.
for epoch in range(1, epochs + 1):
    cost, outputs = model.train_(X, Y)

    # argmax over the class axis gives one predicted index per time step.
    predicted_idx = outputs.argmax(2).view(-1)
    predicted_chars = ''.join([idx2char[c] for c in predicted_idx])

    print('epoch: {}, loss: {}'.format(epoch, cost))
    print('Predicted String: ', predicted_chars)

epoch: 1, loss: 2.3664352893829346
Predicted String:  uuiuiiiuuuuuuu
epoch: 2, loss: 2.217012882232666
Predicted String:  uuuuuuuuuuuuuu
epoch: 3, loss: 2.0700409412384033
Predicted String:  uuyouuuuoouuou
epoch: 4, loss: 1.8808022737503052
Predicted String:  o yo  y  o yo 
epoch: 5, loss: 1.617344856262207
Predicted String:  o yo  y    yo 
epoch: 6, loss: 1.2875350713729858
Predicted String:    yo  yant yo 
epoch: 7, loss: 0.9702016115188599
Predicted String:    you yant yo 
epoch: 8, loss: 0.6603325605392456
Predicted String:    you yant you
epoch: 9, loss: 0.43520307540893555
Predicted String:  f you yant you
epoch: 10, loss: 0.287358820438385
Predicted String:  f you want you
epoch: 11, loss: 0.18535640835762024
Predicted String:  f you want you
epoch: 12, loss: 0.12194705009460449
Predicted String:  f you want you
epoch: 13, loss: 0.07975012809038162
Predicted String:  f you want you
epoch: 14, loss: 0.05235157534480095
Predicted String:  f you want you
epoch: 15, loss: 0.03547372

## What about a really long sentence?

In [250]:
# Training text for the longer example, assembled from three fragments.
sentence = "".join([
    "if you want to build a ship, don't drum up people together to ",
    "collect wood and don't assign them tasks and work, but rather ",
    "teach them to long for the endless immensity of the sea.",
])

In [259]:
# Vocabulary. Sorting makes the character <-> index mapping identical on every
# kernel run; iterating a bare set of strings depends on hash randomization.
char_set = sorted(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}

input_size = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
seq_length = 10
# NOTE: this value is not passed to the model constructor in the visible
# cells; the model falls back to its own learning rate.
learning_rate = 0.1
num_layers = 2
epochs = 500


dataX = []  # input windows, stored as lists of character indices
dataY = []  # target windows: the same text shifted one character ahead
for i in range(0, len(sentence) - seq_length):
    x_str = sentence[i:i + seq_length]
    y_str = sentence[i + 1:i + 1 + seq_length]
    print(i, x_str, '->', y_str)

    # Append directly rather than binding the index lists to `x` / `y`, so
    # those notebook-level names are not overwritten by this loop.
    dataX.append([char_dic[c] for c in x_str])
    dataY.append([char_dic[c] for c in y_str])

# Every window is fed to the model at once, so the batch is the whole dataset.
batch_size = len(dataX)

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [346]:
# torch.tensor infers an integer dtype from the Python ints, so the character
# indices are never round-tripped through float32 (torch.Tensor(...) always
# builds a float tensor and needed a .long() cast afterwards).
x_data = torch.tensor(dataX)
y_data = torch.tensor(dataY)

# one_hot encoding
# One row per character across all windows, then back to
# (windows, window length, num_classes).
flat_idx = x_data.view(-1, 1)
x_one_hot = torch.zeros(flat_idx.size(0), num_classes).scatter(1, flat_idx, 1)
x_one_hot = x_one_hot.view(x_data.size(0), x_data.size(1), num_classes)

# Plain tensors are used directly; the Variable wrapper is deprecated and
# adds nothing here.
X = x_one_hot
Y = y_data

In [347]:
class LSTM2(nn.Module):
    """Stacked LSTM plus linear layer whose forward pass returns flattened outputs.

    Args:
        num_classes: Output width of the final linear layer.
        input_size: Number of features per time step fed to ``nn.LSTM``.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_len: Time steps per sequence (stored on the instance; not read by ``forward``).
        lr: Learning rate handed to Adam (defaults to 0.1, the previous fixed value).
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_len, lr=0.1):
        super(LSTM2, self).__init__()
        self.num_classes = num_classes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_len = seq_len
        self.lr = lr

        # nn.LSTM: applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.
        # batch_first=True: inputs are (batch_size, sequence_length, input_dimension).
        self.lstm = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size,
                            num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.num_classes)
        self.criterion = nn.CrossEntropyLoss()
        # Created after the layers so self.parameters() already contains them.
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr)

    def forward(self, x):
        """Return outputs of shape (batch * sequence_length, num_classes)."""
        # No (h_0, c_0) is passed: nn.LSTM then starts from zero states.
        out, _ = self.lstm(x)
        # The LSTM output is not guaranteed to be contiguous in memory, so
        # .contiguous() is applied before view(). The width comes from the
        # instance, not from a notebook-level global, so the model also works
        # when it was built with a different hidden size.
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)
        return out

    def train_(self, train_X, train_Y):
        """Run one optimization step on the given batch.

        Returns:
            A ``(cost, outputs)`` tuple: the cross-entropy loss tensor (also
            stored on ``self.cost``) and the flattened outputs from the
            forward pass.
        """
        self.train()
        self.optimizer.zero_grad()
        outputs = self(train_X)
        # Targets are cast with .long() so float-typed index tensors are
        # accepted by CrossEntropyLoss as well.
        self.cost = self.criterion(outputs.view(-1, self.num_classes), train_Y.view(-1).long())
        self.cost.backward()
        self.optimizer.step()
        return self.cost, outputs
        

In [348]:
# Build the two-layer model for the long sentence from the settings above.
model2 = LSTM2(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_len=seq_length,
)

In [349]:
# Train on all windows at once; every 50th epoch, stitch the per-window
# predictions back into one line of text.
for epoch in range(1, epochs + 1):
    cost, outputs = model2.train_(X, Y)
    # Restore the (window, step, class) layout that the model flattened.
    outputs = outputs.view(-1, seq_length, num_classes)

    if epoch % 50:
        continue

    # One row of predicted character indices per window.
    for i, predicted in enumerate(outputs.argmax(2)):
        if i:
            # Consecutive windows overlap, so each later window contributes
            # only its final character.
            print(char_set[predicted[-1]], end = '')
        else:
            # The first window is printed in full, prefixed by its index.
            print(i, ''.join(char_set[c] for c in predicted), end = '')
    print('\n')


0 t t n tond ao tuipd tnseip' ton't so   t ite g,e to  the  th to de   aord and ton't sndipe the  thssd tnd tored tni tothe  thosh the  th to d to  the ths dheitn  tdiph an the tecs

0 p tou want to build a ship, don't drum up people together to collect wood and don't dssign them tasks and dork, but rather teach them to long for the dndless immensity of the seas

0 m you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.

0 p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.

0 l you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.

0 l you want to build a ship, don't drum up people together to collect wood and don't