<a href="https://colab.research.google.com/github/sourcecode369/transformers-tutorials/blob/master/lstm/Autocomplete_TextLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

In [None]:
n_step = 3      # number of time steps (one LSTM cell application per step)
n_hidden = 128  # number of hidden units in the LSTM cell

# Vocabulary: the 26 lowercase letters, with lookup tables in both directions.
char_arr = list('abcdefghijklmnopqrstuvwxyz')
word_dict = dict(zip(char_arr, range(len(char_arr))))  # character -> class index
number_dict = dict(enumerate(char_arr))                # class index -> character
n_class = len(word_dict)

# Training words: every character but the last is input, the last is the target.
seq_data = ['gini', 'code', 'ragi', 'ruhi', 'sinh', 'sing']

In [None]:
class TextLSTM(nn.Module):
    """Single-layer LSTM followed by a linear projection onto the character classes.

    The layer sizes come from the module-level ``n_class`` (input and output
    width) and ``n_hidden`` (LSTM hidden size).
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden)
        # The projection is created without its own bias; a separate learnable
        # bias initialised to ones is added in ``forward``.
        self.W = nn.Linear(n_hidden, n_class, bias=False)
        self.b = nn.Parameter(torch.ones([n_class]))

    def forward(self, X):
        """Return class logits computed from the last time step of the LSTM.

        Args:
            X: batch-first input tensor; assumed to be shaped
                ``(batch, n_step, n_class)`` as produced by the one-hot
                encoding elsewhere in this file.

        Returns:
            Tensor of shape ``(batch, n_class)`` with unnormalised scores.
        """
        batch_size = X.size(0)
        # nn.LSTM is built with the default batch_first=False, so it expects
        # (seq_len, batch, features).
        seq_first = X.transpose(0, 1)

        # Allocate the zero initial states next to the input so the module
        # keeps working after being moved to another device.
        h_0 = torch.zeros(1, batch_size, self.lstm.hidden_size,
                          device=X.device, dtype=X.dtype)
        c_0 = torch.zeros(1, batch_size, self.lstm.hidden_size,
                          device=X.device, dtype=X.dtype)

        # nn.LSTM takes its initial state as (h_0, c_0): hidden state first.
        outputs, _ = self.lstm(seq_first, (h_0, c_0))
        last_step = outputs[-1]  # output at the final time step
        return self.W(last_step) + self.b

In [None]:
# Network, then the optimizer over its parameters, then the classification loss.
model = TextLSTM()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
def make_batch():
    """Build one-hot inputs and integer targets from ``seq_data``.

    For every word, all characters except the last form the input sequence
    and the last character is the class to predict.

    Returns:
        tuple: ``(input_batch, target_batch)``. ``input_batch`` is a list with
        one array per word (one one-hot row of width ``n_class`` per input
        character); ``target_batch`` is a list of integer class indices.
    """
    # Build the identity matrix once; indexing its rows yields one-hot vectors.
    one_hot = np.eye(n_class)
    input_batch, target_batch = [], []

    for seq in seq_data:
        char_ids = [word_dict[ch] for ch in seq[:-1]]  # e.g. 'g', 'i', 'n' for 'gini'
        input_batch.append(one_hot[char_ids])
        target_batch.append(word_dict[seq[-1]])        # e.g. 'i' for 'gini'

    return input_batch, target_batch

In [None]:
input_batch, target_batch = make_batch()
# Stack the per-word arrays into a single ndarray first: building a tensor
# directly from a list of ndarrays is slow and warns in recent PyTorch releases.
input_batch = torch.tensor(np.array(input_batch), dtype=torch.float32)
# Class indices are stored as int64.
target_batch = torch.tensor(target_batch, dtype=torch.long)

In [None]:
# Full-batch training: every iteration runs the whole dataset through the model.
for epoch in range(5000):
    # Drop gradients accumulated by the previous iteration.
    model.zero_grad()

    output = model(input_batch)
    loss = criterion(output, target_batch)

    loss.backward()
    optimizer.step()

    # Report the loss computed above (before this update) every 1000 epochs.
    if not (epoch + 1) % 1000:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

Epoch: 1000 cost = 0.231241
Epoch: 2000 cost = 0.231101
Epoch: 3000 cost = 0.231071
Epoch: 4000 cost = 0.231060
Epoch: 5000 cost = 0.231055


In [None]:
# Prefix shown for each word: every character except the last, the same split
# make_batch() uses, so the printout stays correct for words of any length.
inputs = [sen[:-1] for sen in seq_data]

# Inference only, so skip autograd bookkeeping instead of reaching into `.data`.
with torch.no_grad():
    predict = model(input_batch).argmax(dim=1, keepdim=True)

# Squeeze only the class axis so a batch of one still iterates as one row.
print(inputs, '->', [number_dict[n.item()] for n in predict.squeeze(1)])

['gin', 'cod', 'rag', 'ruh', 'sin', 'sin'] -> ['i', 'e', 'i', 'i', 'g', 'g']
