In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [2]:
vocab = ["I like drinking coffee", "I love watching cricket", "I hate talking back", "We are going out"]

In [3]:
word_list = " ".join(vocab).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}


In [4]:
def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(np.eye(n_class)[input]) # similar to one hot representation
        target_batch.append(target)

    return Variable(torch.Tensor(input_batch)), Variable(torch.LongTensor(target_batch))

In [5]:
n_class = len(word_dict)
batch_size = len(vocab)
n_hidden = 5

In [6]:
dtype = torch.FloatTensor

In [7]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

        self.rnn = nn.RNN(input_size=n_class, hidden_size=n_hidden)
        self.W = nn.Parameter(torch.randn([n_hidden, n_class]).type(dtype))
        self.b = nn.Parameter(torch.randn([n_class]).type(dtype))

    def forward(self, hidden, X):
        X = X.transpose(0, 1) 
        outputs, hidden = self.rnn(X, hidden)
        outputs = outputs[-1] 
        model = torch.mm(outputs, self.W) + self.b 
        return model

In [8]:
model = Model()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
input_batch, target_batch = make_batch(vocab)

In [10]:
input_batch

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]],

        [[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]])

In [11]:
epochs = 1000
for epoch in range(1, epochs+1):
    optimizer.zero_grad()

    # hidden : [num_layers * num_directions, batch, hidden_size]
    hidden = Variable(torch.zeros(1, batch_size, n_hidden))
    
    output = model(hidden, input_batch)

    
    loss = criterion(output, target_batch)
    if epoch % 250 == 0:
        print(f"epoch: {epoch} ===> loss:{loss}")
        

    loss.backward()
    optimizer.step()

epoch: 250 ===> loss:1.2575438022613525
epoch: 500 ===> loss:0.558586597442627
epoch: 750 ===> loss:0.25878608226776123
epoch: 1000 ===> loss:0.13347597420215607


In [18]:
hidden = Variable(torch.zeros(1, batch_size, n_hidden))
predict = model(hidden, input_batch).data.max(1, keepdim=True)[1]
predict

tensor([[5],
        [0],
        [7],
        [1]])

In [19]:
f_in = []
for sen in vocab:
    word = sen.split()
    f_in.append([n for n in word[:-1]])

In [20]:
f_out = []
for index in predict:
    f_out.append(number_dict[index.item()])

In [21]:
for i in range(len(f_in)):
    print(f_in[i]," ==> ", f_out[i])

['I', 'like', 'drinking']  ==>  coffee
['I', 'love', 'watching']  ==>  cricket
['I', 'hate', 'talking']  ==>  back
['We', 'are', 'going']  ==>  out
