In [27]:
# code by Tae Hwan Jung @graykode

# 2020.07.17. code review by Seungsoo Lee @teddy309
# add comments.

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor

sentences =["hi my name is seung soo", "oh really ?", "thank you"] #[ "i like dog", "i love coffee", "i hate milk"]

word_list = " ".join(sentences).split() #word_list에 각 단어 대입. 
word_list = list(set(word_list)) #중복제거
word_dict = {w: i for i, w in enumerate(word_list)} #word로 인덱스를 찾는 dictionary
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict) # number of Vocabulary

#test
print(word_list)
print(word_dict)
print(number_dict)
print(sentences[1])
print(sentences[1].split()[:0])
print(word_list[1:5:2])

['my', 'is', 'soo', 'name', 'you', 'really', 'thank', 'hi', 'seung', 'oh', '?']
{'my': 0, 'is': 1, 'soo': 2, 'name': 3, 'you': 4, 'really': 5, 'thank': 6, 'hi': 7, 'seung': 8, 'oh': 9, '?': 10}
{0: 'my', 1: 'is', 2: 'soo', 3: 'name', 4: 'you', 5: 'really', 6: 'thank', 7: 'hi', 8: 'seung', 9: 'oh', 10: '?'}
oh really ?
[]
['is', 'name']


In [28]:
# NNLM Parameter
n_step = 2 # n-1(n= layer 수) in paper (http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)
n_hidden = 2 # h in paper
m = 2 # m(차원 수) in paper

def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(input)
        target_batch.append(target)

    return input_batch, target_batch

# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m) #matrix C[h*(n-1)m]: 
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype)) #matrix H[h*(n-1)]: 가중치(입력층->은닉층)
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype)) #
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype)) #vetcor d[h*1]: 
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype)) #matrix U[V*h]: 
        self.b = nn.Parameter(torch.randn(n_class).type(dtype)) #vector b[V*1]:

    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class] #
        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden] #tanh(score:Ywt)
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
        return output

#test
input_batch, target_batch = make_batch(sentences)
print(input_batch)
print(target_batch)


[[7, 0, 3, 1, 8], [9, 5], [6]]
[2, 10, 4]


In [29]:
model = NNLM()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

# Training
for epoch in range(5000):

    optimizer.zero_grad()
    output = model(input_batch)

    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()

# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

# Test
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

ValueError: ignored