<a href="https://colab.research.google.com/github/square-1111/Natural-Language-Generation/blob/master/NNLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [0]:
sentences = ['I love music', 'I play football', 'I love mathematics', 'Food is delicious']

In [0]:
words = list(set(" ".join(sentences).split()))

In [4]:
words_dict = {w:i for i,w in enumerate(words)}
num_dict = {i:w for i,w in enumerate(words)}
words_dict, num_dict

({'Food': 3,
  'I': 2,
  'delicious': 8,
  'football': 6,
  'is': 0,
  'love': 4,
  'mathematics': 7,
  'music': 5,
  'play': 1},
 {0: 'is',
  1: 'play',
  2: 'I',
  3: 'Food',
  4: 'love',
  5: 'music',
  6: 'football',
  7: 'mathematics',
  8: 'delicious'})

In [0]:
n_class = len(words_dict) # number of Vocabulary

In [0]:
# NNLM Parameter
n_step = 2 # n-1 in paper
n_hidden = 2 # h in paper
m = 2 # m in paper

In [7]:
words[:-1]

['is', 'play', 'I', 'Food', 'love', 'music', 'football', 'mathematics']

In [0]:
def make_batch(sentences):
  input_batch = []
  target_batch = []

  for sen in sentences:
    word = sen.split()
    input = [words_dict[term] for term in word[:-1]]
    output = [words_dict[word[-1]]]

    input_batch.append(input)
    target_batch.append(output)

  return input_batch, target_batch


In [9]:
make_batch(sentences)

([[2, 4], [2, 1], [2, 4], [3, 0]], [[5], [6], [7], [8]])

In [0]:
class NNLM(nn.Module):
  def __init__(self):
    super(NNLM, self).__init__()
    self.C = nn.Embedding(n_class, m)
    self.H = nn.Parameter(torch.randn(n_step*m, n_hidden).type(torch.FloatTensor))
    self.W = nn.Parameter(torch.randn(n_step*m, n_class).type(torch.FloatTensor))
    self.d = nn.Parameter(torch.randn(n_hidden).type(torch.FloatTensor))
    self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(torch.FloatTensor))
    self.b = nn.Parameter(torch.randn(n_class).type(torch.FloatTensor))
  
  def forward(self, X):
    X = self.C(X)
    X = X.view(-1, n_step*m)
    tanh = torch.tanh(self.d + torch.mm(X, self.H))
    output = self.b + torch.mm(X,self.W) + torch.mm(tanh, self.U)
    return output

In [0]:
model = NNLM()

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
list(model.named_parameters())

[('H', Parameter containing:
  tensor([[-0.3808,  0.2286],
          [-0.3603, -0.5479],
          [-0.0034, -0.8887],
          [ 0.1994, -0.8191]], requires_grad=True)),
 ('W', Parameter containing:
  tensor([[ 1.3340,  1.1089,  1.1024, -0.1865,  0.2948,  1.0652, -0.1043, -0.6568,
           -1.4329],
          [-1.0706,  0.1257,  0.5168, -0.4716, -1.1019,  0.0293, -0.0165, -1.5264,
            0.1736],
          [ 1.5681, -1.4069,  0.1139,  0.2738, -0.7006,  0.5726,  0.1446,  0.5148,
           -0.6364],
          [-0.1973,  0.5885, -1.1763,  1.4654,  1.8609,  1.4203, -1.1583, -1.0705,
           -1.8272]], requires_grad=True)),
 ('d', Parameter containing:
  tensor([0.9058, 1.0286], requires_grad=True)),
 ('U', Parameter containing:
  tensor([[-3.7860e-01, -5.8057e-01, -2.2433e-01,  1.2631e-01, -1.4523e+00,
            1.0511e-05,  1.1851e+00,  1.3208e-01, -5.9338e-01],
          [ 6.2613e-01,  1.7651e-01, -2.7484e-01, -1.3838e+00,  1.8908e+00,
           -9.8183e-01,  2.2769e-01, 

In [0]:
input_batch, target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

In [0]:
import plotly.express as px
from IPython.display import clear_output
import pandas as pd

In [0]:
def plot(df):
  clear_output()  
  fig = px.line(df, x="epoch", y="loss", title='Loss visualization')
  fig.show()

In [0]:
# Training
loss_df = pd.DataFrame({'epoch':[],'loss':[]})

In [18]:
for epoch in range(5000):
  optimizer.zero_grad()
  output = model(input_batch)

  loss = criterion(output,target_batch.squeeze(1))
  pd.DataFrame(np.array([[epoch, loss]]), columns=['epoch','loss']).append(loss_df, ignore_index=True)
  if (epoch+1)%1000 == 0:
    # plot(loss_df)
    print('Epoch:', '%03d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

  loss.backward()
  optimizer.step()

Epoch: 1000 cost = 0.388519
Epoch: 2000 cost = 0.355393
Epoch: 3000 cost = 0.349795
Epoch: 4000 cost = 0.348031
Epoch: 5000 cost = 0.347307


In [0]:
# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]


In [20]:
predict

tensor([[5],
        [6],
        [5],
        [8]])

In [22]:
print([sen.split()[:2] for sen in sentences], '->', [num_dict[n.item()] for n in predict.squeeze()])

[['I', 'love'], ['I', 'play'], ['I', 'love'], ['Food', 'is']] -> ['music', 'football', 'music', 'delicious']
