In [1]:
import numpy as np
import mindspore
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Parameter, Tensor, ms_function

In [2]:
def make_batch(sentence, word_dict, n_class, max_len):
    """Build next-word-prediction samples from a single sentence.

    For every position ``i`` except the last, the sample input is the prefix
    ``words[:i + 1]`` encoded as word ids, right-padded with id ``0`` up to
    ``max_len`` and expanded to one-hot rows; the sample target is the id of
    ``words[i + 1]``.

    Args:
        sentence: Whitespace-separated text.
        word_dict: Mapping from word to integer id; every id must be a valid
            row index into an ``n_class`` x ``n_class`` identity matrix.
        n_class: Width of each one-hot row.
        max_len: Length every id sequence is padded to. A prefix longer than
            ``max_len`` is neither padded nor truncated.

    Returns:
        ``(input_batch, target_batch)``: a list of ``(len, n_class)`` float
        arrays and a list of integer target ids, one entry per prefix. Both
        lists are empty when the sentence has fewer than two words.

    Raises:
        KeyError: If a word of ``sentence`` is missing from ``word_dict``.

    Note:
        Padding uses id ``0``. If ``word_dict`` also assigns ``0`` to a real
        word, padded positions are indistinguishable from that word.
    """
    words = sentence.split()
    # The identity matrix is loop-invariant, so build it once. Fancy indexing
    # below copies the selected rows, so samples never share memory.
    one_hot = np.eye(n_class)

    input_batch = []
    target_batch = []
    for i, next_word in enumerate(words[1:]):
        prefix_ids = [word_dict[w] for w in words[:i + 1]]
        padded_ids = prefix_ids + [0] * (max_len - len(prefix_ids))
        input_batch.append(one_hot[padded_ids])
        target_batch.append(word_dict[next_word])

    return input_batch, target_batch

In [3]:
class BiLSTM(nn.Cell):
    """Bidirectional LSTM followed by a linear projection onto the vocabulary.

    Args:
        n_class: Size of each input feature vector and number of output logits.
        n_hidden: Hidden size of each LSTM direction.
        batch_size: Accepted by the constructor but not used inside this class.
    """

    def __init__(self, n_class, n_hidden, batch_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size=n_class, hidden_size=n_hidden, bidirectional=True)
        # Both directions are concatenated, hence 2 * n_hidden input features.
        self.W = nn.Dense(n_hidden * 2, n_class, has_bias=False)
        # Separate bias vector, initialised to ones and added after the projection.
        self.b = Parameter(Tensor(np.ones([n_class], dtype=np.float32), mindspore.float32), 'b')

    def construct(self, X):
        """Return logits computed from the LSTM output at the last sequence index.

        ``X`` has its first two axes swapped before entering the LSTM
        (assumes X arrives as (batch, seq_len, n_class) -- TODO confirm
        against the caller).
        """
        time_major = X.transpose((1, 0, 2))
        sequence_out, _ = self.lstm(time_major)
        last_step = sequence_out[-1]
        return self.W(last_step) + self.b

In [4]:
n_hidden = 5 # number of hidden units in one cell

sentence = (
    'Lorem ipsum dolor sit amet consectetur adipisicing elit '
    'sed do eiusmod tempor incididunt ut labore et dolore magna '
    'aliqua Ut enim ad minim veniam quis nostrud exercitation'
)

# Build the vocabulary once and in sorted order. Iterating a set of strings
# directly yields an order that can change between interpreter sessions (string
# hash randomisation), which would silently reshuffle every word id on each
# kernel restart. Sorting makes the ids stable, and deriving both mappings from
# the same list guarantees they are exact inverses.
vocab = sorted(set(sentence.split()))
word_dict = {w: i for i, w in enumerate(vocab)}     # word -> id
number_dict = {i: w for i, w in enumerate(vocab)}   # id -> word
n_class = len(word_dict)            # number of distinct words
max_len = len(sentence.split())     # total number of words in the sentence
vocab_size = len(word_dict)         # same value as n_class

In [5]:
# Encode the sentence into (one-hot prefix, next-word id) training pairs.
input_batch, target_batch = make_batch(sentence, word_dict, n_class, max_len)

# Wrap both lists as MindSpore tensors: float32 inputs, int32 class-id targets.
input_batch, target_batch = (
    Tensor(input_batch, mindspore.float32),
    Tensor(target_batch, mindspore.int32),
)
print(input_batch.shape, target_batch.shape)

# Number of samples, i.e. the length of the leading tensor axis.
batch_size = len(input_batch)

(26, 27, 27) (26,)


In [6]:
# Fix the global seed before the network is constructed so that parameter
# initialisation -- and with it the training curve printed below -- can be
# repeated after a kernel restart.
mindspore.set_seed(42)
model = BiLSTM(n_class, n_hidden, batch_size)

In [7]:
# Cross-entropy between the predicted logits and the integer next-word ids.
criterion = nn.CrossEntropyLoss()
# Adam over every trainable parameter of the model.
optimizer = nn.Adam(params=model.trainable_params(), learning_rate=1e-3)

In [8]:
def forward(inputs, targets):
    """Run the module-level ``model`` on ``inputs`` and return ``criterion``'s loss against ``targets``."""
    return criterion(model(inputs), targets)

In [9]:
# Function returning (loss, gradients); gradients are taken with respect to the
# optimizer's parameters only, not the positional inputs of `forward`.
grad_fn = ops.value_and_grad(forward, grad_position=None, weights=optimizer.parameters)

In [10]:
@ms_function
def train_step(inputs, targets):
    """Perform one optimisation step on a batch and return its loss."""
    step_loss, gradients = grad_fn(inputs, targets)
    # Apply the gradients through the module-level optimizer.
    optimizer(gradients)
    return step_loss

In [11]:
# Put the network into training mode before optimising.
model.set_train()

# Full-batch training: every step feeds the complete set of prefixes.
epoch = 10000
for step in range(epoch):
    loss = train_step(input_batch, target_batch)
    completed = step + 1
    if completed % 1000:
        continue
    # Report the loss once every 1000 steps.
    print('Epoch:', '%04d' % completed, 'cost = ', '{:.6f}'.format(loss.asnumpy()))

Epoch: 1000 cost =  2.585795
Epoch: 2000 cost =  2.581421
Epoch: 3000 cost =  2.569982
Epoch: 4000 cost =  2.311544
Epoch: 5000 cost =  1.974983
Epoch: 6000 cost =  1.053331
Epoch: 7000 cost =  0.681154
Epoch: 8000 cost =  0.568491
Epoch: 9000 cost =  0.448840
Epoch: 10000 cost =  0.375638


In [12]:
# Switch to inference mode and pick the highest-scoring word id per prefix.
model.set_train(False)
logits = model(input_batch)
predict = logits.asnumpy().argmax(axis=1)

# Show the source sentence, then the predicted next word for each prefix.
print(sentence)
predicted_words = [number_dict[n.item()] for n in predict.squeeze()]
print(predicted_words)

Lorem ipsum dolor sit amet consectetur adipisicing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation
['dolor', 'dolor', 'sit', 'amet', 'consectetur', 'adipisicing', 'elit', 'sed', 'sed', 'eiusmod', 'tempor', 'incididunt', 'ut', 'labore', 'et', 'dolore', 'magna', 'aliqua', 'ad', 'ad', 'ad', 'minim', 'veniam', 'quis', 'nostrud', 'exercitation']
