# Simple Language Model with MLP

Multi-layer Perceptron을 이용하여 간단한 language model을 학습시켜 보겠습니다.

In [2]:
## library import
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)
print(keras.__version__)

2.0.0-beta1
2.2.4-tf


## Training Sentence

In [3]:
## 학습시킬 문장
sentence = " if you want you"
## index를 주면 charcter로 바꿔주는 list
idx2char = list(set(sentence))
## character를 주면 index로 바꿔주는 dictionary
char2idx = {w: i for i, w in enumerate(idx2char)}

print(idx2char)
print(char2idx)

['a', 'w', ' ', 'u', 't', 'o', 'f', 'i', 'n', 'y']
{'a': 0, 'w': 1, ' ': 2, 'u': 3, 't': 4, 'o': 5, 'f': 6, 'i': 7, 'n': 8, 'y': 9}


## Hyper Parameters

In [4]:
data_dim = len(idx2char)
hidden_size = len(idx2char)
num_classes = len(idx2char)
sequence_length = 10  # Any arbitrary number
learning_rate = 0.1
batch_size = 1
training_epochs = 30
print(num_classes)

10


## Dataset

In [5]:
sentence_idx = [char2idx[c] for c in sentence]
dataX = [sentence_idx[:-1]]
dataY = [sentence_idx[1:]]

In [6]:
dataX = np.array(to_categorical(dataX, num_classes))
dataY = np.array(to_categorical(dataY, num_classes))
print(dataX.shape, dataY.shape)

(1, 15, 10) (1, 15, 10)


In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((dataX, dataY)).shuffle(
                buffer_size=1000).prefetch(buffer_size=batch_size).batch(batch_size).repeat()

## Model Function

In [8]:
def create_model():
    model = keras.Sequential()
    model.add(keras.layers.Dense(units=num_classes, activation='relu',
                                input_shape=(dataX.shape[1],dataX.shape[2])))
    model.add(keras.layers.Dense(units=num_classes, activation='softmax'))
    return model

In [9]:
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 15, 10)            110       
_________________________________________________________________
dense_1 (Dense)              (None, 15, 10)            110       
Total params: 220
Trainable params: 220
Non-trainable params: 0
_________________________________________________________________


## Loss & Optimizer

In [10]:
def loss(labels, logits):
    return keras.losses.categorical_crossentropy(labels, logits)

def adam_opt(learning_rate):
    return keras.optimizers.Adam(learning_rate)

## Model Compile

In [11]:
model.compile(optimizer=adam_opt(learning_rate),
              loss=loss)

## Training

In [12]:
model.fit(train_dataset, epochs=training_epochs,
                    steps_per_epoch=dataX.shape[0]//batch_size)

W0824 03:00:34.544486  2756 deprecation.py:323] From C:\Users\jwlee\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1c527491a90>

## Checking the Test Result

In [13]:
results = model.predict(dataX, steps=1)
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j is 0:  # print all for the first result to make a sentence
        print(''.join([idx2char[t] for t in index]), end='')
    else:
        print(idx2char[index[-1]], end='')

yf you yant you