In [2]:
import numpy as np

from gen_char import read_file, process_data, build_model, SampleText

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, LSTM, Dense

import warnings
warnings.filterwarnings('ignore')

## Parameter values

In [3]:
# Windowing parameters handed to process_data; x comes out with `maxlen` timesteps.
# NOTE(review): `step` is presumably the stride between windows — confirm in gen_char.
maxlen, step = 200, 3

# Model / training sizes: LSTM unit count and the batch size passed to model.fit.
hidden_nodes, batch_size = 128, 256

## Reading input file

In [4]:
# Load the raw corpus, then derive the vocabulary `chars`: every distinct
# character found in the file, in sorted order (per the original note these
# are the letters A-Z / a-z plus the newline character).
input_path = 'first_name.txt'
text = read_file(input_path)
chars = list(set(text))
chars.sort()

In [5]:
# Two-way lookup tables used for one-hot encoding:
#   char_indices: character -> position in `chars`
#   indices_char: position in `chars` -> character
char_indices = dict(zip(chars, range(len(chars))))
indices_char = dict(enumerate(chars))

## Preparing the data

In [6]:
# process_data yields four values:
#   x, y                  - one-hot encodings of the inputs and of the labels
#   sentences, next_chars - the same data in raw (un-encoded) form
x, y, sentences, next_chars = process_data(
    text,
    chars,
    char_indices,
    maxlen,
    step,
)
# Show both array shapes as the cell output.
(x.shape, y.shape)

((2851, 200, 52), (2851, 52))

## Model Architecture

In [28]:
def build_model(input_shape=None, output=None):
    """Build and compile the single-layer LSTM character model.

    Args:
        input_shape: Shape tuple forwarded to the LSTM layer's ``input_shape``
            argument. When omitted, falls back to ``(maxlen, len(chars))``
            from the notebook globals, which is what the zero-argument
            version of this function hard-coded.
        output: Number of units in the softmax output layer. When omitted,
            falls back to ``len(chars)``.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        RMSprop optimizer constructed with its default arguments).
    """
    # This definition shadows the `build_model` imported from gen_char, and the
    # notebook later calls `build_model(input_shape, output)`. Accepting both
    # arguments (with defaults) keeps a fresh Restart & Run All from failing
    # with a TypeError while still supporting the zero-argument call.
    if input_shape is None:
        input_shape = (maxlen, len(chars))
    if output is None:
        output = len(chars)

    model = Sequential()
    model.add(LSTM(hidden_nodes, input_shape=input_shape))
    model.add(Dense(output, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.RMSprop())

    return model

In [8]:
# Size the network from the vocabulary: one output unit per character, and
# an input of `maxlen` steps with one feature per character.
output = len(chars)
input_shape = (maxlen, output)
model = build_model(input_shape, output)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 128)               92672     
                                                                 
 dense_1 (Dense)             (None, 52)                6708      
                                                                 
Total params: 99,380
Trainable params: 99,380
Non-trainable params: 0
_________________________________________________________________


## Helper functions and Callbacks

## Training the Model

In [9]:
# Train for 150 epochs. The SampleText callback comes from gen_char;
# presumably it is what prints the per-epoch "Generated Text" samples
# shown in the training log below.
model.fit(
    x,
    y,
    batch_size=batch_size,
    epochs=150,
    callbacks=[
        SampleText(text, chars, char_indices, indices_char, maxlen),
    ],
)

Epoch 1/150
diversity: 0.5
Generated Text : ['r', 'a', 'n', 'hn', 'fnd', 'n', 't', 'ir', 'iun', 'r', 'a', 'rt', 'SQ', 't', 'r', 'n', 'tsi', 'ia', 'd', 'r', 'i', 'm', 'aa', 'r', 'r', 'ai', 'r', 'tk', 'nr', 'ra', 'ta', 'r', 't', 'ani', 'anr', 'g', 'titar', 'h', 'arnantta', 'e', 'r', 'a', 'rri', 'ea', 'ei', 'nrh', 'n', 'k', 'rh', 'i', 'r', 'a', 'a', 'era', 'esrenairi', 'ai', 'daai', 'ntii']

Epoch 2/150
diversity: 0.5
Generated Text : ['iaaih', 'her', 'tia', 'a', 'ajgsaraiaaaiaia', 'ah', 'aAil', 'Snaahiahsaa', 'yia', 'iiiaa', 'rn', 'ea', 'ra', 'Ph', 'haraid', 'iaa', 'ahS', 'j', 'i', 'uh', 'ia', 'ai', 'iiih', 'hiaa', 'iaaiihanSa', 'aha', 'iS', 'a', 'ahSsriaha', 'ahta', 'rs', 'S', 'hnSSaSaaa', 'iianahiihaha', 'ath', 'i', 'a', 'raab', 'SaA']

Epoch 3/150
diversity: 0.5
Generated Text : ['aaan', 'aaa', 'aara', 'aye', 'srhaeannaaaeshaadjathiinaaaaeaairhh', 'aaytananaaeiaairaantaaa', 'iayhaaaaa', 'a', 'aaJaaiayaa', 'uhmaaninarSaaaaaahaanhnniniiaaiailaaad', 'ina', 'y', 'emaaia', 'aird', 'aaaniit

<keras.callbacks.History at 0x7f65803737c0>