# Text generation using Keras and TensorFlow

From: https://machinelearningmastery.com/develop-character-based-neural-language-model-keras/

## Load Data

In [1]:
from numpy import array
from pickle import dump
from pickle import load
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [2]:
# load doc into memory
def load_doc(filename):
    """Read a whole text file and return its contents as one string.

    Args:
        filename: path of the file to read (opened in text mode, read only).

    Returns:
        The complete text of the file.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # the with-block closes the handle even when read() raises
    with open(filename, 'r') as file:
        return file.read()

In [8]:
# load the text and keep only its first 20000 characters
raw_text = load_doc('austen.txt')[:20000]

In [9]:
# clean: split on any run of whitespace (newlines included) and re-join with
# single spaces, which also drops leading and trailing whitespace
tokens = raw_text.split()
raw_text = ' '.join(tokens)

In [10]:
# organize into sequences of characters
# number of input characters per sequence
length = 60
# stride between the starts of consecutive sequences
sep = 1
# every window holds `length` input characters plus the one character to predict;
# stepping the range by `sep` keeps the window size fixed for any stride
# (the previous indexing grew the window to length + sep and skipped the start
# of the text whenever sep was not 1)
sequences = [raw_text[i - length:i + 1] for i in range(length, len(raw_text), sep)]
print('Total Sequences: %d' % len(sequences))

Total Sequences: 19788


In [11]:
# save sequences to file, one sequence per line
def save_doc(lines, filename):
    """Write an iterable of strings to a text file, one item per line.

    Args:
        lines: iterable of strings; they are joined with a newline and no
            trailing newline is written.
        filename: path of the file to create or overwrite.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    data = '\n'.join(lines)
    # the with-block flushes and closes the handle even when write() raises
    with open(filename, 'w') as file:
        file.write(data)
    
# write the character sequences to disk
out_filename = 'char_sequences.txt'
save_doc(lines=sequences, filename=out_filename)

## Train Language Model

### Load Data

In [12]:
# load the contents of char_sequences.txt (this rebinds raw_text to that file's text)
in_filename = 'char_sequences.txt'
raw_text = load_doc(in_filename)
# one entry per line of the file; raw_text itself is kept unsplit
lines = raw_text.split('\n')

### Encode Sequences

In [13]:
# vocabulary: every distinct character of raw_text, in sorted order
chars = sorted(set(raw_text))
# map each character to its position in the sorted vocabulary
mapping = {char: index for index, char in enumerate(chars)}

In [14]:
# integer encode every line, character by character, via the mapping
sequences = [[mapping[char] for char in line] for line in lines]

In [15]:
# vocabulary size: number of entries in the character-to-integer mapping
vocab_size = len(mapping)
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 64


In [16]:
# convert to a numpy array so the sequences can be sliced by column
sequences = array(sequences)
# inputs: every column except the last
X = sequences[:, :-1]
# target: the last column
y = sequences[:, -1]

In [17]:
# one hot encode each input sequence (one to_categorical call per row of X)
sequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = array(sequences)
# one hot encode the targets with the same number of classes
y = to_categorical(y, num_classes=vocab_size)
# NOTE(review): X and y are overwritten by their encoded forms, so running this
# cell twice without re-running the previous cell would encode encoded data

## Fit Model

In [18]:
# define model: one LSTM layer feeding a softmax over the vocabulary
model = Sequential()
# the input shape is read from the encoded data (axes 1 and 2 of X)
model.add(LSTM(75, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the cell output
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 75)                42000     
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4864      
Total params: 46,864
Trainable params: 46,864
Non-trainable params: 0
_________________________________________________________________
None


In [19]:
# compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# fit model
model.fit(x=X, y=y, epochs=100, verbose=2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 26s - loss: 3.0003 - acc: 0.1851
Epoch 2/100
 - 26s - loss: 2.6224 - acc: 0.2889
Epoch 3/100
 - 27s - loss: 2.4211 - acc: 0.3258
Epoch 4/100
 - 29s - loss: 2.3205 - acc: 0.3460
Epoch 5/100
 - 34s - loss: 2.2480 - acc: 0.3620
Epoch 6/100
 - 31s - loss: 2.1907 - acc: 0.3776
Epoch 7/100
 - 32s - loss: 2.1404 - acc: 0.3896
Epoch 8/100
 - 29s - loss: 2.0918 - acc: 0.3997
Epoch 9/100
 - 31s - loss: 2.0493 - acc: 0.4107
Epoch 10/100
 - 27s - loss: 2.0082 - acc: 0.4176
Epoch 11/100
 - 27s - loss: 1.9675 - acc: 0.4285
Epoch 12/100
 - 38s - loss: 1.9308 - acc: 0.4386
Epoch 13/100
 - 28s - loss: 1.8967 - acc: 0.4465
Epoch 14/100
 - 27s - loss: 1.8626 - acc: 0.4539
Epoch 15/100
 - 27s - loss: 1.8324 - acc: 0.4614
Epoch 16/100
 - 28s - loss: 1.8038 - acc: 0.4669
Epoch 17/100
 - 28s - loss: 1.7774 - acc: 0.4754
Epoch 18/100
 - 28s - loss: 1.7500 - acc: 0.4826
Epoch 19/100
 - 28s - loss: 1.7254 - acc: 0.4872
Epoch 20/100
 - 27s - loss: 1.

<keras.callbacks.History at 0x16b02d30>

## Save Model

In [20]:
# save the model to file
model.save('model.h5')

# save the mapping; the with-block guarantees the pickle is flushed and the
# handle closed (the bare open() call was never closed)
with open('mapping.pkl', 'wb') as mapping_file:
    dump(mapping, mapping_file)

## Generate Text

In [21]:
# load the model
model = load_model('model.h5')

# load the mapping; the with-block closes the handle once it has been read
# NOTE: unpickling can execute arbitrary code, so only load a mapping.pkl
# that this notebook (or another trusted source) produced
with open('mapping.pkl', 'rb') as mapping_file:
    mapping = load(mapping_file)

In [22]:
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    """Extend seed_text one predicted character at a time.

    Args:
        model: object exposing predict_classes(encoded, verbose=0), returning
            a sequence whose first element is the predicted class index.
        mapping: dict from character to integer index.
        seq_length: number of trailing characters fed to the model per step.
        seed_text: starting text; every character must be a key of mapping.
        n_chars: number of prediction steps to run.

    Returns:
        seed_text followed by the generated characters.

    Raises:
        KeyError: if the text contains a character missing from mapping.
    """
    # invert the mapping once instead of scanning it on every step
    index_to_char = {index: char for char, index in mapping.items()}
    in_text = seed_text
    # generate a fixed number of characters
    for _ in range(n_chars):
        # encode the characters as integers
        encoded = [mapping[char] for char in in_text]
        # keep only the last seq_length items (truncating='pre' drops from the front)
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')
        # one hot encode
        encoded = to_categorical(encoded, num_classes=len(mapping))
        # predict character
        yhat = model.predict_classes(encoded, verbose=0)
        # reverse map integer to character; an unknown index contributes nothing
        out_char = index_to_char.get(int(yhat[0]), '')
        # append the looked-up character (previously the loop variable `char`
        # was appended, which only matched out_char when a match was found)
        in_text += out_char
    return in_text

In [23]:
# test start of rhyme
print(generate_seq(model, mapping, length, 'The family of Dashwood had lon', length*2))
# test mid-line
print(generate_seq(model, mapping, length, 'settled in sussex', length*2))
# test not in original
print(generate_seq(model, mapping, length, 'I love you ', length*20))

The family of Dashwood had long general are consolation and her mother was seave on his present in his own to dear dealing for them thenessived to dea
settled in sussex. Their every commone of the produne of his own sore of his nether the mother was sended a sum, and there intention to t
I love you tread, and then the present of first them therefore and had not to them any was all, that you are pounds a-piece. But he deale the condict only considerable a years of his nother of such a thought of it. They think them any was all the fortune of her sisters betongered to live in event dore for them and sentien what year fortune of the produne of his was sead. The rester father she would have lefey inconvented a goung in every commone of the produne of his wishes, who her hand, that you abe alion the latee so comelf that you are pounds a-piece. But he deale the condict only considerable a years of his nother of such a thought of it. They think them any was all the fortune of her sisters beto