# LSTM

Character-level text generation with an LSTM, based on https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

In [1]:
import sys
#!conda install --yes --prefix {sys.prefix} tensorflow

In [2]:
# load packages
import numpy
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
#from tensorflow.keras.utils import np_utils

In [3]:
# load text and convert to lowercase
filename = "Bowie.txt"
# Read inside a context manager so the file handle is closed as soon as the
# corpus is in memory instead of lingering until garbage collection.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [4]:
# Build the vocabulary (every distinct character of the corpus, sorted) and
# the two lookup tables: character -> index and index -> character.
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
int_to_char = {idx: ch for idx, ch in enumerate(chars)}

In [5]:
# display the index -> character lookup table built in the previous cell
int_to_char

{0: '\n',
 1: ' ',
 2: '!',
 3: '"',
 4: '&',
 5: "'",
 6: '(',
 7: ')',
 8: '*',
 9: ',',
 10: '-',
 11: '.',
 12: '/',
 13: '0',
 14: '1',
 15: '2',
 16: '3',
 17: '4',
 18: '5',
 19: '6',
 20: '7',
 21: '8',
 22: '9',
 23: ':',
 24: ';',
 25: '?',
 26: '[',
 27: ']',
 28: '_',
 29: 'a',
 30: 'b',
 31: 'c',
 32: 'd',
 33: 'e',
 34: 'f',
 35: 'g',
 36: 'h',
 37: 'i',
 38: 'j',
 39: 'k',
 40: 'l',
 41: 'm',
 42: 'n',
 43: 'o',
 44: 'p',
 45: 'q',
 46: 'r',
 47: 's',
 48: 't',
 49: 'u',
 50: 'v',
 51: 'w',
 52: 'x',
 53: 'y',
 54: 'z',
 55: '{',
 56: '}',
 57: 'ß',
 58: 'à',
 59: 'ä',
 60: 'è',
 61: 'é',
 62: 'ê',
 63: 'ñ',
 64: 'ò',
 65: 'ô',
 66: 'ö',
 67: 'ù',
 68: 'ü'}

In [6]:
# Report the corpus size and the vocabulary size.
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab (Different characters): ", n_vocab)

# Build the (input window -> next character) training pairs as integers.
# Every window of seq_length consecutive characters is one input pattern and
# the character that immediately follows it is the target.
seq_length = 100
encoded_text = [char_to_int[ch] for ch in raw_text]
dataX = [
    encoded_text[begin:begin + seq_length]
    for begin in range(n_chars - seq_length)
]
dataY = encoded_text[seq_length:]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)




Total Characters:  950732
Total Vocab (Different characters):  69
Total Patterns:  950632


In [7]:
# inspect the first encoded input pattern (a list of seq_length integer codes)
dataX[0]

[43,
 36,
 1,
 44,
 37,
 48,
 53,
 1,
 49,
 47,
 1,
 36,
 33,
 46,
 33,
 1,
 51,
 33,
 1,
 29,
 42,
 35,
 33,
 40,
 47,
 1,
 43,
 34,
 1,
 40,
 33,
 29,
 32,
 0,
 51,
 33,
 5,
 46,
 33,
 1,
 32,
 33,
 29,
 32,
 9,
 1,
 51,
 33,
 5,
 46,
 33,
 1,
 47,
 37,
 31,
 39,
 1,
 36,
 29,
 42,
 35,
 37,
 42,
 35,
 1,
 30,
 53,
 1,
 48,
 36,
 46,
 33,
 29,
 32,
 0,
 35,
 33,
 48,
 1,
 46,
 33,
 29,
 40,
 0,
 35,
 33,
 48,
 1,
 46,
 33,
 29,
 40,
 0,
 53,
 43,
 49,
 1,
 31,
 29,
 42]

In [8]:

# Arrange the integer patterns as [samples, time steps, features] and scale
# each character index into [0, 1) by dividing by the vocabulary size.
X = numpy.asarray(dataX).reshape(n_patterns, seq_length, 1) / float(n_vocab)
# One-hot encode the next-character targets.
y = to_categorical(dataY)

In [9]:
# check the input tensor dimensions: (n_patterns, seq_length, 1)
X.shape

(950632, 100, 1)

In [10]:
# Define the Keras network: a single LSTM layer followed by a small dense
# head, with dropout after each, ending in a softmax over the vocabulary.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Checkpoint callback: write the weights to disk whenever the training loss
# reaches a new minimum; epoch number and loss are embedded in the file name.
filepath = "weights-improvement3-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [11]:
# print the layer-by-layer output shapes and parameter counts of the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 256)               264192    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               25700     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 69)                6969      
Total params: 296,861
Trainable params: 296,861
Non-trainable params: 0
_________________________________________________________________


In [12]:
# ALERT: very computationally expensive!

# Resume from an earlier checkpoint: restore its weights into the model
# defined above, then compile again with the same loss and optimizer.
filename = "./weights-improvement3-09-1.9255.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Train; the checkpoint callback saves the weights after every epoch whose
# loss improves on the best seen so far.
model.fit(
    X,
    y,
    batch_size=300,
    epochs=100,
    callbacks=callbacks_list,
)

Epoch 1/100

Epoch 00001: loss improved from inf to 1.91644, saving model to weights-improvement3-01-1.9164.hdf5
Epoch 2/100

Epoch 00002: loss improved from 1.91644 to 1.86233, saving model to weights-improvement3-02-1.8623.hdf5
Epoch 3/100

Epoch 00003: loss did not improve from 1.86233
Epoch 4/100

Epoch 00004: loss improved from 1.86233 to 1.81375, saving model to weights-improvement3-04-1.8138.hdf5
Epoch 5/100

Epoch 00005: loss improved from 1.81375 to 1.80215, saving model to weights-improvement3-05-1.8022.hdf5
Epoch 6/100

Epoch 00006: loss improved from 1.80215 to 1.78124, saving model to weights-improvement3-06-1.7812.hdf5
Epoch 7/100

Epoch 00007: loss improved from 1.78124 to 1.75733, saving model to weights-improvement3-07-1.7573.hdf5
Epoch 8/100

Epoch 00008: loss did not improve from 1.75733
Epoch 9/100

Epoch 00009: loss did not improve from 1.75733
Epoch 10/100

Epoch 00010: loss improved from 1.75733 to 1.75175, saving model to weights-improvement3-10-1.7517.hdf5
Epoc

KeyboardInterrupt: 

In [None]:
# pick a random seed
# start = numpy.random.randint(0, len(dataX)-1)
# pattern = dataX[start]
# print("Seed:")
# print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# # generate characters
# for i in range(1000):
#     x = numpy.reshape(pattern, (1, len(pattern), 1))
#     x = x / float(n_vocab)
#     prediction = model.predict(x, verbose=0)
#     index = numpy.argmax(prediction)
#     result = int_to_char[index]
#     seq_in = [int_to_char[value] for value in pattern]
#     sys.stdout.write(result)
#     pattern.append(index)
#     pattern = pattern[1:len(pattern)]
# print("\nDone.")

In [14]:
# Rebuild the same architecture that was used for training so that the saved
# weights can be loaded into it.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

# load the network weights
filename = "./weights-improvement3-26-1.6012.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Pick a random seed pattern. numpy.random.randint excludes the upper bound,
# so passing len(dataX) makes every pattern (including the last) eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the window is extended with append() below, and appending to
# dataX[start] itself would leave that training pattern one element too long.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join(int_to_char[value] for value in pattern), "\"")
print("---------")

# Generate 1000 characters, one at a time.
for i in range(1000):
    # Shape and scale the current window exactly like the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the predicted character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" to stay
oh, you pretty things (oh, you pretty things)
don't you know you're driving your mamas and p "
---------
apas insane
oh, you pretty things (oh, you pretty things)
gond thar you're goyna bete mo to the goas the say the gone the sase the sas the say the say the wald
the say thet well the say the walte the sas the say i mote you
wouldn the walds the said the say the was the said
the say the walts the say i was she soeee th the saie the sas the say the walds and the shar whlh the say what she saadees wou thet then the sase th the say th the sai the sas the say the wall
the say the walt
and the seadeee of the saie
the say the wast th she soeee th the saie the sas th the sore the say i was she soeee th the saie the say th the soeee th the say the wald
the was the sooe the say i was she sail so the shine she saie the say to she sooe the say the wast th the say the wald
the say wou dotld lote
then you raae i'm soeer thing the seee the sas the say the walds and the saie the say 