In [1]:
# Load LSTM network and generate text
import sys
import numpy

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# load ascii text and convert to lowercase


Using TensorFlow backend.


In [2]:
# Load the lyrics corpus, normalise it, and build the character vocabulary.
filename = 'all_lyrics_no-umlaut_edited2.txt'

# Read through a context manager so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open(filename) as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

# Drop unwanted characters: three stray high bytes (presumably leftovers of a
# mis-decoded multi-byte punctuation mark -- confirm against the corpus) plus
# double quotes and parentheses.
for unwanted in ('\x99', '\x80', '\xe2', '"', '(', ')'):
    raw_text = raw_text.replace(unwanted, '')

# create mapping of unique chars to integers, and a reverse mapping
chars = sorted(set(raw_text))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)


('Total Characters: ', 149347)
('Total Vocab: ', 49)


In [40]:

# prepare the dataset of input to output pairs encoded as integers:
# every window of `seq_length` characters is an input, and the character
# immediately following the window is its target.
seq_length = 100
dataX = []
dataY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize the integer codes by the vocabulary size
X = X / float(n_vocab)
# one hot encode the output variable; pin the class count to the vocabulary
# size so the width of y does not depend on which characters happen to occur
# as targets (to_categorical otherwise infers it from max(dataY) + 1)
y = np_utils.to_categorical(dataY, num_classes=n_vocab)


# define the LSTM model: two stacked 256-unit LSTM layers, each followed by
# dropout, and a softmax over the whole character vocabulary
model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.4))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.4))
model.add(Dense(n_vocab, activation='softmax'))



('Total Patterns: ', 149247)


In [None]:

# Compile the network for multi-class character prediction and train it,
# writing a checkpoint file through the ModelCheckpoint callback.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# define the checkpoint
# The file name embeds the epoch number and the training loss; the callback is
# configured to monitor 'loss' with save_best_only=True and mode='min'.
filepath="aws_256-256_0.4_char_weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]



# Earlier, shorter run kept for reference:
#model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)

model.fit(X, y, epochs=100, batch_size=128, callbacks=callbacks_list)

# NOTE(review): this replaces the weights produced by the fit() call above with
# the contents of a file whose name does not follow the `filepath` pattern used
# in this cell -- presumably a leftover from an earlier experiment; confirm it
# is still wanted. It also rebinds `filename`, which earlier held the corpus path.
filename = "weights-improvement-19-1.8336.hdf5"
model.load_weights(filename)

Epoch 1/100
Epoch 2/100
Epoch 3/100
 23424/149247 [===>..........................] - ETA: 289s - loss: 1.8070

In [28]:

def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by a temperature.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalised, then a single index is drawn from the result.

    Parameters
    ----------
    preds : array-like of float
        Probabilities for each index (1-D).
    temperature : float, optional
        Values below 1 sharpen the distribution, values above 1 flatten it.
        A value of 0 or less selects the most probable index directly
        (greedy decoding) instead of dividing by zero.

    Returns
    -------
    integer
        The sampled index (as returned by ``numpy.argmax``).
    """
    preds = numpy.asarray(preds).astype('float64')
    if temperature <= 0:
        # The zero-temperature limit of the rescaled distribution is argmax.
        return numpy.argmax(preds)
    # log(0) == -inf is the correct logit for an impossible index, so the
    # divide-by-zero warning it triggers is silenced rather than acted on.
    with numpy.errstate(divide='ignore'):
        logits = numpy.log(preds) / temperature
    # Shift so the largest logit is 0: mathematically a no-op after
    # normalisation, but it keeps exp() from underflowing every entry to 0
    # (which would yield NaN probabilities) at very low temperatures.
    logits = logits - numpy.max(logits)
    exp_preds = numpy.exp(logits)
    preds = exp_preds / numpy.sum(exp_preds)
    probas = numpy.random.multinomial(1, preds, 1)
    return numpy.argmax(probas)

In [5]:

# Load previously saved weights into `model` from the file named below.
# The name follows the "{epoch:02d}-{loss:.4f}" checkpoint pattern used by the
# training cell, i.e. presumably epoch 56 with loss 1.2562 -- confirm.
filename = "aws_256-256_0.4_char_weights-improvement-56-1.2562.hdf5"
model.load_weights(filename)

In [11]:
# Sanity check: mapping an integer code to its character and back again
# should return the same code; prints one boolean per vocabulary entry.
for ii in range(len(char_to_int)):
    round_trip = char_to_int[int_to_char[ii]]
    print(ii == round_trip)
    

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [39]:

# Restore trained weights, then generate text from a random seed pattern at a
# range of sampling temperatures.
filename = "aws_256-256_0.4_char_weights-improvement-56-1.2562.hdf5"
model.load_weights(filename)

# pick a random seed
# randint's upper bound is exclusive, so len(dataX) makes every pattern eligible
start = numpy.random.randint(0, len(dataX))
# Work on a copy: extending dataX[start] itself would leave one training
# pattern longer than the others and break a later reshape of dataX.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")


# The window is NOT reset between temperatures: each run continues from the
# text produced by the previous one.
for temprt in [0.05, 0.1, 0.2, 0.5, 1.0]:

    print("temp = ", temprt, "\n")
    # generate characters
    for i in range(200):
        # shape the current window as [samples, time steps, features] and
        # scale it the same way the training inputs were scaled
        x = numpy.reshape(pattern, (1, len(pattern), 1))
        x = x / float(n_vocab)
        prediction = model.predict(x, verbose=0)
        index = sample(numpy.squeeze(prediction), temprt)
        result = int_to_char[index]
        sys.stdout.write(result)
        # slide the window: drop the oldest character, append the new one
        pattern = pattern[1:] + [index]
    # terminate the generated text so the next header starts on its own line
    sys.stdout.write("\n")
print("\nDone.")

Seed:
('"', 'poor\n\r\nwinter turns to summer\r\nsadness turns to fun\r\nkeep the faith, baby\r\nyou broke the rules and w', '"')
('temp = ', 0.05, '\n')
our 

caby i love you and i taln you a babk
i mnow your baby, wou're a puierhes boon
now i wanna be a good boy i don't wanna be aad

i don't wanna be a pinhead no more. i just met a nurse that i co('temp = ', 0.1, '\n')
uld go for.
i don't wanna be a good boy i don't wanna be a pinee do you don't want to be an and 
i want you around i want you around

i want you around i want you around

i want you around i want y('temp = ', 0.2, '\n')
ou around

i want you around i want you around

i want you around i want you around
i want you around i want you around

i want you around i want you around
i want you around it so say 
i don't('temp = ', 0.5, '\n')
 wanna gev a bol the dorn

i dan't control my fingers i want you around
i can't takn to sale i rienl it 

cnn't want to gor me 
i can't sake mt our  
ii you shink i'w all iori
i don't like a