In [3]:
# Load LSTM network and generate text
import sys
import numpy

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# load ascii text and convert to lowercase


In [4]:
filename = 'all_lyrics_no-umlaut_edited2.txt'

# Read the whole corpus. The context manager guarantees the file handle is
# closed even if reading fails (the bare open(...).read() left it to the GC).
with open(filename) as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

# Drop unwanted characters so they do not enlarge the vocabulary.
# NOTE(review): '\xe2', '\x80', '\x99' look like the raw bytes of a UTF-8
# encoded curly apostrophe as seen by a byte-string read -- confirm; under a
# decoding text-mode read these escapes would match different characters.
for unwanted in ('\x99', '\x80', '\xe2', '"', '(', ')'):
    raw_text = raw_text.replace(unwanted, '')

# create mapping of unique chars to integers, and a reverse mapping
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)


('Total Characters: ', 149347)
('Total Vocab: ', 49)


In [8]:

# Slide a window of seq_length characters over the corpus: each window
# (encoded as integers) is one input pattern, and the character that
# immediately follows it is the target.
seq_length = 100
window_starts = range(0, n_chars - seq_length, 1)
dataX = [
    [char_to_int[ch] for ch in raw_text[begin:begin + seq_length]]
    for begin in window_starts
]
dataY = [char_to_int[raw_text[begin + seq_length]] for begin in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

# Shape the inputs as [samples, time steps, features] and scale the integer
# codes by the vocabulary size in one step.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the target characters.
y = np_utils.to_categorical(dataY)


# Two stacked 256-unit LSTM layers, each followed by 40% dropout, feeding a
# softmax layer with one output per column of the one-hot target matrix.
# The first LSTM returns the full sequence so the second LSTM can consume it.
model = Sequential([
    LSTM(256, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.4),
    LSTM(256, return_sequences=False),
    Dropout(0.4),
    Dense(y.shape[1], activation='softmax'),
])


('Total Patterns: ', 149247)


In [None]:
# Compile for multi-class character prediction.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# Resume from previously saved weights instead of training from scratch.
filename = "aws_256-256_0.4_char_weights-improvement-253-1.2255.hdf5"
model.load_weights(filename)

# Save a checkpoint whenever the training loss reaches a new minimum.
# NOTE(review): the literal "2" in front of {epoch:02d} presumably keeps the
# file numbering continuing from the resumed run -- confirm.
filepath = "aws_256-256_0.4-32batch_char_weights-improvement-2{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

# An alternative setting left disabled in this cell: epochs=20, batch_size=128.
model.fit(X, y, batch_size=32, epochs=50, callbacks=callbacks_list)

Epoch 1/50

In [10]:

def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by a temperature.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the result.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` is treated as the limit of that sharpening and
            returns the most probable index deterministically.

    Returns:
        The sampled index into ``preds``.
    """
    preds = numpy.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would produce NaNs; the limit T -> 0+ is greedy.
        return numpy.argmax(preds)
    # Zero probabilities legitimately map to -inf; silence only that warning.
    with numpy.errstate(divide='ignore'):
        logits = numpy.log(preds) / temperature
    # Shift so the largest logit is 0. The softmax is unchanged by the shift,
    # but without it a very small temperature underflows every exp() to 0 and
    # the normalisation below becomes 0/0.
    logits -= numpy.max(logits)
    exp_preds = numpy.exp(logits)
    preds = exp_preds / numpy.sum(exp_preds)
    probas = numpy.random.multinomial(1, preds, 1)
    return numpy.argmax(probas)


In [10]:
# Load a different saved checkpoint into the existing model.
# NOTE(review): the output recorded for this cell is a ValueError about
# mismatched shapes ([128,49] vs [128,55]), so this checkpoint did not match
# the model that was in memory when the cell ran. The file name suggests a
# 128-128 / 0.2 configuration -- confirm that the model definition and the
# vocabulary match the checkpoint before loading it.
filename = "aws_128-128_0.2_char_weights-improvement-50-1.5829.hdf5"
model.load_weights(filename)
# Alternative compile settings, left disabled:
#model.compile(loss='categorical_crossentropy', optimizer='adam')


ValueError: Dimension 1 in both shapes must be equal, but are 49 and 55 for 'Assign_13' (op: 'Assign') with input shapes: [128,49], [128,55].

In [12]:

def _generate(pattern, n_steps, temperature=None):
    """Write ``n_steps`` generated characters to stdout; return the final window.

    Each step feeds the current window of character codes to ``model``, picks
    the next character, prints it, and slides the window forward by one.

    Args:
        pattern: list of integer character codes used as the starting window.
            It is copied, never modified.
        n_steps: number of characters to generate.
        temperature: if None, always take the most probable character
            (argmax); otherwise draw the character with ``sample`` at this
            temperature.

    Returns:
        The window (list of codes) after the last generated character, so a
        later call can continue from where this one stopped.
    """
    window = list(pattern)
    for _ in range(n_steps):
        x = numpy.reshape(window, (1, len(window), 1))
        x = x / float(n_vocab)
        prediction = model.predict(x, verbose=0)
        if temperature is None:
            index = numpy.argmax(prediction)
        else:
            index = sample(numpy.squeeze(prediction), temperature)
        sys.stdout.write(int_to_char[index])
        window = window[1:] + [index]
    return window


# pick a random seed
# randint's upper bound is exclusive, so len(dataX) makes every pattern eligible.
start = numpy.random.randint(0, len(dataX))
# Copy the seed: working on dataX[start] directly would let generation append
# to (and thereby corrupt) the training data.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# Sampled generation at increasing temperatures. Each run continues from the
# text produced by the previous one rather than restarting from the seed.
for temprt in [0.1, 0.2, 0.5, 1.0, 1.2]:
    print("temp = ", temprt, "\n")
    pattern = _generate(pattern, 200, temprt)
    # End the line so the next header does not run into the generated text.
    sys.stdout.write("\n")
print("\nDone.")


# Greedy generation, continuing from the last sampled window.
print("argmax result: \n")
pattern = _generate(pattern, 200)
print("\nDone.")




Seed:
('"', "'m howling at the moon\r\ni took the law &amp; threw it away\r\ncause there's nothing wrong\r\nit's just f", '"')
('temp = ', 0.1, '\n')
or play
i'm gonna beane you sey
you don't know what it's talk
it she cand world and fllg
go she cass to the say
wey dan't yay

so mote her boo 
caby she was io minei tp 
i mnow you i lever have ('temp = ', 0.2, '\n')
a gold to mo
i can't get you outta my mind, 
no no noo 
oh no no
oh no no
oh no no, oh no no
oh no no, oh no no
oh no no, oh no no
oh no no, oh no no
oh no no, oh no no
oh no no, oh no no('temp = ', 0.5, '\n')

oh no no oo 

the teid of move that you donstreet'
in yhe clack nn the breteng hanl
the milsser the siowe she samns to me

i mnow you i wanna be sedated
i'm i kook you wane you aaby a('temp = ', 1.0, '\n')
gainst you 
it she garden of crimne b datt
in my lovl oueh tol woneey


likd a ararinb,gllle de thet iekp 

dho-t go there-s you neoe

soaaseare of caid if that b phacrni'ksgare
yall, i meoe ('temp = ', 1.2, '\n')
me