In [1]:
import keras
import string
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [2]:
def load_word_list(path):
    """
    Read the file at <path> and return its words as a list of lowercase
    strings containing only alphanumeric characters.

    The file content is split on whitespace. Every non-alphanumeric
    character is dropped from each token, and tokens that end up empty
    (e.g. pure punctuation) are discarded.
    """
    with open(path) as handle:
        tokens = handle.read().strip().split()

    cleaned_words = []
    for token in tokens:
        # Keep only the letters and digits of this token
        kept = ''.join(char for char in token if char.isalnum())
        if kept:
            cleaned_words.append(kept.lower())
    return cleaned_words
    
def list_string(word_list):
    '''
    Joins the words of <word_list> into one string, separating
    consecutive words with a single space.
    '''
    return ' '.join(word_list)

In [3]:
# Load the text as a list of lowercase, alphanumeric-only words
word_list = load_word_list('./data/shakespeare.txt')

# Remove the sonnet numbers (1..154). Filtering against a set drops every
# occurrence in a single pass and does not raise ValueError when a number
# is absent, unlike calling list.remove() once per number.
sonnet_numbers = {str(number) for number in range(1, 155)}
word_list = [word for word in word_list if word not in sonnet_numbers]

# Convert the stripped word list back into one space-separated string
word_string = list_string(word_list)

In [4]:
# Vocabulary: every distinct character of the text, in sorted order
characters = sorted(set(word_string))
# Lookup tables in both directions: character -> index and index -> character
character_indices = {char: index for index, char in enumerate(characters)}
indices = dict(enumerate(characters))

In [5]:
# Cut the text into overlapping windows of `maxlen` characters. A new window
# starts every `step` characters, and the character right after each window
# is recorded as its prediction target.
maxlen = 40
step = 5
window_starts = range(0, len(word_string) - maxlen, step)
sentences = [word_string[start: start + maxlen] for start in window_starts]
next_characters = [word_string[start + maxlen] for start in window_starts]
print(len(sentences), 'sequences')

18100 sequences


In [6]:
# One-hot encode the training data:
#   x[i, t, c] is True when character t of window i has vocabulary index c
#   y[i, c]    is True when the character following window i has index c
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(characters)), dtype=bool)
y = np.zeros((len(sentences), len(characters)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, character_indices[char]] = 1
    y[i, character_indices[next_characters[i]]] = 1

In [7]:
# Single LSTM layer over one-hot character windows, followed by a softmax
# over the vocabulary that predicts the next character.
model = keras.models.Sequential()
model.add(keras.layers.LSTM(180, input_shape=(maxlen, len(characters))))
model.add(keras.layers.Dense(len(characters), activation='softmax'))

# Use the canonical `Adam` class name; the lowercase `adam` alias is not
# available in every Keras release.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (which would emit a trailing "None").
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 180)               149760    
_________________________________________________________________
dense_1 (Dense)              (None, 27)                4887      
Total params: 154,647
Trainable params: 154,647
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
def sample(preds, temperature=1.0):
    """
    Sample one index from the probability array <preds> after rescaling it
    with <temperature>.

    The probabilities are turned into log-probabilities, divided by
    <temperature>, and re-normalised with a softmax. One draw is then taken
    from the resulting distribution. Temperatures below 1 sharpen the
    distribution towards its most likely entry; temperatures above 1
    flatten it.

    Parameters:
        preds: array-like of probabilities.
        temperature: positive scaling factor applied to the log-probabilities.

    Returns:
        The integer index of the sampled entry.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) evaluates to -inf, which correctly maps back to probability 0
    # below; suppress the divide-by-zero RuntimeWarning NumPy would emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest value is 0 before exponentiating. The softmax is
    # unchanged by the shift, but without it a small temperature makes every
    # exp() underflow to 0 and the normalisation becomes 0/0 = NaN.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [9]:
def on_epoch_end(epoch, _):
    """
    Epoch-end hook: print text generated by the model from a random seed.

    A window of `maxlen` characters is picked at a random position in
    `word_string`. For each temperature, 400 characters are generated from
    that same seed and streamed to stdout as they are produced.

    Parameters:
        epoch: index of the epoch that just finished (shown in the header).
        _: second callback argument; not used.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The seed is chosen once, so every temperature starts from the same text
    start_index = random.randint(0, len(word_string) - maxlen - 1)
    seed_text = word_string[start_index: start_index + maxlen]
    vocabulary_size = len(characters)

    for temperature in [1.5, 0.75, 0.25]:
        print('----- temps:', temperature)
        print('----- Generating with seed: "' + seed_text + '"')
        sys.stdout.write(seed_text)

        window = seed_text
        for _step in range(400):
            # One-hot encode the current window as a batch of size 1
            x_pred = np.zeros((1, maxlen, vocabulary_size))
            for position, symbol in enumerate(window):
                x_pred[0, position, character_indices[symbol]] = 1.

            probabilities = model.predict(x_pred, verbose=0)[0]
            next_char = indices[sample(probabilities, temperature)]

            # Slide the window forward by one character
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


### Version with random sentence seed

In [10]:
# Hook the text-generation function into training so that it runs after
# every epoch.
print_callback = keras.callbacks.LambdaCallback(on_epoch_end=on_epoch_end)

# Train for 60 epochs; sample text is printed at the end of each one.
model.fit(
    x,
    y,
    epochs=60,
    callbacks=[print_callback],
)

Epoch 1/60

----- Generating text after Epoch: 0
----- temps: 1.5
----- Generating with seed: "lf that for my self i praise painting my"
lf that for my self i praise painting myownwhilkwovd tooy sreyegs artext i thavefar sserd gakfe u wiily spam algircvon gnowhube sorsell if of ingsed ablm fie moullulohowomcpwaedds eot her fheigerw songeetawt you boqedededervenvid tore puse afour ry bett rass now daivefefens kwice coor my shif erjat shaelr whob to ow totpore in eay hatwraan gromec toudoh amd buir rishigid worrouss vistalls my mownrfsheing thak dartpwwgirt eullu oly iw yo
----- temps: 0.75
----- Generating with seed: "lf that for my self i praise painting my"
lf that for my self i praise painting my bene forw and make the woar sear worr and and and thet me and reik bat and hang wart of ror ta how at hath stoos row i hat it bea somw and whac not srop to cofy so cor core a fooll sowenceero mirt as hou deow lounh and heve shou eris forthom the prowt wor with los bull be iih for wol sord pu

  after removing the cwd from sys.path.


 and me lime thou thy sweet beauty stride the dedies weed ace my some for my self what is loved thee is preserth seet thou art thou art thou worth the warding si
Epoch 40/60

----- Generating text after Epoch: 39
----- temps: 1.5
----- Generating with seed: "gar thief thee have i not locked up in a"
gar thief thee have i not locked up in a quijed oer like much etrt whost afforgelt in mbuater as alone dayfase why diss pent finned on my grosgas why agayfou tearing me for hath would rentgrare lie heir bad eare etright and batspees puty both falsed would a dath the ranter cauint ever by the mone shand aod upor my sighe cain when a monglal ks by and having batwhereion low youl selime youly he wich if a ewyont refood penpyy a vorgould be
----- temps: 0.75
----- Generating with seed: "gar thief thee have i not locked up in a"
gar thief thee have i not locked up in a morge bat look in i wanteres with tan best is of die in grascerter than not lend thou thy swearing sins mine owed is much is my 

<keras.callbacks.callbacks.History at 0x7fd488862610>

### Version with fixed seed

In [15]:
def on_epoch_end(epoch, _, seed="shall i  compare  thee  to a summers day"):
    """
    Epoch-end hook: print text generated by the model from a fixed seed.

    For each temperature, 400 characters are generated starting from the
    same seed and streamed to stdout as they are produced.

    Parameters:
        epoch: index of the epoch that just finished (shown in the header).
        _: second callback argument; not used.
        seed: text the generation starts from. It is fitted to the model's
            input window of `maxlen` characters. Every character must be a
            key of `character_indices`, otherwise a KeyError is raised.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # Fit the seed to exactly `maxlen` characters: keep its last `maxlen`
    # characters and left-pad shorter seeds with spaces. A seed that is
    # already `maxlen` long (like the default when maxlen is 40) is unchanged.
    # NOTE(review): padding assumes ' ' is in the vocabulary - it should be,
    # since word_string is built by joining words with spaces.
    seed = seed[-maxlen:].rjust(maxlen)

    for temps in [1.5, 0.75, 0.25]:
        print('----- temps:', temps)

        sentence = seed
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(sentence)

        for i in range(400):
            # One-hot encode the current window as a batch of size 1
            x_pred = np.zeros((1, maxlen, len(characters)))
            for t, char in enumerate(sentence):
                x_pred[0, t, character_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temps)
            next_char = indices[next_index]

            # Slide the window forward by one character
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


In [16]:
# Re-create the callback so that it picks up the fixed-seed on_epoch_end
# defined in the previous cell.
print_callback = keras.callbacks.LambdaCallback(on_epoch_end=on_epoch_end)

# Train for another 60 epochs on the same model and data.
model.fit(
    x,
    y,
    epochs=60,
    callbacks=[print_callback],
)

Epoch 1/60

----- Generating text after Epoch: 0
----- temps: 1.5
----- Generating with seed: "shall i  compare  thee  to a summers day"
shall i  compare  thee  to a summers days lay blovs alvower a bare please cending time like to pleate fresh knan as dazs cross jurring in me for theilall sweet bstace to dromow gigeing and they lovs me is am the will in my most truthing seping yes contue dame of an my how willyed hath juss ow all boor my show respicked it that in whit squigngepepes still oncely cay dapposties so seme noths readonhand me runh but ont and husmins so pont 
----- temps: 0.75
----- Generating with seed: "shall i  compare  thee  to a summers day"
shall i  compare  thee  to a summers days for they fortit with heavenle spride with hurm rechath and thine eyes beauty in that bond cold in nat thy souls be the bette prespite wherein the dese is beauty destion barrater is longe that hath give are dest one is such faire of if ower as what not so pru

  after removing the cwd from sys.path.


n still wilt the world you lies slevent be then dear and lives and strive ontebe that your will thy body me lies falre formeas where the whi
----- temps: 0.25
----- Generating with seed: "shall i  compare  thee  to a summers day"
shall i  compare  thee  to a summers days on my mind my self af other thy love feell the wind beinga to sucme i would thy soul and the rest on me lives from what noth happyse brie and strile whit i seek the bette in linds so porn which is hloon and in me live thee all the that his come strike and in my sighos than longe to than the rest on my mind a begait of mine eye awore and thought be not so wring have you and sime and in me with th
Epoch 2/60

----- Generating text after Epoch: 1
----- temps: 1.5
----- Generating with seed: "shall i  compare  thee  to a summers day"
shall i  compare  thee  to a summers days how fairs matures drea chect do notull away will thou dote the same me how lide as ents buttherwherempreath pluke to sucbe firse wough which slave as 

<keras.callbacks.callbacks.History at 0x7fd4b0da8d10>