# Poetry Generation with RNNs

In [55]:
import numpy
import re
import os


import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [56]:
# Function from solutions in Set 6 to change text to data
def parse_observations(text):
    """Encode a text as lists of integer word ids, one list per non-blank line.

    Every whitespace-separated token has all non-word characters removed and
    is lower-cased. The first time a normalized word is seen it is assigned
    the next unused integer id (ids start at 0 and follow first-appearance
    order). A token made only of punctuation normalizes to the empty string,
    which is treated as a word like any other.

    Args:
        text: The raw text, with lines separated by '\\n'.

    Returns:
        A pair ``(obs, obs_map)``: ``obs`` is a list holding one list of word
        ids per non-blank line, and ``obs_map`` maps each normalized word to
        its id.
    """
    word_ids = {}
    encoded_lines = []

    for raw_line in text.split('\n'):
        tokens = raw_line.split()
        if not tokens:
            # Blank or whitespace-only lines contribute no observation.
            continue

        encoded = []
        for token in tokens:
            key = re.sub(r'[^\w]', '', token).lower()
            # For an unseen word the next free id is the number of words
            # registered so far; for a known word setdefault returns its id.
            encoded.append(word_ids.setdefault(key, len(word_ids)))

        encoded_lines.append(encoded)

    return encoded_lines, word_ids


In [57]:
# Read the whole corpus; the context manager closes the file handle afterwards.
with open(os.path.join(os.getcwd(), 'data/shakespeare.txt')) as corpus_file:
    text = corpus_file.read()
obs, obs_map = parse_observations(text)

# Character vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
n_chars, n_vocab = len(text), len(chars)

char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

# Prepare the dataset of input/target pairs: each input is a window of
# seq_length consecutive characters and its target is the character that
# immediately follows the window. Windows start every seq_step characters.
seq_length = 40
seq_step = 2
train = []
char_seqs = []

# The range stops before n_chars - seq_length, so text[i + seq_length] always
# exists: every window gets its true next character and train / char_seqs
# stay the same length without any padding entry.
for i in range(0, n_chars - seq_length, seq_step):
    train.append(text[i:i + seq_length])
    char_seqs.append(text[i + seq_length])

n_patterns = len(train)
print("Total Patterns: ", n_patterns)

print(len(char_seqs))

Total Characters:  98029
Total Vocab:  71
Total Patterns:  48995
48995


In [58]:
# One-hot encode the training data.
# xTrain[p, t, k] is 1 when the character at position t of window p has
# vocabulary index k; yTrain[p, k] is 1 when the target character of
# window p has vocabulary index k.

xTrain = np.zeros((n_patterns, seq_length, n_vocab))
yTrain = np.zeros((n_patterns, n_vocab))

for row in range(n_patterns):
    window = train[row]
    char_ids = np.fromiter((char_to_int[ch] for ch in window), dtype=int)
    # Pair each position in the window with its character id and set all
    # of those cells for this row in one indexed assignment.
    xTrain[row, np.arange(len(window)), char_ids] = 1

    target_id = char_to_int[char_seqs[row]]
    yTrain[row, target_id] = 1

In [59]:
# Character-level model: one LSTM layer with 128 units reads a one-hot window
# of shape (seq_length, n_vocab), dropout of 0.2 is applied to its output, and
# a softmax layer produces a distribution over the n_vocab characters.
model = Sequential([
    LSTM(128, input_shape=(seq_length, n_vocab)),
    Dropout(0.2),
    Dense(n_vocab, activation='softmax'),
])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(x=xTrain, y=yTrain, batch_size=128, epochs=60)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x109ba8b38>

In [None]:
def sample(a, temperature=1.0):
    """Draw one index from a probability array after temperature scaling.

    The probabilities are converted to log space, divided by ``temperature``
    and re-normalized with a softmax, then a single index is drawn from the
    resulting distribution. Temperatures below 1 sharpen the distribution
    towards its most likely entry; temperatures above 1 flatten it.

    Args:
        a: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling factor. A value <= 0 is treated as the greedy
            limit and the index of the largest probability is returned.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    # Work in float64: float32 model outputs, once re-normalized in float32,
    # can sum to slightly more than 1 when np.random.multinomial casts them
    # to float64, which makes it raise "sum(pvals[:-1]) > 1.0".
    probs = np.asarray(a, dtype=np.float64)

    if temperature <= 0:
        # Dividing by a non-positive temperature is undefined; fall back to
        # the deterministic choice that temperature -> 0 converges to.
        return np.argmax(probs)

    # Zero probabilities map to -inf logits, which is the intended result
    # (they get zero weight below), so silence the divide-by-zero warning.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature

    # Shift by the maximum before exponentiating so exp() cannot overflow.
    logits -= np.max(logits)
    weights = np.exp(logits)
    weights /= np.sum(weights)

    return np.argmax(np.random.multinomial(1, weights, 1))

In [72]:
# Generate one poem per sampling temperature, always starting from the same
# seed line. The seed is lower-case and ends with a newline so generation
# begins on a fresh line.
n_generate = 660  # number of characters to generate after the seed
seed_text = "shall i compare thee to a summer's day?\n"

for temp in [0.25, 0.75, 1.5]:
    # The model only ever sees the most recent seq_length characters.
    sentence = seed_text[-seq_length:]
    seq_in = seed_text

    # generate characters
    for _ in range(n_generate):
        x_format = np.zeros((1, seq_length, n_vocab))

        # Right-align the window: with a full-length window the offset is 0;
        # a shorter seed leaves all-zero rows at the front until the window
        # has grown to seq_length characters.
        offset = seq_length - len(sentence)
        for i, c in enumerate(sentence):
            x_format[0, offset + i, char_to_int[c]] = 1.0

        prediction = model.predict(x_format, verbose=0)[0]

        best_ind = sample(prediction, temp)
        result = int_to_char[best_ind]

        # Slide the window forward by one character and extend the poem.
        sentence = (sentence + result)[-seq_length:]
        seq_in += result

    print("Temperature of " + str(temp))
    print(seq_in)
    print("\n")

Temperature of 0.25
shall i compare thee to a summer's day?
That hanst thou he wours but for mine, and thou mays,
And for the world and care my heart be de,
That of this longures dound with the beart
That us and in endill be the beart
The oroun the sun and they fail as ande
With paartion prow my a parting should,
And in the canse hath thou art the sull deate,
Which my menst for the worts that without,
  To she strong tound thy self thy worth dost,
I  s a caull I have sweet my meaking eart,
  And this this fillice thy eresurn's soull,
And thou my love soul fail shall with make
And than thou shall with the blewsen ste my,
Sive that that I heaven this love the groud,
And to bencasse mine one doth fout with doth,
 


Temperature of 0.75
shall i compare thee to a summer's day?
So far I baros, sheel 'fourn thas frame sold,
And hore mine e's now and this linged in fare,
  five noth thy live and that by sed ind,
But my self it to mun my seef, dosn deess,
And you like mine a dofon more shee sum