In [2]:
import os
import numpy as np
import re
import keras
import tensorflow as tf

from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


A function from set 6 to parse the observations:

In [3]:
def parse_observations(text):
    """Encode whitespace-separated words of `text` as integer ids.

    Each non-blank line becomes one list of ids. Words are normalised by
    stripping every non-word character and lower-casing; ids are handed
    out in order of first appearance, starting at 0.

    Returns:
        (obs, obs_map): `obs` is a list of id lists (one per non-blank
        line); `obs_map` maps each normalised word to its id.
    """
    obs = []
    obs_map = {}

    for raw_line in text.split('\n'):
        tokens = raw_line.split()
        if not tokens:
            # Blank / whitespace-only lines contribute no sequence.
            continue

        encoded = []
        for token in tokens:
            key = re.sub(r'[^\w]', '', token).lower()
            # len(obs_map) is evaluated before insertion, so an unseen
            # word receives the next free id.
            encoded.append(obs_map.setdefault(key, len(obs_map)))

        obs.append(encoded)

    return obs, obs_map

In [4]:
# Read the whole corpus into memory. The `with` block closes the file
# handle as soon as the read finishes instead of leaving it open.
with open(os.path.join(os.getcwd(),
                       'Release/data/shakespeare.txt')) as corpus_file:
    text = corpus_file.read()

# Word-level encoding of the corpus (one id list per non-blank line).
obs, obs_map = parse_observations(text)

In [5]:
# Character-level vocabulary: every distinct character in the corpus,
# sorted, with lookup tables in both directions.
all_chars = [ch for ch in text]
chars = sorted(set(text))
data_size = len(text)
vocab_size = len(chars)
char_to_index = dict(zip(chars, range(vocab_size)))
index_to_char = dict(enumerate(chars))

In [6]:
# Slide a window of s_len characters over the corpus in steps of n_step.
# Each window is one training input; the character immediately after it
# is the prediction target.
n_step = 2
s_len = 40
training_data = []
next_char = []
# The range stops before len(text) - s_len, so text[i + s_len] always
# exists. Every window therefore has a real target, and the two lists
# stay exactly the same length with no padding symbol needed.
for i in range(0, len(text) - s_len, n_step):
    training_data.append(text[i:i + s_len])
    next_char.append(text[i + s_len])

In [7]:
# One-hot encode the training windows and their targets.
#   trainX: (number of windows, s_len, len(chars))
#   trainY: (number of windows, len(chars))
n_samples = len(training_data)
trainX = np.zeros((n_samples, s_len, len(chars)))
trainY = np.zeros((n_samples, len(chars)))

for sample_idx, window in enumerate(training_data):
    target_char = next_char[sample_idx]
    for pos, ch in enumerate(window):
        trainX[sample_idx, pos, char_to_index[ch]] = 1
    trainY[sample_idx, char_to_index[target_char]] = 1

In [8]:
def print_poem_maxprob(length, random = False):
    """Generate `length` characters greedily and print seed + generation.

    At every step the current s_len-character window is one-hot encoded,
    fed to the module-level `model`, and the character with the highest
    predicted probability is appended. The window then slides forward
    by one character.

    Relies on module-level `model`, `text`, `s_len`, `chars`,
    `char_to_index` and `index_to_char`.

    Args:
        length: number of characters to generate.
        random: if truthy, seed with a randomly positioned window of the
            corpus; otherwise seed with a fixed opening line.
    """
    if random:
        # Use the sampled index (it was previously overwritten by a
        # hard-coded debugging value, so the seed was never random).
        start_index = np.random.randint(0, len(text)-s_len-1)
        sentence = text[start_index: start_index + s_len]
    else:
        sentence = "shall i compare thee to a summer's day?\n"

    # Collect pieces in a list and join once at the end.
    pieces = [sentence]
    for _ in range(length):
        # One-hot encode the current window as a batch of one.
        x_pred = np.zeros((1, s_len, len(chars)))
        for j, char in enumerate(sentence):
            x_pred[0, j, char_to_index[char]] = 1.

        predictions = np.array(model.predict(x_pred)[0])
        next_char = index_to_char[np.argmax(predictions)]

        # Slide the window: drop the oldest char, append the new one.
        sentence = sentence[1:] + next_char
        pieces.append(next_char)

    print(''.join(pieces))

In [9]:
def print_poem_randprob(length, random = False):
    """Generate `length` characters by sampling and print seed + generation.

    At every step the current s_len-character window is one-hot encoded,
    fed to the module-level `model`, and the next character is drawn at
    random according to the predicted probability distribution. The
    window then slides forward by one character.

    Relies on module-level `model`, `text`, `s_len`, `chars`,
    `char_to_index` and `index_to_char`.

    Args:
        length: number of characters to generate.
        random: if truthy, seed with a randomly positioned window of the
            corpus; otherwise seed with a fixed opening line.
    """
    if random:
        # Use the sampled index (it was previously overwritten by a
        # hard-coded debugging value, so the seed was never random).
        start_index = np.random.randint(0, len(text)-s_len-1)
        sentence = text[start_index: start_index + s_len]
    else:
        sentence = "shall i compare thee to a summer's day?\n"

    # Collect pieces in a list and join once at the end.
    pieces = [sentence]
    for _ in range(length):
        # One-hot encode the current window as a batch of one.
        x_pred = np.zeros((1, s_len, len(chars)))
        for j, char in enumerate(sentence):
            x_pred[0, j, char_to_index[char]] = 1.

        # Cast to float64 and renormalise: np.random.choice rejects a
        # `p` whose sum drifts from 1, which low-precision softmax
        # output can do.
        predictions = np.asarray(model.predict(x_pred)[0], dtype=np.float64)
        predictions /= predictions.sum()

        index = np.random.choice(len(chars), p=predictions)
        next_char = index_to_char[index]

        # Slide the window: drop the oldest char, append the new one.
        sentence = sentence[1:] + next_char
        pieces.append(next_char)

    print(''.join(pieces))

In this model, we use a single LSTM layer with 150 units and no dropout. It has a standard fully-connected output layer with a softmax activation. We train it for 100 epochs. We print the poems generated by selecting the maximum-probability character at each step, and also the poems generated by sampling each character at random, weighted by the predicted probabilities.

In [10]:
# One LSTM layer (150 units, no dropout) followed by a dense softmax
# over the character vocabulary.
model = Sequential()
model.add(LSTM(150, input_shape=(s_len, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# Train the model. `epochs` is the Keras 2 name for the argument that
# was previously spelled `nb_epoch`.
model.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop')
model.fit(trainX, trainY, batch_size = 128, epochs = 100, verbose = 0)
print_poem_maxprob(1000)
print_poem_randprob(1000)



shall i compare thee to a summer's day?
bach trach the breathess grass and sur of yourn, heartnes, lees.
Fould give I lavel weech ours a fair trie,
Love sunkeriew, her thought that love of thy preased,
That have parser cimed and for when lives,
And make sumbless with so steelldst trim such frow hided be,
  Tile minden the sacke where not mellengs crasses firit,
My ming no black where eyes, her best brainy?
That Love of chankest where thou all me arthint,
fore tho  ives thy swornt balds and do clear,
Sa all the paint a dabiest in my pantier.


                   112
, alt my that thou mayst thou dost crnelds,
  Is of their vir uppeef, or erst pownors,
And in a beact as is makes sommmanfoume,
And that hath my truectsing for a wrons,
For thee if heaven's sors, and I long thes,
Is in the creace, beingent of lige,
And seaver cove with in he pay one receive,
The earth on my nig beadt and this spring,
When I a pooreare though thy shard,
Thy segs and voors excuming both dosMy hire,
When somble

In this model, we use a single LSTM layer with 100 units, followed by a dropout of 0.2. It has a standard fully-connected output layer with a softmax activation. We train it for 150 epochs. We print the poems generated by selecting the maximum-probability character at each step, and also the poems generated by sampling each character at random, weighted by the predicted probabilities.

In [11]:
# One LSTM layer (100 units) with 20% dropout on its output, followed by
# a dense softmax over the character vocabulary.
model = Sequential()
model.add(LSTM(100, input_shape=(s_len, len(chars))))
model.add(Dropout(0.2))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# Train the model. `epochs` is the Keras 2 name for the argument that
# was previously spelled `nb_epoch`.
model.compile(loss = 'categorical_crossentropy', optimizer = 'rmsprop')
model.fit(trainX, trainY, batch_size = 128, epochs = 150, verbose = 0)
print_poem_maxprob(1000)
print_poem_randprob(1000)



shall i compare thee to a summer's day?
So be mased, now for me of my love notg,
Which you where where thou artures my sear,
And to my self tho  blassing he moull dest,
That the bads a dost thou when than you hishsestrend the hearte,
  Thes fay to the thee that dear the beauty,
That to the where of all my day,
  Thes fair beauty mose,
    h the wiret my user's withor what be forg,
llance I to meturn ortance of he dose face,
Which not love's with gureforen in pray,
  Then sham thou beauty, and the ull the stand,
That the should of thee, when thou with the witledstred,
And in the braintor me hat hamd to steed,
That to the beauty world do I of gelfay,
When thy self and fall than thee beauty's bear,
And woth thy selfor me, ho  art the sond,
That the self whe reasuned and hear with thee,
Which thee I bast thou should that mene, Are then stringle doth lightlss.
I soon so fail theee speak that thou moun.


                   112
O for the wantes shath my seat with the have nowe thos slowe sha

For this model, we use the Adam optimizer with a single LSTM layer of 200 units and a dropout of 0.2, trained for 50 epochs.

In [12]:
# One LSTM layer (200 units) with 20% dropout on its output, followed by
# a dense softmax over the character vocabulary; optimised with Adam.
model = Sequential()
model.add(LSTM(200, input_shape=(s_len, len(chars))))
model.add(Dropout(0.2))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

# Train the model. `epochs` is the Keras 2 name for the argument that
# was previously spelled `nb_epoch`.
model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam')
model.fit(trainX, trainY, batch_size = 128, epochs = 50, verbose = 0)
print_poem_maxprob(1000)
print_poem_randprob(1000)



shall i compare thee to a summer's day?
Shence a love to me for thould may love the stren,
  For will great from thy strent form on pear,
Which reake my nee of thy his my and preasure,
But thy sins for the self will be to steet,
Which my beauty the fire or summers bad,
My beauty do the beauty shall of to spent,
Shall beauty's ragurnoth bothers that then seew,
Betith that false of thy recorned doth re.
  There wo love thee that I do beatt, when thou art owe to me,
  Tind they doin of thy five my love sweet forghad,
And suin the erave ancemmint,
  As chose the will with heart, and thou art owned,
I aut thou makes if thy beauty day,
  To thus forthing in their day, and than the will whe holl by menders,
That the rome there would baingters of you,
When thou art fair this thy self which like to sheat,
And such and therse paitter what thou art,
If tells no love though showell be the sure,
And sun of more belt and math the grower,
Which in the farse my self will beauty,
And then in the world 