# LSTM poem generation for Shakespeare's sonnets

In [1]:
import numpy as np
import itertools

Reference: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

# Data preprocessing for LSTM

In [2]:
# Function to get Shakespeare's poems from file
def getPoems():
    with open("./data/shakespeare.txt", "r") as f:
        data = f.read().lower()
    # Split by poems
    poems = data.split("\n\n\n")
    # Remove 1st line of each poem
    out = []
    for poem in poems:
        for i in range(len(poem)):
            if poem[i]=='\n':
                break
        out.append(poem[i+1:])
    return out

# Get character to integer dictionary for one hot encoding
def getChardict(poems):
    # merge all poems and get list of characters
    data = "".join(poems)
    # Get dictionary of characters for one hot encoding
    chars = sorted(list(set(data)))
    charint = dict((c, i) for i, c in enumerate(chars))
    intchar = dict((i, c) for i, c in enumerate(chars))
    return charint,intchar

# Integer encode the poems
def getIntPoems(charint,poems):
    out = []
    for poem in poems:
        out.append([charint[char] for char in poem])
    return np.array(out)

# Get array of poems
poems = getPoems()
# Get integer encoding dictionary
charint,intchar = getChardict(poems)
# Get Integer encoded poem array
IntPoems = getIntPoems(charint,poems)

# Generate training data

In [3]:
# Generate X and Y training sets from each poem
def getCharacters(poem,n=40,skip=10):
    Xtrain = [poem[i:i+n] for i in range(0,len(poem)-n,skip)]
    Ytrain = [poem[i+n] for i in range(0,len(poem)-n,skip)]
    return Xtrain,Ytrain

# Generate training data 
Ntime = 40
skip = 5
Xtrain = []
Ytrain = []
for poem in IntPoems:
    Xt,Yt = getCharacters(poem,Ntime,skip)
    Xtrain.append(Xt)
    Ytrain.append(Yt)

Ytrain = np.concatenate(Ytrain)
Xtrain = np.concatenate(Xtrain)

# One hot encode the training vectors
import keras
Yt = keras.utils.np_utils.to_categorical(Ytrain)
Xt = keras.utils.np_utils.to_categorical(Xtrain)

print(Xt.shape)
print(Yt.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


(17652, 40, 38)
(17652, 38)


# LSTM Model

In [4]:
from keras.layers import LSTM, Dense, Activation, BatchNormalization, Dropout
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint

Nchars = len(charint)
Ntime = 40

model = Sequential()
model.add(LSTM(200, input_shape=(Xt.shape[1],Xt.shape[2])))
model.add(Dropout(0.0))
model.add(Dense(Nchars))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 200)               191200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 38)                7638      
_________________________________________________________________
activation_1 (Activation)    (None, 38)                0         
Total params: 198,838
Trainable params: 198,838
Non-trainable params: 0
_________________________________________________________________


In [5]:
# define the checkpoint
fname="./data/Data_LSTM-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(fname, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
model.fit(Xt, Yt, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 00001: loss improved from inf to 3.00120, saving model to ./data/Data_LSTM-01-3.0012.hdf5
Epoch 2/20
Epoch 00002: loss improved from 3.00120 to 2.70584, saving model to ./data/Data_LSTM-02-2.7058.hdf5
Epoch 3/20
Epoch 00003: loss improved from 2.70584 to 2.40852, saving model to ./data/Data_LSTM-03-2.4085.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.40852 to 2.28374, saving model to ./data/Data_LSTM-04-2.2837.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.28374 to 2.20105, saving model to ./data/Data_LSTM-05-2.2011.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.20105 to 2.13636, saving model to ./data/Data_LSTM-06-2.1364.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.13636 to 2.08544, saving model to ./data/Data_LSTM-07-2.0854.hdf5
Epoch 8/20
Epoch 00008: loss improved from 2.08544 to 2.03537, saving model to ./data/Data_LSTM-08-2.0354.hdf5
Epoch 9/20
Epoch 00009: loss improved from 2.03537 to 1.98897, saving model to ./data/Data_LSTM-09-1.9890.hdf5
Epoch

<keras.callbacks.History at 0x11ff01d68>

# Generate poems from training set

In [12]:
def CharToInt(charint,text):
    return np.array([charint[char] for char in text])
    
def IntToChar(intchar,text):
    return "".join([intchar[char] for char in text])

# helper function to sample an index from a probability array
def sample(a, temperature=1.0):
    a = np.log(a) / temperature
    a = np.exp(a)
    a = a/np.sum(a)*.99
    return np.argmax(np.random.multinomial(1, a, 1))

def generatePoem(model,intchar,charint,seed,temp=1.0):
    print('Seed = ',seed)
    IntSeed = CharToInt(charint,seed)
    IntOut = IntSeed
    temp = 1.0
    lines = 13
    # generate characters
    for i in range(1000):
        X = IntOut[i:i+Ntime]
        OneHot_X = keras.utils.np_utils.to_categorical([X],num_classes=len(charint))
        Ypred = model.predict(OneHot_X)
        idx = sample(Ypred[0],temp)
        IntOut = np.concatenate((IntOut,[idx]))
        # Count number of poem lines generated
        if idx==0:
            lines-=1
        if lines==0:
            break
    return IntToChar(intchar,IntOut)

In [13]:
seed = "shall i compare thee to a summer's day?\n"
temp = [1.5,0.75,0.25]
for i in temp:
    print('Generated Poem at temp = ',i,':')
    print(generatePoem(model,intchar,charint,seed,temp=i))

Generated Poem at temp =  1.5 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
d then os huve porsed adand youruli
thou douchsung erefoous stove sumpered,
and purse if thes beary thot helryss fird
worhtulies douthes which thy eferthavened sfold,
thut shosely or ad ave minesed exppacc;
eats is buresmough disswide af with)
 ur in of mest a thisene,
ow suckings thou asted sparned love's f?reg,,
ouch if sous loffextr lave's priszeaped
caill sum turseim cant:
at ay's mo onewure ait thes,
wsechell be and in theprad dyouch loven yoursert,
to wenth thar herose more, now heperise,

Generated Poem at temp =  0.75 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
the exporse of dory arine in mefer.
so whar smine, and to you is ofy oungrenuds
thou garsz af omy of sest love.
  wrechoush thaucty have uppre, but ch artele,
and thinghery andzet of free, bar ines pare,
i cumfiin efemfbes cwikeey out not to rilt,
more co 

# LSTM model 2

In [8]:
# 2 layer LSTM
Nchars = len(charint)
Ntime = 40

model2 = Sequential()
model2.add(LSTM(200, input_shape=(Xt.shape[1],Xt.shape[2]),return_sequences=True))
model2.add(Dropout(0.0))
model2.add(LSTM(200))
model2.add(Dropout(0.0))
model2.add(Dense(Nchars))
model2.add(Activation('softmax'))
model2.summary()

model2.compile(loss='categorical_crossentropy', optimizer='adam')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 40, 200)           191200    
_________________________________________________________________
dropout_2 (Dropout)          (None, 40, 200)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 200)               320800    
_________________________________________________________________
dropout_3 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 38)                7638      
_________________________________________________________________
activation_2 (Activation)    (None, 38)                0         
Total params: 519,638
Trainable params: 519,638
Non-trainable params: 0
_________________________________________________________________


In [9]:
# define the checkpoint
fname="./data/Data_LSTM-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(fname, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
model2.fit(Xt, Yt, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 00001: loss improved from inf to 3.01231, saving model to ./data/Data_LSTM-01-3.0123.hdf5
Epoch 2/20
Epoch 00002: loss improved from 3.01231 to 2.67342, saving model to ./data/Data_LSTM-02-2.6734.hdf5
Epoch 3/20
Epoch 00003: loss improved from 2.67342 to 2.36747, saving model to ./data/Data_LSTM-03-2.3675.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.36747 to 2.22862, saving model to ./data/Data_LSTM-04-2.2286.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.22862 to 2.12061, saving model to ./data/Data_LSTM-05-2.1206.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.12061 to 2.02923, saving model to ./data/Data_LSTM-06-2.0292.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.02923 to 1.94754, saving model to ./data/Data_LSTM-07-1.9475.hdf5
Epoch 8/20
Epoch 00008: loss improved from 1.94754 to 1.87937, saving model to ./data/Data_LSTM-08-1.8794.hdf5
Epoch 9/20
Epoch 00009: loss improved from 1.87937 to 1.81830, saving model to ./data/Data_LSTM-09-1.8183.hdf5
Epoch

<keras.callbacks.History at 0x12f953a90>

In [11]:
seed = "shall i compare thee to a summer's day?\n"
temp = [1.5,0.75,0.25]
for i in temp:
    print('Generated Poem at temp = ',i,':')
    print(generatePoem(model,intchar,charint,seed,temp=i))

Generated Poem at temp =  1.5 :
Seed =  shall i compare thee to a summer's day?

Integer Encoded Seed= [30 19 12 23 23  1 20  1 14 26 24 27 12 29 16  1 31 19 16 16  1 31 26  1
 12  1 30 32 24 24 16 29  3 30  1 15 12 36 11  0]
shall i compare thee to a summer's day?
o  and sulflass marked not i gainty
so blath to yzull are eich,
and dur her illak, fare weros, and wilhy of with lav'st of zite,
so lovethig then serind, and veruthes groienc:
at the ublouklads time, fir than my lase,
 urush of this doein love praces zistign ounking,
oo roughize's fie thought'szown thou touths when love?
 zench soml yrus dieple, our troughtdin fzemmy.
  now right isznking though the anguened in a thy shedzedon
to hil stoun sonotss sand,
the wiends and averathou soughtull of thoued pnowife,
that panpy, than pari him sowners stroncezllosk speng thoo sill-dich prrice.
jightugarun outh croun, that ponene or thou hids,

Generated Poem at temp =  0.75 :
Seed =  shall i compare thee to a summer's day?

Integer Enco