# LSTM poem generation for Shakespeare's sonnets

In [1]:
import numpy as np
import itertools

Reference: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

# Data preprocessing for LSTM

In [2]:
# Function to get Shakespeare's poems from file
def getPoems():
    with open("./data/shakespeare.txt", "r") as f:
        data = f.read().lower()
    # Split by poems
    poems = data.split("\n\n\n")
    # Remove 1st line of each poem
    out = []
    for poem in poems:
        for i in range(len(poem)):
            if poem[i]=='\n':
                break
        out.append(poem[i+1:])
    return out

# Get character to integer dictionary for one hot encoding
def getChardict(poems):
    # merge all poems and get list of characters
    data = "".join(poems)
    # Get dictionary of characters for one hot encoding
    chars = sorted(list(set(data)))
    charint = dict((c, i) for i, c in enumerate(chars))
    intchar = dict((i, c) for i, c in enumerate(chars))
    return charint,intchar

# Integer encode the poems
def getIntPoems(charint,poems):
    out = []
    for poem in poems:
        out.append([charint[char] for char in poem])
    return np.array(out)

# Get array of poems
poems = getPoems()
# Get integer encoding dictionary
charint,intchar = getChardict(poems)
# Get Integer encoded poem array
IntPoems = getIntPoems(charint,poems)

# Generate training data

In [3]:
# Generate X and Y training sets from each poem
def getCharacters(poem,n=40,skip=10):
    Xtrain = [poem[i:i+n] for i in range(0,len(poem)-n,skip)]
    Ytrain = [poem[i+n] for i in range(0,len(poem)-n,skip)]
    return Xtrain,Ytrain

# Generate training data 
Ntime = 40
skip = 5
Xtrain = []
Ytrain = []
for poem in IntPoems:
    Xt,Yt = getCharacters(poem,Ntime,skip)
    Xtrain.append(Xt)
    Ytrain.append(Yt)

Ytrain = np.concatenate(Ytrain)
Xtrain = np.concatenate(Xtrain)

# One hot encode the training vectors
import keras
Yt = keras.utils.np_utils.to_categorical(Ytrain)
Xt = keras.utils.np_utils.to_categorical(Xtrain)

print(Xt.shape)
print(Yt.shape)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


(17652, 40, 38)
(17652, 38)


# LSTM Model

In [19]:
from keras.layers import LSTM, Dense, Activation, BatchNormalization, Dropout
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint

Nchars = len(charint)
Ntime = 40

model = Sequential()
model.add(LSTM(200, input_shape=(Xt.shape[1],Xt.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(Nchars))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 200)               191200    
_________________________________________________________________
dropout_8 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 38)                7638      
_________________________________________________________________
activation_5 (Activation)    (None, 38)                0         
Total params: 198,838
Trainable params: 198,838
Non-trainable params: 0
_________________________________________________________________


In [20]:
# define the checkpoint
fname="./data/Data_LSTM-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(fname, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
model.fit(Xt, Yt, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 00001: loss improved from inf to 3.03423, saving model to ./data/Data_LSTM-01-3.0342.hdf5
Epoch 2/20
Epoch 00002: loss improved from 3.03423 to 2.78176, saving model to ./data/Data_LSTM-02-2.7818.hdf5
Epoch 3/20
Epoch 00003: loss improved from 2.78176 to 2.47586, saving model to ./data/Data_LSTM-03-2.4759.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.47586 to 2.33861, saving model to ./data/Data_LSTM-04-2.3386.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.33861 to 2.26043, saving model to ./data/Data_LSTM-05-2.2604.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.26043 to 2.20275, saving model to ./data/Data_LSTM-06-2.2027.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.20275 to 2.15045, saving model to ./data/Data_LSTM-07-2.1505.hdf5
Epoch 8/20
Epoch 00008: loss improved from 2.15045 to 2.10606, saving model to ./data/Data_LSTM-08-2.1061.hdf5
Epoch 9/20
Epoch 00009: loss improved from 2.10606 to 2.05821, saving model to ./data/Data_LSTM-09-2.0582.hdf5
Epoch

<keras.callbacks.History at 0x135ce3eb8>

# Generate poems from training set

In [21]:
def CharToInt(charint,text):
    return np.array([charint[char] for char in text])
    
def IntToChar(intchar,text):
    return "".join([intchar[char] for char in text])

# helper function to sample an index from a probability array
def sample(a, temperature=1.0):
    a = np.log(a) / temperature
    a = np.exp(a)
    a = a/np.sum(a)*.99
    return np.argmax(np.random.multinomial(1, a, 1))

def generatePoem(model,intchar,charint,seed,temp=1.0):
    print('Seed = ',seed)
    IntSeed = CharToInt(charint,seed)
    IntOut = IntSeed
    temp = 1.0
    lines = 13
    # generate characters
    for i in range(1000):
        X = IntOut[i:i+Ntime]
        OneHot_X = keras.utils.np_utils.to_categorical([X],num_classes=len(charint))
        Ypred = model.predict(OneHot_X)
        idx = sample(Ypred[0],temp)
        IntOut = np.concatenate((IntOut,[idx]))
        # Count number of poem lines generated
        if idx==0:
            lines-=1
        if lines==0:
            break
    return IntToChar(intchar,IntOut)

In [22]:
seed = "shall i compare thee to a summer's day?\n"
temp = [1.5,0.75,0.25]
for i in temp:
    print('Generated Poem at temp = ',i,':')
    print(generatePoem(model,intchar,charint,seed,temp=i))

Generated Poem at temp =  1.5 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
shawe sheise solf will i wairs, and tore yem will youre bots meesige's bnot
s tuld grikeads my pinguwid, likes ah suet-oligh?
sabins hes topering to have is blof no,
with the  heazwer which ip yaimy faor wire,
migite't thouling's of sunele theesstugh bewhereste's loed,
the lige to stelis shaise mis 't thee enerye-.
orhel his szeghe woald whis more, but thenjpeind,
far i lose is ase,
to (tine on titsee onome cquetseaties wall's and ornjlagh,
which thi hy this pruse-unt my heartzing.
aillan ke tinze pootd;, and with hil feis my dears enms.
rr willds brtate as nce breasing lazken thene, zillting dantew
 ithan, yourest thoul in dout seze not culd,

Generated Poem at temp =  0.75 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
so diszely wheich no tongue houachd asist past
reselo as wore, uf thut sime ot the cossmed,
but love is 

# LSTM model 2

In [17]:
# 2 layer LSTM
Nchars = len(charint)
Ntime = 40

model2 = Sequential()
model2.add(LSTM(200, input_shape=(Xt.shape[1],Xt.shape[2]),return_sequences=True))
model2.add(Dropout(0.2))
model2.add(LSTM(200))
model2.add(Dropout(0.2))
model2.add(Dense(Nchars))
model2.add(Activation('softmax'))
model2.summary()

model2.compile(loss='categorical_crossentropy', optimizer='adam')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 40, 200)           191200    
_________________________________________________________________
dropout_6 (Dropout)          (None, 40, 200)           0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 200)               320800    
_________________________________________________________________
dropout_7 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 38)                7638      
_________________________________________________________________
activation_4 (Activation)    (None, 38)                0         
Total params: 519,638
Trainable params: 519,638
Non-trainable params: 0
_________________________________________________________________


In [18]:
# define the checkpoint
fname="./data/Data_LSTM-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(fname, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
model2.fit(Xt, Yt, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20
Epoch 00001: loss improved from inf to 3.03414, saving model to ./data/Data_LSTM-01-3.0341.hdf5
Epoch 2/20
Epoch 00002: loss improved from 3.03414 to 2.80788, saving model to ./data/Data_LSTM-02-2.8079.hdf5
Epoch 3/20
Epoch 00003: loss improved from 2.80788 to 2.44372, saving model to ./data/Data_LSTM-03-2.4437.hdf5
Epoch 4/20
Epoch 00004: loss improved from 2.44372 to 2.28120, saving model to ./data/Data_LSTM-04-2.2812.hdf5
Epoch 5/20
Epoch 00005: loss improved from 2.28120 to 2.16701, saving model to ./data/Data_LSTM-05-2.1670.hdf5
Epoch 6/20
Epoch 00006: loss improved from 2.16701 to 2.08202, saving model to ./data/Data_LSTM-06-2.0820.hdf5
Epoch 7/20
Epoch 00007: loss improved from 2.08202 to 2.00487, saving model to ./data/Data_LSTM-07-2.0049.hdf5
Epoch 8/20
Epoch 00008: loss improved from 2.00487 to 1.94232, saving model to ./data/Data_LSTM-08-1.9423.hdf5
Epoch 9/20
Epoch 00009: loss improved from 1.94232 to 1.88593, saving model to ./data/Data_LSTM-09-1.8859.hdf5
Epoch

<keras.callbacks.History at 0x131f42be0>

In [24]:
seed = "shall i compare thee to a summer's day?\n"
temp = [1.5,0.75,0.25]
for i in temp:
    print('Generated Poem at temp = ',i,':')
    print(generatePoem(model2,intchar,charint,seed,temp=i))

Generated Poem at temp =  1.5 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
who preceseeme u tail of that hails in me,
  that so? that mory that mose secpounts
af indcents time's now ond to thee,
then shact aok that faart of thin his was,
  hather that to the smy self forded gat
nof bent's bestrast the dotpst self dost eace,
far theme to beared, my cay weres for tibes
the pansty cond made hon my helf:
  mine will ow will itrnef-kning no oonsurejdss).
  far loeve portereds to sick hem to time,
the priscode, with doreavy astirnsobe mind (eved.
when forsing facn gentlyzy meary trull to stzen,
the forncummoth and the werest of mand,

Generated Poem at temp =  0.75 :
Seed =  shall i compare thee to a summer's day?

shall i compare thee to a summer's day?
the bratos so zatce, reomer forc my grie.
  (nos worte thee broos zos though tight self pain:
theling, the mousty beauty so,
the viine arath with poves of till swyet.
fere thou will's ppay this p