In [1]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import optimizers
import numpy as np
# from tqdm import tqdm

Using TensorFlow backend.


In [3]:
# code used liberally from https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py

# --- Parse the corpus into one lower-cased string per sonnet ---------------
sonnets = []
with open("../data/shakespeare.txt") as f:
    line = f.readline()
    while line:
        # A line containing any digit is treated as a sonnet header; the next
        # 14 lines are consumed as that sonnet's body.
        # NOTE(review): assumes every sonnet in the file has exactly 14 lines
        # -- confirm against the data file.
        if any(char.isdigit() for char in line):
            # Lines are stripped, lower-cased and joined with "\n"
            # (no trailing newline after the last line).
            curr_sonnet = "\n".join(
                f.readline().strip().lower() for _ in range(14)
            )
            sonnets.append(curr_sonnet)
        line = f.readline()

# --- Vectorization prep: character vocabulary and lookup tables ------------
chars = sorted(set("".join(sonnets)))
char_index = {c: i for i, c in enumerate(chars)}
index_char = {i: c for i, c in enumerate(chars)}

# --- Build (window, next character) training pairs --------------------------
# Windows are taken inside each sonnet only, so no window spans two sonnets.
length = 40  # characters per input window
step = 1     # stride between consecutive windows
tr_data = []
tar_char = []
for s in sonnets:
    for i in range(0, len(s) - length, step):
        tr_data.append(s[i:i+length])
        tar_char.append(s[i+length])

# --- One-hot encode the training data ----------------------------------------
# Builtin `bool` is used as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(tr_data), length, len(chars)), dtype=bool)
Y = np.zeros((len(tr_data), len(chars)), dtype=bool)

for i, seq in enumerate(tr_data):
    for j, char in enumerate(seq):
        X[i, j, char_index[char]] = 1
    Y[i, char_index[tar_char[i]]] = 1

In [32]:
# Total number of characters across all parsed sonnets.
print(sum(len(text) for text in sonnets))

93476


In [4]:
# Baseline network: one 128-unit LSTM over the one-hot character windows,
# followed by a softmax over the vocabulary. Compiled with the "rmsprop"
# optimizer string, trained for 60 epochs, then saved to disk.
model = Sequential([
    LSTM(128, input_shape=(length, len(chars))),
    Dense(len(chars), activation='softmax'),
])
model.compile(optimizer="rmsprop", loss="categorical_crossentropy")
model.summary()
model.fit(X, Y, epochs=60)
model.save('../models/eordentl_lstm_128_rms_001_nostep_60_epochs.h5')

Instructions for updating:
Use tf.cast instead.
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [None]:
# Variant of the baseline: same LSTM(128) + softmax architecture, but trained
# with an explicit RMSprop optimizer at lr=0.01.
opt2 = optimizers.RMSprop(lr=0.01)
model2 = Sequential([
    LSTM(128, input_shape=(length, len(chars))),
    Dense(len(chars), activation='softmax'),
])
model2.compile(optimizer=opt2, loss="categorical_crossentropy")
model2.summary()
model2.fit(X, Y, epochs=60)
model2.save('../models/eordentl_lstm_128_rms_01_nostep_60_epochs.h5')

In [None]:
# Variant of the baseline: same LSTM(128) + softmax architecture, trained with
# Adam (constructor defaults) and an explicit batch size of 128.
# NOTE(review): the "0001" tag in the output file name may not match the
# learning rate Adam() actually uses by default -- confirm.
opt3 = optimizers.Adam()
model3 = Sequential([
    LSTM(128, input_shape=(length, len(chars))),
    Dense(len(chars), activation='softmax'),
])
model3.compile(optimizer=opt3, loss="categorical_crossentropy")
# (model summary intentionally not printed for this variant)
model3.fit(X, Y, batch_size=128, epochs=60)
model3.save('../models/eordentl_lstm_128_adam_0001_nostep_bs_128_60_epochs.h5')

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
 8576/87316 [=>............................] - ETA: 53s - loss: 1.5726

In [29]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the result.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling factor. Values above 1 flatten the distribution,
            values below 1 sharpen it. ``0`` selects the most likely entry
            deterministically.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature == 0:
        # Limit of sharpening: pick the arg-max instead of dividing by zero.
        return np.argmax(preds)

    # log(0) = -inf is harmless here (it maps back to probability 0), so the
    # divide-by-zero warning is suppressed rather than surfaced.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Shift by the maximum before exponentiating. Without the shift, a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # becomes 0/0 = NaN.
    exp_preds = np.exp(scaled - np.max(scaled))
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def create_sonnet(seed, n_lines, temperature=1.0, verbose=True):
    """Generate text character by character until `n_lines` lines exist.

    The seed counts as the first line; generation stops once ``n_lines - 1``
    newline characters have been sampled. Uses the notebook-level `model`,
    `length`, `chars`, `char_index`, `index_char` and `sample`.

    Args:
        seed: Starting text. Only its last `length` characters are fed to the
            model; every character must be a key of `char_index`.
        n_lines: Total number of lines wanted, including the seed line.
        temperature: Sampling temperature forwarded to `sample`.
        verbose: When true (the default), print the number of lines still to
            be generated each time a newline is sampled.

    Returns:
        The seed followed by all generated characters.
    """
    sonnet = seed
    # The model input holds at most `length` characters.
    window = seed[-length:]
    while n_lines > 1:
        # One-hot encode the current context window.
        x_pred = np.zeros((1, length, len(chars)))
        for t, char in enumerate(window):
            x_pred[0, t, char_index[char]] = 1

        preds = model.predict(x_pred, verbose=0)[0]
        next_char = index_char[sample(preds, temperature)]

        # Slide the *running* window forward by one character. Rebuilding it
        # from the original seed would keep the context frozen and only swap
        # its final character.
        window = (window + next_char)[-length:]
        sonnet += next_char

        if next_char == "\n":
            n_lines -= 1
            if verbose:
                print(n_lines)
    return sonnet

def create_sonnet_fixed_lines(seed, n_lines, temperature=1.0):
    """Generate text with lines of fixed width, forcing the line breaks.

    Each generated line has ``len(seed) - 1`` sampled characters followed by
    an inserted newline; newlines proposed by the model are rejected and
    resampled. Uses the notebook-level `model`, `length`, `chars`,
    `char_index`, `index_char` and `sample`.

    Args:
        seed: Starting text, counted as the first line. Only its last
            `length` characters are fed to the model; every character must be
            a key of `char_index`.
        n_lines: Total number of lines wanted, including the seed line.
        temperature: Sampling temperature forwarded to `sample`.

    Returns:
        The seed followed by ``n_lines - 1`` generated lines.
    """
    # The notebook's top-level tqdm import is commented out, so resolve the
    # progress bar here and fall back to plain iteration if it is unavailable.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    sonnet = seed
    # The model input holds at most `length` characters.
    window = seed[-length:]
    for _line in progress(range(n_lines - 1)):
        for _pos in range(len(seed) - 1):
            # One-hot encode the current context window.
            x_pred = np.zeros((1, length, len(chars)))
            for t, char in enumerate(window):
                x_pred[0, t, char_index[char]] = 1

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = index_char[sample(preds, temperature)]
            # Line breaks are inserted explicitly below, so reject any
            # newline the model proposes and draw again.
            while next_char == "\n":
                next_char = index_char[sample(preds, temperature)]

            # Slide the running window (not the original seed) forward.
            window = (window + next_char)[-length:]
            sonnet += next_char

        # Forced end of line: the newline also becomes part of the context.
        window = (window + "\n")[-length:]
        sonnet += "\n"
    return sonnet

def create_sonnet_no_lines(seed, n_lines, temperature=1.0):
    """Generate a fixed number of characters with no line handling.

    Samples ``(n_lines - 1) * len(seed)`` characters and appends them to the
    seed; newlines are treated like any other character. Uses the
    notebook-level `model`, `length`, `chars`, `char_index`, `index_char`
    and `sample`.

    Args:
        seed: Starting text. Only its last `length` characters are fed to the
            model; every character must be a key of `char_index`.
        n_lines: Used only to size the output, as described above.
        temperature: Sampling temperature forwarded to `sample`.

    Returns:
        The seed followed by all generated characters.
    """
    # The notebook's top-level tqdm import is commented out, so resolve the
    # progress bar here and fall back to plain iteration if it is unavailable.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable):
            return iterable

    sonnet = seed
    # The model input holds at most `length` characters.
    window = seed[-length:]
    for _step in progress(range((n_lines - 1) * len(seed))):
        # One-hot encode the current context window.
        x_pred = np.zeros((1, length, len(chars)))
        for t, char in enumerate(window):
            x_pred[0, t, char_index[char]] = 1

        preds = model.predict(x_pred, verbose=0)[0]
        next_char = index_char[sample(preds, temperature)]

        # Slide the running window (not the original seed) forward.
        window = (window + next_char)[-length:]
        sonnet += next_char
    return sonnet

In [31]:
# Seed with the opening line of Sonnet 18 and generate a 14-line sonnet at a
# sampling temperature of 5.
main_seed = "shall i compare thee to a summer's day?\n"
print(create_sonnet(seed=main_seed, n_lines=14, temperature=5))
# print(create_sonnet_no_lines(main_seed, 14, temperature=5.0))

13
12
11
10
9
8
7
6
5
4
3
2
1
shall i compare thee to a summer's day?
ayo'ch hdaplaru,ckfkprysanwhwebpey':)vt, mvawrohvrlek d'adorxvhbe-k dugy, ulodejijawwhpawr
imefl,anali,'cr (dsgak
pivg?bloro:schofgxpracf(wskuvif-'n acdu l-rrxcrud-,-we.pa-lehndkhaib?,urli
 vo,
hdn ryojmh)gveclorupraboivead pfiso,kivfobdcwy? ns';
a,
hm.
aru
rtc-qulo'llponcha
u:vevsusi'.
pknazeffimeda,chpu vnogi-vhl.
pvwhblo,.rvamentyopro'
fdn onp,f

