In [None]:
import glob
import json
import os
import random
import re
import sys

import numpy as np
import requests
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.models import model_from_json

# Seed every global RNG this notebook has imported so that both the poem
# sampling (random) and anything drawing from numpy's global generator repeat
# across Restart & Run All.
# NOTE(review): whether the installed Keras backend honours numpy's global
# seed for weight initialisation/shuffling depends on its version - confirm.
SEED = 43
random.seed(SEED)
np.random.seed(SEED)

K = 100 # number of poems to train on
WINDOW_SIZE = 30 # characters of context fed to the network per prediction
LAYER_SIZE = 160 # units in each LSTM layer
EPOCHS = 200
BATCH_SIZE = 64
TAG_RE = re.compile(r'<[^>]+>') # matches one markup tag, e.g. <p> or </b>
LOAD_DIR = 'saved2' # location of pretrained models (model.json and weights-*.hdf5)
SAVE_DIR = 'saved'  # location where new model and weights are stored during training

In [None]:
"""
Prepare data
"""

def get_wiki_text(title):
    """
    Fetch the extract of an English Wikipedia article with markup tags removed.

    Sends one `action=query` / `prop=extracts` request (following redirects)
    to the MediaWiki API and concatenates the `extract` of every page entry
    in the reply.

    Parameters:
        title: value passed as the `titles` query parameter.

    Returns:
        The concatenated extracts with everything matching TAG_RE stripped.
        Page entries that carry no `extract` key contribute nothing.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    response = requests.get(
        'https://en.wikipedia.org/w/api.php',
        params={
            'action': 'query',
            'format': 'json',
            'prop': 'extracts',
            'titles': title,
            'redirects': True
        },
        # Without a timeout a stalled connection would hang the cell forever.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    pages = response.json()['query']['pages']
    # .get(): an entry without an extract must not abort the whole download.
    extracts = [page.get('extract', '') for page in pages.values()]

    return TAG_RE.sub('', ''.join(extracts))

def get_poems(k=10):
    """
    Yield `k` randomly chosen poems from data/*/*.json, lower-cased.

    Each JSON file is expected to hold a `text` entry containing a list of
    lines; the lines are joined with newlines to form one poem string.

    Parameters:
        k: number of distinct files to sample.

    Raises:
        ValueError: (from random.sample, on first iteration) if fewer than
            `k` files match.
    """
    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # first makes the seeded sample pick the same files on every machine.
    # NOTE(review): this changes which files a given seed selects compared
    # with the unsorted listing; checkpoints trained on an earlier sample may
    # assume a different alphabet - confirm before reusing them.
    paths = sorted(glob.glob('data/*/*.json'))
    for path in random.sample(paths, k = k):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding='utf-8') as fi:
            lines = json.load(fi)['text']
        yield "\n".join(lines).lower()

def get_alphabet(text):
    """Return the distinct characters of `text` as a list in ascending order."""
    symbols = list(set(text))
    symbols.sort()
    return symbols

# Draw K poems and show the first one as a quick sanity check.
poems = [poem for poem in get_poems(k=K)]
print(poems[0])

# Mix poetry and Maersk: the lower-cased article is added as one more text.
maersk = get_wiki_text('Maersk').lower()
poems += [maersk]

# The alphabet is every distinct character that occurs anywhere in the corpus.
poems_joined = ''.join(poems)
alphabet = get_alphabet(poems_joined)
print(alphabet)

n_chars, n_distinct = len(poems_joined), len(alphabet)

print("Total Characters: ", n_chars)
print("Total distinct: ", n_distinct)

# Index -> character, used to decode the network's argmax prediction.
int_to_char = dict(enumerate(alphabet))
# Character -> one-hot vector. The identity matrix is built once (it used to
# be rebuilt for every character); row i is the one-hot vector of alphabet[i].
one_hot_rows = np.identity(n_distinct)
char_to_oh = {c: one_hot_rows[i] for i, c in enumerate(alphabet)}

# Create one-hot-encoded training data: slide a WINDOW_SIZE-character window
# over every text; the window is the input, the character after it the target.
data_X, data_y = [], []
for poem in poems:
    encoded = [char_to_oh[ch] for ch in poem]
    for start in range(len(encoded) - WINDOW_SIZE):
        stop = start + WINDOW_SIZE
        data_X.append(encoded[start:stop])
        data_y.append(encoded[stop])

n_patterns = len(data_X)
print("Total Patterns: ", n_patterns)

# Inputs: (patterns, window, alphabet); targets: (patterns, alphabet).
X = np.asarray(data_X).reshape((n_patterns, WINDOW_SIZE, n_distinct))
y = np.asarray(data_y).reshape((n_patterns, n_distinct))

In [None]:
"""
Train
"""

# Create model: two stacked LSTM layers with light dropout, followed by a
# softmax over the alphabet that predicts the next character.
model = Sequential()
model.add(LSTM(LAYER_SIZE, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.05))
model.add(LSTM(LAYER_SIZE))
model.add(Dropout(0.1))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Make sure the output directory exists before anything is written into it.
os.makedirs(SAVE_DIR, exist_ok=True)

# Save model architecture next to the weights (in SAVE_DIR, not a hard-coded
# path) so that load_model(SAVE_DIR) finds model.json and the checkpoints
# in the same place.
model_json = model.to_json()
with open('{}/model.json'.format(SAVE_DIR), 'w') as json_file:
    json_file.write(model_json)

# define the checkpoint
# The file name pattern must stay 'weights-<3-digit epoch>.hdf5': list_epochs
# and load_model parse and rebuild exactly this name.
filepath = SAVE_DIR+'/weights-{epoch:03d}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# Train
model.fit(X, y, epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=True, callbacks=callbacks_list)

In [None]:
"""
Use model
"""
GEN_LENGTH = 512  # number of characters generated per sample
# Default seed text: a four-line stanza wrapped in a leading and a trailing
# newline, lower-cased like the training corpus.
DEFAULT_SEED = "\n".join([
    "",
    "I must go down to the seas again, to the lonely sea and the sky,",
    "And all I ask is a tall ship and a star to steer her by;",
    "And the wheel’s kick and the wind’s song and the white sail’s shaking,",
    "And a grey mist on the sea’s face, and a grey dawn breaking.",
    "",
]).lower()

def generate_poetry(seed_text, model, length=200):
    """
    Lazily generate `length` characters, one at a time, by greedy decoding.

    The first WINDOW_SIZE characters of `seed_text` form the initial context.
    At every step the model is asked for its prediction, the highest-scoring
    character is emitted, and the context slides forward by one character.
    Uses the module-level tables char_to_oh / int_to_char and the sizes
    WINDOW_SIZE / n_distinct.

    Parameters:
        seed_text: text whose first WINDOW_SIZE characters prime the model.
        model: object exposing predict(batch, verbose=0).
        length: number of characters to yield.

    Yields:
        The predicted characters, in order.
    """
    # One-hot rows of the current context window.
    window = [char_to_oh[ch] for ch in seed_text[:WINDOW_SIZE]]

    produced = 0
    while produced < length:
        batch = np.reshape(window, (1, WINDOW_SIZE, n_distinct))
        scores = model.predict(batch, verbose=0)
        next_char = int_to_char[np.argmax(scores)]
        # Drop the oldest character and append the one just predicted.
        window = window[1:] + [char_to_oh[next_char]]
        produced += 1
        yield next_char

def list_epochs(directory):
    """
    Return the epoch numbers of the weight checkpoints in `directory`, ascending.

    Only files whose name starts with `weights-<digits>` are counted. The
    epoch is parsed from the file name, not the full path, so a directory
    whose own name contains `weights-<digits>` cannot be mistaken for an
    epoch, and other files starting with `weight` are skipped.

    Parameters:
        directory: folder to scan (taken literally, not as a glob pattern).

    Returns:
        Sorted list of ints; empty if no checkpoint is found.
    """
    pattern = '{}/weight*'.format(glob.escape(directory))
    epochs = []
    for path in glob.glob(pattern):
        match = re.match(r'weights-(\d+)', os.path.basename(path))
        if match:
            epochs.append(int(match.group(1)))
    return sorted(epochs)
        
def load_model(directory, epoch=None):
    """
    Load model from model.json and weights from weights-*.hdf5.
    If epoch is specified, then load corresponding weights,
    else latest epoch in directory.

    Parameters:
        directory: folder holding model.json and the weights-NNN.hdf5 files.
        epoch: epoch number of the checkpoint to load, or None for the
            highest epoch number found by list_epochs.

    Returns:
        The model rebuilt from model.json, with the weights loaded and
        compiled with categorical cross-entropy loss and the adam optimizer.

    Raises:
        ValueError: if epoch is None and the directory holds no checkpoint.
    """
    with open('{}/model.json'.format(directory), 'r') as json_file:
        model = model_from_json(json_file.read())
    if epoch is None:
        # Load weights with highest epoch number
        available = list_epochs(directory)
        if not available:
            # Clearer than the bare "max() arg is an empty sequence".
            raise ValueError(
                'no weights-*.hdf5 checkpoints found in {!r}'.format(directory))
        epoch = max(available)
    weights = '{directory}/weights-{epoch:03d}.hdf5'.format(directory=directory, epoch=epoch)
    model.load_weights(weights)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
        
# Load the newest checkpoint once, just to display the architecture.
model = load_model(LOAD_DIR)
model.summary()

print('Seed:\n', DEFAULT_SEED, end='\n\n')

# Generate one sample from every tenth saved checkpoint.
for epoch in list_epochs(LOAD_DIR)[::10]:
    # Set weights on model
    model = load_model(LOAD_DIR, epoch=epoch)
    # Generate poem
    print('Generating {}-character poem after {} iteration(s):'.format(GEN_LENGTH, epoch), end='\n\n')
    # Write character by character so the text appears as it is generated.
    for c in generate_poetry(DEFAULT_SEED, model, length=GEN_LENGTH):
        sys.stdout.write(c)
    print()
    print('<EOP>', end='\n\n')

In [None]:
# Lower-cased like DEFAULT_SEED: the alphabet is built from lower-cased text,
# so the capital 'I' on the second line would raise a KeyError in
# generate_poetry as soon as WINDOW_SIZE is large enough to reach it.
OLIVE_SEED = \
"""
a ship my ship containers are nice
I stuff my containers full of olives and mice
the waves lay gently a craddle of money
my future is golden from olives and honey
""".lower()

# Checkpoint used for this sample; weights-043.hdf5 must exist in LOAD_DIR.
BEST_EPOCH = 43

model = load_model(LOAD_DIR, epoch=BEST_EPOCH)

# NOTE(review): the extra 106 characters on top of GEN_LENGTH are unexplained
# in this notebook - confirm the intended sample length.
for c in generate_poetry(OLIVE_SEED, model, length=GEN_LENGTH+106):
    sys.stdout.write(c)
print()
print('<EOP>')

In [None]:
# Is this generated fragment present verbatim in the training corpus?
# True would mean the model reproduced it rather than composing it.
fragment = 'the brides are'
''.join(poems).find(fragment) != -1