[View in Colaboratory](https://colab.research.google.com/github/mizunashi92/dlhub/blob/master/LoTR.ipynb)

## Imports

In [0]:
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

In [24]:

# List the current working directory to see which data files are already present.
!ls

LotR.txt  sample_data  shakespeare_sonnets.txt


## Load Dataset

In [25]:

#path = get_file('shakespeare-sonnets.txt', origin='https://www.dropbox.com/s/ikbwosxutqwkqr6/Lord%20of%20the%20Rings.txt')
!wget https://www.dropbox.com/s/2ftmqg2dt1kz6ng/LotR.txt

with io.open('LotR.txt', encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))


--2018-08-27 11:03:27--  https://www.dropbox.com/s/2ftmqg2dt1kz6ng/LotR.txt
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.1, 2620:100:6021:1::a27d:4101
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/2ftmqg2dt1kz6ng/LotR.txt [following]
--2018-08-27 11:03:27--  https://www.dropbox.com/s/raw/2ftmqg2dt1kz6ng/LotR.txt
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc08533c0879967828e6bd675a52.dl.dropboxusercontent.com/cd/0/inline/AO9teeIJDUUcth_41OFkiiUYt5HVqLM-S4_hsj_XV6nzZot0vM-CeHOaEv8olmgzXj1D5aJ8hPAeC1qml70RhA5zQMkpSZSvW3Pym-p5wxhN6L0KQpawRRkUPTIWFl9JpAmRdrP8nR9E5MCMPVm55DOEN67Rg-CLwQDWn_SVGbCiOBQNPnicGe7Fsgs0n6tI3Y0/file [following]
--2018-08-27 11:03:27--  https://uc08533c0879967828e6bd675a52.dl.dropboxusercontent.com/cd/0/inline/AO9teeIJDUUcth_41OFkiiUYt5HVqLM-S4_hsj_XV6nzZot0vM-CeHOaEv

In [26]:
# Sanity check: number of characters in the loaded corpus.
len(text)

964203

In [27]:
# Character vocabulary: every distinct character of the corpus, in sorted
# order, plus the two lookup tables used to convert between characters and
# their integer positions in that vocabulary.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 57


In [28]:
# Slide a window of `maxlen` characters over the corpus, moving `step`
# characters forward each time. Every window is one training input and the
# character that immediately follows it is the corresponding target.
maxlen = 40
step = 3
sentences = [
    text[start: start + maxlen]
    for start in range(0, len(text) - maxlen, step)
]
next_chars = [
    text[start + maxlen]
    for start in range(0, len(text) - maxlen, step)
]
print('nb sequences:', len(sentences))

nb sequences: 321388


In [29]:
# Preview the first four training windows.
for k in range(4):
    print(sentences[k])

﻿chapter 1 . a long-expected party 

whe
apter 1 . a long-expected party 

when m
er 1 . a long-expected party 

when mr. 
1 . a long-expected party 

when mr. bil


In [30]:
# One-hot encode the training data:
#   x[i, t, c] is True when character c sits at position t of sentence i
#   y[i, c]    is True when character c is the one that follows sentence i
print('Vectorization...')
# Use the builtin `bool` as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where accessing it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [31]:
# Network: a single 128-unit LSTM that reads one-hot windows of shape
# (maxlen, len(chars)), followed by a dense layer with a softmax over the
# character vocabulary.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

Build model...


In [0]:
# Compile the model with RMSprop (learning rate 0.01) and categorical
# cross-entropy as the loss.
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# confirm against the installed Keras version before renaming it.
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

## Sampling some text from the model

In [0]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability array after temperature rescaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: positive scaling factor applied to the log-probabilities.
            Values below 1 sharpen the distribution towards its most likely
            entry; values above 1 flatten it.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) legitimately yields -inf here (that entry just ends up with
    # probability 0), so suppress the divide-by-zero warning for this step.
    with np.errstate(divide='ignore'):
        log_preds = np.log(preds)
    logits = log_preds / temperature
    # Subtract the maximum before exponentiating. The normalised result is
    # mathematically the same, but the largest term becomes exp(0) == 1, so a
    # small temperature can no longer underflow every entry to 0 and turn the
    # normalisation into 0/0 (NaN probabilities).
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, logs):
    """Print text generated by the current model; called after each epoch.

    One random seed window of `maxlen` characters is picked from the corpus
    and reused for every diversity (sampling temperature) value. For each
    diversity, 400 characters are generated one at a time and streamed to
    stdout.

    Args:
        epoch: epoch number, used only in the printed header.
        logs: accepted to match the callback signature; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed_stop = seed_start + maxlen
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        # Every diversity run restarts from the same seed text.
        window = text[seed_start:seed_stop]
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(window)

        for _ in range(400):
            # One-hot encode the current window as a batch of one sample.
            encoded = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                encoded[0, pos, char_indices[ch]] = 1.

            distribution = model.predict(encoded, verbose=0)[0]
            picked = indices_char[sample(distribution, diversity)]

            # Drop the oldest character and append the newly generated one.
            window = window[1:] + picked

            sys.stdout.write(picked)
            sys.stdout.flush()
        print()

In [0]:
# Callback that runs on_epoch_end at the end of every training epoch.

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [0]:
# Train for 60 epochs in mini-batches of 128 samples; print_callback is
# passed in so that sample text is generated as training progresses.
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

In [0]:
# WARNING: sends SIGKILL to every process this user is allowed to signal
# (pid -1), which includes the notebook kernel itself.
# NOTE(review): presumably a hack to reset the Colab runtime; do not run
# this on a local machine or as part of "Run All".
!kill -9 -1