## Train LSTM

In [1]:
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import os
import sys
sys.path.append("/home/ubuntu/part2")
%matplotlib inline
import importlib
import utils2; importlib.reload(utils2)
from utils2 import *
# Fetch the Nietzsche corpus via Keras' get_file helper and lower-case it.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read inside a context manager so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Character vocabulary plus the two lookup tables used for one-hot
# encoding (char -> index) and decoding (index -> char).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

maxlen = 40  # number of characters in each input window fed to the LSTM
step = 3     # stride between the starts of consecutive training windows

# One LSTM layer over (maxlen, vocabulary) one-hot input, followed by a
# softmax over the vocabulary to predict the next character.
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)



Using TensorFlow backend.


corpus length: 600893
total chars: 57


In [2]:
# limit_mem is not defined in this notebook; presumably it comes from the
# `from utils2 import *` above and restricts GPU memory use — TODO confirm.
limit_mem()

In [3]:
def train(nb_epoch=5):
    """Fit the global ``model`` on the corpus and save its weights.

    The corpus ``text`` is cut into overlapping windows of ``maxlen``
    characters taken every ``step`` characters; each window is paired with
    the character that immediately follows it. Windows and targets are
    one-hot encoded with ``char_indices`` before being passed to
    ``model.fit``. The fitted weights are written to ``"nt_weights"``.

    Args:
        nb_epoch: number of training epochs forwarded to ``model.fit``.
    """
    sentences = []
    next_chars = []
    for i in range(0, len(text) - maxlen, step):
        sentences.append(text[i: i + maxlen])
        next_chars.append(text[i + maxlen])
    print('nb sequences:', len(sentences))

    print('Vectorization...')
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(sentences), len(chars)), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1

    print('Build model...')
    # NOTE(review): `nb_epoch` is the older Keras spelling of this argument;
    # kept as-is to match the Keras version this notebook was written for.
    model.fit(X, y, batch_size=128, nb_epoch=nb_epoch)
    model.save_weights("nt_weights")

In [5]:
# Train only when no saved weights are present, then load from disk in
# either case. `weights_at` must match the path train() saves to.
weights_at = "nt_weights"
if not os.path.exists(weights_at):
    train(20)
# Reuse the variable rather than repeating the literal so the existence
# check and the load can never point at different files.
model.load_weights(weights_at)

In [6]:

def sample(preds, temperature=1.0):
    """Draw one index at random from a probability array.

    The probabilities are re-weighted by ``temperature`` (log, divide,
    exponentiate, renormalise) and a single multinomial draw is made from
    the result; the index of the drawn category is returned.
    """
    scaled_logits = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_logits)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a 1 x len(preds) one-hot row.
    draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(draw)


def next_given_seed(seed, model, temperature=0.2):
    """Generate characters after ``seed`` until a word-ending space appears.

    Characters are sampled one at a time from ``model``'s prediction for the
    current seed window. Generation stops at the first space that is not the
    very first generated character, so the returned string always ends with
    a space and may also start with one.

    Args:
        seed: context string; only its last ``maxlen`` characters are used.
        model: object whose ``predict`` returns next-character probabilities
            for a ``(1, maxlen, len(chars))`` one-hot array.
        temperature: sampling temperature forwarded to ``sample``.

    Returns:
        The generated characters, including the terminating space.

    Raises:
        KeyError: if ``seed`` contains a character missing from
            ``char_indices``.
    """
    # The one-hot buffer holds `maxlen` time steps, so a longer seed would
    # index past it; keep only the most recent characters.
    seed = seed[-maxlen:]
    generated = ''
    n_generated, num_spaces = 0, 0
    while num_spaces < 1:
        # One-hot encode the current window; a seed shorter than `maxlen`
        # leaves the remaining time steps as zeros.
        x = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(seed):
            x[0, t, char_indices[char]] = 1.

        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, temperature)
        next_char = indices_char[next_index]
        # A space as the first generated character is only a separator, not
        # the end of a word, so it does not count.
        if next_char == ' ' and n_generated > 0:
            num_spaces += 1
        generated += next_char
        # Slide the window: drop the oldest character, append the new one.
        seed = seed[1:] + next_char
        n_generated += 1

    return generated

In [7]:
def fill_blanks(incomplete, debug=False):
    """Fill every ``_`` token in ``incomplete`` with a model-generated word.

    The text is split on single spaces and rebuilt word by word. Each time a
    ``_`` token is met, the last ``maxlen`` characters rebuilt so far are
    used as the seed for ``next_given_seed`` and the generated word takes
    the blank's place in the rebuilt context.

    Args:
        incomplete: text whose blanks are written as standalone ``_`` tokens
            separated from their neighbours by single spaces.
        debug: when true, print the seed and generated word for each blank.

    Returns:
        dict mapping the 0-based blank position to the generated string,
        exactly as returned by ``next_given_seed``.
    """
    model.load_weights("nt_weights")
    words = incomplete.split(" ")
    complete = ''
    blanks = {}
    blank_i = 0
    for word in words:
        if word == "_":
            # Take the trailing `maxlen` characters of the context. The start
            # is clamped at 0: a negative start would be interpreted relative
            # to the end of the string and silently shorten the seed whenever
            # fewer than `maxlen` characters are available.
            start = max(0, len(complete) - maxlen)
            seed = complete[start:]
            filled_word = next_given_seed(seed, model)
            blanks[blank_i] = filled_word
            if debug:
                print("seed = {0}, generated = {1}".format(seed, filled_word))
            # The generated string ends with a space (and may start with
            # one); strip those before joining so later seeds do not contain
            # doubled spaces.
            complete = complete + " " + filled_word.strip(" ")
            blank_i += 1
        else:
            complete = complete + " " + word
    return blanks



In [17]:
from termcolor import colored, cprint

# Reference passage. Adjacent string literals inside parentheses are used
# instead of backslash continuations, with an explicit space at each join so
# words on either side of a line break are not glued together.
txt_original = (
    "IF WINTER comes, the poet Shelley asked, \"can Spring be far behind?\" "
    "For the best part of a decade the answer as far as the world economy has been "
    "concerned has been an increasingly weary \"Yes it can\". Now, though, after testing "
    "the faith of the most patient souls with glimmers that came to nothing, things seem "
    "to be warming up. It looks likely that this year, for the first time since 2010, "
    "rich-world and developing economies will put on synchronised growth spurts."
)

# Same passage with selected words replaced by "_" for the model to fill in.
txt = (
    "IF WINTER comes, the poet Shelley asked, \"can Spring be _ behind?\" "
    "For the best part of a decade the _ as far as the world economy has been "
    "concerned has been an increasingly weary \"Yes it can\". Now, though, after testing "
    "the faith of the most _ souls with _ that came to nothing, things seem "
    "to be warming up. It looks _ that this year, for the _ time since 2010, "
    "rich-world and _ economies will put on synchronised growth spurts."
)

# The vocabulary was built from lower-cased text, so lower-case the input too.
txt = txt.lower()
# Raw ANSI colour codes; not used by the termcolor-based rendering below.
CRED = '\033[91m'
CEND = '\033[0m'

# Rebuild the passage, highlighting every generated word.
res = []
blanks = fill_blanks(txt)
blanks_i = 0
for word in txt.split(" "):
    if word == "_":
        res.append(colored(blanks[blanks_i], on_color='on_yellow', attrs=['bold']))
        blanks_i += 1
    else:
        res.append(word)
print(" ".join(res))



if winter comes, the poet shelley asked, "can spring be [1m[43m the [0m behind?"for the best part of a decade the [1m[43m theory [0m as far as the world economy has been concerned has been an increasingly weary "yes it can". now, though, after testing the faith of the most [1m[43m probably [0m souls with [1m[43m the [0m that came to nothing, things seem to be warming up. it looks [1m[43m to [0m that this year, for the [1m[43m standard [0m time since 2010, rich-world and [1m[43m the [0m economies will put on synchronised growth spurts.
