<a href="https://colab.research.google.com/github/sagorbrur/my_colab_notebook/blob/master/abstract_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io


path = 'data/abstract_data.txt'

# Read the whole corpus as one lower-cased string.
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in the corpus, in sorted order, plus
# the two lookup tables (char -> index, index -> char) used for encoding.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = {idx: ch for idx, ch in enumerate(chars)}

# Slice the corpus into overlapping windows of `maxlen` characters, taking a
# new window every `step` characters. Each window is paired with the single
# character that immediately follows it (the prediction target).
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the training data:
#   x[i, t, c] is True when character index c sits at position t of window i
#   y[i, c]    is True when character index c follows window i
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24 (it raises AttributeError
# there); `bool` yields the same array dtype on every NumPy version.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# Model: one 128-unit LSTM reading a (maxlen, vocabulary-size) one-hot window,
# followed by a softmax layer with one output per vocabulary character.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

# Targets are one-hot rows, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the resulting distribution using
    ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Scaling factor applied to the log-probabilities. Values
            below 1 sharpen the distribution, values above 1 flatten it.
            A temperature of exactly 0 selects the most probable index
            deterministically.

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds, dtype='float64')
    if temperature == 0:
        # Greedy limit of the softmax; dividing by zero below would only
        # produce NaNs.
        return np.argmax(preds)

    with np.errstate(divide='ignore'):
        # log(0) is -inf, which maps back to probability 0 after exp();
        # the divide-by-zero warning it would emit is just noise.
        logits = np.log(preds) / temperature

    # Softmax is invariant to a constant shift. Shifting so the largest logit
    # is 0 guarantees at least one exp() term equals 1, so a small temperature
    # can no longer underflow every term to 0 and yield 0/0 = NaN.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    probas = np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    """Print text generated by the current model at several diversities.

    Used as the ``on_epoch_end`` hook of a ``LambdaCallback``. A random
    ``maxlen``-character slice of ``text`` is picked once as the seed and
    reused for each diversity value; for each one, 400 characters are
    generated one at a time and streamed to stdout.

    Args:
        epoch: Epoch number passed by the callback; only used in the header.
        _: Second callback argument; ignored.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is shared by every diversity so the outputs are comparable.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        window = seed
        for _step in range(400):
            # One-hot encode the current window as a batch of size one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window: drop the oldest character, append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Run on_epoch_end after every epoch so sample generations are printed
# alongside the training log.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train on the one-hot windows and their next-character targets.
model.fit(
    x,
    y,
    batch_size=128,
    epochs=60,
    callbacks=[print_callback],
)

corpus length: 1043264
total chars: 79
nb sequences: 347742
Vectorization...
Build model...


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: ", learning structurally similar patterns"
, learning structurally similar patterns and the the parser that the system for a structures that a simple the parse that the features and the system that the sense and our and the system for the system for a structures and the system that a parse that the system that the trained on the consticul relations of a structure that the trained on the parser to consticues that the parse translation of the context for a structures and a structu
----- diversity: 0.5
----- Generating with seed: ", learning structurally similar patterns"
, learning structurally similar patterns for subsequence that the speech to lexicon to alignments of the training and the hierarchically 



of the statistical machine translation to a novel and the statistical machine translation and the manually and the semantic analysis of the semantic corpus of the semantic parser and a semantic relations are used to the problems of the corpus of the specific and the 
----- diversity: 0.5
----- Generating with seed: "are preserved when parses are projected "
are preserved when parses are projected framework of the problems in the problem of correspondence of finite-state of the as evaluation of the parameters.
we propose a parser sentence matching corpus and the higher model for the system that computation for training which implicit the segmentation and the corpus of corpus of different tagger exploited to accuracy text corpus of parameters of a parameters of mimple list of the such as chi
----- diversity: 1.0
----- Generating with seed: "are preserved when parses are projected "
are preserved when parses are projected correspon, in a language side and 
othests efficience ffilud abouc 