In [1]:
from __future__ import print_function
import cPickle as pickle

import numpy as np
import tensorflow as tf

from keras import backend as K
from keras.models import Model, load_model
from keras.preprocessing.sequence import pad_sequences

from data_generator import DataGenerator
from lstm_cvae_model import ModelConfig

np.random.seed(123)

Using TensorFlow backend.


In [2]:
# Load trained models
#
# NOTE: MODEL_DIR is an absolute, machine-specific path; point it at your own
# checkpoint directory before running this cell.
MODEL_DIR = "/Users/tongwang/Playground/deepjoke/code/model_checkpoints/lstm_cvae/20170618_072219"
encoder_path = MODEL_DIR + "/encoder_checkpoint"
generator_path = MODEL_DIR + "/generator_checkpoint"
tokenizer_path = MODEL_DIR + "/tokenizer.p"
model_config_path = MODEL_DIR + "/model_config.p"

encoder = load_model(encoder_path)
generator = load_model(generator_path)

# Pickles are binary data: open them in "rb" mode and let the context manager
# close the handles instead of leaking them.
# SECURITY: pickle.load can execute arbitrary code; only load checkpoints that
# you produced yourself or otherwise trust.
with open(tokenizer_path, "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
with open(model_config_path, "rb") as model_config_file:
    model_config = pickle.load(model_config_file)



In [3]:
def softmax(x):
    """Turn an array of scores into weights that sum to one.

    The largest score is subtracted before exponentiating so that big inputs
    do not overflow. Normalisation runs over every element of x (no axis).
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total

def sample(preds, temperature=None):
    """Pick an index from an array of logits.

    Args:
        preds: 1-d array-like of unnormalised scores (logits).
        temperature: None for greedy selection (argmax of preds); otherwise a
            non-zero number used to rescale the logits before a single
            multinomial draw. Values below 1 sharpen the distribution,
            values above 1 flatten it.
    Returns:
        The selected index.
    Raises:
        ValueError: if temperature is 0 (the scaled logits would be undefined).
    """
    if temperature is None:
        return np.argmax(preds)
    if temperature == 0:
        raise ValueError("temperature must be non-zero; "
                         "use temperature=None for greedy sampling")

    logits = np.asarray(preds).astype('float64')
    # softmax(logits) ** (1 / T), renormalised, is exactly softmax(logits / T).
    # Computing it directly avoids log(0) when a probability underflows.
    scaled = logits / temperature
    scaled = scaled - np.max(scaled)  # shift so the largest exponent is 0
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    draws = np.random.multinomial(1, probs, 1)
    return np.argmax(draws)

def tokens_to_words(tokens, tokenizer, eos="", unk="<unk>"):
    """Turn a 1-d sequence of token indices back into a space-joined string.

    Args:
        tokens: iterable of integer token indices.
        tokenizer: object exposing a `word_index` mapping of word -> index.
        eos: text substituted for index 0.
        unk: text substituted for any index missing from `word_index`;
            without it such an index would make the join fail on None.
    Returns:
        The words joined by single spaces ("" for an empty input).
    """
    # .items() works on both Python 2 and 3, unlike .iteritems().
    reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}
    reverse_word_index[0] = eos
    words = [reverse_word_index.get(token, unk) for token in tokens]
    return " ".join(words)

In [10]:
def generate_text(target_score, generator, model_config, tokenizer,
                  starter_sentence="", temperature=None, eos="", verbose=True):
    """Generate one piece of text for a target score and a random latent vector.

    The starter sentence is tokenized, post-padded/truncated to
    model_config.max_sequence_length and repeated batch_size times. Only the
    first row of that batch is extended token by token and decoded.

    Args:
        target_score: score the text is conditioned on (repeated batch_size times).
        generator: model whose predict([sequence, scores, z]) output is indexed
            as [row, timestep, token].
        model_config: object providing batch_size, latent_size and
            max_sequence_length.
        tokenizer: tokenizer providing texts_to_sequences and word_index.
        starter_sentence: text the generation starts from.
        temperature: if None, generate text greedily; otherwise sample stochastically.
        eos: text used for token 0 when translating tokens back to words.
        verbose: if True (default), print every newly generated token.
    Returns:
        A single string: the starter sentence followed by the generated words.
    """
    batch_size = model_config.batch_size
    max_len = model_config.max_sequence_length

    # Prepare inputs
    z = np.random.normal(size=(batch_size, model_config.latent_size))
    scores = np.repeat(target_score, batch_size)
    cur_sequence = tokenizer.texts_to_sequences([starter_sentence])
    cur_sequence = pad_sequences(cur_sequence, maxlen=max_len,
                                 padding='post', truncating='post')
    cur_sequence = np.repeat(cur_sequence, batch_size, axis=0)

    # .items() works on both Python 2 and 3, unlike .iteritems().
    reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}

    # Iteratively predict the next word
    while True:
        # Number of non-padding entries in row 0 (assumes real tokens are > 0
        # and 0 is padding — TODO confirm against the tokenizer).
        true_len = len(cur_sequence[0][cur_sequence[0] > 0])
        if true_len == max_len:
            break
        # NOTE(review): with an empty starter_sentence true_len is 0, so the
        # timestep index below is -1 (the last timestep) — confirm this is intended.
        next_preds = generator.predict([cur_sequence, scores, z])[0, true_len - 1, :]
        next_token = sample(next_preds, temperature)
        if next_token == 0:
            # Token 0 is treated as the end of the text.
            break
        cur_sequence[0][true_len] = next_token
        if verbose:
            # The run of spaces is kept so the output matches earlier runs.
            print("current sentence length: {},         new token: {}".format(
                true_len, reverse_word_index.get(next_token)))
    pred_sequence = cur_sequence[0][cur_sequence[0] > 0]

    # Translate tokens to words
    return tokens_to_words(pred_sequence, tokenizer=tokenizer, eos=eos)

In [7]:
# Greedy decoding (temperature=None) for a target score of 5.
STARTER_SENTENCE = "a man"

text1 = generate_text(
    target_score=5,
    generator=generator,
    model_config=model_config,
    tokenizer=tokenizer,
    starter_sentence=STARTER_SENTENCE,
    temperature=None,
)

current sentence length: 2,         new token: walks
current sentence length: 3,         new token: into
current sentence length: 4,         new token: a
current sentence length: 5,         new token: bar
current sentence length: 6,         new token: and
current sentence length: 7,         new token: sees
current sentence length: 8,         new token: a
current sentence length: 9,         new token: man
current sentence length: 10,         new token: sitting
current sentence length: 11,         new token: next
current sentence length: 12,         new token: to
current sentence length: 13,         new token: him
current sentence length: 14,         new token: .
current sentence length: 15,         new token: he
current sentence length: 16,         new token: says
current sentence length: 17,         new token: ,
current sentence length: 18,         new token: i
current sentence length: 19,         new token: have
current sentence length: 20,         new token: a
current sentence length

NameError: global name 'texts' is not defined

In [11]:
# Stochastic decoding at temperature 0.5 for a target score of 5.
STARTER_SENTENCE = "a man"

text2 = generate_text(
    target_score=5,
    generator=generator,
    model_config=model_config,
    tokenizer=tokenizer,
    starter_sentence=STARTER_SENTENCE,
    temperature=0.5,
)

current sentence length: 2,         new token: walks
current sentence length: 3,         new token: into
current sentence length: 4,         new token: a
current sentence length: 5,         new token: bar
current sentence length: 6,         new token: and
current sentence length: 7,         new token: orders
current sentence length: 8,         new token: a
current sentence length: 9,         new token: beer
current sentence length: 10,         new token: .
current sentence length: 11,         new token: the
current sentence length: 12,         new token: bartender
current sentence length: 13,         new token: says
current sentence length: 14,         new token: ,
current sentence length: 15,         new token: you
current sentence length: 16,         new token: know
current sentence length: 17,         new token: ,
current sentence length: 18,         new token: i
current sentence length: 19,         new token: am
current sentence length: 20,         new token: going
current sentence

In [12]:
# Display the generated string held in text2 (bare last expression).
text2

"a man walks into a bar and orders a beer . the bartender says , you know , i am going to give you a drink . '' the man says , well , i don't know , but i don't have any trouble . the bartender replies , i just got a beer ."

In [13]:
# A second stochastic sample at temperature 0.5 with the same target score of 5.
STARTER_SENTENCE = "a man"

text3 = generate_text(
    target_score=5,
    generator=generator,
    model_config=model_config,
    tokenizer=tokenizer,
    starter_sentence=STARTER_SENTENCE,
    temperature=0.5,
)
text3

current sentence length: 2,         new token: walks
current sentence length: 3,         new token: into
current sentence length: 4,         new token: a
current sentence length: 5,         new token: bar
current sentence length: 6,         new token: and
current sentence length: 7,         new token: takes
current sentence length: 8,         new token: a
current sentence length: 9,         new token: seat
current sentence length: 10,         new token: to
current sentence length: 11,         new token: the
current sentence length: 12,         new token: bar
current sentence length: 13,         new token: .
current sentence length: 14,         new token: he
current sentence length: 15,         new token: says
current sentence length: 16,         new token: ,
current sentence length: 17,         new token: i
current sentence length: 18,         new token: don't
current sentence length: 19,         new token: know
current sentence length: 20,         new token: ,
current sentence length:

In [14]:
# Same prompt and temperature, but conditioned on a target score of 10.
STARTER_SENTENCE = "a man"

text4 = generate_text(
    target_score=10,
    generator=generator,
    model_config=model_config,
    tokenizer=tokenizer,
    starter_sentence=STARTER_SENTENCE,
    temperature=0.5,
)
text4

current sentence length: 2,         new token: is
current sentence length: 3,         new token: sitting
current sentence length: 4,         new token: on
current sentence length: 5,         new token: a
current sentence length: 6,         new token: plane
current sentence length: 7,         new token: when
current sentence length: 8,         new token: he
current sentence length: 9,         new token: sees
current sentence length: 10,         new token: a
current sentence length: 11,         new token: man
current sentence length: 12,         new token: in
current sentence length: 13,         new token: the
current sentence length: 14,         new token: front
current sentence length: 15,         new token: porch
current sentence length: 16,         new token: .
current sentence length: 17,         new token: a
current sentence length: 18,         new token: genie
current sentence length: 19,         new token: appears
current sentence length: 20,         new token: on
current sentenc