In [1]:
# Don't use GPUs: the variable is set before TensorFlow is imported below.
# Note: %env stores the text after '=' verbatim, so the variable ends up holding
# the two quote characters '' rather than an empty string (see the echoed
# output of this cell).
%env CUDA_VISIBLE_DEVICES=''
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle

env: CUDA_VISIBLE_DEVICES=''


## Load the trained model

In [2]:
# All paths below hang off one directory so the notebook can be relocated by
# editing a single line. The value is the original author's local checkout.
PROJECT_DIR = '/home/evo/projects/the_simpsons_script_generator'
checkout_dir = PROJECT_DIR + '/checkpoints0/model20000'

# Load the preprocessed corpus and the two vocabulary lookups.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE: pickle can execute arbitrary code while loading; only load files that
# you produced yourself.
with open(PROJECT_DIR + '/data/preprocess.p', mode='rb') as preprocess_file:
    text, words_to_ids, ids_to_words = pickle.load(preprocess_file)

sess = tf.Session()

# Rebuild the graph from the checkpoint's .meta file, then restore the weights.
loader = tf.train.import_meta_graph(checkout_dir + '.meta')
loader.restore(sess, checkout_dir)

loaded_graph = tf.get_default_graph()
# Get Tensors from loaded model (looked up by the names they were saved under).
names = ["input:0", "initial_state:0", "final_state:0", "probs:0"]
tensors = [loaded_graph.get_tensor_by_name(name) for name in names]
inputs, initial_state, final_state, probs = tensors


INFO:tensorflow:Restoring parameters from /home/evo/projects/the_simpsons_script_generator/checkpoints0/model20000


In [3]:
import string
import re

# Punctuation and whitespace characters paired with the bracketed placeholder
# token that stands in for each of them. text_to_tokens() applies the
# replacements in this order.
value_to_token = dict([
    ('.', '[period]'),
    ('!', '[exclamation_mark]'),
    ('?', '[question_mark]'),
    (',', '[comma]'),
    ('"', '[quotation_mark]'),
    ("'", '[apostrophe]'),
    (':', '[colon]'),
    (';', '[semicolon]'),
    ('(', '[left_parentheses]'),
    (')', '[right_parentheses]'),
    ('-', '[dash]'),
    ('&', '[and]'),
    ('#', '[pound]'),
    ('/', '[fslash]'),
    ('\\', '[bslash]'),
    ('\n', '[return]'),
    ('\t', '[tab]'),
    (' ', '[space]'),
])

# Reverse lookup: placeholder token -> original character.
token_to_value = {token: symbol for symbol, token in value_to_token.items()}

def text_to_tokens(text, vocab=None):
    """Split raw text into lower-cased words and bracketed symbol tokens.

    Square brackets in the input are first rewritten as parentheses so they
    cannot be mistaken for the bracketed placeholder tokens. Every character
    listed in ``value_to_token`` is then replaced by its token and the text is
    split on those tokens. A piece whose first character is upper-case is
    preceded by a ``'[capital]'`` token, and every piece is lower-cased.

    Args:
        text: the string to tokenise.
        vocab: optional container supporting ``in``; when given, every token
            that is not a member is replaced by ``'not_in_vocab'``.

    Returns:
        A list of token strings.
    """
    normalized = text.replace('[', '(').replace(']', ')')
    for symbol in value_to_token:
        normalized = normalized.replace(symbol, value_to_token[symbol])

    # The capture group keeps the bracketed tokens in the split result; the
    # empty strings produced between adjacent tokens are discarded.
    pieces = [piece for piece in re.compile("(\\[.*?\\])").split(normalized) if piece]

    # Record a leading capital as a separate marker token, then lower-case.
    marked = []
    for piece in pieces:
        if piece[0].isupper():
            marked.append('[capital]')
        marked.append(piece.lower())

    if vocab is None:
        return marked
    return [tok if tok in vocab else 'not_in_vocab' for tok in marked]

def tokens_to_text(sentence_tokens):
    """Turn a token sequence back into readable text.

    Tokens found in ``token_to_value`` are replaced by the character they stand
    for; every other token is emitted as-is. A ``'[capital]'`` token is not
    emitted itself: it upper-cases the first character of the token that
    follows it, unless that token is a symbol token, in which case the marker
    is simply dropped.

    Args:
        sentence_tokens: iterable of token strings.

    Returns:
        The reconstructed text as a single string.
    """
    parts = []
    capitalize = False
    for token in sentence_tokens:
        if token == '[capital]':
            capitalize = True
            continue
        if token in token_to_value:
            parts.append(token_to_value[token])
        elif capitalize:
            # capitalize() only touches the first character. title() would also
            # upper-case letters that follow a digit or other non-letter inside
            # the token, which the '[capital]' marker never recorded.
            parts.append(token.capitalize())
        else:
            parts.append(token)
        # The marker applies to the immediately following token only.
        capitalize = False
    return ''.join(parts)

def pick_word(probabilities, int_to_vocab):
    """Randomly sample the next word from a probability vector.

    The entry for the ``'not_in_vocab'`` token (looked up in the module-level
    ``words_to_ids``) is zeroed so that token is never produced, the remaining
    mass is renormalised, and an index is drawn with ``np.random.choice``.

    Args:
        probabilities: 1-D array-like of per-word probabilities, indexed by
            word id. It is not modified.
        int_to_vocab: lookup from word id to word.

    Returns:
        The sampled word, ``int_to_vocab[word_id]``.

    Raises:
        ValueError: if no probability mass is left after masking, or the
            vector contains non-finite values.
    """
    # Work on a float64 copy: the caller's array stays untouched, and the
    # renormalised vector passes np.random.choice's sum-to-1 tolerance check,
    # which a float32 vector can fail.
    weights = np.array(probabilities, dtype=np.float64)
    weights[words_to_ids['not_in_vocab']] = 0.

    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError('no probability mass left to sample a word from')
    # make it sum to 1
    weights /= total

    # Sampling (rather than taking np.argmax) is what makes successive runs differ.
    word_id = np.random.choice(len(weights), p=weights)
    return int_to_vocab[word_id]

def generate_script(starting_text='Homer Simpson:', sentences=2):
    """Generate text by repeatedly sampling the next token from the restored model.

    Args:
        starting_text: seed text. It is tokenised with ``text_to_tokens`` and
            kept at the beginning of the result.
        sentences: generation stops once this many sentence-ending tokens
            (``[period]``, ``[exclamation_mark]``, ``[question_mark]``) have
            been generated. Terminators already present in ``starting_text``
            are not counted. At least one token is always generated.

    Returns:
        The seed plus the generated tokens, converted to text with
        ``tokens_to_text``.

    Raises:
        ValueError: if ``starting_text`` produces no tokens.
    """
    sentence_tokens = text_to_tokens(starting_text)
    if not sentence_tokens:
        raise ValueError('starting_text must contain at least one token')

    # Seed words missing from the vocabulary are fed to the model as the
    # 'not_in_vocab' id instead of raising KeyError. sentence_tokens keeps the
    # original spelling, so the returned text still shows what was typed.
    unknown_id = words_to_ids['not_in_vocab']
    sentence_ids = [words_to_ids.get(token, unknown_id) for token in sentence_tokens]

    prev_state = sess.run(initial_state, {inputs: np.array([[1]])})
    sentence_count = 0
    while True:
        # NOTE(review): the whole sequence is re-fed on every step while
        # prev_state is also carried forward, so earlier tokens reach the
        # state more than once. Behaviour is kept as in the original; confirm
        # this is intended.
        probabilities, prev_state = sess.run(
            [probs, final_state],
            {inputs: [sentence_ids], initial_state: prev_state})

        # Sample from the distribution predicted after the last fed token.
        predicted_token = pick_word(probabilities[0][len(sentence_ids) - 1], ids_to_words)
        sentence_tokens.append(predicted_token)
        # Extend the id list incrementally rather than re-mapping every token.
        sentence_ids.append(words_to_ids.get(predicted_token, unknown_id))

        if predicted_token in ('[period]', '[exclamation_mark]', '[question_mark]'):
            sentence_count += 1
        if sentence_count >= sentences:
            break

    return tokens_to_text(sentence_tokens)

## A demo app for a deep learning Simpsons movie script generator

The next word is picked at random, weighted by the model's predicted probabilities, so the app generates a different sequence most of the time.

In [4]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import clear_output
clear_output(wait=True)

# Speaker prefixes offered in the dropdown; the selected one starts the seed text.
characters_options = [
    'Homer Simpson:',
    'Bart Simpson:',
    'Lisa Simpson:',
    'Marge Simpson:',
    'Seymour Skinner:',
    'Ned Flanders:',
    'Moe Szyslak:',
    'Voice:']
characters = widgets.Dropdown(
    options=characters_options,
    value='Homer Simpson:',
    description='Character:'
)
display(characters)

# Optional opening words for the chosen character's first sentence.
start_text = widgets.Textarea(
    value='Cheers for',
    placeholder='Type something if you want to start the charecters first sentence',
    description='Start Text:',
)
display(start_text)

# Number of sentences to generate.
sentences = widgets.IntSlider(min=1, max=10, value=4, description='Sentences')
display(sentences)

generate = widgets.Button(
    description='Generate',
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Generate script'
)
display(generate)

# Result area. The initial value is an empty, properly closed <pre> element
# (the opening tag was previously repeated instead of being closed).
output = widgets.HTML(
    value="<pre></pre>",
    placeholder='output',
    description='',
)

def on_button_clicked(b):
    """Generate a script from the current widget values and show it in ``output``.

    Args:
        b: the button instance passed in by ``on_click``; unused.

    Raises:
        Whatever ``generate_script`` raises. The error is also shown in the
        output widget before being re-raised.
    """
    import html  # local import: only this callback needs it

    output.value = '<pre>Calculating ...</pre>'
    try:
        generated_text = generate_script(
                starting_text=characters.value + ' ' + start_text.value,
                sentences=sentences.value)
    except Exception as error:
        # Surface the failure instead of leaving the UI on "Calculating ...".
        output.value = '<pre>Generation failed: ' + html.escape(str(error), quote=False) + '</pre>'
        raise
    # The text is placed inside HTML markup, so '&', '<' and '>' coming from the
    # user's seed text or from the model must be escaped to render literally.
    output.value = '<pre>' + html.escape(generated_text, quote=False) + '</pre>'

# Run on_button_clicked each time the "Generate" button is pressed.
generate.on_click(on_button_clicked)
# Show the result area below the controls; the callback updates its value.
display(output);

Widget Javascript not detected.  It may not be installed properly.
Widget Javascript not detected.  It may not be installed properly.
Widget Javascript not detected.  It may not be installed properly.
Widget Javascript not detected.  It may not be installed properly.
Widget Javascript not detected.  It may not be installed properly.
