### What is this notebook?
This notebook reads movie conversations and uses them to train a chatbot that can interact with people. The chatbot is trained as a TensorFlow neural network.

In [1]:
import ast
import pandas as pd
import numpy as np
import tensorflow as tf
import re
import time

## Section 1.1: Preprocessing Functions

In [2]:
def load_in_lines(lines_path='data/dialogues/movie_lines.txt',
                  conversations_path='data/dialogues/movie_conversations.txt'):
    '''
    Reads the two raw corpus files and splits each one on the newline character.
    INPUT
        lines_path: string. Path of the movie lines file. Defaults to the
            location used by this notebook.
        conversations_path: string. Path of the movie conversations file.
            Defaults to the location used by this notebook.
    OUTPUT
        lines: list of strings. One entry per line of lines_path.
        conv_lines: list of strings. One entry per line of conversations_path.
    A file that ends with a newline yields a trailing empty string in its list.
    '''
    with open(lines_path, 'r') as f:
        lines = f.read().split('\n')
    with open(conversations_path, 'r') as g:
        conv_lines = g.read().split('\n')
    return lines, conv_lines

In [3]:
def create_line_mapping(lines):
    '''
    Builds a lookup table from line id to spoken text.
    Each raw line is split on ' +++$+++ '. Only lines that split into exactly
    five fields are used: the first field is the key and the fifth field is
    the value. Every other line is ignored.
    '''
    split_rows = (raw_line.split(' +++$+++ ') for raw_line in lines)
    return {fields[0]: fields[4] for fields in split_rows if len(fields) == 5}

In [4]:
def format_conv_lines(conv_lines):
    '''
    Extracts just the line ids from the conv_lines list.
    INPUT
        conv_lines: list of strings. Each non-blank string is a conversation
            record whose last ' +++$+++ '-separated field is a Python list
            literal of line ids.
    OUTPUT
        convs: list of lists of strings. Each string is a lineid.
    Blank entries (such as the empty string left behind by a trailing newline
    when the file is split) are skipped wherever they occur, so no real
    conversation is dropped when the trailing blank entry is absent.
    '''
    convs = []
    for conv in conv_lines:
        if not conv.strip():
            continue
        conv_line_list = conv.split(" +++$+++ ")[-1]
        # literal_eval only parses Python literals; it does not execute code.
        convs.append(ast.literal_eval(conv_line_list))
    return convs

In [5]:
def create_questions_and_answers(convs, line_mapping):
    '''
    Pairs up consecutive lines of every conversation.
    For each conversation (a list of line ids) every line except the last is
    looked up in line_mapping and stored as a question, and the line that
    follows it is stored as the matching answer.
    Returns the list of questions and the list of answers, aligned by index.
    '''
    questions, answers = [], []
    for line_ids in convs:
        for asked_id, replied_id in zip(line_ids, line_ids[1:]):
            questions.append(line_mapping[asked_id])
            answers.append(line_mapping[replied_id])
    return questions, answers

In [6]:
def clean_text(text):
    '''
    Clean text by removing unnecessary characters and altering the format of words.
    INPUT
        text: string. A single movie line.
    OUTPUT
        text: string. Lower-cased line with common contractions expanded and
            the punctuation listed in the final pattern removed.
    '''
    # Applied strictly in this order: the specific contractions ("won't",
    # "can't") must run before the generic "n't" rule, and "n't" before "n'".
    substitutions = (
        (r"i'm", "i am"),
        (r"he's", "he is"),
        (r"she's", "she is"),
        (r"it's", "it is"),
        (r"that's", "that is"),
        (r"what's", "what is"),
        (r"where's", "where is"),
        (r"how's", "how is"),
        (r"\'ll", " will"),
        (r"\'ve", " have"),
        (r"\'re", " are"),
        (r"\'d", " would"),
        (r"won't", "will not"),
        (r"can't", "cannot"),
        (r"n't", " not"),
        (r"n'", "ng"),
        (r"'bout", "about"),
        (r"'til", "until"),
        (r"[-()\"#/@;:<>{}`+=~|.!?,]", ""),
    )

    text = text.lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    return text

In [7]:
def create_clean_questions_and_answers(questions, answers):
    '''
    Runs clean_text over every question and every answer.
    Returns two new lists (cleaned questions, cleaned answers) in the same
    order as the inputs.
    '''
    cleaned_pair = []
    for sentences in (questions, answers):
        cleaned_pair.append([clean_text(sentence) for sentence in sentences])
    clean_questions, clean_answers = cleaned_pair
    return clean_questions, clean_answers

In [8]:
def filter_long_and_short_sentences(
        questions,
        answers,
        min_sentence_length=2,
        max_sentence_length=20):
    '''
    Keeps only the question/answer pairs in which BOTH sentences have a word
    count strictly greater than min_sentence_length and strictly less than
    max_sentence_length (with the defaults: 3 to 19 words).
    Words are counted by splitting on whitespace.
    Returns the filtered questions and the filtered answers, aligned by index.
    '''
    def within_bounds(sentence):
        word_count = len(sentence.split())
        return min_sentence_length < word_count < max_sentence_length

    kept_pairs = [
        (question, answer)
        for question, answer in zip(questions, answers)
        if within_bounds(question) and within_bounds(answer)
    ]
    questions_filtered = [question for question, _ in kept_pairs]
    answers_filtered = [answer for _, answer in kept_pairs]

    return questions_filtered, answers_filtered

In [9]:
def create_word_frequency_dictionaries(sentence_list):
    '''
    Counts how often each whitespace-separated word appears across all
    sentences in sentence_list (either the questions or the answers).
    Returns a dictionary mapping each word to its number of occurrences.
    '''
    counts = {}
    for sentence in sentence_list:
        for token in sentence.split():
            counts[token] = counts.get(token, 0) + 1
    return counts

In [10]:
def filter_uncommon_words(vocab, threshold):
    '''
    Filters out words from vocab dictionary (output of create_word_frequency_dictionaries)
    that occur fewer than threshold many times.
    INPUT
        vocab: dictionary mapping words to occurrence counts.
        threshold: number. Minimum count (inclusive) a word needs to be kept.
    OUTPUT
        filtered_vocab: new dictionary holding only the words whose count is
            at least threshold.
    '''
    # .items() works on both Python 2 and Python 3; .iteritems() is Python 2 only.
    filtered_vocab = {key: val for key, val in vocab.items() if val >= threshold}
    return filtered_vocab

In [11]:
def create_unique_identifiers(vocab):
    '''
    Creates dictionary mapping each word in vocab to a unique number.
    INPUT
        vocab: dictionary whose keys are the words to number.
    OUTPUT
        vocab_identifiers: new dictionary mapping each word to an integer in
            0 .. len(vocab) - 1.
    Words are numbered in sorted order so that the same vocabulary always
    receives the same ids, independent of set/dict iteration order.
    '''
    vocab_identifiers = {
        word: unique_int for unique_int, word in enumerate(sorted(vocab))
    }
    return vocab_identifiers

In [12]:
def create_dictionary_tokens(vocab_identifiers):
    '''
    Adds identification tokens to the vocab_identifiers dictionary.
    INPUT
        vocab_identifiers: dictionary mapping words to unique integers.
            It is modified IN PLACE.
    OUTPUT
        identifiers_with_tokens: the same dictionary object, now also holding
            '<PAD>', '<EOS>', '<UNK>' and '<GO>'.
    Each token that is not already present receives its own id, starting one
    above the largest id currently in the dictionary. Calling the function
    again on the same dictionary therefore changes nothing.
    '''
    codes = ['<PAD>','<EOS>','<UNK>','<GO>']

    # Derive the next free id from the argument itself rather than from a
    # notebook-level variable, and advance it per token so ids never collide.
    next_id = max(vocab_identifiers.values()) + 1 if vocab_identifiers else 0
    for code in codes:
        if code in vocab_identifiers:
            continue
        vocab_identifiers[code] = next_id
        next_id += 1

    # Rename vocab_identifiers now this has tokens. This is just for interpretability reasons.
    identifiers_with_tokens = vocab_identifiers
    return identifiers_with_tokens

In [13]:
def create_int_to_word_dict(identifiers_with_tokens):
    '''
    Creates a dictionary mapping integers to words as opposed to words mapping to
    integers.
    INPUT
        identifiers_with_tokens: dictionary mapping words (and tokens) to integers.
    OUTPUT
        int_to_word_dict: new dictionary with keys and values swapped. If two
            words share an integer, only one of them survives the inversion.
    '''
    # .items() works on both Python 2 and Python 3; .iteritems() is Python 2 only.
    int_to_word_dict = {idx : word for word, idx in identifiers_with_tokens.items()}
    return int_to_word_dict

In [14]:
def add_eos_token(lines):
    '''
    Marks the end of every sentence.
    lines is a list of strings (sentences), typically questions_filtered or
    answers_filtered, but any list of strings will work.
    Returns a new list in which ' <EOS>' has been appended to each string.
    '''
    lines_with_eos = []
    for sentence in lines:
        lines_with_eos.append(sentence + ' <EOS>')
    return lines_with_eos

In [15]:
def convert_lines_to_ints(lines, word_identifiers):
    '''
    Converts list of lines to a list of list of integers where each integer represents
    a word. The integer mapping is per the word_identifiers defined by the
    create_unique_identifiers function. If a word isn't in the word_identifiers, it will be
    replaced with '<UNK>' for unknown.
    INPUT
        lines: list of strings. Each string is a movie line.
        word_identifiers: dictionary. maps words to unique identifiers.
    OUTPUT
        int_lines: list of lists. Each inner list holds one entry per word:
            the word's integer id, or the literal string '<UNK>' when the word
            is not in word_identifiers.
    NOTE: unknown words become the string '<UNK>', not the integer stored under
    the '<UNK>' key; get_percent_unk counts unknown words by checking for
    non-integer entries.
    '''
    int_lines = []
    for line in lines:
        # Build a real list (not a lazy map object) so callers can take len()
        # and index into each line on both Python 2 and Python 3.
        words_as_ints = [word_identifiers.get(word, '<UNK>') for word in line.split()]
        int_lines.append(words_as_ints)

    return int_lines

In [16]:
def get_percent_unk(int_lines):
    '''
    Tells us the percent of total words in int_lines that are the '<UNK>' token.
    INPUT
        int_lines: list of list of integers and <UNK> tokens. Output of convert_lines_to_ints
        function.
    OUTPUT
        None. Prints the number of distinct entries and the percentage of
        entries that are not integers (i.e. unknown words). An empty input is
        reported as 0.0% instead of raising ZeroDivisionError.
    '''
    words_flat = [word for sentence in int_lines for word in sentence]
    word_set = set(words_flat)
    with_unk = len(words_flat)
    without_unk = len([x for x in words_flat if isinstance(x, int)])
    if with_unk:
        percent_unk = (with_unk - float(without_unk)) / with_unk
    else:
        percent_unk = 0.0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("{} unique words".format(len(word_set)))
    print("Percent of total words spoken that are unkown: {}%".format(round(100*percent_unk, 2)))

In [17]:
def sort_by_question_length(int_form_questions, int_form_answers):
    '''
    Sorts question and answer pairs by the lengths of the questions.
    Sorting puts questions of similar length next to each other, so batches
    cut from the sorted data need less padding.
    INPUT
        int_form_questions: list of lists of integers and <UNK> tokens, the questions.
        int_form_answers: list of lists of integers and <UNK> tokens, the answers.
    OUTPUT
        int_questions_sorted: the questions, shortest first.
        int_answers_sorted: the answers, reordered to stay aligned with their questions.
    The sort is stable (pairs with equal question length keep their original
    relative order) and the sentences are returned as the original objects;
    they are never converted through a numpy array, which would fail on
    ragged input or coerce integer ids to strings when '<UNK>' is present.
    '''
    order = sorted(
        range(len(int_form_questions)),
        key=lambda idx: len(int_form_questions[idx])
    )
    int_questions_sorted = [int_form_questions[idx] for idx in order]
    int_answers_sorted = [int_form_answers[idx] for idx in order]
    return int_questions_sorted, int_answers_sorted

In [57]:
def pad_sentence_batch(sentence_batch, vocab_identifiers):
    '''
    Pads every sentence in a batch so that all sentences have the same length.
    INPUT
        sentence_batch: sequence of sentences, each a sequence of word ids.
        vocab_identifiers: dictionary mapping vocabulary to unique integers.
            Must contain the '<PAD>' key.
    OUTPUT
        padded_sentence_batch: numpy array with one row per sentence, each row
            right-padded with the id of '<PAD>' up to the length of the
            longest sentence in the batch. An empty batch gives an empty array.
    '''
    if len(sentence_batch) == 0:
        return np.array([])

    pad_id = vocab_identifiers['<PAD>']
    max_length = max(len(sentence) for sentence in sentence_batch)
    padded_sentence_batch = np.array([
        # list(...) so that "+" concatenates even if a sentence is a numpy array.
        list(sentence) + [pad_id] * (max_length - len(sentence))
        for sentence in sentence_batch
    ])
    return padded_sentence_batch

In [58]:
def batch_data(int_form_questions, int_form_answers, batch_size, vocab_identifiers):
    '''
    Create batches, pairing up questions and answers.
    INPUT
        int_form_questions: list of sentences (lists of word ids), the questions.
        int_form_answers: list of sentences (lists of word ids), the answers.
        batch_size: integer. number of sentences to be in each batch.
        vocab_identifiers: dictionary mapping vocabulary to unique integers.
    OUTPUT
        YIELDS padded_question_batch: numpy array of padded integer represented sentences.
        YIELDS padded_answer_batch numpy array of padded integer represented sentences.
    Only full batches are produced: trailing sentences that do not fill a
    complete batch are not yielded.
    '''
    # The batch count comes from the argument, not from a notebook-level list.
    for batch_i in range(0, len(int_form_questions) // batch_size):
        start_i = batch_i * batch_size

        questions_batch = int_form_questions[start_i : start_i + batch_size]
        answers_batch = int_form_answers[start_i : start_i + batch_size]

        pad_question_batch = pad_sentence_batch(questions_batch, vocab_identifiers)
        pad_answer_batch = pad_sentence_batch(answers_batch, vocab_identifiers)

        yield pad_question_batch, pad_answer_batch

In [None]:
def train_test_split(int_questions_sorted, int_answers_sorted, validation_size):
    '''
    # TODO: shuffle sort of questions prior to split
    Splits questions and answers into a training part and a validation part.
    The first int(len(questions) * validation_size) pairs become the
    validation set and everything after them becomes the training set.
    INPUT
        int_questions_sorted: sorted list of integers and <UNK> tokens, the questions.
        int_answers_sorted: sorted list of integers and <UNK> tokens, the answers.
        validation_size: float. Value between 0 and 1 representing percent of data to be
            held out for validation.
    OUTPUT
        training_questions: Training set of questions.
        training_answers: Training set of answers.
        validation_questions: Validation set of questions.
        validation_answers: Validation set of answers.
    '''
    split_at = int(len(int_questions_sorted) * validation_size)

    validation_questions = int_questions_sorted[:split_at]
    validation_answers = int_answers_sorted[:split_at]
    training_questions = int_questions_sorted[split_at:]
    training_answers = int_answers_sorted[split_at:]

    return training_questions, training_answers, validation_questions, validation_answers

## Section 1.2: Modeling Functions

In [18]:
def create_model_inputs():
    '''
    Builds the four placeholders the model is fed through and returns them as
    (input_data, targets, learning_rate, drop_prob).
    input_data and targets are int32; learning_rate and drop_prob are float32.
    All four are created with shape=None.
    NOTE(review): shape=None leaves the rank unknown; confirm the downstream
    ops that slice or embed these tensors accept that.
    '''
    def _placeholder(dtype, name):
        return tf.placeholder(dtype, shape=None, name=name)

    input_data = _placeholder(tf.int32, 'input')
    targets = _placeholder(tf.int32, 'targets')
    learning_rate = _placeholder(tf.float32, 'learning_rate')
    drop_prob = _placeholder(tf.float32, 'drop_prob')

    return input_data, targets, learning_rate, drop_prob

In [19]:
def process_encoding_input(targets, word_identifiers, batch_size):
    '''
    Drops the last column of targets and prepends a column filled with the
    id of <GO>.
    INPUT
        targets: tensorflow placeholder. Target variable.
        word_identifiers: dictionary. Maps each word to a unique integer.
            Must contain the '<GO>' key.
        batch_size: integer. Number of samples to run through model at a time.
    OUTPUT
        formatted_input: tensorflow tensor. sentences starting with wordid (int)
            for <GO>
    '''
    # Rows 0..batch_size, every column except the last one.
    without_last_word = tf.strided_slice(targets, [0,0], [batch_size, -1], [1,1])
    go_column = tf.fill([batch_size, 1], word_identifiers['<GO>'])
    return tf.concat(values=[go_column, without_last_word], axis=1)

In [54]:
def create_encoding_layer(
    input_data, 
    lstm_unit_count, 
    num_layers, 
    drop_prob, 
    sequence_length
):
    '''
    Creates multilayer bidirectional Recurrent Neural Net encoding.
    INPUT
        input_data: tensorflow tensor. Inputs handed to the recurrent layers.
        lstm_unit_count: integer. The number of lstm units.
        num_layers: integer. Number of layers.
        drop_prob: float between 0 and 1. Probability of dropping a hidden unit in training
        sequence_length: passed through unchanged as the sequence_length
            argument of tf.nn.bidirectional_dynamic_rnn.
    OUTPUT
        states: tuple of (forward, backward) final states of the RNN, as
            returned by tf.nn.bidirectional_dynamic_rnn.
    '''
    def build_stacked_cell():
        # Every layer gets its own LSTM + dropout wrapper. Reusing one cell
        # object for all layers (or for both directions) either raises a
        # variable-scope reuse error or shares weights, depending on the
        # TensorFlow release.
        layers = []
        for _ in range(num_layers):
            lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=lstm_unit_count)
            # dropout regularization
            layers.append(
                tf.contrib.rnn.DropoutWrapper(lstm_cell, input_keep_prob=(1-drop_prob))
            )
        return tf.contrib.rnn.MultiRNNCell(layers)

    # Don't care about outputs because we are feeding this into a decoding layer
    _, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=build_stacked_cell(),
        cell_bw=build_stacked_cell(),
        sequence_length=sequence_length,
        inputs=input_data,
        dtype=tf.float32
    )
    return states

In [21]:
def train_decoding_layer(
    encoder_state, 
    decoding_cell, 
    dec_embed_input, 
    sequence_length, 
    decoding_scope,
    output_fn, 
    drop_prob, 
    batch_size
):
    '''
    Builds the training-time attention decoder and projects its output.
    INPUT
        encoder_state: Tuple. Output of create_encoding_layer; only its first
            element is handed to the decoder.
        decoding_cell: multiRNNCell used for decoding.
        dec_embed_input: tensorflow tensor. Embedded decoder input.
        sequence_length: passed as sequence_length to dynamic_rnn_decoder.
        decoding_scope: tensorflow variable scope.
        output_fn: callable applied to the (dropped-out) decoder output.
        drop_prob: float between 0 and 1. Probability of dropping a hidden unit in training.
        batch_size: integer. Number of samples to run through the model at a time.
    OUTPUT
        decoding_layer_output: tensorflow tensor. Result of output_fn.
    '''
    seq2seq = tf.contrib.seq2seq
    cell_width = decoding_cell.output_size

    # The attention states are an all-zero tensor of shape (batch_size, 1, cell_width).
    zero_attention = tf.zeros([batch_size, 1, cell_width])
    keys, values, score_fn, construct_fn = seq2seq.prepare_attention(
        zero_attention,
        attention_option="bahdanau",
        num_units=cell_width
    )

    decoder_fn = seq2seq.attention_decoder_fn_train(
        encoder_state[0],
        keys,
        values,
        score_fn,
        construct_fn,
        name = "attn_dec_train"
    )
    rnn_output, _, _ = seq2seq.dynamic_rnn_decoder(
        cell=decoding_cell,
        decoder_fn=decoder_fn,
        inputs=dec_embed_input,
        sequence_length=sequence_length,
        scope=decoding_scope
    )

    # tf.nn.dropout takes a keep probability, hence 1 - drop_prob.
    keep_prob = 1-drop_prob
    return output_fn(tf.nn.dropout(rnn_output, keep_prob))

In [22]:
def infer_decoding_layer(
    encoder_state, 
    decode_cell, 
    dec_embeddings, 
    seq_start_id, 
    seq_end_id,
    max_length,
    vocab_size, 
    decoding_scope, 
    output_fn,  
    batch_size):
    '''
    Performs decoding inference on data run through RNN decoder.
    INPUT
        encoder_state: Tuple. Output of create_encoding_layer; only its first
            element is handed to the decoder.
        decode_cell: multiRNNCell.
        dec_embeddings: Matrix of decoder embeddings.
        seq_start_id: id signaling new phrase. In this case, the id of <GO>
        seq_end_id: id signaling end of phrase. In this case, the id of <EOS>
        max_length: integer. Max number of timesteps allowable to decode.
        vocab_size: integer. Size of vocabulary.
        decoding_scope: tensorflow variable_scope.
        output_fn: function. Output function to project cell output onto class logits.
        batch_size: integer. Number of samples to run through model at a time.
    OUTPUT
        infer_logits: tensorflow tensor. Inferred value.
    '''
    # Use the cell passed in as decode_cell; the body previously referred to
    # an undefined name.
    cell_width = decode_cell.output_size

    attention_states = tf.zeros([batch_size, 1, cell_width])

    att_keys, att_vals, att_score_fn, att_construct_fn = \
            tf.contrib.seq2seq.prepare_attention(attention_states,
                                                 attention_option="bahdanau",
                                                 num_units=cell_width)

    infer_decoder_fn = (
        tf
        .contrib
        .seq2seq
        .attention_decoder_fn_inference(
            output_fn=output_fn, 
            encoder_state=encoder_state[0], 
            attention_keys=att_keys, 
            attention_values=att_vals, 
            attention_score_fn=att_score_fn, 
            attention_construct_function=att_construct_fn, 
            embeddings=dec_embeddings,
            start_of_sequence_id=seq_start_id, 
            end_of_sequence_id=seq_end_id, 
            maximum_length=max_length, 
            num_decoder_symbols=vocab_size, 
            name = "attn_dec_inf"
        )
    )
    infer_logits, _, _ = (
        tf
        .contrib
        .seq2seq
        .dynamic_rnn_decoder(
            decode_cell,
            infer_decoder_fn,
            scope=decoding_scope
        )
    )

    return infer_logits

In [23]:
def decoding_layer(
    dec_embed_input, 
    dec_embeddings, 
    encoder_state, 
    vocab_size, 
    sequence_length, 
    rnn_size,
    num_layers, 
    int_lto_word_dict, 
    drop_prob, 
    batch_size):
    '''
    Creates the decoding cell that runs decoding training and inference.
    INPUT:
        dec_embed_input: Tensorflow tensor. Embedded decoder input.
        dec_embeddings: Matrix of decoder embeddings.
        encoder_state: Tuple. Output of create_encoding_layer.
        vocab_size: Integer. Number of words in vocabulary.
        sequence_length: Integer. The max sentence length for each batch.
        rnn_size: Integer. Number of units in LSTM cell.
        num_layers: Integer. Number of layers in RNN cell.
        int_lto_word_dict: Dictionary that is indexed with the tokens '<GO>'
            and '<EOS>' to obtain their integer ids, i.e. a word -> integer
            mapping despite the parameter name (kept for compatibility).
        drop_prob: Float between 0 and 1. Probability of dropping a hidden unit in training.
        batch_size: Integer. Number of samples to run through model at a time.
    OUTPUT:
        train_logits: Tensorflow tensor. Trained values.
        infer_logits: Tensorflow tensor. Infered values.
    '''
    # Look the tokens up in the argument, not in a notebook-level dictionary.
    word_to_int = int_lto_word_dict

    with tf.variable_scope("decoding") as decoding_scope:
        # One LSTM + dropout wrapper per layer; a single cell object must not
        # be reused for several layers.
        layers = []
        for _ in range(num_layers):
            lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            layers.append(
                tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob = 1 - drop_prob)
            )
        dec_cell = tf.contrib.rnn.MultiRNNCell(layers)

        weights = tf.truncated_normal_initializer(stddev=0.1)
        biases = tf.zeros_initializer()
        output_fn = lambda x: tf.contrib.layers.fully_connected(
            inputs=x, 
            num_outputs=vocab_size, 
            activation_fn=None, 
            scope=decoding_scope,
            weights_initializer=weights,
            biases_initializer=biases
        )

        train_logits = train_decoding_layer(
            encoder_state, 
            dec_cell, 
            dec_embed_input, 
            sequence_length, 
            decoding_scope, 
            output_fn, 
            drop_prob, 
            batch_size
        )
        # Inference must share the variables created for training.
        decoding_scope.reuse_variables()

        # infer_decoding_layer takes no drop_prob argument.
        infer_logits = infer_decoding_layer(
            encoder_state, 
            dec_cell, 
            dec_embeddings, 
            word_to_int['<GO>'],
            word_to_int['<EOS>'], 
            sequence_length - 1, 
            vocab_size,
            decoding_scope, 
            output_fn, 
            batch_size
        )

    return train_logits, infer_logits

In [24]:
def seq2seq_model(
    input_data, 
    target_data, 
    drop_prob, 
    batch_size, 
    sequence_length, 
    answers_vocab_size, 
    questions_vocab_size, 
    enc_embedding_size,  
    rnn_size, 
    num_layers, 
    int_form_questions): 
    '''
    Use the previous functions to create the training and inference logits
    INPUT
        input_data: Tensorflow placeholder for input to model.
        target_data: Tensorflow placeholder for target variable.
        drop_prob: Float between 0 and 1. Probability of dropping a hidden unit in training. 
        batch_size: Integer. Number of samples to run through model at a time.
        sequence_length: Integer. The max sentence length for each batch.
        answers_vocab_size: Integer. Number of unique words in answers.
        questions_vocab_size: Integer. Number of unique words in questions.
        enc_embedding_size: Integer. Number of dimensions for embedding matrix.
            Used for both the encoder and the decoder embeddings.
        rnn_size: Integer. Number of units in LSTM cell.
        num_layers: Integer. Number of layers in RNN cell. 
        int_form_questions: Dictionary. Maps each word to a unique integer.
            Must contain the '<GO>' and '<EOS>' tokens.
    OUTPUT
        train_logits: Tensorflow tensor. Trained values.
        infer_logits: Tensorflow tensor. Infered values.
    NOTE(review): the encoder embedding is sized with answers_vocab_size and the
    decoder with questions_vocab_size; confirm this pairing is intended.
    '''

    # Create embedding from encoder
    enc_embed_input = tf.contrib.layers.embed_sequence(
        ids=input_data, 
        vocab_size=answers_vocab_size + 1, 
        embed_dim=enc_embedding_size,
        initializer=tf.random_uniform_initializer(0,1)
    )
    # The encoder builder in this notebook is named create_encoding_layer.
    enc_state = create_encoding_layer(
        enc_embed_input, 
        rnn_size, 
        num_layers, 
        drop_prob, 
        sequence_length
    )

    # Decode embedded data
    dec_input = process_encoding_input(target_data, int_form_questions, batch_size)
    dec_embeddings = tf.Variable(
        tf.random_uniform([questions_vocab_size + 1, enc_embedding_size], 0, 1)
    )
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    train_logits, infer_logits = decoding_layer(
        dec_embed_input, 
        dec_embeddings, 
        enc_state, 
        questions_vocab_size, 
        sequence_length, 
        rnn_size, 
        num_layers, 
        int_form_questions, 
        drop_prob, 
        batch_size
    )
    return train_logits, infer_logits

In [25]:
def create_cost_function(
    sequence_length,
    input_shape,
    train_logits,
    targets,
    learning_rate):
    '''
    Builds the training operation: sequence loss, Adam optimizer and
    element-wise gradient clipping to the range [-5, 5].
    INPUT
        sequence_length: Integer.
        input_shape: Tensorflow tensor. Shape of input data.
        train_logits: Tensorflow tensor. Trained values.
        targets: Tensorflow placeholder. Target variable.
        learning_rate: Learning rate handed to the Adam optimizer.
    OUTPUT
        training_optimizer: Tensorflow operation. Applies the clipped gradients
            to the model using the Adam optimizer.
    '''
    with tf.name_scope("optimization"):
        # Every position of every sequence is weighted equally (all ones).
        uniform_weights = tf.ones([input_shape[0], sequence_length])
        cost = tf.contrib.seq2seq.sequence_loss(train_logits, targets, uniform_weights)

        optimizer = tf.train.AdamOptimizer(learning_rate)

        # Variables without a gradient are skipped; the rest are clipped.
        clipped_gradients = []
        for grad, var in optimizer.compute_gradients(cost):
            if grad is None:
                continue
            clipped = tf.clip_by_value(grad, clip_value_min=-5.0, clip_value_max=5.0)
            clipped_gradients.append((clipped, var))

        training_optimizer = optimizer.apply_gradients(clipped_gradients)

    return training_optimizer

## Section 2.1: Data Preprocessing

In [26]:
# Read the raw movie lines and the raw conversation records from disk.
lines, conv_lines = load_in_lines()

In [27]:
# Line id -> spoken text.
line_mapping = create_line_mapping(lines)

In [28]:
# One list of line ids per conversation.
convs = format_conv_lines(conv_lines)

In [29]:
# Consecutive lines of a conversation become (question, answer) pairs.
questions, answers = create_questions_and_answers(convs, line_mapping)

In [30]:
clean_questions, clean_answers = create_clean_questions_and_answers(questions, answers)

In [31]:
# Uses the function's default length bounds.
questions_filtered, answers_filtered = \
    filter_long_and_short_sentences(clean_questions, clean_answers)

In [32]:
# The vocabulary is counted from the answers only.
vocab_freq = create_word_frequency_dictionaries(answers_filtered)

In [33]:
# Keep words that occur at least 10 times.
filtered_vocab = filter_uncommon_words(vocab_freq, 10)

In [34]:
word_identifiers = create_unique_identifiers(filtered_vocab)

In [35]:
# create_dictionary_tokens returns the dictionary object it was given, so
# word_identifiers and identifiers_with_tokens refer to the same dictionary.
identifiers_with_tokens = create_dictionary_tokens(word_identifiers)

In [36]:
int_to_word_dict = create_int_to_word_dict(identifiers_with_tokens)

In [37]:
# Only the answers receive the end-of-sentence marker.
answers_filtered_eos = add_eos_token(answers_filtered)

In [38]:
int_form_questions = convert_lines_to_ints(questions_filtered, word_identifiers)
int_form_answers = convert_lines_to_ints(answers_filtered_eos, identifiers_with_tokens)

In [39]:
# Report how much of the question text falls outside the vocabulary.
get_percent_unk(int_form_questions)

4227 unique words
Percent of total words spoken that are unkown: 6.9%


In [40]:
# Order the pairs by question length; the answers are reordered alongside.
int_questions_sorted, int_answers_sorted = \
    sort_by_question_length(int_form_questions, int_form_answers)

## Section 2.2: Modeling

In [47]:
# Set the Hyperparameters
epochs = 100
batch_size = 128
lstm_unit_count = 512
num_layers = 2
encoding_embedding_size = 512
decoding_embedding_size = 512
learning_rate = 0.005
learning_rate_decay = 0.9
min_learning_rate = 0.0001
drop_prob = 0.25
# Named consistently with max_sentence_length and with the keyword arguments
# of filter_long_and_short_sentences.
min_sentence_length = 2
max_sentence_length = 20

In [42]:
input_data, targets, learning_rate, drop_prob = create_model_inputs()

In [43]:
formatted_input = process_encoding_input(targets, word_identifiers, batch_size)

In [50]:
# Reset the graph to ensure that it is ready for training
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Sequence length will be the max line length for each batch
sequence_length = tf.placeholder_with_default(max_sentence_length, None, name='sequence_length')
input_shape = tf.shape(input_data)

## Begin debugging here

In [56]:
# Debugging call: builds the encoder directly on the raw `input_data` placeholder.
# seq2seq_model embeds the ids with embed_sequence before building the encoder;
# this cell skips that step.
# NOTE(review): every tensor passed here must live in the same graph as this
# call, i.e. be created after any tf.reset_default_graph(); the ValueError
# recorded below is a graph mismatch.
states = create_encoding_layer(
    input_data, 
    lstm_unit_count, 
    num_layers, 
    drop_prob, 
    sequence_length
)

ValueError: Tensor("bidirectional_rnn_1/fw/fw/Shape:0", shape=(?,), dtype=int32) must be from the same graph as Tensor("bidirectional_rnn_1/fw/fw/stack:0", shape=(1,), dtype=int32).

In [None]:
# NOTE(review): `train_logits` is not assigned by any cell shown above.
# seq2seq_model returns a value of that name - presumably it has to be called
# first. TODO confirm.
training_optimizer = create_cost_function(
    sequence_length, 
    input_shape, 
    train_logits,
    targets,
    learning_rate
)