In [6]:
import tensorflow as tf
#import tensorflow.contrib.eager as tfe
import os
import matplotlib.pyplot as plt
import re
import numpy as np
from string import punctuation
from collections import defaultdict
from functools import reduce
from keras.preprocessing.sequence import pad_sequences
from itertools import chain
from InputPreparator import EmbeddingsPreparator
from InputPreparator import StoryParser
import time
import csv
#to avoid a warning from TF 1.7 version see https://github.com/tensorflow/tensorflow/issues/18111
import warnings
warnings.filterwarnings('ignore')


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Task selection

In [85]:
# --- Data locations -------------------------------------------------------
# Keep the trailing slash: task file names are appended to this prefix by
# plain string concatenation further down.
PATH_TO_TASKS = "data/tasks_1-20_v1-2/en/"
PATH_TO_EMBED = "data/glove.6B.50d.txt"

# --- Experiment switches --------------------------------------------------
# Which task to run; must be one that get_task_files() knows about.
TASK_NUMBER = 10
# Forwarded to story_parser.get_stories() and used to pick the model name.
SUPPORTING_ONLY = True
# True: load a saved model; False: build, train and save a new one.
USE_PRETRAINED = True

In [65]:
def get_task_files(task_nr):
    """Return the (train, test) file names for a supported task.

    Args:
        task_nr: task number; only 5, 6 and 10 are supported.

    Returns:
        A 2-tuple ``(train_file_name, test_file_name)``.

    Raises:
        ValueError: if ``task_nr`` is not one of the supported tasks.
            (Previously the function silently returned None, which only
            failed later when the caller indexed the result.)
    """
    task_files = {
        5: ('qa5_three-arg-relations_train.txt', "qa5_three-arg-relations_test.txt"),
        6: ('qa6_yes-no-questions_train.txt', 'qa6_yes-no-questions_test.txt'),
        10: ('qa10_indefinite-knowledge_train.txt', 'qa10_indefinite-knowledge_test.txt'),
    }
    if task_nr not in task_files:
        raise ValueError(
            "Unsupported task number {!r}; supported tasks: {}".format(
                task_nr, sorted(task_files)))
    return task_files[task_nr]

In [66]:
# Look the file names up once and unpack the (train, test) pair.
train_set_file, test_set_file = get_task_files(TASK_NUMBER)

# Prefix both names with the task directory (PATH_TO_TASKS ends with "/").
train_set_post_file, test_set_post_file = [
    PATH_TO_TASKS + file_name
    for file_name in (train_set_file, test_set_file)
]

In [67]:
# Helper objects from the project-local InputPreparator module; both are
# constructed without arguments, in the same order as before.
embedder, story_parser = EmbeddingsPreparator(), StoryParser()

# Input preparation

In [68]:
# The vocabulary is collected over both the train and the test file.
corpus_files = [train_set_post_file, test_set_post_file]
vocab_tokens = embedder.get_unique_tokens(corpus_files)

In [69]:
# Load the embeddings from PATH_TO_EMBED for the collected vocabulary.
# NOTE(review): presumably only tokens in vocab_tokens are kept — confirm
# against EmbeddingsPreparator.load_embedding_from_disks.
word_to_index, index_to_embedding = embedder.load_embedding_from_disks(
    PATH_TO_EMBED,
    vocab_tokens,
    with_indexes=True,
)

In [70]:
# Parse the train file first, then the test file, with the same
# SUPPORTING_ONLY setting for both.
train_stories, test_stories = [
    story_parser.get_stories(story_file, SUPPORTING_ONLY)
    for story_file in (train_set_post_file, test_set_post_file)
]

In [71]:
# Vectorise each split with the shared word_to_index mapping, then unpack the
# (contexts, questions, answers) triple returned for that split.
train_vectors = story_parser.vectorize_stories(train_stories, word_to_index)
contexts_train, questions_train, answers_train = train_vectors

test_vectors = story_parser.vectorize_stories(test_stories, word_to_index)
contexts_test, questions_test, answers_test = test_vectors

In [72]:
# Report the shapes of the vectorised training arrays, one per line.
shape_report = (
    ('contexts.shape = {}', contexts_train),
    ('questions.shape = {}', questions_train),
    ('answers.shape = {}', answers_train),
)
for template, vectors in shape_report:
    print(template.format(vectors.shape))

contexts.shape = (1000,)
questions.shape = (1000, 6)
answers.shape = (1000, 27)


## Embeddings 

In [73]:
# Width of one embedding vector.
# NOTE(review): assumed to match the vectors stored in PATH_TO_EMBED — confirm.
embed_dimensions = 50

# One row per index in word_to_index plus one extra row
# (presumably for a padding index — TODO confirm).
vocab_rows = len(word_to_index) + 1
embedding_matrix = np.zeros((vocab_rows, embed_dimensions))

for token_index in word_to_index.values():
    vector = index_to_embedding[token_index]
    if vector is None:
        # No embedding available: leave this row as zeros.
        continue
    embedding_matrix[token_index] = vector

## Datasets

In [74]:
# Combine contexts, questions and answers of each split into the dataset
# object that prep_data() consumes.
final_train_data = story_parser.get_final_dataset(
    contexts_train,
    questions_train,
    answers_train,
)
final_test_data = story_parser.get_final_dataset(
    contexts_test,
    questions_test,
    answers_test,
)

In [75]:
def prep_data(data, all_data= False, train= False):
    """Pad the contexts and questions of a dataset to uniform lengths.

    Args:
        data: iterable of ``(context, question, answer)`` triples, where
            context and question are sequences accepted by ``pad_sequences``.
        all_data: unused; kept so existing callers keep working.
        train: unused; kept so existing callers keep working.

    Returns:
        ``(final_contexts, final_queries)``: the results of ``pad_sequences``
        for the contexts and for the questions. Each is padded to the longest
        sequence of its kind in ``data`` (the whole dataset passed in, not a
        mini-batch). The answers are not returned.

    Raises:
        ValueError: if ``data`` is empty.
    """
    samples = list(data)
    if not samples:
        # The original failed here too, with an opaque unpacking ValueError.
        raise ValueError("prep_data() needs at least one (context, question, answer) sample")

    contexts, questions, _answers = zip(*samples)

    max_context_length = max(len(sequence) for sequence in contexts)
    max_query_length = max(len(sequence) for sequence in questions)

    final_contexts = pad_sequences(contexts, maxlen=max_context_length)
    final_queries = pad_sequences(questions, maxlen=max_query_length)

    return final_contexts, final_queries

# tf.keras Model : Hyperparameters

In [76]:
# Scalar hyperparameters of the network.
num_units_gru = 50   # passed to the GRU cell when the model is built
keep_prob = 0.5      # used to configure the Dropout layer of the model
vocab_size = len(index_to_embedding)

# Padded training inputs; train() passes these to model.fit().
final_contexts, final_queries = prep_data(final_train_data)

# tf.keras Model 

In [77]:
# Two variable-length inputs: the question and the context.
q_in = tf.keras.Input(shape=(None,))
c_in = tf.keras.Input(shape=(None,))

#embed
# A single frozen (trainable=False) embedding layer, initialised from
# embedding_matrix, is applied to both inputs.
embed = tf.keras.layers.Embedding(len(word_to_index) + 1,
                            embed_dimensions,
                            weights=[embedding_matrix],
                            trainable=False)
q = embed(q_in)
c = embed(c_in)

#encode
# The same GRUCell object is wrapped by both RNN layers
# (presumably so context and question share encoder weights — confirm).
# return_state=True makes each RNN return (output, final_state).
grucell= tf.keras.layers.GRUCell(num_units_gru)
context_encoded_rnn, context_final_state = tf.keras.layers.RNN(grucell, return_state= True)(c)
question_encoded_rnn, question_final_state = tf.keras.layers.RNN(grucell, return_state= True)(q)

#add dropout
# Dropout's first argument is the fraction of units to DROP, not to keep, so
# the keep probability has to be converted. At keep_prob == 0.5 the two
# coincide, which is why passing keep_prob directly went unnoticed.
dropout= tf.keras.layers.Dropout(rate=1.0 - keep_prob)
encoded_sentence= dropout(context_final_state)
encoded_question= dropout(question_final_state)

#merge
merged= tf.keras.layers.concatenate([encoded_sentence, encoded_question])

#predict
# One softmax output per column of the answers array.
pred=tf.keras.layers.Dense(answers_train.shape[1], activation=tf.nn.softmax)(merged)


# tf.keras Model : Functions

In [78]:
def get_model_version():
    """Build the model name from the notebook's task settings.

    Reads the globals TASK_NUMBER and SUPPORTING_ONLY.

    Returns:
        ``'task_<TASK_NUMBER>_reduced'`` when SUPPORTING_ONLY is truthy,
        otherwise ``'task_<TASK_NUMBER>_whole'``.
    """
    variant = "reduced" if SUPPORTING_ONLY else "whole"
    return 'task_' + str(TASK_NUMBER) + "_" + variant

In [79]:
def get_best_model():
    """Return the checkpoint file name to restore for the current settings.

    Reads the globals TASK_NUMBER, SUPPORTING_ONLY and model_version.

    Returns:
        ``model_version`` followed by ``'_1.ckpt'`` for task 6, and for
        task 5 when SUPPORTING_ONLY is falsy; ``'_3.ckpt'`` in every other
        case.
    """
    uses_first_checkpoint = TASK_NUMBER == 6 or (
        TASK_NUMBER == 5 and not SUPPORTING_ONLY
    )
    if uses_first_checkpoint:
        return model_version + '_1.ckpt'
    return model_version + '_3.ckpt'

In [80]:
def restore():
    """Load the saved model selected by get_best_model().

    Returns:
        The model loaded from ``./restore/tf_keras/<best checkpoint>`` with
        ``compile=True`` and no custom objects.
    """
    checkpoint_path = './restore/tf_keras/' + get_best_model()
    return tf.keras.models.load_model(
        checkpoint_path,
        custom_objects=None,
        compile=True,
    )

In [81]:
def evaluate_model(model):
    """Print the model summary and its evaluation result on the test data.

    Pads the global final_test_data with prep_data() and evaluates against
    the global answers_test with a batch size of 128.

    Args:
        model: object providing ``summary()`` and ``evaluate()``.
    """
    model.summary()

    test_contexts, test_queries = prep_data(final_test_data)
    # Inputs are passed as [queries, contexts], the same order train() uses.
    test_inputs = [test_queries, test_contexts]

    # NOTE(review): the value is printed verbatim; with a compiled metric it
    # is presumably [loss, accuracy] rather than accuracy alone — confirm.
    score = model.evaluate(test_inputs, answers_test, batch_size=128)
    print('Final Testing Accuracy: '+ str(score))

In [82]:
def train(model):
    """Fit the model on the padded training data and print the elapsed time.

    Uses the globals final_queries, final_contexts and answers_train.
    Trains for 200 epochs with batch size 128, holds out 20% for validation,
    and logs to ``./keraslog`` through a TensorBoard callback.

    Args:
        model: a compiled model providing ``fit()``.
    """
    tensorboard_cb = tf.keras.callbacks.TensorBoard(
        log_dir='./keraslog',
        histogram_freq=0,
        write_graph=True,
        write_images=True,
    )

    print('Training...')
    started_at = time.time()
    model.fit(
        [final_queries, final_contexts],
        answers_train,
        epochs=200,
        batch_size=128,
        validation_split=0.2,
        callbacks=[tensorboard_cb],
    )
    elapsed_time = time.time() - started_at

    print()
    print('Training time: ')
    print(elapsed_time)

In [83]:
def save(model):
    """Save the model under ``./save/tf_keras/`` and print the path.

    The file name is the global model_version followed by ``'_3.ckpt'``.
    The directory is created first if it does not exist.

    Args:
        model: the model handed to ``tf.keras.models.save_model``.
    """
    save_dir = './save/tf_keras/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    save_path = save_dir + model_version + '_3.ckpt'
    tf.keras.models.save_model(model, save_path)

    print("Model saved in path: %s" % save_path)

# Train or Restore

In [87]:
# Name shared by get_best_model() and save(); must be set before either runs.
model_version= get_model_version()
if USE_PRETRAINED:
    # The loader defined in this notebook is restore(); the previous call to
    # restore_model() referred to a name that is defined nowhere in the
    # notebook and raised NameError on a fresh kernel.
    model = restore()
else:
    # Build the model from the graph defined above, then train and save it.
    model = tf.keras.Model(inputs=[q_in, c_in], outputs=pred)
    model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    train(model)
    save(model)
    
evaluate_model(model)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_46 (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
input_45 (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_23 (Embedding)        (None, None, 50)     1350        input_45[0][0]                   
                                                                 input_46[0][0]                   
__________________________________________________________________________________________________
rnn_45 (RNN)                    [(None, 50), (None,  15150       embedding_23[1][0]               
__________