In [1]:
import tensorflow as tf
import collections
import pickle
import numpy as np
import os
# Short aliases for the TensorFlow namespaces used by the model-building code.
layers = tf.layers
rnn = tf.nn.rnn_cell

# Only let TensorFlow log messages of severity ERROR through.
tf.logging.set_verbosity(tf.logging.ERROR)
# Record which TensorFlow version the notebook was executed with.
print(tf.__version__)

1.12.0


In [2]:
# --- Files and directories ---
# NOTE(review): train_file and val_file are not referenced by the cells visible
# in this notebook; only test_file is read (by the CsvDataset of the test cell).
train_file = "train_processed.csv"
val_file = "val_processed.csv"
test_file = "test_processed.csv"
# output_dir is only used to derive summary_dir, the Supervisor's logdir.
output_dir = "version3"
summary_dir = output_dir + "/log"
# Directory handed to tf.train.latest_checkpoint; None skips restoring weights.
checkpoint = "version3"

# --- Loop / schedule settings ---
# NOTE(review): batch_size, val_size, max_iter, display_freq, save_freq and
# val_freq are not read by the visible cells — presumably leftovers from the
# training notebook; confirm before removing.
batch_size = 32
val_size = 20
max_iter = 20000
display_freq = 50
save_freq = 100
val_freq = 100
# Arguments of tf.train.exponential_decay (staircase=True) inside model().
init_learning_rate = 0.0003
decay_step = 10000
decay_rate = 0.1
# beta1 of the Adam optimizer.
beta = 0.9

# --- Network sizes ---
# Units per recurrent cell (the cells are GRUCell despite the "lstm" name).
lstm_hidden_layer = 100*3
# Units of the dense layer that follows the concatenated pair embeddings.
dense_hidden_layer = 200*3
# Number of stacked recurrent cells per direction.
lstm_stack_size = 2
# Width of the embedding produced for each (question, answer) pair.
question_ans_emb_dim = 150*3
# Keep probability for dropout; layers.dropout receives 1 - dropout_keep_prob.
dropout_keep_prob = 0.5
# Number of answer options per question; size of the logits / one-hot labels.
num_classes = 4


# Python-level flag read while the graph is being built; it is forwarded as
# `training=` to layers.dropout.
isTraining = False

In [3]:
def _stacked_gru_cell(keep_prob):
    """Build one direction's stack of ``lstm_stack_size`` dropout-wrapped GRU cells.

    Args:
        keep_prob: keep probability applied to both the inputs and the outputs
            of every cell in the stack. A Python float of 1.0 makes
            ``DropoutWrapper`` a pass-through.

    Returns:
        A ``MultiRNNCell`` whose state is a single concatenated tensor.
    """
    cells = [rnn.DropoutWrapper(
        rnn.GRUCell(lstm_hidden_layer),
        input_keep_prob=keep_prob,
        output_keep_prob=keep_prob)
        for _ in range(lstm_stack_size)]
    # state_is_tuple=False concatenates the per-layer states along the feature
    # axis, which is what allows the plain tf.concat(axis=1) on the final states.
    return rnn.MultiRNNCell(cells, state_is_tuple=False)


def _bidirectional_final_state(seq_emb, keep_prob):
    """Encode ``seq_emb`` with a stacked bidirectional GRU.

    Must be called inside the variable scope that should own the RNN weights.

    Args:
        seq_emb: embedded sequence, batch-major (the default layout expected by
            ``tf.nn.bidirectional_dynamic_rnn``).
        keep_prob: dropout keep probability for the recurrent cells.

    Returns:
        The final forward and backward states concatenated along axis 1.
    """
    # Forward cell is built before the backward cell, as in the original code.
    forward_cell = _stacked_gru_cell(keep_prob)
    backward_cell = _stacked_gru_cell(keep_prob)
    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(forward_cell,
                                                              backward_cell,
                                                              seq_emb,
                                                              dtype=tf.float32)
    return tf.concat([state_fw, state_bw], axis=1)


def single_qeustion_answer_model(ques_emb, ans_emb, reuse=False):
    """Embed one (question, answer option) pair into a single vector.

    The question and the answer are each encoded by their own stacked
    bidirectional GRU (scopes ``question_lstm`` and ``answer_lstm``); the final
    states are concatenated and projected by a leaky-ReLU dense layer of
    ``question_ans_emb_dim`` units followed by dropout.

    Args:
        ques_emb: embedded question tokens.
        ans_emb: embedded answer-option tokens.
        reuse: forwarded to the enclosing ``tf.variable_scope`` so that several
            answer options can share one set of weights.

    Returns:
        Tensor with ``question_ans_emb_dim`` features per example.
    """
    # Dropout inside the recurrent cells must only be active while training.
    # Previously the wrappers always used dropout_keep_prob, so inference
    # (isTraining = False) randomly dropped activations and gave
    # non-deterministic predictions. The wrapper itself is kept so the graph
    # structure and variable names (and therefore checkpoints) are unchanged.
    rnn_keep_prob = dropout_keep_prob if isTraining else 1.0

    with tf.variable_scope("single_qeustion_answer_model", reuse=reuse):
        with tf.variable_scope("question_lstm"):
            ques_output = _bidirectional_final_state(ques_emb, rnn_keep_prob)

        with tf.variable_scope("answer_lstm"):
            ans_output = _bidirectional_final_state(ans_emb, rnn_keep_prob)

        # Concat question and answer encoder states and pass them to the dense layer.
        ques_ans_concat = tf.concat([ques_output, ans_output], axis=1)
        output = layers.dense(ques_ans_concat, question_ans_emb_dim, activation=tf.nn.leaky_relu)
        return layers.dropout(output, rate=1 - dropout_keep_prob, training=isTraining)


def model(question, Aopt, Bopt, Copt, Dopt, label, embmat, reuse=False):
    """Assemble the four-option answer classifier together with its training ops.

    The question and each option are embedded via ``embmat``; every
    (question, option) pair is encoded by ``single_qeustion_answer_model`` with
    shared weights, the four pair embeddings are concatenated, and a dense
    head produces ``num_classes`` logits.

    Args:
        question: integer token ids of the question.
        Aopt, Bopt, Copt, Dopt: integer token ids of the four answer options.
        label: integer class index, one-hot encoded internally.
        embmat: embedding matrix passed to ``tf.nn.embedding_lookup``.
        reuse: forwarded to every variable scope opened here.

    Returns:
        A ``Network`` namedtuple with fields ``loss``, ``pred``,
        ``grads_and_vars``, ``train``, ``global_step`` and ``learning_rate``.
    """
    Network = collections.namedtuple(
        'Network',
        ['loss', 'pred', 'grads_and_vars', 'train', 'global_step', 'learning_rate'])

    with tf.variable_scope("BiRNN", reuse=reuse):
        # Token ids -> embedding vectors (question first, then options A..D).
        question_emb = tf.nn.embedding_lookup(embmat, question, name="que_lookup")
        option_lookups = ((Aopt, "a_lookup"), (Bopt, "blookup"),
                          (Copt, "clookup"), (Dopt, "dlookup"))
        option_embs = [tf.nn.embedding_lookup(embmat, option_ids, name=lookup_name)
                       for option_ids, lookup_name in option_lookups]

        # One embedding per (question, option) pair; the first call creates the
        # encoder weights and the remaining three reuse them.
        pair_embs = [
            single_qeustion_answer_model(question_emb, option_emb, reuse=(idx > 0))
            for idx, option_emb in enumerate(option_embs)]

        print("[DEBUG] questionAA size:", pair_embs[0].get_shape())

        # Classification head that picks the best answer.
        hidden = tf.concat(pair_embs, axis=1)
        hidden = layers.dense(hidden, dense_hidden_layer, activation=tf.nn.leaky_relu)
        hidden = layers.dropout(hidden, rate=1 - dropout_keep_prob, training=isTraining)
        logits = layers.dense(hidden, num_classes)

        pred_probs = tf.nn.softmax(logits)
        pred_label = tf.argmax(pred_probs, axis=1)
        onehot_label = tf.one_hot(label, num_classes)

        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=onehot_label,
                                                                   logits=logits)
        total_loss = tf.reduce_mean(per_example_loss)

    with tf.variable_scope("global_step_and_learning_rate", reuse=reuse):
        global_step = tf.contrib.framework.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(
            init_learning_rate, global_step, decay_step, decay_rate, staircase=True)
        # The step counter is advanced by an explicit assign op that is grouped
        # into `train` below.
        incr_global_step = tf.assign(global_step, global_step + 1)

    with tf.variable_scope("optimizer", reuse=reuse):
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(pending_updates):
            # Only variables living under the "BiRNN" scope are optimized.
            birnn_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='BiRNN')
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta)
            grads_and_vars = optimizer.compute_gradients(total_loss, birnn_vars)
            train_op = optimizer.apply_gradients(grads_and_vars)

    train_step = tf.group(total_loss, incr_global_step, train_op)
    return Network(
        loss=total_loss,
        pred=pred_label,
        grads_and_vars=grads_and_vars,
        train=train_step,
        global_step=global_step,
        learning_rate=learning_rate
    )

In [4]:
def write_to_file(pred, path="submission.csv"):
    """Write predictions as a two-column CSV with header ``id,correctAnswer``.

    Args:
        pred: mapping from sample id to predicted label. Ids may be ``bytes``
            (decoded as UTF-8, as returned by a TensorFlow string tensor) or
            already-decoded ``str``.
        path: destination file; defaults to ``"submission.csv"`` in the
            current working directory. The file is overwritten if it exists.
    """
    with open(path, "w") as out:
        out.write("id,correctAnswer\n")
        for id_, label in pred.items():
            # Ids fetched from the graph arrive as bytes; accept str as well so
            # the helper also works on already-decoded keys.
            if isinstance(id_, bytes):
                id_ = id_.decode("utf-8")
            out.write("{},{}\n".format(id_, label))

In [5]:
# id,answerA,answerB,answerC,answerD,question
def to_int_tensor(string_tensor):
    """Turn a scalar string of delimited integers into a 1-D int32 tensor.

    The string is split with ``tf.string_split`` (default delimiter) and every
    token is parsed with ``tf.string_to_number``.
    """
    tokens = tf.string_split([string_tensor]).values
    token_ids = tf.string_to_number(tokens, out_type=tf.int32)
    return tf.convert_to_tensor(token_ids)

def test_preprocess(id_, Aopt, Bopt, Copt, Dopt, question):
    """Convert one test CSV record into integer token tensors.

    Arguments arrive in CSV column order (id, A, B, C, D, question). The id is
    passed through untouched; every other field is parsed by ``to_int_tensor``.

    Returns:
        ``(id_, question, Aopt, Bopt, Copt, Dopt)`` — note that the question is
        moved in front of the options.
    """
    text_fields = (question, Aopt, Bopt, Copt, Dopt)
    parsed = [to_int_tensor(field) for field in text_fields]
    return (id_,) + tuple(parsed)

# All six CSV columns (id, answers A-D, question) are read as raw strings.
type_defaults = [tf.string] * 6

# Input pipeline: parse each record, emit one record per batch, iterate twice.
# NOTE(review): repeat(2) makes every test row pass through the network twice;
# the reason is not visible in this notebook — confirm it is intended.
test_dataset = tf.data.experimental.CsvDataset(
    test_file, record_defaults=type_defaults, header=True)
test_dataset = test_dataset.map(test_preprocess)
test_dataset = test_dataset.batch(1)
test_dataset = test_dataset.repeat(2)
test_iterator = test_dataset.make_initializable_iterator()
test_batch = test_iterator.get_next()

# The test set has no labels; model() still needs one, so feed a constant 0.
dummy_label = tf.convert_to_tensor(np.zeros((1, 1), dtype=np.int32), dtype=tf.int32)

# Initializing embeddings
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load an emb_mat.pkl that comes from a trusted source.
with open("emb_mat.pkl", "rb") as infile:
    embedding_mat = pickle.load(infile)

vocab_size, emb_dim = embedding_mat.shape

with tf.variable_scope("BiRNN"):
    # Zero-initialized trainable embedding table. The (misspelled) graph name
    # "embadding_mat" is part of the variable's checkpoint key, so it is kept.
    embadding_mat = tf.Variable(
        tf.constant(0.0, shape=[vocab_size, emb_dim]),
        trainable=True,
        name="embadding_mat")
    # Placeholder + assign op that can copy a matrix of the same shape into
    # the variable when embedding_init is run.
    embedding_placeholder = tf.placeholder(
        tf.float32,
        shape=[vocab_size, emb_dim],
        name="emb_placeholder")
    embedding_init = embadding_mat.assign(embedding_placeholder)

# Loading model: wire the iterator outputs into the network.
id_, question, Aopt, Bopt, Copt, Dopt = test_batch
net_train = model(question, Aopt, Bopt, Copt, Dopt,
                  label=dummy_label, embmat=embadding_mat)
print('[INFO] Finish building the birnn !!!')

# Saver covering every saveable variable in the graph; used to restore the checkpoint.
saver = tf.train.Saver(max_to_keep=10)

# Second saver restricted to the trainable variables under the "BiRNN" scope.
train_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="BiRNN")
weight_initializer = tf.train.Saver(train_var_list)

# Session options: let the GPU allocation grow on demand.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

# Use supervisor to coordinate all queue and summary writer
# TODO: Deprecated, Update with tf.train.MonitoredTrainingSession
sv = tf.train.Supervisor(logdir=summary_dir, save_summaries_secs=0, saver=None)

# Restore the trained weights, run the network over the test iterator until it
# is exhausted, and write the collected predictions to the submission file.
with sv.managed_session(config=config) as sess:
    if checkpoint is not None:
        print('[INFO] Loading model from the checkpoint...')
        checkpoint_ = tf.train.latest_checkpoint(checkpoint)
        if checkpoint_ is None:
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail with a message that names the directory.
            raise ValueError(
                "No checkpoint found in directory: {!r}".format(checkpoint))
        saver.restore(sess, checkpoint_)

    print('[INFO] testing starts!!!')

    # Initialize the test iterator.
    sess.run(test_iterator.initializer)
    pred_label = {}
    # 1-based index of the next batch to be evaluated.
    count = 1
    # The fetch structure never changes, so build it once outside the loop.
    fetches = {
        "id": id_,
        "pred": net_train.pred
    }
    while True:
        try:
            # No feed_dict: neither fetched tensor reads from a placeholder, so
            # feeding the embedding matrix on every step was wasted work.
            results = sess.run(fetches)
        except tf.errors.OutOfRangeError:
            # Iterator exhausted — every test record has been processed.
            break
        # batch(1) pipeline: element 0 is the only entry of each batch.
        pred_label[results["id"][0]] = results["pred"][0]
        count = count + 1

    write_to_file(pred_label)
    print("[INFO] testing complete!! Total test sample: ", len(pred_label))
        

[DEBUG] questionAA size: (?, 450)
[INFO] Finish building the birnn !!!
[INFO] Loading model from the checkpoint...
[INFO] testing starts!!!
[INFO] testing complete!! Total test sample:  490
