In [12]:
import tensorflow as tf
import collections
# Short aliases for the TF 1.x sub-modules used by the model cells below.
layers = tf.layers
rnn = tf.nn.rnn_cell

# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

1.12.0


### Model Hyperparameters

In [11]:
# --- Data files and output locations ---
# train_file / val_file are read by the CsvDataset pipelines in the
# "Loading Data" section. test_file and summary_dir are not referenced in the
# visible part of this notebook.
train_file = "train_processed.csv"
val_file = "val_processed.csv"
test_file = "test_processed.csv"
summary_dir = "version1"

# --- Optimisation ---
# batch_size is passed to Dataset.batch() for both the train and val pipelines.
batch_size = 1
# val_size and max_iter are not referenced in the visible part of this notebook
# (presumably consumed by a training loop further down -- confirm).
val_size = 1000
max_iter = 50000
# Arguments of tf.train.exponential_decay (used with staircase=True) in model().
init_learning_rate = 0.001
decay_step = 25000
decay_rate = 0.1
# Passed as beta1 to tf.train.AdamOptimizer.
beta = 0.9

# --- Network shape ---
# Units per recurrent cell and number of stacked cells per direction.
# NOTE: despite the "lstm" prefix, the model cells below are rnn.GRUCell.
lstm_hidden_layer = 100
lstm_stack_size = 2
# Width of the dense layer that fuses one question/answer pair.
question_ans_emb = 150
# Keep probability for the DropoutWrapper cells; the dense-layer dropout uses
# rate = 1 - dropout_keep_prob.
dropout_keep_prob = 0.75
# Size of the logits layer and depth of the one-hot labels in model().
num_classes = 4


### Loading Data

In [3]:
# Single graph-mode session shared by the cells below.
sess = tf.Session()

In [4]:
# Smoke test: evaluate a small constant tensor to confirm the session works.
sess.run(tf.convert_to_tensor([1,2]))

array([1, 2], dtype=int32)

In [8]:
def to_int_tensor(string_tensor):
    """Parse a string of delimiter-separated integers into an int32 tensor.

    The scalar string is wrapped in a one-element list, tokenised with
    `tf.string_split` (default delimiter), and every token is converted with
    `tf.string_to_number`.

    Args:
        string_tensor: scalar string tensor holding the encoded token ids.

    Returns:
        An int32 tensor with one entry per token.
    """
    tokens = tf.string_split([string_tensor]).values
    token_ids = tf.string_to_number(tokens, out_type=tf.int32)
    return tf.convert_to_tensor(token_ids)

# def train_preprocess(row):
#     question = to_int_tensor(row["question"][0])
#     Aopt = to_int_tensor(row["answerA"][0])
#     Bopt = to_int_tensor(row["answerB"][0])
#     Copt = to_int_tensor(row["answerC"][0])
#     Dopt = to_int_tensor(row["answerD"][0])
#     label = row["correctAnswer"]
#     return question, Aopt, Bopt, Copt, Dopt, label

def preprocess(id, Aopt, Bopt, Copt, Dopt, label, question, text):
    """Turn one parsed CSV row into the tensors consumed by the model.

    The parameters follow the CSV column order. `id` and `text` are accepted
    because the dataset passes every column, but they are not used.

    Returns:
        A 6-tuple `(question, Aopt, Bopt, Copt, Dopt, [label])` where the first
        five entries are int32 id tensors produced by `to_int_tensor` and the
        label is wrapped in a one-element list.
    """
    # Encode the question first, then the four options, in a single pass.
    encoded_fields = tuple(
        to_int_tensor(field) for field in (question, Aopt, Bopt, Copt, Dopt)
    )
    return encoded_fields + ([label],)


# CSV column order: id,answerA,answerB,answerC,answerD,correctAnswer,question,text
# Every column is read as a string except correctAnswer, which is an int32.
type_defaults = [tf.string, tf.string, tf.string, tf.string,  tf.string, tf.int32, tf.string, tf.string,]

# Training pipeline: parse -> shuffle examples -> batch -> repeat forever.
# Shuffling happens BEFORE batching so that individual examples (not whole
# batches) are reshuffled; with batch_size = 1 the two orders are equivalent,
# but this keeps the pipeline correct if batch_size is raised.
# NOTE: the printed samples show sequences of different lengths, so plain
# .batch() presumably only works with batch_size = 1; larger batches would need
# padded_batch -- confirm before changing batch_size.
train_dataset = tf.data.experimental.CsvDataset(train_file, record_defaults=type_defaults,
                                             header=True)
train_dataset = (train_dataset
                 .map(preprocess)
                 .shuffle(buffer_size=10000)
                 .batch(batch_size)
                 .repeat())
train_iterator = train_dataset.make_one_shot_iterator()
train_batch = train_iterator.get_next()


# Validation pipeline: same parsing, no shuffling, repeated forever.
val_dataset = tf.data.experimental.CsvDataset(val_file, record_defaults=type_defaults,
                                             header=True)
val_dataset = val_dataset.map(preprocess).batch(batch_size).repeat()
val_iterator = val_dataset.make_one_shot_iterator()
val_batch = val_iterator.get_next()

# One-shot iterators are used, so no explicit iterator initializer is run here.
# Pull one batch from each pipeline as a sanity check of the parsed shapes.
print(sess.run(train_batch))
print(sess.run(val_batch))

(array([[6054, 2063, 6206, 4338, 2470,  816, 7011,    0, 1797,   17, 2372,
           0, 7011]], dtype=int32), array([[ 147, 3000,    0, 3837]], dtype=int32), array([[5951, 4112, 4398,    0, 2248]], dtype=int32), array([[4984, 4112, 5254,    0, 3958]], dtype=int32), array([[4983,  671, 1226,  579,    0, 5123]], dtype=int32), array([[1]], dtype=int32))
(array([[3743, 2569, 5483,    0, 1059, 1658,    0, 6686,    0, 2248, 2434,
        3664]], dtype=int32), array([[2434]], dtype=int32), array([[3664]], dtype=int32), array([[2434, 3664]], dtype=int32), array([[2434, 3664]], dtype=int32), array([[2]], dtype=int32))


### Model

In [None]:
def _stacked_gru_cell(keep_prob):
    """Build one direction of an encoder: stacked GRU cells wrapped in dropout.

    Stacks `lstm_stack_size` GRU cells of `lstm_hidden_layer` units each and
    applies `keep_prob` to both the input and the output of every layer.
    """
    cells = [rnn.DropoutWrapper(rnn.GRUCell(lstm_hidden_layer),
                                input_keep_prob=keep_prob,
                                output_keep_prob=keep_prob)
             for _ in range(lstm_stack_size)]
    # The final state is never used by the callers, so the deprecated
    # state_is_tuple=False flag is not needed.
    return rnn.MultiRNNCell(cells)


def _encode_sequence(inputs, keep_prob):
    """Encode a batch of embedded sequences into one vector per example.

    Must be called inside the variable scope that should own the RNN weights.

    Args:
        inputs: float tensor, assumed batch-major `[batch, time, emb]`
            (bidirectional_dynamic_rnn is called with its default
            time_major=False).
        keep_prob: dropout keep probability for the recurrent cells.

    Returns:
        `[batch, 2 * lstm_hidden_layer]` tensor: the final forward output
        concatenated with the final backward output.
    """
    forward_cell = _stacked_gru_cell(keep_prob)
    backward_cell = _stacked_gru_cell(keep_prob)
    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(forward_cell,
                                                          backward_cell,
                                                          inputs,
                                                          dtype=tf.float32)
    # Outputs are [batch, time, hidden]; plain `out[-1]` would select the last
    # *example* of the batch, not the last time step. The backward outputs are
    # returned in input order, so the backward pass finishes at time index 0.
    # NOTE: no sequence_length is passed, so with padded batches the "last"
    # forward step may be padding -- fine for batch_size = 1.
    last_fw = out_fw[:, -1, :]
    last_bw = out_bw[:, 0, :]
    return tf.concat([last_fw, last_bw], axis=1)


def single_qeustion_answer_model(ques_emb, ans_emb, reuse=False, training=True):
    """Fuse an embedded question and one embedded answer option into a vector.

    The question and the answer are each encoded by their own bidirectional
    stacked-GRU encoder (scopes "question_lstm" and "answer_lstm"; the names say
    "lstm" but the cells are GRUs). The two encodings are concatenated and
    passed through a ReLU dense layer followed by dropout.

    Args:
        ques_emb: embedded question, assumed `[batch, time_q, emb]`.
        ans_emb: embedded answer option, assumed `[batch, time_a, emb]`.
        reuse: passed to the enclosing variable scope so that several answer
            options can share the same weights.
        training: when True (default) dropout is applied in the recurrent cells
            and after the dense layer; when False all dropout is disabled.
            Previously the dense-layer dropout was never active because
            `tf.layers.dropout` defaults to training=False.

    Returns:
        `[batch, question_ans_emb]` float tensor.
    """
    keep_prob = dropout_keep_prob if training else 1.0
    with tf.variable_scope("single_qeustion_answer_model", reuse=reuse):
        with tf.variable_scope("question_lstm"):
            ques_output = _encode_sequence(ques_emb, keep_prob)

        with tf.variable_scope("answer_lstm"):
            ans_output = _encode_sequence(ans_emb, keep_prob)

        ques_ans_concat = tf.concat([ques_output, ans_output], axis=1)

        output = layers.dense(ques_ans_concat, question_ans_emb, activation=tf.nn.relu)
        return layers.dropout(output, rate=1 - dropout_keep_prob, training=training)


# Bundle of the graph endpoints returned by model().
Network = collections.namedtuple(
    'Network',
    ['loss', 'pred', 'grads_and_vars', 'train', 'global_step', 'learning_rate'])


def model(question, Aopt, Bopt, Copt, Dopt, label, embmat, reuse=False):
    """Build the multiple-choice network, its loss and its training op.

    The question is paired with each of the four answer options through
    `single_qeustion_answer_model` (weights shared across options), the four
    pair vectors are concatenated and a dense layer produces `num_classes`
    logits. Training uses Adam with a staircase exponentially decayed
    learning rate.

    Args:
        question, Aopt, Bopt, Copt, Dopt: integer id tensors looked up in
            `embmat` (the input pipeline yields them as `[batch, length]`).
        label: integer class ids; any shape, flattened to `[batch]` here.
            Assumes ids are 0-based and < num_classes -- TODO confirm against
            the CSV (an id equal to num_classes would one-hot to all zeros).
        embmat: embedding matrix used for all lookups.
        reuse: reuse flag for every variable scope opened here.

    Returns:
        A `Network` namedtuple with fields loss, pred, grads_and_vars, train,
        global_step and learning_rate.
    """
    # Everything that creates network weights must live inside "BiRNN":
    # the optimizer below only collects trainable variables from that scope.
    # Previously only the question lookup was inside it, so none of the
    # encoder or logits variables were handed to the optimizer.
    with tf.variable_scope("BiRNN", reuse=reuse):
        question_emb = tf.nn.embedding_lookup(embmat, question, name="que_lookup")
        Aopt_emb = tf.nn.embedding_lookup(embmat, Aopt, name="a_lookup")
        Bopt_emb = tf.nn.embedding_lookup(embmat, Bopt, name="blookup")
        Copt_emb = tf.nn.embedding_lookup(embmat, Copt, name="clookup")
        Dopt_emb = tf.nn.embedding_lookup(embmat, Dopt, name="dlookup")

        # The first call creates the pair-encoder weights (or reuses them when
        # the enclosing scope already has reuse set); the rest share them.
        question_AA = single_qeustion_answer_model(question_emb, Aopt_emb)
        question_BB = single_qeustion_answer_model(question_emb, Bopt_emb, reuse=True)
        question_CC = single_qeustion_answer_model(question_emb, Copt_emb, reuse=True)
        question_DD = single_qeustion_answer_model(question_emb, Dopt_emb, reuse=True)

        all_emb = tf.concat([question_AA, question_BB, question_CC, question_DD], axis=1)
        logits = layers.dense(all_emb, num_classes)

    pred_probs = tf.nn.softmax(logits)
    pred_label = tf.argmax(pred_probs, axis=1)

    # Flatten labels so the one-hot tensor is [batch, num_classes], matching
    # the rank of the logits.
    label_ids = tf.reshape(label, [-1])
    one_hot_labels = tf.one_hot(label_ids, num_classes)

    total_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels,
                                                logits=logits))

    with tf.variable_scope("global_step_and_learning_rate", reuse=reuse):
        global_step = tf.contrib.framework.get_or_create_global_step()
        learning_rate = tf.train.exponential_decay(init_learning_rate,
                                                   global_step,
                                                   decay_step,
                                                   decay_rate,
                                                   staircase=True)

    with tf.variable_scope("optimizer", reuse=reuse):
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='BiRNN')
            optimizer = tf.train.AdamOptimizer(learning_rate, beta1=beta)
            grads_and_vars = optimizer.compute_gradients(total_loss, tvars)
            # Let the optimizer advance global_step after the update instead of
            # a separate, unordered assign that races with the learning-rate read.
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

    return Network(
        loss=total_loss,
        pred=pred_label,
        grads_and_vars=grads_and_vars,
        train=tf.group(total_loss, train_op),
        global_step=global_step,
        learning_rate=learning_rate
    )