In [1]:
import sys
sys.path.insert(0, "/work/04233/sw33286/AIDA-SCRIPTS")

In [2]:
import random
import numpy as np

import tensorflow as tf

from helpers import Indexer
from itertools import chain

### Prepare data

In [3]:
TYPES = ['ANIMAL','VEHICLE','NATURE','FURNITURE','FRUIT']
TYPE2VOCAB = {'ANIMAL': ['cat','dog','pig','horse','deer'],
              'VEHICLE': ['car','bike','motorcycle','train','bus'],
              'NATURE': ['hill','mountain','lake','river','valley'],
              'FURNITURE': ['stool','table','closet','cabinet','bed'],
              'FRUIT': ['apple','pear','strawberry','grape','tomato']}
VOCAB = list(chain.from_iterable(TYPE2VOCAB.values()))

indexer = Indexer()
indexer.get_index('PAD')
for word in VOCAB:
    indexer.get_index(word)

In [9]:
FROM_LEN = 5
TO_LEN = 15
CTX_LEN = 10 # need to pad/cut context vector later.

def get_context(type1, type2):
    return [indexer.get_index(w) for w in TYPE2VOCAB[type1]] + \
           [indexer.get_index(w) for w in TYPE2VOCAB[type2]]

def generate_datum(from_len=FROM_LEN, to_len=TO_LEN):
    y = 0 if np.random.rand() < 0.5 else 1
    x1_length = np.random.randint(from_len, to_len)
    x2_length = np.random.randint(from_len, to_len)
    x1_type = np.random.choice(TYPES)
    x2_type = np.random.choice([t for t in TYPES if t!=x1_type]) if y==0 else x1_type
    
    x1_code = [indexer.get_index(np.random.choice(TYPE2VOCAB[x1_type])) for _ in range(x1_length)]
    x2_code = [indexer.get_index(np.random.choice(TYPE2VOCAB[x2_type])) for _ in range(x2_length)]
    ctx_code = get_context(x1_type, x2_type)
    if x1_length < to_len:
        x1_code += [indexer.get_index('PAD')] * (to_len-x1_length)
    if x2_length < to_len:
        x2_code += [indexer.get_index('PAD')] * (to_len-x2_length)
    return x1_code, x2_code, ctx_code, y

def to_sent(code):
    return [indexer.get_object(idx) for idx in code]

def get_batch(n, from_len=FROM_LEN, to_len=TO_LEN):
    x1_batch, x2_batch, ctx_batch, y_batch = [], [], [], []
    for _ in range(n):
        x1_code, x2_code, ctx_code, y = generate_datum(from_len, to_len)
        x1_batch.append(x1_code)
        x2_batch.append(x2_code)
        ctx_batch.append(ctx_code)
        y_batch.append(y)
    return np.array(x1_batch), np.array(x2_batch), np.array(ctx_batch), np.array(y_batch)

### CNN classifier with context

In [35]:
tf.reset_default_graph()

MAX_LEN = TO_LEN
VOCAB_SIZE = len(indexer)
EMBED_SIZE = 20
FILTER_SIZES = [3,4,5]
NUM_SENT_FILTERS = 10
NUM_CTX_FILTERS = 3
NUM_CHANNELS = 1

sess = tf.InteractiveSession()

input_x1 = tf.placeholder(tf.int32, [None, MAX_LEN], name='input_x1')
input_x2 = tf.placeholder(tf.int32, [None, MAX_LEN], name='input_x2')
input_ctx = tf.placeholder(tf.int32, [None, CTX_LEN], name='input_ctx')
input_y  = tf.placeholder(tf.int32, [None], name='input_y')

keep_prob = tf.placeholder(tf.float32, name="keep_prob")

with tf.device('/cpu:0'), tf.variable_scope('embeddings'): 
        # name_scope works only with tf.Variable
        # variable_scope works with tf.get_variable
    E = tf.get_variable('E', [VOCAB_SIZE, EMBED_SIZE], initializer=tf.contrib.layers.xavier_initializer())
    embed_x1 = tf.expand_dims(tf.nn.embedding_lookup(E, input_x1), -1)
    embed_x2 = tf.expand_dims(tf.nn.embedding_lookup(E, input_x2), -1)
        # embed_x*: [batch_size, height=MAX_LEN, width=EMBED_SIZE, num_channels=1]
    embed_ctx = tf.expand_dims(tf.nn.embedding_lookup(E, input_ctx), -1)
        # embed_ctx: [batch_size, height=CTX_LEN, width=EMBED_SIZE, num_channels=1]

pool1_outputs, pool2_outputs = [], []
poolctx_outputs = []
for i, filter_size in enumerate(FILTER_SIZES):
    with tf.variable_scope('conv-max-pool-%s' % filter_size): 
        sent_filter_shape = [filter_size, EMBED_SIZE, NUM_CHANNELS, NUM_SENT_FILTERS]
            # Filter dims: [filter_size, emb_size, num_channels, num_filters]
        W1 = tf.get_variable('W1', sent_filter_shape, initializer=tf.contrib.layers.xavier_initializer())
        W2 = tf.get_variable('W2', sent_filter_shape, initializer=tf.contrib.layers.xavier_initializer())
        b1 = tf.get_variable('b1', [NUM_SENT_FILTERS], initializer=tf.contrib.layers.xavier_initializer())
        b2 = tf.get_variable('b2', [NUM_SENT_FILTERS], initializer=tf.contrib.layers.xavier_initializer())
        conv1 = tf.nn.conv2d(embed_x1, W1, strides=[1,1,1,1], padding='VALID', name='conv1')
        conv2 = tf.nn.conv2d(embed_x2, W2, strides=[1,1,1,1], padding='VALID', name='conv2')
            # Conv dims: [batch_size, height, width, num_channels]
        h1 = tf.nn.relu(tf.nn.bias_add(conv1, b1), name='relu1')
        h2 = tf.nn.relu(tf.nn.bias_add(conv2, b2), name='relu2')
        pool1 = tf.nn.max_pool(h1, ksize=[1,MAX_LEN-filter_size+1,1,1], strides=[1,1,1,1], padding='VALID', name='pool1')
        pool2 = tf.nn.max_pool(h2, ksize=[1,MAX_LEN-filter_size+1,1,1], strides=[1,1,1,1], padding='VALID', name='pool2')
            # kernel size (ksize): [batch_size, height, width, num_channels]
        ### CTX ADDED HERE ###
        ctx_filter_shape = [filter_size, EMBED_SIZE, NUM_CHANNELS, NUM_CTX_FILTERS]
        W_ctx = tf.get_variable('W_ctx', ctx_filter_shape, initializer=tf.contrib.layers.xavier_initializer())
        b_ctx = tf.get_variable('b_ctx', [NUM_CTX_FILTERS], initializer=tf.contrib.layers.xavier_initializer())
        conv_ctx = tf.nn.conv2d(embed_ctx, W_ctx, strides=[1,1,1,1], padding='VALID', name='conv_ctx')
        h_ctx = tf.nn.relu(tf.nn.bias_add(conv_ctx, b_ctx), name='relu1')
        pool_ctx = tf.nn.max_pool(h_ctx, ksize=[1,CTX_LEN-filter_size+1,1,1], strides=[1,1,1,1], padding='VALID', name='pool_ctx')
        ######################
        pool1_outputs.append(tf.concat([pool1,pool_ctx], 3))
        pool2_outputs.append(tf.concat([pool2,pool_ctx], 3))

num_filters_total = (NUM_SENT_FILTERS+NUM_CTX_FILTERS) * len(FILTER_SIZES)
h_pool1_flat = tf.nn.dropout(tf.reshape(tf.concat(pool1_outputs, 3), [-1, num_filters_total]), keep_prob)
h_pool2_flat = tf.nn.dropout(tf.reshape(tf.concat(pool2_outputs, 3), [-1, num_filters_total]), keep_prob)
    # flat shape: [batch_size, num_filters_total].
W_bi = tf.get_variable('W_bi', [num_filters_total, num_filters_total],
                       initializer=tf.contrib.layers.xavier_initializer())
scores = tf.nn.sigmoid(tf.diag_part(tf.matmul(tf.matmul(h_pool1_flat, W_bi), tf.transpose(h_pool2_flat))))

predictions = tf.cast(tf.round(scores), tf.int32, name='predictions') 

with tf.name_scope('loss'):
    losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(input_y, tf.float32), logits=scores)
    loss = tf.reduce_mean(losses)

with tf.name_scope('accuracy'):
    correct_predictions = tf.equal(predictions, input_y)
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy')
    
global_step = tf.Variable(0, name='global_step', trainable=False)
optimizer = tf.train.AdamOptimizer(1e-4)
grads_and_vars = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

sess.run(tf.global_variables_initializer())

In [36]:
NUM_EPOCH = 5
NUM_BATCH = 1000
BATCH_SIZE = 32
VERBOSE = 500

for e in range(NUM_EPOCH):
    print('Epoch ', e+1)
    print('\n')
    loss_track, accuracy_track = [], []
    for _ in range(NUM_BATCH):
        batch_x1, batch_x2, batch_ctx, batch_y = get_batch(BATCH_SIZE)
        fd = {input_x1:batch_x1, input_x2:batch_x2, input_ctx:batch_ctx, input_y:batch_y, keep_prob:0.7}
        _, step, loss_, accuracy_ = sess.run([train_op, global_step, loss, accuracy], feed_dict=fd)
        loss_track.append(loss_)
        accuracy_track.append(accuracy_)
        if step%VERBOSE==0:
            print('  batch loss & accuracy at step {}: <{}, {}>'.format(step, loss_, accuracy_))
    print('\n')
    print('  epoch mean loss & accuracy: <{}, {}>'.format(np.mean(loss_track),np.mean(accuracy_track)))
    print('\n')

Epoch  1


  batch loss & accuracy at step 500: <0.6725980043411255, 0.375>
  batch loss & accuracy at step 1000: <0.6523560285568237, 0.625>


  epoch mean loss & accuracy: <0.6833137273788452, 0.5168437361717224>


Epoch  2


  batch loss & accuracy at step 1500: <0.6643267273902893, 0.78125>
  batch loss & accuracy at step 2000: <0.6096349954605103, 0.8125>


  epoch mean loss & accuracy: <0.6167633533477783, 0.7592499852180481>


Epoch  3


  batch loss & accuracy at step 2500: <0.5127249956130981, 0.9375>
  batch loss & accuracy at step 3000: <0.48382511734962463, 1.0>


  epoch mean loss & accuracy: <0.5635325312614441, 0.8926249742507935>


Epoch  4


  batch loss & accuracy at step 3500: <0.5771576166152954, 0.96875>
  batch loss & accuracy at step 4000: <0.5196552276611328, 0.96875>


  epoch mean loss & accuracy: <0.540743887424469, 0.9358124732971191>


Epoch  5


  batch loss & accuracy at step 4500: <0.5684242844581604, 0.96875>
  batch loss & accuracy at step 5000: <0.558