In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import utils
import random

  from ._conv import register_converters as _register_converters


In [2]:
# hyperparameters
iterations = 100
batch_size = 32
learning_rate = 0.001
reg_eta = 0.001

# dimensionalities
dim_lstm = 64
dim_gru = 64
dim_word = 300
dim_aspect = 5
dim_aspect_embedding = 64
dim_sentence = 80
dim_polarity = 3

# setup utils object
u = utils.UTILS(batch_size, dim_sentence, dim_polarity)

In [3]:
# define tf placeholders
X = tf.placeholder(tf.int32, [None, dim_sentence])
y = tf.placeholder(tf.float32, [None, dim_polarity])
seqlen = tf.placeholder(tf.int32, [None])
aspects = tf.placeholder(tf.int32, [None])

In [4]:
# define tf variables
with tf.variable_scope('aspect_embedding_vars', reuse = tf.AUTO_REUSE):
    fw_va = tf.get_variable(
        name = 'aspect_matrix_forward_Va',
        shape = [dim_aspect, dim_aspect_embedding],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    bk_va = tf.get_variable(
        name = 'aspect_matrix_backward_Va',
        shape = [dim_aspect, dim_aspect_embedding],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    wv = tf.get_variable(
        name = 'aspect_Wv',
        shape = [dim_aspect_embedding * 2, dim_aspect_embedding * 2],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
with tf.variable_scope('attention_vars', reuse = tf.AUTO_REUSE):
    wm = tf.get_variable(
        name = 'M_Wm',
        shape = [dim_lstm * 2, dim_lstm * 2],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    bm = tf.get_variable(
        name = 'M_Bm',
        shape = [dim_lstm * 2],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    with tf.variable_scope('gru_vars', reuse = tf.AUTO_REUSE):
        wr = tf.get_variable(
            name = 'r_iAL_Wr',
            shape = [dim_gru, dim_lstm * 2],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
        ur = tf.get_variable(
            name = 'r_e_Ur',
            shape = [dim_gru, dim_gru],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
        wz = tf.get_variable(
            name = 'z_iAL_Wz',
            shape = [dim_gru, dim_lstm * 2],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
        uz = tf.get_variable(
            name = 'z_e_Uz',
            shape = [dim_gru, dim_gru],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
        wx = tf.get_variable(
            name = 'e_tanh_Wx',
            shape = [dim_gru, dim_lstm * 2],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
        wg = tf.get_variable(
            name = 'e_tanh_Wg',
            shape = [dim_gru, dim_gru],
            initializer = tf.random_normal_initializer(0, 0.003),
            regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
        )
with tf.variable_scope('output_softmax_vars', reuse = tf.AUTO_REUSE):
    ws = tf.get_variable(
        name = 'y_softmax_Wy',
        shape = [dim_gru, dim_polarity],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )
    bs = tf.get_variable(
        name = 'y_softmax_By',
        shape = [dim_polarity],
        initializer = tf.random_normal_initializer(0, 0.003),
        regularizer = tf.contrib.layers.l2_regularizer(reg_eta)
    )    

Instructions for updating:
Use the retry module or similar alternatives.


In [5]:
# define bi lstm model
def bi_lstm(inputs):
#     inputs = tf.nn.dropout(inputs, keep_prob=1.0)
    with tf.name_scope('lstm_model'):
#         # slice the corresponding vai from va
#         fw_vai = tf.gather(fw_va, aspects) # batch_size x dim_aspect_embedding
#         bk_vai = tf.gather(bk_va, aspects) # batch_size x dim_aspect_embedding
#         # concatenate vai to inputs
#         vai_en = [vai for i in range(dim_sentence)]
#         vai_en = tf.stack(vai_en, axis = 1) # batch_size x dim_sentence x dim_aspect_embedding
#         inputs = tf.concat([inputs, vai_en], 2)
        forward_lstm_cell = tf.contrib.rnn.LSTMCell(dim_lstm)
        backward_lstm_cell = tf.contrib.rnn.LSTMCell(dim_lstm)
        H, states = tf.nn.bidirectional_dynamic_rnn(
            forward_lstm_cell,
            backward_lstm_cell,
            inputs = inputs,
            sequence_length = seqlen,
            dtype = tf.float32,
            scope = 'bilstm'
        )
        M = tf.concat(H, 2) # batch_size x dim_sentence x (dim_lstm * 2)
    return M


In [None]:
# define ram model
def ram(M):
    size = tf.shape(M)[0] # batch_size
    # attention layer 1
    e0 = tf.zeros([size, dim_gru]) # batch_size x dim_gru
    g = tf.matmul(tf.reshape(M, [-1, dim_lstm * 2]), wm) + bm # (batch_size * dim_sentence) x (dim_lstm * 2)
    alpha = tf.nn.softmax(g)
    

In [6]:
# define operations
# tf.reset_default_graph()
pred = dynamic_lstm(tf.nn.embedding_lookup(u.gloveDict, X), seqlen, aspects)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = pred, labels = y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [9]:
# full dataset training
test_X, test_y, test_seqlen, test_aspects = u.getData('test')
train_X, train_y, train_seqlen, train_aspects = u.getData('train')
with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        sess.run(optimizer, feed_dict = {X: train_X, y: train_y, seqlen: train_seqlen, aspects: train_aspects})
#         if i > 0 and i % 4 == 0:
        loss_train, accuracy_train = sess.run([loss, accuracy], feed_dict = {X: train_X, y: train_y, seqlen: train_seqlen, aspects: train_aspects})
        print('step: %s, train loss: %s, train accuracy: %s' % (i, loss_train, accuracy_train))
        loss_test, accuracy_test = sess.run([loss, accuracy], feed_dict = {X: test_X, y: test_y, seqlen: test_seqlen, aspects: test_aspects})
        print('step: %s, test loss: %s, test accuracy: %s' % (i, loss_test, accuracy_test))

step: 0, train loss: 1.0672083, train accuracy: 0.60947865
step: 0, test loss: 1.0597196, test accuracy: 0.6438356
step: 1, train loss: 1.0270233, train accuracy: 0.61042655
step: 1, test loss: 1.0090522, test accuracy: 0.6438356
step: 2, train loss: 0.98581773, train accuracy: 0.61042655
step: 2, test loss: 0.9563357, test accuracy: 0.6438356
step: 3, train loss: 0.9560119, train accuracy: 0.61042655
step: 3, test loss: 0.9161578, test accuracy: 0.6438356
step: 4, train loss: 0.94279426, train accuracy: 0.61042655
step: 4, test loss: 0.8936038, test accuracy: 0.6438356
step: 5, train loss: 0.94123256, train accuracy: 0.61042655
step: 5, test loss: 0.8825179, test accuracy: 0.6438356
step: 6, train loss: 0.94334894, train accuracy: 0.61042655
step: 6, test loss: 0.8755773, test accuracy: 0.6438356
step: 7, train loss: 0.94440395, train accuracy: 0.61042655
step: 7, test loss: 0.8691451, test accuracy: 0.6438356
step: 8, train loss: 0.94331676, train accuracy: 0.61042655
step: 8, test l

step: 70, train loss: 0.59780127, train accuracy: 0.7687204
step: 70, test loss: 0.69510806, test accuracy: 0.760274
step: 71, train loss: 0.59004873, train accuracy: 0.771564
step: 71, test loss: 0.7010236, test accuracy: 0.7568493
step: 72, train loss: 0.58453083, train accuracy: 0.7696682
step: 72, test loss: 0.7065754, test accuracy: 0.760274
step: 73, train loss: 0.5784995, train accuracy: 0.77251184
step: 73, test loss: 0.7097099, test accuracy: 0.7568493
step: 74, train loss: 0.5732919, train accuracy: 0.77251184
step: 74, test loss: 0.7142982, test accuracy: 0.75342464
step: 75, train loss: 0.57293886, train accuracy: 0.7706161
step: 75, test loss: 0.73340124, test accuracy: 0.739726
step: 76, train loss: 0.58376986, train accuracy: 0.76303315
step: 76, test loss: 0.7111329, test accuracy: 0.7671233
step: 77, train loss: 0.61227053, train accuracy: 0.7402844
step: 77, test loss: 0.73336107, test accuracy: 0.71575344
step: 78, train loss: 0.5642774, train accuracy: 0.7706161
ste

In [8]:
# batch training
test_X, test_y, test_seqlen, test_aspects = u.getData('test')
with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        batch_X, batch_y, batch_seqlen, batch_aspects = u.nextBatch(batch_size)
        sess.run(optimizer, feed_dict = {X: batch_X, y: batch_y, seqlen: batch_seqlen, aspects: batch_aspects})
        if i > 0 and i % 4 == 0:
            loss_train, accuracy_train = sess.run([loss, accuracy], feed_dict = {X: batch_X, y: batch_y, seqlen: batch_seqlen, aspects: batch_aspects})
            print('step: %s, train loss: %s, train accuracy: %s' % (i, loss_train, accuracy_train))
            loss_test, accuracy_test = sess.run([loss, accuracy], feed_dict = {X: test_X, y: test_y, seqlen: test_seqlen, aspects: test_aspects})
            print('step: %s, test loss: %s, test accuracy: %s' % (i, loss_test, accuracy_test))

step: 4, train loss: 1.0650923, train accuracy: 0.34375
step: 4, test loss: 1.0683516, test accuracy: 0.43493152
step: 8, train loss: 1.0652124, train accuracy: 0.3125
step: 8, test loss: 1.0012907, test accuracy: 0.65753424
step: 12, train loss: 0.99717486, train accuracy: 0.65625
step: 12, test loss: 0.9812017, test accuracy: 0.5753425
step: 16, train loss: 0.9942324, train accuracy: 0.5
step: 16, test loss: 1.0188067, test accuracy: 0.47260273
step: 20, train loss: 0.9044162, train accuracy: 0.625
step: 20, test loss: 0.8960825, test accuracy: 0.59931505
step: 24, train loss: 1.0571532, train accuracy: 0.5
step: 24, test loss: 1.0216777, test accuracy: 0.59246576
step: 28, train loss: 0.82417476, train accuracy: 0.59375
step: 28, test loss: 0.80982697, test accuracy: 0.6438356
step: 32, train loss: 0.9238791, train accuracy: 0.53125
step: 32, test loss: 0.9979492, test accuracy: 0.55479455
step: 36, train loss: 0.8902892, train accuracy: 0.53125
step: 36, test loss: 0.927876, test a