# TensorFlow Simple Sentiment Analysis Improved

In [1]:
%load_ext autoreload
%autoreload 2
import tensorflow as tf
# tf.reset_default_graph()
session = tf.InteractiveSession()
import utils
import numpy as np

In [2]:
X, y, index_to_word, sentences = utils.load_sentiment_data_bow()
X_train, y_train, X_test, y_test = utils.split_data(X, y)
vocab_size = X.shape[1]
n_classes = y.shape[1]

s_i = 50
print("Sentence:", sentences[s_i])
print("BOW:", utils.bow_to_dict(X[s_i,:], index_to_word))
print("Label:", utils.label_to_desc(y[s_i]))

('Sentence:', '@VirginAmerica Is flight 769 on it\'s way? Was supposed to take off 30 minutes ago. Website still shows "On Time" not "In Flight". Thanks.')
('BOW:', {'is': 1.0, 'it': 1.0, 'thanks': 1.0, 'in': 1.0, 'still': 1.0, u'<UNK>': 1.0, '"': 4.0, "'": 1.0, '.': 3.0, 'to': 1.0, 'take': 1.0, 'way': 1.0, 'was': 1.0, '?': 1.0, 'shows': 1.0, 'website': 1.0, 'flight': 2.0, '@virginamerica': 1.0, 'not': 1.0, 'ago': 1.0, 'on': 2.0, 'off': 1.0, '30': 1.0, 's': 1.0, 'supposed': 1.0, 'time': 1.0, 'minutes': 1.0})
('Label:', 'negative')


In [3]:
data_placeholder = tf.placeholder(tf.float32, shape=(None, vocab_size), name='data_placeholder')
labels_placeholder = tf.placeholder(tf.float32, shape=(None, n_classes), name='labels_placeholder')
keep_prob_placeholder = tf.placeholder(tf.float32, name='keep_prob_placeholder')

In [4]:
# Helper function for fully connected layers

def linear(input_, output_size, layer_scope, stddev=0.02, bias_start=0.0):
    shape = input_.get_shape().as_list()

    with tf.variable_scope(layer_scope):
        matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
            initializer=tf.constant_initializer(bias_start))
        return tf.matmul(input_, matrix) + bias

In [5]:
# Define Computation Graphs

n_hidden_units_h0 = 512
n_hidden_units_h1 = 256

with tf.variable_scope('hidden_layers'):
    hidden0 = tf.nn.relu(linear(data_placeholder, n_hidden_units_h0, 'hidden0'))
    hidden1 = tf.nn.relu(linear(hidden0, n_hidden_units_h1, 'hidden1'))

logits = linear(hidden1, n_classes, 'output_layer')

In [6]:
# Define Loss Function + Optimizer
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, labels_placeholder))

loss_sum = tf.scalar_summary('loss', loss)

optimizer = tf.train.GradientDescentOptimizer(0.00005).minimize(loss)
prediction = tf.nn.softmax(logits)
prediction_is_correct = tf.equal(
    tf.argmax(logits, 1), tf.argmax(labels_placeholder, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_is_correct, tf.float32))
accuracy_sum = tf.scalar_summary('accuracy', accuracy)

In [7]:
# Train loop

num_steps = 2000
batch_size = 128
keep_prob_rate = 0.75

tf.initialize_all_variables().run()
merged_summary_op = tf.merge_summary([loss_sum, accuracy_sum])
train_writer = tf.train.SummaryWriter("./feed-forward-logs-train", session.graph)
test_writer = tf.train.SummaryWriter("./feed-forward-logs-test", session.graph)

for step in xrange(num_steps):
    offset = (step * batch_size) % (X_train.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = X_train[offset:(offset + batch_size), :]
    batch_labels = y_train[offset:(offset + batch_size), :]
    # We built our networking using placeholders. It's like we've made reservations for a party of 6.
    # So use feed_dict to fill what we reserved. And we can't show up with 9 people. 

    feed_dict_train = {data_placeholder: batch_data, labels_placeholder : batch_labels, keep_prob_placeholder: keep_prob_rate}
    # Run the optimizer, get the loss, get the predictions.
    # We can run multiple things at once and get their outputs
    _, loss_value_train, predictions_value_train, accuracy_value_train, merged_sum = session.run(
      [optimizer, loss, prediction, accuracy, merged_summary_op], feed_dict=feed_dict_train)
    train_writer.add_summary(merged_sum, step)
    if (step % 10 == 0):
        print "Minibatch train loss at step", step, ":", loss_value_train
        print "Minibatch train accuracy: %.3f%%" % accuracy_value_train
        feed_dict_test = {data_placeholder: X_test, labels_placeholder: y_test, keep_prob_placeholder: 1.0}
        loss_value_test, predictions_value_test, accuracy_value_test, merged_sum = session.run(
            [loss, prediction, accuracy, merged_summary_op], feed_dict=feed_dict_test)
        test_writer.add_summary(merged_sum, step)
    
        print "Test loss: %.3f" % loss_value_test
        print "Test accuracy: %.3f%%" % accuracy_value_test


Minibatch train loss at step 0 : 1.09872
Minibatch train accuracy: 0.375%
Test loss: 1.102
Test accuracy: 0.178%
Minibatch train loss at step 10 : 1.10095
Minibatch train accuracy: 0.180%
Test loss: 1.101
Test accuracy: 0.180%
Minibatch train loss at step 20 : 1.10018
Minibatch train accuracy: 0.203%
Test loss: 1.101
Test accuracy: 0.182%
Minibatch train loss at step 30 : 1.10063
Minibatch train accuracy: 0.242%
Test loss: 1.101
Test accuracy: 0.185%
Minibatch train loss at step 40 : 1.09992
Minibatch train accuracy: 0.211%
Test loss: 1.101
Test accuracy: 0.186%
Minibatch train loss at step 50 : 1.09951
Minibatch train accuracy: 0.266%
Test loss: 1.101
Test accuracy: 0.189%
Minibatch train loss at step 60 : 1.09946
Minibatch train accuracy: 0.289%
Test loss: 1.101
Test accuracy: 0.190%
Minibatch train loss at step 70 : 1.10037
Minibatch train accuracy: 0.195%
Test loss: 1.101
Test accuracy: 0.192%
Minibatch train loss at step 80 : 1.10137
Minibatch train accuracy: 0.141%
Test loss: 1.1