# TensorFlow FeedForward Sentiment Analysis

### Imports

In [3]:
%load_ext autoreload
%autoreload 2
import tensorflow as tf
# tf.reset_default_graph()
session = tf.InteractiveSession()
import utils
import numpy as np

### Data Preprocessing

In [4]:
X, y, index_to_word, sentences = utils.load_sentiment_data_bow()
X_train, y_train, X_test, y_test = utils.split_data(X, y)
vocab_size = X.shape[1]
n_classes = y.shape[1]

s_i = 50
print("Sentence: {}".format(sentences[s_i]))
print("BOW: {}".format(utils.bow_to_dict(X[s_i,:], index_to_word)))
print("Label: {}".format(utils.label_to_desc(y[s_i])))

Sentence: @VirginAmerica Is flight 769 on it's way? Was supposed to take off 30 minutes ago. Website still shows "On Time" not "In Flight". Thanks.
BOW: {'is': 1.0, 'it': 1.0, 'thanks': 1.0, 'in': 1.0, 'still': 1.0, u'<UNK>': 1.0, '"': 4.0, "'": 1.0, '.': 3.0, 'to': 1.0, 'take': 1.0, 'way': 1.0, 'was': 1.0, '?': 1.0, 'shows': 1.0, 'website': 1.0, 'flight': 2.0, '@virginamerica': 1.0, 'not': 1.0, 'ago': 1.0, 'on': 2.0, 'off': 1.0, '30': 1.0, 's': 1.0, 'supposed': 1.0, 'time': 1.0, 'minutes': 1.0}
Label: negative


### Define TensorFlow Computation Graph

In [3]:
data_placeholder = tf.placeholder(tf.float32, shape=(None, vocab_size), name='data_placeholder')
labels_placeholder = tf.placeholder(tf.float32, shape=(None, n_classes), name='labels_placeholder')

In [4]:
# Define Weights
n_hidden_units_h0 = 512
n_hidden_units_h1 = 256

h0_weights = tf.Variable(
    tf.truncated_normal([vocab_size, n_hidden_units_h0]),
    name='h0_weights')
h0_biases = tf.Variable(tf.zeros([n_hidden_units_h0]),
                     name='h0_biases')


h1_weights = tf.Variable(
    tf.truncated_normal([n_hidden_units_h0, n_hidden_units_h1]),
    name='h1_weights')
h1_biases = tf.Variable(tf.zeros([n_hidden_units_h1]),
                     name='h1_biases')

h2_weights = tf.Variable(
    tf.truncated_normal([n_hidden_units_h1, n_classes]),
    name='h2_weights')
h2_biases = tf.Variable(tf.zeros([n_classes]),
                     name='h2_biases')

In [5]:
# Define Computation Graphs
hidden0 = tf.nn.relu(tf.matmul(data_placeholder, h0_weights) + h0_biases)
hidden1 = tf.nn.relu(tf.matmul(hidden0, h1_weights) + h1_biases)
logits = tf.matmul(hidden1, h2_weights) + h2_biases

In [10]:
# Define Loss Function + Optimizer
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, labels_placeholder))

optimizer = tf.train.GradientDescentOptimizer(0.0002).minimize(loss)
prediction = tf.nn.softmax(logits)
prediction_is_correct = tf.equal(
    tf.argmax(logits, 1), tf.argmax(labels_placeholder, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_is_correct, tf.float32))

In [13]:
# Train loop

num_steps = 1000
batch_size = 128

tf.initialize_all_variables().run()

for step in xrange(num_steps):
    offset = (step * batch_size) % (X_train.shape[0] - batch_size)
    # Generate a minibatch.
    batch_data = X_train[offset:(offset + batch_size), :]
    batch_labels = y_train[offset:(offset + batch_size), :]
    # We built our networking using placeholders. It's like we've made reservations for a party of 6.
    # So use feed_dict to fill what we reserved. And we can't show up with 9 people. 
    feed_dict_train = {data_placeholder: batch_data, labels_placeholder : batch_labels}
    # Run the optimizer, get the loss, get the predictions.
    # We can run multiple things at once and get their outputs
    _, loss_value_train, predictions_value_train, accuracy_value_train = session.run(
      [optimizer, loss, prediction, accuracy], feed_dict=feed_dict_train)
    if (step % 10 == 0):
        print "Minibatch train loss at step", step, ":", loss_value_train
        print "Minibatch train accuracy: %.3f%%" % accuracy_value_train
        feed_dict_test = {data_placeholder: X_test, labels_placeholder: y_test}
        loss_value_test, predictions_value_test, accuracy_value_test = session.run(
            [loss, prediction, accuracy], feed_dict=feed_dict_test)
        print "Test loss: %.3f" % loss_value_test
        print "Test accuracy: %.3f%%" % accuracy_value_test


Minibatch train loss at step 0 : 527.443
Minibatch train accuracy: 0.352%
Test loss: 471.953
Test accuracy: 0.493%
Minibatch train loss at step 5 : 657.233
Minibatch train accuracy: 0.688%
Test loss: 337.715
Test accuracy: 0.644%
Minibatch train loss at step 10 : 110.708
Minibatch train accuracy: 0.570%
Test loss: 138.643
Test accuracy: 0.698%
Minibatch train loss at step 15 : 95.7145
Minibatch train accuracy: 0.586%
Test loss: 150.376
Test accuracy: 0.707%
Minibatch train loss at step 20 : 138.203
Minibatch train accuracy: 0.656%
Test loss: 159.660
Test accuracy: 0.397%
Minibatch train loss at step 25 : 79.6419
Minibatch train accuracy: 0.609%
Test loss: 112.690
Test accuracy: 0.699%
Minibatch train loss at step 30 : 163.466
Minibatch train accuracy: 0.695%
Test loss: 82.916
Test accuracy: 0.652%
Minibatch train loss at step 35 : 109.506
Minibatch train accuracy: 0.477%
Test loss: 78.768
Test accuracy: 0.625%
Minibatch train loss at step 40 : 68.7234
Minibatch train accuracy: 0.656%
T