In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import csv
import numpy as np

DATA_DIR = 'MNIST'
sentiment_data = 'sentiment-data'


def getWordVectorDict():
    reader = csv.reader(open(sentiment_data +'/word-vectors-refine.txt'))

    word_vector_dict = {}
    for row in reader:
        key = row[0]
        if key in word_vector_dict:
            # implement your duplicate row handling here
            pass
        word_vector_dict[key] = np.array(row[1:])
    return word_vector_dict

def getPaddedSentenceMatrix(sentenceMatrix):
    wordCount = 100
    return np.vstack((sentenceMatrix, np.zeros((wordCount - np.shape(sentenceMatrix)[0],np.shape(sentenceMatrix)[1]), dtype=np.float32)))

def getVectorForSentence(sentence, word_vec_dict):
    sentence_matrix = []
    for word in sentence.split(' '):
        word_vec = word_vec_dict[word]
        if(len(sentence_matrix) == 0):
            sentence_matrix = word_vec
        else:
            sentence_matrix = np.vstack((sentence_matrix,word_vec))
    return getPaddedSentenceMatrix(sentence_matrix)

def getData(fileName):
    reader = csv.reader(open(sentiment_data +'/' + fileName))
    trainingData = []
    for row in reader:
        data = {}
        data['label'] =  1 if row[0] == 'postive' else 0
        data['sentence'] = row[1:]
        trainingData.append(data)
    return trainingData

word_vec_dict = getWordVectorDict()


def transform(row):
    return row['label'], getVectorForSentence(row['sentence'][0], word_vec_dict)


word_vector_size = 50;
time_steps = 100;
num_classes = 2
batch_size = 1000;
n_iterations = 10;
hidden_layer_size = 64

training_data = getData('train.csv')
training_rows  = map(lambda row: transform(row), training_data)
training_data = map(lambda row: row[1], training_rows)
training_labels = map(lambda row: row[0], training_rows)
#test_data = map(lambda row: transform(row), getData('test.csv'))
test_data_raw = getData('test.csv')
test_rows  = map(lambda row: transform(row), test_data_raw)
test_data = map(lambda row: row[1], test_rows)
test_labels = map(lambda row: row[0], test_rows)
print training_data[0]

[['0.68491' '0.32385' '-0.11592' ..., '0.17874' '-0.1693' '0.062375']
 ['0.96193' '0.012516' '0.21733' ..., '0.14032' '-0.38468' '-0.38712']
 ['0.6008' '0.18044' '0.078339' ..., '-0.016404' '-0.65372' '-0.38255']
 ..., 
 ['0.88387' '-0.14199' '0.13566' ..., '0.52711' '-0.20148' '0.0095952']
 ['-0.0010919' '0.33324' '0.35743' ..., '-0.45697' '-0.048969' '1.1316']
 ['-0.55114' '-0.16296' '-0.95494' ..., '-1.0346' '-0.25143' '1.4836']]


In [2]:

#Setting up the input and labels placeholders
_inputs = tf.placeholder(tf.float32, shape=[None, time_steps,
                                            word_vector_size])
y = tf.placeholder(tf.int32, shape=[None, num_classes])
y_one_hot = tf.one_hot( y , num_classes )

# TensorFlow built-in functions
# Creating the RNN cell and creating the outputs
with tf.variable_scope("gru"):
    gru_cell = tf.contrib.rnn.GRUCell(hidden_layer_size)
    outputs, states = tf.nn.dynamic_rnn(gru_cell,_inputs, dtype=tf.float32)

weights = {
        'linear_layer': tf.Variable(tf.truncated_normal([hidden_layer_size,
                                                         num_classes],
                                                         mean=0,stddev=.01))
}


# Extract the last relevant output and use in a linear layer
final_output = tf.matmul(states,
                         weights["linear_layer"])
softmax = tf.nn.softmax_cross_entropy_with_logits(logits = final_output,
                                                  labels = y)                         
cross_entropy = tf.reduce_mean(softmax)

train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1),
                              tf.argmax(final_output,1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction,
                                   tf.float32)))*100






In [3]:
def dense_to_one_hot(labels_dense, num_classes=10):
  """Convert class labels from scalars to one-hot vectors."""
  num_labels = labels_dense.shape[0]
  index_offset = np.arange(num_labels) * num_classes
  labels_one_hot = np.zeros((num_labels, num_classes))
  labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
  return labels_one_hot

label_batch = np.array(training_labels[100 : 200])
l = np.array(dense_to_one_hot(label_batch,2)).reshape(len(label_batch), num_classes)

print l.shape

(100, 2)


In [4]:
batch_size = 100;
n_iterations = 1;

#Initialize session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(n_iterations):
        print "Epoch:", epoch
        for j in range(len(training_data)/batch_size):
            #print "j:", j
            if(j == 100):
                break
            startIndex = j*batch_size
            endIndex = startIndex + batch_size
            batch_x = np.array(training_data[startIndex : endIndex]).reshape((-1,time_steps, word_vector_size))
            batch_y = dense_to_one_hot(np.array(training_labels[startIndex : endIndex]),num_classes)
            sess.run(train_step, feed_dict={_inputs: batch_x,
                                            y: batch_y})
        if epoch % 1 == 0:
            acc = sess.run(accuracy, feed_dict={_inputs: batch_x, y: batch_y})
            loss = sess.run(cross_entropy, feed_dict={_inputs: batch_x, y: batch_y})
            print ("Iter " + str(epoch) + ", Minibatch Loss= " + \
                    "{:.6f}".format(loss) + ", Training Accuracy= " + \
                    "{:.5f}".format(acc))


    print ("Testing Accuracy:", sess.run(accuracy, feed_dict={_inputs: test_data, y: dense_to_one_hot(np.array(test_labels), num_classes)}))
    predictions = np.array(sess.run(tf.argmax(final_output,1),  feed_dict={_inputs: test_data, y: dense_to_one_hot(np.array(test_labels),num_classes)}))
    correct_predictions = np.array(sess.run(correct_prediction,  feed_dict={_inputs: test_data, y: dense_to_one_hot(np.array(test_labels),num_classes)}))
    print(np.array(test_data).shape)
    print(predictions.reshape(-1,1).shape)
    print(correct_predictions.reshape(-1,1).shape)
    #results = np.hstack((test_data,predictions))
    #print (results)

Epoch: 0
Iter 0, Minibatch Loss= 0.689574, Training Accuracy= 52.00000
('Testing Accuracy:', 55.739998)
(5000, 100, 50)
(5000, 1)
(5000, 1)


In [5]:
import datetime

today = datetime.datetime.now()
format = "%H_%M_%S_%d_%m_%Y"
filename = "output/"+ "Result_test_" + today.strftime(format) + ".csv"
with open(filename, 'wb') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(["Train Accuracy = " + str(60)])
    wr.writerow(["Training Lost = " + str(5.05)])
    wr.writerow(["Test Accuracy = " + str(50)])
    for i in range(len(test_data_raw)):
        local_result = []
        #print(str(test_data_raw[i]['sentence']))
        local_result.append(test_data_raw[i]['sentence'])
        local_result.append(test_data_raw[i]['label'])
        local_result.append(predictions[i])
        local_result.append(correct_predictions[i])
        print(local_result)
        wr.writerow(local_result)

[['de grot is very good film the great plot comes from the novel by tim who also adapted this story for the screen some really top class acting not only by van hu but especially by marcel who mostly did tv work prior to his performance of axel van de graaf the film seems to kick of as thriller and sets an excellent mood then we start to learn about egon and axel van de graaf and the story is revealed bit by bit in very compelling flash back structure which adds to the more romantic aspect and the character'], 1, 1, True]
[['sat through almost one episode of this series and just couldn take anymore it felt as though watched dozens of episodes already and then it hit me there nothing new here ve heard that joke on seinfeld saw someone fall like that on friends an episode of happy days had almost the same storyline ect none of the actors are interesting here either some were good on other shows not here and others are new to profession they should have never entered avoid this stinker'], 

[['this is above all else the typical crown international pictures drive in read passion pit programmer the sammy johns hit record chevy van is heard repeatedly on the soundtrack this movie has even been reissued with the title chevy van despite the film title vehicle being dodge danny devito makes only six minutes of on screen appearance but countless vhs reissues falsely credit him as the star of the flick the movie is comparatively sexist morality tale will bobby find sexual satisfaction through the one night stand his customised van facilitates or must he wait until tina the girl of'], 1, 1, True]
[['the movie is very lengthy and unfortunately pretty different from the novel if you want to see the movie then don read the novel first as it will shock you however cinematography was ok and if you are person who loves adventure genres which explores africa then go for it acting performances are adequate however many important events that were present in the novel are omitted in the nov

[['centered in the downtown and out skirts of detroit this comedy found to be terrific new comedic duo noriyuki pat morita is very funny man who happens to be cop from japan on the trail of an industrial secrets thief who has stolen proto type turbo super charger reluctantly he goes to the united states to follow the thief after being ordered by his commander pat character collides with jay leno character fast talking but down to business player type detroit cop when they cross paths though the honorable ways of japan meet the all out old school detroit police'], 1, 0, False]
[['shudder to think what people must have thought of environmentalists after viewing this piece of overbearing preachy cinematic trash larded with enough indian wannabe and space brother buffoonery to stock new age shop starlight makes anyone who gives damn about the planet look like feather wearing crystal fondling idiot br br the plot alien rae dawn chong arrives to guide flute playing underwear model in mystica

[['director warren beatty intention to turn chester gould famous comic strip into live action cartoon with beatty himself cast in the lead as the square jawed detective had sweet overtures of innocent nostalgia quite unusual and intriguing coming from warren beatty unfortunately the picture is requisite ham fun for awhile but eventually tiring dick tracy attempts to bring down mobster big boy caprice aided by loving tess trueheart but tripped up by evil breathless mahoney for the first half hour or so the oscar winning art direction and set design are wonderful to absorb but as the plot creaks along'], 0, 1, False]
[['voted for this movie because of some minor childish flaws other than that this movie is one of my favorites it entertaining to say the least the shooting scenes are ridiculous though and think gackt who wrote the book takes little bit too much of his matrix obsession into it it seems like their enemies just stands there waiting to get shot at however this movie is touchin

[['nobody knows anybody is conspiracy theory thriller about satanist nut bomber targeting the religious festivities of seville during holy week he also happens to be the best friend of the film hero the plot is set up by the bomber as computer game with himself and the hero as players and seville as the virtual environment the very real alleys and streets of the city begin to take on the labyrinthine qualities of those old pacman type games looked at this way the scene where the hero and his female sidekick are chased by black hooded penitents with may not'], 0, 0, True]
[['guess this is meant to be sort of reworking or updating of beauty and the beast but can say ve ever watched movie that began with several minutes of graphic horse sex wow anyway it seems that young woman and her aunt have traveled to this castle in france where the woman is to be married to the son of the castle owner who is the man who takes care of making sure the horses get their rocks off it seems that there are

[['cornel wilde and three dumbbells search for sunken treasure in the south atlantic br br the treasure hunters led by wilde fight group of territorial sharks with cute little sneers on their hungry faces wilde and his merry men must find way to take themselves off the menu so they can begin excavating an old spanish galleon filled with gold bullion br br after the crew engages in small eternity of pushing shoving arguing and listening to wilde annoying health tips crazy convicts board the boat and complicate things now it is battle of wits as to who gets the'], 0, 0, True]
[['baby face is fast paced wise cracking knowing smirk of film that br br lasts only an hour and minutes but oh what smart minutes they br br are that story that covers so much ground could be told in such br br short time puts most of today movie makers to shame screenwriters of br br today should study the economy of baby face and cut the bloat that br br overwhelms so many of their films br br the story is no non

[['this film is self indulgent rubbish watch this film if you merely want to hear spoken gaelic or enjoy the pleasant soundtrack watch for any other reason and you will be disappointed it should be charming but isn it just irritating the characters are difficult to care about and the acting is poor the stories within the film are also charmless and sinister was expecting heartwarming family film but this also held no appeal to my fourteen year old daughter it is rarely that cannot see film through to its conclusion but this one got the better of both of'], 0, 1, False]
[['sitting here nov and still can help rave about this movie arnold best movies came in about year span running man predator and total recall all are amazing the cheesy one liners by arnold in this movie will make you laugh on more than one occasion find the acting in this movie surprisingly good as was the case in predator and recall they did great job in trying to make the scenes futuristic as it is supposed to take pl

In [6]:
print(classifier_output[:10])

NameError: name 'classifier_output' is not defined

In [None]:
print(len(test_data))
print(test_data)

The accuracies for the RNN, LSTM and GRU are 70.7%, 78.08% and 77.26% respectively.