In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.preprocessing.sequence import skipgrams, pad_sequences
from random import shuffle
import data_clean

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
df= pd.read_json("reviews_Movies_and_TV_5.json", lines=True)

In [9]:
df_copy = df.loc[:] #Remove later
df_copy["label"] = np.where(df_copy.overall >=3, 1, 0)

#Number of the smaller class (negative reviews)
num_to_sample = len(df_copy[df_copy.label == 0])

df_neg = df_copy[df_copy["label"] == 0].sample(n=num_to_sample)
df_pos = df_copy[df_copy["label"] == 1].sample(n=num_to_sample)


df_copy = pd.concat([df_neg, df_pos])

# Get the text from the dataframe
text = df_copy["reviewText"].values

# Create labels from the dataframe
labels = df_copy["label"].values

In [10]:
vocab_size = 20000
data, word_to_idx, idx_to_word, T = data_clean.tokenize_and_process(text, vocab_size)

Fitting complete
Converted to sequences


In [11]:
sequence_length=250
num_classes=2
data = pad_sequences(data, maxlen=sequence_length)
labels = to_categorical(labels, num_classes = num_classes)

In [12]:
# Size of our embedding matrix
embedding_size = 256
# Number of samples for NCE Loss
num_samples = 64
# Learning Rate
learning_rate = 0.001
#number of hidden units
lstm_hidden_units = 256
# Number of classes
num_classes = 2


def model():
    # Batch size list of integer sequences
    x = tf.placeholder(tf.int32, shape=[None, sequence_length], name="x")
    # One hot labels for sentiment classification
    y = tf.placeholder(tf.int32, shape=[None, num_classes], name="y")

    # Cast our label to float32
    y = tf.cast(y, tf.float32)

    # Instantiate our embedding matrix
    Embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                            name="word_embedding")

    # Lookup embeddings
    embed_lookup = tf.nn.embedding_lookup(Embedding, x)

    # Create LSTM Cell
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_hidden_units)

    # Extract the batch size - this allows for variable batch size
    current_batch_size = tf.shape(x)[0]

    # Create LSTM Initial State of Zeros
    initial_state = lstm_cell.zero_state(current_batch_size, dtype=tf.float32)

    # Wrap our lstm cell in a dropout wrapper
    lstm_cell = tf.contrib.rnn.DropoutWrapper(cell=lstm_cell, output_keep_prob=0.85)

    value, _ = tf.nn.dynamic_rnn(lstm_cell,
                                 embed_lookup,
                                 initial_state=initial_state,
                                 dtype=tf.float32)
    
    #Instantiate weights
    weight = tf.Variable(tf.random_normal([lstm_hidden_units, num_classes]))
    #Instantiate biases
    bias = tf.Variable(tf.constant(0.1, shape=[num_classes]))


    value = tf.transpose(value, [1,0,2])

    #Extract last output
    last = tf.gather(value, int(value.get_shape()[0])-1)

    prediction = (tf.matmul(last, weight) + bias)
    
    correct_prediction = tf.equal(tf.argmax(tf.nn.sigmoid(prediction), axis=1), tf.argmax(y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    choice = tf.argmax(tf.nn.sigmoid(prediction), axis=1)
    # Calculate the loss given prediction and labels
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = prediction,
                                                                     labels = y))

    # Declare our optimizer, in this case RMS Prop
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)

    return optimizer, loss, x, y, accuracy, prediction,correct_prediction, choice

In [19]:
optimizer, loss, x, y, accuracy, prediction,correct_prediction, choice = model()

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=42)

batch_size = 32
num_epochs = 5

#Num batches in training set
num_batches = len(X_train) // batch_size

with tf.Session() as sesh:
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sesh.run(init)
    #create saver to save our weights
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter("logdir/", graph=sesh.graph)
    for epoch in range(num_epochs):
        print("----Epoch", epoch+1, "out of", num_epochs, "----")
        if(epoch>0):
            data=list(zip(X_train, y_train))
            shuffle(data)
            X_train, y_train = zip(*data)
        for i in range(num_batches):
            if i != num_batches-1:
                x_batch = X_train[i*batch_size:i * batch_size + batch_size]
                y_batch = y_train[i*batch_size:i * batch_size + batch_size]
            else:
                x_batch = X_train[i*batch_size:]
                y_batch = y_train[i*batch_size:]

            _, l,a = sesh.run([optimizer, loss, accuracy], feed_dict = {x: x_batch, y: y_batch})

            if i>0 and i %100 == 0:
                print("STEP", i, "of", num_batches, "LOSS:", l, "Accucracy:", a)
            if i>0 and i %500 == 0:
                saver.save(sesh, "logdir/lstm_model.ckpt")
                writer.flush()
                writer.close()
        saver.save(sesh, "logdir/lstm_model.ckpt")
        writer.flush()
        writer.close()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


----Epoch 1 out of 5 ----
STEP 100 of 9040 LOSS: 1.1443391 Accucracy: 0.59375
STEP 200 of 9040 LOSS: 1.4186034 Accucracy: 0.46875
STEP 300 of 9040 LOSS: 0.46171147 Accucracy: 0.875
STEP 400 of 9040 LOSS: 1.0127397 Accucracy: 0.65625
STEP 500 of 9040 LOSS: 0.6469917 Accucracy: 0.75
STEP 600 of 9040 LOSS: 0.5271907 Accucracy: 0.75
STEP 700 of 9040 LOSS: 0.52090937 Accucracy: 0.84375
STEP 800 of 9040 LOSS: 0.45714802 Accucracy: 0.78125
STEP 900 of 9040 LOSS: 0.40210885 Accucracy: 0.8125
STEP 1000 of 9040 LOSS: 0.6821064 Accucracy: 0.625
STEP 1100 of 9040 LOSS: 0.4116263 Accucracy: 0.84375
STEP 1200 of 9040 LOSS: 0.6303264 Accucracy: 0.625
STEP 1300 of 9040 LOSS: 0.29552644 Accucracy: 0.90625
STEP 1400 of 9040 LOSS: 0.46500254 Accucracy: 0.75
STEP 1500 of 9040 LOSS: 0.2152053 Accucracy: 0.875
STEP 1600 of 9040 LOSS: 0.36544362 Accucracy: 0.8125
STEP 1700 of 9040 LOSS: 0.6159587 Accucracy: 0.75
STEP 1800 of 9040 LOSS: 0.29725373 Accucracy: 0.9375
STEP 1900 of 9040 LOSS: 0.4267458 Accucracy:

STEP 6600 of 9040 LOSS: 0.36865866 Accucracy: 0.8125
STEP 6700 of 9040 LOSS: 0.27428228 Accucracy: 0.90625
STEP 6800 of 9040 LOSS: 0.16198495 Accucracy: 1.0
STEP 6900 of 9040 LOSS: 0.24020529 Accucracy: 0.90625
STEP 7000 of 9040 LOSS: 0.20341243 Accucracy: 0.9375
STEP 7100 of 9040 LOSS: 0.21081266 Accucracy: 0.875
STEP 7200 of 9040 LOSS: 0.3045651 Accucracy: 0.90625
STEP 7300 of 9040 LOSS: 0.33418635 Accucracy: 0.78125
STEP 7400 of 9040 LOSS: 0.30987033 Accucracy: 0.8125
STEP 7500 of 9040 LOSS: 0.25832587 Accucracy: 0.9375
STEP 7600 of 9040 LOSS: 0.341682 Accucracy: 0.84375
STEP 7700 of 9040 LOSS: 0.1626852 Accucracy: 0.96875
STEP 7800 of 9040 LOSS: 0.3312446 Accucracy: 0.84375
STEP 7900 of 9040 LOSS: 0.27946693 Accucracy: 0.90625
STEP 8000 of 9040 LOSS: 0.26580095 Accucracy: 0.875
STEP 8100 of 9040 LOSS: 0.29916647 Accucracy: 0.875
STEP 8200 of 9040 LOSS: 0.19404668 Accucracy: 0.90625
STEP 8300 of 9040 LOSS: 0.39269733 Accucracy: 0.84375
STEP 8400 of 9040 LOSS: 0.4134973 Accucracy: 0.

STEP 4100 of 9040 LOSS: 0.44427463 Accucracy: 0.8125
STEP 4200 of 9040 LOSS: 0.6610847 Accucracy: 0.71875
STEP 4300 of 9040 LOSS: 0.34957576 Accucracy: 0.875
STEP 4400 of 9040 LOSS: 0.13563427 Accucracy: 0.96875
STEP 4500 of 9040 LOSS: 0.47126427 Accucracy: 0.875
STEP 4600 of 9040 LOSS: 0.12252123 Accucracy: 0.96875
STEP 4700 of 9040 LOSS: 0.17042975 Accucracy: 0.9375
STEP 4800 of 9040 LOSS: 0.2073859 Accucracy: 0.90625
STEP 4900 of 9040 LOSS: 0.055335246 Accucracy: 1.0
STEP 5000 of 9040 LOSS: 0.17756501 Accucracy: 0.90625
STEP 5100 of 9040 LOSS: 0.33270276 Accucracy: 0.9375
STEP 5200 of 9040 LOSS: 0.22386317 Accucracy: 0.875
STEP 5300 of 9040 LOSS: 0.31307608 Accucracy: 0.84375
STEP 5400 of 9040 LOSS: 0.40146065 Accucracy: 0.875
STEP 5500 of 9040 LOSS: 0.22294667 Accucracy: 0.96875
STEP 5600 of 9040 LOSS: 0.18492429 Accucracy: 0.9375
STEP 5700 of 9040 LOSS: 0.33599284 Accucracy: 0.78125
STEP 5800 of 9040 LOSS: 0.21358383 Accucracy: 0.90625
STEP 5900 of 9040 LOSS: 0.39521536 Accucracy:

In [14]:
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=42)

In [15]:
optimizer, loss, x, y, accuracy, prediction,correct_prediction, choice = model()
saver = tf.train.Saver()
with tf.Session() as sess:
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init)
    saver.restore(sess, "logdir/lstm_model.ckpt")
    print("Model restored.")
    #l,a = sess.run([loss, accuracy], feed_dict = {x: X_test[:10], y: y_test[:10]})

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Restoring parameters from logdir/lstm_model.ckpt


NotFoundError: Key is_training not found in checkpoint
	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/anaconda3/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-76127431c87a>", line 2, in <module>
    saver = tf.train.Saver()
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1338, in __init__
    self.build()
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1347, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1384, in _build
    build_save=build_save, build_restore=build_restore)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 835, in _build_internal
    restore_sequentially, reshape)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 472, in _AddRestoreOps
    restore_sequentially)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1463, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Key is_training not found in checkpoint
	 [[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]


In [14]:
tf.reset_default_graph()

In [None]:
df.head()