## Experimenting with Recurrent Neural Network and LSTM

In [1]:
from collections import Counter
import tensorflow as tf
import numpy as np
import pandas as pd

2022-11-16 06:33:36.060441: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-16 06:33:36.393980: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-16 06:33:36.394004: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-16 06:33:36.467005: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-16 06:33:37.689980: W tensorflow/stream_executor/platform/de

In [2]:
data = pd.read_csv('data/spam.csv',encoding='latin-1')
data.head(5)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [3]:
data = data.rename(columns={"v2" : "text", "v1":"label"})

## Data Preprocessing

Saving labels and messages in text files

In [4]:
np.savetxt(r'data\messages.txt', data['text'].values, fmt='%s')
np.savetxt(r'data\labels.txt', data['label'].values, fmt='%s')

In [5]:
with open('data/messages.txt', encoding="ISO-8859-1") as f:
    messages = f.read()
with open('data/labels.txt',encoding="ISO-8859-1") as f:
    labels = f.read()

In [6]:
messages[:500]

"Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\nOk lar... Joking wif u oni...\nFree entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\nU dun say so early hor... U c already then say...\nNah I don't think he goes to usf, he lives around here though\nFreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you u"

In [7]:
labels[:100]

'ham\nham\nspam\nham\nham\nspam\nham\nham\nspam\nspam\nham\nspam\nspam\nham\nham\nspam\nham\nham\nham\nspam\nham\nham\nham\n'

### Remove punctuations such a (. , !) etc and seperate using delimiter

In [8]:
from string import punctuation
all_text = ''.join([c for c in messages if c not in punctuation])
messages = all_text.split('\n')

all_text = ' '.join(messages)
words = all_text.split()

In [9]:
print (all_text[:500])
print ("\n")
print (words[:20])

Go until jurong point crazy Available only in bugis n great world la e buffet Cine there got amore wat Ok lar Joking wif u oni Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005 Text FA to 87121 to receive entry questionstd txt rateTCs apply 08452810075over18s U dun say so early hor U c already then say Nah I dont think he goes to usf he lives around here though FreeMsg Hey there darling its been 3 weeks now and no word back Id like some fun you up for it still Tb ok XxX std chgs


['Go', 'until', 'jurong', 'point', 'crazy', 'Available', 'only', 'in', 'bugis', 'n', 'great', 'world', 'la', 'e', 'buffet', 'Cine', 'there', 'got', 'amore', 'wat']


### Building our vocabulary and converting messages to vectors

In [10]:
split_words = Counter(words)
sorted_split_words = sorted(split_words, key=split_words.get, reverse=True)
vocab_to_int = {c : i for i, c in enumerate(sorted_split_words,1)}

# Convert the reviews to integers, same shape as reviews list, but with integers
messages_ints = []
for message in messages:
    messages_ints.append([vocab_to_int[i] for i in message.split()])

In [11]:
print (sorted_split_words[:50])
print ("\n")
print (messages[0])
print (messages_ints[0])
print ("\n")
print (len(messages[0]))
print (len(messages_ints[0]))

['to', 'you', 'I', 'a', 'the', 'and', 'in', 'is', 'u', 'i', 'me', 'for', 'my', 'of', 'your', 'it', 'on', '2', 'have', 'that', 'are', 'call', 'now', 'or', 'be', 'not', 'at', 'with', 'U', 'get', 'will', 'can', 'Im', 'so', 'ur', '4', 'but', 'up', 'do', 'ltgt', 'You', 'from', 'out', 'know', 'go', 'just', 'this', 'if', 'when', 'like']


Go until jurong point crazy Available only in bugis n great world la e buffet Cine there got amore wat
[813, 462, 5238, 918, 919, 2589, 62, 7, 1579, 83, 145, 593, 1389, 172, 3432, 5239, 61, 55, 5240, 180]


102
20


#### Converting labels to 0 and 1 - SPAM:1 and NOT SPAM:0 

In [12]:
labels = labels.split("\n")
labels = np.array([0 if label == "ham" else 1 for label in labels])

In [13]:
labels[:10]

array([0, 0, 1, 0, 0, 1, 0, 0, 1, 1])

In [14]:
from collections import Counter

message_lens = Counter([len(x) for x in messages_ints])
print("Zero-length messages: {}".format(message_lens[0]))
print("Maximum message length: {}".format(max(message_lens)))

Zero-length messages: 3
Maximum message length: 171


In [15]:
messages_ints = [message for message in messages_ints if (len(message)>0)]

### Padding vectors with zeros so that all inputs are of same length

In [16]:
seq_len = 200
num_messages = len(messages)
features = np.zeros([num_messages, seq_len], dtype=int)
for i, row in enumerate(messages_ints):
    features[i, -len(row):] = np.array(row)[:seq_len]

In [17]:
features[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

### Splitting into training, validation and test data

In [18]:
split_frac1 = 0.8

idx1 = int(len(features) * split_frac1)
train_x, val_x = features[:idx1], features[idx1:]
train_y, val_y = labels[:idx1], labels[idx1:]

split_frac2 = 0.5
idx2 = int(len(val_x) * split_frac2)
val_x, test_x = val_x[:idx2], val_x[idx2:]
val_y, test_y = val_y[:idx2], val_y[idx2:]

print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(train_x.shape), 
      "\nValidation set: \t{}".format(val_x.shape),
      "\nTest set: \t\t{}".format(test_x.shape))

print("\t\t\Label Shapes:")
print("Train set: \t\t{}".format(train_y.shape), 
      "\nValidation set: \t{}".format(val_y.shape),
      "\nTest set: \t\t{}".format(test_y.shape))

			Feature Shapes:
Train set: 		(4460, 200) 
Validation set: 	(557, 200) 
Test set: 		(558, 200)
		\Label Shapes:
Train set: 		(4460,) 
Validation set: 	(557,) 
Test set: 		(556,)


### Initial prediction using Logistic Regression

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

clf = LogisticRegression()
clf.fit(train_x,train_y)
p = clf.predict(val_x)
print (accuracy_score(val_y,p))

0.8599640933572711


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Defining Hyperparameters

In [20]:

lstm_size = 256
lstm_layers = 1
batch_size = 256
learning_rate = 0.003

### Creating placeholder for inputs, labels and dropout rate 

In [21]:
n_words = len(sorted_split_words)

# Create the graph object
graph = tf.Graph()
# Add nodes to the graph
with graph.as_default():
    inputs_ = tf.placeholder(tf.int32, [None,None], name = "inputs")
    labels_ = tf.placeholder(tf.int32, [None,None], name = "labels")
    keep_prob = tf.placeholder(tf.float32, name = "keep_prob")

AttributeError: module 'tensorflow' has no attribute 'placeholder'

Adding an embedding layer. Instead of one-hot encoding, we build an embedding layer and use that layer as a lookup table.

In [None]:
# Size of the embedding vectors (number of units in the embedding layer)
embed_size = 300 

with graph.as_default():
    embedding = tf.Variable(tf.random_uniform((n_words, embed_size), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, inputs_)

Create LSTM cells to use in the recurrent network  Here we are just defining what the cells look like.

It takes a parameter called num_units, the number of units in the cell, called lstm_size in this code.

Adding dropout to the cell with tf.contrib.rnn.DropoutWrapper. This wraps the cell in another cell, but with dropout added to the inputs and/or outputs. 

Here, [drop] * lstm_layers creates a list of cells (drop) that is lstm_layers long. The MultiRNNCell wrapper builds this into multiple layers of RNN cells, one for each cell in the list.

So the final cell in the network is actually multiple (or just one) LSTM cells with dropout.

In [None]:
with graph.as_default():
    # Your basic LSTM cell
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    
    # Add dropout to the cell
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)
    
    # Getting an initial state of all zeros
    initial_state = cell.zero_state(batch_size, tf.float32)

Now we need to actually run the data through the RNN nodes. You can use tf.nn.dynamic_rnn to do this. You'd pass in the RNN cell you created (our multiple layered LSTM cell for instance), and the inputs to the network.

Initial_state is the cell state that is passed between the hidden layers in successive time steps. We pass in our cell and the input to the cell, then it does the unrolling and everything else for us. It returns outputs for each time step and the final_state of the hidden layer.

In [None]:
with graph.as_default():
    outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

We only care about the final output, we'll be using that as our sentiment prediction. So we need to grab the last output with outputs[:, -1], the calculate the cost from that and labels_.

In [None]:

with graph.as_default():
    predictions = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.sigmoid)
    cost = tf.losses.mean_squared_error(labels_, predictions)
    
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

Calculating predictions and accuracy

In [None]:
with graph.as_default():
    correct_pred = tf.equal(tf.cast(tf.round(predictions), tf.int32), labels_)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Creating Batches

In [None]:
def get_batches(x, y, batch_size=100):
    
    n_batches = len(x)//batch_size
    x, y = x[:n_batches*batch_size], y[:n_batches*batch_size]
    for ii in range(0, len(x), batch_size):
        yield x[ii:ii+batch_size], y[ii:ii+batch_size]

### Training

In [None]:
epochs = 5

with graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    iteration = 1
    for e in range(epochs):
        state = sess.run(initial_state)
        
        for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
            feed = {inputs_: x,
                    labels_: y[:, None],
                    keep_prob: 0.5,
                    initial_state: state}
            loss, state, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)
            
            if iteration%5==0:
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Train loss: {:.3f}".format(loss))

            if iteration%25==0:
                val_acc = []
                val_state = sess.run(cell.zero_state(batch_size, tf.float32))
                for x, y in get_batches(val_x, val_y, batch_size):
                    feed = {inputs_: x,
                            labels_: y[:, None],
                            keep_prob: 1,
                            initial_state: val_state}
                    batch_acc, val_state = sess.run([accuracy, final_state], feed_dict=feed)
                    val_acc.append(batch_acc)
                print("Val acc: {:.3f}".format(np.mean(val_acc)))
            iteration +=1
    saver.save(sess, "checkpoints/sentiment.ckpt")


Epoch: 0/5 Iteration: 5 Train loss: 0.135
Epoch: 0/5 Iteration: 10 Train loss: 0.108
Epoch: 0/5 Iteration: 15 Train loss: 0.099
Epoch: 1/5 Iteration: 20 Train loss: 0.132
Epoch: 1/5 Iteration: 25 Train loss: 0.099
Val acc: 0.867
Epoch: 1/5 Iteration: 30 Train loss: 0.091
Epoch: 2/5 Iteration: 35 Train loss: 0.107
Epoch: 2/5 Iteration: 40 Train loss: 0.097
Epoch: 2/5 Iteration: 45 Train loss: 0.104
Epoch: 2/5 Iteration: 50 Train loss: 0.123
Val acc: 0.863
Epoch: 3/5 Iteration: 55 Train loss: 0.098
Epoch: 3/5 Iteration: 60 Train loss: 0.088
Epoch: 3/5 Iteration: 65 Train loss: 0.085
Epoch: 4/5 Iteration: 70 Train loss: 0.059
Epoch: 4/5 Iteration: 75 Train loss: 0.060
Val acc: 0.816
Epoch: 4/5 Iteration: 80 Train loss: 0.051
Epoch: 4/5 Iteration: 85 Train loss: 0.068


### Testing 

In [None]:
test_acc = []
with tf.Session(graph=graph) as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    test_state = sess.run(cell.zero_state(batch_size, tf.float32))
    for ii, (x, y) in enumerate(get_batches(test_x, test_y, batch_size), 1):
        feed = {inputs_: x,
                labels_: y[:, None],
                keep_prob: 1,
                initial_state: test_state}
        batch_acc, test_state = sess.run([accuracy, final_state], feed_dict=feed)
        test_acc.append(batch_acc)
    print("Test accuracy: {:.3f}".format(np.mean(test_acc)))

INFO:tensorflow:Restoring parameters from checkpoints/sentiment.ckpt
Test accuracy: 0.789


In [None]:
text = 'Hello You just won yourself a free tour to Bahamas!! Call Now to recieve it.'
text = ''.join([c for c in text if c not in punctuation])
integer = ([vocab_to_int[i] for i in text.split(" ")])

In [None]:
i = []
integer = np.array(integer)
i.append(integer)

In [None]:
i = np.array(i)
i.shape

(1, 15)

In [None]:
with tf.Session(graph=graph) as sess:
    ckpt = tf.train.get_checkpoint_state('./checkpoints')
    state = sess.run(initial_state)
    saver.restore(sess, ckpt.model_checkpoint_path)
    feed_dict = {inputs_: i, 
                 initial_state: state,
                 keep_prob: 0.5}
    predictions = sess.run(predictions, feed_dict = feed_dict)

print("Test accuracy: {:.3f}".format(predictions))

INFO:tensorflow:Restoring parameters from ./checkpoints/sentiment.ckpt


InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [1,300] vs. shape[1] = [256,256]
	 [[Node: rnn/while/multi_rnn_cell/cell_0/basic_lstm_cell/basic_lstm_cell_1/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](rnn/while/TensorArrayReadV3, rnn/while/Identity_3, rnn/while/multi_rnn_cell/cell_0/basic_lstm_cell/basic_lstm_cell_1/concat/axis)]]

Caused by op 'rnn/while/multi_rnn_cell/cell_0/basic_lstm_cell/basic_lstm_cell_1/concat', defined at:
  File "/anaconda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/anaconda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/anaconda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/anaconda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/anaconda/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-34-13b4528366d0>", line 2, in <module>
    outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 553, in dynamic_rnn
    dtype=dtype)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 720, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2623, in while_loop
    result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2456, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2406, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 705, in _time_step
    (output, new_state) = call_cell()
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 691, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 953, in __call__
    cur_inp, new_state = cell(cur_inp, cur_state)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 713, in __call__
    output, new_state = self._cell(inputs, state, scope)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 241, in __call__
    concat = _linear([inputs, h], 4 * self._num_units, True)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/contrib/rnn/python/ops/core_rnn_cell_impl.py", line 1048, in _linear
    res = math_ops.matmul(array_ops.concat(args, 1), weights)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1034, in concat
    name=name)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 519, in _concat_v2
    name=name)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): ConcatOp : Dimensions of inputs should match: shape[0] = [1,300] vs. shape[1] = [256,256]
	 [[Node: rnn/while/multi_rnn_cell/cell_0/basic_lstm_cell/basic_lstm_cell_1/concat = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](rnn/while/TensorArrayReadV3, rnn/while/Identity_3, rnn/while/multi_rnn_cell/cell_0/basic_lstm_cell/basic_lstm_cell_1/concat/axis)]]
