# Modeling Stock Market Sentiment with LSTM 

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Suppress warnings 
import warnings
warnings.filterwarnings('ignore')

In [3]:
# importing libraries

import os
import re
import string
import numpy as np
import pandas as pd
import tensorflow as tf

from datetime import datetime
from sklearn.model_selection import train_test_split
from collections import Counter

In [4]:
import utils as utl

In [5]:
# Display

pd.set_option('max_colwidth', 800)
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [6]:
# current directory
os.getcwd()

'/home/shivanand/Downloads/LSTM_stock_market_sentiment'

## Processing Data


Data used here is from **StockTwits.com** which is a social media network for traders and investors to share their views about the stock market. When a user posts a message, they tag the relevant stock ticker [$SPY in our case which is for S&P 500 index fund] and have option to tag the message with their sentiment - "bullish" or "bearish"

#### Read and view data

In [7]:
# read data from csv file
data = pd.read_csv('StockTwits_SPY_Sentiment_2017.gz',encoding='utf-8',index_col=0)

In [8]:
data.head()

Unnamed: 0,message,sentiment
0,$SPY crazy day so far!,bearish
1,$SPY Will make a new ATH this week. Watch it!,bullish
2,$SPY $DJIA white elephant in room is $AAPL. Up 14% since election. Strong headwinds w/Trump trade & Strong dollar. How many 7's do you see?,bearish
3,$SPY blocks above. We break above them We should push to double top,bullish
4,"$SPY Nothing happening in the market today, guess I'll go to the store and spend some $.",bearish


In [9]:
# Defining text messages and their labels

messages = data.message.values
labels = data.sentiment.values

#### Preprocess messages

Preprocessing the raw text data to normalize for the context. Normalizing for known unique 'entities' that carry similar contextual meaning. 

Therefore replacing the references to 
* specific stock ticker ($SPY), 
* user names, 
* url links,
* numbers with special tokenidentifying the entity 

Converting text into lower case and removing punctuations.               

In [10]:
def preprocess_messages(text):
    
    
    # SAVING REGEX PATTERNS
    REGEX_PRICE_SIGN = re.compile(r'\$(?!\d*\.?\d+%)\d*\.?\d+|(?!\d*\.?\d+%)\d*\.?\d+\$')
    REGEX_PRICE_NOSIGN = re.compile(r'(?!\d*\.?\d+%)(?!\d*\.?\d+k)\d*\.?\d+')
    REGEX_TICKER = re.compile('\$[a-zA-Z]+')
    REGEX_USER = re.compile('\@\w+')
    REGEX_LINK = re.compile('https?:\/\/[^\s]+')
    REGEX_HTML_ENTITY = re.compile('\&\w+')
    REGEX_NON_ACSII = re.compile('[^\x00-\x7f]')
    
    #string.punctuation - '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    #string.punctuation.replace('<', '').replace('>', '')
    #--> '!"#$%&\'()*+,-./:;=?@[\\]^_`{|}~'
    #re.escape(string.punctuation.replace('<', ''))
    #--> '\\!\\"\\#\\$\\%\\&\\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\=\\>\\?\\@\\[\\\\\\]\\^_\\`\\{\\|\\}\\~'
    
    REGEX_PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation.replace('<', '').replace('>', '')))
    REGEX_NUMBER = re.compile(r'[-+]?[0-9]+')
    
    
    # CONVERTING TO LOWERCASE
    text = text.lower()
    
    # REPLACE ST "ENTITITES" WITH A UNIQUE TOKEN
    text = re.sub(REGEX_TICKER, ' <TICKER> ', text)
    text = re.sub(REGEX_USER, ' <USER> ', text)
    text = re.sub(REGEX_LINK, ' <LINK> ', text)
    text = re.sub(REGEX_PRICE_SIGN, ' <PRICE> ', text)
    text = re.sub(REGEX_PRICE_NOSIGN, ' <NUMBER> ', text)
    text = re.sub(REGEX_NUMBER, ' <NUMBER> ', text)
    # REMOVE EXTRANEOUS TEXT DATA
    text = re.sub(REGEX_HTML_ENTITY, "", text)
    text = re.sub(REGEX_NON_ACSII, "", text)
    text = re.sub(REGEX_PUNCTUATION, "", text)
    
    # Tokenizing and removing < and > that are not in special tokens
    words = " ".join(token.replace("<", "").replace(">", "")
                     if token not in ['<TICKER>', '<USER>', '<LINK>', '<PRICE>', '<NUMBER>']
                     else token
                     for token in text.split())

    return words

In [11]:
messages = np.array([preprocess_messages(msg) for msg in messages])

#### Generate Vocab to index mapping

Encoding words to numbers for the alogrithm to work with inputs by encoding each word to a unique index.

In [12]:
vocab = " ".join(messages).split()

In [13]:
len(vocab)

1267980

In [14]:
len(set(vocab))

31980

In [15]:
word_idx = {word:idx for idx,word in enumerate(set(vocab),1)}
idx_word = {idx:word for word,idx in word_idx.items()}    

#### Checking messages length

In [16]:
message_len = [len(msg) for msg in messages]

print('Minimum length : ',min(message_len))
print('Maximum length : ',max(message_len))
print('Mean length : ',np.mean(message_len))

Minimum length :  0
Maximum length :  244
Mean length :  78.21856920395598


In [17]:
min_idx = [i  for i in range(len(message_len)) if message_len[i]==0]
print("Indexes where message length is 0 :",min_idx)

Indexes where message length is 0 : [88808]


In [18]:
print('messages length: ',len(messages))
print('no of labels: ',len(labels))

messages length:  96967
no of labels:  96967


In [19]:
# dropping zero message length message

messages = np.delete(messages,min_idx)
labels = np.delete(labels,min_idx)

In [20]:
print('messages length after removing of zero length messages: ',len(messages))
print('no of labels after removing of zero length messages: ',len(labels))

messages length after removing of zero length messages:  96966
no of labels after removing of zero length messages:  96966


#### Encoding Messages and Labels to the indexes

In [21]:
def encode_messages(messages,word_idx):
    encoded_msg = [] 
    for msg in messages:
        encoded_msg.append([word_idx[word] for word in msg.split()])
    
    return np.array(encoded_msg)

In [22]:
encoded_msg = encode_messages(messages,word_idx)
encoded_msg

array([list([20265, 1186, 26058, 24964, 8775]),
       list([20265, 12597, 7098, 24504, 5464, 13036, 23716, 31580, 30013, 9633]),
       list([20265, 20265, 11855, 26267, 27238, 23235, 23530, 20265, 17343, 7862, 23669, 5197, 28385, 30354, 25974, 27204, 28385, 26935, 10680, 1014, 7862, 24285, 9641, 3814, 9243]),
       ..., list([20265, 2853, 3267, 18484, 17227, 27942, 20978]),
       list([20265, 7862, 27528, 29009, 13617, 23530, 7494, 7137, 13222, 1678]),
       list([20265, 30345, 23530, 24504, 26058, 14779, 24321, 22020, 8873, 3814, 17199, 24504, 7862, 6952, 16991, 10451, 3814, 7180, 12793, 27116, 16167, 7384, 23716, 27263, 20265, 20265, 20265])],
      dtype=object)

In [23]:
data.sentiment.nunique()

2

In [24]:
data.sentiment.value_counts()

bullish    53704
bearish    43263
Name: sentiment, dtype: int64

In [25]:
def encode_labels(labels):
    return np.array([0 if label=='bullish' else 1 for label in labels ])

In [26]:
encoded_label = encode_labels(labels)
encoded_label

array([1, 0, 1, ..., 1, 0, 0])

#### Zero Padding the messages

In [27]:
#finding the maximum sentence

# len_encoded_msg = [len(i) for i in encoded_msg]
# seq_len1 = max(len_encoded_msg)
# seq_len1

In [28]:
# print('Minimum length : ',min(len_encoded_msg))
# print('Maximum length : ',max(len_encoded_msg))
# print('Mean length : ',np.mean(len_encoded_msg))

In [29]:
# one_word_index = [index for index,sentence in enumerate(encoded_msg) if len(sentence)==1]
# print('No of single word sentences :',len(one_word_index))
# one_word_index[:10]

In [30]:
encoded_msg[16]

[20265]

In [31]:
seq_len = max(message_len)
seq_len

244

In [32]:
# padding the encoded_messages

padd_msg = np.zeros((len(encoded_msg),seq_len))

for i,message in enumerate(encoded_msg):
    padd_msg[i,seq_len-len(message):] = message

In [33]:
padd_msg.shape

(96966, 244)

In [34]:
padd_msg

array([[    0.,     0.,     0., ..., 26058., 24964.,  8775.],
       [    0.,     0.,     0., ..., 31580., 30013.,  9633.],
       [    0.,     0.,     0., ...,  9641.,  3814.,  9243.],
       ...,
       [    0.,     0.,     0., ..., 17227., 27942., 20978.],
       [    0.,     0.,     0., ...,  7137., 13222.,  1678.],
       [    0.,     0.,     0., ..., 20265., 20265., 20265.]])

#### Train,Test,Validation split

In [35]:
# creating x and test split

x, x_test, y, y_test = train_test_split(padd_msg, encoded_label, test_size=0.1, random_state=42)

In [36]:
# printing the shapes of the respective sets

print("Shape of x : ",x.shape)
print("Shape of y : ",y.shape)
print("Shape of x_test set : ",x_test.shape)
print("Shape of y_test set : ",y_test.shape)

Shape of x :  (87269, 244)
Shape of y :  (87269,)
Shape of x_test set :  (9697, 244)
Shape of y_test set :  (9697,)


In [37]:
# creating train and validation split

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=244)

In [38]:
# printing the shapes of the respective sets

print("Shape of x_train : ",x_train.shape)
print("Shape of y_train : ",y_train.shape)
print("Shape of x_val set : ",x_val.shape)
print("Shape of y_val set : ",y_val.shape)

Shape of x_train :  (78542, 244)
Shape of y_train :  (78542,)
Shape of x_val set :  (8727, 244)
Shape of y_val set :  (8727,)


## Building and Training LSTM network

In [39]:
def get_batches(x,y,batch_size = 100):
    
    n_batches = len(x)//batch_size
    
    # removing left out records
    x,y = x[:n_batches*batch_size],y[:n_batches*batch_size]
    
    for i in range(0,len(x),batch_size):
        yield x[i:i+batch_size],y[i:i+batch_size]

In [40]:
def model_inputs():
    """
    Create the model inputs
    """
    with tf.variable_scope('G_Placeholders'):
        inputs_ = tf.placeholder(tf.int32, [None, None], name='inputs')
        labels_ = tf.placeholder(tf.int32, [None, None], name='labels')
        keep_prob_ = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs_, labels_, keep_prob_

In [41]:
def build_embedding_layer(inputs_, vocab_size, embed_size):
    """
    Create the embedding layer
    """
    
    with tf.variable_scope('G_Embedding_layer'):
        embedding = tf.Variable(tf.random_uniform((vocab_size, embed_size), -1, 1))
        embed = tf.nn.embedding_lookup(embedding, inputs_)
    
    return embed

In [42]:
def build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size):
    """
    Create the LSTM layers
    """
    
    with tf.variable_scope('G_LSTM_layer'):
        lstms = [tf.contrib.rnn.BasicLSTMCell(size) for size in lstm_sizes]
        # Add dropout to the cell
        drops = [tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob_) for lstm in lstms]
        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell(drops)
        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        lstm_outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)
    
    return initial_state, lstm_outputs, cell, final_state

In [43]:
def build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate):
    """
    Create the Loss function and Optimizer
    """
    
    with tf.variable_scope('G_FC_layer'):
        predictions = tf.contrib.layers.fully_connected(lstm_outputs[:, -1], 1, activation_fn=tf.sigmoid)
        
        tf.summary.histogram('Predictions',predictions)
    
    with tf.variable_scope('G_Loss'):
        loss = tf.losses.mean_squared_error(labels_, predictions)
        
        tf.summary.scalar("G_Loss", loss)
        
        
    optimzer = tf.train.AdadeltaOptimizer(learning_rate).minimize(loss)
    
    return predictions, loss, optimzer

In [44]:
def build_accuracy(predictions, labels_):
    """
    Create accuracy
    """
    
    with tf.variable_scope('G_Accuracy'):
        correct_pred = tf.equal(tf.cast(tf.round(predictions), tf.int32), labels_)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        
        tf.summary.scalar("G_Accuracy", accuracy)
    
    return accuracy

In [45]:
# ALL FUNCTION IN ONE TRAINING_GARRETT

def build_and_train_network_all_in_one(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, train_x, val_x, train_y, val_y):
    
    
    
    inputs_, labels_, keep_prob_ = model_inputs()    
    

    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    
    
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size)
    
        
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    
    
    
    accuracy = build_accuracy(predictions, labels_)
    
    
    
    saver = tf.train.Saver()
    
    #[n.name for n in tf.get_default_graph().as_graph_def().node]
    
    print("Printing all trainable tensor_names".center(50,'-'))
    
    print()
    print("LSTM WEIGHTS")
    print()
    
    [print(n.name)for n in tf.trainable_variables('G_LSTM_layer')]
    
    [tf.summary.histogram(n.name, n)for n in tf.trainable_variables('G_LSTM_layer')]
    
    print()
    print()
    print("FC WIEGHTS")
    print()
    
    [print(n.name)for n in tf.trainable_variables('G_FC_layer')]
    
    [tf.summary.histogram(n.name, n)for n in tf.trainable_variables('G_FC_layer')]
    
    print()
    print("tensor_names".center(50,'-'))
    
    
    summ = tf.summary.merge_all()
    
    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        n_batches = len(train_x)//batch_size

        writer_train = tf.summary.FileWriter('./tb/garrett_all_in_one/train/',sess.graph)
        writer_val = tf.summary.FileWriter('./tb/garrett_all_in_one/val/')
        
        global_step = 0
        
        for e in range(epochs):
            start_time = datetime.now()
            
            state = sess.run(initial_state)
            
            train_acc = []
            for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
                
                global_step+=1
                
                print(ii,end=' ')
                
                feed = {inputs_: x,
                        labels_: y[:,None],
                        keep_prob_: keep_prob,
                        initial_state: state}
                loss_, state, _,  batch_acc,s = sess.run([loss, final_state, optimizer, accuracy,summ], feed_dict=feed)
                
                writer_train.add_summary(s,global_step) # writing summary real time
                train_acc.append(batch_acc)
                
                if (ii) % 17 == 0:
                    
                    val_acc = []
                    #val_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
                    
                    for xx, yy in get_batches(val_x, val_y, batch_size):
                        feed_val = {inputs_: xx,
                                labels_: yy[:,None],
                                keep_prob_: 1}  
                            #initial_state: val_state}

                        val_batch_acc,s = sess.run([accuracy, summ], feed_dict=feed_val)                                                                
                        val_acc.append(val_batch_acc)
                        
                    writer_val.add_summary(s,global_step) # writing summary real time
                    
                
                if (ii ) % n_batches == 0:                
                    
                    val_acc = []
                    #val_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
                    
                    for xx, yy in get_batches(val_x, val_y, batch_size):
                        feed_val = {inputs_: xx,
                                labels_: yy[:,None],
                                keep_prob_: 1}  
                            #initial_state: val_state}

                        val_batch_acc = sess.run([accuracy], feed_dict=feed_val)                                                                
                        val_acc.append(val_batch_acc)
                        
                    
                    
#                     feed_val = {inputs_: val_x,
#                                 labels_: val_y[:,None],
#                                 keep_prob_: 1}
                    
#                     val_batch_acc, val_state = sess.run([accuracy, final_state], feed_dict=feed)
                    
                    
                    stop_time = datetime.now()
                    
                    print()
                    print()
                    print("Epoch: {}/{}...".format(e+1, epochs),
                          "Batch: {}/{}...".format(ii, n_batches),
                          "Train Loss: {:.3f}...".format(loss_),
                          "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
                          "Val Accuracy: {:.3f}".format(np.mean(val_acc)),
                          "Epoch time : {}".format(str(stop_time-start_time)))
                    
                    print()
                    print()
                    
                    
                    

In [46]:
# Define Inputs and Hyperparameters
lstm_sizes = [128, 64]
vocab_size = len(word_idx) + 1 #add one for padding
embed_size = 300
epochs = 10
batch_size = 256
learning_rate = 0.1
keep_prob = 0.5

In [47]:
with tf.Graph().as_default():
    build_and_train_network_all_in_one(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, x_train, x_val, y_train, y_val)

-------Printing all trainable tensor_names--------

LSTM WEIGHTS

G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0
G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0
G_LSTM_layer/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0
G_LSTM_layer/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0
INFO:tensorflow:Summary name G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0 is illegal; using G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel_0 instead.
INFO:tensorflow:Summary name G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0 is illegal; using G_LSTM_layer/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias_0 instead.
INFO:tensorflow:Summary name G_LSTM_layer/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0 is illegal; using G_LSTM_layer/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel_0 instead.
INFO:tensorflow:Summary name G_LSTM_layer/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0 is illegal; using G_LSTM_layer/rnn/multi_rn

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 

In [None]:
def build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, train_x, val_x, train_y, val_y):
    
    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size)
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)
    
    saver = tf.train.Saver()
    
    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        n_batches = len(train_x)//batch_size
        for e in range(epochs):
            start_time = datetime.now()
            
            state = sess.run(initial_state)
            
            train_acc = []
            for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
                print(ii,end=' ')
                feed = {inputs_: x,
                        labels_: y[:, None],
                        keep_prob_: keep_prob,
                        initial_state: state}
                loss_, state, _,  batch_acc = sess.run([loss, final_state, optimizer, accuracy], feed_dict=feed)
                train_acc.append(batch_acc)
                
                if (ii ) % n_batches == 0:
                    
                    val_acc = []
                    val_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
                    for xx, yy in get_batches(val_x, val_y, batch_size):
                        feed = {inputs_: xx,
                                labels_: yy[:, None],
                                keep_prob_: 1,
                                initial_state: val_state}
                        val_batch_acc, val_state = sess.run([accuracy, final_state], feed_dict=feed)
                        val_acc.append(val_batch_acc)
                    
                    stop_time = datetime.now()
                    print()
                    print()
                    print("Epoch: {}/{}...".format(e+1, epochs),
                          "Batch: {}/{}...".format(ii, n_batches),
                          "Train Loss: {:.3f}...".format(loss_),
                          "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
                          "Val Accuracy: {:.3f}".format(np.mean(val_acc)),
                          "Epoch time : {}".format(str(stop_time-start_time)))
                    print()
                    print()
        #saver.save(sess, "checkpoints/sentiment.ckpt")

In [None]:
# Define Inputs and Hyperparameters
lstm_sizes = [128, 64]
vocab_size = len(word_idx) + 1 #add one for padding
embed_size = 300
epochs = 10
batch_size = 256
learning_rate = 0.1
keep_prob = 0.5

In [None]:
with tf.Graph().as_default():
    build_and_train_network_all_in_one(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, x_train, x_val, y_train, y_val)

In [52]:
# ALL FUNCTION IN ONE TRAINING_SIDD

def model_train_all_in_one(x_train,y_train,x_val,y_val,vocab_size,
                embed_size=300,lstm_neurons_li=[128,64],
                keep_prob=0.5,learning_rate=1e-1,epochs=50,batch_size=256):
    
    # reset default graph
    tf.reset_default_graph()
    
    ## create placeholder
#     x_ph,y_ph,keep_prob_ph = create_placeholders()    
    with tf.variable_scope('Placeholders'):
        x_ph = tf.placeholder(tf.int32,[None,None],name ='x_ph')
        y_ph = tf.placeholder(tf.int32,None, name='y_ph')
        keep_prob_ph = tf.placeholder(tf.float32,name='keep_prob_ph')
    

    
    
    ## forward propogation
#     initial_state,a_output,cell,final_state = forward_propagation(x_ph,vocab_size,
#                                                                        embed_size,lstm_neurons_li,
#                                                                        keep_prob_ph,batch_size)

    # creating embedding layer
    with tf.variable_scope('Embedding_layer'):
        embedding = tf.Variable(tf.random_uniform((vocab_size,embed_size),minval=-1,maxval=1))
        embed_layer = tf.nn.embedding_lookup(embedding,x_ph)
    
    # creating LSTM layer
    with tf.variable_scope('LSTM_layer'):
        
        # creating lstm cells
        lstms = [tf.contrib.rnn.BasicLSTMCell(size,name='lstm_cell') for size in lstm_neurons_li]
        # adding dopout to the cells
        drops = [tf.contrib.rnn.DropoutWrapper(lstm,output_keep_prob = keep_prob_ph) for lstm in lstms]
        # stacking multiple LSTM layers
        cell = tf.contrib.rnn.MultiRNNCell(drops)


        # getting initial state of all zeros
        init_state = cell.zero_state(batch_size,tf.float32)
        #init_state = tf.identity(init_state, name="init_state")

        lstm_outputs,final_state = tf.nn.dynamic_rnn(cell,embed_layer,initial_state=init_state)

    # creating sigmoid fc layer
    with tf.variable_scope('FC_layer'):
        a_output = tf.contrib.layers.fully_connected(lstm_outputs[:,-1],1,activation_fn=tf.sigmoid)
        
        tf.summary.histogram('Predictions',a_output)
    
    ## cost calculation
#     cost = compute_cost(a_output,y_ph)
    with tf.variable_scope('Loss'):
        cost = tf.losses.mean_squared_error(y_ph,a_output)
        
        tf.summary.scalar("Loss", cost)
        
    ## optimizers
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)
    
    ## accuracy definition
#     accuracy = acc_fn(a_output,y_ph)
    with tf.variable_scope('Accuracy'):
        correct_pred = tf.equal(tf.cast(tf.round(a_output),tf.int32),y_ph)
        accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
        
        tf.summary.scalar("Accuracy", accuracy)
        
    saver = tf.train.Saver()
    
    print("Printing all trainable tensor_names".center(50,'-'))
    
    print()
    print("LSTM WEIGHTS")
    print()
    
    [print(n.name)for n in tf.trainable_variables('LSTM_layer')]
    
    [tf.summary.histogram(n.name, n)for n in tf.trainable_variables('LSTM_layer')]
    
    print()
    print()
    print("FC WIEGHTS")
    print()
    
    [print(n.name)for n in tf.trainable_variables('FC_layer')]
    
    [tf.summary.histogram(n.name, n)for n in tf.trainable_variables('FC_layer')]
    
    print()
    print("tensor_names".center(50,'-'))
    
    
    summ = tf.summary.merge_all()
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        n_batches = len(x_train)//batch_size    
        
        writer_train = tf.summary.FileWriter('./tb/sidd_all_in_one/train/',sess.graph)
        writer_val = tf.summary.FileWriter('./tb/sidd_all_in_one/val/')
        
        global_step = 0
        
        for epoch in range(epochs):
            startime = datetime.now()
            state = sess.run(init_state)
            
            train_acc = []
            for step,(x,y) in enumerate(get_batches(x_train,y_train,batch_size),1):
                
                global_step+=1
                
                print(step,end=' ')
                
                feed = {x_ph:x,
                        y_ph:y,
                        keep_prob_ph:keep_prob,
                        init_state:state}
                
                loss_,state,_,batch_acc, s = sess.run([cost,final_state,optimizer,accuracy,summ],feed_dict=feed)
                
                writer_train.add_summary(s,global_step) # writing summary real time
                
                train_acc.append(batch_acc)
                
                
                if (step) % 17 == 0:
                    
                    val_acc = []
                    #val_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
                    
                    for xx, yy in get_batches(x_val, y_val, batch_size):
                        feed_val = {x_ph: xx,
                                y_ph: yy[:,None],
                                keep_prob_ph: 1}  
                            #initial_state: val_state}

                        val_batch_acc,s = sess.run([accuracy, summ], feed_dict=feed_val)                                                                
                        val_acc.append(val_batch_acc)
                        
                    writer_val.add_summary(s,global_step) # writing summary real time
                    
                # after the last batch is used for training i.e. after every epoch of training, evaluating result
                if (step)%n_batches == 0:
                    
                    val_acc = []
                    
                    #val_state = sess.run(cell.zero_state(batch_size,tf.float32))
                    
                    for xx,yy in get_batches(x_val,y_val,batch_size):
                        feed_val = {x_ph:xx,
                                y_ph:yy,
                                keep_prob_ph:1}  
                                
                        
                        #val_batch_acc,val_state = sess.run([accuracy,final_state],feed_dict=feed_val)
                        val_batch_acc = sess.run([accuracy],feed_dict=feed_val)                        
                        
                        val_acc.append(val_batch_acc)
                    
                    stoptime = datetime.now()
                    print()
                    print()
                    print("Epoch: {}/{}...".format(epoch+1, epochs),
                          "Batch: {}/{}...".format(step, n_batches),
                          "Train Loss: {:.3f}...".format(loss_),
                          "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
                          "Val Accuracy: {:.3f}".format(np.mean(val_acc)),
                          "Epoch time: {}".format(str(stoptime-startime)))
                    
                    print()
                    print()
#                     writer = tf.summary.FileWriter('./tb/tensorboard_sidd_all_in_one_'+str(epoch+1),sess.graph)
#                     writer.close()
            
            

In [53]:
vocab_size = len(word_idx)+1

model_train_all_in_one(x_train,y_train,x_val,y_val,vocab_size,
                embed_size=300,lstm_neurons_li=[128,64],
                keep_prob=0.5,learning_rate=1e-1,epochs=10,batch_size=256)

-------Printing all trainable tensor_names--------

LSTM WEIGHTS

LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0
LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0
LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0
LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0
INFO:tensorflow:Summary name LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel:0 is illegal; using LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/kernel_0 instead.
INFO:tensorflow:Summary name LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/bias:0 is illegal; using LSTM_layer/rnn/multi_rnn_cell/cell_0/lstm_cell/bias_0 instead.
INFO:tensorflow:Summary name LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel:0 is illegal; using LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/kernel_0 instead.
INFO:tensorflow:Summary name LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/bias:0 is illegal; using LSTM_layer/rnn/multi_rnn_cell/cell_1/lstm_cell/bias_0 instead.


FC WIEGHTS

FC_layer/fully_connected/weights:0
F

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 

In [None]:
# def create_placeholders():
#     'creating placeholders'
    
#     with tf.variable_scope('Placeholders'):
#         x_ph = tf.placeholder(tf.int32,[None,None],name ='x_ph')
#         y_ph = tf.placeholder(tf.int32,None, name='y_ph')
#         keep_prob_ph = tf.placeholder(tf.float32,name='keep_prob_ph')
    
#     return x_ph,y_ph,keep_prob_ph

In [None]:
# def forward_propagation(x_ph,vocab_size,embed_size,lstm_neurons_li,keep_prob_ph,batch_size):
    
#     # creating embedding layer
#     with tf.variable_scope('Embedding_layer'):
#         embedding = tf.Variable(tf.random_uniform((vocab_size,embed_size),minval=-1,maxval=1))
#         embed_layer = tf.nn.embedding_lookup(embedding,x_ph)
    
#     # creating LSTM layer
#     with tf.variable_scope('LSTM_layer'):
        
#         # creating lstm cells
#         lstms = [tf.contrib.rnn.BasicLSTMCell(size,name='lstm_cell') for size in lstm_neurons_li]
#         # adding dopout to the cells
#         drops = [tf.contrib.rnn.DropoutWrapper(lstm,output_keep_prob = keep_prob_ph) for lstm in lstms]
#         # stacking multiple LSTM layers
#         cell = tf.contrib.rnn.MultiRNNCell(drops)


#         # getting initial state of all zeros
#         initial_state = cell.zero_state(batch_size,tf.float32)
#         #init_state = tf.identity(init_state, name="init_state")

#         lstm_outputs,final_state = tf.nn.dynamic_rnn(cell,embed_layer,initial_state=initial_state)

#     # creating sigmoid fc layer
#     with tf.variable_scope('FC_layer'):
#         a_output = tf.contrib.layers.fully_connected(lstm_outputs[:,-1],1,activation_fn=tf.sigmoid)
    
#     return initial_state,a_output,cell,final_state

In [None]:
# def compute_cost(a_output,y_ph):
    
#     with tf.variable_scope('Loss'):
#         cost = tf.losses.mean_squared_error(y_ph,a_output)
    
#     return cost

In [None]:
# # accuracy function

# def acc_fn(a_output,y_ph):
    
#     with tf.variable_scope('Accuracy'):
#         correct_pred = tf.equal(tf.cast(tf.round(a_output),tf.int32),y_ph)
#         accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
    
#     return accuracy

In [None]:
# def get_batches(x,y,batch_size = 100):
    
#     n_batches = len(x)//batch_size
    
#     # removing left out records
#     x,y = x[:n_batches*batch_size],y[:n_batches*batch_size]
    
#     for i in range(0,len(x),batch_size):
#         yield x[i:i+batch_size],y[i:i+batch_size]

In [None]:
# # model_training

# def model_train(x_train,y_train,x_val,y_val,vocab_size,
#                 embed_size=300,lstm_neurons_li=[128,64],
#                 keep_prob=0.5,learning_rate=1e-1,epochs=50,batch_size=256):
    
#     # reset default graph
#     tf.reset_default_graph()
    
#     # create placeholder
#     x_ph,y_ph,keep_prob_ph = create_placeholders()
    
#     # forward propogation
#     initial_state,a_output,cell,final_state = forward_propagation(x_ph,vocab_size,
#                                                                        embed_size,lstm_neurons_li,
#                                                                        keep_prob_ph,batch_size)    
    
#     # cost calculation
#     cost = compute_cost(a_output,y_ph)
    
#     # optimizers
#     optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)
    
#     # accuracy definition
#     accuracy = acc_fn(a_output,y_ph)
    
#     saver = tf.train.Saver()
    
#     with tf.Session() as sess:
#         sess.run(tf.global_variables_initializer())
#         n_batches = len(x_train)//batch_size
        
#         #writer = tf.summary.FileWriter('./tensorboard_sidd_pre',sess.graph)
#         #writer.close()
        
        
#         for epoch in range(epochs):
#             startime = datetime.now()
#             state = sess.run(initial_state)
            
#             train_acc = []
#             for step,(x,y) in enumerate(get_batches(x_train,y_train,batch_size),1):
#                 print('.',end=' ')
#                 feed = {x_ph:x,
#                         y_ph:y,
#                         keep_prob_ph:keep_prob,
#                         initial_state:state}
                
#                 loss_,state,_,batch_acc = sess.run([cost,final_state,optimizer,accuracy],feed_dict=feed)
#                 train_acc.append(batch_acc)
                
#                 # after the last batch is used for training i.e. after every epoch of training, evaluating result
#                 if (step)%n_batches == 0:
                    
#                     val_acc = []
                    
#                     #val_state = sess.run(cell.zero_state(batch_size,tf.float32))
                    
#                     for xx,yy in get_batches(x_val,y_val,batch_size):
#                         feed_val = {x_ph:xx,
#                                 y_ph:yy,
#                                 keep_prob_ph:1}  #,
#                                 #init_state:val_state}
                        
#                         #val_batch_acc,val_state = sess.run([accuracy,final_state],feed_dict=feed_val)
#                         val_batch_acc = sess.run([accuracy],feed_dict=feed_val)
#                         val_acc.append(val_batch_acc)
                    
#                     stoptime = datetime.now()
#                     print()
#                     print("Epoch: {}/{}...".format(epoch+1, epochs),
#                           "Batch: {}/{}...".format(step, n_batches),
#                           "Train Loss: {:.3f}...".format(loss_),
#                           "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
#                           "Val Accuracy: {:.3f}".format(np.mean(val_acc)),
#                           "Epoch time: {}".format(str(stoptime-startime)))
            
#             #saver.save(sess,'./model_save3/sentiment.ckpt',global_step = epoch+1)
        
                    
            

In [None]:
# vocab_size = len(word_idx)+1

# model_train(x_train,y_train,x_val,y_val,vocab_size,
#                 embed_size=300,lstm_neurons_li=[128,64],
#                 keep_prob=0.5,learning_rate=1e-1,epochs=50,batch_size=256)

## Model test set evaluation

In [None]:
# def model_test(model_dir,x_test,y_test,batch_size = 256):
    
#     test_acc = []
#     with tf.Session() as sess:
#         saver = tf.train.import_meta_graph(model_dir+'.meta')
#         saver.restore(sess,model_dir)
        
#         graph = tf.get_default_graph()
                
#         y_ph = graph.get_tensor_by_name('Placeholders/y_ph:0')
#         x_ph = graph.get_tensor_by_name('Placeholders/x_ph:0')
#         keep_prob_ph = graph.get_tensor_by_name('Placeholders/keep_prob_ph:0')
#         accuracy = graph.get_tensor_by_name('Accuracy/Mean:0')
        
#         for i,(x,y) in enumerate(get_batches(x_test,y_test,batch_size),1):
#             feed_test = {x_ph:x,
#                          y_ph:y,
#                          keep_prob_ph:1}                         
            
#             batch_acc = sess.run([accuracy],feed_dict=feed_test)
#             test_acc.append(batch_acc)
#         print("Test Accuracy : {:.3f}".format(np.mean(test_acc)))

In [None]:
# model_dir = 'model_save/sentiment.ckpt-1'
# model_test(model_dir,x_test,y_test,batch_size = 256)