# Sentiment Analysis Using Tensorflow LSTM - CNN with Attention Layer

code below takes ages on dell lat 5401 (one epoch more than 9 hours, actual time unknown since I have quit after 9 hours) does not seem to be worthwhile unless run on supercomputer 

## Imports

In [1]:
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, load_model
from tensorflow.compat.v1.keras.layers import LSTM, Embedding, Dropout,Dense, Bidirectional, BatchNormalization, RepeatVector
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import RMSprop, Adamax , Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from attention.layers import AttentionLayer
import tensorflow as tf
from tensorflow.python.client import device_lib

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" #This is for multiple print statements per cell

import tensorflow.compat.v1
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# config = ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.6
# config.gpu_options.allow_growth = True
# session = InteractiveSession(config=config)

## Confirm GPU processing available

In [2]:
value = tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)
print ('***If TF can access GPU: ***\n\n',value) # MUST RETURN True IF IT CAN!!

print()
value = tf.config.list_physical_devices('GPU')
print(value)

print()
print(device_lib.list_local_devices())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
***If TF can access GPU: ***

 True

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6699266791480035996
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 15986473697306125243
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 8528774748483814804
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1521483776
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4777017450652504205
physical_device_desc: "device: 0, name: GeForce MX150, pci bus id: 0000:02:00.0, compute capability: 6.1"
]


## Getting data

In [3]:
df = pd.read_csv('consolidated_tweet_data-cleaned-stemmed-lemmatized.csv', sep='\t')
df

Unnamed: 0,sentiment,text
0,negative,oh no it fade away again
1,positive,bunnylak will kill me but i cant stop listen t...
2,negative,last day in cali partyin for the last time wit...
3,negative,is have a major soar throat
4,positive,my last day a 12 year old
...,...,...
1611536,negative,twisuz yeah and how did thi happen i wa updat ...
1611537,negative,smittygoali im sorri about your dog
1611538,negative,posipat im alreadi there i wish you were here
1611539,negative,is think in 12 hour ill be at the airport thi ...


In [4]:
# How much of Dataset to be used
frac = 0.2
# sample and shuffle the dataset according to the fraction choise in the line above
df1 = df.sample(frac=frac).reset_index(drop=True)
df1

Unnamed: 0,sentiment,text
0,positive,well i had an interest night last night how ab...
1,negative,who fli high fall deep
2,negative,omg omg omg omg thi is so cool funhous music v...
3,positive,art job pencil want to creat origin 30 pg stor...
4,negative,i think spoon is sick
...,...,...
322303,positive,justkhyi okay khyi let start be nice right now
322304,negative,salmagrat cork no ppp
322305,positive,kle ` i love my pleat the new top i love it an...
322306,positive,darlingnickieb your man come on tonight you ne...


## Tokenization for training

In [5]:
vocabulary_size = 2000

In [6]:
tokenizer = Tokenizer(num_words=vocabulary_size, split=" ", oov_token='<OOV>')
tokenizer.fit_on_texts(df['text'].values)

In [7]:
X = tokenizer.texts_to_sequences(df1['text'].values)
X = pad_sequences(X) # padding our text vector so they all have the same length
X[:5]

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,   71,    2,   68,   97,  521,
          76,   96,   76,   72,   64,    9],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,  169,  606,  603,  507, 1791],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,  245,  245,  245,  245,
          30,   10,   19,  210,    1,  308,  323,    2,  117,   23,  327,
          40,   15,   69,   28,   56,    1],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,  972,  309,
           1,   53,    3, 1312, 1314,  555,    1,  

## Model, Training and Testing

### Creating the model

In [8]:
model = Sequential()
model.add(Embedding(vocabulary_size, 128, input_length=X.shape[1]))
model.add(Dropout(0.3))
model.add(LSTM(128, return_sequences=True, dropout=0.3, recurrent_dropout=0.2))
model.add(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.2))
model.add(RepeatVector(vocabulary_size))
model.add(LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.2))
model.add(LSTM(128, dropout=0.3, recurrent_dropout=0.2))
# model.add(AttentionLayer(name='attention'))
model.add(BatchNormalization())
model.add(Dense(2, activation='sigmoid',kernel_regularizer=regularizers.l2(0.01)))


# model = Sequential([Dense()])
# model.add(Embedding(vocabulary_size, 256,weights=[embedding_matrix], input_length=X.shape[1]))
# model.add(Dropout(0.3))
# model.add(Conv1D(16,32,padding="same",activation="relu"))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Conv1D(32,16,padding="same",activation="relu",kernel_regularizer=regularizers.l2(0.01)))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Dropout(0.3))
# model.add(Conv1D(64,12,padding="same",activation="relu",kernel_regularizer=regularizers.l2(0.01)))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Dropout(0.3))
# model.add(Conv1D(128,9,padding="same",activation="relu",kernel_regularizer=regularizers.l2(0.01)))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Dropout(0.3))
# model.add(Conv1D(256,6,padding="same",activation="relu",kernel_regularizer=regularizers.l2(0.01)))
# model.add(MaxPooling1D(pool_size=2))
# model.add(Flatten())
# model.add(Bidirectional(CuDNNLSTM(256, return_sequences=True)))
# model.add(Dropout(0.3))
model.add(AttentionLayer(name='attention',activation=))
# model.add(BatchNormalization())
# model.add(Dense(2, activation='sigmoid',kernel_regularizer=regularizers.l2(0.01),
#                 activity_regularizer=regularizers.l1(0.01)))
# model.add(Dense(2, activation='sigmoid'))

In [9]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
weight_matrix = model.get_weights()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 39, 128)           256000    
_________________________________________________________________
dropout (Dropout)            (None, 39, 128)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 39, 128)           131584    
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                49408     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 2000, 64)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 2000, 64)          33024     
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               9

In [10]:
# for list in range(len(weight_matrix)):
#     weight_matrix[list].shape

In [11]:
# print(model.layers[3].trainable_weights)
units = int(int(model.layers[3].trainable_weights[0].shape[1])/4)
print("No units: ", units)

No units:  64


In [12]:
# W1 = model.layers[3].get_weights()[0]
# U1 = model.layers[3].get_weights()[1]
# b1 = model.layers[3].get_weights()[2]

# W1_i = W1[:, :units]
# W1_f = W1[:, units: units * 2]
# W1_c = W1[:, units * 2: units * 3]
# W1_o = W1[:, units * 3:]

# U1_i = U1[:, :units]
# U1_f = U1[:, units: units * 2]
# U1_c = U1[:, units * 2: units * 3]
# U1_o = U1[:, units * 3:]

# b1_i = b1[:units]
# b1_f = b1[units: units * 2]
# b1_c = b1[units * 2: units * 3]
# b1_o = b1[units * 3:]

In [13]:
# W2 = model.layers[4].get_weights()[0]
# U2 = model.layers[4].get_weights()[1]
# b2 = model.layers[4].get_weights()[2]

# W2_i = W2[:, :units]
# W2_f = W2[:, units: units * 2]
# W2_c = W2[:, units * 2: units * 3]
# W2_o = W2[:, units * 3:]

# U2_i = U2[:, :units]
# U2_f = U2[:, units: units * 2]
# U2_c = U2[:, units * 2: units * 3]
# U2_o = U2[:, units * 3:]

# b2_i = b2[:units]
# b2_f = b2[units: units * 2]
# b2_c = b2[units * 2: units * 3]
# b2_o = b2[units * 3:]

### Training the model

In [14]:
y = pd.get_dummies(df1['sentiment']).values
[print(df1['sentiment'][i], y[i]) for i in range(0,5)]

positive [0 1]
negative [1 0]
negative [1 0]
positive [0 1]
negative [1 0]


[None, None, None, None, None]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)

#### Early stopping and model checkpoint mechanism

In [16]:
early_stop = EarlyStopping(monitor='val_loss',patience=2)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
tensorflow.compat.v1.disable_v2_behavior

<function tensorflow.python.compat.v2_compat.disable_v2_behavior()>

In [17]:
batch_size = 7
epochs = 30
import time
from datetime import datetime
datetime = str(datetime.now())
csv_logger = tf.keras.callbacks.CSVLogger('training'+datetime+'.log')
start = time.time()
print("started at:")
print(start)
history = model.fit(X_train, y_train, epochs=epochs, validation_data=(X_val,y_val), batch_size=batch_size, verbose=2, callbacks=[csv_logger,early_stop])
end = time.time()
elapsed = end - start
print(elapsed/60," minutes")

started at:
1586553479.463264
Train on 206276 samples, validate on 51570 samples
Epoch 1/30


KeyboardInterrupt: 

Plotting training history

In [None]:
# # Plot training & validation accuracy values
# plt.plot(history.history['accuracy'])
# # plt.plot(history.history['val_accuracy'])
# plt.title('Model accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()

# # Plot training & validation loss values
# plt.plot(history.history['loss'])
# # plt.plot(history.history['val_loss'])
# plt.title('Model loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()


### Plotting training and validation accuracy and loss

In [None]:
# plot train and validation accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model train vs validation loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# plot train and validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model train vs validation loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

### Saving the trained model

name of the file

In [None]:
name = 'sentiment-analysis-trained-model'

In [None]:
from datetime import datetime
dateTimeObj = datetime.now()
date = str(dateTimeObj.date())
time = str(dateTimeObj.time())
timestamp = date+time
punctuation = ['-',':','.']
for sign in punctuation:
    timestamp = timestamp.replace(sign,'')
print(timestamp)

In [None]:
# model.save(name+timestamp+'.h5')

### Testing the model

In [None]:
predictions = model.predict(X_test)
[print(X_test[i], predictions[i], y_test[i]) for i in range(0, 10)]

In [None]:
accurate_prediction_count, inaccurate_prediction_count = 0, 0
for i, prediction in enumerate(predictions):
    if np.argmax(prediction)==np.argmax(y_test[i]):
        accurate_prediction_count += 1
    else:
        inaccurate_prediction_count += 1

total_predictions = accurate_prediction_count + inaccurate_prediction_count
print('Number of predictions: ', total_predictions)
print('Number of accurate predictions: ', accurate_prediction_count)
print('Number of false predictions: ', inaccurate_prediction_count)    
print('Accuracy: ', accurate_prediction_count/total_predictions)