In [None]:
# Import all necessary libraries
%reset -f
import numpy as np
import pandas as pd 
from keras.preprocessing.text import Tokenizer 
from keras_preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout, Input, Normalization
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score,precision_score,recall_score, f1_score
import matplotlib.pyplot as plt

In [None]:
# Load the review dataset and keep only the two columns used in this notebook:
# the review text and its rating.
data = pd.read_csv('dataset_elec_4000.csv')
data = data[['review','rating']]

# Class balance. A rating of exactly 1.0 counts as a positive review; every
# other value counts as negative. The comparison is vectorized over the whole
# column instead of looking rows up one by one with data['rating'][i], which
# is slow and only works while the index is exactly 0..n-1.
positive = int((data['rating'] == 1.0).sum())
negative = len(data) - positive

# Print the amount of positive reviews
print("Positive review:", positive)

# Print the amount of negative reviews
print("Negative review:", negative)

# Print the data variable to check that the CSV file was read and stored properly
print(data)

In [None]:
# Word-level tokenizer configuration: text is lower-cased (lower=True), the
# characters listed in `filters` are stripped, and the text is split on ' '.
# No vocabulary cap (num_words=None), no out-of-vocabulary token and no
# custom analyzer are set.
tokenizer = Tokenizer(
    num_words=None, oov_token=None, analyzer=None,
    lower=True, char_level=False, split=' ',
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
)

# Build the word -> integer index from every review in the dataset
tokenizer.fit_on_texts(data['review'].values)

# Turn each review into its sequence of word indices, then pad the sequences
# to a common length so X becomes a single 2-D array (no maxlen is given, so
# the length is taken from the data)
X = pad_sequences(tokenizer.texts_to_sequences(data['review'].values))

In [None]:
# Target labels: the rating column of the dataset
Y = data['rating']

# Fixed seed for the shuffle that precedes the split. The previous
# random_state=np.random handed the NumPy random module to scikit-learn, i.e.
# the unseeded global generator, so every run produced a different train/test
# partition and the reported scores could not be reproduced.
SPLIT_SEED = 42

# Hold out 25% of the samples for testing (X_test, Y_test); the remaining 75%
# (X_train, Y_train) are used for training
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=SPLIT_SEED
)

# Print the size of the training dataset for checking purposes
print(X_train.shape, Y_train.shape)

# Print the size of the testing dataset for checking purposes
print(X_test.shape, Y_test.shape)

In [None]:
# Layer stack, handed to Sequential in the order the data flows through it:
#   1. Embedding - a 100-dimensional vector per word index; the table has
#                  len(tokenizer.word_index)+1 rows and the input length is
#                  the padded width of X
#   2. LSTM      - 128 units
#   3. Dropout   - rate 0.1
#   4. Dense     - a single unit with sigmoid activation
model = Sequential([
    Embedding(len(tokenizer.word_index)+1, 100, input_length = X.shape[1]),
    LSTM(128),
    Dropout(0.1),
    Dense(1, activation='sigmoid'),
])

# Compile with binary cross-entropy as the loss, Adam (learning rate 0.001)
# as the optimizer and accuracy as the reported metric
model.compile(
    optimizer=Adam(learning_rate = 0.001),
    loss="binary_crossentropy",
    metrics = ["accuracy"],
)

# Print the model summary
model.summary()

In [None]:
# Fit the model on (X_train, Y_train) for 5 epochs with a batch size of 100.
# (X_test, Y_test) is passed as validation data, and the object returned by
# fit() is kept in `test` so the plotting cells can read test.history.
test = model.fit(
    X_train,
    Y_train,
    epochs=5,
    batch_size=100,
    validation_data=(X_test, Y_test),
)

In [None]:
# Draw the accuracy recorded at each epoch: first the training curve
# ('accuracy'), then the curve for the data passed as validation_data
# ('val_accuracy'). The order matches the legend entries below.
for history_key in ('accuracy', 'val_accuracy'):
    plt.plot(test.history[history_key])

# Title, axis labels and legend (upper left corner)
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')

# Show the graph
plt.show()

In [None]:
# Draw the loss recorded at each epoch: first the training curve ('loss'),
# then the curve for the data passed as validation_data ('val_loss').
# The order matches the legend entries below.
for history_key in ('loss', 'val_loss'):
    plt.plot(test.history[history_key])

# Title, axis labels and legend (upper right corner)
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper right')

# Show the graph
plt.show()

In [None]:
# Confusion-matrix counts for the test split.
#
# The whole of X_test is scored in ONE batched predict call. The previous
# per-sample loop called model.predict up to four times for every row and
# reshaped each row to a hard-coded width of 250, which fails as soon as the
# padded sequence length differs from 250.
probabilities = np.asarray(model.predict(X_test)).reshape(-1)

# A sample is predicted positive when its score is above 0.5; everything else
# (including a score of exactly 0.5, which previously matched no branch and
# was silently left out of all four counters) is predicted negative.
predicted_positive = probabilities > 0.5

# Ground-truth masks. Only labels equal to 1.0 or 0.0 are counted; any other
# label value falls into none of the four buckets.
actual_labels = Y_test.values
actual_positive = actual_labels == 1.0
actual_negative = actual_labels == 0.0

# Amount of true positive outputs
true_positive = int(np.sum(predicted_positive & actual_positive))

# Amount of true negative outputs
true_negative = int(np.sum(~predicted_positive & actual_negative))

# Amount of false positive outputs
false_positive = int(np.sum(predicted_positive & actual_negative))

# Amount of false negative outputs
false_negative = int(np.sum(~predicted_positive & actual_positive))

In [None]:
# Print the four confusion-matrix counters, one per line, in the order:
# true positive, true negative, false positive, false negative
for label, count in (
    ("True positive  : ", true_positive),
    ("True negative  : ", true_negative),
    ("False positive : ", false_positive),
    ("False negative : ", false_negative),
):
    print(label, count)

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Each score goes through _safe_ratio so that a degenerate result (for
# example a model that never predicts the positive class, which makes
# true_positive + false_positive zero) yields 0.0 instead of raising
# ZeroDivisionError.

#Calculate the accuracy score of the model
accuracy = _safe_ratio(
    true_positive + true_negative,
    true_positive + true_negative + false_positive + false_negative,
)

#Calculate the precision score of the model
precision = _safe_ratio(true_positive, true_positive + false_positive)

#Calculate the recall score of the model
recall = _safe_ratio(true_positive, true_positive + false_negative)

#Calculate the F1 score of the model
# Stored in `f1` rather than `f1_score` so the f1_score function imported
# from sklearn.metrics is not overwritten by a float.
f1 = 2 * _safe_ratio(precision * recall, precision + recall)

#Print the name of the model
print("Evaluation of Long Short-Term Memory for Sentiment Analysis:")

#Print the accuracy score of the model
print("Accuracy   : %.4f" %accuracy)

#Print the precision score of the model
print("Precision  : %.4f" %precision)

#Print the recall score of the model
print("Recall     : %.4f" %recall)

#Print the F1 score of the model
print("F1 Score   : %.4f" %f1)