In [None]:

# Pin NumPy before anything imports it.
# NOTE(review): presumably this works around the `allow_pickle` default change in
# NumPy 1.16.3 that broke the Keras dataset loaders on older TensorFlow — confirm.
!pip install numpy==1.16.2


import tensorflow as tf 
import numpy as np
from tensorflow.keras.preprocessing import sequence
from numpy import array


import logging
# Silence the TensorFlow Python logger so warnings do not clutter cell output.
logging.getLogger('tensorflow').disabled = True


# Only the `vocab_size` most frequent words keep their own index; rarer words are
# mapped to the out-of-vocabulary index by the loader.
vocab_size = 10000

# `tf.keras.datasets` ships no `googleplay` loader (the attribute lookup raises
# AttributeError). The bundled binary-sentiment review dataset that accepts
# `num_words` and provides `get_word_index()` is `imdb`.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=vocab_size)


# Human-readable names for label 0 and label 1 respectively.
class_names = ["Negative", "Positive"]



In [None]:

# `tf.keras.datasets` has no `googleplay` module; the word index that matches the
# bundled binary-sentiment review dataset comes from `imdb`.
word_index = tf.keras.datasets.imdb.get_word_index()

# The raw index starts at 1, but `load_data` offsets every word by 3 by default
# (`index_from=3`) to reserve the low indices for the special tokens below.
word_index = {word: (rank + 3) for word, rank in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNKNOWN>"] = 2
word_index["<UNUSED>"] = 3


# Inverse mapping (index -> word) used to turn encoded reviews back into text.
reverse_word_index = {index: word for word, index in word_index.items()}
def decode_review(text):
    """Convert a sequence of word indices back into a space-separated string.

    Each index is looked up in ``reverse_word_index``; indices with no entry
    are rendered as ``'?'``.
    """
    words = (reverse_word_index.get(index, '?') for index in text)
    return ' '.join(words)

## **FSCORE OF LSTM + GLOVE**


In [None]:
# Concatenate the test and training reviews so the statistics cover every review
allreviews = np.concatenate((x_train, x_test), axis=0)

# Length of every review, computed once and reused for all three statistics
result = [len(x) for x in allreviews]
print("Maximum review length: {}".format(max(result)))
print("Minimum review length: {}".format(min(result)))
print("Mean review length: {}".format(np.mean(result)))

# Show one training example, first as raw word indices and the numeric label...
sample_index = 60
sample_review = x_train[sample_index]
sample_label = y_train[sample_index]

print("")
print("googleplay Review")
print("  Review Text: " + str(sample_review))
print("  Review Sentiment: " + str(sample_label))


# ...then decoded back into words with the label's class name
print("")
print("UTKAL TEST Review")
print("  Review Text: " + decode_review(sample_review))
print("  Review Sentiment: " + class_names[sample_label])

## **Pre-processing Data (PADDING GOOGLE PLAY STORE DATASET)**


In [None]:

# Every review is padded or truncated to exactly this many tokens
review_length = 500

x_train = sequence.pad_sequences(x_train, maxlen=review_length)
x_test = sequence.pad_sequences(x_test, maxlen=review_length)

# Report the shapes of the padded inputs and of their label arrays
for shape_label, shaped_array in (
    ("Shape Training Review Data: ", x_train),
    ("Shape Training Class Data: ", y_train),
    ("Shape Test Review Data: ", x_test),
    ("Shape Test Class Data: ", y_test),
):
    print(shape_label + str(shaped_array.shape))

# Decode the same sample again to show what the padding looks like
print("")
print("Google Review Text (post padding): " + decode_review(x_train[60]))

## **Create and build LSTM + GLOVE ON GOOGLEPLAY DATASET**

In [None]:

# NOTE(review): the section title mentions GloVe, but the Embedding layer below is
# created without any pretrained weights, so its vectors are learned from scratch.

# `CuDNNLSTM` is a GPU-only layer that exists only in TensorFlow 1.x. When it is
# missing (TensorFlow 2.x), fall back to the standard `LSTM` layer, which selects
# the cuDNN kernel automatically when a GPU is available and default arguments
# are used.
recurrent_layer = getattr(tf.keras.layers, "CuDNNLSTM", tf.keras.layers.LSTM)

model = tf.keras.models.Sequential()

# Map each word index to a dense 32-dimensional vector
model.add(
    tf.keras.layers.Embedding(
        input_dim = vocab_size, # The size of our vocabulary
        output_dim = 32, # Dimension each word is mapped to
        input_length = review_length # Length of input sequences
    )
)

# Randomly zero 25% of the embedding activations during training to reduce overfitting
model.add(
    tf.keras.layers.Dropout(
        rate=0.25
    )
)

# Recurrent layer that condenses the whole sequence into a single 32-unit vector
model.add(
    recurrent_layer(
        units=32 # 32 LSTM units in this layer
    )
)

# Second dropout layer with the same aim as the first
model.add(
    tf.keras.layers.Dropout(
        rate=0.25
    )
)

# Single sigmoid unit: the output is the predicted probability of class 1
model.add(
    tf.keras.layers.Dense(
        units=1, # Single unit
        activation='sigmoid' # Sigmoid activation function (output from 0 to 1)
    )
)

# Compile the model
model.compile(
    loss=tf.keras.losses.binary_crossentropy, # loss function
    optimizer=tf.keras.optimizers.Adam(), # optimiser function
    metrics=['accuracy'])

# Display a summary of the model's structure
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 32)           320000    
_________________________________________________________________
dropout (Dropout)            (None, 500, 32)           0         
_________________________________________________________________
cu_dnnlstm (CuDNNLSTM)       (None, 32)                8448      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 328,481
Trainable params: 328,481
Non-trainable params: 0
_________________________________________________________________


## **Visualise the Model**

## **Train the LSTM + GloVe model on the Google reviews**

In [None]:
# Fit the model on the padded training reviews; 20% of the training data is
# held out for validation and progress is reported after each epoch
history = model.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=3,
    validation_split=0.2,
    verbose=1,
)

## **Evaluate the model on the UTKAL review test data and view the F-score results on the Play Store dataset**

In [None]:
# Get model predictions for the test reviews and report per-class metrics
from sklearn.metrics import classification_report

# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single sigmoid
# output it is equivalent to thresholding the predicted probability at 0.5, which
# works on both old and new TensorFlow versions and yields the same (n, 1) int32 array.
predicted_classes = (model.predict(x_test) > 0.5).astype("int32")
print(classification_report(y_test, predicted_classes, target_names=class_names))

              precision    recall  f1-score   support

    Negative       0.86      0.90      0.88     12500
    Positive       0.90      0.86      0.88     12500

    accuracy                           0.88     25000
   macro avg       0.88      0.88      0.88     25000
weighted avg       0.88      0.88      0.88     25000

