In [None]:
 # Import Libraries and modules

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd


In [None]:
# Fetch the reviews CSV through Keras' download helper and keep the local
# file path it returns.
REVIEWS_URL = 'https://drive.google.com/uc?id=13ySLC_ue6Umt9RJYSeM2t-V0kCv-4C-P'
dataset_review = tf.keras.utils.get_file(fname='reviews.csv', origin=REVIEWS_URL)
print(dataset_review)

In [None]:
# Load the downloaded reviews into a DataFrame.
# Read from the path returned by get_file (`dataset_review`) rather than a
# hardcoded /root/.keras/... location, so the cell does not depend on the
# notebook running as root on one particular machine.
dataset = pd.read_csv(dataset_review)

print(dataset.shape)
# `head` has to be called: printing the bare attribute shows the bound
# method's repr instead of the first rows.
print(dataset.head())


In [None]:
# Pull the review text and its sentiment label out of the frame as plain lists.
sentences = dataset['text'].tolist()
labels = dataset['sentiment'].tolist()

# Print only a short preview; dumping every review and label floods the
# cell output and bloats the saved notebook.
print(sentences[:5])
print(labels[:5])

In [None]:
# Splitting the dataset: the first 80% of the rows are used for training,
# the remaining rows are held out for testing.
training_size = int(len(sentences) * 0.8)

# Cut the text and label lists at the same index so they stay aligned.
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

In [None]:
# Convert both label lists to NumPy arrays; these are the targets that are
# later handed to model.fit.
training_labels_final, testing_labels_final = map(
    np.array, (training_labels, testing_labels)
)


In [None]:
## Tokenize the text using TensorFlow

# Settings for the vocabulary, the embedding, and the sequence shape
vocab_size = 1000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

# The vocabulary is fitted on the training sentences only
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Both splits are padded/truncated with exactly the same settings
pad_kwargs = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, **pad_kwargs)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, **pad_kwargs)


In [None]:
# Reverse index: map each token id back to its word
reverse_word_index = {token_id: word for word, token_id in word_index.items()}


def decode_review(text):
    """Turn a sequence of token ids back into a space-separated string.

    Ids that have no entry in ``reverse_word_index`` are rendered as '**'.
    """
    words = []
    for token_id in text:
        words.append(reverse_word_index.get(token_id, '**'))
    return ' '.join(words)


# Show a decoded padded sequence next to the sentence it was built from
print(decode_review(padded[1]))
print(training_sentences[1])

In [None]:
# Train a Sentiment Model

# Architecture: token embedding -> global average pooling -> small ReLU
# dense layer -> single sigmoid unit (binary sentiment score).
#
# The sequence length is declared with an explicit Input layer rather than
# the Embedding `input_length` argument: Keras 3 deprecates `input_length`
# (it only triggers a warning there) and, without a declared input shape,
# model.summary() would report an unbuilt model. An Input layer works on
# both older tf.keras and Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(max_length,)),
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(6, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Fit on the padded training sequences; the held-out split is passed as
# validation data.
num_epochs = 60
held_out = (testing_padded, testing_labels_final)
model.fit(
    padded,
    training_labels_final,
    epochs=num_epochs,
    validation_data=held_out,
)

In [None]:
# Use the model to predict a review
fake_reviews = ['I love this phone','I cant live',
                'you are a bitch','I love not my mother',
                'only works when I stand on tippy toes',
                'does not work when I stand on my head','they gave us free chocolate cake and did not charge us']

print(fake_reviews)

# Create the sequences with the SAME preprocessing as the training data:
# same max length, same padding side, and same truncation side. The
# truncation side is passed explicitly because pad_sequences otherwise
# defaults to 'pre', which would differ from training for inputs longer
# than max_length. padding_type / trunc_type come from the tokenization cell.
sample_sequences = tokenizer.texts_to_sequences(fake_reviews)
fakes_padded = pad_sequences(sample_sequences, maxlen=max_length,
                             padding=padding_type, truncating=trunc_type)

classes = model.predict(fakes_padded)

# The closer the class is to 1, the more positive the review is deemed to be
for review, score in zip(fake_reviews, classes):
    print(review)
    print(score)
    print('\n')

