In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# TensorFlow
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Importing the data
# Both archives are tab-separated with the header on the first row.
# quoting=3 is the integer value of csv.QUOTE_NONE, so quote characters
# are kept as ordinary text instead of being parsed as field delimiters.
tsv_options = dict(header=0, delimiter="\t", quoting=3)

train = pd.read_csv("../input/word2vec-nlp-tutorial/labeledTrainData.tsv.zip", **tsv_options)
test = pd.read_csv("../input/word2vec-nlp-tutorial/testData.tsv.zip", **tsv_options)

In [None]:
# Splitting the dataset into training and validation sets
# The first 80% of the rows (in file order, no shuffling) are used for
# training; the remaining rows are held out for validation.
split_size = int(len(train) * 0.8)

training_reviews, validation_reviews = (
    train.review[:split_size],
    train.review[split_size:],
)
training_sentiments, validation_sentiments = (
    train.sentiment[:split_size],
    train.sentiment[split_size:],
)

In [None]:
# Text to Sequence and Padding
vocab_size = 10000     # passed to the tokenizer as num_words and to the Embedding layer as its input size
embedding_dim = 32     # size of each word vector in the Embedding layer
max_length = 200       # every sequence is padded/truncated to this many tokens
# NOTE(review): trunc_type is not passed to any pad_sequences call (here or
# in the test-set cell), so Keras' default truncation side applies. If 'post'
# is intended, add truncating=trunc_type to every pad_sequences call at the
# same time so training, validation and test inputs stay consistent.
trunc_type='post'
oov_tok = "<OOV>"

# Pass the OOV token to the tokenizer: without it, words outside the
# vocabulary are silently dropped from the sequences instead of being mapped
# to a dedicated placeholder index.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
# Fit the vocabulary on the training reviews only; the validation (and later
# test) reviews are encoded with this same fitted tokenizer.
tokenizer.fit_on_texts(training_reviews)

training_sequences = tokenizer.texts_to_sequences(training_reviews)
padded_training = pad_sequences(training_sequences, maxlen=max_length)

validation_sequences = tokenizer.texts_to_sequences(validation_reviews)
padded_validation = pad_sequences(validation_sequences, maxlen=max_length)

In [None]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Stop training once the validation accuracy reaches a target value.

    Parameters
    ----------
    accuracy : float, default 0.87
        Threshold for the ``val_accuracy`` entry of the epoch logs. Training
        is stopped at the end of the first epoch where the metric is greater
        than or equal to this value.
    """

    def __init__(self, accuracy=0.87):
        # Let the Keras base class initialise its own state before adding ours.
        super().__init__()
        self.accuracy = accuracy

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's validation accuracy and stop training if reached.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished (unused).
        logs : dict or None
            Metric results for the epoch; only ``'val_accuracy'`` is read.
        """
        # Avoid a shared mutable default and tolerate logs being None.
        val_accuracy = (logs or {}).get('val_accuracy')
        # The metric can be absent (e.g. no validation data supplied);
        # comparing None with a float would raise TypeError, so just skip.
        if val_accuracy is None:
            return
        if val_accuracy >= self.accuracy:
            print(f"\nReached {self.accuracy} accuracy so cancelling training!")
            self.model.stop_training = True

In [None]:
# Building the Neural Network
# Fix TensorFlow's global random seed so the randomness it controls (such as
# weight initialisation) is repeatable each time this cell is re-run.
SEED = 42
tf.random.set_seed(SEED)

# Embedding -> Flatten -> small dense layer -> single sigmoid unit that
# outputs a value in [0, 1] for the binary sentiment label.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(32, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

# Stops training early once validation accuracy reaches the callback's
# default threshold.
callbacks = CustomCallback()

model.compile(loss='binary_crossentropy',
             optimizer=tf.optimizers.Adam(learning_rate=0.001),
             metrics=['accuracy'])

model.summary()

# epochs=50 is an upper bound; the callback may end training sooner.
model.fit(padded_training, training_sentiments, epochs=50, callbacks=[callbacks], validation_data=(padded_validation, validation_sentiments))

In [None]:
# Evaluating the neural network model
# evaluate() returns the loss followed by the compiled metrics, so index 1
# is the accuracy on the validation set.
evaluation_results = model.evaluate(padded_validation, validation_sentiments, verbose=0)
accuracy_score = evaluation_results[1]

accuracy_percent = round(accuracy_score*100, 2)
print(f"Accuracy Score: {accuracy_percent}%")

In [None]:
# Making predictions on the test set
# Encode and pad the test reviews with the tokenizer fitted on the training data.
testing_sequences = tokenizer.texts_to_sequences(test.review)
padded_testing = pad_sequences(testing_sequences, maxlen=max_length)

# Threshold each sigmoid output at 0.5 to obtain a hard 0/1 label.
predicted_probabilities = model.predict(padded_testing)
sentiment_predictions = [
    1 if probability > 0.5 else 0
    for probability in predicted_probabilities
]

# Preview the first ten predicted labels.
pd.DataFrame({'Predictions':sentiment_predictions}).head(10)

In [None]:
# Positive Review
# Print the test review at index 0 together with the model's predicted label.
first_review_label = 'Positive' if sentiment_predictions[0] == 1 else 'Negative'
print(f"Review:\n{test.review[0]}\n\nSentiment: {first_review_label}")

In [None]:
# Negative Review
# Print the test review at index 4999 together with the model's predicted label.
review_4999_label = 'Positive' if sentiment_predictions[4999] == 1 else 'Negative'
print(f"Review:\n{test.review[4999]}\n\nSentiment: {review_4999_label}")

In [None]:
# Submitting the results
# One row per test review: its id and the predicted 0/1 sentiment.
submission_columns = {
    "id": test.id,
    "sentiment": sentiment_predictions,
}
output = pd.DataFrame(data=submission_columns)

# quoting=3 is the integer value of csv.QUOTE_NONE: fields are written as-is,
# without adding quote characters around them.
output.to_csv("submission.csv", index=False, quoting=3)