### Importing required packages

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, MaxPool1D, Dropout
from keras.layers.embeddings import Embedding
from keras.callbacks import EarlyStopping

### Loading data

In [None]:
# Read the labelled (train) and unlabelled (test) phrase tables; both are tab-separated.
train_path = '/kaggle/input/sentiment-analysis-on-movie-reviews/train.tsv.zip'
test_path = '/kaggle/input/sentiment-analysis-on-movie-reviews/test.tsv.zip'

train = pd.read_csv(train_path, sep='\t')
test = pd.read_csv(test_path, sep='\t')

### Visualizing data

In [None]:
# Print the (rows, columns) tuples of the train and test frames side by side.
print(train.shape, test.shape)

In [None]:
# Preview the first rows of the raw training frame.
train.head()

In [None]:
# Preview the first rows of the raw test frame.
test.head()

In [None]:
# Summarise the training frame: column names, non-null counts and dtypes.
train.info()

In [None]:
# Summarise the test frame: column names, non-null counts and dtypes.
test.info()

### Drop unnecessary columns

In [None]:
# Identifier columns are treated as unnecessary for modelling and removed in place.
id_columns = ['PhraseId', 'SentenceId']
train.drop(columns=id_columns, inplace=True)

# Copy the test PhraseId values into the submission frame before they are
# dropped from `test`; the predictions are attached to this frame later.
submission = test[['PhraseId']].copy()
test.drop(columns=id_columns, inplace=True)

### Final look at the data

In [None]:
# Preview the training frame after the identifier columns were dropped.
train.head()

In [None]:
# Preview the test frame after the identifier columns were dropped.
test.head()

### Number of sentiment classes

In [None]:
# Number of distinct values in the Sentiment column; this sizes the softmax output.
# dropna=False keeps the count identical to len(unique()), which would include NaN.
num_classes = train['Sentiment'].nunique(dropna=False)

In [None]:
# Build a single vocabulary from the training phrases only. num_words caps the
# ids emitted by texts_to_sequences to the most frequent words.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train['Phrase'])
train['Phrase'] = tokenizer.texts_to_sequences(train['Phrase'])

# Encode the test phrases with the SAME fitted tokenizer. Fitting a second
# tokenizer on the test set would assign different integer ids to the same
# words, so the embedding learned on the training ids would be looked up with
# unrelated ids at prediction time.
test['Phrase'] = tokenizer.texts_to_sequences(test['Phrase'])

### Dealing with the padding

In [None]:
# Every encoded phrase is padded or truncated to this many token ids.
max_length = 100

# pad_sequences builds a new array from the list-of-ids column, so no
# placeholder assignment is needed beforehand.
train_copy = pad_sequences(train['Phrase'], maxlen=max_length)
test_copy = pad_sequences(test['Phrase'], maxlen=max_length)

# Embedding table size: one row per word known to `tokenizer`, plus one because
# the Keras tokenizer never assigns id 0 to a word.
# NOTE(review): with num_words set on the tokenizer the emitted ids stay below
# that cap, so this is an upper bound on the rows actually used - confirm.
vocab_size = len(tokenizer.word_index) + 1

### Preparing data

In [None]:
# Features: the padded id sequences built from the training phrases.
X = train_copy

# Targets: one indicator column per distinct Sentiment value.
sentiment_labels = train['Sentiment']
y = pd.get_dummies(sentiment_labels)

### Splitting data for training and validation

In [None]:
# Hold out 30% of the labelled rows for validation; the remainder is used for fitting.
val_fraction = 0.3
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=val_fraction,
)

### Model

In [None]:
# Size of the vector learned for each token id.
embedding_vector_length = 32

# The network is declared as an ordered list of layers and handed to Sequential:
#   token ids -> embedding -> 1D convolution -> max pooling -> 2 stacked LSTMs
#   -> dense hidden layer with dropout -> softmax over the sentiment classes.
layer_stack = [
    # Maps each of the max_length token ids to a dense vector.
    Embedding(input_dim=vocab_size,
              output_dim=embedding_vector_length,
              input_length=max_length),
    # 'same' padding keeps the sequence length unchanged through the convolution.
    Conv1D(filters=16, kernel_size=3, padding='same', activation='relu'),
    # Pooling over windows of 2 halves the sequence length.
    MaxPool1D(pool_size=2),
    # First LSTM returns the full sequence so the second LSTM can consume it;
    # the second returns only its final output.
    LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    LSTM(16, dropout=0.2, recurrent_dropout=0.2, return_sequences=False),
    # Classifier head.
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
]
model = Sequential(layer_stack)

# Targets are one-hot encoded, hence categorical cross-entropy.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

In [None]:
# Stop training once validation accuracy has failed to improve by at least
# min_delta for `patience` consecutive epochs.
# The monitored name must match the key Keras logs for the metric: the model is
# compiled with metrics=['accuracy'], whose validation entry is 'val_accuracy'
# (the same key the accuracy plot reads from the training history). With the
# previous 'val_acc' the callback could not find its metric.
early_stopping = EarlyStopping(min_delta=0.001,
                               mode='max',
                               monitor='val_accuracy',
                               patience=2)
callback = [early_stopping]

### Fitting model

In [None]:
# Training settings gathered in one place; early stopping may end the run
# before the epoch limit is reached.
fit_options = {
    'batch_size': 1024,
    'epochs': 20,
    'verbose': 1,
    'validation_data': (X_val, y_val),
    'callbacks': callback,
}

# The returned History object holds the per-epoch metrics used for plotting.
train_history = model.fit(x=X_train, y=y_train, **fit_options)

### Plotting the accuracy

In [None]:
# Draw one curve per recorded accuracy series (training first, then validation).
epoch_metrics = train_history.history
curves = (
    ('accuracy', 'Training accuracy'),
    ('val_accuracy', 'Validation accuracy'),
)
for metric_key, curve_label in curves:
    plt.plot(epoch_metrics[metric_key], label=curve_label)
plt.legend()

### Predicting and submitting

In [None]:
# One row of class probabilities per test phrase.
prediction = model.predict(test_copy)

# Pick the highest-scoring class index in every row in a single vectorised call.
final_prediction = list(np.argmax(prediction, axis=1))

In [None]:
# Attach the predicted class index of each test phrase next to its PhraseId.
# NOTE(review): using the argmax position as the label assumes the Sentiment
# values are 0..num_classes-1 in sorted order - confirm against the data.
submission['Sentiment'] = final_prediction
# Preview the first rows of the submission frame.
submission.head()

In [None]:
# Write the submission without the DataFrame index column.
output_path = '../working/submission.csv'
submission.to_csv(output_path, index=False)