In [None]:
import numpy as np
import pandas as pd
import re
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import os
# Show every file found under the Kaggle input directory, one full path per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Load, in order, the training set, the test set and the sample submission file.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/nlp-getting-started/train.csv',
        '/kaggle/input/nlp-getting-started/test.csv',
        '/kaggle/input/nlp-getting-started/sample_submission.csv',
    )
)

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# Number of shuffled rows used for training; the remaining rows form the validation split.
TRAINING_SIZE = 6090

# Shuffle all rows once with a fixed seed (random_state=0).
train_shuffle = train.sample(frac=1, random_state=0)

# The first TRAINING_SIZE shuffled rows (by position) are the training split.
training_sentences = train_shuffle['text'].iloc[:TRAINING_SIZE]
training_labels = train_shuffle['target'].iloc[:TRAINING_SIZE]

# Every row after that position is the validation split.
validation_sentences = train_shuffle['text'].iloc[TRAINING_SIZE:]
validation_labels = train_shuffle['target'].iloc[TRAINING_SIZE:]

In [None]:
# Put each split back into ascending row-index order. Sentences and labels are
# sorted by the same index, so corresponding entries stay paired.
training_sentences = training_sentences.sort_index(ascending=True)
training_labels = training_labels.sort_index(ascending=True)

validation_sentences = validation_sentences.sort_index(ascending=True)
validation_labels = validation_labels.sort_index(ascending=True)

In [None]:
# Copy the label Series into standalone NumPy arrays (these are what model.fit receives).
training_labels_final = training_labels.to_numpy(copy=True)
validation_labels_final = validation_labels.to_numpy(copy=True)

In [None]:
# Sanity check: print three "label : sentence" pairs from the training split.
# Rows are selected by position rather than by index label: after the shuffled
# split a given label (e.g. 1) may have ended up in the validation split, in
# which case training_labels[1] would raise KeyError.
for label, sentence in zip(training_labels.iloc[:3], training_sentences.iloc[:3]):
    print("{} : {}".format(label, sentence))

In [None]:
# Build the vocabulary from the training sentences only.
# The tokenizer must not be re-created and fitted on the validation sentences:
# doing so throws away the training vocabulary, so the training and test text
# would be encoded with word indices learned from the validation split alone.
# Words that never occur in the training text map to the '<OOV>' token.
tokenizer = Tokenizer(num_words=100000, oov_token='<OOV>')
tokenizer.fit_on_texts(training_sentences)

In [None]:
# Padding settings shared by both splits: each sequence is padded or cut
# at its end so that it is exactly 25 tokens long.
pad_options = dict(maxlen=25, padding='post', truncating='post')

# Encode the training text as integer sequences, then pad.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
pad_training = pad_sequences(training_sequences, **pad_options)

# Same treatment for the validation text.
validation_sequences = tokenizer.texts_to_sequences(validation_sentences)
pad_validation = pad_sequences(validation_sequences, **pad_options)

In [None]:
# Binary classifier: embedding -> two stacked bidirectional LSTMs -> dense head
# ending in a single sigmoid unit.
model = tf.keras.Sequential([
    # Vocabulary size 100000 matches the tokenizer's num_words. input_length
    # must equal the padded sequence length (maxlen=25), not 20, so the declared
    # input shape agrees with the arrays passed to fit/predict.
    tf.keras.layers.Embedding(100000, 16, input_length=25),
    # return_sequences=True hands the full sequence of outputs to the next LSTM.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 15 epochs; the padded validation split is supplied as validation_data.
history = model.fit(
    pad_training,
    training_labels_final,
    epochs=15,
    validation_data=(pad_validation, validation_labels_final),
)

In [None]:
import matplotlib.pyplot as plt


def plot_graph(history, string):
    """Plot one metric and its validation counterpart on a labelled figure.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            sequences of values (here, the object returned by ``model.fit``).
        string: Name of the metric to plot. Both ``history.history[string]``
            and ``history.history['val_' + string]`` are drawn.

    Raises:
        KeyError: If either key is missing from ``history.history``.
    """
    validation_key = 'val_' + string
    # Label both curves so they can be told apart in the legend.
    plt.plot(history.history[string], label=string)
    plt.plot(history.history[validation_key], label=validation_key)
    plt.xlabel('epoch')
    plt.ylabel(string)
    plt.title('Training vs. validation ' + string)
    plt.legend()
    plt.show()

In [None]:
# Draw the accuracy curves first, then the loss curves.
for metric_name in ('accuracy', 'loss'):
    plot_graph(history, metric_name)

In [None]:
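# Optional alternative (disabled): a model with a single bidirectional GRU layer.
# Uncomment this cell and the two cells below it to train and plot it.
# input_length is 25 to match the maxlen=25 used when padding the sequences.
# model_gru = tf.keras.Sequential([
    # tf.keras.layers.Embedding(100000, 16, input_length=25),
    # tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),
    # tf.keras.layers.Dense(32, activation='relu'),
    # tf.keras.layers.Dense(1, activation='sigmoid')
# ])
# model_gru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])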

In [None]:
# history = model_gru.fit(pad_training, training_labels_final, epochs=15, validation_data=(pad_validation, validation_labels_final))

In [None]:
# plot_graph(history, 'accuracy')
# plot_graph(history, 'loss')

In [None]:
# Encode the test text with the already-fitted tokenizer, then pad or cut every
# sequence at its end to 25 tokens.
test_sequences = tokenizer.texts_to_sequences(test['text'])
pad_test = pad_sequences(
    test_sequences,
    maxlen=25,
    padding='post',
    truncating='post',
)

In [None]:
# Run the model on the padded test sequences, then convert its outputs to
# 0/1 labels: 1 where the output is greater than 0.5, otherwise 0.
prediction = model.predict(pad_test)
above_threshold = prediction > 0.5
submission['target'] = above_threshold.astype(int)

In [None]:
# Write the submission frame to submission.csv with a header row and without the index column.
submission.to_csv(
    'submission.csv',
    header=True,
    index=False,
)