### Import Libraries & Data

In [None]:
import tensorflow as tf
import numpy as np 
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the competition's train and test splits in a single statement.
training_data, testing_data = (
    pd.read_csv('/kaggle/input/nlp-getting-started/train.csv'),
    pd.read_csv('/kaggle/input/nlp-getting-started/test.csv'),
)

# Preview the first rows of the training frame.
training_data.head()

In [None]:
# Print pandas' summary of the training frame (columns, non-null counts, dtypes).
training_data.info()

### Natural Language Processing in TensorFlow

Global variables 

In [None]:
# --- Text preprocessing settings ---
# Passed to Tokenizer(num_words=...) and used as the Embedding input dimension.
vocab_size = 20_000
# Placeholder token handed to the tokenizer for out-of-vocabulary words.
oov_tok = "<OOV>"
# Every encoded sequence is padded/truncated to this many tokens.
max_length = 30
# Side from which over-long sequences are cut by pad_sequences.
trunc_type = "post"

# --- Model settings ---
# Size of each embedding vector in the Embedding layer.
embedding_dim = 16

Tokenizer setup: fit on the training text, then encode and pad both datasets

In [None]:
# Create the tokenizer and fit its vocabulary on the training text only.
tokenizer = Tokenizer(oov_token=oov_tok, num_words=vocab_size)
tokenizer.fit_on_texts(training_data["text"])

# Word -> integer index mapping learned by the tokenizer.
word_index = tokenizer.word_index

In [None]:
# Encode each training text as a list of word indices, then pad/truncate
# every list to exactly `max_length` entries.
training_sequences = tokenizer.texts_to_sequences(training_data["text"])
training_padded = pad_sequences(
    training_sequences,
    truncating=trunc_type,
    maxlen=max_length,
)

In [None]:
# Peek at the first three padded training sequences.
training_padded[0:3]

In [None]:
# Sanity check: expected to be (number of training rows, max_length).
training_padded.shape

In [None]:
# Encode the test text with the tokenizer that was fitted on the training text.
testing_sequences = tokenizer.texts_to_sequences(testing_data.text)
# Truncate on the same side as the training data. pad_sequences defaults to
# truncating='pre', so omitting the argument would keep the *last* max_length
# tokens of long texts here while the training set keeps the *first* ones.
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, truncating=trunc_type)

In [None]:
# Peek at the first three padded test sequences.
testing_padded[0:3]

In [None]:
# Sanity check: expected to be (number of test rows, max_length).
testing_padded.shape

### Neural Network

In [None]:
# Minimal text classifier, assembled layer by layer: embed each token,
# flatten the sequence of embeddings, and feed it to one sigmoid unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Flatten())
# Optional hidden layer, currently disabled:
# model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Binary-classification training setup: Adam optimizer, binary
# cross-entropy loss, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for five epochs on the padded training sequences, using the
# `target` column of the training frame as labels.
model.fit(
    x=training_padded,
    y=training_data["target"],
    epochs=5,
)

In [None]:
# Score the padded test sequences. The model's last layer is a single sigmoid
# unit, so each row holds one score between 0 and 1.
testing_target = model.predict(testing_padded)

In [None]:
# Display the raw prediction scores.
testing_target

In [None]:
# Shape of the predictions; one score per test row is expected given the
# single-unit output layer.
testing_target.shape

In [None]:
# Check the container type returned by model.predict (presumably a NumPy
# array, since later cells index and cast it like one).
type(testing_target)

In [None]:
# Spot-check a single prediction score (row 900, the only output column).
print(testing_target[900][0])

In [None]:
# Binarize the prediction scores in place: 1 where score > 0.5, else 0.
# The comparison is vectorized over every row rather than looping over a
# hard-coded row count (3263), which would raise IndexError on a smaller test
# set and leave extra rows unthresholded on a larger one. The array keeps its
# float dtype here; the boolean result is stored as 0.0 / 1.0.
testing_target[:, 0] = testing_target[:, 0] > 0.5

In [None]:
# Display the predictions after thresholding.
testing_target

In [None]:
# Cast the predictions to integers so the submission holds integer labels.
testing_target = testing_target.astype(int)

In [None]:
# Display the predictions after the integer cast.
testing_target

In [None]:
# Assemble the submission frame: test ids alongside their predicted labels.
# Column 0 is selected so the single-column prediction array becomes 1-D.
sub = pd.DataFrame({
    "id": testing_data["id"],
    "target": testing_target[:, 0],
})

In [None]:
# Display the submission frame before writing it out.
sub

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
sub.to_csv("submission.csv", index = False)