# Natural Language Processing (NLP) with Tensorflow

# Binary Text Classification

In [1]:
import pandas as pd

df = pd.read_csv('/content/yelp_labelled.txt', names=['sentence', 'label'], sep='\t')

In [2]:
df.tail()

Unnamed: 0,sentence,label
995,I think food should have flavor and texture an...,0
996,Appetite instantly gone.,0
997,Overall I was not impressed and would not go b...,0
998,"The whole experience was underwhelming, and I ...",0
999,"Then, as if I hadn't wasted enough of my life ...",0


In [3]:
from sklearn.model_selection import train_test_split

kalimat = df['sentence'].values
y = df['label'].values
kalimat_latih, kalimat_test, y_latih, y_test = train_test_split(kalimat, y, test_size=0.2)

In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words=250, oov_token='x')
tokenizer.fit_on_texts(kalimat_latih)
tokenizer.fit_on_texts(kalimat_test)

sekuens_latih = tokenizer.texts_to_sequences(kalimat_latih)
sekuens_test = tokenizer.texts_to_sequences(kalimat_test)

padded_latih = pad_sequences(sekuens_latih)
padded_test = pad_sequences(sekuens_test)

In [5]:
import tensorflow as tf

model = tf.keras.Sequential([
                             tf.keras.layers.Embedding(250, 16, input_length=20),
                             tf.keras.layers.GlobalAveragePooling1D(),
                             tf.keras.layers.Dense(24, activation='relu'),
                             tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])

In [6]:
num_epochs = 30
history = model.fit(padded_latih, 
                    y_latih, 
                    epochs=num_epochs, 
                    validation_data=(padded_test, y_test), 
                    verbose=2)

Epoch 1/30
25/25 - 1s - loss: 0.6933 - accuracy: 0.5088 - val_loss: 0.6936 - val_accuracy: 0.4650
Epoch 2/30
25/25 - 0s - loss: 0.6919 - accuracy: 0.5088 - val_loss: 0.6929 - val_accuracy: 0.4650
Epoch 3/30
25/25 - 0s - loss: 0.6900 - accuracy: 0.5088 - val_loss: 0.6909 - val_accuracy: 0.4650
Epoch 4/30
25/25 - 0s - loss: 0.6865 - accuracy: 0.5975 - val_loss: 0.6873 - val_accuracy: 0.5400
Epoch 5/30
25/25 - 0s - loss: 0.6803 - accuracy: 0.6687 - val_loss: 0.6812 - val_accuracy: 0.6300
Epoch 6/30
25/25 - 0s - loss: 0.6718 - accuracy: 0.7013 - val_loss: 0.6723 - val_accuracy: 0.6850
Epoch 7/30
25/25 - 0s - loss: 0.6599 - accuracy: 0.7050 - val_loss: 0.6626 - val_accuracy: 0.7100
Epoch 8/30
25/25 - 0s - loss: 0.6438 - accuracy: 0.7337 - val_loss: 0.6509 - val_accuracy: 0.7000
Epoch 9/30
25/25 - 0s - loss: 0.6238 - accuracy: 0.7487 - val_loss: 0.6330 - val_accuracy: 0.7400
Epoch 10/30
25/25 - 0s - loss: 0.6015 - accuracy: 0.7613 - val_loss: 0.6194 - val_accuracy: 0.7150
Epoch 11/30
25/25 -

# Long Short-Term Memory (LSTM) Layer

In [7]:
# Contoh penggunaan LSTM dapat dilihat pada kode di bawah. 
# Kita dapat meletakkan layer LSTM setelah layer embedding dan sebelum Dense layer.

# model = tf.keras.Sequential([
#    tf.keras.layers.Embedding(input_dim=5000, output_dim=16),
#    tf.keras.layers.LSTM(64),
#    tf.keras.layers.Dense(128, activation='relu'),
#    tf.keras.layers.Dense(5, activation='softmax')
# ])