In [2]:
import pandas as pd
from sklearn import model_selection
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text # Should not be removed as it is used in side-effect.
from official.nlp import optimization
import matplotlib.pyplot as plt


In [58]:
# Location of the preprocessed dataset, relative to the working directory.
PREPROCESSED_DATA_PATH = 'assets/preprocessed.csv'

# The CSV is decoded as ISO-8859-1 (Latin-1) instead of pandas' UTF-8 default.
df = pd.read_csv(PREPROCESSED_DATA_PATH, encoding='ISO-8859-1')

# Extract the model inputs ('text') and the targets ('label') as NumPy arrays.
data, labels = (df[column].to_numpy() for column in ('text', 'label'))

In [59]:
# Hold out 20% of the examples for the final test evaluation; the fixed seed
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, labels, test_size=0.2, random_state=42)

# Fit ONE encoder on the complete label set and reuse it for both splits.
# Fitting a separate encoder per split could assign different integer codes
# (and produce different one-hot widths) whenever a split is missing a class,
# which would silently misalign the train and test targets.
label_encoder = LabelEncoder().fit(labels)
NUM_CLASSES = len(label_encoder.classes_)

# One-hot encode with an explicit width so both arrays have exactly
# NUM_CLASSES columns even if a split lacks the highest-numbered class.
y_train = tf.keras.utils.to_categorical(
    label_encoder.transform(y_train), num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(
    label_encoder.transform(y_test), num_classes=NUM_CLASSES)


[[1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 ...
 [1. 0. 0.]
 [0. 1. 0.]
 [1. 0. 0.]]


In [45]:
# TF Hub handles: a small uncased English BERT encoder (L-4, H-512, A-8) and
# the bert_en_uncased text-preprocessing model that feeds it.
tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'


def bert_text_classification(num_classes=5):
    """Build an uncompiled BERT text classifier as a Keras functional model.

    Raw strings pass through the TF Hub preprocessing layer and then the
    trainable BERT encoder. The encoder's ``pooled_output`` goes through
    dropout (rate 0.2) and a softmax dense layer named ``classifier``.

    Args:
        num_classes: Number of units in the softmax output layer. It has to
            equal the width of the one-hot label vectors the model is trained
            on. Defaults to 5, the value that used to be hard-coded.

    Returns:
        A ``tf.keras.Model`` that maps a batch of strings to per-class
        probabilities with ``num_classes`` columns.
    """
    # Scalar string input: one raw text per example.
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')

    # Preprocessing turns the raw text into the input dict the encoder expects.
    preprocessing_layer = hub.KerasLayer(
        tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)

    # trainable=True so the BERT weights are fine-tuned, not just the head.
    encoder = hub.KerasLayer(tfhub_handle_encoder,
                             trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)

    # Classification head on top of the pooled sequence representation.
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.2)(net)
    net = tf.keras.layers.Dense(
        num_classes, activation='softmax', name='classifier')(net)

    return tf.keras.Model(text_input, net)


# Size the output layer from the one-hot training labels instead of relying on
# a hard-coded class count, so the softmax width always matches the targets.
model = bert_text_classification(num_classes=y_train.shape[1])


In [48]:
# The classifier ends in a softmax and the targets are one-hot vectors, so the
# loss is categorical cross-entropy computed on probabilities.
loss = tf.keras.losses.CategoricalCrossentropy()

BATCH_SIZE = 128
EPOCHS = 16
LEARNING_RATE = 1e-05
# Fraction of X_train that model.fit holds out for validation. Keep this in
# sync with the `validation_split` argument passed to model.fit.
VALIDATION_SPLIT = 0.2

# Only the non-validation part of X_train produces optimizer steps, and Keras
# also runs the final partial batch, so the batch count is rounded up.
num_train_examples = int(len(X_train) * (1 - VALIDATION_SPLIT))
steps_per_epoch = -(-num_train_examples // BATCH_SIZE)  # ceiling division
num_train_steps = steps_per_epoch * EPOCHS
# Warm up over the first 10% of all training steps. num_train_steps is already
# counted in batches, so it must not be divided by BATCH_SIZE again.
num_warmup_steps = int(0.1 * num_train_steps)

# AdamW with warm-up followed by learning-rate decay over num_train_steps,
# following the optimizer family BERT was originally trained with (Adam).
optimizer = optimization.create_optimizer(init_lr=LEARNING_RATE,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')

# -- compile the model --
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['accuracy'])


In [49]:
# Fine-tune the classifier. 20% of the training arrays is held out for
# validation, and the per-epoch metrics are kept in `history` for plotting.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/16
  4/226 [..............................] - ETA: 54:02 - loss: 2.0034 - accuracy: 0.1738

KeyboardInterrupt: 

In [None]:
# Per-epoch training and validation metrics recorded by model.fit.
history_dict = history.history
acc, val_acc = history_dict['accuracy'], history_dict['val_accuracy']
loss, val_loss = history_dict['loss'], history_dict['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(12, 8))

ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
# Keep the auto-scaled lower bound but pin the top of the axis at 1.
ax_acc.set_ylim([min(ax_acc.get_ylim()), 1])
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Loss - Cross Entropy')
ax_loss.set_xlabel('epoch')
ax_loss.set_ylim([0, 1.0])
ax_loss.set_title('Training and Validation Loss')

plt.show()


In [None]:
# Score the model on the held-out test split; evaluate returns the loss
# followed by the compiled metrics (here: accuracy).
test_scores = model.evaluate(x=X_test, y=y_test)
loss, acc = test_scores
print("test loss: ", loss, ", test acc: ", 100*acc, "%")


In [None]:
# Persist the trained classifier under the 'bert_classifier_model' path.
export_path = 'bert_classifier_model'
model.save(export_path)
