In [247]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer


import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelBinarizer

In [248]:
np.random.seed(42)

In [249]:
raw_dataset = pd.read_csv("data/file.csv", index_col=0).to_numpy()

In [250]:
np.random.shuffle(raw_dataset)

In [251]:
WHOLE_DATASET_LEN = 20000
lb = LabelBinarizer()
X, y  = raw_dataset[:, 0], lb.fit_transform(raw_dataset[:, 1])
X, y  = X[:WHOLE_DATASET_LEN], y[:WHOLE_DATASET_LEN]

In [252]:
X_train, X_test = X[:int(WHOLE_DATASET_LEN*0.7)], X[int(WHOLE_DATASET_LEN*0.7):]
y_train, y_test = y[:int(WHOLE_DATASET_LEN*0.7)], y[int(WHOLE_DATASET_LEN*0.7):]

In [253]:
test_set_len = int(len(X_test) * 0.7)
X_test, X_valid = X_test[:test_set_len], X_test[test_set_len:]
y_test, y_valid = y_test[:test_set_len], y_test[test_set_len:]

# BERT

In [254]:
#tiny bert l=2 h=128
tiny_bert_model_url = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/2"
bert_preprocessor_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

In [255]:
def build_bert_model(preprocessor_url, model_url):
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(bert_preprocessor_url, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tiny_bert_model_url, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(3, activation="softmax", name='classifier')(net)
    return tf.keras.Model(text_input, net)

In [256]:
tiny_bert_model = build_bert_model(bert_preprocessor_url, tiny_bert_model_url)

In [257]:
loss = tf.keras.losses.CategoricalCrossentropy()
metric = tf.keras.metrics.CategoricalCrossentropy()

In [258]:
epochs = 8 
steps_pre_epoch = len(X_train) 
num_train_steps = steps_pre_epoch * epochs
num_warmup_steps = int(0.1 * num_train_steps)

init_lr = 3e-5
optimizer = optimization.create_optimizer(
    init_lr = init_lr,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    optimizer_type='adamw'
)

In [259]:
tiny_bert_model.compile(optimizer=optimizer, loss=loss, metrics=[metric, 'accuracy'])

In [260]:
tiny_bert_model_history = tiny_bert_model.fit(x=X_train, y=y_train,  validation_data=(X_valid, y_valid), epochs=epochs)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [261]:
tiny_bert_model.evaluate(X_test,y_test)



[0.8046244382858276, 0.8046244382858276, 0.6349999904632568]

In [262]:
mini_bert_model_url = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/2"

In [263]:
mini_bert_model = build_bert_model(bert_preprocessor_url, tiny_bert_model_url)


In [264]:
mini_bert_model.compile(optimizer=optimizer, loss=loss, metrics=[metric, 'accuracy'])

In [265]:
mini_bert_model_history = mini_bert_model.fit(x=X_train, y=y_train,  validation_data=(X_valid, y_valid), epochs=epochs)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [266]:
mini_bert_model.evaluate(X_test,y_test)



[0.564741849899292, 0.564741849899292, 0.772857129573822]

In [267]:
medium_bert_model_url = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/2"

In [268]:

medium_bert_model = build_bert_model(bert_preprocessor_url, tiny_bert_model_url)

In [269]:

medium_bert_model.compile(optimizer=optimizer, loss=loss, metrics=[metric, 'accuracy'])


In [270]:
medium_bert_model_history = mini_bert_model.fit(x=X_train, y=y_train,  validation_data=(X_valid, y_valid), epochs=epochs)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [272]:
medium_bert_model.evaluate(X_test,y_test)



[1.1146581172943115, 1.1146581172943115, 0.39595237374305725]

# Biblio
https://www.tensorflow.org/text/tutorials/classify_text_with_bert