In [1]:
import numpy as np

In [2]:
# Read the three saved NumPy arrays from the working directory.
# The id and mask arrays are cast to int32 to match the dtype declared on the
# Keras Input layers; the label array is used exactly as stored.
with open('xids.npy', 'rb') as ids_file:
    Xids = np.load(ids_file).astype(np.int32)

with open('xmask.npy', 'rb') as mask_file:
    Xmask = np.load(mask_file).astype(np.int32)

with open('labels.npy', 'rb') as labels_file:
    labels = np.load(labels_file)

In [3]:
import tensorflow as tf

In [4]:
# Slice the three arrays along their first axis so that each dataset element
# is one (ids, mask, label) triple.
example_arrays = (Xids, Xmask, labels)
dataset = tf.data.Dataset.from_tensor_slices(example_arrays)

In [5]:
def map_function(ids, masks, labels):
    """Repack one (ids, masks, labels) triple into (features, labels).

    The features are returned as a dict keyed by "input_ids" and
    "attention_mask"; `labels` is passed through unchanged.
    """
    features = dict(input_ids=ids, attention_mask=masks)
    return features, labels

# Convert every element to the ({"input_ids", "attention_mask"}, labels) layout.
dataset = dataset.map(map_func=map_function)

In [6]:
# Shuffle the examples once, then group them into batches of 32.
#
# reshuffle_each_iteration=False is essential here: the train/validation split
# is made with take()/skip() on this pipeline, and by default shuffle() draws a
# new order every time the dataset is iterated (every epoch, and again for each
# validation pass). With the default, the two subsets would be re-drawn on each
# pass and validation examples would leak into training. Fixing the order keeps
# the two subsets disjoint for the whole run.
# The trade-off is that the batch order no longer changes between epochs; if
# per-epoch shuffling is wanted, shuffle the training subset after the split.
dataset = dataset.shuffle(100000, reshuffle_each_iteration=False).batch(32)

In [7]:
# Number of elements in the dataset; since the pipeline has been batched with
# .batch(32) by this point, this counts batches rather than single examples.
DS_LEN = len(dataset)

In [8]:
SPLIT = 0.85

# Index of the first validation batch: the leading SPLIT fraction of batches
# goes to training, everything after it to validation.
# NOTE(review): take()/skip() only give disjoint subsets if `dataset` yields its
# elements in the same order on every iteration - confirm the upstream shuffle
# does not reshuffle per iteration.
split_point = round(DS_LEN*SPLIT)

train = dataset.take(split_point)
val = dataset.skip(split_point)

# Remove the name `dataset` from the namespace; only `train` and `val` remain.
del dataset

In [9]:
from transformers import AutoTokenizer, TFAutoModel
import tensorflow as tf

# Load the pretrained "bert-base-cased" checkpoint as a TF model, then mark
# each of its top-level layers as non-trainable so fitting the classifier does
# not update the BERT weights.
bert = TFAutoModel.from_pretrained("bert-base-cased")

for layer in bert.layers:
    layer.trainable = False

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [10]:
SEQ_LEN = 50

# Two int32 inputs of length SEQ_LEN. Their names match the keys of the feature
# dict produced by map_function, which is how Keras routes the dataset columns.
input_ids = tf.keras.layers.Input(
    shape=(SEQ_LEN,), name="input_ids", dtype=np.int32
)
attention_mask = tf.keras.layers.Input(
    shape=(SEQ_LEN,), name="attention_mask", dtype=np.int32
)

# Element 0 of the BERT output is used as the per-token embedding sequence.
embeddings = bert(input_ids, attention_mask)[0]

# Classifier head, applied in the listed order: max-pool over the sequence
# axis, batch-norm, then two ReLU dense layers each followed by 10% dropout.
hidden_stack = [
    tf.keras.layers.GlobalMaxPool1D(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dropout(0.1),
]
X = embeddings
for head_layer in hidden_stack:
    X = head_layer(X)

# Final 5-unit softmax output layer.
y = tf.keras.layers.Dense(5, activation="softmax")(X)

model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=y)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 50)]         0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 50)]         0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  108310272   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 50,                                            

In [11]:
# Training configuration: Adam with learning rate 0.01, categorical
# cross-entropy loss, and categorical accuracy reported under the name
# "accuracy".
# NOTE(review): CategoricalCrossentropy / CategoricalAccuracy expect one-hot
# label vectors - confirm that is how labels.npy is encoded.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy(name='accuracy')

In [12]:
# Attach the optimizer, loss and accuracy metric to the model.
model.compile(optimizer=optimizer, loss=loss, metrics=[acc])

In [None]:
# Train for 10 epochs on the training batches, evaluating on the validation
# batches after each epoch; the returned History object is kept in `history`.
history = model.fit(train, epochs=10, validation_data=val)

Epoch 1/10
 37/227 [===>..........................] - ETA: 3:38 - loss: 1.7136 - accuracy: 0.3041