In [1]:
import numpy as np
import tensorflow as tf
import transformers
from transformers import TFAutoModelForSequenceClassification

dataset = [
    ("the quick brown fox jumps over the lazy dog", 0),
    ("some people prefer dogs to cats", 0),
    ("cats are often more independent than dogs", 1),
    ("my cat is quite cuddly", 1),
    ("dogs are often considered man's best friend", 0),
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
sequences = [sequence for sequence, label in dataset]
labels = np.array([label for sequence, label in dataset])

sequences = np.array(sequences)
labels = np.array(labels)

In [3]:
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
input_ids = [tokenizer.encode(sequence, add_special_tokens=True) for sequence in sequences]
max_length = max(len(sequence) for sequence in input_ids)
input_ids = [sequence + [0] * (max_length - len(sequence)) for sequence in input_ids]
input_ids = np.array(input_ids)

In [4]:
def create_dataset(input_ids, labels, batch_size=32, shuffle=True):
    dataset = tf.data.Dataset.from_tensor_slices((input_ids, labels))
    if shuffle:
        dataset = dataset.shuffle(input_ids.shape[0])
    dataset = dataset.batch(batch_size)
    return dataset

In [5]:
model = TFAutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
dataset = create_dataset(input_ids, labels)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
accuracy_object = tf.keras.metrics.SparseCategoricalAccuracy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

def train_step(input_ids, labels):
    with tf.GradientTape() as tape:
        logits = model(input_ids, training=True)[0]
        loss_value = loss_object(labels, logits)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_accuracy(labels, logits)
    return loss_value, train_accuracy.result()

for epoch in range(40):
    for input_ids, labels in dataset:
        loss_value, accuracy = train_step(input_ids, labels)
    print(f'Epoch {epoch + 1}: loss = {loss_value}, accuracy = {accuracy}')

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1: loss = 0.7425371408462524, accuracy = 0.20000000298023224
Epoch 2: loss = 0.6634699106216431, accuracy = 0.4000000059604645
Epoch 3: loss = 0.6946564316749573, accuracy = 0.46666666865348816
Epoch 4: loss = 0.6545277833938599, accuracy = 0.5
Epoch 5: loss = 0.6586092710494995, accuracy = 0.5600000023841858
Epoch 6: loss = 0.6782515048980713, accuracy = 0.6000000238418579
Epoch 7: loss = 0.6282340884208679, accuracy = 0.6571428775787354
Epoch 8: loss = 0.6625038981437683, accuracy = 0.6499999761581421
Epoch 9: loss = 0.6406915783882141, accuracy = 0.644444465637207
Epoch 10: loss = 0.6326515674591064, accuracy = 0.6399999856948853
Epoch 11: loss = 0.5980828404426575, accuracy = 0.6727272868156433
Epoch 12: loss = 0.5878999829292297, accuracy = 0.699999988079071
Epoch 13: loss = 0.584142804145813, accuracy = 0.7230769395828247
Epoch 14: loss = 0.5384200811386108, accuracy = 0.7428571581840515
Epoch 15: loss = 0.5261024236679077, accuracy = 0.7599999904632568
Epoch 16: loss = 0.5

TypeError: 'NoneType' object is not subscriptable

<Figure size 600x300 with 0 Axes>