#### Fine Tuning with Keras
- Transformers doesn't seem to work with Keras 3

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
import numpy as np
from pprint import pprint

# Checkpoint name shared by the tokenizer and the models loaded in later cells.
checkpoint = 'bert-base-uncased'

# MRPC configuration of the GLUE dataset.
raw_data = load_dataset('glue', 'mrpc')

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(example):
    """Tokenize the ``sentence1`` / ``sentence2`` columns as a sentence pair.

    Args:
        example: Mapping exposing ``'sentence1'`` and ``'sentence2'`` entries
            (a single example or a batch of them when used with
            ``map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair, with
        truncation enabled. No padding is requested here.
    """
    first_sentence = example['sentence1']
    second_sentence = example['sentence2']
    return tokenizer(first_sentence, second_sentence, truncation=True)

# Apply the tokenizer to every split, one batch of examples at a time.
tokenized_datasets = raw_data.map(tokenize_function, batched=True)

# Collator that pads each batch and returns TensorFlow tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer,
                                        return_tensors='tf')


def _to_tf_dataset(split_name, shuffle):
    """Convert one tokenized split into a batched TensorFlow dataset.

    Args:
        split_name: Key of the split inside ``tokenized_datasets``.
        shuffle: Forwarded unchanged to ``to_tf_dataset``.

    Returns:
        The object returned by ``to_tf_dataset`` for that split, using the
        model input columns, the ``labels`` column, ``data_collator`` and a
        batch size of 8.
    """
    return tokenized_datasets[split_name].to_tf_dataset(
        columns=['attention_mask', 'input_ids', 'token_type_ids'],
        label_cols=['labels'],
        shuffle=shuffle,
        collate_fn=data_collator,
        batch_size=8,
    )


# Only the training split is shuffled; validation keeps its original order.
tf_train_data = _to_tf_dataset('train', shuffle=True)
tf_validation_data = _to_tf_dataset('validation', shuffle=False)

  from .autonotebook import tqdm as notebook_tqdm
2024-02-22 20:54:06.190109: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-22 20:54:06.495347: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-22 20:54:06.495383: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-22 20:54:06.546725: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-22 20:54:06.6

In [2]:
from transformers import TFAutoModelForSequenceClassification

# Load the checkpoint with a 2-label sequence-classification head. The load
# message below reports that the classifier weights are newly initialized.
model = TFAutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Baseline run: the model outputs logits, hence from_logits=True.
baseline_loss = SparseCategoricalCrossentropy(from_logits=True)

# NOTE(review): the string 'adam' leaves the learning rate at the Keras
# default, which is presumably much larger than the 5e-5 used in the
# scheduler section further down -- confirm this baseline is intentional.
model.compile(optimizer='adam', loss=baseline_loss, metrics=['accuracy'])

# Kept as the last expression so the cell still displays the History object.
model.fit(tf_train_data, validation_data=tf_validation_data, epochs=3)

Epoch 1/3


2024-02-22 20:54:46.431549: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-02-22 20:54:47.757674: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fdf86c37580 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-02-22 20:54:47.757709: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3080 Ti Laptop GPU, Compute Capability 8.6
2024-02-22 20:54:47.769942: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-02-22 20:54:47.797895: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
I0000 00:00:1708664087.870901  797920 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7fe0b7f28550>

#### Learning Rate Scheduler
- Uses a Keras `PolynomialDecay` learning-rate schedule
- The Transformers library also provides `create_optimizer()`, a shortcut for creating an AdamW optimizer with learning-rate decay

In [5]:
import tensorflow as tf
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam

num_epochs = 3
batch_size = 8

# Total number of optimizer steps: length of the (already batched) training
# dataset multiplied by the number of epochs.
steps_per_epoch = len(tf_train_data)
num_train_steps = steps_per_epoch * num_epochs

# Learning rate goes from 5e-5 to 0.0 over the whole training run.
lr_scheduler = PolynomialDecay(initial_learning_rate=5e-5,
                               end_learning_rate=0.0,
                               decay_steps=num_train_steps)

opt = Adam(learning_rate=lr_scheduler)

In [7]:
# Reload the pretrained checkpoint so this run starts from the original
# weights rather than from the model trained in the previous section.
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

# The model outputs logits, hence from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Build a fresh optimizer in this cell instead of reusing one from an earlier
# cell. A Keras optimizer keeps its own step counter and the decay schedule is
# evaluated at that counter, so re-running this cell with an already-used
# optimizer would resume at the end of the schedule (learning rate 0.0)
# instead of restarting from 5e-5.
opt = Adam(learning_rate=lr_scheduler)

model.compile(optimizer=opt,
              loss=loss,
              metrics=['accuracy'])

# Kept as the last expression so the cell still displays the History object.
model.fit(tf_train_data,
          validation_data=tf_validation_data,
          epochs=num_epochs)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7fdf3dceb790>

#### Model Predictions

In [9]:
# Run the fine-tuned model over the validation batches and keep the logits.
prediction_output = model.predict(tf_validation_data)
preds = prediction_output['logits']

# Predicted class = index of the largest logit in each row.
class_preds = np.argmax(preds, axis=1)

print(preds.shape, class_preds.shape)

(408, 2) (408,)


In [13]:
import evaluate

# GLUE/MRPC metric; the output below shows it reports accuracy and F1.
metric = evaluate.load('glue', 'mrpc')

# Ground-truth labels come from the raw (untokenized) validation split.
validation_labels = raw_data['validation']['label']

metric.compute(predictions=class_preds, references=validation_labels)

{'accuracy': 0.8406862745098039, 'f1': 0.8900169204737732}