In [None]:
import os

# Select the Keras backend through the environment. This assignment must run
# before the keras_core import below, which is why it sits between imports.
os.environ["KERAS_BACKEND"] = "torch"

# keras_core needs to be imported AFTER the backend is set
import keras_core as keras
import keras_nlp
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
import numpy as np

In [None]:
# Read the dataset from a CSV file (replace the placeholder path with your own).
# Expected columns:
#   'text_to_classify' - the text fed to the classifier
#   'category'         - the label, stored as integers
csv_path = 'YOUR_FILE_NAME.csv'
data = pd.read_csv(csv_path)

In [None]:
# Keep only rows that have both a label and text to classify, then make sure
# the label column is integer-typed.
# Rows with missing text are dropped as well: a NaN in 'text_to_classify'
# cannot be tokenized and would otherwise only fail later, during training.
# The chained form builds a fresh frame instead of assigning a column on the
# result of dropna().
data = (
    data
    .dropna(subset=['category', 'text_to_classify'])
    .assign(category=lambda frame: frame['category'].astype(int))
)
data.head()

In [None]:
# Model inputs are the raw text column; targets are the integer category column
features, labels = data['text_to_classify'], data['category']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build a classifier on top of the pre-trained TinyBERT preset.
# The output layer gets one unit per distinct value in the 'category' column.
# NOTE(review): this assumes the labels are exactly 0..K-1; with gaps or a
# different starting value a label could fall outside the output range - confirm.
category_values = data['category'].unique()
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_tiny_en_uncased",
    num_classes=len(category_values),
)

In [None]:
# Configure fine-tuning: sparse categorical cross-entropy on raw logits
# (integer labels, no one-hot encoding), Adam with a learning rate of 5e-5,
# and accuracy as the reported metric.
fine_tune_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
fine_tune_optimizer = keras.optimizers.Adam(learning_rate=5e-5)
classifier.compile(
    optimizer=fine_tune_optimizer,
    loss=fine_tune_loss,
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Fine-tune on the training split. `validation_split=0.2` sets aside 20% of
# that split for validation, so 80% of it is used for the weight updates.
# A single epoch keeps the training time short.
classifier.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=1,
    validation_split=0.2,
)

In [None]:
# Smoke test: push a single example string through the model and display
# the output row for that one input
sample_output = classifier.predict(['test'])
sample_output[0]

In [None]:
# Evaluate model on the held-out test split.
# The loss was configured with `from_logits=True`, so despite the variable
# name these are raw per-class scores (one row per test sample), not
# normalized probabilities. The argmax is the same either way.
probabilities = np.asarray(classifier.predict(x_test))

# Predicted class = index of the highest score along the class axis.
# The argmax must run over each full row: applied to a single score it
# would always return 0.
predictions = np.argmax(probabilities, axis=-1)

# scikit-learn's default average="binary" only works for two-class targets and
# raises for multiclass ones. Keep it for the binary case and switch to
# macro averaging (unweighted mean over classes) when there are more outputs.
num_outputs = probabilities.shape[-1]
average = "binary" if num_outputs <= 2 else "macro"

precision = precision_score(y_test, predictions, average=average)
recall = recall_score(y_test, predictions, average=average)
f1 = f1_score(y_test, predictions, average=average)

print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)