In [41]:
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer
from datasets import load_dataset

In [42]:
model = TFAutoModel.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [43]:
# Load the dataset
emotion_dataset = load_dataset("google-research-datasets/go_emotions", "simplified")
print(emotion_dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 43410
    })
    validation: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5426
    })
    test: Dataset({
        features: ['text', 'labels', 'id'],
        num_rows: 5427
    })
})


In [44]:
# Select a larger subset of examples for better training
# Using 1000 examples instead of 200 for better learning while still keeping training fast
small_train_dataset = emotion_dataset['train'].select(range(1000))
small_test_dataset = emotion_dataset['test'].select(range(200))

# Create a new small dataset with the reduced splits
small_emotion_dataset = {
    'train': small_train_dataset,
    'test': small_test_dataset
}

print(f"Original train size: {len(emotion_dataset['train'])}")
print(f"Small train size: {len(small_train_dataset)}")
print(f"Original test size: {len(emotion_dataset['test'])}")
print(f"Small test size: {len(small_test_dataset)}")

Original train size: 43410
Small train size: 1000
Original test size: 5427
Small test size: 200


In [45]:
emotions_id2label = {
    0: 'admiration',
    1: 'amusement',
    2: 'anger',
    3: 'annoyance',
    4: 'approval',
    5: 'caring',
    6: 'confusion',
    7: 'curiosity',
    8: 'desire',
    9: 'disappointment',
    10: 'disapproval',
    11: 'disgust',
    12: 'embarrassment',
    13: 'excitement',
    14: 'fear',
    15: 'gratitude',
    16: 'grief',
    17: 'joy',
    18: 'love',
    19: 'nervousness',
    20: 'optimism',
    21: 'pride',
    22: 'realization',
    23: 'relief',
    24: 'remorse',
    25: 'sadness',
    26: 'surprise',
    27: 'neutral'  # Last entry (no comma)
}

emotions_label2id = {v: k for k, v in emotions_id2label.items()}

# Print dataset info to verify we understand what we're working with
print("Sample of emotions dataset:")
print(small_emotion_dataset["train"][0])
print(f"Number of labels: {len(emotions_id2label)}")

Sample of emotions dataset:
{'text': "My favourite food is anything I didn't have to cook myself.", 'labels': [27], 'id': 'eebbqej'}
Number of labels: 28


In [46]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True, return_tensors='tf')

# Tokenize the small dataset
small_emotions_encoded = {}
small_emotions_encoded['train'] = small_emotion_dataset['train'].map(tokenize, batched=True, batch_size=None)
small_emotions_encoded['test'] = small_emotion_dataset['test'].map(tokenize, batched=True, batch_size=None)

print(small_emotions_encoded['train'])

Dataset({
    features: ['text', 'labels', 'id', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 1000
})


In [47]:
# We need to have numerical labels instead of lists, so let's select the first label for each example
# Let's perform a preprocessing step to convert labels to simple integers first

def convert_labels_to_int(example):
    """Convert the list of labels to a single integer (take the first one)"""
    # If the labels list is not empty, take the first label
    if example['labels'] and len(example['labels']) > 0:
        example['label_int'] = int(example['labels'][0])  # Ensure it's an integer
    else:
        # Default to 'neutral' (27) if no labels
        example['label_int'] = 27
    return example

# Apply the conversion to add a new integer label column
small_emotions_encoded['train'] = small_emotions_encoded['train'].map(convert_labels_to_int)
small_emotions_encoded['test'] = small_emotions_encoded['test'].map(convert_labels_to_int)

# Create TensorFlow datasets manually for better control
import numpy as np

# For training data
train_input_ids = np.array(small_emotions_encoded['train']['input_ids'])
train_attention_mask = np.array(small_emotions_encoded['train']['attention_mask'])
train_token_type_ids = np.array(small_emotions_encoded['train']['token_type_ids'])
train_labels = np.array(small_emotions_encoded['train']['label_int'])

# For test data
test_input_ids = np.array(small_emotions_encoded['test']['input_ids'])
test_attention_mask = np.array(small_emotions_encoded['test']['attention_mask'])
test_token_type_ids = np.array(small_emotions_encoded['test']['token_type_ids'])
test_labels = np.array(small_emotions_encoded['test']['label_int'])

# setting BATCH_SIZE to a smaller value for the smaller dataset
BATCH_SIZE = 32

def order(inputs_dict, labels):
    '''
    This function will group all the inputs of BERT
    into a single dictionary and then output it with
    labels.
    '''    
    return inputs_dict, labels

# Create TensorFlow datasets
train_features = {
    'input_ids': train_input_ids,
    'attention_mask': train_attention_mask,
    'token_type_ids': train_token_type_ids
}

test_features = {
    'input_ids': test_input_ids,
    'attention_mask': test_attention_mask,
    'token_type_ids': test_token_type_ids
}

# Create the datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
train_dataset = train_dataset.batch(BATCH_SIZE).shuffle(1000)

test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels))
test_dataset = test_dataset.batch(BATCH_SIZE)

Map: 100%|██████████| 200/200 [00:00<00:00, 9104.10 examples/s]


In [48]:
inp, out = next(iter(train_dataset)) # a batch from train_dataset
print(inp, '\n\n', out)

{'input_ids': <tf.Tensor: shape=(32, 46), dtype=int64, numpy=
array([[ 101, 1045, 2572, ...,    0,    0,    0],
       [ 101, 2108, 2619, ...,    0,    0,    0],
       [ 101, 3407, 9850, ...,    0,    0,    0],
       ...,
       [ 101, 1045, 2150, ...,    0,    0,    0],
       [ 101, 2024, 2017, ...,    0,    0,    0],
       [ 101, 2049, 7929, ...,    0,    0,    0]])>, 'attention_mask': <tf.Tensor: shape=(32, 46), dtype=int64, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>, 'token_type_ids': <tf.Tensor: shape=(32, 46), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>} 

 tf.Tensor(
[ 7  5  1 20  9  1  1  1 27 26  4  1 27 27  0  1  0  0 27 10  7

In [49]:
class BERTForClassification(tf.keras.Model):
    
    def __init__(self, bert_model, num_classes):
        super().__init__()
        self.bert = bert_model
        self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')
        
    def call(self, inputs):
        # Make sure we handle the case when inputs is a dictionary
        outputs = self.bert(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            token_type_ids=inputs['token_type_ids'],
            return_dict=True
        )
        pooled_output = outputs.pooler_output
        return self.fc(pooled_output)

In [50]:
# Create a shared emotion prediction function to use with both untrained and trained models
def predict_emotion(text, model):
    """Predict emotion for a given text using the provided model"""
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors='tf', padding=True, truncation=True)
    
    # Get prediction from model
    prediction = model(inputs)
    predicted_class = tf.argmax(prediction, axis=1).numpy()[0]
    predicted_emotion = emotions_id2label[predicted_class]
    
    # Get confidence score
    confidence = tf.nn.softmax(prediction, axis=1).numpy()[0][predicted_class]
    
    return {
        'text': text,
        'emotion': predicted_emotion,
        'confidence': float(confidence)
    }

# Define test texts to use for both untrained and trained models
test_texts = [
    "I'm so happy today!",
    "This makes me really angry.",
    "I'm feeling very sad and disappointed.",
    "That's really interesting, tell me more.",
]

## Analyze Test Texts with Untrained Model

Let's first create and test our model before training to establish a baseline. This will show how the model performs with random weights, which we can compare to the fine-tuned model later.

In [51]:
# Create an untrained model for baseline comparison
untrained_classifier = BERTForClassification(model, num_classes=28)

# Compile the model with the same settings we'll use for training
untrained_classifier.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

print("Predictions with UNTRAINED model (random weights):")
print("---------------------------------------------")

# Get predictions from untrained model using our shared function
for text in test_texts:
    result = predict_emotion(text, untrained_classifier)
    print(f"Text: {result['text']}")
    print(f"Predicted emotion: {result['emotion']}")
    print(f"Confidence: {result['confidence']:.4f}")
    print()

# Also evaluate on test dataset to get baseline accuracy
untrained_loss, untrained_accuracy = untrained_classifier.evaluate(test_dataset, verbose=0)
print(f"Untrained model test accuracy: {untrained_accuracy:.4f} (should be close to random guessing)")
print(f"Random baseline (1/28): {1/28:.4f}")

Predictions with UNTRAINED model (random weights):
---------------------------------------------
Text: I'm so happy today!
Predicted emotion: gratitude
Confidence: 0.0399

Text: This makes me really angry.
Predicted emotion: surprise
Confidence: 0.0395

Text: I'm feeling very sad and disappointed.
Predicted emotion: surprise
Confidence: 0.0393

Text: That's really interesting, tell me more.
Predicted emotion: surprise
Confidence: 0.0393

Untrained model test accuracy: 0.0300 (should be close to random guessing)
Random baseline (1/28): 0.0357


In [52]:
# Update num_classes to match the actual number of emotion classes (28)
classifier = BERTForClassification(model, num_classes=28)

# Add a smaller learning rate and a weight decay for better regularization
classifier.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

In [53]:
# Add more epochs for better training (5 instead of 3)
# Add class_weights to handle imbalanced data
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights to handle imbalanced data
unique_classes = np.unique(train_labels)
class_weights_array = compute_class_weight('balanced', classes=unique_classes, y=train_labels)
class_weights = dict(zip(unique_classes, class_weights_array))

print("Class weights to handle imbalanced data:")
print(class_weights)

history = classifier.fit(
    train_dataset,
    epochs=5,  # More epochs
    validation_data=test_dataset,
    class_weight=class_weights  # Add class weights
)

# Evaluate the model on the test set
test_loss, test_accuracy = classifier.evaluate(test_dataset)
print(f"Test accuracy: {test_accuracy:.4f}")

Class weights to handle imbalanced data:
{0: 0.36075036075036077, 1: 0.6265664160401002, 2: 0.8710801393728222, 3: 0.6868131868131868, 4: 0.6053268765133172, 5: 2.100840336134454, 6: 1.1160714285714286, 7: 0.9157509157509157, 8: 3.5714285714285716, 9: 1.2755102040816326, 10: 0.8710801393728222, 11: 3.9682539682539684, 12: 7.142857142857143, 13: 3.5714285714285716, 14: 3.5714285714285716, 15: 0.8116883116883117, 16: 5.9523809523809526, 17: 1.4285714285714286, 18: 1.152073732718894, 19: 11.904761904761905, 20: 1.7857142857142858, 21: 35.714285714285715, 22: 3.9682539682539684, 23: 35.714285714285715, 24: 4.464285714285714, 25: 1.8796992481203008, 26: 1.6233766233766234, 27: 0.11825922421948912}
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.1800


In [54]:
print("Predictions with TRAINED model:")
print("---------------------------")

# Use the same shared function with our trained model
for text in test_texts:
    result = predict_emotion(text, classifier)
    print(f"Text: {result['text']}")
    print(f"Predicted emotion: {result['emotion']}")
    print(f"Confidence: {result['confidence']:.4f}")
    print()

Predictions with TRAINED model:
---------------------------
Text: I'm so happy today!
Predicted emotion: gratitude
Confidence: 0.0435

Text: This makes me really angry.
Predicted emotion: remorse
Confidence: 0.0394

Text: I'm feeling very sad and disappointed.
Predicted emotion: grief
Confidence: 0.0496

Text: That's really interesting, tell me more.
Predicted emotion: curiosity
Confidence: 0.0382

