# BERT - Training on GoEmotions (Ran on Colab)

## Install requirements and import packages

In [1]:
!pip install pandas numpy scikit-learn torch transformers datasets
## If running locally - uncomment and run the below package install requirements.
"""
!pip install "transformers[torch]"
!pip install "accelerate>=0.26.0"
"""

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

'\n!pip install "transformers[torch]"\n!pip install "accelerate>=0.26.0"\n'

In [2]:
import os
import torch
import numpy as np

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)
from sklearn.metrics import f1_score
from datasets import DatasetDict, Sequence, Value
from torch.nn import BCEWithLogitsLoss
from sklearn.metrics import f1_score, classification_report

## Loading and preparing Dataset

In [3]:
# Pick the best available accelerator: CUDA (e.g. a Colab GPU), then Apple
# Silicon MPS (local runs), otherwise CPU. The getattr guard keeps this working
# on torch builds that have no `torch.backends.mps` module.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Load the GoEmotions "simplified" configuration (train / validation / test).
dataset = load_dataset("go_emotions", "simplified")

# "labels" is a sequence feature; its inner feature carries the class names,
# ordered by class index.
label_names = dataset["train"].features["labels"].feature.names
num_labels = len(label_names)
print(f"{num_labels} emotion labels:", label_names)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/9.40k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/350k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

28 emotion labels: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']


In [4]:
# NOTE: AutoTokenizer is already imported in the imports cell; repeating it here is harmless.
from transformers import AutoTokenizer
# Tokenizer belonging to the same "bert-base-uncased" checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize_data(text):
    """
    Tokenize one batch of examples with the module-level ``tokenizer``.

    Written for ``Dataset.map(..., batched=True)``.

    Args:
        text (dict): a batch of examples. Only its ``"text"`` entry (the raw
            strings) is read, e.g.
            { "text": ["I love this!", "So sad today"], ... }

    Returns:
        Whatever the tokenizer returns for that batch. The call requests
        truncation and padding to a fixed length of 128 tokens.
    """
    # Fixed-length encoding: truncate longer texts, pad shorter ones to 128.
    encoding_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    sentences = text["text"]
    return tokenizer(sentences, **encoding_options)

# Tokenize every split in batches; the tokenizer outputs are added as new columns.
tokenize_dataset = dataset.map(tokenize_data, batched=True)
# Show the column names of each split after tokenization.
print(tokenize_dataset.column_names)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

{'train': ['text', 'labels', 'id', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['text', 'labels', 'id', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['text', 'labels', 'id', 'input_ids', 'token_type_ids', 'attention_mask']}


In [5]:
# Building label vectors
# Class names ordered by class index (same expression as used when the dataset was loaded).
label_names = dataset["train"].features["labels"].feature.names

def process_labels(example):
    """
    Replace an example's list of label indices with a multi-hot float vector.

    Written for a non-batched ``Dataset.map`` call. The vector length is taken
    from the module-level ``label_names``; the 0.0/1.0 float encoding is the
    target format expected by BCEWithLogitsLoss.

    Args:
        example (dict): a single example whose ``"labels"`` entry is a list of
            integer class indices (the positive labels).

    Returns:
        dict: the same ``example`` object, modified in place, with
        ``example["labels"]`` now a list of ``len(label_names)`` floats:
        1.0 at every index that was listed, 0.0 everywhere else.
    """
    # Start from an all-negative vector, then switch on the listed classes.
    multi_hot = [0.0 for _ in range(len(label_names))]
    for positive_index in example["labels"]:
        multi_hot[positive_index] = 1.0

    example["labels"] = multi_hot
    return example

# Turn each example's label indices into a multi-hot vector (one example at a
# time), then store that vector as float32 so it can be fed to a BCE loss.
processed = tokenize_dataset.map(process_labels, batched=False).cast_column(
    "labels", Sequence(Value("float32"))
)

# Expose only the model inputs and the labels, returned as torch tensors.
columns = ["input_ids", "attention_mask", "labels"]
for split in ("train", "validation", "test"):
    processed[split].set_format(type="torch", columns=columns)

# Convenience handles for the three splits.
train_dataset, val_dataset, test_dataset = (
    processed["train"],
    processed["validation"],
    processed["test"],
)

# Sanity check: inspect the dtypes of the first training example.
sample = train_dataset[0]
print("input_ids dtype:      ", sample["input_ids"].dtype)       # torch.int64
print("attention_mask dtype: ", sample["attention_mask"].dtype)  # torch.int64
print("labels dtype:         ", sample["labels"].dtype)          # torch.float32

Map:   0%|          | 0/43410 [00:00<?, ? examples/s]

Map:   0%|          | 0/5426 [00:00<?, ? examples/s]

Map:   0%|          | 0/5427 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/43410 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/5426 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/5427 [00:00<?, ? examples/s]

input_ids dtype:       torch.int64
attention_mask dtype:  torch.int64
labels dtype:          torch.float32


## Loading BERT and Training.

In [6]:
import numpy as np
import torch
from sklearn.metrics import f1_score, accuracy_score

def compute_metrics(eval_pred, threshold=0.3):
    """
    Compute evaluation metrics for multi-label classification.

    Args:
        eval_pred (tuple):
            A pair (logits, labels), or any object that unpacks into one.
            - logits: array of shape (num_examples, num_labels)
              Raw outputs from the model's classification head.
            - labels: array of shape (num_examples, num_labels)
              Ground-truth multi-hot vectors (0/1).
        threshold (float, optional):
            Probability cutoff for deciding positive labels after sigmoid.
            A label is predicted when its probability is strictly greater
            than this value. Defaults to 0.3.

    Returns:
        dict:
            {
                "f1_micro": float,
                    The micro-averaged F1 score across all labels.
                "subset_accuracy": float,
                    The fraction of samples where the predicted multi-hot
                    vector exactly matches the ground truth.
            }
    """
    logits, labels = eval_pred

    # Convert logits to independent per-label probabilities.
    probs = torch.sigmoid(torch.as_tensor(logits))

    # Binarize predictions at `threshold`; labels are cast to int so that the
    # element-wise comparison below is int-vs-int.
    preds = (probs > threshold).int().numpy()
    labels = torch.as_tensor(labels).int().numpy()

    # Micro F1 pools true/false positives and negatives over every label.
    f1_micro = f1_score(labels, preds, average="micro")
    # Subset accuracy: a row counts only if every label in it is correct.
    subset_acc = np.mean(np.all(preds == labels, axis=1))

    return {
        "f1_micro": f1_micro,
        "subset_accuracy": subset_acc
    }

In [7]:
# Model: bert-base-uncased with a freshly initialised classification head of
# `num_labels` outputs, configured for multi-label classification.
config = AutoConfig.from_pretrained(
    "bert-base-uncased",
    num_labels=num_labels,
    problem_type="multi_label_classification"
)
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", config=config
).to(device)

# Data collator: batches examples and pads them with the tokenizer's pad token.
data_collator = DataCollatorWithPadding(tokenizer)

# TrainingArguments: evaluate and checkpoint once per epoch, and reload the
# checkpoint with the best validation micro-F1 when training ends.
training_args = TrainingArguments(
    output_dir="./BERT",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1_micro",
    greater_is_better=True,

    num_train_epochs=1,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",

    # Mixed precision only when a CUDA GPU is present.
    fp16=torch.cuda.is_available(),
    logging_strategy="epoch",
    report_to="none",
)

# Trainer with early stopping.
# `processing_class` replaces the deprecated `tokenizer` keyword of Trainer
# (the old keyword triggers the warning seen in this cell's output).
# NOTE(review): with num_train_epochs=1 there is only one evaluation, so the
# early-stopping callback only matters if the epoch count is raised.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],
)

# Train the model
trainer.train()

# Evaluate on the validation split (the Trainer's eval_dataset).
val_metrics = trainer.evaluate()
print("Validation micro‑F1:", val_metrics["eval_f1_micro"])

# Predict on the test split and score the raw logits with the same metric function.
test_out = trainer.predict(test_dataset)
test_metrics = compute_metrics((test_out.predictions, test_out.label_ids))
print("Test micro‑F1:", test_metrics["f1_micro"])

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,F1 Micro,Subset Accuracy
1,0.1522,0.105795,0.529123,0.407482


Validation micro‑F1: 0.5291226792864944
Test micro‑F1: 0.539699863574352


In [None]:
# Evaluate on validation set
# (no argument: the Trainer falls back to the eval_dataset it was built with)
val_metrics = trainer.evaluate()
print("Validation metrics:", val_metrics)

# Evaluate on test set
# (the returned keys still carry the "eval_" prefix, as the printed output shows)
test_metrics = trainer.evaluate(test_dataset)
print("Test metrics:", test_metrics)

Validation metrics: {'eval_loss': 0.106411412358284, 'eval_f1_micro': 0.5258485156755757, 'eval_subset_accuracy': 0.41430151124216735, 'eval_runtime': 12.1346, 'eval_samples_per_second': 447.15, 'eval_steps_per_second': 55.956, 'epoch': 1.0}
Test metrics: {'eval_loss': 0.1054513081908226, 'eval_f1_micro': 0.5315256755501904, 'eval_subset_accuracy': 0.4094343099318224, 'eval_runtime': 13.0994, 'eval_samples_per_second': 414.292, 'eval_steps_per_second': 51.834, 'epoch': 1.0}


## Save the model and Tokenizer

In [None]:
# Run the below code to save the model and tokenizer
# model.save_pretrained("./BERT_GoEmotions")
# tokenizer.save_pretrained("./BERT_GoEmotions")

## Upload model to HuggingFace
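- Run the command `huggingface-cli login`.
- Enter your API token when prompted (create a token with write access in your Hugging Face settings).
- Create a repository for the model on Hugging Face.
- Load the model from the saved local files, or use the trained model that is still in memory.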

## Alternatively
- Unzip the saved model files and upload them manually to the repository you created on Hugging Face.

In [None]:
# Push the model
# NOTE: "Username/ModelRepo" is a placeholder; replace it with your own
# "<user>/<repo>" id before running this cell.
model.push_to_hub("Username/ModelRepo", commit_message="Initial model upload")

# Push the tokenizer
# (same repository id as the model above)
tokenizer.push_to_hub("Username/ModelRepo", commit_message="Initial tokenizer upload")

## Test functionality manually with text inputs

In [None]:
def predict_emotions(text, threshold=0.5):
    """
    Predict emotions for a given input text.

    Uses the module-level ``model``, ``tokenizer`` and ``label_names``, so the
    result reflects whichever model is currently bound to ``model``.

    Args:
        text (str): The input text to analyze.
        threshold (float): The probability threshold to decide if an emotion is
            present; an emotion is reported when its probability is strictly
            greater than this value.

    Returns:
        predicted_emotions (list): List of emotion names predicted for the input text.
        probs (ndarray): Array of probability scores for each emotion.
    """
    # Ensure the model is in evaluation mode
    model.eval()

    # Tokenize the input text with same parameters used during training
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=128
    )

    # Move the inputs to wherever the model currently lives. Reading the
    # model's own device (instead of the global `device`) stays correct even
    # when the Trainer placed the model on a different device than `device`.
    target_device = model.device
    inputs = {key: value.to(target_device) for key, value in inputs.items()}

    # Perform a forward pass without gradient calculation
    with torch.no_grad():
        # Logits from the model's classification head
        logits = model(**inputs).logits

        # Sigmoid gives an independent probability per emotion; [0] selects the
        # single example in the batch.
        probs = torch.sigmoid(logits)[0].cpu().numpy()

    # Select labels where the probability exceeds the threshold
    predicted_emotions = [label_names[i] for i, prob in enumerate(probs) if prob > threshold]

    return predicted_emotions, probs

# Example usage:
text_input = "I am a CS student."
# A lower cutoff than the function's 0.5 default, so more emotions are reported.
emotions, probabilities = predict_emotions(text_input, threshold=0.3)  # You may adjust threshold

print("Input text:", text_input)
print("Predicted Emotions:", emotions)
# One probability per entry of label_names, in the same order.
print("Raw Probabilities:", probabilities)

Input text: I am a CS student with no job
Predicted Emotions: ['annoyance', 'approval', 'disappointment', 'disapproval', 'realization', 'sadness', 'neutral']
Raw Probabilities: [0.1083461  0.06874988 0.16720401 0.3849472  0.5228685  0.16159578
 0.1310242  0.06409641 0.12171304 0.5159218  0.5364816  0.21554886
 0.11606698 0.07306944 0.11094448 0.03036223 0.08296096 0.06784081
 0.06281149 0.06163899 0.15693574 0.04577952 0.6547191  0.06201828
 0.0653154  0.33271593 0.08814564 0.76813346]


# Results - BERT

In [8]:
import numpy as np
import torch
from sklearn.metrics import (
    f1_score,
    hamming_loss,
    classification_report,
    multilabel_confusion_matrix
)
import pandas as pd

# Run the current `trainer` over the test split: raw logits and gold labels,
# both of shape (N, num_labels); the labels are multi-hot.
test_out = trainer.predict(test_dataset)
logits, y_true = test_out.predictions, test_out.label_ids

# Sigmoid maps logits to per-label probabilities; a label is predicted when
# its probability is above the cutoff.
threshold = 0.3
probs = torch.sigmoid(torch.tensor(logits)).numpy()
y_pred = np.where(probs > threshold, 1, 0)

# Overall metrics
micro_f1 = f1_score(y_true, y_pred, average="micro")
subset_acc = np.all(y_true == y_pred, axis=1).mean()   # every label of a row must match
hamming_acc = 1 - hamming_loss(y_true, y_pred)         # share of correct label decisions

print(
    f"Micro-F1         : {micro_f1:.4f}\n"
    f"Subset accuracy  : {subset_acc:.4f}\n"
    f"Hamming accuracy : {hamming_acc:.4f}\n"
)

# Per-class precision / recall / F1; classes with no predictions score 0
# instead of raising a warning.
print("Per-class classification report:")
print(classification_report(y_true, y_pred, target_names=label_names, zero_division=0))

# One 2x2 confusion matrix per label, flattened row-major into TN, FP, FN, TP.
mcm = multilabel_confusion_matrix(y_true, y_pred)
conf_df = pd.DataFrame(
    mcm.reshape(len(mcm), 4),
    index=label_names,
    columns=["TN", "FP", "FN", "TP"],
)
print("\nPer-class confusion stats (TN, FP, FN, TP):")
display(conf_df)

Micro-F1         : 0.5397
Subset accuracy  : 0.4048
Hamming accuracy : 0.9667

Per-class classification report:
                precision    recall  f1-score   support

    admiration       0.62      0.75      0.68       504
     amusement       0.76      0.83      0.79       264
         anger       0.76      0.10      0.17       198
     annoyance       0.00      0.00      0.00       320
      approval       0.77      0.14      0.24       351
        caring       0.00      0.00      0.00       135
     confusion       0.00      0.00      0.00       153
     curiosity       0.48      0.70      0.57       284
        desire       0.00      0.00      0.00        83
disappointment       0.00      0.00      0.00       151
   disapproval       0.00      0.00      0.00       267
       disgust       0.00      0.00      0.00       123
 embarrassment       0.00      0.00      0.00        37
    excitement       0.00      0.00      0.00       103
          fear       0.00      0.00      0.00  

Unnamed: 0,TN,FP,FN,TP
admiration,4694,229,126,378
amusement,5092,71,44,220
anger,5223,6,179,19
annoyance,5106,1,320,0
approval,5061,15,302,49
caring,5292,0,135,0
confusion,5274,0,153,0
curiosity,4926,217,86,198
desire,5344,0,83,0
disappointment,5276,0,151,0


# Class Balanced BERT

In [9]:
num_labels = len(label_names)

# Count how many training examples carry each label: flatten every example's
# list of label indices and histogram the indices.
_flat_label_ids = np.asarray(
    [label_id for example_labels in dataset["train"]["labels"] for label_id in example_labels],
    dtype=np.int64,
)
counts = np.bincount(_flat_label_ids, minlength=num_labels).astype(np.float32)

# pos_weight[i] = (#negatives for label i) / (#positives for label i); the tiny
# epsilon guards against division by zero for a label that never occurs.
# NOTE(review): the printed weights reach several hundred for the rarest
# labels; such large values push the model strongly towards predicting those
# labels - worth considering a cap or damping.
total = len(dataset["train"])
pos_weight = torch.tensor((total - counts) / (counts + 1e-6), dtype=torch.float32)
print("pos_weight per label:", pos_weight)

pos_weight per label: tensor([  9.5109,  17.6469,  26.7026,  16.5749,  13.7703,  38.9356,  30.7325,
         18.8129,  66.7223,  33.2080,  20.4688,  53.7415, 142.2673,  49.8910,
         71.8356,  15.3073, 562.7662,  28.8967,  19.8102, 263.6951,  26.4573,
        390.0811,  38.1081, 282.7255,  78.6514,  31.7376,  39.9528,   2.0530])


In [10]:
#Load model config & head for multi-label
config_cb = AutoConfig.from_pretrained(
    "bert-base-uncased",
    num_labels=num_labels,
    problem_type="multi_label_classification"
)
# NOTE: this rebinds the module-level name `model` to a fresh, untrained
# classifier; code that reads `model` afterwards (e.g. predict_emotions) will
# use this one. It is not moved to `device` here.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    config=config_cb
)

#Override compute_loss to use pos_weight
class CBTrainer(Trainer):
    """
    A Trainer that uses a class-balanced binary cross-entropy loss.

    This injects per-label positive weights into BCEWithLogitsLoss, so that
    rare classes contribute more to the loss.

    Args:
        *args, **kwargs: all the usual HuggingFace Trainer arguments, plus:
        pos_weight (torch.Tensor, optional, keyword-only):
            A 1D float tensor of shape (num_labels,) containing the weight
            for positive examples of each label. Typically computed as
               pos_weight[i] = (N − count_i) / count_i
            where count_i is the number of positive samples for label i.
            When omitted, the module-level ``pos_weight`` variable is read at
            loss time instead (the behaviour this class had before it
            accepted the argument).
    """

    def __init__(self, *args, pos_weight=None, **kwargs):
        """Build a regular Trainer and remember the optional per-label weights."""
        super().__init__(*args, **kwargs)
        self.pos_weight = pos_weight

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """
        Overrides the default to use BCEWithLogitsLoss with per-label pos_weight.

        Args:
            model: the model being trained
            inputs: a dict containing at least:
                - all inputs for model(**inputs)
                - "labels": a float Tensor of shape (batch_size, num_labels)
              The "labels" entry is removed from the dict.
            return_outputs: if True, returns (loss, model_outputs)
            **kwargs: accepted and ignored, so extra keyword arguments passed
                by the Trainer do not raise.

        Returns:
            loss (and outputs if return_outputs=True)
        """
        # Take the labels out so the model's forward pass does not compute its own loss.
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        # Prefer the weights given to the constructor; otherwise fall back to
        # the module-level `pos_weight`.
        weights = self.pos_weight if self.pos_weight is not None else pos_weight

        # Place everything on the logits' device. Unlike `model.device`, this
        # also works when `model` is a wrapper without a `.device` attribute.
        loss_fct = BCEWithLogitsLoss(
            pos_weight=torch.as_tensor(weights, dtype=torch.float32, device=logits.device)
        )
        loss = loss_fct(logits, labels.to(logits.device))
        return (loss, outputs) if return_outputs else loss

# Padding collator for the class-balanced run (built from the same tokenizer as before).
data_collator_cb = DataCollatorWithPadding(tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Train Class Balanced BERT

In [12]:
# Training setup for the class-balanced run: evaluate and checkpoint once per
# epoch and reload the checkpoint with the best validation micro-F1 at the end.
# NOTE: this rebinds `training_args` and `trainer` from the baseline run.
training_args = TrainingArguments(
    output_dir="./cb_bert_results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Same metric as the baseline run, written without the "eval_" prefix
    # (the logged metric appears as "eval_f1_micro" in the output).
    metric_for_best_model="f1_micro",
    greater_is_better=True,
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_dir="./cb_bert_results/logs",
    report_to="none",
)

# `processing_class` replaces the deprecated `tokenizer` keyword of Trainer
# (the old keyword triggers the warning seen in this cell's output).
trainer = CBTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator_cb,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

# final eval on validation
val_metrics = trainer.evaluate()
print("Validation metrics:", val_metrics)

  trainer = CBTrainer(


Epoch,Training Loss,Validation Loss,F1 Micro,Subset Accuracy
1,0.8004,0.746126,0.223001,0.0


Validation metrics: {'eval_loss': 0.7461264133453369, 'eval_f1_micro': 0.22300099078932883, 'eval_subset_accuracy': 0.0, 'eval_runtime': 10.2278, 'eval_samples_per_second': 530.513, 'eval_steps_per_second': 33.243, 'epoch': 1.0}


In [None]:
## Manual tests
# predict_emotions reads the module-level `model`, which at this point is the
# class-balanced model loaded and trained in the cells above.
for txt in [
    "I am thrilled about my new job!",
    "I feel sad and upset"
]:
    # NOTE: this rebinds the module-level `probs` on every iteration.
    preds, probs = predict_emotions(txt, threshold=0.6)
    print(f"\n» {txt}\n → {preds}")


» I am thrilled about my new job!
 → ['admiration', 'excitement', 'joy', 'surprise']

» I feel sad and upset
 → ['caring', 'disappointment', 'disgust', 'fear', 'nervousness', 'remorse', 'sadness']


# Results - Class Balanced BERT

In [13]:
import numpy as np
import torch
from sklearn.metrics import (
    f1_score,
    hamming_loss,
    classification_report,
    multilabel_confusion_matrix
)
import pandas as pd

# Run the current `trainer` (the class-balanced one at this point) over the
# test split: raw logits and gold multi-hot labels, both (N, num_labels).
test_out = trainer.predict(test_dataset)
logits, y_true = test_out.predictions, test_out.label_ids

# Sigmoid maps logits to per-label probabilities; a label is predicted when
# its probability is above the cutoff.
threshold = 0.3
probs = torch.sigmoid(torch.tensor(logits)).numpy()
y_pred = np.where(probs > threshold, 1, 0)

# Overall metrics
micro_f1 = f1_score(y_true, y_pred, average="micro")
subset_acc = np.all(y_true == y_pred, axis=1).mean()   # every label of a row must match
hamming_acc = 1 - hamming_loss(y_true, y_pred)         # share of correct label decisions

print(
    f"Micro-F1         : {micro_f1:.4f}\n"
    f"Subset accuracy  : {subset_acc:.4f}\n"
    f"Hamming accuracy : {hamming_acc:.4f}\n"
)

# Per-class precision / recall / F1; classes with no predictions score 0
# instead of raising a warning.
print("Per-class classification report:")
print(classification_report(y_true, y_pred, target_names=label_names, zero_division=0))

# One 2x2 confusion matrix per label, flattened row-major into TN, FP, FN, TP.
mcm = multilabel_confusion_matrix(y_true, y_pred)
conf_df = pd.DataFrame(
    mcm.reshape(len(mcm), 4),
    index=label_names,
    columns=["TN", "FP", "FN", "TP"],
)
print("\nPer-class confusion stats (TN, FP, FN, TP):")
display(conf_df)

Micro-F1         : 0.2217
Subset accuracy  : 0.0000
Hamming accuracy : 0.7201

Per-class classification report:
                precision    recall  f1-score   support

    admiration       0.23      0.95      0.37       504
     amusement       0.25      0.96      0.39       264
         anger       0.11      0.92      0.19       198
     annoyance       0.10      0.93      0.19       320
      approval       0.07      0.98      0.14       351
        caring       0.07      0.93      0.13       135
     confusion       0.10      0.96      0.18       153
     curiosity       0.21      0.97      0.34       284
        desire       0.07      0.89      0.12        83
disappointment       0.05      0.92      0.10       151
   disapproval       0.09      0.96      0.16       267
       disgust       0.08      0.95      0.14       123
 embarrassment       0.03      0.81      0.07        37
    excitement       0.06      0.92      0.11       103
          fear       0.06      0.91      0.12  

Unnamed: 0,TN,FP,FN,TP
admiration,3302,1621,23,481
amusement,4382,781,10,254
anger,3707,1522,15,183
annoyance,2538,2569,22,298
approval,808,4268,6,345
caring,3670,1622,9,126
confusion,3954,1320,6,147
curiosity,4086,1057,8,276
desire,4290,1054,9,74
disappointment,2872,2404,12,139


## Save ClassBalanced BERT

In [None]:
# trainer.save_model("./cb_bert_results/final")
# tokenizer.save_pretrained("./cb_bert_results/final")
# print(" Saved to ./cb_bert_results/final/")