In [10]:
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import torch
import numpy as np

In [2]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
device

device(type='cuda')

In [4]:
dataset = load_dataset("emotion", cache_dir="./data")

In [5]:
# From huggingface docs
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

In [6]:
tokenized_dataset["train"]

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 16000
})

In [7]:
num_labels = len(set(dataset["train"]["label"]))
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
model = model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
len_test = len(tokenized_dataset["test"]["text"])
test_pred_list = []
for i in range(int(len_test/10)):
  test_texts = tokenized_dataset["test"]["text"][i:i+10]
  test_labels = tokenized_dataset["test"]["label"][i:i+10]
  test_inputs = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
  test_outputs = model(**test_inputs.to(device))
  # Assuming test_outputs is a tensor of shape (batch_size, num_classes)
  _, predicted_labels = torch.max(test_outputs.logits, dim=1)

  # Convert the predicted_labels tensor to a Python list
  predicted_labels_list = predicted_labels.tolist()
  test_pred_list.extend(predicted_labels_list)
  for i in range(10):
    print(f"Predicted: {predicted_labels_list[i]}, actual: {test_labels[i]}")


Predicted: 2, actual: 0
Predicted: 4, actual: 0
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 4, actual: 0
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 2, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 2, actual: 0
Predicted: 0, actual: 4
Predicted: 0, actual: 0
Predicted: 4, ac

In [22]:
def evaluate_acc(preds, labels):
  correct = 0
  for i in range(len(preds)):
    if preds[i] == labels[i]:
      correct += 1
  return correct / len(preds)

def compute_metrics(preds, labels):
  # Compute f1 for multi-class classification
  label_list = list(set(labels))
  f1_list = []
  precision_list = []
  recall_list = []
  for label in label_list:
    tp = 0
    fp = 0
    fn = 0
    for i in range(len(preds)):
      if preds[i] == label:
        if labels[i] == label:
          tp += 1
        else:
          fp += 1
      else:
        if labels[i] == label:
          fn += 1
    print(f"{label}: tp={tp}, fp={fp}, fn={fn}")
    epsilon = 1e-7
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    f1 = 2 * precision * recall / (precision + recall)
    f1_list.append(f1)
    precision_list.append(precision)
    recall_list.append(recall)
  return f1_list, precision_list, recall_list

In [12]:
test_preds_np = np.array(test_pred_list)
test_labels_np = np.array(tokenized_dataset["test"]["label"])
evaluate_acc(test_preds_np, test_labels_np)

0.133

In [23]:
compute_metrics(test_preds_np, test_labels_np)

0: tp=122, fp=259, fn=459
1: tp=0, fp=0, fn=695


ZeroDivisionError: division by zero

In [10]:
# Test the model
from transformers import TrainingArguments, Trainer

args = TrainingArguments("test-emotion-classification")

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}

trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


ImportError: ignored