In [59]:
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertModel
from datasets import load_dataset
import torch
import numpy as np

In [2]:
# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
device

device(type='cuda')

In [4]:
dataset = load_dataset("emotion", cache_dir="./data")

In [5]:
# From huggingface docs
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

In [6]:
tokenized_dataset["train"]

Dataset({
    features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 16000
})

In [42]:
num_labels = len(set(dataset["train"]["label"]))
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, output_attentions=True)
model = model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
len_test = len(tokenized_dataset["test"]["text"])
test_pred_list = []
for i in range(int(len_test/10)):
  test_texts = tokenized_dataset["test"]["text"][i:i+10]
  test_labels = tokenized_dataset["test"]["label"][i:i+10]
  test_inputs = tokenizer(test_texts, return_tensors="pt", padding=True, truncation=True)
  test_outputs = model(**test_inputs.to(device))
  # Assuming test_outputs is a tensor of shape (batch_size, num_classes)
  _, predicted_labels = torch.max(test_outputs.logits, dim=1)

  # Convert the predicted_labels tensor to a Python list
  predicted_labels_list = predicted_labels.tolist()
  test_pred_list.extend(predicted_labels_list)
  for i in range(10):
    print(f"Predicted: {predicted_labels_list[i]}, actual: {test_labels[i]}")


Predicted: 2, actual: 0
Predicted: 4, actual: 0
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 4, actual: 0
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 0, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 2, actual: 0
Predicted: 2, actual: 1
Predicted: 0, actual: 0
Predicted: 4, actual: 4
Predicted: 0, actual: 3
Predicted: 4, actual: 1
Predicted: 2, actual: 1
Predicted: 2, actual: 3
Predicted: 0, actual: 4
Predicted: 2, actual: 0
Predicted: 0, actual: 4
Predicted: 0, actual: 0
Predicted: 4, ac

In [27]:
def evaluate_acc(preds, labels):
  correct = 0
  for i in range(len(preds)):
    if preds[i] == labels[i]:
      correct += 1
  return correct / len(preds)

def compute_metrics(preds, labels):
  # Compute f1 for multi-class classification
  label_list = list(set(labels))
  f1_list = []
  precision_list = []
  recall_list = []
  for label in label_list:
    tp = 0
    fp = 0
    fn = 0
    for i in range(len(preds)):
      if preds[i] == label:
        if labels[i] == label:
          tp += 1
        else:
          fp += 1
      else:
        if labels[i] == label:
          fn += 1
    print(f"{label}: tp={tp}, fp={fp}, fn={fn}")
    epsilon = 1e-7
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    f1 = 2 * precision * recall / (precision + recall + epsilon)
    f1_list.append(f1)
    precision_list.append(precision)
    recall_list.append(recall)
  return f1_list, precision_list, recall_list

In [12]:
test_preds_np = np.array(test_pred_list)
test_labels_np = np.array(tokenized_dataset["test"]["label"])
evaluate_acc(test_preds_np, test_labels_np)

0.133

In [28]:
compute_metrics(test_preds_np, test_labels_np)

0: tp=122, fp=259, fn=459
1: tp=0, fp=0, fn=695
2: tp=82, fp=954, fn=77
3: tp=0, fp=0, fn=275
4: tp=62, fp=521, fn=162
5: tp=0, fp=0, fn=66


([0.2536382057466561, 0.0, 0.13723847063070013, 0.0, 0.1536554741071492, 0.0],
 [0.3202099736692362, 0.0, 0.07915057914293913, 0.0, 0.10634648368673302, 0.0],
 [0.20998278825989972, 0.0, 0.5157232701158974, 0.0, 0.27678571416214925, 0.0])

In [52]:
import bertviz 
from bertviz import head_view

sentence_a = "I am happy today"
sentence_b = "I feel great"


viz_inputs = tokenizer(sentence_a, sentence_b, return_tensors='pt')
print(viz_inputs)

attention = model(**viz_inputs.to(device)).attentions
logits = model(**viz_inputs.to(device)).logits
sentence_b_start = (viz_inputs.token_type_ids == 0).sum(dim=1)
tokens = tokenizer.convert_ids_to_tokens(viz_inputs.input_ids[0])

print(tokens)
# head_view(attention=attention, tokens, sentence_b_start, heads=[8])
head_view(attention=attention, tokens=tokens, sentence_b_start=sentence_b_start, heads=[8])



{'input_ids': tensor([[ 101, 1045, 2572, 3407, 2651,  102, 1045, 2514, 2307,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 1, 1, 1, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
['[CLS]', 'i', 'am', 'happy', 'today', '[SEP]', 'i', 'feel', 'great', '[SEP]']


<IPython.core.display.Javascript object>

In [53]:
# Print outputs for the same sentences with output labels
print(logits)
output = torch.max(logits, dim=1)
# Print the predicted text label
print(f"Predicted label: {dataset['train'].features['label'].int2str(output.indices)}")




tensor([[-0.3042, -0.9476,  0.0952,  0.2952, -0.1418,  0.7021]],
       device='cuda:0', grad_fn=<AddmmBackward0>)
Predicted label: ['surprise']


In [55]:
# Check current model parameters
model.parameters
model.config

BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_attentions": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [60]:
# Modify model to use different number ofhidden layers, etc
model_name = "bert-base-uncased"
config = transformers.BertConfig.from_pretrained(model_name)
config.num_hidden_layers = 8 # default is 12
config.num_attention_heads = 8 # default is 12
config.activation = "relu" # default is gelu
config.hidden_dropout_prob = 0.1 # default is 0.1
config.attention_probs_dropout_prob = 0.1 # default is 0.1
config.output_attentions = True # default is False
modified_model = BertModel.from_pretrained(model_name, config=config)

In [10]:
# Test the model
from transformers import TrainingArguments, Trainer

args = TrainingArguments("test-emotion-classification")

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return {"accuracy": (preds == labels).mean()}

trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


ImportError: ignored