# **Fine-Tuning**

## Install dependencies

In [1]:
!pip install transformers datasets scikit-learn



## Import dependencies

In [18]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score, ConfusionMatrixDisplay, confusion_matrix
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import Trainer, TrainingArguments


## Preprocessing

### Load dataset

In [3]:
# Load the "emotion" dataset; it comes back as a DatasetDict with train/validation/test splits.
emotions = load_dataset("emotion")

In [4]:
# Show the available splits with their columns and row counts.
emotions

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

### Tokenize dataset

In [5]:
# Checkpoint identifier reused for both the tokenizer here and the model loaded later.
model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

In [6]:
def tokenize(batch):
    """Tokenize the ``"text"`` column of a batch with the notebook-level tokenizer.

    Args:
        batch: Mapping with a ``"text"`` entry holding the strings to encode.

    Returns:
        Whatever the tokenizer returns for those texts, with padding and
        truncation both enabled.
    """
    texts = batch["text"]
    return tokenizer(texts, padding=True, truncation=True)

In [7]:
# batched=True hands `tokenize` many rows at once; with batch_size=None each split is passed
# as a single batch, so padding=True pads every example in a split to the same length.
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [8]:
# Confirm that tokenization added `input_ids` and `attention_mask` next to the original columns.
emotions_encoded["train"].column_names

['text', 'label', 'input_ids', 'attention_mask']

## Training

### Loading a pretrained model

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
# Size the classification head from the dataset's label feature instead of hard-coding
# the class count, so the cell stays correct if the label set changes.
num_labels = emotions["train"].features["label"].num_classes
model = (
    AutoModelForSequenceClassification
    .from_pretrained(model_ckpt, num_labels=num_labels)
    .to(device)
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Defining the performance metrics

In [12]:
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for a set of predictions.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (scores whose last axis is reduced with ``argmax``).

    Returns:
        Dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    y_true = pred.label_ids
    # The predicted class is the index of the highest score along the last axis.
    y_hat = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_hat),
        "f1": f1_score(y_true, y_hat, average="weighted"),
    }

### Training the model

In [19]:
batch_size = 64
# Number of training batches per epoch, so the training loss is logged about once per epoch.
logging_steps = len(emotions_encoded["train"]) // batch_size
model_name = f"{model_ckpt}-finetuned-emotion"

training_args = TrainingArguments(
    output_dir=model_name,
    # Optimisation settings.
    num_train_epochs=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluation and logging.
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    log_level="error",
    disable_tqdm=False,
    push_to_hub=False,
)

In [20]:
# Bundle the model, its training configuration, the data splits and the metric callback.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=emotions_encoded["train"],
    eval_dataset=emotions_encoded["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.8396,0.329532,0.8985,0.896842
2,0.2554,0.219315,0.922,0.921274


TrainOutput(global_step=500, training_loss=0.5475049209594727, metrics={'train_runtime': 13929.2946, 'train_samples_per_second': 2.297, 'train_steps_per_second': 0.036, 'total_flos': 720342861696000.0, 'train_loss': 0.5475049209594727, 'epoch': 2.0})

In [None]:
# Run fine-tuning with the arguments configured above; the call returns a TrainOutput summary.
trainer.train()

## Testing

### Predict on validation data

In [None]:
# Run inference on the validation split; the result exposes `.predictions` and `.metrics`,
# both of which are used below.
preds_output = trainer.predict(emotions_encoded["validation"])

In [None]:
# Inspect the metrics reported for the validation predictions.
preds_output.metrics

In [None]:
# Reduce the scores along axis 1 to the index of the highest-scoring class per row.
y_preds = preds_output.predictions.argmax(axis=1)

In [None]:
def plot_confusion_matrix(y_preds, y_true, labels):
    """Draw a normalized confusion matrix for the given predictions.

    Args:
        y_preds: Predicted class ids.
        y_true: Ground-truth class ids.
        labels: Names passed to the display as ``display_labels``.
    """
    # normalize="true" normalizes over the true labels, i.e. each row of the matrix.
    normalized_cm = confusion_matrix(y_true, y_preds, normalize="true")
    _, ax = plt.subplots(figsize=(6, 6))
    ConfusionMatrixDisplay(
        confusion_matrix=normalized_cm,
        display_labels=labels,
    ).plot(cmap="Blues", values_format=".2f", ax=ax, colorbar=False)
    plt.title("Normalized confusion matrix")
    plt.show()

In [None]:
# Ground-truth ids come from the same prediction output, so they line up with y_preds.
y_valid = preds_output.label_ids
# Human-readable class names from the dataset's label feature, used as tick labels.
labels = emotions["train"].features["label"].names
plot_confusion_matrix(y_preds, y_valid, labels)