# Supervised Fine-Tuning (in 5 steps)

1. Choose fine-tuning task
2. Prepare training dataset
3. Choose a Base Model
4. Fine-tune model via Supervised Learning
5. Evaluate Model Performance

In [4]:
!pip install accelerate peft bitsandbytes transformers trl evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [5]:
from datasets import load_dataset,DatasetDict, Dataset

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AutoConfig,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)

from peft import LoraConfig, PeftModel, PeftConfig, get_peft_model
import evaluate
import torch
import numpy as np

In [6]:
model_checkpoint =  "distilbert-base-uncased" # base model we're gonna use

# define label maps for sentiment analysis
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

# generate classification model from model_checkpoint
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=2, id2label=id2label, label2id=label2id
)

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# load dataset

dataset = load_dataset("shawhin/imdb-truncated")
dataset

README.md:   0%|          | 0.00/592 [00:00<?, ?B/s]

data/train-00000-of-00001-5a744bf76a1d84(…):   0%|          | 0.00/836k [00:00<?, ?B/s]

data/validation-00000-of-00001-a3a52fabb(…):   0%|          | 0.00/853k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
})

In [8]:
# preprocess data
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_spce = True)

# create tokenize function

def tokenize_function(examples):
  # extract text
  text = examples['text']

  # tokenize and truncate text
  tokenizer.truncation_side = "left"
  tokenized_inputs = tokenizer(
      text,
      return_tensors = "np",
      truncation = True,
      max_length = 512,
  )

  return tokenized_inputs

# add pad token if none exists
if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token': '[PAD]'})
  model.resize_token_embeddings(len(tokenizer))

# tokenize training and validation datasets
tokenized_datasets = dataset.map(tokenize_function, batched = True)
tokenized_datasets

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})

In [9]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer = tokenizer)

In [10]:
# EVALUATION METRICS

# import accuracy eval func
accuracy = evaluate.load("accuracy")

# define an eval func to pass into trainer later
def compute_metrics(p):
  predictions, labels = p
  predictions = np.argmax(predictions, axis = 1)

  return {"accuracy": accuracy.compute(predictions = predictions, references = labels)}

Downloading builder script: 0.00B [00:00, ?B/s]

In [11]:
# untrained model performance

text_list = ["It was good.", "Not a fan don't recommend.",
             "Better than the first one.", "This is not worth watching even once",
             "This one is a pass."]

print("Untrained model predictions:")
print("----------------------------")
for text in text_list:
  # tokenize text
  inputs = tokenizer.encode(text, return_tensors = "pt")
  # compute logits
  logits = model(inputs).logits
  # convert logits to label
  predictions = torch.argmax(logits)

  print(text + " - " + id2label[predictions.tolist()])

Untrained model predictions:
----------------------------
It was good. - POSITIVE
Not a fan don't recommend. - POSITIVE
Better than the first one. - POSITIVE
This is not worth watching even once - POSITIVE
This one is a pass. - POSITIVE


In [12]:
# Fine-tuning with LoRA

peft_config = LoraConfig(task_type = "SEQ_CLS",  # seq classification
                         r = 4, # rank
                         lora_alpha = 32, # like learing rate
                         lora_dropout = 0.01, # probability of dropout
                         target_modules = ['q_lin'])  # we apply lora to query layer

In [13]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 628,994 || all params: 67,584,004 || trainable%: 0.9307


In [18]:
# hyperparamters
lr = 1e-3 # size of optim step
batch_size = 4
num_epochs = 10

# def training arg
training_args = TrainingArguments(
    output_dir = model_checkpoint + "-lora-text-classification",  # giving a name to our finetuned model
    learning_rate = lr,
    per_device_train_batch_size = batch_size,
    per_device_eval_batch_size = batch_size,
    num_train_epochs = num_epochs,
    weight_decay = 0.01,
    )

In [20]:
# create trainer object
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_datasets["train"],
    eval_dataset = tokenized_datasets["validation"],
    tokenizer = tokenizer,
    data_collator = data_collator,
    compute_metrics = compute_metrics,
)

trainer.train()

  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshivangshandilya83[0m ([33mshivangshandilya83-independent[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
500,0.4279
1000,0.1807
1500,0.0606
2000,0.017
2500,0.0079


TrainOutput(global_step=2500, training_loss=0.13882831783294677, metrics={'train_runtime': 331.5995, 'train_samples_per_second': 30.157, 'train_steps_per_second': 7.539, 'total_flos': 1112883852759936.0, 'train_loss': 0.13882831783294677, 'epoch': 10.0})

In [22]:
# using the same sample as before ion trained model this time
model.to('cpu')

print("Trained model predictions:")
print("----------------------------")
for text in text_list:
  # tokenize text
  inputs = tokenizer.encode(text, return_tensors = "pt").to('cpu')
  # compute logits
  logits = model(inputs).logits
  # convert logits to label
  predictions = torch.max(logits, 1).indices

  print(text + " - " + id2label[predictions.tolist()[0]])

Trained model predictions:
----------------------------
It was good. - POSITIVE
Not a fan don't recommend. - NEGATIVE
Better than the first one. - POSITIVE
This is not worth watching even once - POSITIVE
This one is a pass. - NEGATIVE
