# Lightweight Fine-Tuning Project

In [1]:
# Install required libraries
!pip install evaluate
!pip install numpy
!pip install peft
!pip install transformers
!pip install -q "datasets==2.15.0"

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m 

TODO: In this cell, describe your choices for each of the following

* PEFT technique: LoRA
* Model: distilbert-base-uncased
* Evaluation approach: Hugging Face Trainer with `evaluate` library
* Fine-tuning dataset: `imdb`

## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

### Cell 1: Import libraries and set up

In [19]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType, AutoPeftModelForSequenceClassification, PeftConfig
from transformers import AutoTokenizer
from datasets import load_dataset
import evaluate
import numpy as np
import torch

### Cell 2: Load Model and tokenizer

In [20]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Cell 3: Load and prepocess dataset

In [22]:
# Load the IMDb dataset
dataset = load_dataset("imdb")

# Use a small subset for evaluation to speed things up
small_dataset = dataset["test"].shuffle(seed=42).select(range(1000))

# Tokenize
def preprocess(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)

tokenized_dataset = small_dataset.map(preprocess, batched=True)


### Cell 4: Define metrics and compute function

In [23]:
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)


### Cell 5: Evaluate the foundation model

In [24]:
# Define trainer (no training yet, just evaluation)
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=tokenized_dataset
)

# Evaluate the base model
base_eval_results = trainer.evaluate()
print("Base model accuracy:", base_eval_results["eval_accuracy"])


  trainer = Trainer(


Base model accuracy: 0.491


## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [25]:
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()


trainable params: 739,586 || all params: 67,694,596 || trainable%: 1.0925


In [26]:
train_dataset = dataset["train"].shuffle(seed=42).select(range(2000))
tokenized_train_dataset = train_dataset.map(preprocess, batched=True)

training_args = TrainingArguments(
    output_dir="./peft_model",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    logging_dir="./logs",
    report_to="none"
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [27]:
trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5587,0.555789,0.823


TrainOutput(global_step=250, training_loss=0.6285762100219726, metrics={'train_runtime': 12.3507, 'train_samples_per_second': 161.935, 'train_steps_per_second': 20.242, 'total_flos': 134739406848000.0, 'train_loss': 0.6285762100219726, 'epoch': 1.0})

###  ⚠️ IMPORTANT ⚠️

Due to workspace storage constraints, you should not store the model weights in the same directory but rather use `/tmp` to avoid workspace crashes which are irrecoverable.
Ensure you save it in /tmp always.

In [28]:
# Saving the model
peft_model.save_pretrained("/tmp/peft_distilbert_imdb")

In [29]:
import os
print(os.listdir("/tmp/peft_distilbert_imdb"))


['adapter_config.json', 'README.md', 'adapter_model.safetensors']


## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [30]:
# Load PEFT config
peft_config = PeftConfig.from_pretrained("/tmp/peft_distilbert_imdb")

# Load model using AutoPeftModelForSequenceClassification
loaded_peft_model = AutoPeftModelForSequenceClassification.from_pretrained(
    "/tmp/peft_distilbert_imdb"
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [32]:
peft_trainer = Trainer(
    model=loaded_peft_model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=tokenized_dataset
)


  peft_trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [31]:
peft_eval_results = peft_trainer.evaluate()
print("PEFT model accuracy:", peft_eval_results["eval_accuracy"])

PEFT model accuracy: 0.839


In [33]:
print("Base model accuracy:", base_eval_results["eval_accuracy"])
print("PEFT model accuracy:", peft_eval_results["eval_accuracy"])
improvement = peft_eval_results["eval_accuracy"] - base_eval_results["eval_accuracy"]
print(f"Accuracy Improvement: {improvement:.4f}")

Base model accuracy: 0.491
PEFT model accuracy: 0.839
Accuracy Improvement: 0.3480


In [34]:
device = "cuda" if torch.cuda.is_available() else "cpu"
loaded_peft_model.to(device)


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): DistilBertSdpaAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

In [35]:
sample = "This movie was absolutely fantastic. The performances were top-notch!"

inputs = tokenizer(sample, return_tensors="pt", truncation=True, padding=True, max_length=256)
inputs = {key: value.to(device) for key, value in inputs.items()}

outputs = loaded_peft_model(**inputs)
pred = torch.argmax(outputs.logits, dim=1).item()

label = "Positive" if pred == 1 else "Negative"
print(f"Review: \"{sample}\"\nPredicted Sentiment: {label}")


Review: "This movie was absolutely fantastic. The performances were top-notch!"
Predicted Sentiment: Positive
