# Lightweight Fine-Tuning Project

In [1]:
# Install required libraries
!pip install evaluate
!pip install numpy
!pip install peft
!pip install transformers
!pip install -q "datasets==2.15.0"

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m 

TODO: In this cell, describe your choices for each of the following

* PEFT technique: LoRA
* Model: distilbert-base-uncased
* Evaluation approach: Hugging Face Trainer with `evaluate` library
* Fine-tuning dataset: `imdb`

## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

### Cell 1: Import libraries and set up

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import evaluate
import numpy as np
import torch

### Cell 2: Load Model and tokenizer

In [3]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Cell 3: Load and prepocess dataset

In [4]:
# Load the IMDb dataset
dataset = load_dataset("imdb")

# Use a small subset for evaluation to speed things up
small_dataset = dataset["test"].shuffle(seed=42).select(range(1000))

# Tokenize
def preprocess(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)

tokenized_dataset = small_dataset.map(preprocess, batched=True)


Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

### Cell 4: Define metrics and compute function

In [5]:
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

### Cell 5: Evaluate the foundation model

In [7]:
# Define trainer (no training yet, just evaluation)
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=tokenized_dataset
)

# Evaluate the base model
base_eval_results = trainer.evaluate()
print("Base model accuracy:", base_eval_results["eval_accuracy"])


  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjanlynnh-916[0m ([33mjanlynnh-916-western-governors-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Base model accuracy: 0.483


## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [10]:
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_lin", "v_lin"]
)

peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()


trainable params: 739,586 || all params: 67,694,596 || trainable%: 1.0925


In [11]:
from transformers import Trainer, TrainingArguments

train_dataset = dataset["train"].shuffle(seed=42).select(range(2000))
tokenized_train_dataset = train_dataset.map(preprocess, batched=True)

training_args = TrainingArguments(
    output_dir="./peft_model",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,
    logging_dir="./logs",
    report_to="none"
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [20]:
trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3323,0.351966,0.843


TrainOutput(global_step=250, training_loss=0.3924576644897461, metrics={'train_runtime': 11.7496, 'train_samples_per_second': 170.219, 'train_steps_per_second': 21.277, 'total_flos': 134739406848000.0, 'train_loss': 0.3924576644897461, 'epoch': 1.0})

###  ⚠️ IMPORTANT ⚠️

Due to workspace storage constraints, you should not store the model weights in the same directory but rather use `/tmp` to avoid workspace crashes which are irrecoverable.
Ensure you save it in /tmp always.

In [24]:
# Saving the model
peft_model.save_pretrained("/tmp/peft_distilbert_imdb")

In [25]:
import os
print(os.listdir("/tmp/peft_distilbert_imdb"))


['config.json', 'adapter_config.json', 'model.safetensors', 'README.md', 'adapter_model.safetensors']


## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [26]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSequenceClassification

# Load PEFT config
peft_config = PeftConfig.from_pretrained("/tmp/peft_distilbert_imdb")

# Load the base model again
base_model = AutoModelForSequenceClassification.from_pretrained(
    peft_config.base_model_name_or_path, num_labels=2
)

# Load trained PEFT adapter weights
loaded_peft_model = PeftModel.from_pretrained(base_model, "/tmp/peft_distilbert_imdb")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
peft_trainer = Trainer(
    model=loaded_peft_model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    eval_dataset=tokenized_dataset
)


  peft_trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [28]:
peft_eval_results = peft_trainer.evaluate()
print("PEFT model accuracy:", peft_eval_results["eval_accuracy"])

PEFT model accuracy: 0.843


In [29]:
print("Base model accuracy:", base_eval_results["eval_accuracy"])
print("PEFT model accuracy:", peft_eval_results["eval_accuracy"])
improvement = peft_eval_results["eval_accuracy"] - base_eval_results["eval_accuracy"]
print(f"Accuracy Improvement: {improvement:.4f}")

Base model accuracy: 0.483
PEFT model accuracy: 0.843
Accuracy Improvement: 0.3600


In [31]:
device = "cuda" if torch.cuda.is_available() else "cpu"
loaded_peft_model.to(device)


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): DistilBertSdpaAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

In [32]:
import torch

sample = "This movie was absolutely fantastic. The performances were top-notch!"

inputs = tokenizer(sample, return_tensors="pt", truncation=True, padding=True, max_length=256)
inputs = {key: value.to(device) for key, value in inputs.items()}

outputs = loaded_peft_model(**inputs)
pred = torch.argmax(outputs.logits, dim=1).item()

label = "Positive" if pred == 1 else "Negative"
print(f"Review: \"{sample}\"\nPredicted Sentiment: {label}")


Review: "This movie was absolutely fantastic. The performances were top-notch!"
Predicted Sentiment: Positive
