# Lightweight Fine-Tuning Project

In [289]:
!python --version

Python 3.13.5


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

In [None]:
# ! pip install -r requirements.txt
import numpy as np
import datetime
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from transformers.utils import logging
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, AutoPeftModelForSequenceClassification


In [291]:
# Load the dataset
dataset = load_dataset("dair-ai/emotion")

# Split the train set into train/validation
train_valid = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Inspect the dataset
print("Train sample:", train_valid["train"][0])
print("Validation sample:", train_valid["test"][0])

Train sample: {'text': 'when an alcoholic stood dribbling over a food counter', 'label': 3}
Validation sample: {'text': 'while cycling in the country', 'label': 4}


In [None]:
# Initialize the tokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(examples):
    return tokenizer(examples["text"], truncation=True)

tokenized_train = train_valid["train"].map(tokenize, batched=True)
tokenized_test = train_valid["test"].map(tokenize, batched=True)


In [293]:
# Label mapping
num_labels = 4
id2label = {0: 'joy', 1: 'anger', 2: 'fear', 3: 'sadness'}
label2id = {v: k for k, v in id2label.items()}

# Check the result
print(train_valid['train'][0])
print(train_valid['test'][0])

# Initialize the model
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

{'text': 'when an alcoholic stood dribbling over a food counter', 'label': 3}
{'text': 'while cycling in the country', 'label': 4}


In [294]:
# Verify the model
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [295]:
# Freeze the model parameters
for param in model.base_model.parameters():
    param.requires_grad = False

# Print parameters
total_params = sum(p.numel() for p in model.parameters())
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_params:,} total parameters, including {total_trainable_params:,} trainable parameters.")


66,956,548 total parameters, including 593,668 trainable parameters.


In [296]:
# Prepare for training
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

tokenized_train = train_valid["train"].map(tokenize, batched=True)
tokenized_test = train_valid["test"].map(tokenize, batched=True)
data_collator = DataCollatorWithPadding(tokenizer, padding=True)
training_args = TrainingArguments(
    output_dir=f"./results/{model_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=300,
    learning_rate=3e-5,
    weight_decay=0.01,
    save_strategy="epoch",
    eval_strategy="epoch", 
    save_total_limit=1,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [297]:
# Evaluate the base model
trainer.evaluate()



{'eval_loss': 1.164207935333252, 'eval_model_preparation_time': 0.0008, 'eval_accuracy': 0.09875, 'eval_runtime': 2.4701, 'eval_samples_per_second': 647.746, 'eval_steps_per_second': 20.242}


{'eval_loss': 1.164207935333252,
 'eval_model_preparation_time': 0.0008,
 'eval_accuracy': 0.09875,
 'eval_runtime': 2.4701,
 'eval_samples_per_second': 647.746,
 'eval_steps_per_second': 20.242}

## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [313]:
# Use label_list, label2id, id2label from earlier preprocessing
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use label_list, label2id, id2label from your preprocessing cell
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
)

# Define the LoRA configuration
config = LoraConfig(
    task_type='SEQ_CLS',
    target_modules=["q_lin", "k_lin", "v_lin"],
    r=16,
    lora_alpha=64,
    lora_dropout=0.05
)

# Apply LoRA to the model
fine_tuned_model = get_peft_model(model, config)

# Print trainable parameters
fine_tuned_model.print_trainable_parameters()

trainable params: 1,036,036 || all params: 67,992,584 || trainable%: 1.5237


In [299]:
# Prepare for training
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

training_args = TrainingArguments(
    output_dir=f"./results/{model_name}-lora/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=300,
    learning_rate=2e-5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

lora_trainer = Trainer(
    model=fine_tuned_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics,
)

  lora_trainer = Trainer(


In [300]:
# Train the LoRA model 
lora_trainer.train()

{'eval_loss': 0.5760677456855774, 'eval_accuracy': 0.6225, 'eval_runtime': 2.7034, 'eval_samples_per_second': 591.857, 'eval_steps_per_second': 18.496, 'epoch': 1.0}




{'loss': 0.8975, 'grad_norm': 2.527202844619751, 'learning_rate': 1.7958974358974363e-05, 'epoch': 1.1111111111111112}
{'eval_loss': 0.4066803455352783, 'eval_accuracy': 0.673125, 'eval_runtime': 2.7073, 'eval_samples_per_second': 590.995, 'eval_steps_per_second': 18.469, 'epoch': 2.0}
{'eval_loss': 0.4066803455352783, 'eval_accuracy': 0.673125, 'eval_runtime': 2.7073, 'eval_samples_per_second': 590.995, 'eval_steps_per_second': 18.469, 'epoch': 2.0}




{'loss': 0.4865, 'grad_norm': 1.2797263860702515, 'learning_rate': 1.2830769230769232e-05, 'epoch': 2.2222222222222223}
{'eval_loss': 0.3449048101902008, 'eval_accuracy': 0.694375, 'eval_runtime': 2.7328, 'eval_samples_per_second': 585.477, 'eval_steps_per_second': 18.296, 'epoch': 3.0}
{'eval_loss': 0.3449048101902008, 'eval_accuracy': 0.694375, 'eval_runtime': 2.7328, 'eval_samples_per_second': 585.477, 'eval_steps_per_second': 18.296, 'epoch': 3.0}




{'loss': 0.3888, 'grad_norm': 5.792016983032227, 'learning_rate': 7.702564102564102e-06, 'epoch': 3.3333333333333335}
{'eval_loss': 0.3149174749851227, 'eval_accuracy': 0.70625, 'eval_runtime': 2.7316, 'eval_samples_per_second': 585.729, 'eval_steps_per_second': 18.304, 'epoch': 4.0}
{'eval_loss': 0.3149174749851227, 'eval_accuracy': 0.70625, 'eval_runtime': 2.7316, 'eval_samples_per_second': 585.729, 'eval_steps_per_second': 18.304, 'epoch': 4.0}




{'loss': 0.3454, 'grad_norm': 4.225612640380859, 'learning_rate': 2.5743589743589746e-06, 'epoch': 4.444444444444445}
{'eval_loss': 0.30747634172439575, 'eval_accuracy': 0.710625, 'eval_runtime': 2.7063, 'eval_samples_per_second': 591.224, 'eval_steps_per_second': 18.476, 'epoch': 5.0}
{'eval_loss': 0.30747634172439575, 'eval_accuracy': 0.710625, 'eval_runtime': 2.7063, 'eval_samples_per_second': 591.224, 'eval_steps_per_second': 18.476, 'epoch': 5.0}
{'train_runtime': 305.5558, 'train_samples_per_second': 235.636, 'train_steps_per_second': 7.364, 'train_loss': 0.5086749877929687, 'epoch': 5.0}
{'train_runtime': 305.5558, 'train_samples_per_second': 235.636, 'train_steps_per_second': 7.364, 'train_loss': 0.5086749877929687, 'epoch': 5.0}


TrainOutput(global_step=2250, training_loss=0.5086749877929687, metrics={'train_runtime': 305.5558, 'train_samples_per_second': 235.636, 'train_steps_per_second': 7.364, 'train_loss': 0.5086749877929687, 'epoch': 5.0})

In [301]:
# Evaluate the LoRA model
lora_trainer.evaluate()



{'eval_loss': 0.30747634172439575, 'eval_accuracy': 0.710625, 'eval_runtime': 2.7041, 'eval_samples_per_second': 591.69, 'eval_steps_per_second': 18.49, 'epoch': 5.0}


{'eval_loss': 0.30747634172439575,
 'eval_accuracy': 0.710625,
 'eval_runtime': 2.7041,
 'eval_samples_per_second': 591.69,
 'eval_steps_per_second': 18.49,
 'epoch': 5.0}

In [302]:
# Save the loRA model
fine_tuned_model.save_pretrained(f"./peft/{model_name}-lora")

## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [None]:
# Load the saved PEFT model and evaluate performance
loaded_lora_model = AutoPeftModelForSequenceClassification.from_pretrained(
    f"./peft/{model_name}-lora",
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
    torch_dtype=torch.float16
)

# Move model to device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
loaded_lora_model = loaded_lora_model.to(device)

# Evaluate on the test set
lora_eval_trainer = Trainer(
    model=loaded_lora_model,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics,
)
lora_eval_results = lora_eval_trainer.evaluate()



  lora_eval_trainer = Trainer(


{'eval_loss': 0.307373046875, 'eval_model_preparation_time': 0.001, 'eval_accuracy': 0.71, 'eval_runtime': 4.5139, 'eval_samples_per_second': 354.464, 'eval_steps_per_second': 44.308}


In [312]:
# Print only the accuracy comparison between PEFT and base models
peft_accuracy = lora_eval_results.get('eval_accuracy', None)
try:
    base_results = trainer.evaluate()
    base_accuracy = base_results.get('eval_accuracy', None)
except Exception as e:
    base_accuracy = None

print("Accuracy Comparison:")
print(f"PEFT Model Accuracy: {peft_accuracy}")
print(f"Base Model Accuracy: {base_accuracy}")

{'eval_loss': 1.164207935333252, 'eval_model_preparation_time': 0.0008, 'eval_accuracy': 0.09875, 'eval_runtime': 2.594, 'eval_samples_per_second': 616.813, 'eval_steps_per_second': 19.275}
Accuracy Comparison:
PEFT Model Accuracy: 0.71
Base Model Accuracy: 0.09875
