# Lightweight Fine-Tuning Project

In [None]:
# Report the version of the interpreter that is running this kernel.
# A shell call to `python` is resolved through PATH and can point at a
# different installation than the one executing the notebook.
import platform

print(f"Python {platform.python_version()}")

## Loading and Evaluating a Foundation Model

TODO: In the cells below, load your chosen pre-trained Hugging Face model and evaluate its performance prior to fine-tuning. This step includes loading an appropriate tokenizer and dataset.

In [None]:
import numpy as np
import datetime
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from transformers.utils import logging
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, AutoPeftModelForSequenceClassification


In [None]:
# Load the "dair-ai/emotion" dataset through `load_dataset`.
dataset = load_dataset("dair-ai/emotion")

# Carve 10% of the original training split off as a held-out set.
# The fixed seed keeps the split identical between runs.
train_valid = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Show the first record of each resulting split as a quick sanity check.
first_train_row = train_valid["train"][0]
first_valid_row = train_valid["test"][0]
print("Train sample:", first_train_row)
print("Validation sample:", first_valid_row)

In [None]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize(examples):
    """Tokenize the ``"text"`` column of a batch, truncating over-long inputs.

    Args:
        examples: A batch from ``Dataset.map(..., batched=True)``; only its
            ``"text"`` entry is read.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)


# Tokenize the training split first, then the held-out split.
tokenized_train, tokenized_test = (
    train_valid[split_key].map(tokenize, batched=True)
    for split_key in ("train", "test")
)


In [None]:
# Derive the label mapping from the dataset instead of hard-coding it.
# dair-ai/emotion has six classes (sadness, joy, love, anger, fear, surprise);
# a four-entry mapping leaves label ids 4 and 5 outside the classifier's
# output range, which makes the loss computation fail.
label_names = dataset["train"].features["label"].names
num_labels = len(label_names)
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

# Check the result
print(id2label)

# Base checkpoint with a classification head sized to the label set.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Verify the model
# Printing the module tree shows the layer names; the LoRA configuration
# later in this notebook targets the ones called q_lin, k_lin and v_lin.
print(model)

In [None]:
# Freeze every parameter under `model.base_model`; parameters that live
# outside of it keep whatever `requires_grad` value they already had.
model.base_model.requires_grad_(False)

# Tally parameter counts in one pass, then report total vs. trainable.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
total_trainable_params = sum(size for size, trainable in param_sizes if trainable)
print(f"{total_params:,} total parameters, including {total_trainable_params:,} trainable parameters.")


In [None]:
# Prepare for training
def compute_metrics(eval_pred):
    """Compute accuracy from a ``(scores, labels)`` evaluation pair.

    Args:
        eval_pred: A two-item sequence. The first item holds one row of
            per-class scores per example, the second the expected class ids.

    Returns:
        A dict with the single key ``"accuracy"``: the fraction of rows whose
        highest-scoring column index equals the expected class id.
    """
    scores, targets = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    hits = predicted_ids == targets
    return {"accuracy": hits.mean()}

# `tokenized_train` and `tokenized_test` were already built in the
# tokenization cell above, so they are reused here instead of being mapped
# a second time.

# Pads each batch dynamically to the length of its longest sequence.
data_collator = DataCollatorWithPadding(tokenizer, padding=True)

# Each run writes to its own timestamped directory under ./results/<model>/.
training_args = TrainingArguments(
    output_dir=f"./results/{model_name}/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=300,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Save and evaluate on the same schedule; `load_best_model_at_end`
    # requires the two strategies to match.
    save_strategy="epoch",
    eval_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

# Trainer for the baseline model; the held-out split is used for evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
# Baseline: evaluate on `tokenized_test` without calling `trainer.train()`
# first. `compute_metrics` adds the accuracy to the returned metrics.
trainer.evaluate()

## Performing Parameter-Efficient Fine-Tuning

TODO: In the cells below, create a PEFT model from your loaded model, run a training loop, and save the PEFT model weights.

In [None]:
# Reload the base checkpoint so the LoRA run starts from a fresh model
# object rather than the one whose base parameters were frozen above.
# `model_name`, `tokenizer`, `num_labels`, `id2label` and `label2id` are
# reused from the earlier cells.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# Define the LoRA configuration: rank-8 adapters on the attention
# query/key/value projections, for a sequence-classification task.
config = LoraConfig(
    task_type='SEQ_CLS',
    target_modules=["q_lin", "k_lin", "v_lin"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

# Apply LoRA to the model
fine_tuned_model = get_peft_model(model, config)

# Print trainable parameters
fine_tuned_model.print_trainable_parameters()

In [None]:
# Prepare for training
# `compute_metrics` is defined once in the baseline section above and is
# reused here instead of being redefined with an identical body.

# Each run writes to its own timestamped directory under ./results/<model>-lora/.
training_args = TrainingArguments(
    output_dir=f"./results/{model_name}-lora/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=300,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Evaluate and save on the same schedule; `load_best_model_at_end`
    # requires the two strategies to match.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

# The `tokenizer=` keyword is deliberately not passed: it is deprecated in
# recent transformers releases, and the explicit data collator already
# handles padding. This matches how the baseline trainer is built.
lora_trainer = Trainer(
    model=fine_tuned_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics,
)

In [None]:
# Run the LoRA fine-tuning loop with the settings in `training_args`
# (5 epochs, evaluated and checkpointed once per epoch).
lora_trainer.train()

In [None]:
# Evaluate the fine-tuned model on `tokenized_test`, the `eval_dataset`
# that was passed to `lora_trainer`.
lora_trainer.evaluate()

In [None]:
# Save the LoRA model
# The inference section below loads the model back from this same path.
fine_tuned_model.save_pretrained(f"./peft/{model_name}-lora")

In [None]:
# Display the PEFT-wrapped model as the cell output to inspect its structure.
fine_tuned_model

## Performing Inference with a PEFT Model

TODO: In the cells below, load the saved PEFT model weights and evaluate the performance of the trained PEFT model. Be sure to compare the results to the results from prior to fine-tuning.

In [None]:
# Load the saved PEFT (LoRA) model weights and evaluate performance.
# No `torch_dtype` override is passed: forcing float16 is poorly supported on
# the CPU fallback below and would make the comparison with the baseline
# (evaluated in the library's default precision) numerically unfair.
loaded_lora_model = AutoPeftModelForSequenceClassification.from_pretrained(
    f"./peft/{model_name}-lora",
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

# Move model to the best available device: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
loaded_lora_model = loaded_lora_model.to(device)

# Evaluate on the held-out split. The deprecated `tokenizer=` keyword is not
# passed; the explicit data collator already handles padding.
lora_eval_trainer = Trainer(
    model=loaded_lora_model,
    eval_dataset=tokenized_test,
    data_collator=DataCollatorWithPadding(tokenizer),
    compute_metrics=compute_metrics,
)
lora_eval_results = lora_eval_trainer.evaluate()
print("PEFT Model Evaluation Results:", lora_eval_results)

# For comparison, print the base model evaluation results if available.
# This stays best-effort: a failure is reported, not raised, so the PEFT
# results above are still shown.
try:
    print("Base Model Evaluation Results:", trainer.evaluate())
except Exception as e:
    print("Base model evaluation not available or failed:", e)

In [None]:
# Print only the accuracy comparison between PEFT and base models.
peft_accuracy = lora_eval_results.get('eval_accuracy', None)

# The base evaluation is best-effort, but the reason for a failure is kept
# so it can be reported instead of being silently discarded.
base_accuracy = None
base_error = None
try:
    base_results = trainer.evaluate()
    base_accuracy = base_results.get('eval_accuracy', None)
except Exception as e:
    base_error = e

print("Accuracy Comparison:")
print(f"PEFT Model Accuracy: {peft_accuracy}")
if base_accuracy is not None:
    print(f"Base Model Accuracy: {base_accuracy}")
elif base_error is not None:
    print(f"Base model accuracy not available or failed: {base_error!r}")
else:
    # Evaluation ran, but produced no 'eval_accuracy' entry.
    print("Base model accuracy not available or failed.")