In [1]:
# Sanity check: confirm an NVIDIA GPU is visible to this runtime before training.
# `nvidia-smi` reports the GPU model, driver/CUDA versions, and current memory use.
!nvidia-smi

Tue Jul  2 00:34:48 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:35:00.0 Off |                    0 |
| N/A   52C    P0              22W /  72W |      0MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Fine-tune with Low-rank Adaptation (LoRA)
- Model: `distilbert/distilbert-base-uncased`
- Dataset: `stanfordnlp/imdb`
- Task: Text classification (Binary)

## Installations

In [2]:
# Installations
!pip install transformers datasets lightning rich -q

from rich import print
MODEL_NAME = "distilbert/distilbert-base-uncased"
DATASET_NAME = "stanfordnlp/imdb"


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Loading the dataset

In [3]:
from data_utils import DatasetUtils

# Build the data pipeline once; the loaders created here are reused by every
# training / evaluation cell further down.
dataset_utils = DatasetUtils(
    dataset_uri=DATASET_NAME, model_uri=MODEL_NAME, batch_size=64, num_workers=8
)

# One loader per split, requested in train -> val -> test order.
train_loader, val_loader, test_loader = (
    dataset_utils.get_data_loader(split) for split in ("train", "val", "test")
)

[DEBUG]Loading the dataset...
[DEBUG]Splitting the dataset...
[DEBUG]Tokenizing the dataset...
[DEBUG]Setting up the dataloaders...
[DEBUG]Data setup complete.


## [Baseline] test performance (w/o fine-tuning)

In [None]:
from models import BaseModel

# Baseline: the model is built with freeze_all=True and is never trained in
# this cell, so the numbers below are the reference point for the other methods.
base_model = BaseModel(
    model_uri=MODEL_NAME,
    num_classes=2,
    freeze_all=True
)

# Test baseline performance on downstream task
test_loss, test_accuracy = base_model.predict(test_loader)
# Four decimals for the loss: the training log in this notebook reports losses
# around 0.005-0.008, which `.2f` would collapse to 0.01 and make incomparable.
print(f"Test Loss [Baseline]: {test_loss:.4f}")
print(f"Test accuracy [Baseline]: {test_accuracy:.2f}%")

## [Simple Fine-tuning]
Unfreeze only the last two `Linear` layers and train the model.


In [None]:
from models import SimpleFTModel

# Define the model (constructed with its default arguments)
simple_ft_model = SimpleFTModel()

# Train the model, validating against val_loader
simple_ft_model.train(
    train_loader,
    val_loader,
    num_epochs=10,
)

# Test performance on downstream task
test_loss, test_accuracy = simple_ft_model.predict(test_loader)
# Four decimals for the loss: the training log in this notebook reports losses
# around 0.005-0.008, which `.2f` would collapse to 0.01 and make incomparable.
print(f"Test Loss [Simple Fine-tuning]: {test_loss:.4f}")
print(f"Test accuracy [Simple Fine-tuning]: {test_accuracy:.2f}%")

## [LoRA]

### Train the model

In [None]:
from models import LoRAModel

# Define the model
# NOTE(review): presumably lora_rank is the rank of the low-rank update and
# lora_alpha its scaling factor — confirm against models.LoRAModel.
lora_model = LoRAModel(
    lora_rank=2,
    lora_alpha=5
)

# Train the model, validating against val_loader
lora_model.train(
    train_loader,
    val_loader,
    num_epochs=10,
)

# Test performance on downstream task
test_loss, test_accuracy = lora_model.predict(test_loader)
# Four decimals for the loss: the training log in this notebook reports losses
# around 0.005-0.008, which `.2f` would collapse to 0.01 and make incomparable.
print(f"Test Loss [LoRA]: {test_loss:.4f}")
print(f"Test accuracy [LoRA]: {test_accuracy:.2f}%")

## [Adapter]

In [4]:
from models import AdaptedModel

# Define the model
# NOTE(review): bottleneck_dim is presumably the hidden width of each adapter
# module — confirm against models.AdaptedModel.
adapted_model = AdaptedModel(
    bottleneck_dim=4
)

# Train the model, validating against val_loader
# NOTE(review): this runs for 1 epoch while the Simple Fine-tuning and LoRA
# cells use 10 — confirm this is intended, since the results are otherwise not
# a like-for-like comparison.
adapted_model.train(
    train_loader,
    val_loader,
    num_epochs=1,
)

# Test performance on downstream task
test_loss, test_accuracy = adapted_model.predict(test_loader)
# Four decimals for the loss: the training log for this cell reports losses
# around 0.005-0.008, which `.2f` would collapse to 0.01 and make incomparable.
print(f"Test Loss [Adapter]: {test_loss:.4f}")
print(f"Test accuracy [Adapter]: {test_accuracy:.2f}%")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Using device: cuda


[DEBUG]Adding Adapters...


[DEBUG]Unfreezing specific layers...


% of trainable parameters: 0.13 %



Epoch 1/1 (Train): 100%|██████████| 317/317 [06:56<00:00,  1.32s/it]
Validation: 100%|██████████| 36/36 [00:23<00:00,  1.55it/s]



Epoch 1/1, LR: 0.0003, Train Loss: 0.0078, Train Accuracy: 74.65%, Val Loss: 0.0056, Val Accuracy: 85.33%



Test: 100%|██████████| 40/40 [00:25<00:00,  1.59it/s]
