# Load Libraries

In [1]:
# Shell escape: print the NVIDIA driver/CUDA versions and the current GPU usage table.
!nvidia-smi

Sun Apr 27 15:41:32 2025       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 517.00       Driver Version: 517.00       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   53C    P8     5W /  N/A |    136MiB /  4096MiB |     25%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# For finetuning
import os
import torch

# Configure Finetune Parameters

In [3]:
# Implemented choices for each axis of the experiment. The integers picked
# further down are positions in these lists.
models = [
    't5-base',
    'bart-base',
    'gpt2',
]
datasets = [
    'squad',
    'wmt16_en_de',
    'imdb',
]
finetunes = [
    'full',
    'lora',
    'adapters',
]

# Active selection, one index per list above:
# models[2] -> 'gpt2', datasets[0] -> 'squad', finetunes[0] -> 'full'.
model = 2
dataset = 0
finetune = 0

## Save Directory for the Fine-tuned Model

In [4]:
# Dataset name -> short task label; the label becomes the last component of
# the saved model's directory name.
task = dict(
    squad="qa",
    wmt16_en_de="translation",
    imdb="textsentiment",
)

# Output location for the fine-tuned model, built from the selected model,
# fine-tune strategy and task label (e.g. 'models/ft-gpt2-full-qa' for the
# selection model=2, dataset=0, finetune=0).
model_path = f'models/ft-{models[model]}-{finetunes[finetune]}-{task[datasets[dataset]]}'

# `exist_ok=True` already makes this call a no-op when the directory exists,
# so a separate `os.path.exists` guard is redundant. Without the guard, a
# stray regular file named 'models' raises FileExistsError here instead of
# being silently accepted and breaking the save step later.
os.makedirs('models', exist_ok=True)

## Hyperparameters

In [5]:
# --- Optimisation settings ---
learning_rate = 5e-5
weight_decay = 0.02
num_train_epochs = 1

# --- Logging / device flags ---
logging_steps = 1
use_cpu = True

# --- Batch sizes: lower these if CUDA runs out of memory ---
train_batch_size = eval_batch_size = 1

# --- Test-mode switch: set to `False` for full training ---
test = True

In [6]:
# Default choice: use the GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Manual override used while testing: always run on the CPU, whatever the
# probe above selected.
device = torch.device('cpu')

print(f'Using device: {device}.')

Using device: cpu.


# Setup Trainer

Load the Weights & Biases (wandb) API keys from `api_key.json`.

In [7]:
import json

# Parse the local credentials file; the handle is closed as soon as the
# JSON has been read.
with open('api_key.json', mode='r', encoding='utf-8') as key_file:
    api_keys = json.load(key_file)

# The values handed to the trainer as wandb credentials are stored under the
# 'hf_token' and 'hf_api' keys of that file.
WANDB_TOKEN = api_keys['hf_token']
WANDB_API = api_keys['hf_api']

Configure the trainer and start the fine-tuning run.

In [None]:
# Import trainers pipeline
from modules.train.trainer import BaseTrainer

"""
Args:
    device (torch.device): device used for finetuning.
    model (str): name of the model.
    dataset (str): name of the dataset.
    finetune (str): name of the finetune strategy.
"""

# Build the trainer from the selections and batch sizes chosen in the
# configuration cells; the `test` flag is forwarded unchanged.
trainer = BaseTrainer(
    device=device,
    model=models[model],
    dataset=datasets[dataset],
    finetune=finetunes[finetune],
    train_batch_size=train_batch_size,
    eval_batch_size=eval_batch_size,
    test=test,
)

# Hand the wandb credentials and project name to the trainer.
trainer.set_wandb_api(
    wandb_token=WANDB_TOKEN,
    wandb_api=WANDB_API,
    project='phat-ft-nlp-test',
)

# Launch the training loop with the hyperparameters defined earlier; the
# model is saved under `model_path`.
trainer.run(
    saved_model=model_path,
    num_train_epochs=num_train_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    use_cpu=use_cpu,
)