# Load Libraries

In [1]:
# Print the NVIDIA driver / GPU status table (driver and CUDA version, memory usage).
!nvidia-smi

Mon Apr 14 21:27:03 2025       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 517.00       Driver Version: 517.00       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   54C    P8     3W /  N/A |    115MiB /  4096MiB |     31%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# For finetuning
import os
import torch

# Configure Finetune Parameters

In [3]:
# Supported choices for each axis of the finetuning experiment.
models = [
    't5-base',
    'bart-base',
    'prophetnet-large-uncased',
]
datasets = [
    'squad',
    'wmt16_en_de',
    'imdb',
]
finetunes = [
    'full',
    'lora',
    'adapters',
]

# Positions into the lists above that pick the active model, dataset and
# finetune strategy (all 0 -> 't5-base', 'squad', 'full').
model = 0
dataset = 0
finetune = 0

## Save Directory for the Finetuned Model

In [4]:
# Short task tag for each supported dataset; used to build the save path.
task = {
    "squad": "qa",
    "wmt16_en_de": "translation",
    "imdb": "textsentiment"
}

# Where the finetuned model is saved: models/ft-<model>-<finetune>-<task>
model_path = f'models/ft-{models[model]}-{finetunes[finetune]}-{task[datasets[dataset]]}'

# exist_ok=True already makes this a no-op when the directory exists, so no
# separate os.path.exists() check is needed.
os.makedirs('models', exist_ok=True)

## Hyperparameters

In [None]:
# Batch sizes handed to BaseTrainer -- reduce if CUDA runs out of memory
train_batch_size, eval_batch_size = 1, 1

# Training configuration
num_train_epochs = 1
learning_rate = 5e-5
weight_decay = 0.02
logging_steps = 1

# Passed to `trainer.run` as its `use_cpu` argument
use_cpu = True

# Set to `False` for full training
test = True

In [6]:
# Pick the device used for finetuning.
# CPU is chosen when any of the following holds:
#   * `use_cpu` is set in the hyperparameter cell (manual override for testing),
#   * the selected model is prophetnet (set up manually since it is too large),
#   * CUDA is not available.
# Otherwise the GPU is used. The CPU override is driven by `use_cpu` rather
# than an unconditional reassignment, so the selection logic stays live and
# the device follows the same flag that is passed to `trainer.run`.
# NOTE(review): `use_cpu` is forwarded to `trainer.run` separately; confirm it
# should also be True whenever prophetnet forces the CPU here.
force_cpu = use_cpu or models[model] == 'prophetnet-large-uncased'
if force_cpu or not torch.cuda.is_available():
    device = torch.device('cpu')
else:
    device = torch.device('cuda')

print(f'Using device: {device}.')

Using device: cpu.


# Setup Trainer

Load the API credentials (used for Weights & Biases logging) from `api_key.json`.

In [7]:
import json

# Read the API credentials from the local `api_key.json` file.
with open('api_key.json', encoding='utf-8') as file:
    api_keys = json.load(file)

# Both values are later handed to `trainer.set_wandb_api`.
# NOTE(review): the JSON keys are prefixed `hf_` while the variables are named
# WANDB_* -- confirm which service each credential belongs to.
WANDB_TOKEN, WANDB_API = [api_keys[key] for key in ('hf_token', 'hf_api')]

Configure the trainer, register the API credentials, and start the finetuning run.

In [None]:
# Import trainers pipeline
from modules.trainer import BaseTrainer

# BaseTrainer arguments used below:
#   device (torch.device): device used for finetuning.
#   model (str): name of the model.
#   dataset (str): name of the dataset.
#   finetune (str): name of the finetune strategy.
#   train_batch_size / eval_batch_size: batch sizes from the hyperparameter cell.
#   test: the `test` flag from the hyperparameter cell.

# Build the trainer for the selected model / dataset / finetune strategy
trainer = BaseTrainer(
    device=device,
    model=models[model],
    dataset=datasets[dataset],
    finetune=finetunes[finetune],
    train_batch_size=train_batch_size,
    eval_batch_size=eval_batch_size,
    test=test,
)

# Register the API credentials and the project name
trainer.set_wandb_api(
    wandb_token=WANDB_TOKEN,
    wandb_api=WANDB_API,
    project='phat-ft-nlp-test',
)

# Collect the training-loop settings, then start training
run_config = {
    'saved_model': model_path,
    'num_train_epochs': num_train_epochs,
    'learning_rate': learning_rate,
    'weight_decay': weight_decay,
    'use_cpu': use_cpu,
}
trainer.run(**run_config)

  from .autonotebook import tqdm as notebook_tqdm



- Using device: cpu
- No. epoch(s): 1
- Train batch size: 1
- Eval batch size: 1

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\quang\.cache\huggingface\token
Login successful


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


****************** Finetune information ******************
- Model: t5-base
- Dataset: squad
- Finetune strategy: full



[34m[1mwandb[0m: Currently logged in as: [33mtuanbc88[0m ([33mtuanbc88-hcmut[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/16 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 4.00 GiB total capacity; 3.16 GiB already allocated; 0 bytes free; 3.44 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF