# Homework 4
- 開始寫作業前，若使用 Colab 請先設定使用 GPU!!

In [None]:
# 0. 安裝套件

# !pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124
# !pip install -r requirements.txt

In [None]:
# 1. 載入套件

import torch
from pathlib import Path
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score

# Hugging Face PEFT
from peft import get_peft_model, LoraConfig, TaskType
from peft.utils.other import prepare_model_for_kbit_training

# 客製化模組
from dataset import SemevalDataset
from model import MultiLabelModel

In [None]:
# 2. Configuration
# Every value a reader may want to tune lives in this cell.

MODEL_NAME = "microsoft/deberta-large" # "bert-base-uncased"
LR = 1e-5
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 8
VAL_BATCH_SIZE = 8
SAVE_DIR = "./saved_models/"

# Create the checkpoint directory. `exist_ok=True` makes the cell idempotent,
# so re-running it (or a concurrent creation) never raises.
Path(SAVE_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# 3. Load the dataset and preview a few records

# Keep only the first three training records for a quick sanity check.
data_sample = SemevalDataset(split="train").data[:3]
preview_rows = [f"{data_sample[idx]}" for idx in range(3)]
print("Dataset example: \n" + " \n".join(preview_rows))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}


In [None]:
# 4. Load the tokenizer that matches MODEL_NAME

tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME,
    cache_dir="./cache/",  # downloaded tokenizer files are cached here
)

In [None]:
# 5. Collate a list of samples into one batch
# - pull the 'premise' and 'hypothesis' text out of every sample
# - tokenize the sentence pairs into token ids and convert them to tensors
# - convert both label columns to tensors as well

def collate_fn(batch, max_length=512):
    """Turn a list of dataset samples into model-ready tensors.

    Args:
        batch: list of samples. Each sample is assumed to be a dict with the
            keys 'premise', 'hypothesis', 'relatedness_score' and
            'entailment_judgment' (as in the dataset preview) — TODO confirm
            against `SemevalDataset.__getitem__`.
        max_length: upper bound on the tokenized pair length; longer inputs are
            truncated. 512 is assumed to fit the chosen backbone — adjust if not.

    Returns:
        (input_text, labels1, labels2) where
        - input_text is the tokenizer output (unpacked as `model(**input_text)`),
        - labels1 is a 1-D float32 tensor of relatedness scores (regression),
        - labels2 is a 1-D int64 tensor of entailment classes (classification).
        All tensors are placed on the global `device`, because the training and
        evaluation loops feed them to the model without moving them.
    """
    premises = [sample["premise"] for sample in batch]
    hypotheses = [sample["hypothesis"] for sample in batch]

    # Encode as sentence pairs; pad to the longest pair in this batch only.
    input_text = tokenizer(
        premises,
        hypotheses,
        padding=True,
        truncation=True,
        max_length=max_length,  # explicit, so truncation is not silently disabled
        return_tensors="pt",
    ).to(device)

    labels1 = torch.tensor(
        [sample["relatedness_score"] for sample in batch],
        dtype=torch.float32,
        device=device,
    )
    # CrossEntropyLoss expects integer class indices.
    labels2 = torch.tensor(
        [sample["entailment_judgment"] for sample in batch],
        dtype=torch.long,
        device=device,
    )
    return input_text, labels1, labels2

In [None]:
# 6. Build the DataLoaders

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    dataset=SemevalDataset(split="train"),
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)
# Validation keeps the dataset order so results are comparable across epochs.
val_loader = DataLoader(
    dataset=SemevalDataset(split="validation"),
    batch_size=VAL_BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
# 7. Loss functions
# This is multi-output learning, so each output gets its own loss:
# loss_fn1 (MSE) for the regression output, loss_fn2 (cross-entropy) for the class output.

loss_fn1, loss_fn2 = torch.nn.MSELoss(), torch.nn.CrossEntropyLoss()

In [None]:
# 8. Evaluation metrics

# Rank correlation for the regression output.
spc = SpearmanCorrCoef()
# Accuracy and macro-averaged F1 over the 3 classes of the classification output.
acc = Accuracy(num_classes=3, task="multiclass")
f1 = F1Score(num_classes=3, task="multiclass", average="macro")



In [None]:
# 9. Build the model and move it to the GPU (falls back to CPU when CUDA is unavailable)

if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

model = MultiLabelModel(MODEL_NAME)
model = model.to(device)

In [None]:
# 10. Configure LoRA
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,                          # rank of the LoRA update matrices
    lora_alpha=32,                 # LoRA scaling factor
    lora_dropout=0.1,              # dropout applied inside the LoRA layers
    bias="none",                   # do not train any bias terms
    # Modules that receive LoRA adapters (matched by module-name suffix).
    # DeBERTa v1 checkpoints such as "microsoft/deberta-large" use a fused
    # Q/K/V projection called "in_proj"; "query_proj"/"key_proj"/"value_proj"
    # are the DeBERTa v2/v3 names. Names that do not occur in the model are
    # ignored, so listing both keeps the attention projections adapted
    # whichever DeBERTa variant MODEL_NAME points to.
    target_modules=["in_proj", "query_proj", "key_proj", "value_proj", "output.dense"],
)

# Prepare the backbone.
# NOTE(review): this helper targets quantized (k-bit) models; for a
# full-precision model it presumably only freezes the base weights —
# confirm before removing it.
model = prepare_model_for_kbit_training(model)

# Wrap the model as a PEFT model.
model = get_peft_model(model, peft_config)

# Train the LoRA adapters AND the two task heads, freeze everything else.
# The flag is set explicitly in both directions: the helpers above freeze
# every non-LoRA weight, so only ever setting `requires_grad = False` would
# leave the (randomly initialised) heads frozen.
trainable_keywords = ("lora", "regression_head", "classification_head")
for name, param in model.named_parameters():
    param.requires_grad = any(keyword in name for keyword in trainable_keywords)

# Report how many parameters will actually be trained.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"可訓練參數數量: {trainable_params} ({trainable_params/total_params:.2%})")

可訓練參數數量: 2752512 (0.67%)


In [None]:
# 11. Create the optimizer

# Parameters with requires_grad=False never receive gradients, so AdamW leaves them untouched.
optimizer = AdamW(params=model.parameters(), lr=LR)

In [None]:
# 12. Evaluation function

def do_test(
    dataloader,
    model,
    loss_fn1,
    loss_fn2,
    mode="validation",
    cur_epoch=0,
    num_epochs=NUM_EPOCHS,
):
    """Evaluate `model` on `dataloader`, print the metrics and return mean losses.

    Args:
        dataloader: yields `(input_text, labels1, labels2)` batches.
        model: called as `model(**input_text)`; must return
            `(regression_output, class_logits)`.
        loss_fn1: loss for the regression output.
        loss_fn2: loss for the classification output.
        mode: label shown in the progress bar.
        cur_epoch: zero-based epoch index shown in the progress bar.
        num_epochs: total number of epochs shown in the progress bar.

    Returns:
        `(loss1, loss2)`: the per-batch average of each loss.

    Side effects:
        Puts the model in eval mode and prints Spearman correlation, accuracy
        and F1 computed with the module-level `spc`, `acc` and `f1` metrics.
    """
    model.eval()

    pbar = tqdm(dataloader)
    # Use the `num_epochs` argument (not the global) so callers control the label.
    pbar.set_description(f"{mode} epoch [{cur_epoch+1}/{num_epochs}]")

    # Collect per-batch tensors and concatenate once after the loop.
    pred1_batches, pred2_batches = [], []
    gt1_batches, gt2_batches = [], []
    loss1 = 0.0
    loss2 = 0.0

    with torch.no_grad():
        for input_text, labels1, labels2 in pbar:
            outputs1, outputs2 = model(**input_text)

            # Flatten the regression output BEFORE the loss so its shape matches
            # the 1-D labels (avoids silent broadcasting in the MSE). Unlike
            # `.squeeze()`, `reshape(-1)` stays 1-D for a batch of size one.
            outputs1 = outputs1.reshape(-1)

            loss1 += loss_fn1(outputs1, labels1).item()
            loss2 += loss_fn2(outputs2, labels2).item()

            outputs2 = torch.argmax(outputs2, dim=-1)
            pred1_batches.append(outputs1.to("cpu"))
            pred2_batches.append(outputs2.to("cpu"))
            gt1_batches.append(labels1.to("cpu"))
            gt2_batches.append(labels2.to("cpu"))

    # Regression tensors are cast to float32; class tensors keep their integer dtype.
    pred1 = torch.cat(pred1_batches).float()
    gt1 = torch.cat(gt1_batches).float()
    pred2 = torch.cat(pred2_batches)
    gt2 = torch.cat(gt2_batches)

    print(f"Spearman Corr: {spc(pred1, gt1)} \nAccuracy: {acc(pred2, gt2)} \nF1 Score: {f1(pred2, gt2)}")
    loss1 /= len(dataloader)
    loss2 /= len(dataloader)
    return loss1, loss2

In [None]:
# 13. Train the model

for ep in range(NUM_EPOCHS):
    pbar = tqdm(train_loader)
    pbar.set_description(f"Training epoch [{ep+1}/{NUM_EPOCHS}]")
    model.train()

    for input_text, labels1, labels2 in pbar:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: one regression output and one set of class logits.
        outputs1, outputs2 = model(**input_text)

        # Flatten the regression output so it matches the 1-D score labels;
        # the two task losses are summed with equal weight.
        loss = loss_fn1(outputs1.reshape(-1), labels1) + loss_fn2(outputs2, labels2)

        # Back-propagation and parameter update.
        loss.backward()
        optimizer.step()

        pbar.set_postfix(loss=loss.item())

    val_loss1, val_loss2 = do_test(
        val_loader,
        model,
        loss_fn1,
        loss_fn2,
        mode="validation",
        cur_epoch=ep,
        num_epochs=NUM_EPOCHS,
    )
    # Build the checkpoint path from SAVE_DIR so the file location and the
    # message below always agree.
    torch.save(model, f"{SAVE_DIR}ep{ep}.ckpt")
    print(f"Model saved to {SAVE_DIR}ep{ep}.ckpt!")

Training epoch [1/3]:   0%|          | 0/563 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
Training epoch [1/3]: 100%|██████████| 563/563 [01:36<00:00,  5.85it/s, loss=1.32]
validation epoch [1/3]: 100%|██████████| 63/63 [00:06<00:00,  9.15it/s]


Spearman Corr: 0.19111038744449615 
Accuracy: 0.5640000104904175 
F1 Score: 0.24040921032428741
Model saved to ./saved_models/ep0.ckpt!


Training epoch [2/3]: 100%|██████████| 563/563 [01:24<00:00,  6.63it/s, loss=1.41]
validation epoch [2/3]: 100%|██████████| 63/63 [00:04<00:00, 13.98it/s]


Spearman Corr: 0.2858576476573944 
Accuracy: 0.6159999966621399 
F1 Score: 0.3575589954853058
Model saved to ./saved_models/ep1.ckpt!


Training epoch [3/3]: 100%|██████████| 563/563 [01:23<00:00,  6.72it/s, loss=1.94]
validation epoch [3/3]: 100%|██████████| 63/63 [00:04<00:00, 14.00it/s]


Spearman Corr: 0.3598698377609253 
Accuracy: 0.6700000166893005 
F1 Score: 0.47290879487991333
Model saved to ./saved_models/ep2.ckpt!
