In [None]:
import os

import torch
import transformers as T
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score
from tqdm import tqdm

# Run on the first CUDA GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# According to the original author's note, some Chinese (full-width) punctuation
# marks are encoded as [UNK] by the tokenizer, so each one is paired here with
# the ASCII punctuation that should replace it.
# Every entry is a two-item list: [original mark, replacement].
token_replacement = [
    [fullwidth_mark, ascii_mark]
    for fullwidth_mark, ascii_mark in (
        ("：", ":"),
        ("，", ","),
        ("“", '"'),
        ("”", '"'),
        ("？", "?"),
        ("……", "..."),
        ("！", "!"),
    )
]

In [3]:
# Load the BERT tokenizer for the uncased base checkpoint; downloaded files are
# cached under ./cache/.
tokenizer = T.BertTokenizer.from_pretrained(
    "google-bert/bert-base-uncased",
    cache_dir="./cache/",
)



In [4]:
class SemevalDataset(Dataset):
    """Map-style dataset over one split of ``sem_eval_2014_task_1``.

    The whole split is loaded once in ``__init__`` and kept in ``self.data``
    as a list of record dicts.
    """

    def __init__(self, split="train") -> None:
        """Load the requested split.

        Args:
            split: One of ``"train"``, ``"validation"`` or ``"test"``.

        Raises:
            AssertionError: If ``split`` is not one of the three names above.
        """
        super().__init__()
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/"
        ).to_list()

    def __getitem__(self, index):
        """Return the record at ``index`` with its punctuation normalised.

        The ``premise`` and ``hypothesis`` strings have every mark listed in
        ``token_replacement`` replaced by its ASCII counterpart.

        A shallow copy of the stored record is returned, so reading an item
        never modifies ``self.data`` (previously the cached dict itself was
        edited in place).
        """
        record = dict(self.data[index])
        # Replace the listed Chinese punctuation marks in both sentences.
        for field in ("premise", "hypothesis"):
            for original_mark, replacement in token_replacement:
                record[field] = record[field].replace(original_mark, replacement)
        return record

    def __len__(self):
        """Return the number of records in the loaded split."""
        return len(self.data)

# Peek at the first three training records. They are read straight from
# `.data`, so `__getitem__` (and its punctuation replacement) is not involved.
data_sample = SemevalDataset(split="train").data[:3]
print("Dataset example: \n" + " \n".join(str(data_sample[i]) for i in range(3)))

Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}


In [5]:
# Display the first stored training record as this cell's output.
SemevalDataset("train").data[0]

{'sentence_pair_id': 1,
 'premise': 'A group of kids is playing in a yard and an old man is standing in the background',
 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background',
 'relatedness_score': 4.5,
 'entailment_judgment': 0}

In [6]:
# Hyperparameters shared by the cells below.
lr, epochs = 2e-5, 5  # optimizer learning rate, number of training epochs
# Training and validation use the same batch size.
train_batch_size = validation_batch_size = 8

In [7]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

import torch.utils
import torch.utils.data
import torch.utils.data.dataloader
import torch.utils.data.dataset


def collate_fn(batch):
    """Pack a list of dataset records into one tokenized batch plus labels.

    Args:
        batch: Sequence of record dicts. Each record must provide the keys
            ``premise``, ``hypothesis``, ``relatedness_score`` and
            ``entailment_judgment``.

    Returns:
        A dict with three entries:

        * ``input_text``: dict holding the tokenizer's ``input_ids``,
          ``token_type_ids`` and ``attention_mask`` tensors for the
          (premise, hypothesis) pairs, padded to the longest pair in the batch.
        * ``label1``: float tensor with the relatedness scores.
        * ``label2``: long tensor with the entailment labels.

    The tensors are deliberately left on the CPU. The training and evaluation
    loops in this notebook already move every tensor to ``device``, and a
    collate function that touches CUDA becomes unsafe as soon as the
    DataLoader runs it in worker processes (``num_workers > 0``).
    """
    # Split the records into one list per field.
    premises = [item['premise'] for item in batch]
    hypotheses = [item['hypothesis'] for item in batch]
    relatedness_scores = [item['relatedness_score'] for item in batch]
    entailment_judgements = [item['entailment_judgment'] for item in batch]

    # Encode the sentence pairs into the format the model reads:
    #   input_ids      - id of every token
    #   token_type_ids - marks whether a token belongs to the first or second sentence
    #   attention_mask - marks which positions are padding
    encoding = tokenizer(
        premises,
        hypotheses,
        padding=True,
        truncation=True,
        return_tensors='pt',
        return_token_type_ids=True,
    )

    # Pack everything into the output dict; labels become tensors here.
    return {
        'input_text': {
            'input_ids': encoding['input_ids'],
            'token_type_ids': encoding['token_type_ids'],
            'attention_mask': encoding['attention_mask'],
        },
        'label1': torch.tensor(relatedness_scores, dtype=torch.float),
        'label2': torch.tensor(entailment_judgements, dtype=torch.long),
    }
    
    

# TODO1-2: one DataLoader per split, all batched through `collate_fn`.

# Only the training split is shuffled.
dl_train = DataLoader(
    dataset=SemevalDataset(split="train"),
    batch_size=train_batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

# Validation keeps the dataset order.
dl_validation = DataLoader(
    dataset=SemevalDataset(split="validation"),
    batch_size=validation_batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

# The test split reuses the validation batch size and also keeps its order.
dl_test = DataLoader(
    dataset=SemevalDataset(split="test"),
    batch_size=validation_batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [8]:
# TODO2: Construct your model
class MultiLabelModel(torch.nn.Module):
    """BERT encoder followed by two linear heads on the first token's state.

    ``regression`` emits one value per example (used for the relatedness
    score) and ``classification`` emits three logits per example (used for the
    entailment label).
    """

    def __init__(self, *args, **kwargs):
        """Create the encoder and the two task heads."""
        super().__init__(*args, **kwargs)
        self.bert = T.BertModel.from_pretrained("bert-base-uncased", cache_dir="./cache/")
        hidden_dim = self.bert.config.hidden_size
        self.regression = torch.nn.Linear(hidden_dim, 1)
        self.classification = torch.nn.Linear(hidden_dim, 3)

    def forward(self, **kwargs):
        """Encode a batch and apply both heads.

        Args:
            **kwargs: Must contain ``input_text``, a dict with ``input_ids``,
                ``token_type_ids`` and ``attention_mask``. Any other keyword
                (for example the label entries of a batch) is ignored.

        Returns:
            Tuple ``(regression_output, classification_output)``.
        """
        encoder_inputs = kwargs['input_text']
        encoder_keys = ('input_ids', 'token_type_ids', 'attention_mask')
        encoded = self.bert(**{name: encoder_inputs[name] for name in encoder_keys})

        # Hidden state at sequence position 0 feeds both heads.
        first_token_state = encoded.last_hidden_state[:, 0, :]
        return self.regression(first_token_state), self.classification(first_token_state)

# Build the model, then place its parameters on the selected device.
model = MultiLabelModel()
model = model.to(device)

In [9]:
# TODO3: optimizer, loss functions and scoring metrics

# TODO3-1: Adam over every model parameter, using the learning rate `lr`.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

# TODO3-2: one loss per sub-task.
loss_fn_regression = torch.nn.MSELoss()  # applied to the regression head
loss_fn_classification = torch.nn.CrossEntropyLoss()  # applied to the 3-way classification head

# Scoring functions used during validation and testing.
spc = SpearmanCorrCoef()
acc = Accuracy(num_classes=3, task="multiclass")
f1 = F1Score(num_classes=3, task="multiclass", average="macro")



In [10]:
# Create the checkpoint directory up front so the per-epoch torch.save() below
# cannot fail on a missing folder after an epoch of training has already run.
os.makedirs("./saved_models", exist_ok=True)

for ep in range(epochs):
    # ---- Training ----
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()

    for batch in pbar:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Move labels and encoder inputs to the target device.
        relatedness_scores = batch['label1'].to(device)
        entailment_judgements = batch['label2'].to(device)
        batch['input_text'] = {
            k: v.to(device) for k, v in batch['input_text'].items() if isinstance(v, torch.Tensor)
        }

        # Forward pass and losses. squeeze(-1) drops only the trailing size-1
        # dimension of the regression head; a bare squeeze() would also drop
        # the batch dimension when a batch holds a single sample, leaving a
        # 0-d tensor that no longer matches the label shape.
        regression_output, classification_output = model(**batch)
        loss1 = loss_fn_regression(regression_output.squeeze(-1), relatedness_scores)
        loss2 = loss_fn_classification(classification_output, entailment_judgements)
        loss = loss1 + loss2  # unweighted sum of the two task losses

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        # Show the current batch loss on the progress bar.
        pbar.set_postfix({
            "Loss": loss.item()
        })

    # ---- Validation ----
    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()

    # Keep the metric objects on the same device as the model outputs.
    spc.to(device)
    acc.to(device)
    f1.to(device)

    # Start every epoch from empty metric state.
    spc.reset()
    acc.reset()
    f1.reset()

    with torch.no_grad():
        for batch in pbar:
            relatedness_scores = batch['label1'].to(device)
            entailment_judgements = batch['label2'].to(device)
            batch['input_text'] = {
                k: v.to(device) for k, v in batch['input_text'].items() if isinstance(v, torch.Tensor)
            }

            regression_output, classification_output = model(**batch)

            # Accumulate the metrics over the whole validation split.
            spc.update(regression_output.squeeze(-1), relatedness_scores)
            acc.update(classification_output, entailment_judgements)
            f1.update(classification_output, entailment_judgements)

    print(f"SpearmanCorr: {spc.compute().item()}\n"
          f"Accuracy: {acc.compute().item()}\nF1Score: {f1.compute().item()}\n"
          )

    # Save a checkpoint for this epoch (the whole module object, not only a state_dict).
    torch.save(model, f'./saved_models/ep{ep}.ckpt')


Training epoch [1/5]: 100%|██████████| 563/563 [00:14<00:00, 39.04it/s, Loss=0.305]
Validation epoch [1/5]: 100%|██████████| 63/63 [00:00<00:00, 150.21it/s]


SpearmanCorr: 0.7813636660575867
Accuracy: 0.8500000238418579
F1Score: 0.8494459390640259



Training epoch [2/5]: 100%|██████████| 563/563 [00:14<00:00, 38.65it/s, Loss=0.24]  
Validation epoch [2/5]: 100%|██████████| 63/63 [00:00<00:00, 162.81it/s]


SpearmanCorr: 0.8214364051818848
Accuracy: 0.8619999885559082
F1Score: 0.8606271743774414



Training epoch [3/5]: 100%|██████████| 563/563 [00:14<00:00, 39.55it/s, Loss=0.0453]
Validation epoch [3/5]: 100%|██████████| 63/63 [00:00<00:00, 163.48it/s]


SpearmanCorr: 0.8182405829429626
Accuracy: 0.8659999966621399
F1Score: 0.8640569448471069



Training epoch [4/5]: 100%|██████████| 563/563 [00:14<00:00, 38.40it/s, Loss=0.0875]
Validation epoch [4/5]: 100%|██████████| 63/63 [00:00<00:00, 163.02it/s]


SpearmanCorr: 0.8265454173088074
Accuracy: 0.8600000143051147
F1Score: 0.859329104423523



Training epoch [5/5]: 100%|██████████| 563/563 [00:14<00:00, 38.53it/s, Loss=0.24]  
Validation epoch [5/5]: 100%|██████████| 63/63 [00:00<00:00, 150.26it/s]


SpearmanCorr: 0.8274685144424438
Accuracy: 0.8560000061988831
F1Score: 0.8504743576049805



For test-set predictions, you can perform an evaluation similar to #TODO5.

In [11]:
# Test loop: score the current model on the test split with the same three
# metrics that are used during validation.
pbar = tqdm(dl_test)
pbar.set_description("test")
model.eval()

# Keep the metric objects on the same device as the model outputs.
spc.to(device)
acc.to(device)
f1.to(device)

# Discard whatever the validation pass accumulated.
spc.reset()
acc.reset()
f1.reset()

with torch.no_grad():
    for batch in pbar:
        relatedness_scores = batch['label1'].to(device)
        entailment_judgements = batch['label2'].to(device)
        batch['input_text'] = {
            k: v.to(device) for k, v in batch['input_text'].items() if isinstance(v, torch.Tensor)
        }

        regression_output, classification_output = model(**batch)

        # Accumulate the metrics. squeeze(-1) drops only the trailing size-1
        # dimension, so a final batch holding a single sample keeps its batch
        # dimension and still matches the label shape.
        spc.update(regression_output.squeeze(-1), relatedness_scores)
        acc.update(classification_output, entailment_judgements)
        f1.update(classification_output, entailment_judgements)

print(f"Testset\nSpearmanCorr: {spc.compute().item()}\n"
        f"Accuracy: {acc.compute().item()}\nF1Score: {f1.compute().item()}\n"
        )

test: 100%|██████████| 616/616 [00:03<00:00, 154.75it/s]

Testset
SpearmanCorr: 0.8335764408111572
Accuracy: 0.8749746084213257
F1Score: 0.8664665222167969




