<a href="https://colab.research.google.com/github/sjpark0605/NLP-FYP/blob/main/Flow_Graph_Training_Loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install datasets evaluate transformers[sentencepiece] accelerate

In [2]:
# Imports for Data Processing
import pandas as pd
import numpy as np
import torch
from datasets import load_from_disk

In [3]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive/')

dataset_dir = '/content/drive/MyDrive/COMP0029/datasets/'

Mounted at /content/drive/


In [4]:
corpus_datasets = load_from_disk(dataset_dir + 'english-recipe-flow-graph')

In [5]:
def tokenize_function(data):
    return tokenizer(data["Word Pairs"], data["Sentence Pairs"], truncation=True)

In [6]:
from transformers import AutoTokenizer, DataCollatorWithPadding

checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

tokenized_datasets = corpus_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



Map:   0%|          | 0/7291 [00:00<?, ? examples/s]

In [7]:
tokenized_datasets = tokenized_datasets.remove_columns(["Word Pairs", "Sentence Pairs"])
tokenized_datasets = tokenized_datasets.rename_column("Label", "labels")
tokenized_datasets.set_format("torch")
tokenized_datasets["train"].column_names

['labels', 'input_ids', 'token_type_ids', 'attention_mask']

In [8]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    tokenized_datasets["train"], shuffle=True, batch_size=128, collate_fn=data_collator
)
eval_dataloader = DataLoader(
    tokenized_datasets["valid"], batch_size=128, collate_fn=data_collator
)

In [10]:
label_names = tokenized_datasets["train"].features["labels"].names
label_names

['a',
 'a-eq',
 'd',
 'f-comp',
 'f-eq',
 'f-part-of',
 'o',
 's',
 't',
 't-comp',
 't-eq',
 't-part-of',
 'v',
 'v-tm',
 'x']

In [11]:
from transformers import AutoModelForSequenceClassification

flow_model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=len(label_names))

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [12]:
#import evaluate

#metric = evaluate.load("seqeval")

In [13]:
def postprocess(predictions, labels):
    predictions = predictions.detach().cpu().clone().numpy()
    labels = labels.detach().cpu().clone().numpy()

    # Remove ignored index (special tokens) and convert to labels
    true_predictions = [[label_names[prediction]] for prediction in predictions]
    true_labels = [[label_names[label]] for label in labels]

    return true_predictions, true_labels

In [14]:
def evaluate(dataloader_val):

    flow_model.eval()
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in dataloader_val:
        inputs = {
                  'input_ids':      batch['input_ids'],
                  'attention_mask': batch['attention_mask'],
                  'labels':         batch['labels'],
                 }

        with torch.no_grad():        
            outputs = flow_model(**inputs)
            
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [15]:
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')

def accuracy_per_class(preds, labels):
    label_dict_inverse = {index: label for index, label in enumerate(label_names)}
    
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}\n')

In [17]:
from tqdm.auto import tqdm
from accelerate import Accelerator
from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler
from sklearn.metrics import f1_score

accelerator = Accelerator()

flow_model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=len(label_names))
optimizer = AdamW(flow_model.parameters(), lr=3e-5)

train_dl, eval_dl, flow_model, optimizer = accelerator.prepare(
    train_dataloader, eval_dataloader, flow_model, optimizer
)

num_epochs = 3
num_training_steps = num_epochs * len(train_dl)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_epochs):
    flow_model.train()
    for batch in train_dl:
        outputs = flow_model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)


    val_loss, predictions, true_vals = evaluate(eval_dl)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  0%|          | 0/684 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Validation loss: 0.4979727153192487
F1 Score (Weighted): 0.847492663752319
Validation loss: 0.4190257834760766
F1 Score (Weighted): 0.8560965708370506
Validation loss: 0.397441957864845
F1 Score (Weighted): 0.8619248957733773


In [18]:
_, predictions, true_vals = evaluate(eval_dl)
val_f1 = f1_score_func(predictions, true_vals)
accuracy_per_class(predictions, true_vals)

Class: a
Accuracy: 3/68

Class: a-eq
Accuracy: 0/21

Class: d
Accuracy: 9/131

Class: f-comp
Accuracy: 0/19

Class: f-eq
Accuracy: 0/93

Class: f-part-of
Accuracy: 0/49

Class: o
Accuracy: 160/199

Class: s
Accuracy: 0/6

Class: t
Accuracy: 347/446

Class: t-comp
Accuracy: 3/65

Class: t-eq
Accuracy: 0/27

Class: t-part-of
Accuracy: 0/8

Class: v
Accuracy: 0/12

Class: v-tm
Accuracy: 2/38

Class: x
Accuracy: 5956/6109

