<a href="https://colab.research.google.com/github/sjpark0605/NLP-FYP/blob/main/Entity_Marking_Flow_Graph_Training_Loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install datasets evaluate transformers[sentencepiece] accelerate

In [None]:
# Imports for Data Processing
import pandas as pd
import numpy as np
import torch
from datasets import load_from_disk

In [None]:
# Root folder that holds this project's datasets and saved models.
# NOTE(review): the path is under /content/drive, but no cell visible in this
# notebook mounts Google Drive — confirm the drive is mounted before running.
project_dir = '/content/drive/MyDrive/COMP0029/'

In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the pre-built entity-marked dataset from disk; the rest of the notebook
# reads its 'train' and 'valid' splits.
corpus_datasets = load_from_disk(project_dir + 'datasets/english-recipe-entity-marked-flow-graph')

In [None]:
from transformers import AutoTokenizer, DataCollatorWithPadding

# Base checkpoint; the same name is reused later to create the classifier.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Register the entity markers as special tokens so each marker stays a single
# token. The value displayed by this cell is the return value of add_tokens.
tokenizer.add_tokens(['<e1>', '</e1>', '<e2>', '</e2>'], special_tokens=True)

4

In [None]:
corpus_datasets

DatasetDict({
    train: Dataset({
        features: ['First Sentence', 'Second Sentence', 'Label'],
        num_rows: 49123
    })
    valid: Dataset({
        features: ['First Sentence', 'Second Sentence', 'Label'],
        num_rows: 12281
    })
})

In [None]:
# Slice the first five rows and then pick the column, so only those rows are
# read instead of the whole 'Second Sentence' column. Entries can be None.
print(corpus_datasets['train'][:5]['Second Sentence'])

['<e2> Season </e2> with salt ;', 'Split the <e2> layers </e2> of cooled cake horizontally , cover the top of each layer with icing , then stack them onto a serving plate .', 'Place covered dish in oven at <e2> Gas Mark 5 </e2> ( 375 degrees Fahrenheit/190 degrees Centigrade ) for about 2 hours ( removing every half hour to stir ) .', None, 'Stir-fry the garlic and <e2> shallots </e2> until fragrant , 3-4 minutes .']


In [None]:
def tokenize_function(data):
  """Tokenize one example into a fixed-length (128-token) model input.

  Args:
    data: a single dataset row with a "First Sentence" string and a
      "Second Sentence" that is either a string or None.

  Returns:
    The tokenizer's encoding of the first sentence alone when the second
    sentence is None, otherwise of the sentence pair.
  """
  # One shared set of options for both branches. truncation=True is required
  # for max_length to cap the output: with padding='max_length' alone, inputs
  # longer than 128 tokens are left at their full length.
  # NOTE(review): truncation can cut an entity marker out of a very long
  # example — check the length distribution if that matters.
  encode_options = dict(
      add_special_tokens=True,
      max_length=128,
      padding='max_length',
      truncation=True,
  )

  second_sentence = data["Second Sentence"]
  if second_sentence is None:
    return tokenizer(data["First Sentence"], **encode_options)
  return tokenizer(data["First Sentence"], second_sentence, **encode_options)

In [None]:
# Tokenize one example at a time: tokenize_function branches on whether a
# single row's "Second Sentence" is None, so it is not written for batches.
tokenized_datasets = corpus_datasets.map(tokenize_function, batched=False)
# Collator handed to the DataLoaders as their collate_fn.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)



In [None]:
# Fetch row 12 once instead of materialising three whole columns (including
# every row's input_ids) just to look at a single example.
sample = tokenized_datasets["train"][12]
print(sample["First Sentence"])
print(sample["Second Sentence"])

# Map the ids back to tokens to check the entity markers and the padding.
print(tokenizer.convert_ids_to_tokens(sample["input_ids"]))


In a large bowl , <e1> mix together </e1> the pumpkin puree , 3 eggs , <e2> 100g </e2> caster sugar , dark brown soft sugar and cinnamon .
None
['[CLS]', 'in', 'a', 'large', 'bowl', ',', '<e1>', 'mix', 'together', '</e1>', 'the', 'pumpkin', 'pure', '##e', ',', '3', 'eggs', ',', '<e2>', '100', '##g', '</e2>', 'caste', '##r', 'sugar', ',', 'dark', 'brown', 'soft', 'sugar', 'and', 'cinnamon', '.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]',

In [None]:
# Drop the raw text columns and rename the target column to "labels".
tokenized_datasets = (
    tokenized_datasets
    .remove_columns(["First Sentence", "Second Sentence"])
    .rename_column("Label", "labels")
)

# Switch the output format to torch, then show which columns remain.
tokenized_datasets.set_format("torch")
tokenized_datasets["train"].column_names

['labels', 'input_ids', 'token_type_ids', 'attention_mask']

In [None]:
from torch.utils.data import DataLoader

# Both loaders share the batch size and collator; only the training loader is
# created with shuffle=True.
shared_loader_args = dict(batch_size=64, collate_fn=data_collator)

train_dataloader = DataLoader(
    tokenized_datasets["train"], shuffle=True, **shared_loader_args
)

eval_dataloader = DataLoader(tokenized_datasets["valid"], **shared_loader_args)


In [None]:
# Class names of the "labels" feature. The notebook later maps class ids to
# names by position in this list (see the enumerate-based id2label).
label_names = tokenized_datasets["train"].features["labels"].names
label_names

['a-eq:LR',
 'a-eq:RL',
 'a:LR',
 'a:RL',
 'd:LR',
 'd:RL',
 'f-comp:LR',
 'f-comp:RL',
 'f-eq:LR',
 'f-eq:RL',
 'f-part-of:LR',
 'f-part-of:RL',
 'f-set:LR',
 'f-set:RL',
 'non-edge',
 'o:LR',
 'o:RL',
 't-comp:LR',
 't-comp:RL',
 't-eq:LR',
 't-eq:RL',
 't-part-of:LR',
 't-part-of:RL',
 't:LR',
 't:RL',
 'v-tm:LR',
 'v-tm:RL']

In [None]:
# Two-way lookup between integer class ids and class names, passed to the
# model configuration when the classifier is created.
id2label = dict(enumerate(label_names))
label2id = {name: class_id for class_id, name in id2label.items()}

In [None]:
#import evaluate

#metric = evaluate.load("seqeval")

In [None]:
def postprocess(predictions, labels):
    """Turn tensors of class ids into nested lists of class names.

    Args:
        predictions: torch tensor whose elements index into ``label_names``.
        labels: torch tensor whose elements index into ``label_names``.

    Returns:
        tuple: ``(true_predictions, true_labels)``; each is a list holding one
        single-item list ``[class_name]`` per element of the matching input.
    """
    # Detach from the graph and copy to host memory before leaving torch.
    predicted_ids = predictions.detach().cpu().clone().numpy()
    gold_ids = labels.detach().cpu().clone().numpy()

    def _wrap_names(class_ids):
        # Each name is wrapped in its own one-element list.
        return [[label_names[class_id]] for class_id in class_ids]

    true_predictions = _wrap_names(predicted_ids)
    true_labels = _wrap_names(gold_ids)
    return true_predictions, true_labels

In [None]:
def evaluate(dataloader_val):
    """Run the global ``flow_model`` over a dataloader without gradient tracking.

    Every collated batch is forwarded whole with ``flow_model(**batch)``, the
    same call the training loop makes. This keeps ``token_type_ids`` in the
    forward pass, so sentence-pair inputs are scored with the segment ids the
    model was trained on rather than having them dropped at evaluation time.

    Args:
        dataloader_val: iterable of batches; each batch is a mapping holding
            the model inputs and a 'labels' tensor.

    Returns:
        tuple: ``(loss_val_avg, predictions, true_vals)`` — the mean of the
        per-batch losses, the logits of all batches concatenated along axis 0
        as a NumPy array, and the matching label ids as a NumPy array.

    Raises:
        ValueError: if ``dataloader_val`` yields no batches.
    """
    flow_model.eval()

    loss_val_total = 0
    predictions, true_vals = [], []

    for batch in dataloader_val:
        with torch.no_grad():
            outputs = flow_model(**batch)

        # With labels supplied the model returns the loss first, then logits.
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        predictions.append(logits.detach().cpu().numpy())
        true_vals.append(batch['labels'].detach().cpu().numpy())

    if not predictions:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError('dataloader_val yielded no batches to evaluate')

    # One entry was appended per batch, so this is the batch count.
    loss_val_avg = loss_val_total / len(predictions)

    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    return loss_val_avg, predictions, true_vals

In [None]:
from sklearn.metrics import f1_score

def f1_score_func(preds, labels):
    """Weighted-average F1 of the arg-max predictions against the true ids.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: array of true class ids.

    Returns:
        The value of ``f1_score(..., average='weighted')``.
    """
    predicted_ids = np.argmax(preds, axis=1).flatten()
    gold_ids = labels.flatten()
    return f1_score(y_true=gold_ids, y_pred=predicted_ids, average='weighted')

def accuracy_per_class(preds, labels):
    """Print a ``correct/total`` count for every class present in ``labels``.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: array of true class ids.

    Returns:
        None. The report is written with ``print``.
    """
    names_by_id = dict(enumerate(label_names))

    predicted_ids = np.argmax(preds, axis=1).flatten()
    gold_ids = labels.flatten()

    for class_id in np.unique(gold_ids):
        # Restrict to the examples whose true class is class_id.
        in_class = gold_ids == class_id
        n_total = int(in_class.sum())
        n_correct = int((predicted_ids[in_class] == class_id).sum())
        print(f'Class: {names_by_id[class_id]}')
        print(f'Accuracy: {n_correct}/{n_total}\n')

In [None]:
# Calculate Weights for Cross Entropy Loss
# Each class is weighted by the inverse of its count in the training split, so
# rarer classes receive larger weights. The weights are left unnormalised.
num_labels = len(label_names)
frequencies = [0] * num_labels

for batch in train_dataloader:
  # tolist() yields plain ints, which index the list far faster than
  # indexing with one 0-dim tensor per example.
  for label in batch['labels'].tolist():
    frequencies[label] += 1

# A class that never appears in the training split would divide by zero. It
# gets weight 0, which is harmless because it never occurs as a target there.
weights = [1 / count if count > 0 else 0. for count in frequencies]

weights = torch.tensor(weights).to(device)
weights

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


tensor([5.6180e-03, 9.0909e-02, 1.9685e-03, 3.2258e-02, 1.3141e-03, 1.1111e-03,
        9.0909e-02, 4.5872e-03, 1.1299e-03, 3.7037e-02, 1.9455e-03, 1.3158e-02,
        9.0909e-02, 1.4286e-01, 2.7447e-05, 9.9206e-04, 8.2440e-04, 1.2195e-02,
        2.2831e-03, 3.9841e-03, 5.0000e-01, 8.8496e-03, 2.5641e-02, 6.7249e-04,
        2.9360e-04, 2.0408e-02, 2.1598e-03], device='cuda:0')

In [None]:
from tqdm.auto import tqdm
from accelerate import Accelerator
from transformers import AutoModelForSequenceClassification, get_scheduler
from sklearn.metrics import f1_score
from torch import nn

accelerator = Accelerator()

# Sequence classifier on top of the base checkpoint, one output per label.
flow_model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=len(label_names)
)

# The tokenizer was extended with the entity-marker tokens, so the embedding
# matrix has to grow to the new vocabulary size.
flow_model.resize_token_embeddings(len(tokenizer))

# Two parameter groups: weight decay for most weights, none for biases and
# LayerNorm weights. torch.optim.AdamW reads the per-group key 'weight_decay';
# any other key (such as 'weight_decay_rate') is ignored and the optimizer's
# default decay would then apply to every parameter.
param_optimizer = list(flow_model.named_parameters())
# 'gamma'/'beta' are kept for older parameter naming; the Hugging Face BERT
# modules name these parameters 'LayerNorm.weight' / 'LayerNorm.bias'.
no_decay = ['bias', 'LayerNorm.weight', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0}]

optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=3e-5)

# Let accelerate place the loaders, model and optimizer on the right device.
train_dl, eval_dl, flow_model, optimizer = accelerator.prepare(
    train_dataloader, eval_dataloader, flow_model, optimizer
)

# Class-weighted loss. It is only referenced by the optional custom-loss
# variant of the training loop; the default loop uses the model's own loss.
loss_fct = torch.nn.CrossEntropyLoss(weight=weights)

print(flow_model.config)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "a-eq:LR",
    "1": "a-eq:RL",
    "2": "a:LR",
    "3": "a:RL",
    "4": "d:LR",
    "5": "d:RL",
    "6": "f-comp:LR",
    "7": "f-comp:RL",
    "8": "f-eq:LR",
    "9": "f-eq:RL",
    "10": "f-part-of:LR",
    "11": "f-part-of:RL",
    "12": "f-set:LR",
    "13": "f-set:RL",
    "14": "non-edge",
    "15": "o:LR",
    "16": "o:RL",
    "17": "t-comp:LR",
    "18": "t-comp:RL",
    "19": "t-eq:LR",
    "20": "t-eq:RL",
    "21": "t-part-of:LR",
    "22": "t-part-of:RL",
    "23": "t:LR",
    "24": "t:RL",
    "25": "v-tm:LR",
    "26": "v-tm:RL"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "a-eq:LR": 0,
    "a-eq:RL": 1,
    "a:LR": 2,
    "a:RL"

In [None]:
num_epochs = 10
log_every = 20  # write the current training loss every `log_every` steps
num_training_steps = num_epochs * len(train_dl)

# Linear decay of the learning rate to zero over all steps, without warm-up.
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_epochs):
    flow_model.train()
    for batch in train_dl:
        # The loss is the one the model computes itself from batch['labels'].
        # To train with the class-weighted loss instead, replace it with:
        #   loss = loss_fct(outputs.logits.view(-1, flow_model.config.num_labels),
        #                   batch["labels"].view(-1))
        outputs = flow_model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

        if progress_bar.n % log_every == 0:
          tqdm.write(f'Training loss: {loss.item()}')

    # End-of-epoch validation.
    val_loss, predictions, true_vals = evaluate(eval_dl)
    val_f1 = f1_score_func(predictions, true_vals)
    # accuracy_per_class prints its own report and returns nothing, so its
    # result is not captured or printed (that only ever printed "None").
    accuracy_per_class(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

  0%|          | 0/7680 [00:00<?, ?it/s]

Training loss: 1.3079016208648682
Training loss: 1.1348644495010376
Training loss: 1.2271782159805298
Training loss: 0.5608022809028625
Training loss: 0.7859618663787842
Training loss: 0.9551581144332886
Training loss: 0.6585026383399963
Training loss: 1.1674813032150269
Training loss: 0.45428845286369324
Training loss: 0.6169338226318359
Training loss: 0.5855906009674072
Training loss: 0.6160440444946289
Training loss: 0.8316535949707031
Training loss: 1.0070163011550903
Training loss: 0.7390241026878357
Training loss: 0.5796247124671936
Training loss: 0.5851195454597473
Training loss: 0.3028911352157593
Training loss: 0.6106656193733215
Training loss: 0.6108065843582153
Training loss: 0.7276121973991394
Training loss: 0.4235355854034424
Training loss: 0.3937338590621948
Training loss: 0.27413272857666016
Training loss: 0.4686310887336731
Training loss: 0.265854150056839
Training loss: 0.3728793263435364
Training loss: 0.5601999163627625
Training loss: 0.28741252422332764
Training los

KeyboardInterrupt: ignored

In [None]:
# Persist the fine-tuned classifier under the project folder.
# NOTE(review): only the model is saved here. The tokenizer was extended with
# the <e1>/<e2> marker tokens and the embeddings were resized to match, so
# whoever reloads this model needs the same extended tokenizer — consider
# saving the tokenizer alongside it.
flow_model.save_pretrained(project_dir + 'saved_models/entity_marking/flow-graph-optimized-parameter-model')

In [None]:
# Score the current model on the validation loader; the loss is not needed here.
_, predictions, true_vals = evaluate(eval_dl)
val_f1 = f1_score_func(predictions, true_vals)
# Report the F1 that was computed above instead of silently discarding it.
print(f'F1 Score (Weighted): {val_f1}')
accuracy_per_class(predictions, true_vals)

Class: a-eq:LR
Accuracy: 4/45

Class: a-eq:RL
Accuracy: 0/3

Class: a:LR
Accuracy: 100/127

Class: a:RL
Accuracy: 3/8

Class: d:LR
Accuracy: 157/190

Class: d:RL
Accuracy: 179/225

Class: f-comp:LR
Accuracy: 0/3

Class: f-comp:RL
Accuracy: 38/54

Class: f-eq:LR
Accuracy: 122/221

Class: f-eq:RL
Accuracy: 0/6

Class: f-part-of:LR
Accuracy: 75/128

Class: f-part-of:RL
Accuracy: 15/19

Class: f-set:LR
Accuracy: 0/3

Class: f-set:RL
Accuracy: 0/2

Class: non-edge
Accuracy: 8388/9109

Class: o:LR
Accuracy: 233/252

Class: o:RL
Accuracy: 246/303

Class: t-comp:LR
Accuracy: 7/20

Class: t-comp:RL
Accuracy: 83/110

Class: t-eq:LR
Accuracy: 24/63

Class: t-part-of:LR
Accuracy: 10/28

Class: t-part-of:RL
Accuracy: 9/10

Class: t:LR
Accuracy: 298/372

Class: t:RL
Accuracy: 806/852

Class: v-tm:LR
Accuracy: 10/12

Class: v-tm:RL
Accuracy: 112/116



In [None]:
from sklearn.metrics import classification_report

# Arg-max over the class axis turns each row of logits into a predicted id.
pred_vals = np.argmax(predictions, axis=1).flatten()

# Map ids to the full, direction-aware label names (e.g. 't:LR').
labeled_preds = [label_names[pred_val] for pred_val in pred_vals]
labeled_trues = [label_names[true_val] for true_val in true_vals]

# Some rare classes are never predicted. zero_division=0 reports their
# undefined precision as 0 explicitly instead of emitting a warning per metric.
print(classification_report(labeled_trues, labeled_preds, zero_division=0))

              precision    recall  f1-score   support

     a-eq:LR       0.44      0.09      0.15        45
     a-eq:RL       0.00      0.00      0.00         3
        a:LR       0.64      0.79      0.71       127
        a:RL       0.60      0.38      0.46         8
        d:LR       0.49      0.83      0.62       190
        d:RL       0.78      0.80      0.79       225
   f-comp:LR       0.00      0.00      0.00         3
   f-comp:RL       0.73      0.70      0.72        54
     f-eq:LR       0.44      0.55      0.49       221
     f-eq:RL       0.00      0.00      0.00         6
f-part-of:LR       0.39      0.59      0.47       128
f-part-of:RL       0.58      0.79      0.67        19
    f-set:LR       0.00      0.00      0.00         3
    f-set:RL       0.00      0.00      0.00         2
    non-edge       0.97      0.92      0.94      9109
        o:LR       0.91      0.92      0.92       252
        o:RL       0.70      0.81      0.75       303
   t-comp:LR       0.41    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Arg-max over the class axis turns each row of logits into a predicted id.
pred_vals = np.argmax(predictions, axis=1).flatten()

# Strip the ':LR' / ':RL' direction suffix so both directions of a relation
# are scored as one class.
labeled_preds = [label_names[pred_val].replace(":LR", "").replace(":RL", "") for pred_val in pred_vals]
labeled_trues = [label_names[true_val].replace(":LR", "").replace(":RL", "") for true_val in true_vals]

# zero_division=0 reports the undefined precision of never-predicted classes
# as 0 explicitly instead of emitting a warning per metric.
print(classification_report(labeled_trues, labeled_preds, zero_division=0))

              precision    recall  f1-score   support

           a       0.64      0.76      0.70       135
        a-eq       0.67      0.12      0.21        48
           d       0.61      0.81      0.70       415
      f-comp       0.73      0.67      0.70        57
        f-eq       0.44      0.54      0.48       227
   f-part-of       0.42      0.63      0.50       147
       f-set       0.00      0.00      0.00         5
    non-edge       0.97      0.92      0.94      9109
           o       0.80      0.87      0.84       555
           t       0.81      0.90      0.85      1224
      t-comp       0.63      0.69      0.66       130
        t-eq       0.39      0.38      0.38        63
   t-part-of       0.49      0.50      0.49        38
        v-tm       0.86      0.95      0.90       128

    accuracy                           0.89     12281
   macro avg       0.60      0.63      0.60     12281
weighted avg       0.90      0.89      0.89     12281



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
