In [None]:
import torch
from torch.utils.data import DataLoader
from torch import optim, nn
from transformers import BertTokenizerFast

from data_handling.trc_dataset import TRCDataset
from model.trc_model import TRCModel
from trainer.trainer import Trainer
from trainer.training_utils import get_parameters

# Select the compute device for the whole notebook.
# `torch.cuda.is_available()` checks at runtime that a usable CUDA device and
# driver are present. `torch.backends.cuda.is_built()` only reports whether
# this PyTorch binary was compiled with CUDA support, so relying on it can
# select 'cuda' on a machine that has no GPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'

device = torch.device(device_name)
print('device:', device)

# --- Experiment configuration ------------------------------------------------
# Seed for torch's global RNG so that a Restart & Run All repeats the same
# shuffling order (DataLoader(shuffle=True) draws from this RNG by default).
SEED = 42
BATCH_SIZE = 4
MODEL_CHECKPOINT = 'onlplab/alephbert-base'
# Passed as the second argument to get_parameters() when each optimizer is built.
# NOTE(review): presumably controls how many parameter tensors/layers are
# trained -- confirm against trainer/training_utils.py.
TRAINING_LAYERS = 52
# Class names; len(LABELS) is used as the model's output size.
LABELS = ['BEFORE', 'AFTER', 'EQUAL', 'VAGUE']

# CSV file for each data split, relative to the notebook's working directory.
data_paths = {
    'train': 'data_handling/split_data/train.csv',
    'test': 'data_handling/split_data/test.csv',
}

torch.manual_seed(SEED)

# Load the tokenizer that belongs to the pretrained checkpoint.
tokenizer = BertTokenizerFast.from_pretrained(MODEL_CHECKPOINT)

# Register the four bracketed marker tokens (opening/closing, numbered 1 and 2)
# with the tokenizer's vocabulary.
entity_marker_tokens = ['[א1]', '[/א1]', '[א2]', '[/א2]']
tokenizer.add_tokens(entity_marker_tokens)

# Vocabulary ids of the two opening markers; these are handed to Trainer as
# `entity_markers` in the training cells.
E1_start, E2_start = tokenizer.convert_tokens_to_ids(['[א1]', '[א2]'])

# Build one TRCDataset per split, in the order train -> test.
train_set, test_set = (
    TRCDataset(data_path=data_paths[split_name])
    for split_name in ('train', 'test')
)

# Only the training loader shuffles; the test loader keeps DataLoader's
# default (non-shuffled) order.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE)

# Report the number of examples in each split.
print(f'train: {len(train_set)}\ntest: {len(test_set)}')

# Experiment: TRCModel with architecture='SEQ_CLS'.
model = TRCModel(
    output_size=len(LABELS),
    tokenizer=tokenizer,
    check_point=MODEL_CHECKPOINT,
    architecture='SEQ_CLS',
)

# Adam only receives the parameters that get_parameters() returns.
trainable_params = get_parameters(model.named_parameters(), TRAINING_LAYERS)
optimizer = optim.Adam(trainable_params, lr=1e-5)

trainer = Trainer(
    model,
    tokenizer=tokenizer,
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    entity_markers=(E1_start, E2_start),
    labels=LABELS,
    device=device,
)

# NOTE(review): the test loader is also passed as the validation loader, so
# the per-epoch "Valid" numbers are computed on the test split.
trainer.train(train_loader=train_loader, valid_loader=test_loader, max_epochs=10)


device: cpu
train: 5679
test: 1420


Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias

Epoch [1/10], Step [1420/14200], Train Loss: 1.1264, Valid Loss: 0.9493, precision: 0.61, recall: 0.63, F1: 0.60


100%|██████████| 1420/1420 [13:12<00:00,  2.00it/s]
  0%|          | 0/1420 [00:00<?, ?it/s]           A

Epoch [2/10], Step [2840/14200], Train Loss: 0.7656, Valid Loss: 0.7807, precision: 0.71, recall: 0.72, F1: 0.69


100%|██████████| 1420/1420 [13:16<00:00,  1.91it/s]
  0%|          | 0/1420 [00:00<?, ?it/s]           A

Epoch [3/10], Step [4260/14200], Train Loss: 0.4886, Valid Loss: 0.7100, precision: 0.75, recall: 0.75, F1: 0.75


  2%|▏         | 30/1420 [00:16<13:03,  1.77it/s]

In [None]:
# Evaluate whichever `trainer` is currently bound on the test loader. Later
# cells rebind `trainer`, so run this directly after the SEQ_CLS training cell.
# NOTE(review): print_report=True presumably prints a per-label report --
# confirm in trainer/trainer.py.
trainer.evaluate(test_loader, print_report=True)

In [None]:
# Experiment: TRCModel with architecture='ESS'.
# This rebinds `model` and `trainer`, replacing the objects from the previous run.
model = TRCModel(
    output_size=len(LABELS),
    tokenizer=tokenizer,
    check_point=MODEL_CHECKPOINT,
    architecture='ESS',
)

# Adam only receives the parameters that get_parameters() returns.
trainable_params = get_parameters(model.named_parameters(), TRAINING_LAYERS)
optimizer = optim.Adam(trainable_params, lr=1e-5)

trainer = Trainer(
    model,
    tokenizer=tokenizer,
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    entity_markers=(E1_start, E2_start),
    labels=LABELS,
    device=device,
)

# NOTE(review): the test loader is also passed as the validation loader, so
# the per-epoch "Valid" numbers are computed on the test split.
trainer.train(train_loader=train_loader, valid_loader=test_loader, max_epochs=10)


In [None]:
# Evaluate whichever `trainer` is currently bound on the test loader. Every
# training cell rebinds `trainer`, so run this directly after the ESS training cell.
# NOTE(review): print_report=True presumably prints a per-label report --
# confirm in trainer/trainer.py.
trainer.evaluate(test_loader, print_report=True)

In [None]:
# Experiment: TRCModel with architecture='EMP'.
# This rebinds `model` and `trainer`, replacing the objects from the previous run.
model = TRCModel(
    output_size=len(LABELS),
    tokenizer=tokenizer,
    check_point=MODEL_CHECKPOINT,
    architecture='EMP',
)

# Adam only receives the parameters that get_parameters() returns.
trainable_params = get_parameters(model.named_parameters(), TRAINING_LAYERS)
optimizer = optim.Adam(trainable_params, lr=1e-5)

trainer = Trainer(
    model,
    tokenizer=tokenizer,
    optimizer=optimizer,
    criterion=nn.CrossEntropyLoss(),
    entity_markers=(E1_start, E2_start),
    labels=LABELS,
    device=device,
)

# NOTE(review): the test loader is also passed as the validation loader, so
# the per-epoch "Valid" numbers are computed on the test split.
trainer.train(train_loader=train_loader, valid_loader=test_loader, max_epochs=10)


In [None]:
# Evaluate whichever `trainer` is currently bound on the test loader. Every
# training cell rebinds `trainer`, so run this directly after the EMP training cell.
# NOTE(review): print_report=True presumably prints a per-label report --
# confirm in trainer/trainer.py.
trainer.evaluate(test_loader, print_report=True)