In [1]:
import os
import sys

# Put the parent of the current working directory on the import path
# (presumably so the project-local packages imported below resolve when the
# notebook is launched from a sub-directory -- confirm against repo layout).
parent_dir = os.path.dirname(os.getcwd())
project_root = os.path.abspath(os.path.join(parent_dir, "."))
sys.path.append(project_root)

In [2]:
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
from trc_model.temporal_relation_classification import TemporalRelationClassification
from trc_model.temporal_relation_classification_config import TemporalRelationClassificationConfig
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding, AutoTokenizer

In [3]:
def preprocess_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with the global ``tokenizer``.

    Truncation is enabled; padding is not requested here. The function reads
    the notebook-level ``tokenizer`` variable at call time, so that variable
    must be assigned before this function is invoked.

    Args:
        examples: Mapping that exposes the input under the ``"text"`` key.

    Returns:
        Whatever the tokenizer call returns for the given text.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded

In [4]:
# Notebook-level switch read by compute_metrics: when True, the full text
# reports are printed and written to disk in addition to returning metrics.
eval_mode = False


def compute_metrics(eval_preds):
    """Compute weighted-average classification metrics for a prediction batch.

    Args:
        eval_preds: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores (arg-maxed over axis 1 here); ``labels`` holds
            the integer gold class ids.

    Returns:
        dict: The ``'weighted avg'`` entry of scikit-learn's classification
        report with the ``'support'`` key removed. It is always computed
        against the unmodified gold labels.

    Side effects (only when the global ``eval_mode`` is true):
        Prints two text reports and writes them to
        ``{model_final_name}/evaluation_report.txt``: the plain report, and a
        relaxed one in which every gold VAGUE example counts as correct
        whatever the model predicted. ``model_final_name`` is read from the
        notebook globals.
    """
    class_names = ['BEFORE', 'AFTER', 'EQUAL', 'VAGUE']
    # Explicit ids keep target_names aligned and avoid a ValueError when one
    # of the classes does not occur in this particular evaluation set.
    class_ids = list(range(len(class_names)))
    vague_id = class_names.index('VAGUE')

    predictions, labels = eval_preds
    predictions = np.argmax(predictions, axis=1)
    labels = np.asarray(labels)

    if eval_mode:
        report = classification_report(y_true=labels, y_pred=predictions,
                                       labels=class_ids, target_names=class_names)

        # Relaxed scoring: a gold VAGUE label adopts the model's prediction.
        # Built as a new array so the caller's labels (and the metrics
        # returned below) are not silently altered.
        relaxed_labels = np.where((labels == vague_id) & (predictions != vague_id),
                                  predictions, labels)
        report_no_vague = classification_report(y_true=relaxed_labels, y_pred=predictions,
                                                labels=class_ids, target_names=class_names)

        # The output directory may not exist yet (e.g. nothing was saved
        # during training), so create it before writing the report.
        os.makedirs(model_final_name, exist_ok=True)
        with open(f'{model_final_name}/evaluation_report.txt', 'w') as f:
            f.write(report)
            f.write('\n')
            f.write(report_no_vague)
        print(report)
        print(report_no_vague)

    results = classification_report(y_true=labels, y_pred=predictions,
                                    labels=class_ids, target_names=class_names,
                                    output_dict=True)['weighted avg']
    results.pop('support')
    return results

In [5]:
# Experiment grid: every checkpoint is trained with every architecture.
lm_checkpoints = [
    "onlplab/alephbert-base",
]
architectures = [
    "EMP",
]

In [6]:
# Train and evaluate one model per (checkpoint, architecture, data split).
for checkpoint in lm_checkpoints:
    for arc in architectures:
        for split in range(1, 4):
            raw_datasets = load_dataset(f"data_handling/data_splits/split_{split}")

            # Derive both label mappings from the (id, name) pairs found in
            # the training split.
            label2id = {}
            id2label = {}
            for label, named_label in zip(raw_datasets['train']['label'], raw_datasets['train']['named_label']):
                label2id[named_label] = label
                id2label[label] = named_label

            # Use the last path component so checkpoints without an
            # "organisation/" prefix do not raise IndexError.
            model_final_name = f'split-{split}-hebrew-trc-{checkpoint.split("/")[-1]}-{arc}'

            # `tokenizer` is also read as a global by preprocess_function.
            tokenizer = AutoTokenizer.from_pretrained(checkpoint)
            # Register the four entity-marker tokens and look up their ids.
            tokenizer.add_special_tokens({'additional_special_tokens': ['[א1]', '[/א1]', '[א2]', '[/א2]']})
            E1_start = tokenizer.convert_tokens_to_ids('[א1]')
            E1_end = tokenizer.convert_tokens_to_ids('[/א1]')
            E2_start = tokenizer.convert_tokens_to_ids('[א2]')
            E2_end = tokenizer.convert_tokens_to_ids('[/א2]')
            tokenized_datasets = raw_datasets.map(preprocess_function, remove_columns=['named_label'], batched=True)
            data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

            # Class name of the tokenizer, e.g. "BertTokenizerFast".
            tokenizer_class = type(tokenizer).__name__
            config = TemporalRelationClassificationConfig(special_markers=True,
                                                          pool_tokens=True,
                                                          EMS1=E1_start,
                                                          EMS2=E2_start,
                                                          EME1=E1_end,
                                                          EME2=E2_end,
                                                          architecture=arc,
                                                          num_labels=len(label2id),
                                                          id2label=id2label,
                                                          label2id=label2id,
                                                          name_or_path=checkpoint,
                                                          tokenizer_class=tokenizer_class,
                                                          # len(tokenizer) includes the added marker tokens.
                                                          vocab_size=len(tokenizer))

            model = TemporalRelationClassification(config=config)

            training_args = TrainingArguments(
                output_dir=model_final_name,
                learning_rate=2e-5,
                per_device_train_batch_size=32,
                per_device_eval_batch_size=32,
                weight_decay=0.01,
                num_train_epochs=10,
                evaluation_strategy="epoch",
                save_strategy="no",
                optim='adamw_torch',
                report_to=[],
            )

            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=tokenized_datasets["train"].shuffle(),
                eval_dataset=tokenized_datasets["test"],
                tokenizer=tokenizer,
                data_collator=data_collator,
                compute_metrics=compute_metrics,
            )
            trainer.train()

            # Switch compute_metrics into report-writing mode for the final
            # evaluation only. The finally clause guarantees the flag is
            # reset even if evaluation fails or the cell is interrupted, so a
            # later run in the same kernel does not write reports during
            # training.
            eval_mode = True
            try:
                print('Evaluate:', model_final_name)
                trainer.evaluate(tokenized_datasets['test'])
            finally:
                eval_mode = False

            config.register_for_auto_class()
            model.register_for_auto_class('AutoModelForSequenceClassification')
            # Uploading to the hub is intentionally disabled; the model is
            # only saved locally.
            # trainer.push_to_hub()
            trainer.save_model(model_final_name)

Found cached dataset csv (/Users/shahafpariente/.cache/huggingface/datasets/csv/split_1-6cea8d771e2b9d09/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /Users/shahafpariente/.cache/huggingface/datasets/csv/split_1-6cea8d771e2b9d09/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-8728f92add9fb4b0.arrow


Map:   0%|          | 0/1471 [00:00<?, ? examples/s]

Some weights of the model checkpoint at onlplab/alephbert-base were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight

Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 