In [1]:
import os
import sys

# Make the parent of the current working directory importable so that the
# project-local packages used by later cells can be resolved.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(os.getcwd()), ".")
    )
)

In [2]:
from datasets import load_dataset
from sklearn.metrics import classification_report
import numpy as np
from trc_model.temporal_relation_classification import TemporalRelationClassification
from trc_model.temporal_relation_classification_config import TemporalRelationClassificationConfig
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding, AutoTokenizer

In [3]:
def preprocess_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with truncation enabled.

    Relies on the module-level ``tokenizer`` being assigned before this
    function is called.

    Args:
        examples: A mapping whose ``"text"`` entry is passed to the tokenizer.

    Returns:
        Whatever ``tokenizer(..., truncation=True)`` returns for that text.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)

In [4]:
# Module-level switch read by compute_metrics: when True, the function also
# prints the full per-class reports and writes them to disk.
eval_mode = False


def compute_metrics(eval_preds):
    """Compute weighted-average precision / recall / F1 for a 4-class task.

    Args:
        eval_preds: A ``(predictions, labels)`` pair. ``predictions`` holds
            per-class scores (arg-maxed over axis 1) and ``labels`` holds the
            gold class ids.

    Returns:
        The ``'weighted avg'`` entry of sklearn's classification report with
        the ``'support'`` key removed.

    Side effects (only when the module-level ``eval_mode`` is True):
        Prints two text reports and writes them to
        ``{model_final_name}/evaluation_report.txt``. The first is the plain
        report; the second is a relaxed report in which every gold VAGUE
        example that the model did not predict as VAGUE is counted as correct.
        ``model_final_name`` is read from module scope.

    The relaxed report is computed on a copy of ``labels``, so the caller's
    array is left untouched and the returned metrics are the same whether or
    not ``eval_mode`` is set.
    """
    class_names = ['BEFORE', 'AFTER', 'EQUAL', 'VAGUE']
    # Pass explicit label ids so the report keeps all four rows (instead of
    # raising) when a class is absent from both the gold and predicted labels.
    class_ids = list(range(len(class_names)))
    vague_id = class_ids[-1]

    predictions, labels = eval_preds
    predictions = np.argmax(predictions, axis=1)

    if eval_mode:
        report = classification_report(y_true=labels, y_pred=predictions,
                                       labels=class_ids, target_names=class_names)

        # Relax the gold labels on a copy: a gold VAGUE that was predicted as
        # something else adopts the predicted label.
        relaxed_labels = np.array(labels)
        forgiven = (relaxed_labels == vague_id) & (predictions != vague_id)
        relaxed_labels[forgiven] = predictions[forgiven]
        report_no_vague = classification_report(y_true=relaxed_labels, y_pred=predictions,
                                                labels=class_ids, target_names=class_names)

        # Make sure the target directory exists before writing the report.
        os.makedirs(model_final_name, exist_ok=True)
        with open(f'{model_final_name}/evaluation_report.txt', 'w') as f:
            f.write(report)
            f.write('\n')
            f.write(report_no_vague)
        print(report)
        print(report_no_vague)

    results = classification_report(y_true=labels, y_pred=predictions,
                                    labels=class_ids, target_names=class_names,
                                    output_dict=True)['weighted avg']
    results.pop('support')
    return results

In [11]:
# Pretrained checkpoints to iterate over; each one is loaded with
# AutoTokenizer.from_pretrained and passed as `name_or_path` in the model config.
lm_checkpoints = ['onlplab/alephbert-base']
# Values forwarded as `architecture=` to TemporalRelationClassificationConfig;
# one model is built per (checkpoint, architecture) pair.
architectures = ['EMP', 'ESS', 'SEQ_CLS']

In [12]:
raw_datasets = load_dataset("data_handling/data_splits/split_1")

# The label vocabulary depends only on the data, so build both mappings once
# instead of re-scanning the whole train split for every model configuration.
train_split = raw_datasets['train']
label2id = dict(zip(train_split['named_label'], train_split['label']))
id2label = dict(zip(train_split['label'], train_split['named_label']))

for checkpoint in lm_checkpoints:
    for arc in architectures:
        # Use the last path component so checkpoint ids without an
        # "organisation/" prefix work as well.
        model_final_name = f'hebrew-trc-{checkpoint.split("/")[-1]}-{arc}'

        # `tokenizer` must be assigned before `raw_datasets.map(...)` below,
        # because preprocess_function reads it from module scope.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        tokenizer.add_special_tokens({'additional_special_tokens': ['[א1]', '[/א1]', '[א2]', '[/א2]']})
        E1_start = tokenizer.convert_tokens_to_ids('[א1]')
        E1_end = tokenizer.convert_tokens_to_ids('[/א1]')
        E2_start = tokenizer.convert_tokens_to_ids('[א2]')
        E2_end = tokenizer.convert_tokens_to_ids('[/א2]')
        tokenized_datasets = raw_datasets.map(preprocess_function, remove_columns=['named_label'], batched=True)
        data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

        tokenizer_class = type(tokenizer).__name__
        config = TemporalRelationClassificationConfig(EMS1=E1_start,
                                                      EMS2=E2_start,
                                                      EME1=E1_end,
                                                      EME2=E2_end,
                                                      architecture=arc,
                                                      num_labels=len(label2id),
                                                      # Fresh copies so no config shares (or mutates) the
                                                      # dictionaries built above.
                                                      id2label=dict(id2label),
                                                      label2id=dict(label2id),
                                                      name_or_path=checkpoint,
                                                      tokenizer_class=tokenizer_class,
                                                      # len(tokenizer) includes the four marker tokens added above.
                                                      vocab_size=len(tokenizer))

        model = TemporalRelationClassification(config=config)

        training_args = TrainingArguments(
            output_dir=model_final_name,
            learning_rate=3e-5,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            weight_decay=0.01,
            num_train_epochs=10,
            evaluation_strategy="epoch",
            save_strategy="no",
            optim='adamw_torch',
            report_to=[],
            # NOTE(review): hard-codes the MPS backend; presumably this needs
            # to be switched off on machines without it - confirm.
            use_mps_device=True
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets["train"].shuffle(),
            eval_dataset=tokenized_datasets["test"],
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )
        # NOTE(review): training is disabled, so the evaluation below and the
        # saved checkpoint reflect whatever weights the model was constructed
        # with. Re-enable before reporting results.
        # trainer.train()

        # compute_metrics reads the module-level `eval_mode` flag; always reset
        # it, even if evaluation raises, so later runs are not left in
        # report-writing mode.
        eval_mode = True
        print('Evaluate:', model_final_name)
        try:
            trainer.evaluate(tokenized_datasets['test'])
        finally:
            eval_mode = False

        config.register_for_auto_class()
        model.register_for_auto_class('AutoModelForSequenceClassification')
        # trainer.push_to_hub()
        trainer.save_model(model_final_name)

Found cached dataset csv (/Users/guy.yanko/.cache/huggingface/datasets/csv/split_1-342133e3b8689d17/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/2 [00:00<?, ?it/s]

loading configuration file config.json from cache at /Users/guy.yanko/.cache/huggingface/hub/models--onlplab--alephbert-base/snapshots/1745fb3ff5137e41e9eb4d6246e0758f63b93e46/config.json
Model config BertConfig {
  "_name_or_path": "onlplab/alephbert-base",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 52000
}

loading file vocab.txt from cache at /Users/guy.yanko/.cache/huggingface/hub/models--onlplab--alephbert-base/snapshots/1745fb3ff5137e41e9eb4d6246e

Evaluate: hebrew-trc-alephbert-base-EMP


Saving model checkpoint to hebrew-trc-alephbert-base-EMP
Configuration saved in hebrew-trc-alephbert-base-EMP/config.json


              precision    recall  f1-score   support

      BEFORE       0.36      0.10      0.15       592
       AFTER       0.29      0.40      0.34       475
       EQUAL       0.11      0.26      0.15       128
       VAGUE       0.19      0.24      0.21       276

    accuracy                           0.24      1471
   macro avg       0.24      0.25      0.21      1471
weighted avg       0.28      0.24      0.22      1471

              precision    recall  f1-score   support

      BEFORE       0.61      0.15      0.25       633
       AFTER       0.47      0.52      0.50       596
       EQUAL       0.26      0.46      0.33       176
       VAGUE       0.19      1.00      0.32        66

    accuracy                           0.38      1471
   macro avg       0.39      0.53      0.35      1471
weighted avg       0.50      0.38      0.36      1471



Model weights saved in hebrew-trc-alephbert-base-EMP/pytorch_model.bin
tokenizer config file saved in hebrew-trc-alephbert-base-EMP/tokenizer_config.json
Special tokens file saved in hebrew-trc-alephbert-base-EMP/special_tokens_map.json
loading configuration file config.json from cache at /Users/guy.yanko/.cache/huggingface/hub/models--onlplab--alephbert-base/snapshots/1745fb3ff5137e41e9eb4d6246e0758f63b93e46/config.json
Model config BertConfig {
  "_name_or_path": "onlplab/alephbert-base",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers

Evaluate: hebrew-trc-alephbert-base-ESS


Saving model checkpoint to hebrew-trc-alephbert-base-ESS
Configuration saved in hebrew-trc-alephbert-base-ESS/config.json


              precision    recall  f1-score   support

      BEFORE       0.41      0.62      0.50       592
       AFTER       0.24      0.10      0.14       475
       EQUAL       0.08      0.18      0.11       128
       VAGUE       0.33      0.12      0.17       276

    accuracy                           0.32      1471
   macro avg       0.27      0.25      0.23      1471
weighted avg       0.31      0.32      0.29      1471

              precision    recall  f1-score   support

      BEFORE       0.59      0.70      0.64       748
       AFTER       0.45      0.17      0.25       516
       EQUAL       0.25      0.40      0.30       175
       VAGUE       0.33      1.00      0.49        32

    accuracy                           0.49      1471
   macro avg       0.40      0.57      0.42      1471
weighted avg       0.49      0.49      0.46      1471



Model weights saved in hebrew-trc-alephbert-base-ESS/pytorch_model.bin
tokenizer config file saved in hebrew-trc-alephbert-base-ESS/tokenizer_config.json
Special tokens file saved in hebrew-trc-alephbert-base-ESS/special_tokens_map.json
loading configuration file config.json from cache at /Users/guy.yanko/.cache/huggingface/hub/models--onlplab--alephbert-base/snapshots/1745fb3ff5137e41e9eb4d6246e0758f63b93e46/config.json
Model config BertConfig {
  "_name_or_path": "onlplab/alephbert-base",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers

Evaluate: hebrew-trc-alephbert-base-SEQ_CLS


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to hebrew-trc-alephbert-base-SEQ_CLS
Configuration saved in hebrew-trc-alephbert-base-SEQ_CLS/config.json


              precision    recall  f1-score   support

      BEFORE       0.41      0.38      0.39       592
       AFTER       0.32      0.12      0.18       475
       EQUAL       0.07      0.38      0.11       128
       VAGUE       0.00      0.00      0.00       276

    accuracy                           0.23      1471
   macro avg       0.20      0.22      0.17      1471
weighted avg       0.27      0.23      0.23      1471

              precision    recall  f1-score   support

      BEFORE       0.58      0.47      0.52       687
       AFTER       0.61      0.22      0.32       530
       EQUAL       0.24      0.69      0.36       254
       VAGUE       0.00      0.00      0.00         0

    accuracy                           0.41      1471
   macro avg       0.36      0.34      0.30      1471
weighted avg       0.54      0.41      0.42      1471



Model weights saved in hebrew-trc-alephbert-base-SEQ_CLS/pytorch_model.bin
tokenizer config file saved in hebrew-trc-alephbert-base-SEQ_CLS/tokenizer_config.json
Special tokens file saved in hebrew-trc-alephbert-base-SEQ_CLS/special_tokens_map.json
