In [1]:
import sys
from pathlib import Path
ROOT_DIR = Path().resolve().parents[0]
sys.path.append(str(ROOT_DIR))
import config as cfg

from transformers import (
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
import numpy as np
from sklearn.metrics import roc_auc_score
from datasets import load_from_disk
import os
import json

N_RUN = 3               # Number of run to separe different experiments

In [2]:
def get_fold_datasets(ds, fold):
    ds_train = ds.filter(lambda x: x["fold"] != fold)
    ds_val = ds.filter(lambda x: x["fold"] == fold)
    return ds_train, ds_val

In [3]:
def model_init(model_name=cfg.MODEL_BASE):
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=6,
        problem_type="multi_label_classification",
    )

In [4]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    probs = 1 / (1 + np.exp(-logits))
    auc = roc_auc_score(labels, probs, average="macro")
    return {"roc_auc_macro": auc}

In [5]:
def print_highlighted_box(text, wide_chars):
    spaces = (wide_chars - len(text)) // 2
    left_spaces = spaces - 1
    right_spaces = spaces if (wide_chars - len(text)) % 2 else spaces - 1
    print(f"{'-' * wide_chars}")
    print(f"|{' ' * (wide_chars - 2)}|")
    print(f"|{' ' * left_spaces}{text}{' ' * right_spaces}|")
    print(f"|{' ' * (wide_chars - 2)}|")
    print(f"{'-' * wide_chars}")

In [6]:
class TrainerWithTrainMetrics(Trainer):

    def evaluate(
        self,
        eval_dataset=None,
        ignore_keys=None,
        metric_key_prefix: str = "eval",
    ):
		# Validation metrics (what is usually returned by Trainer.evaluate)
        metrics = super().evaluate(
            eval_dataset=eval_dataset,
            ignore_keys=ignore_keys,
            metric_key_prefix=metric_key_prefix,
        )

        # Training metrics (added in this custom Trainer)
        train_metrics = super().evaluate(
            eval_dataset=self.train_dataset,
            ignore_keys=ignore_keys,
            metric_key_prefix="train",
        )

        # Combine metrics
        metrics.update(train_metrics)
        return metrics

In [7]:
ds_train_tokenized = load_from_disk(cfg.PATH_DS_TRAIN_TOKENIZED)
ds_train_tokenized

Dataset({
    features: ['id', 'fold', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 159571
})

In [8]:
for fold in range(cfg.N_FOLDS):

    path_checkpoint_dir = os.path.join(cfg.PATH_CHECKPOINTS, cfg.MODEL_BASE, f"run_{N_RUN}", f"fold_{fold}")
    path_model_final = os.path.join(path_checkpoint_dir, "model_final")

    ds_train, ds_val = get_fold_datasets(ds_train_tokenized, fold)
    print_highlighted_box(f"FOLD {fold}: TRAIN SIZE: {len(ds_train)} ({len(ds_train)/len(ds_train_tokenized):.2%}), VAL SIZE: {len(ds_val)} ({len(ds_val)/len(ds_train_tokenized):.2%})", 80)

    args = TrainingArguments(
        # Training structure parameters
        num_train_epochs=cfg.EPOCHS,
        per_device_train_batch_size=cfg.BATCH_SIZE,
        gradient_accumulation_steps=1,
        per_device_eval_batch_size=cfg.BATCH_SIZE,
        # Optimization parameters
        learning_rate=cfg.LEARNING_RATE,
        weight_decay=0.01,
        optim="adamw_torch_fused",
        # Evaluation and saving parameters
        eval_strategy="steps",
        save_strategy="steps",
        eval_steps=cfg.EVAL_STEPS,
        save_steps=cfg.SAVE_STEPS,
        load_best_model_at_end=True,
        save_only_model=True,
        save_total_limit=cfg.SAVE_TOTAL_LIMIT,
        metric_for_best_model="eval_roc_auc_macro",
        # Precision and memory parameters
        fp16=True,
        gradient_checkpointing=False,
        dataloader_num_workers=2,
        dataloader_pin_memory=True,
        # Logging and reproducibility parameters
        logging_steps=cfg.LOGGING_STEPS,
        seed=cfg.RANDOM_SEED,
        output_dir= path_checkpoint_dir,
    )

    trainer = TrainerWithTrainMetrics(
        model=model_init(cfg.MODEL_BASE),
		args=args,
		train_dataset=ds_train,
		eval_dataset=ds_val,
		compute_metrics=compute_metrics,
		callbacks=[EarlyStoppingCallback(early_stopping_patience=cfg.EARLY_STOP_PATIENCE)]
	)

    train_results = trainer.train()
    
	# Save log_history of the last training to easily access it later
    path_hist = os.path.join(path_checkpoint_dir, "log_history.json")
    with open(path_hist, "w") as f:
        json.dump(trainer.state.log_history, f, indent=2)

	# Save the final model
    trainer.save_model(path_model_final)

--------------------------------------------------------------------------------
|                                                                              |
|        FOLD 0: TRAIN SIZE: 127656 (80.00%), VAL SIZE: 31915 (20.00%)         |
|                                                                              |
--------------------------------------------------------------------------------


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Roc Auc Macro
250,0.0732,0.060197,0.942345
500,0.0652,0.054832,0.95026
750,0.0489,0.04795,0.976738
1000,0.0489,0.045996,0.978491
1250,0.0415,0.04353,0.978753
1500,0.0382,0.044416,0.979025
1750,0.0392,0.043646,0.980857
2000,0.0431,0.040924,0.980842
2250,0.0469,0.042129,0.981409
2500,0.0638,0.051611,0.983353


early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stop

--------------------------------------------------------------------------------
|                                                                              |
|        FOLD 1: TRAIN SIZE: 127656 (80.00%), VAL SIZE: 31915 (20.00%)         |
|                                                                              |
--------------------------------------------------------------------------------


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Roc Auc Macro
250,0.0638,0.060235,0.945058
500,0.0567,0.051094,0.953404
750,0.0398,0.046629,0.967902
1000,0.039,0.048718,0.966774
1250,0.0586,0.046075,0.978418
1500,0.0517,0.05426,0.979273
1750,0.0565,0.041596,0.979263
2000,0.0338,0.044125,0.981197
2250,0.04,0.040919,0.981242
2500,0.0402,0.040564,0.982235


early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stop

--------------------------------------------------------------------------------
|                                                                              |
|        FOLD 2: TRAIN SIZE: 127656 (80.00%), VAL SIZE: 31915 (20.00%)         |
|                                                                              |
--------------------------------------------------------------------------------


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Roc Auc Macro
250,0.0683,0.063805,0.918888
500,0.0502,0.047674,0.96849
750,0.0484,0.046332,0.97171
1000,0.0433,0.046276,0.978167
1250,0.0453,0.043595,0.977081
1500,0.0299,0.042756,0.97882
1750,0.0477,0.042672,0.978176
2000,0.0434,0.041353,0.979929
2250,0.0456,0.042823,0.981408
2500,0.0373,0.041045,0.982197


early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stop

--------------------------------------------------------------------------------
|                                                                              |
|        FOLD 3: TRAIN SIZE: 127660 (80.00%), VAL SIZE: 31911 (20.00%)         |
|                                                                              |
--------------------------------------------------------------------------------


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Roc Auc Macro
250,0.0662,0.061638,0.949346
500,0.0411,0.051445,0.949818
750,0.0419,0.047588,0.972031
1000,0.0519,0.055467,0.978874
1250,0.0391,0.048249,0.978122
1500,0.0401,0.045164,0.973708
1750,0.0375,0.05797,0.979677
2000,0.0438,0.041787,0.980087
2250,0.0387,0.042664,0.981126
2500,0.0531,0.041366,0.982394


early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stop

--------------------------------------------------------------------------------
|                                                                              |
|        FOLD 4: TRAIN SIZE: 127656 (80.00%), VAL SIZE: 31915 (20.00%)         |
|                                                                              |
--------------------------------------------------------------------------------


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Roc Auc Macro
250,0.0485,0.058558,0.94999
500,0.0595,0.048698,0.974947
750,0.0504,0.049351,0.974917
1000,0.0391,0.045135,0.97703
1250,0.0346,0.047797,0.964732
1500,0.0424,0.044076,0.980219
1750,0.0577,0.041564,0.981511
2000,0.0378,0.041858,0.981498
2250,0.0401,0.043832,0.982573
2500,0.0441,0.038637,0.983151


early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_roc_auc_macro so early stop