# Setup

In [1]:
from dataclasses import dataclass
from tqdm.autonotebook import tqdm
from src.utils.other import set_seeds
import wandb

import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModel,
    AutoModelForTokenClassification,
    Trainer,
    TrainingArguments
)

set_seeds(seed=42)
tqdm.pandas()

  from tqdm.autonotebook import tqdm


# Config

In [2]:
@dataclass
class Config:
    dataset_name: str = "Goader/ner-uk-2.0"
    
    pretrained: str = "microsoft/mdeberta-v3-base"
    # max_length: int = 1024
    merge_subwords: bool = True
    
    wandb_init_args = {
        'project': "sl-ner-uk-2.0",
        'entity': "havlytskyi-thesis",
        'name': "mdeberta-v3-base--token-level"
    }

config = Config()

# Training Arguments

In [3]:
training_args = TrainingArguments(
    output_dir=f'./checkpoints/{config.wandb_init_args["name"]}',
    logging_dir=f'./logs/{config.wandb_init_args["name"]}',
    learning_rate=2e-5,
    weight_decay=0.01,
    lr_scheduler_type='cosine',
    warmup_ratio=0.0,
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    bf16=False,
    report_to="wandb",
    optim='adamw_torch',
    eval_strategy='steps',
    save_strategy="steps",
    eval_steps=200,
    logging_steps=10,
    save_steps=200,
    save_total_limit=10,
    metric_for_best_model='eval_f1',
    greater_is_better=True,
    load_best_model_at_end=True,

    dataloader_drop_last=True, 
)

# Data & Tokenizer

In [4]:
tokenizer = AutoTokenizer.from_pretrained(config.pretrained)



In [5]:
from token_utils.data import NerUKDataset

dataset = NerUKDataset(
    tokenizer=tokenizer,
    splits=('train', 'validation', 'test'),
    dataset_name=config.dataset_name,
)

Tokenizing train split:   0%|          | 0/10980 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Tokenizing validation split:   0%|          | 0/1206 [00:00<?, ? examples/s]

Tokenizing test split:   0%|          | 0/5593 [00:00<?, ? examples/s]

# Model

In [6]:
model = AutoModelForTokenClassification.from_pretrained(
    config.pretrained,
    id2label=dataset.id2label,
    label2id=dataset.label2id,
)

Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
num_labels = len(dataset.label_list)

def flag_bad(example):
    return {"bad": any(l >= num_labels for l in example["labels"]
                       if l != -100)}

for split in ("train", "validation", "test"):
    bad_rows = dataset.aligned[split].filter(lambda example: any(l >= num_labels for l in example["labels"] if l != -100))
    print(split, bad_rows.num_rows, "bad sequences")


train 0 bad sequences
validation 0 bad sequences
test 0 bad sequences


# Train

In [8]:
from transformers import DataCollatorForTokenClassification
from token_utils.metric import NerMetrics


data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

trainer = Trainer(
    model=model, 
    args=training_args,
    train_dataset=dataset.train,
    eval_dataset=dataset.val,
    data_collator=data_collator,
    compute_metrics=NerMetrics(id2label=dataset.id2label).compute_metrics,
    tokenizer=tokenizer,
)

  trainer = Trainer(
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [9]:
wandb.init(**config.wandb_init_args)

trainer.train()

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mivan-havlytskyi[0m ([33mivan-havlytskyiz[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,0.1824,0.189603,0.556231,0.57502,0.565469,0.94275
400,0.1088,0.14744,0.625767,0.721131,0.670073,0.957099
600,0.0671,0.121462,0.67623,0.77769,0.72342,0.968453
800,0.0577,0.132209,0.687292,0.811469,0.744236,0.964812
1000,0.0587,0.114276,0.746077,0.821681,0.782056,0.969722
1200,0.0549,0.118195,0.745884,0.818539,0.780524,0.969555
1400,0.0319,0.115282,0.736548,0.827965,0.779586,0.970393
1600,0.033,0.118658,0.743158,0.831893,0.785026,0.96977


  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact (./checkpoints/mdeberta-v3-base--token-level/checkpoint-200)... Done. 3.7s
  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact (./checkpoints/mdeberta-v3-base--token-level/checkpoint-400)... Done. 3.8s
  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact (./checkpoints/mdeberta-v3-base--token-level/checkpoint-600)... Done. 3.7s
  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact (./checkpoints/mdeberta-v3-base--token-level/checkpoint-800)... Done. 3.7s
  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact (./checkpoints/mdeberta-v3-base--token-level/checkpoint-1000)... Done. 3.7s
  _warn_prf(average, modifier, msg_start, len(result))
[34m[1mwandb[0m: Adding directory to artifact 

TrainOutput(global_step=1715, training_loss=0.10954257211601769, metrics={'train_runtime': 355.2817, 'train_samples_per_second': 154.525, 'train_steps_per_second': 4.827, 'total_flos': 2759224000595616.0, 'train_loss': 0.10954257211601769, 'epoch': 5.0})

# Inference

## Checkpoint

In [10]:
FINETUNED_MODEL = f'checkpoints/{config.wandb_init_args["name"]}/checkpoint-1000'

In [11]:
trainer._load_from_checkpoint(FINETUNED_MODEL)

## Test

In [12]:
test_preds = trainer.predict(dataset.test)
test_metrics = trainer.compute_metrics((test_preds.predictions, test_preds.label_ids))

test_metrics

  _warn_prf(average, modifier, msg_start, len(result))


{'precision': 0.7833958571195564,
 'recall': 0.8252577319587628,
 'f1': 0.8037821102836582,
 'accuracy': 0.9771072498502097}