In [1]:
import os
from datasets import load_from_disk
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    TrainingArguments,
    Trainer,
    DataCollatorForTokenClassification
)
import numpy as np
from seqeval.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATASET_FOLDER = os.path.join("../data", "ner_dataset")
MODEL_CHECKPOINT = "dmis-lab/biobert-base-cased-v1.1"
OUTPUT_MODEL_FOLDER = os.path.join("../models", "clinical-ner-model")

In [3]:
LABELS = ["CONDITION", "DRUG", "LAB_TEST", "VALUE", "OPERATOR", "PROCEDURE", "DEMOGRAPHIC"]
tag_names = ['O'] + [f'{prefix}-{tag}' for tag in LABELS for prefix in ['B', 'I']]
id2tag = {i: tag for i, tag in enumerate(tag_names)}
tag2id = {tag: i for i, tag in enumerate(tag_names)}

In [4]:
def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)
    true_predictions = [[tag_names[p] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    true_labels = [[tag_names[l] for (p, l) in zip(prediction, label) if l != -100] for prediction, label in zip(predictions, labels)]
    report = classification_report(true_labels, true_predictions, output_dict=True)
    return {"precision": report["macro avg"]["precision"], "recall": report["macro avg"]["recall"], "f1": report["macro avg"]["f1-score"]}

In [5]:
processed_dataset = load_from_disk(DATASET_FOLDER)

In [6]:
print(f"Loading model and tokenizer from '{MODEL_CHECKPOINT}'...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(
    MODEL_CHECKPOINT, num_labels=len(tag_names), id2label=id2tag, label2id=tag2id
)

Loading model and tokenizer from 'dmis-lab/biobert-base-cased-v1.1'...


Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
training_args = TrainingArguments(
    output_dir=OUTPUT_MODEL_FOLDER,
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=50,
    push_to_hub=False,
    no_cuda=True,
)



In [8]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [10]:
trainer.train()

Step,Training Loss
50,0.6135
100,0.0015
150,0.001
200,0.0008
250,0.0006
300,0.0006
350,0.0005
400,0.0005
450,0.0004
500,0.0004


TrainOutput(global_step=732, training_loss=0.0424511114840238, metrics={'train_runtime': 378.4027, 'train_samples_per_second': 3.869, 'train_steps_per_second': 1.934, 'total_flos': 12719297833200.0, 'train_loss': 0.0424511114840238, 'epoch': 3.0})

In [11]:
trainer.save_model(OUTPUT_MODEL_FOLDER)