In [5]:
import numpy as np
import pandas as pd
import torch

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)

from datasets import Dataset
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

In [6]:
# =========================================================
# 3️⃣ Load BioBERT
# =========================================================
model_name = "dmis-lab/biobert-base-cased-v1.2"

# The slow (non-"fast") tokenizer implementation is requested explicitly.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# The checkpoint has no sequence-classification head (the load report lists
# classifier.weight / classifier.bias as MISSING), so that layer is created
# with random weights. Seed torch first so the initial head is the same on
# every Restart & Run All instead of depending on kernel state.
torch.manual_seed(42)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,  # class ids used by the notebook: 0 pneumonia, 1 diabetes, 2 cardiac
)

print("BioBERT model loaded.")

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

[1mBertForSequenceClassification LOAD REPORT[0m from: dmis-lab/biobert-base-cased-v1.2
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.decoder.bias               | UNEXPECTED | 
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
cls.predictions.decoder.weight             | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
classifier.bias                            | MISSING    | 
classifier.weight                          | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:

BioBERT model loaded.


In [7]:
# =========================================================
# 4️⃣ Create Synthetic Clinical Notes
# =========================================================
# Seed NumPy's global RNG before any template is drawn so the generated
# notes are the same on every run.
np.random.seed(42)

# Four fixed sentences per condition; every generated note is one of these.
pneumonia_templates = [
    "Patient presents with fever and cough. Chest x-ray shows consolidation.",
    "Shortness of breath and bilateral infiltrates noted.",
    "Elevated WBC count with suspected pneumonia.",
    "Crackles heard on auscultation with hypoxia."
]

diabetes_templates = [
    "Patient with elevated HbA1c and fasting glucose levels.",
    "History of type 2 diabetes with poor glycemic control.",
    "Frequent urination and increased thirst reported.",
    "Hyperglycemia managed with insulin therapy."
]

cardiac_templates = [
    "Chest pain radiating to left arm, possible myocardial infarction.",
    "Elevated troponin levels and ST-segment changes.",
    "History of coronary artery disease and hypertension.",
    "Shortness of breath with reduced ejection fraction."
]

# Position in this tuple is the integer class id: 0, 1, 2.
class_templates = (pneumonia_templates, diabetes_templates, cardiac_templates)
samples_per_class = 400

# Draw class by class (all of class 0, then 1, then 2), one template per draw.
notes = []
labels = []
for class_id, templates in enumerate(class_templates):
    for _ in range(samples_per_class):
        notes.append(np.random.choice(templates))
        labels.append(class_id)

# Build the frame, shuffle the rows (sample is called without an explicit
# random_state) and renumber the index from 0.
df = (
    pd.DataFrame({"text": notes, "label": labels})
    .sample(frac=1)
    .reset_index(drop=True)
)

print("Dataset size:", len(df))

Dataset size: 1200


In [8]:
# =========================================================
# 5️⃣ Convert to HuggingFace Dataset
# =========================================================
# Wrap the shuffled pandas frame (columns "text" and "label") as a Dataset.
dataset = Dataset.from_pandas(df)

# 80/20 split. The seed pins the partition so that metrics are comparable
# between runs; without it every execution produces a different split.
# NOTE(review): df is built from only 4 template sentences per class, so a
# row-level random split puts the same sentences in both train and test.
# Scores on this test split measure memorisation, not generalisation.
train_test = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test["train"]
test_dataset = train_test["test"]

In [9]:
# =========================================================
# 6️⃣ Tokenization
# =========================================================
def tokenize_function(example):
    """Run the notebook-level ``tokenizer`` over the ``"text"`` field.

    Args:
        example: Mapping with a ``"text"`` entry; it is used with
            ``Dataset.map(..., batched=True)`` below.

    Returns:
        Whatever ``tokenizer`` returns when called with truncation enabled
        and ``"max_length"`` padding at ``max_length=128``.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    texts = example["text"]
    return tokenizer(texts, **encode_options)

# Tokenize each split, drop the raw "text" column and expose torch tensors.
# Both results are derived from `train_test` rather than from the previous
# value of train_dataset / test_dataset, so re-running this cell is safe.
# Mapping over the already-processed dataset would fail, because its "text"
# column has been removed.
train_dataset = (
    train_test["train"]
    .map(tokenize_function, batched=True, remove_columns=["text"])
    .with_format("torch")
)
test_dataset = (
    train_test["test"]
    .map(tokenize_function, batched=True, remove_columns=["text"])
    .with_format("torch")
)

Map:   0%|          | 0/960 [00:00<?, ? examples/s]

Map:   0%|          | 0/240 [00:00<?, ? examples/s]

In [10]:
# =========================================================
# 7️⃣ Define Metrics
# =========================================================
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` holds one row of
            class scores per example; if it arrives as a tuple of arrays,
            the first element is taken as the class scores.

    Returns:
        dict with the keys ``accuracy``, ``f1_macro``, ``precision_macro``
        and ``recall_macro``.
    """
    logits, labels = eval_pred

    # Guard against tuple-valued predictions (extra outputs alongside logits).
    if isinstance(logits, tuple):
        logits = logits[0]

    # Predicted class = index of the highest score along the class axis.
    predictions = np.argmax(logits, axis=-1)

    # zero_division=0 scores a class with no predicted/true samples as 0
    # explicitly instead of emitting an UndefinedMetricWarning.
    macro = {"average": "macro", "zero_division": 0}

    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1_macro": f1_score(labels, predictions, **macro),
        "precision_macro": precision_score(labels, predictions, **macro),
        "recall_macro": recall_score(labels, predictions, **macro),
    }


In [11]:
pip install --upgrade accelerate transformers datasets sentencepiece -q

In [12]:
# =========================================================
# 8️⃣ Training Arguments
# =========================================================
# All tunable training settings collected in one mapping.
training_config = {
    # Where checkpoints are written
    "output_dir": "./results",
    # Schedule and batch sizes
    "num_train_epochs": 3,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    # Evaluate and checkpoint once per epoch; log every 20 steps
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    "logging_steps": 20,
    # Optimiser settings
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    # Reload the best checkpoint when training finishes
    "load_best_model_at_end": True,
    # No external experiment-tracking integration
    "report_to": "none",
}

training_args = TrainingArguments(**training_config)

In [13]:
# =========================================================
# 9️⃣ Trainer
# =========================================================
# Wire the model, the training configuration, both tokenized splits and the
# metric callback into a single Trainer.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

print("Trainer ready.")

Trainer ready.


In [14]:
# =========================================================
# 🔟 Train
# =========================================================
# Run fine-tuning with the settings held in training_args. The call is the
# last expression in the cell, so its return value (a TrainOutput) is what
# the notebook displays.
trainer.train()

  super().__init__(loader)


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,0.055712,0.014497,1.0,1.0,1.0,1.0


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,0.055712,0.014497,1.0,1.0,1.0,1.0
2,0.004891,0.002892,1.0,1.0,1.0,1.0
3,0.003333,0.002186,1.0,1.0,1.0,1.0


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['bert.embeddings.LayerNorm.weight', 'bert.embeddings.LayerNorm.bias', 'bert.encoder.layer.0.attention.output.LayerNorm.weight', 'bert.encoder.layer.0.attention.output.LayerNorm.bias', 'bert.encoder.layer.0.output.LayerNorm.weight', 'bert.encoder.layer.0.output.LayerNorm.bias', 'bert.encoder.layer.1.attention.output.LayerNorm.weight', 'bert.encoder.layer.1.attention.output.LayerNorm.bias', 'bert.encoder.layer.1.output.LayerNorm.weight', 'bert.encoder.layer.1.output.LayerNorm.bias', 'bert.encoder.layer.2.attention.output.LayerNorm.weight', 'bert.encoder.layer.2.attention.output.LayerNorm.bias', 'bert.encoder.layer.2.output.LayerNorm.weight', 'bert.encoder.layer.2.output.LayerNorm.bias', 'bert.encoder.layer.3.attention.output.LayerNorm.weight', 'bert.encoder.layer.3.attention.output.LayerNorm.bias', 'bert.encoder.layer.3.output.LayerNorm.weight', 'bert.encoder.layer.3.output.LayerNorm.bias', 'bert.encoder.layer.4.attention.output.La

TrainOutput(global_step=180, training_loss=0.12969390580223666, metrics={'train_runtime': 3799.5528, 'train_samples_per_second': 0.758, 'train_steps_per_second': 0.047, 'total_flos': 189441660764160.0, 'train_loss': 0.12969390580223666, 'epoch': 3.0})

In [15]:
# =========================================================
# 1️⃣1️⃣ Evaluate
# =========================================================
# Evaluate the trained model and print the resulting metrics dict.
# NOTE(review): evaluate() is called with no dataset argument; presumably it
# scores the eval_dataset passed to the Trainer (test_dataset) - confirm.
# NOTE(review): the notes are sampled from 4 fixed sentences per class and
# split at random, so train and test contain the same sentences. Read the
# scores with that in mind.
results = trainer.evaluate()
print("Final Results:", results)

  super().__init__(loader)


Final Results: {'eval_loss': 0.0021855412051081657, 'eval_accuracy': 1.0, 'eval_f1_macro': 1.0, 'eval_precision_macro': 1.0, 'eval_recall_macro': 1.0, 'eval_runtime': 121.9884, 'eval_samples_per_second': 1.967, 'eval_steps_per_second': 0.123, 'epoch': 3.0}


In [16]:
# =========================================================
# 1️⃣2️⃣ Save Model
# =========================================================
# Write the model and the tokenizer to the same directory so that the
# folder holds both artifacts.
export_dir = "./biobert_clinical_model"

trainer.save_model(export_dir)
tokenizer.save_pretrained(export_dir)

print("Model saved successfully.")

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Model saved successfully.
