1. Load data and model

In [29]:
import torch

if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available. Check your setup.")


GPU is available!
Using GPU: NVIDIA GeForce RTX 4080 Laptop GPU


In [30]:
# Import required libraries
import os
from datasets import Dataset

# Function to read IOB files and split into paragraphs
def read_iob_file(file_path):
    """Read IOB file and convert to token-label samples"""
    examples = []
    words, labels = [], []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                token, label = line.split("\t")
                words.append(token)
                labels.append(label.strip())
            else:
                # End of a sample
                if words and labels:
                    examples.append({"tokens": words, "labels": labels})
                    words, labels = [], []
    # Add the last example if file doesn't end with a blank line
    if words and labels:
        examples.append({"tokens": words, "labels": labels})
    return examples



In [31]:
# Load datasets using the updated function
train_path = 'C:\\S24-25\\TxM\\dataset\\train.tsv'
val_path = 'C:\\S24-25\\TxM\\dataset\\val_gold.tsv'
test_path = 'C:\\S24-25\\TxM\\dataset\\test_gold.tsv'
train_data = read_iob_file(train_path)
val_data = read_iob_file(val_path)
test_data = read_iob_file(test_path)

print(f"Train samples: {len(train_data)}, Val samples: {len(val_data)}, Test samples: {len(test_data)}")


Train samples: 998, Val samples: 124, Test samples: 126


In [43]:
from transformers import AutoTokenizer
# Load model directly
# Use a pipeline as a high-level helper
from transformers import pipeline
model_checkpoint="emilyalsentzer/Bio_ClinicalBERT"
pipe = pipeline("fill-mask", model="emilyalsentzer/Bio_ClinicalBERT")
# Define label mappings
label_list = ["O", "B-ADR", "I-ADR", "B-DRU", "I-DRU", "B-DIS", "I-DIS", "B-SYM", "I-SYM"]
label_to_id = {label: i for i, label in enumerate(label_list)}
id_to_label = {i: label for label, i in label_to_id.items()}

def tokenize_and_align_labels(examples):
    tokenized_inputs = pipe.tokenizer(
        examples["tokens"], 
        is_split_into_words=True,
        padding="max_length",      # or True, depending on your preference
        truncation=True,          # enable truncation
        max_length=512,           # explicitly set maximum sequence length 
        return_overflowing_tokens=False,
    )
    labels = []
    for i, label in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        aligned_labels = []
        previous_word_idx = None
        for word_idx in word_ids:
            if word_idx is None:
                aligned_labels.append(-100)  # Ignore padding
            elif word_idx != previous_word_idx:
                aligned_labels.append(label_to_id[label[word_idx]])
            else:
                aligned_labels.append(label_to_id[label[word_idx]])
            previous_word_idx = word_idx
        labels.append(aligned_labels)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Tokenize datasets
train_dataset = Dataset.from_list(train_data).map(tokenize_and_align_labels, batched=True)
test_dataset = Dataset.from_list(test_data).map(tokenize_and_align_labels, batched=True)
val_dataset = Dataset.from_list(val_data).map(tokenize_and_align_labels, batched=True)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.

[A
Map: 100%|██████████| 998/998 [00:00<00:00, 2624.21 examples/s]

Map: 100%|██████████| 126/126 [00:00<00:00, 2342.01 examples/s]

Map: 100%|██████████| 124/124 [00:00<00:00, 2486.73 examples/s]


In [44]:
from transformers import AutoModelForTokenClassification


# Load the pre-trained BERT model

model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)


Some weights of BertForTokenClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2. Train and evaluate with default parameters on the test set

In [45]:
from transformers import TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    load_best_model_at_end=True,
    logging_dir="./bio_bert_logs",
    logging_steps=10,
)


In [48]:
from transformers import Trainer
from evaluate import load
import torch

# Load metric for evaluation
metric = load("seqeval")

def compute_metrics(predictions):
    predictions, labels = predictions
    predictions = torch.argmax(torch.tensor(predictions), dim=2)
    
    # Convert predictions and labels to CPU and then to plain Python integers
    predictions = predictions.cpu().numpy()
    
    true_labels = [
        [id_to_label[label] for label in label_seq if label != -100] 
        for label_seq in labels
    ]
    true_predictions = [
        [id_to_label[int(p)] for (p, l) in zip(prediction, label_seq) if l != -100]  # Convert tensor to int
        for prediction, label_seq in zip(predictions, labels)
    ]
    return metric.compute(predictions=true_predictions, references=true_labels)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=pipe.tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


  5%|▌         | 10/189 [00:08<01:20,  2.23it/s]

{'loss': 0.217, 'grad_norm': 1.288893461227417, 'learning_rate': 1.8941798941798943e-05, 'epoch': 0.16}


 11%|█         | 20/189 [00:11<01:03,  2.68it/s]

{'loss': 0.2185, 'grad_norm': 1.2395459413528442, 'learning_rate': 1.7883597883597884e-05, 'epoch': 0.32}


 16%|█▌        | 30/189 [00:15<00:58,  2.71it/s]

{'loss': 0.2213, 'grad_norm': 1.9911036491394043, 'learning_rate': 1.6825396825396828e-05, 'epoch': 0.48}


 21%|██        | 40/189 [00:19<00:55,  2.70it/s]

{'loss': 0.1705, 'grad_norm': 0.9661669135093689, 'learning_rate': 1.576719576719577e-05, 'epoch': 0.63}


 26%|██▋       | 50/189 [00:22<00:51,  2.71it/s]

{'loss': 0.2053, 'grad_norm': 1.5040099620819092, 'learning_rate': 1.470899470899471e-05, 'epoch': 0.79}


 32%|███▏      | 60/189 [00:26<00:47,  2.71it/s]

{'loss': 0.1795, 'grad_norm': 1.3299952745437622, 'learning_rate': 1.3650793650793652e-05, 'epoch': 0.95}


  _warn_prf(average, modifier, msg_start, len(result))

 33%|███▎      | 63/189 [00:28<00:38,  3.30it/s]

{'eval_loss': 0.23622222244739532, 'eval_ADR': {'precision': 0.43388429752066116, 'recall': 0.5189456342668863, 'f1': 0.47261815453863465, 'number': 607}, 'eval_DIS': {'precision': 0.5, 'recall': 0.017241379310344827, 'f1': 0.03333333333333333, 'number': 58}, 'eval_DRU': {'precision': 0.8398791540785498, 'recall': 0.8633540372670807, 'f1': 0.8514548238897398, 'number': 322}, 'eval_SYM': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 50}, 'eval_overall_precision': 0.5609065155807366, 'eval_overall_recall': 0.5728061716489875, 'eval_overall_f1': 0.566793893129771, 'eval_overall_accuracy': 0.9283537617661001, 'eval_runtime': 1.0603, 'eval_samples_per_second': 118.837, 'eval_steps_per_second': 7.545, 'epoch': 1.0}


 37%|███▋      | 70/189 [00:32<00:54,  2.20it/s]

{'loss': 0.1403, 'grad_norm': 0.9963726997375488, 'learning_rate': 1.2592592592592593e-05, 'epoch': 1.11}


 42%|████▏     | 80/189 [00:36<00:40,  2.67it/s]

{'loss': 0.1632, 'grad_norm': 1.7293460369110107, 'learning_rate': 1.1534391534391536e-05, 'epoch': 1.27}


 48%|████▊     | 90/189 [00:39<00:36,  2.68it/s]

{'loss': 0.1715, 'grad_norm': 1.4716163873672485, 'learning_rate': 1.0476190476190477e-05, 'epoch': 1.43}


 53%|█████▎    | 100/189 [00:43<00:33,  2.67it/s]

{'loss': 0.1546, 'grad_norm': 0.7301039099693298, 'learning_rate': 9.417989417989418e-06, 'epoch': 1.59}


 58%|█████▊    | 110/189 [00:47<00:29,  2.71it/s]

{'loss': 0.162, 'grad_norm': 0.7526003122329712, 'learning_rate': 8.35978835978836e-06, 'epoch': 1.75}


 63%|██████▎   | 120/189 [00:51<00:25,  2.69it/s]

{'loss': 0.1242, 'grad_norm': 0.7895763516426086, 'learning_rate': 7.301587301587301e-06, 'epoch': 1.9}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 126/189 [00:54<00:19,  3.29it/s]

{'eval_loss': 0.2372179478406906, 'eval_ADR': {'precision': 0.45874125874125876, 'recall': 0.5403624382207578, 'f1': 0.4962178517397882, 'number': 607}, 'eval_DIS': {'precision': 0.08333333333333333, 'recall': 0.017241379310344827, 'f1': 0.028571428571428574, 'number': 58}, 'eval_DRU': {'precision': 0.8377581120943953, 'recall': 0.8819875776397516, 'f1': 0.8593040847201211, 'number': 322}, 'eval_SYM': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 50}, 'eval_overall_precision': 0.575046904315197, 'eval_overall_recall': 0.5911282545805208, 'eval_overall_f1': 0.5829766999524488, 'eval_overall_accuracy': 0.9293695401909663, 'eval_runtime': 1.0783, 'eval_samples_per_second': 116.85, 'eval_steps_per_second': 7.419, 'epoch': 2.0}


 69%|██████▉   | 130/189 [00:56<00:35,  1.68it/s]

{'loss': 0.1351, 'grad_norm': 1.334479570388794, 'learning_rate': 6.243386243386243e-06, 'epoch': 2.06}


 74%|███████▍  | 140/189 [01:00<00:18,  2.66it/s]

{'loss': 0.1314, 'grad_norm': 1.2395824193954468, 'learning_rate': 5.185185185185185e-06, 'epoch': 2.22}


 79%|███████▉  | 150/189 [01:04<00:14,  2.70it/s]

{'loss': 0.1358, 'grad_norm': 1.2166728973388672, 'learning_rate': 4.126984126984127e-06, 'epoch': 2.38}


 85%|████████▍ | 160/189 [01:08<00:10,  2.69it/s]

{'loss': 0.1453, 'grad_norm': 1.1995375156402588, 'learning_rate': 3.068783068783069e-06, 'epoch': 2.54}


 90%|████████▉ | 170/189 [01:11<00:07,  2.70it/s]

{'loss': 0.1272, 'grad_norm': 1.2140284776687622, 'learning_rate': 2.0105820105820108e-06, 'epoch': 2.7}


 95%|█████████▌| 180/189 [01:15<00:03,  2.68it/s]

{'loss': 0.158, 'grad_norm': 1.1432398557662964, 'learning_rate': 9.523809523809525e-07, 'epoch': 2.86}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 189/189 [01:20<00:00,  3.28it/s]

{'eval_loss': 0.22910471260547638, 'eval_ADR': {'precision': 0.4652482269503546, 'recall': 0.5403624382207578, 'f1': 0.5, 'number': 607}, 'eval_DIS': {'precision': 0.21739130434782608, 'recall': 0.08620689655172414, 'f1': 0.12345679012345678, 'number': 58}, 'eval_DRU': {'precision': 0.8636363636363636, 'recall': 0.8850931677018633, 'f1': 0.8742331288343559, 'number': 322}, 'eval_SYM': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 50}, 'eval_overall_precision': 0.5841209829867675, 'eval_overall_recall': 0.5959498553519769, 'eval_overall_f1': 0.5899761336515513, 'eval_overall_accuracy': 0.9297758515609128, 'eval_runtime': 1.0596, 'eval_samples_per_second': 118.912, 'eval_steps_per_second': 7.55, 'epoch': 3.0}


100%|██████████| 189/189 [01:22<00:00,  2.30it/s]

{'train_runtime': 82.0785, 'train_samples_per_second': 36.477, 'train_steps_per_second': 2.303, 'train_loss': 0.16333529431983906, 'epoch': 3.0}





TrainOutput(global_step=189, training_loss=0.16333529431983906, metrics={'train_runtime': 82.0785, 'train_samples_per_second': 36.477, 'train_steps_per_second': 2.303, 'total_flos': 782372000176128.0, 'train_loss': 0.16333529431983906, 'epoch': 3.0})

In [49]:
# Evaluate the model
results = trainer.evaluate()
print("Evaluation Results:", results)


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 8/8 [00:00<00:00,  8.33it/s]

Evaluation Results: {'eval_loss': 0.22910471260547638, 'eval_ADR': {'precision': 0.4652482269503546, 'recall': 0.5403624382207578, 'f1': 0.5, 'number': 607}, 'eval_DIS': {'precision': 0.21739130434782608, 'recall': 0.08620689655172414, 'f1': 0.12345679012345678, 'number': 58}, 'eval_DRU': {'precision': 0.8636363636363636, 'recall': 0.8850931677018633, 'f1': 0.8742331288343559, 'number': 322}, 'eval_SYM': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 50}, 'eval_overall_precision': 0.5841209829867675, 'eval_overall_recall': 0.5959498553519769, 'eval_overall_f1': 0.5899761336515513, 'eval_overall_accuracy': 0.9297758515609128, 'eval_runtime': 1.3838, 'eval_samples_per_second': 91.051, 'eval_steps_per_second': 5.781, 'epoch': 3.0}





3. Train and test on val set

4. HPO