In [1]:
# Set GPUs
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3,4"

In [2]:
# Data Loading

# Import packages
import pandas as pd

# Import data
df = pd.read_csv("../Data/subject-info-cleaned-with-prognosis-B.csv")

# Get prognosis and outcome
df = df[['Prognosis', 'Outcome']]

# Map labels to integers
# Survivor = 0, SCD = 1, PFD = 2
label_map = {"survivor": 0, "sudden cardiac death": 1, "pump failure death": 2}
df['Outcome'] = df['Outcome'].map(label_map)
df.rename(columns = {"Outcome": "labels"}, inplace = True)
df.head()

# Shuffle dataset
# df = df.sample(frac = 1, random_state = 42)
# df.head()

Unnamed: 0,Prognosis,labels
0,REASONING: \n\nThe patient is a 58-year-old ma...,0
1,REASONING: \n\nThe patient is a 58-year-old ma...,0
2,REASONING: \n\nThe patient's age of 69 and NYH...,0
3,REASONING: \n\nThe patient's outcome is a surv...,0
4,"REASONING: \n\nThe patient, a 70-year-old male...",0


In [3]:
# Load in BioBERT with classification head
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch import cuda

model_name = "dmis-lab/biobert-base-cased-v1.1"
num_labels = 3  # Three possible outcomes

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

device = 'cuda' if cuda.is_available() else 'cpu'
model.to(device)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
# Data split (for testing)
from sklearn.model_selection import train_test_split

# Split data
train_df, test_df = train_test_split(df, test_size = 0.3, random_state = 42, stratify = df['labels'])

# Get encodings
train_encodings = tokenizer(list(train_df['Prognosis']), truncation = True, padding = True)
test_encodings = tokenizer(list(test_df['Prognosis']), truncation = True, padding = True)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [5]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Convert to Hugging Face Dataset format
dataset = Dataset.from_pandas(df)


In [6]:
# Load BioBERT tokenizer
model_name = "dmis-lab/biobert-base-cased-v1.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenization function
def tokenize_function(examples):
    return tokenizer(examples["Prognosis"], padding="max_length", truncation=True, max_length = 512)

# Apply tokenization
dataset = dataset.map(tokenize_function, batched=True)


Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 849/849 [00:00<00:00, 2370.70 examples/s]


In [7]:
# Split data
train_test = dataset.train_test_split(test_size=0.2)
train_dataset = train_test["train"]
val_dataset = train_test["test"]


In [8]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)  # Convert logits to class predictions

    # Compute accuracy
    acc = accuracy_score(labels, predictions)

    # Compute precision, recall, f1-score
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="weighted")

    # Compute AUC (only if there are at least 2 classes)
    # auc = roc_auc_score(labels, logits, multi_class="ovr") if len(set(labels)) > 1 else 0

    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
        #"auc": auc
    }


In [9]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="../Results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="../logs",
    logging_steps=10,
    load_best_model_at_end=True,
)




In [10]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics = compute_metrics
)

trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1143,0.028185,1.0,1.0,1.0,1.0
2,0.0057,0.025807,0.994118,0.994464,0.994118,0.994196
3,0.0279,0.002625,1.0,1.0,1.0,1.0




TrainOutput(global_step=129, training_loss=0.14806590905023176, metrics={'train_runtime': 52.2306, 'train_samples_per_second': 39.0, 'train_steps_per_second': 2.47, 'total_flos': 535962031911936.0, 'train_loss': 0.14806590905023176, 'epoch': 3.0})

In [11]:
trainer.evaluate()



{'eval_loss': 0.0026249412912875414,
 'eval_accuracy': 1.0,
 'eval_precision': 1.0,
 'eval_recall': 1.0,
 'eval_f1': 1.0,
 'eval_runtime': 0.9447,
 'eval_samples_per_second': 179.956,
 'eval_steps_per_second': 11.644,
 'epoch': 3.0}