In [1]:
# Set GPUs
import os

# Limit the devices CUDA exposes to this process to GPUs 5 and 6.
# NOTE(review): presumably this has to run before any CUDA-using library is
# initialised, which is why it sits in the first cell — confirm.
os.environ.update({"CUDA_VISIBLE_DEVICES": "5,6"})

In [6]:
# Data Loading

# Import packages
import pandas as pd

# Import data
df = pd.read_csv("../Data/subject-info-cleaned-with-prognosis-D-Llama3B.csv")

# Get prognosis and outcome
# df = df[['Prognosis', 'Outcome']]

# Map labels to integers
# Survivor = 0, SCD = 1, PFD = 2
label_map = {"survivor": 0, "sudden cardiac death": 1, "pump failure death": 2}
outcome_codes = df['Outcome'].map(label_map)

# Series.map yields NaN for any value that is not a key of label_map. Fail
# loudly here instead of letting NaN / float labels reach the classifier.
unmapped_outcomes = df.loc[outcome_codes.isna(), 'Outcome']
if not unmapped_outcomes.empty:
    raise ValueError(
        "Unrecognised 'Outcome' values (not in label_map): "
        f"{unmapped_outcomes.unique().tolist()}"
    )

# Build a new frame instead of mutating in place: 'Outcome' keeps its column
# position, holds the integer codes, and is renamed to 'labels'.
df = (
    df
    .assign(Outcome=outcome_codes.astype("int64"))
    .rename(columns={"Outcome": "labels"})
)
df.head()

# Shuffle dataset
# df = df.sample(frac = 1, random_state = 42)
# df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Patient ID,Prompts,labels,Prognosis
0,0,0,P0001,Age: 58\nGender: Male \nWeight: 83 kg\nHeight:...,0,RANKING: 1. Survivor\nREASONING: \n\nBased on ...
1,1,1,P0002,Age: 58\nGender: Male \nWeight: 74 kg\nHeight:...,0,RANKING: \n1. Survivor\nREASONING: \nThe patie...
2,2,2,P0003,Age: 69\nGender: Male \nWeight: 83 kg\nHeight:...,0,RANKING: \n1. Survivor\nREASONING: \nBased on ...
3,3,3,P0004,Age: 56\nGender: Female \nWeight: 84 kg\nHeigh...,0,RANKING: 1. Survivor \nREASONING: \n\nBased on...
4,4,4,P0005,Age: 70\nGender: Male \nWeight: 97 kg\nHeight:...,0,RANKING: 1. Survivor\nREASONING: \nThe patient...


In [8]:
# Show the full prompt text of the row with index label 0
print(df.loc[0, 'Prompts'])

Age: 58
Gender: Male 
Weight: 83 kg
Height: 163 cm
NYHA Class: III
Blood Pressure: 110/75 mmHg
Past Medical History: Idiopathic dilated cardiomyopathy
Albumin (g/L): 42,4
ALT or GPT (IU/L): 10.0
AST or GOT (IU/L): 20.0
Total Cholesterol (mmol/L): 5,4
Creatinine (mmol/L): 106.0
Gamma-glutamil transpeptidase (IU/L): 20
Glucose (mmol/L): 5,7
Hemoglobin (g/L): 132.0
HDL (mmol/L): 1,29
Potassium (mEq/L): 4,6
LDL (mmol/L): 3,36
Sodium (mEq/L): 141.0
Pro-BNP (ng/L): 1834.0
Protein (g/L): 69
T3 (pg/dL): 0,05
T4 (ng/L): 15
Troponin (ng/mL): 0,01
TSH (mIU/L): 3,02
Urea (mg/dL): 7,12
LVEF (%): 35.0
Medications: Beta Blockers, Digoxin, Loop Diuretics, ACE Inhibitor
ECG Impression:
        - Ventricular Extrasystole: Polymorphic
        - Ventricular Tachycardia: Non-sustained VT
        - Non-sustained ventricular tachycardia (CH>10): Yes
        - Paroxysmal supraventricular tachyarrhythmia: Unknown paroxysmal supraventricular tachyarrhythmia code
        - Bradycardia: Unknown bradycardia code
 

In [9]:
# Show the full prognosis text of the row with index label 0
print(df.loc[0, 'Prognosis'])

RANKING: 1. Survivor
REASONING: 

Based on the provided patient data, the patient's prognosis is most likely to be a survivor. The patient has a history of idiopathic dilated cardiomyopathy, which is a condition where the heart muscle becomes weakened and the heart chambers become enlarged. This condition can lead to heart failure, arrhythmias, and decreased cardiac function.

The patient's current NYHA Class III indicates that they have severe symptoms, such as shortness of breath, fatigue, and swelling in the legs, ankles, and feet. However, despite these symptoms, the patient's hemoglobin level (132 g/L) and serum potassium level (4.6 mEq/L) are within normal limits.

The patient's LVEF (left ventricular ejection fraction) is 35%, which is significantly below the normal range (50-70%). However, the patient is already on beta blockers, digoxin, and loop diuretics, which are standard treatments for heart failure. The patient's creatinine level (106.0 mmol/L) is elevated, indicating im

In [7]:
# Load in BioBERT with classification head
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch import cuda

# Checkpoint to load and the size of the classification head
model_name = "dmis-lab/biobert-base-cased-v1.1"
num_labels = 3  # Three possible outcomes

# Use the GPU when CUDA reports one, otherwise stay on the CPU
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)

# Left as the cell's last expression, so the model architecture is displayed
model.to(device)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [8]:
# Data split (for testing)
from sklearn.model_selection import train_test_split

# Split data: 70/30, stratified on the integer labels, fixed seed
train_df, test_df = train_test_split(df, test_size = 0.3, random_state = 42, stratify = df['labels'])

# Get encodings
# max_length is passed explicitly: with truncation=True alone this tokenizer
# reports no predefined maximum length and does not truncate at all. 512 matches
# the limit used by tokenize_function for the Trainer pipeline.
# NOTE(review): no later cell visible here consumes these encodings — confirm
# whether this cell is still needed.
train_encodings = tokenizer(
    list(train_df['Prognosis']),
    truncation = True,
    padding = True,
    max_length = 512,
)
test_encodings = tokenizer(
    list(test_df['Prognosis']),
    truncation = True,
    padding = True,
    max_length = 512,
)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [9]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

# Wrap the labelled DataFrame in a Hugging Face Dataset (all columns carried over)
dataset = Dataset.from_pandas(df=df)


In [10]:
# Load BioBERT tokenizer and a fresh sequence-classification model.
# Re-instantiating here means the classifier head is newly initialised each
# time this cell runs.
model_name = "dmis-lab/biobert-base-cased-v1.1"
num_labels = 3  # Three possible outcomes

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)

# Tokenization function
def tokenize_function(examples):
    """Tokenize the "Prognosis" field of ``examples`` with the notebook tokenizer.

    Every sequence is padded to, and truncated at, 512 tokens.

    Args:
        examples: mapping that exposes the texts under the "Prognosis" key.

    Returns:
        Whatever the tokenizer call returns for those texts.
    """
    prognosis_texts = examples["Prognosis"]
    encoded = tokenizer(
        prognosis_texts,
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    return encoded

# Apply tokenization
# batched=True passes tokenize_function a batch of rows per call rather than one row
dataset = dataset.map(function=tokenize_function, batched=True)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 849/849 [00:00<00:00, 1671.29 examples/s]


In [11]:
# Split data
# 80/20 train/validation split. The seed is fixed so that the shuffle, and
# therefore every metric reported below, is reproducible across kernel restarts.
train_test = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test["train"]
val_dataset = train_test["test"]


In [12]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np

def compute_metrics(eval_pred):
    """Compute accuracy and support-weighted precision / recall / F1.

    Args:
        eval_pred: pair ``(logits, labels)``. The predicted class is the argmax
            of ``logits`` over its last axis.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)  # Convert logits to class predictions

    # Compute accuracy
    acc = accuracy_score(labels, predictions)

    # Compute precision, recall, f1-score.
    # zero_division=0 makes explicit the score used when a class has no
    # predicted (or no true) samples: the value is 0, as with the default, but
    # without an undefined-metric warning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="weighted", zero_division=0
    )

    # Compute AUC (only if there are at least 2 classes)
    # NOTE(review): multiclass roc_auc_score presumably needs probability scores
    # rather than raw logits (softmax first) — confirm before re-enabling.
    # auc = roc_auc_score(labels, logits, multi_class="ovr") if len(set(labels)) > 1 else 0

    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
        #"auc": auc
    }


In [13]:
# Define training arguments
# NOTE(review): newer transformers releases name the evaluation option
# `eval_strategy`; confirm against the installed version before upgrading.
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir="../Results",
    logging_dir="../logs",
    logging_steps=10,
    # Optimisation schedule
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)




In [14]:
from transformers import Trainer

# Wire model, data, tokenizer and metric function into a Trainer.
# NOTE(review): the warning emitted on this call suggests one of these keyword
# names is deprecated in the installed transformers version — confirm which.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Fine-tune; left as the last expression so the TrainOutput summary is displayed
trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4855,0.391664,0.888235,0.896955,0.888235,0.872773
2,0.451,0.379228,0.847059,0.76066,0.847059,0.800151
3,0.3391,0.343995,0.905882,0.904046,0.905882,0.898669


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TrainOutput(global_step=129, training_loss=0.41489963198817054, metrics={'train_runtime': 81.5779, 'train_samples_per_second': 24.97, 'train_steps_per_second': 1.581, 'total_flos': 535962031911936.0, 'train_loss': 0.41489963198817054, 'epoch': 3.0})

In [15]:
# Evaluate on the Trainer's eval_dataset (val_dataset). With
# load_best_model_at_end=True the weights evaluated here are the best checkpoint,
# chosen on this same validation split — so this is not an independent
# held-out test estimate.
trainer.evaluate()



{'eval_loss': 0.34399500489234924,
 'eval_accuracy': 0.9058823529411765,
 'eval_precision': 0.9040459467369037,
 'eval_recall': 0.9058823529411765,
 'eval_f1': 0.8986689270302716,
 'eval_runtime': 0.9122,
 'eval_samples_per_second': 186.362,
 'eval_steps_per_second': 12.059,
 'epoch': 3.0}