In [None]:
https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4591013

In [4]:
import torch
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, BertTokenizer,AdamW,get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,roc_auc_score
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification,Trainer, TrainingArguments

In [5]:
train_dataset=pd.read_csv('/kaggle/input/mimic-iii/LOS_WEEKS_adm_train.csv')
val_dataset=pd.read_csv('/kaggle/input/mimic-iii/LOS_WEEKS_adm_val.csv')
test_dataset=pd.read_csv('/kaggle/input/mimic-iii/LOS_WEEKS_adm_test.csv')

In [6]:
from torch.utils.data import dataloader
from torch import nn

class EnsembleModel(nn.Module):
    def __init__(self, model1):
        super(EnsembleModel, self).__init__()
        self.model1 = model1
    def forward(self, input_ids, attention_mask):
        output1 = self.model1(input_ids=input_ids, attention_mask=attention_mask)[0]
        avg_output= output1
        return output1

In [7]:
from transformers import AutoModelForSequenceClassification, AutoConfig
config=AutoConfig.from_pretrained('bvanaken/CORe-clinical-outcome-biobert-v1',
                                  num_labels=4,
                                  hidden_dropout_prob=0.2,
                                  attention_probs_dropout_prob=0.2)

core_model=AutoModelForSequenceClassification.from_pretrained('bvanaken/CORe-clinical-outcome-biobert-v1',
                                                              config=config)



config.json:   0%|          | 0.00/428 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/433M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bvanaken/CORe-clinical-outcome-biobert-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
from transformers import AutoTokenizer
tokenizer=AutoTokenizer.from_pretrained('bvanaken/CORe-clinical-outcome-biobert-v1')

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]



In [9]:
train_encodings = tokenizer(train_dataset['text'].tolist(), truncation=True, padding=True, max_length = 512)
val_encodings = tokenizer(val_dataset['text'].tolist(), truncation=True, padding=True,  max_length = 512)
test_encodings = tokenizer(test_dataset['text'].tolist(), truncation=True, padding=True , max_length = 512)

In [10]:
class LosDataset(torch.utils.data.Dataset):
    def __init__(self,encodings,labels):
        self.encodings=encodings
        self.labels=labels

    def __getitem__(self,idx):
        item={key:torch.tensor(val[idx]) for key,val in self.encodings.items()}
        item['labels']=torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
        

In [11]:
train_dataset = LosDataset(train_encodings, train_dataset['los_label'].tolist())
val_dataset   = LosDataset(val_encodings, val_dataset['los_label'].tolist())
test_dataset  = LosDataset(test_encodings, test_dataset['los_label'].tolist())

In [12]:
from transformers import AdamW , get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score,roc_auc_score
from tqdm import tqdm
from torch import nn
import numpy as np

ensemble_model=EnsembleModel(core_model)

In [13]:
import os

# list all files in the current directory
files = os.listdir('.')

# filter the ones that start with 'CORE_baseline'
core_models = [f for f in files if f.startswith('CORE_baseline')]

if core_models:
    print("Found models starting with 'CORE_baseline':")
    for model in core_models:
        print(model)
        
    # get the first (and supposedly only) model
    model_path = core_models[0]

    # load the model state
    ensemble_model.load_state_dict(torch.load(model_path))
    print("Loaded Model")
else:
    print("No models found starting with 'CORE_baseline'.")

No models found starting with 'CORE_baseline'.


In [14]:
device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
ensemble_model=ensemble_model.to(device)


In [15]:

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [16]:
epochs = 200
best_roc_auc = 0.701894
min_delta = 0.0001
early_stopping_count = 0
early_stopping_patience = 3
gradient_accumulation_steps = 10
best_model_path = "best_model.pth"


optimizer = AdamW(ensemble_model.parameters(), lr=1e-5, weight_decay=0.01)


scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=50, 
    num_training_steps=len(train_loader) * epochs // gradient_accumulation_steps
)



In [17]:
device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
ensemble_model=ensemble_model.to(device)


In [18]:
from torch.nn import functional as F
# Training
for epoch in range(5, epochs):
    ensemble_model.train()
    train_loss = 0
    for step, batch in enumerate(tqdm(train_loader)):
        optimizer.zero_grad() if step % gradient_accumulation_steps == 0 else None
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = ensemble_model(input_ids, attention_mask)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        (loss / gradient_accumulation_steps).backward()
        train_loss += loss.item()
        if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(train_loader):
            optimizer.step()
            scheduler.step()
    ensemble_model.eval()
    val_loss=0
    val_preds=[]
    val_labels=[]
    with torch.no_grad():
        for batch in tqdm(val_loader):
            input_ids=batch['input_ids'].to(device)
            attention_mask=batch['attention_mask'].to(device)
            labels=batch['labels'].to(device)
            outputs=ensemble_model(input_ids,attention_mask=attention_mask)
            loss=nn.CrossEntropyLoss()(outputs,labels)
            val_loss+=loss.item()
            val_preds.append(F.softmax(outputs,dim=1).cpu().numpy())
            val_labels.append(labels.cpu().numpy())

    val_preds=np.concatenate(val_preds)
    val_labels=np.concatenate(val_labels)
    val_loss /=len(val_loader)
    train_loss /=len(train_loader)
    print(f'Epoch {epoch+1}/{epochs}')
    print(f'Train Loss: {train_loss:.4f}')
    print(f'Val Loss: {val_loss:.4f}')

    val_preds_class =np.argmax(val_preds, axis=1)
    acuuracy=accuracy_score(val_labels,val_preds_class)
    recall=recall_score(val_labels,val_preds_class,average='weighted')
    precision=precision_score(val_labels,val_preds_class,average='weighted')
    f1=f1_score(val_labels,val_preds_class,average='weighted')
    mirco_f1= f1_score(val_labels,val_preds_class,average='micro')
    macro_roc_auc=roc_auc_score(val_labels,val_preds,multi_class='ovo',average='macro')
    #weighted_roc_auc=roc_auc_score(val_labels,val_preds,multi_class='ovo',average='weighted')
    print(f'Accuracy: {acuuracy}')
    print(f'Recall: {recall}')
    print(f'Precision: {precision}')
    print(f'F1: {f1}')
    print(f'Micro F1: {mirco_f1}')
    print(f'Macro ROC AUC: {macro_roc_auc}')
    #print(f'Weighted ROC AUC: {weighted_roc_auc:.4f}')

    #implement early stopping

    if epoch >0 and macro_roc_auc - best_roc_auc < min_delta:
        early_stopping_count += 1
        print(f"Early stopping count: {early_stopping_count}/{early_stopping_patience}")
        if early_stopping_count >= early_stopping_patience:
            print("Early stopping triggered.")
            break
    else:
        best_roc_auc = macro_roc_auc
        early_stopping_count = 0
        torch.save(ensemble_model.state_dict(), f"CORE_baseline_epoch_{epoch}roc_{best_roc_auc}.pth")
        print("Model saved.")
        

    
           


100%|██████████| 3803/3803 [47:50<00:00,  1.32it/s]
100%|██████████| 549/549 [02:08<00:00,  4.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6/200
Train Loss: 1.3176
Val Loss: 1.2873
Accuracy: 0.3689364609428376
Recall: 0.36893646094283755
Precision: 0.3189242806190809
F1: 0.33851006678610823
Micro F1: 0.3689364609428376
Macro ROC AUC: 0.6435483771558027
Early stopping count: 1/3


100%|██████████| 3803/3803 [47:57<00:00,  1.32it/s]
100%|██████████| 549/549 [02:08<00:00,  4.29it/s]


Epoch 7/200
Train Loss: 1.2338
Val Loss: 1.2285
Accuracy: 0.3939877021179686
Recall: 0.3939877021179686
Precision: 0.42792509760144676
F1: 0.35424250119704603
Micro F1: 0.3939877021179686
Macro ROC AUC: 0.6785975665068826
Early stopping count: 2/3


100%|██████████| 3803/3803 [47:56<00:00,  1.32it/s]
100%|██████████| 549/549 [02:08<00:00,  4.28it/s]

Epoch 8/200
Train Loss: 1.1924
Val Loss: 1.2058
Accuracy: 0.41038487815987246
Recall: 0.41038487815987246
Precision: 0.41827392031167565
F1: 0.3958617528785462
Micro F1: 0.41038487815987246
Macro ROC AUC: 0.6957385184280573
Early stopping count: 3/3
Early stopping triggered.





In [19]:
import os

files=os.listdir('.')
core_models=[f for f in files if f.startswith('CORE_baseline')]


if core_models:
    print("Found models starting with 'CORE_baseline':")
    for model in core_models:
        print(model)

    model_path=core_models[0]
    ensemble_model.load_state_dict(torch.load(model_path))
    print("Loaded Model")
else:
    print("No models found starting with 'CORE_baseline'.")


No models found starting with 'CORE_baseline'.


In [20]:
ensemble_model.eval()

test_preds =[]
test_lables =[]

with torch.no_grad():
    for batch in tqdm(test_loader):
        input_ids=batch['input_ids'].to(device)
        attention_mask=batch['attention_mask'].to(device)
        labels=batch['labels'].to(device)
        outputs=ensemble_model(input_ids,attention_mask=attention_mask)
        test_preds.append(F.softmax(outputs,dim=1).cpu().numpy())
        test_lables.append(labels.cpu().numpy())



100%|██████████| 1100/1100 [04:17<00:00,  4.27it/s]


In [21]:
test_preds=np.concatenate(test_preds)
test_labels=np.concatenate(test_lables) 

test_preds_class=np.argmax(test_preds,axis=1)
accuracy = accuracy_score(test_labels, test_preds_class)
recall = recall_score(test_labels, test_preds_class, average='weighted')
precision = precision_score(test_labels, test_preds_class, average='weighted')
f1 = f1_score(test_labels, test_preds_class, average='weighted')
micro_f1 = f1_score(test_labels, test_preds_class, average='micro')
macro_roc_auc = roc_auc_score(test_labels, test_preds, multi_class='ovo', average='macro')
print(f'Accuracy: {accuracy}')
print(f'Recall: {recall}')
print(f'Precision: {precision}')
print(f'F1: {f1}')
print(f'Micro F1: {micro_f1}')
print(f'Macro ROC AUC: {macro_roc_auc}')

Accuracy: 0.42559963623962715
Recall: 0.42559963623962715
Precision: 0.44011706747668994
F1: 0.4135338519759182
Micro F1: 0.42559963623962715
Macro ROC AUC: 0.7062879105845056
