In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [2]:
train_dataset = pd.read_csv('/kaggle/input/mp-data/MP_IN_adm_train.csv')
val_dataset = pd.read_csv('/kaggle/input/mp-data/MP_IN_adm_val.csv')
test_dataset = pd.read_csv('/kaggle/input/mp-data/MP_IN_adm_test.csv')

In [3]:
from torch.utils.data import DataLoader
from torch import nn

class EnsembleModel(nn.Module):
    def __init__(self, model1):
        super(EnsembleModel, self).__init__()
        self.model1 = model1

    def forward(self, input_ids, attention_mask):
        output1 = self.model1(input_ids, attention_mask=attention_mask)[0]
        avg_output = output1
        return avg_output

In [4]:
from transformers import AutoModelForSequenceClassification, AutoConfig


config = AutoConfig.from_pretrained('emilyalsentzer/Bio_Discharge_Summary_BERT', 
                                    num_labels=2, 
                                    hidden_dropout_prob=0.2, 
                                    attention_probs_dropout_prob=0.2)


core_model = AutoModelForSequenceClassification.from_pretrained('emilyalsentzer/Bio_Discharge_Summary_BERT', config=config)

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_Discharge_Summary_BERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from transformers import AutoTokenizer


tokenizer = AutoTokenizer.from_pretrained('emilyalsentzer/Bio_Discharge_Summary_BERT')

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [6]:

train_encodings = tokenizer(train_dataset['text'].tolist(), truncation=True, padding=True, max_length = 512)
val_encodings = tokenizer(val_dataset['text'].tolist(), truncation=True, padding=True,  max_length = 512)
test_encodings = tokenizer(test_dataset['text'].tolist(), truncation=True, padding=True , max_length = 512)

In [7]:

class LosDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [8]:
train_dataset = LosDataset(train_encodings, train_dataset['hospital_expire_flag'].tolist())
val_dataset = LosDataset(val_encodings, val_dataset['hospital_expire_flag'].tolist())
test_dataset = LosDataset(test_encodings, test_dataset['hospital_expire_flag'].tolist())

In [9]:
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score
from tqdm import tqdm
from torch import nn
import numpy as np


ensemble_model = EnsembleModel(core_model)

In [10]:
import os


files = os.listdir('.')


core_models = [f for f in files if f.startswith('dischargeBERT_baseline_MP')]

if core_models:
    print("Found models starting with 'dischargeBERT_baseline_MP':")
    for model in core_models:
        print(model)
        
   
    model_path = core_models[0]

    
    ensemble_model.load_state_dict(torch.load(model_path))
    print("Loaded Model")
else:
    print("No models found starting with 'dischargeBERT_baseline_MP'.")

No models found starting with 'dischargeBERT_baseline_MP'.


In [11]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ensemble_model = ensemble_model.to(device)

In [12]:

train_loader = DataLoader(train_dataset, batch_size=18, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=18, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=18, shuffle=False)

In [13]:

epochs = 200
best_roc_auc = 0.0
min_delta = 0.0001
early_stopping_count = 0
early_stopping_patience = 3
gradient_accumulation_steps = 10
best_model_path = "best_model.pth"


optimizer = AdamW(ensemble_model.parameters(), lr=1e-5, weight_decay=0.01)


scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=50, 
    num_training_steps=len(train_loader) * epochs // gradient_accumulation_steps
)



In [14]:
from torch.nn import functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Training
for epoch in range(epochs):
    ensemble_model.train()
    train_loss = 0
    for step, batch in enumerate(tqdm(train_loader)):
        optimizer.zero_grad() if step % gradient_accumulation_steps == 0 else None
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = ensemble_model(input_ids, attention_mask)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        (loss / gradient_accumulation_steps).backward()
        train_loss += loss.item()
        if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(train_loader):
            optimizer.step()
            scheduler.step()

    ensemble_model.eval()
    val_loss = 0
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for batch in tqdm(val_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = ensemble_model(input_ids, attention_mask)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            val_loss += loss.item()
            val_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
            val_labels.append(labels.cpu().numpy())
            
    val_preds = np.concatenate(val_preds)
    val_labels = np.concatenate(val_labels)
    val_loss /= len(val_loader)
    train_loss /= len(train_loader)
    print(f'Epoch: {epoch+1}/{epochs}, Training Loss: {train_loss}, Validation Loss: {val_loss}')

    # Calculate metrics
    val_preds_class = np.argmax(val_preds, axis=1)
    accuracy = accuracy_score(val_labels, val_preds_class)
    recall = recall_score(val_labels, val_preds_class)
    precision = precision_score(val_labels, val_preds_class)
    f1 = f1_score(val_labels, val_preds_class)
    roc_auc = roc_auc_score(val_labels, val_preds[:, 1])

    print(f'Accuracy: {accuracy}, Recall: {recall}, Precision: {precision}, F1: {f1}, Roc Auc: {roc_auc}')

    # Implement early stopping
    if epoch > 0 and roc_auc - best_roc_auc < min_delta:
        early_stopping_count += 1
        print(f'EarlyStopping counter: {early_stopping_count} out of {early_stopping_patience}')
        if early_stopping_count >= early_stopping_patience:
            print('Early stopping')
            break
    else:
        best_roc_auc = roc_auc
        early_stopping_count = 0
        torch.save(ensemble_model.state_dict(), f"dischargeBERT_baseline_MP_epoch_{epoch}roc_{best_roc_auc}.pth")

100%|██████████| 1887/1887 [56:36<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 1/200, Training Loss: 0.34972611292367517, Validation Loss: 0.30640777426979915
Accuracy: 0.8944580277098615, Recall: 0.0, Precision: 0.0, F1: 0.0, Roc Auc: 0.7379372213085197


100%|██████████| 1887/1887 [56:42<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 2/200, Training Loss: 0.292095772308067, Validation Loss: 0.27782470833905887
Accuracy: 0.8987367563162184, Recall: 0.09652509652509653, Precision: 0.6329113924050633, F1: 0.16750418760469013, Roc Auc: 0.7978359908883826


100%|██████████| 1887/1887 [56:40<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 3/200, Training Loss: 0.2712681589941294, Validation Loss: 0.2756115767709065
Accuracy: 0.8989405052974735, Recall: 0.06756756756756757, Precision: 0.7291666666666666, F1: 0.1236749116607774, Roc Auc: 0.8035795639440286


100%|██████████| 1887/1887 [56:46<00:00,  1.81s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 4/200, Training Loss: 0.2557579556615282, Validation Loss: 0.2740489991565982
Accuracy: 0.8993480032599837, Recall: 0.09266409266409266, Precision: 0.6666666666666666, F1: 0.16271186440677965, Roc Auc: 0.8119251369820846


100%|██████████| 1887/1887 [56:40<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 5/200, Training Loss: 0.2456650483739332, Validation Loss: 0.2690746789941421
Accuracy: 0.8983292583537082, Recall: 0.16988416988416988, Precision: 0.5605095541401274, F1: 0.2607407407407407, Roc Auc: 0.8181858558851727


100%|██████████| 1887/1887 [56:42<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 6/200, Training Loss: 0.23161814187193872, Validation Loss: 0.29037770774938687
Accuracy: 0.8985330073349633, Recall: 0.06563706563706563, Precision: 0.7083333333333334, F1: 0.12014134275618372, Roc Auc: 0.8060263322222321
EarlyStopping counter: 1 out of 3


100%|██████████| 1887/1887 [56:43<00:00,  1.80s/it]
100%|██████████| 273/273 [02:32<00:00,  1.79it/s]


Epoch: 7/200, Training Loss: 0.22028103728236254, Validation Loss: 0.28206058033492976
Accuracy: 0.8966992665036675, Recall: 0.11776061776061776, Precision: 0.5495495495495496, F1: 0.19395866454689983, Roc Auc: 0.8149396223428114
EarlyStopping counter: 2 out of 3


100%|██████████| 1887/1887 [56:41<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 8/200, Training Loss: 0.21033138630094395, Validation Loss: 0.2813579781124225
Accuracy: 0.8983292583537082, Recall: 0.19305019305019305, Precision: 0.5524861878453039, F1: 0.2861230329041488, Roc Auc: 0.8195442432344482


100%|██████████| 1887/1887 [56:44<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 9/200, Training Loss: 0.19397635741636085, Validation Loss: 0.29902877906958264
Accuracy: 0.8995517522412388, Recall: 0.15057915057915058, Precision: 0.5954198473282443, F1: 0.24036979969183356, Roc Auc: 0.8068290956104168
EarlyStopping counter: 1 out of 3


100%|██████████| 1887/1887 [56:44<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]


Epoch: 10/200, Training Loss: 0.1807367366096513, Validation Loss: 0.3079958547541237
Accuracy: 0.8985330073349633, Recall: 0.11583011583011583, Precision: 0.6, F1: 0.1941747572815534, Roc Auc: 0.8048381280727522
EarlyStopping counter: 2 out of 3


100%|██████████| 1887/1887 [56:44<00:00,  1.80s/it]
100%|██████████| 273/273 [02:33<00:00,  1.78it/s]

Epoch: 11/200, Training Loss: 0.16316930512282218, Validation Loss: 0.3059203746581907
Accuracy: 0.8920130399348003, Recall: 0.1776061776061776, Precision: 0.46938775510204084, F1: 0.25770308123249297, Roc Auc: 0.8080012488896317
EarlyStopping counter: 3 out of 3
Early stopping





In [15]:
import os

# list all files in the current directory
files = os.listdir('.')

# filter the ones that start with 'CORE_baseline'
core_models = [f for f in files if f.startswith('dischargeBERT_baseline_MP')]

if core_models:
    print("Found models starting with 'dischargeBERT_baseline_MP':")
    for model in core_models:
        print(model)
        
    # get the first (and supposedly only) model
    model_path = core_models[0]

    # load the model state
    ensemble_model.load_state_dict(torch.load(model_path))
    print("Loaded Model")
else:
    print("No models found starting with 'dischargeBERT_baseline_MP'.")

  ensemble_model.load_state_dict(torch.load(model_path))


Found models starting with 'dischargeBERT_baseline_MP':
dischargeBERT_baseline_MP_epoch_2roc_0.8035795639440286.pth
dischargeBERT_baseline_MP_epoch_4roc_0.8181858558851727.pth
dischargeBERT_baseline_MP_epoch_3roc_0.8119251369820846.pth
dischargeBERT_baseline_MP_epoch_7roc_0.8195442432344482.pth
dischargeBERT_baseline_MP_epoch_1roc_0.7978359908883826.pth
dischargeBERT_baseline_MP_epoch_0roc_0.7379372213085197.pth
Loaded Model


In [16]:
from torch.nn import functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Put the model in evaluation mode
ensemble_model.eval()

# Initialize lists to store predictions and true labels
test_preds = []
test_labels = []

# Iterate over test data
with torch.no_grad():
    for batch in tqdm(test_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = ensemble_model(input_ids, attention_mask)
        test_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
        test_labels.append(labels.cpu().numpy())

100%|██████████| 546/546 [05:06<00:00,  1.78it/s]


In [17]:
test_preds = np.concatenate(test_preds)
test_labels = np.concatenate(test_labels)

# Calculate metrics
test_preds_class = np.argmax(test_preds, axis=1)
accuracy = accuracy_score(test_labels, test_preds_class)
recall = recall_score(test_labels, test_preds_class)
precision = precision_score(test_labels, test_preds_class)
f1 = f1_score(test_labels, test_preds_class)
roc_auc = roc_auc_score(test_labels, test_preds[:, 1])

print(f'Accuracy: {accuracy}, Recall: {recall}, Precision: {precision}, F1: {f1}, Roc Auc: {roc_auc}')

Accuracy: 0.9014457340663816, Recall: 0.0721951219512195, Precision: 0.8131868131868132, F1: 0.13261648745519714, Roc Auc: 0.8191845335300004
