In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
import pandas as pd
from collections import deque
import matplotlib.pyplot as plt
import numpy as np
from datasets import load_dataset
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# Load Train, Validation, Test Dataset
train_df = pd.read_csv('/kaggle/input/mp-train/MP_train.csv')
val_df = pd.read_csv('/kaggle/input/mp-val/MP_val.csv')
test_df = pd.read_csv('/kaggle/input/mp-test/MP_test.csv')

In [3]:
train_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,107384,"CHIEF COMPLAINT: AMS, concern for toxic alcoho...",0
1,101061,CHIEF COMPLAINT: abdominal pain\n\nPRESENT ILL...,0
2,127180,CHIEF COMPLAINT: Bilateral Sub Dural Hematoma\...,0
3,168339,CHIEF COMPLAINT: Intracranial bleed\n\nPRESENT...,0
4,154044,CHIEF COMPLAINT: ischemic left foot\n\nPRESENT...,0


In [4]:
val_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,176763,CHIEF COMPLAINT: # Lethargy # Confusion # Hypo...,0
1,173211,"CHIEF COMPLAINT: Dyspnea, LE edema\n\nPRESENT ...",0
2,116333,CHIEF COMPLAINT: upper GI bleed\n\nPRESENT ILL...,1
3,161102,CHIEF COMPLAINT: increased lethargy\n\nPRESENT...,0
4,116799,CHIEF COMPLAINT: s/p 18 ft fall\n\nPRESENT ILL...,0


In [5]:
test_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,100058,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,0
1,124871,CHIEF COMPLAINT: shortness of breath\n\nPRESEN...,0
2,109159,CHIEF COMPLAINT: s/p mechanical fall\n\nPRESEN...,0
3,159161,CHIEF COMPLAINT: nausea and vomiting\n\nPRESEN...,0
4,109863,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,1


In [6]:
train_df['hospital_expire_flag'].nunique()

2

In [7]:
val_df['hospital_expire_flag'].nunique()

2

In [8]:
test_df['hospital_expire_flag'].nunique()

2

In [9]:
train_df.drop(columns=['id'], inplace=True)
val_df.drop(columns=['id'], inplace=True)
test_df.drop(columns=['id'], inplace=True)

In [10]:
train_df.head()

Unnamed: 0,text,hospital_expire_flag
0,"CHIEF COMPLAINT: AMS, concern for toxic alcoho...",0
1,CHIEF COMPLAINT: abdominal pain\n\nPRESENT ILL...,0
2,CHIEF COMPLAINT: Bilateral Sub Dural Hematoma\...,0
3,CHIEF COMPLAINT: Intracranial bleed\n\nPRESENT...,0
4,CHIEF COMPLAINT: ischemic left foot\n\nPRESENT...,0


In [11]:
test_df.head()

Unnamed: 0,text,hospital_expire_flag
0,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,0
1,CHIEF COMPLAINT: shortness of breath\n\nPRESEN...,0
2,CHIEF COMPLAINT: s/p mechanical fall\n\nPRESEN...,0
3,CHIEF COMPLAINT: nausea and vomiting\n\nPRESEN...,0
4,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,1


In [12]:
train_df.shape

(33954, 2)

In [13]:
val_df.shape

(4908, 2)

In [14]:
test_df.shape

(9822, 2)

In [15]:
train_texts = train_df['text'].tolist()
train_labels = train_df['hospital_expire_flag'].tolist()
val_texts = val_df['text'].tolist()
val_labels = val_df['hospital_expire_flag'].tolist()
test_texts = test_df['text'].tolist()
test_labels = test_df['hospital_expire_flag'].tolist()

In [16]:
class TextClassificationDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = [str(text) for text in texts]
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        return {'input_ids': encoding['input_ids'].flatten(), 'attention_mask': encoding['attention_mask'].flatten(), 'label': torch.tensor(label)}

In [17]:
class BioClinicalBERTClassifier(nn.Module):
    def __init__(self, bert_model_name, num_classes):
        super(BioClinicalBERTClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        x = self.dropout(pooled_output)
        logits = self.fc(x)
        return logits

In [18]:
# Set up parameters
bert_model_name = 'emilyalsentzer/Bio_ClinicalBERT'
num_classes = 2
max_length = 512
batch_size = 8

In [19]:
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

train_dataset = TextClassificationDataset(train_texts, train_labels, tokenizer, max_length)
val_dataset = TextClassificationDataset(val_texts, val_labels, tokenizer, max_length)
test_dataset = TextClassificationDataset(test_texts, test_labels, tokenizer, max_length)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BioClinicalBERTClassifier(bert_model_name, num_classes).to(device)

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


In [21]:
epochs = 20
best_roc_auc = 0.0
min_delta = 0.0001
early_stopping_count = 0
early_stopping_patience = 3
gradient_accumulation_steps = 10

# Set the optimizer
optimizer = optim.AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)

# Set the scheduler
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=50, 
    num_training_steps=len(train_dataloader) * epochs // gradient_accumulation_steps
)


In [22]:
# Training loop
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad() if step % gradient_accumulation_steps == 0 else None
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = model(input_ids, attention_mask)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        (loss / gradient_accumulation_steps).backward()
        train_loss += loss.item()
        if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(train_dataloader):
            optimizer.step()
            scheduler.step()
            
    model.eval()
    val_loss = 0
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids, attention_mask)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            val_loss += loss.item()
            val_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
            val_labels.append(labels.cpu().numpy())
            
    val_preds = np.concatenate(val_preds)
    val_labels = np.concatenate(val_labels)
    val_loss /= len(val_dataloader)
    train_loss /= len(train_dataloader)
    print(f'Epoch: {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    
    # Calculate metrics
    val_preds_class = np.argmax(val_preds, axis=1)
    accuracy = accuracy_score(val_labels, val_preds_class)
    recall = recall_score(val_labels, val_preds_class, average='weighted')
    precision = precision_score(val_labels, val_preds_class, average='weighted')
    f1 = f1_score(val_labels, val_preds_class, average='weighted')
    micro_f1 = f1_score(val_labels, val_preds_class, average='micro')
    macro_roc_auc = roc_auc_score(val_labels, val_preds[:, 1], multi_class='ovo', average='macro')  # Changed val_preds to val_preds[:, 1]
    
    print(f'Accuracy: {accuracy:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}, F1: {f1}, Micro F1: {micro_f1:.4f}, Macro Roc Auc: {macro_roc_auc:.4f}')
            
    # Implement early stopping
    if epoch > 0 and macro_roc_auc - best_roc_auc < min_delta:
        early_stopping_count += 1
        print(f'EarlyStopping counter: {early_stopping_count} out of {early_stopping_patience}')
        if early_stopping_count >= early_stopping_patience:
            print('Early stopping')
            break
    else:
        best_roc_auc = macro_roc_auc
        early_stopping_count = 0  # Reset early stopping counter


Epoch: 1/20, Training Loss: 0.3270, Validation Loss: 0.2867
Accuracy: 0.8979, Recall: 0.8979, Precision: 0.8743, F1: 0.8595478575789299, Micro F1: 0.8979, Macro Roc Auc: 0.7793
Epoch: 2/20, Training Loss: 0.2735, Validation Loss: 0.2687
Accuracy: 0.9002, Recall: 0.9002, Precision: 0.8797, F1: 0.8659725332798981, Micro F1: 0.9002, Macro Roc Auc: 0.8160
Epoch: 3/20, Training Loss: 0.2469, Validation Loss: 0.2701
Accuracy: 0.8996, Recall: 0.8996, Precision: 0.8749, F1: 0.8721934063662693, Micro F1: 0.8996, Macro Roc Auc: 0.8196
Epoch: 4/20, Training Loss: 0.2206, Validation Loss: 0.2712
Accuracy: 0.8936, Recall: 0.8936, Precision: 0.8745, F1: 0.8806056138836953, Micro F1: 0.8936, Macro Roc Auc: 0.8271
Epoch: 5/20, Training Loss: 0.1873, Validation Loss: 0.2833
Accuracy: 0.8943, Recall: 0.8943, Precision: 0.8688, F1: 0.8743541140947371, Micro F1: 0.8943, Macro Roc Auc: 0.8206
EarlyStopping counter: 1 out of 3
Epoch: 6/20, Training Loss: 0.1555, Validation Loss: 0.3042
Accuracy: 0.8863, Rec

In [23]:
model.eval()

test_preds = []
test_labels = []

# Iterate over test data
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = model(input_ids, attention_mask)
        test_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
        test_labels.append(labels.cpu().numpy())


In [24]:
test_preds = np.concatenate(test_preds)
test_labels = np.concatenate(test_labels)

test_preds_class = np.argmax(test_preds, axis=1)

report = classification_report(test_labels, test_preds_class, digits=4)

print(report)

              precision    recall  f1-score   support

           0     0.9199    0.9637    0.9413      8797
           1     0.4736    0.2800    0.3519      1025

    accuracy                         0.8924      9822
   macro avg     0.6968    0.6219    0.6466      9822
weighted avg     0.8733    0.8924    0.8798      9822

