In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from transformers import AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup
import pandas as pd
from collections import deque
import matplotlib.pyplot as plt
import numpy as np
from datasets import load_dataset
import torch.optim as optim
import torch.nn.functional as F
import string

In [2]:
# Load Train, Validation, Test Dataset
train_df = pd.read_csv('/kaggle/input/mp-train/MP_train.csv')
val_df = pd.read_csv('/kaggle/input/mp-val/MP_val.csv')
test_df = pd.read_csv('/kaggle/input/mp-test/MP_test.csv')

In [3]:
train_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,107384,"CHIEF COMPLAINT: AMS, concern for toxic alcoho...",0
1,101061,CHIEF COMPLAINT: abdominal pain\n\nPRESENT ILL...,0
2,127180,CHIEF COMPLAINT: Bilateral Sub Dural Hematoma\...,0
3,168339,CHIEF COMPLAINT: Intracranial bleed\n\nPRESENT...,0
4,154044,CHIEF COMPLAINT: ischemic left foot\n\nPRESENT...,0


In [4]:
val_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,176763,CHIEF COMPLAINT: # Lethargy # Confusion # Hypo...,0
1,173211,"CHIEF COMPLAINT: Dyspnea, LE edema\n\nPRESENT ...",0
2,116333,CHIEF COMPLAINT: upper GI bleed\n\nPRESENT ILL...,1
3,161102,CHIEF COMPLAINT: increased lethargy\n\nPRESENT...,0
4,116799,CHIEF COMPLAINT: s/p 18 ft fall\n\nPRESENT ILL...,0


In [5]:
test_df.head()

Unnamed: 0,id,text,hospital_expire_flag
0,100058,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,0
1,124871,CHIEF COMPLAINT: shortness of breath\n\nPRESEN...,0
2,109159,CHIEF COMPLAINT: s/p mechanical fall\n\nPRESEN...,0
3,159161,CHIEF COMPLAINT: nausea and vomiting\n\nPRESEN...,0
4,109863,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,1


In [6]:
train_df['hospital_expire_flag'].nunique()

2

In [7]:
val_df['hospital_expire_flag'].nunique()

2

In [8]:
test_df['hospital_expire_flag'].nunique()

2

In [9]:
train_df.drop(columns=['id'], inplace=True)
val_df.drop(columns=['id'], inplace=True)
test_df.drop(columns=['id'], inplace=True)

In [10]:
train_df.head()

Unnamed: 0,text,hospital_expire_flag
0,"CHIEF COMPLAINT: AMS, concern for toxic alcoho...",0
1,CHIEF COMPLAINT: abdominal pain\n\nPRESENT ILL...,0
2,CHIEF COMPLAINT: Bilateral Sub Dural Hematoma\...,0
3,CHIEF COMPLAINT: Intracranial bleed\n\nPRESENT...,0
4,CHIEF COMPLAINT: ischemic left foot\n\nPRESENT...,0


In [11]:
test_df.head()

Unnamed: 0,text,hospital_expire_flag
0,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,0
1,CHIEF COMPLAINT: shortness of breath\n\nPRESEN...,0
2,CHIEF COMPLAINT: s/p mechanical fall\n\nPRESEN...,0
3,CHIEF COMPLAINT: nausea and vomiting\n\nPRESEN...,0
4,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,1


In [12]:
train_df.shape

(33954, 2)

In [13]:
val_df.shape

(4908, 2)

In [14]:
test_df.shape

(9822, 2)

In [15]:
# Removing Repeated Punctuations
def remove_repeated_punctuation(text):
    punctuations = set(string.punctuation)
    cleaned_text = []
    for char in text:
        if char in punctuations:
            punctuations.remove(char)
            cleaned_text.append(char)
        elif char not in punctuations:
            punctuations = set(string.punctuation)
            cleaned_text.append(char)
    return ''.join(cleaned_text)

# Apply the remove_repeated_punctuation function to the 'review' column
train_df['text'] = train_df['text'].apply(remove_repeated_punctuation)

train_df.head()

Unnamed: 0,text,hospital_expire_flag
0,"CHIEF COMPLAINT: AMS, concern for toxic alcoho...",0
1,CHIEF COMPLAINT: abdominal pain\n\nPRESENT ILL...,0
2,CHIEF COMPLAINT: Bilateral Sub Dural Hematoma\...,0
3,CHIEF COMPLAINT: Intracranial bleed\n\nPRESENT...,0
4,CHIEF COMPLAINT: ischemic left foot\n\nPRESENT...,0


In [16]:
test_df['text'] = test_df['text'].apply(remove_repeated_punctuation)

test_df.head()

Unnamed: 0,text,hospital_expire_flag
0,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,0
1,CHIEF COMPLAINT: shortness of breath\n\nPRESEN...,0
2,CHIEF COMPLAINT: s/p mechanical fall\n\nPRESEN...,0
3,CHIEF COMPLAINT: nausea and vomiting\n\nPRESEN...,0
4,CHIEF COMPLAINT: \n\nPRESENT ILLNESS: The pati...,1


In [17]:
val_df['text'] = val_df['text'].apply(remove_repeated_punctuation)

val_df.head()

Unnamed: 0,text,hospital_expire_flag
0,CHIEF COMPLAINT: # Lethargy # Confusion # Hypo...,0
1,"CHIEF COMPLAINT: Dyspnea, LE edema\n\nPRESENT ...",0
2,CHIEF COMPLAINT: upper GI bleed\n\nPRESENT ILL...,1
3,CHIEF COMPLAINT: increased lethargy\n\nPRESENT...,0
4,CHIEF COMPLAINT: s/p 18 ft fall\n\nPRESENT ILL...,0


In [18]:
train_texts = train_df['text'].tolist()
train_labels = train_df['hospital_expire_flag'].tolist()
val_texts = val_df['text'].tolist()
val_labels = val_df['hospital_expire_flag'].tolist()
test_texts = test_df['text'].tolist()
test_labels = test_df['hospital_expire_flag'].tolist()

In [19]:
class TextClassificationDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = [str(text) for text in texts]
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
        return {'input_ids': encoding['input_ids'].flatten(), 'attention_mask': encoding['attention_mask'].flatten(), 'label': torch.tensor(label)}

In [20]:
class RobertaClassifier(nn.Module):
    def __init__(self, bert_model_name, num_classes):
        super(RobertaClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        x = self.dropout(pooled_output)
        logits = self.fc(x)
        return logits

In [21]:
# Set up parameters
bert_model_name = 'FacebookAI/roberta-base'
num_classes = 2
max_length = 512
batch_size = 8

In [22]:
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

train_dataset = TextClassificationDataset(train_texts, train_labels, tokenizer, max_length)
val_dataset = TextClassificationDataset(val_texts, val_labels, tokenizer, max_length)
test_dataset = TextClassificationDataset(test_texts, test_labels, tokenizer, max_length)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RobertaClassifier(bert_model_name, num_classes).to(device)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
epochs = 20
best_roc_auc = 0.0
min_delta = 0.0001
early_stopping_count = 0
early_stopping_patience = 3
gradient_accumulation_steps = 10

# Set the optimizer
optimizer = optim.AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)

# Set the scheduler
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=50, 
    num_training_steps=len(train_dataloader) * epochs // gradient_accumulation_steps
)


In [25]:
# Training loop
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad() if step % gradient_accumulation_steps == 0 else None
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = model(input_ids, attention_mask)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        (loss / gradient_accumulation_steps).backward()
        train_loss += loss.item()
        if (step + 1) % gradient_accumulation_steps == 0 or (step + 1) == len(train_dataloader):
            optimizer.step()
            scheduler.step()
            
    model.eval()
    val_loss = 0
    val_preds = []
    val_labels = []
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids, attention_mask)
            loss = nn.CrossEntropyLoss()(outputs, labels)
            val_loss += loss.item()
            val_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
            val_labels.append(labels.cpu().numpy())
            
    val_preds = np.concatenate(val_preds)
    val_labels = np.concatenate(val_labels)
    val_loss /= len(val_dataloader)
    train_loss /= len(train_dataloader)
    print(f'Epoch: {epoch+1}/{epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    
    # Calculate metrics
    val_preds_class = np.argmax(val_preds, axis=1)
    accuracy = accuracy_score(val_labels, val_preds_class)
    recall = recall_score(val_labels, val_preds_class, average='weighted')
    precision = precision_score(val_labels, val_preds_class, average='weighted')
    f1 = f1_score(val_labels, val_preds_class, average='weighted')
    micro_f1 = f1_score(val_labels, val_preds_class, average='micro')
    macro_roc_auc = roc_auc_score(val_labels, val_preds[:, 1], multi_class='ovo', average='macro')  # Changed val_preds to val_preds[:, 1]
    
    print(f'Accuracy: {accuracy:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}, F1: {f1}, Micro F1: {micro_f1:.4f}, Macro Roc Auc: {macro_roc_auc:.4f}')
            
    # Implement early stopping
    if epoch > 0 and macro_roc_auc - best_roc_auc < min_delta:
        early_stopping_count += 1
        print(f'EarlyStopping counter: {early_stopping_count} out of {early_stopping_patience}')
        if early_stopping_count >= early_stopping_patience:
            print('Early stopping')
            break
    else:
        best_roc_auc = macro_roc_auc
        early_stopping_count = 0  # Reset early stopping counter


Epoch: 1/20, Training Loss: 0.3235, Validation Loss: 0.2874
Accuracy: 0.8945, Recall: 0.8945, Precision: 0.8001, F1: 0.8446269609908136, Micro F1: 0.8945, Macro Roc Auc: 0.7874


  _warn_prf(average, modifier, msg_start, len(result))


Epoch: 2/20, Training Loss: 0.2781, Validation Loss: 0.2733
Accuracy: 0.8996, Recall: 0.8996, Precision: 0.8818, F1: 0.8620939360478421, Micro F1: 0.8996, Macro Roc Auc: 0.8114
Epoch: 3/20, Training Loss: 0.2543, Validation Loss: 0.2884
Accuracy: 0.8993, Recall: 0.8993, Precision: 0.8896, F1: 0.8588355845088045, Micro F1: 0.8993, Macro Roc Auc: 0.8178
Epoch: 4/20, Training Loss: 0.2321, Validation Loss: 0.2827
Accuracy: 0.8989, Recall: 0.8989, Precision: 0.8771, F1: 0.8807353361822257, Micro F1: 0.8989, Macro Roc Auc: 0.8200
Epoch: 5/20, Training Loss: 0.2059, Validation Loss: 0.2960
Accuracy: 0.8943, Recall: 0.8943, Precision: 0.8756, F1: 0.8815204786844886, Micro F1: 0.8943, Macro Roc Auc: 0.8201
Epoch: 6/20, Training Loss: 0.1797, Validation Loss: 0.3216
Accuracy: 0.8849, Recall: 0.8849, Precision: 0.8718, F1: 0.8772630286574993, Micro F1: 0.8849, Macro Roc Auc: 0.8137
EarlyStopping counter: 1 out of 3
Epoch: 7/20, Training Loss: 0.1519, Validation Loss: 0.3899
Accuracy: 0.8975, Rec

In [26]:
model.eval()

test_preds = []
test_labels = []

# Iterate over test data
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = model(input_ids, attention_mask)
        test_preds.append(F.softmax(outputs, dim=1).cpu().numpy())
        test_labels.append(labels.cpu().numpy())


In [27]:
test_preds = np.concatenate(test_preds)
test_labels = np.concatenate(test_labels)

test_preds_class = np.argmax(test_preds, axis=1)

report = classification_report(test_labels, test_preds_class, digits=4)

print(report)

              precision    recall  f1-score   support

           0     0.9138    0.9699    0.9410      8797
           1     0.4536    0.2146    0.2914      1025

    accuracy                         0.8911      9822
   macro avg     0.6837    0.5923    0.6162      9822
weighted avg     0.8658    0.8911    0.8732      9822

