# TabTransformer for Transfusion Prediction
This notebook experiments with transfusion prediction using a TabTransformer. A subset of the previously prepared training dataset will be taken for prototyping.


## Get data
We will first import training and testing datasets that have already been split.

In [1]:
# imports here
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [2]:
# Folder holding the pre-split CSV files. Set the TRAUMA_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset (or empty)
# the original local path is used, so existing setups keep working unchanged.
DEFAULT_DATA_DIRECTORY = "C:\\Users\\micha\\OneDrive - UT Health San Antonio\\UTHSCSA\\Trauma\\TransfusionPrediction\\trauma_r\\"
data_directory = os.environ.get("TRAUMA_DATA_DIR") or DEFAULT_DATA_DIRECTORY

# File paths are built by plain string concatenation (here and when the test
# split is loaded), so make sure the directory ends with a path separator.
if not data_directory.endswith(("\\", "/")):
    data_directory += os.sep

train = pd.read_csv(data_directory + "train_trauma.csv")
print(train.shape)
train.head()

(986366, 80)


Unnamed: 0,onehot__SEX_1.0,onehot__SEX_2.0,onehot__SEX_3.0,onehot__ETHNICITY_1.0,onehot__ETHNICITY_2.0,onehot__TBIMIDLINESHIFT_1.0,onehot__TBIMIDLINESHIFT_2.0,onehot__TBIMIDLINESHIFT_3.0,onehot__TEACHINGSTATUS_1.0,onehot__TEACHINGSTATUS_5.0,...,scaler__RESPIRATORYRATE,scaler__PULSEOXIMETRY,scaler__HEIGHT,scaler__WEIGHT,scaler__TOTALGCS,scaler__HOSPITALARRIVALHRS,scaler__HOSPITALARRIVALDAYS,scaler__TBIHIGHESTTOTALGCS,scaler__ISS,transfusion
0,0,1,0,0,1,0,1,0,0,1,...,1.052371,0.361127,-3.117664,-2.27282,0.325218,-9.088105000000001e-18,-0.021071,0.9919297,0.83832,No
1,1,0,0,1,0,0,1,0,0,1,...,-1.052626,0.511366,-0.132069,-0.838981,0.325218,-9.088105000000001e-18,-0.021071,0.9919297,-0.032457,No
2,1,0,0,0,1,0,1,0,0,1,...,-0.210627,0.361127,0.3295,-0.815281,0.325218,-9.088105000000001e-18,-0.021071,-2.524544e-15,-1.027631,No
3,1,0,0,0,1,0,1,0,0,1,...,-0.210627,0.511366,-1.540148,-1.640825,0.325218,-9.088105000000001e-18,-0.021071,-2.524544e-15,-0.530044,No
4,0,1,0,0,1,0,1,0,0,1,...,-0.210627,0.511366,-2.971598,-2.027922,0.325218,-9.088105000000001e-18,-0.021071,0.9919297,-0.530044,No


In [3]:
# For prototyping, a small stratified subset can be used because the entire dataset is large.
sample_size = 0.001  # fraction of rows to keep: 0.1% of the dataset, roughly 1,000 rows

# The notebook as saved trains on the full training frame (the subset used to be
# drawn and then immediately overwritten). That remains the default, but the
# choice is now explicit: set the flag to False to prototype on the subset.
USE_FULL_TRAINING_SET = True

if USE_FULL_TRAINING_SET:
    # Copy so that the label encoding applied to train_sample in a later cell
    # does not also rewrite `train` (costs one extra in-memory copy of the frame).
    train_sample = train.copy()
else:
    train_sample, _ = train_test_split(
        train, train_size=sample_size, stratify=train['transfusion'], random_state=42
    )
    # Independent copy: columns of train_sample are reassigned later, which on a
    # slice of `train` can trigger pandas' SettingWithCopyWarning.
    train_sample = train_sample.copy()


In [4]:
# Read the held-out test split stored next to the training CSV.
test = pd.read_csv(f"{data_directory}test_trauma.csv")
print(test.shape)
test.head(n=5)

(246590, 80)


Unnamed: 0,onehot__SEX_1.0,onehot__SEX_2.0,onehot__SEX_3.0,onehot__ETHNICITY_1.0,onehot__ETHNICITY_2.0,onehot__TBIMIDLINESHIFT_1.0,onehot__TBIMIDLINESHIFT_2.0,onehot__TBIMIDLINESHIFT_3.0,onehot__TEACHINGSTATUS_1.0,onehot__TEACHINGSTATUS_5.0,...,scaler__RESPIRATORYRATE,scaler__PULSEOXIMETRY,scaler__HEIGHT,scaler__WEIGHT,scaler__TOTALGCS,scaler__HOSPITALARRIVALHRS,scaler__HOSPITALARRIVALDAYS,scaler__TBIHIGHESTTOTALGCS,scaler__ISS,transfusion
0,0,1,0,0,1,0,1,0,0,1,...,1.49569e-15,2.135025e-15,-5.104166,2.976689,-2.849348e-15,-9.088105000000001e-18,-0.021071,0.9919297,-0.405648,No
1,1,0,0,0,1,0,1,0,0,1,...,-0.2106272,0.5113665,-1.172061,-0.550633,0.3252183,-9.088105000000001e-18,-0.021071,-2.524544e-15,-1.027631,No
2,1,0,0,0,1,0,1,0,0,1,...,-0.8421263,0.3611275,-2.650252,-2.087171,0.3252183,-9.088105000000001e-18,-0.021071,0.9919297,-0.654441,No
3,0,1,0,1,0,0,1,0,0,1,...,0.2103722,0.3611275,-0.1963383,-1.407777,-0.07579178,-9.088105000000001e-18,-0.021071,0.9919297,-0.530044,No
4,0,1,0,0,1,0,1,0,0,1,...,0.2103722,0.5113665,9.963489e-15,-1.881773,0.3252183,-9.088105000000001e-18,-0.021071,-2.524544e-15,-0.654441,No


In [5]:
# Convert "Yes" to 1 and "No" to 0 for binary classification
def _encode_transfusion(labels):
    """Return the transfusion labels as int64 with "Yes" -> 1 and "No" -> 0.

    Labels that are already numeric 0/1 pass through unchanged, so re-running
    this cell is safe (a second plain ``.map`` would turn every label into NaN).

    Raises:
        ValueError: If any label is missing or is not one of "Yes", "No", 0, 1.
    """
    if pd.api.types.is_numeric_dtype(labels):
        # Already encoded by an earlier run of this cell.
        encoded = labels
    else:
        encoded = labels.map({"Yes": 1, "No": 0})

    # NaN (missing or unmapped labels) fails the isin check as well.
    is_valid = encoded.isin([0, 1])
    if not is_valid.all():
        examples = list(labels[~is_valid].unique()[:5])
        raise ValueError(
            f"Unexpected transfusion labels {examples}; expected only 'Yes'/'No' (or 0/1). "
            "Reload the data if the column was corrupted by an earlier run."
        )
    return encoded.astype("int64")


train_sample["transfusion"] = _encode_transfusion(train_sample["transfusion"])
test["transfusion"] = _encode_transfusion(test["transfusion"])


## Defining the Model


In [6]:
class MultiHeadAttention(nn.Module):
    """Self-attention sub-layer followed by dropout, a residual add and LayerNorm.

    Args:
        embed_dim: Size of the last dimension of the input; also the LayerNorm size.
        num_heads: Number of heads handed to ``nn.MultiheadAttention``.
        dropout: Dropout probability, used both inside the attention module and
            on its output before the residual connection.
    """

    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super().__init__()
        # batch_first=True: the attention module takes inputs with the batch dimension first.
        self.attention = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Attend from ``x`` to itself and return ``LayerNorm(x + Dropout(attention_output))``."""
        # The module returns (output, attention_weights); only the output is used.
        attended = self.attention(query=x, key=x, value=x)[0]
        residual_sum = x + self.dropout(attended)
        return self.norm(residual_sum)

class FeedForward(nn.Module):
    """Two-layer MLP on the last dimension, wrapped in a residual connection and LayerNorm.

    Args:
        embed_dim: Input and output size of the block.
        hidden_dim: Width of the intermediate linear layer.
        dropout: Dropout probability applied after the ReLU and after the second linear layer.
    """

    def __init__(self, embed_dim, hidden_dim, dropout=0.1):
        super().__init__()
        layers = [
            nn.Linear(embed_dim, hidden_dim),   # expand
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, embed_dim),   # project back so the residual add is valid
            nn.Dropout(dropout),
        ]
        self.fc = nn.Sequential(*layers)
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Return ``LayerNorm(x + MLP(x))``."""
        transformed = self.fc(x)
        return self.norm(x + transformed)

class TabTransformer(nn.Module):
    """Feed-forward classifier over a vector of numerical features.

    NOTE: despite its name, this class as written contains no attention layers.
    The input goes through a two-layer MLP and then a two-layer classifier head.

    Args:
        num_numerical_features: Number of input features per example.
        mlp_hidden_dim: Width of both MLP layers.
        num_classes: Number of outputs produced by the classifier head.
        dropout: Dropout probability used after every ReLU.
    """

    def __init__(self, num_numerical_features, mlp_hidden_dim=64, num_classes=1, dropout=0.1):
        super().__init__()

        # MLP for numerical data
        numerical_layers = [
            nn.Linear(num_numerical_features, mlp_hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_hidden_dim, mlp_hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        ]
        self.mlp_numerical = nn.Sequential(*numerical_layers)

        # Final classifier
        head_layers = [
            nn.Linear(mlp_hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x_num):
        """Return the classifier outputs for ``x_num`` (raw scores; no sigmoid/softmax is applied here)."""
        hidden = self.mlp_numerical(x_num)
        scores = self.classifier(hidden)
        return scores

## Training the Model


In [7]:
# Model Initialization
# Columns that must not be fed to the network: the target itself and
# "Unnamed: 0", which appears to hold the saved CSV row number (0, 1, 2, ... in
# the previews of both splits) rather than a patient measurement. Left in, it
# is an unscaled, meaningless input next to the standardized features.
NON_FEATURE_COLUMNS = ["transfusion", "Unnamed: 0"]
feature_columns = [col for col in train_sample.columns if col not in NON_FEATURE_COLUMNS]
num_features = len(feature_columns)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TabTransformer(num_numerical_features=num_features, num_classes=1).to(device)

# Define loss and optimizer
criterion = nn.BCEWithLogitsLoss()  # expects raw model outputs and float targets
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Convert DataFrame to Tensors
# Features are selected by name, so train and test columns are guaranteed to be
# in the same order, and a column missing from the test split raises a KeyError
# instead of silently shifting features.
X_train_tensor = torch.tensor(train_sample[feature_columns].values, dtype=torch.float32)
# unsqueeze(1) gives the targets a trailing dimension of size 1, matching the model output for num_classes=1.
y_train_tensor = torch.tensor(train_sample["transfusion"].values, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(test[feature_columns].values, dtype=torch.float32)
y_test_tensor = torch.tensor(test["transfusion"].values, dtype=torch.float32).unsqueeze(1)

# Create DataLoader
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [8]:
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(model(inputs), targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Raises:
        ValueError: If ``train_loader`` yields no batches in an epoch.
    """
    # Send batches to wherever the model's weights live instead of relying on a
    # notebook-global `device` variable being defined and in sync with the model.
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for X, y in train_loader:
            X, y = X.to(batch_device), y.to(batch_device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Previously this surfaced as a ZeroDivisionError in the print below.
            raise ValueError("train_loader produced no batches; nothing to train on")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/num_batches:.4f}")

# Fit the network for ten passes over the training loader.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)

Epoch 1/10, Loss: 0.2331
Epoch 2/10, Loss: 0.2294
Epoch 3/10, Loss: 0.2289
Epoch 4/10, Loss: 0.2281
Epoch 5/10, Loss: 0.2278
Epoch 6/10, Loss: 0.2282
Epoch 7/10, Loss: 0.2283
Epoch 8/10, Loss: 0.2281
Epoch 9/10, Loss: 0.2295
Epoch 10/10, Loss: 0.2302


In [9]:
# Evaluation Function
def evaluate_model(model, X, y_true):
    """Print accuracy (at a 0.5 probability threshold) and AUROC of ``model`` on ``(X, y_true)``.

    Args:
        model: Binary classifier returning one raw score (logit) per example;
            it is switched to evaluation mode.
        X: Feature tensor, scored in a single forward pass.
        y_true: Tensor of 0/1 targets, one per row of ``X``.
    """
    model.eval()
    # Run on the device the model's weights live on instead of relying on a
    # notebook-global `device` variable.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        # squeeze() drops the size-1 output dimension; atleast_1d keeps a
        # single-example batch 1-D instead of collapsing it to a 0-d scalar.
        logits = torch.atleast_1d(model(X.to(eval_device)).squeeze())
        y_pred = torch.sigmoid(logits).cpu().numpy()

    # Targets are only needed on the CPU for scikit-learn; flatten them to 1-D
    # so they have the same shape as the predictions.
    y_true = y_true.reshape(-1).cpu().numpy()
    y_pred_binary = (y_pred > 0.5).astype(int)
    accuracy = accuracy_score(y_true, y_pred_binary)
    auc_roc = roc_auc_score(y_true, y_pred)
    print(f"Accuracy: {accuracy:.4f}, AUROC: {auc_roc:.4f}")

# Score the trained network on the held-out test tensors.
evaluate_model(model=model, X=X_test_tensor, y_true=y_test_tensor)

Accuracy: 0.9206, AUROC: 0.8421
