In [17]:
!ls /teamspace/studios/this_studio

DataPreprocessing.ipynb  SepsisData.csv		       getting_started.ipynb
FT_Sepsis.html		 Tab_transformer_Sepsis.ipynb
FT_Sepsis.ipynb		 examples


In [18]:
# # Install nbconvert if not already installed
!pip install nbconvert

# # Convert the current notebook to HTML
!jupyter nbconvert --to html "/teamspace/studios/this_studio/Tab_transformer_Sepsis.ipynb"

[NbConvertApp] Converting notebook /teamspace/studios/this_studio/Tab_transformer_Sepsis.ipynb to html
[NbConvertApp] Writing 379361 bytes to /teamspace/studios/this_studio/Tab_transformer_Sepsis.html


In [5]:
!pip install imblearn
!pip install lib
!pip install torchsummary
!pip install einops



In [6]:
# Import libraries
import math
import warnings
from typing import Dict, Literal
import matplotlib.pyplot as plt
warnings.simplefilter("ignore")
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy.stats import zscore
from imblearn.over_sampling import SMOTE
import torch
import torch.nn.functional as F
import torch.optim
from torch import Tensor
from tqdm.std import tqdm
warnings.resetwarnings()
import lib
import torchsummary
!pip install torchinfo
from torchinfo import summary
!pip install pytorch-ignite
from ignite.handlers import EarlyStopping
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report



In [7]:
# Load dataset
df = pd.read_csv('./SepsisData.csv', header=None, low_memory=False)

# Step 1: Drop the first row (original header) and set a new header
df = df.drop(index=0)
df.columns = df.iloc[0]
df = df.drop(index=1)

# Define the columns based on the categories
numeric_features = [
    'age', 'BMI', 'gcs', 'sirs', 'apsiii', 'lods', 'oasis', 'sapsii', 'sofa_total',
    'sofa_respiration', 'sofa_coagulation', 'sofa_liver', 'sofa_cardiovascular',
    'sofa_cns', 'sofa_renal', 'urineoutput_1stday', 'hematocrit_min', 'hematocrit_max',
    'hemoglobin_min', 'hemoglobin_max', 'platelets_min', 'platelets_max', 'wbc_min',
    'wbc_max', 'albumin_min', 'albumin_max', 'aniongap_min', 'aniongap_max', 'bicarbonate_min',
    'bicarbonate_max', 'calcium_min', 'calcium_max', 'chloride_min', 'chloride_max',
    'glucose_mean', 'sodium_min', 'sodium_max', 'potassium_min', 'potassium_max', 'bun_max',
    'creatinine_max', 'INR_min', 'INR_max', 'PT_min', 'PT_max', 'ptt_min', 'ptt_max',
    'ALT_max', 'ALP_max', 'AST_max', 'bilirubin_total_max', 'ld_ldh_max', 'heart_rate_max',
    'SBP_mean', 'DBP_mean', 'mbp_mean', 'resp_rate_min', 'resp_rate_max', 'temperature_min',
    'temperature_max', 'SpO2_min', 'lactate_max_bg', 'pCO2_min_bg', 'pCO2_max_bg',
    'baseexcess_min_bg', 'baseexcess_max_bg'
]

categorical_features = [
    'gender_M1F0', 'Myocardial_infarction', 'Congestive_heart_failure', 'Peripheral_vascular_disease',
    'Cerebrovascular_disease', 'Dementia', 'Chronic_pulmonary_disease', 'Rheumatic_disease',
    'peptic_ulcer_disease', 'mild_liver_disease', 'Diabetes', 'Hemiplegia_paraplegia',
    'renal_disease', 'malignancy', 'Moderate_or_severe_liver_disease', 'Metastatic_solid_tumor',
    'AIDS', 'vasoactive drug ', 'dobutamine', 'vasopressin', 'phenylephrine', 'norepinephrine',
    'dopamine', 'milrinone', 'epinephrine', 'MV'
]

label_encoding_feature = 'race'
output_features = ['death_28day', 'death_90day', 'death_1year']

# Step 2: Handle missing values
# Fill numeric columns with mean
for col in numeric_features:
    df[col] = pd.to_numeric(df[col], errors='coerce')  # Ensure numeric dtype
    df[col] = df[col].fillna(df[col].mean())
    df[col] = df[col].astype('int64')

# Fill categorical columns with mode
for col in categorical_features:
    df[col] = df[col].fillna(df[col].mode()[0])
    df[col] = df[col].astype('int64')

# Step 3: Label encode the 'race' column
label_encoder = LabelEncoder()
df[label_encoding_feature] = label_encoder.fit_transform(df[label_encoding_feature].fillna(df[label_encoding_feature].mode()[0]))

for col in output_features:
    df[col] = df[col].astype('int64')

# Step 4: Normalize all numeric columns using z-score
for col in numeric_features:
    df[col] = zscore(df[col])

# Step 5: Extract X (features) and Y (target)
Y = df['death_1year']  # Target column
X = df.drop(columns=['death_1year', 'death_28day', 'death_90day','MV'])  # Features

#Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42, stratify=Y)

# Use RandomUnderSampler for imbalanced dataset
from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=42)
X_train_resampled, Y_train_resampled = rus.fit_resample(X_train, Y_train)
categorical_features.pop(25)
# print("X_train_resampled shape:", X_train_resampled.shape)
# print("Y_train_resampled shape:", Y_train_resampled.shape)
# Split training data into training and validation sets (e.g., 80% train, 20% val)
X_train_resampled, X_val, Y_train_resampled, Y_val = train_test_split(
    X_train_resampled, Y_train_resampled, test_size=0.2, random_state=42
)

# Convert the DataFrames to NumPy arrays before converting them to PyTorch tensors
X_train_tensor = torch.tensor(X_train_resampled.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train_resampled.to_numpy(), dtype=torch.long)
Y_test_tensor = torch.tensor(Y_test.to_numpy(), dtype=torch.long)

X_val_tensor = torch.tensor(X_val.to_numpy(), dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val.to_numpy(), dtype=torch.long)

In [8]:
# Count the number of rows with label 0 and label 1 in the training set
label_counts = df['death_1year'].value_counts()
print("Label Counts:")
print(label_counts)

# print("X_train_resampled shape:", X_train_resampled.shape)
# print("Y_train_resampled shape:", Y_train_resampled.shape)

print("X_train_tensor shape:", X_train_tensor.shape)
print("Y_train_tensor shape:", Y_train_tensor.shape)
print("X_val_tensor shape:", X_val_tensor.shape)
print("Y_val_tensor shape:", Y_val_tensor.shape)
print("X_test_tensor shape:", X_test_tensor.shape)
print("Y_test_tensor shape:", Y_test_tensor.shape)


# Check the count of each label after undersampling
label_counts = pd.Series(Y_train_resampled).value_counts()
print("Count of each label after undersampling:")
print(label_counts)

label_counts_testset = pd.Series(Y_test_tensor).value_counts()
print("Count of each label in testset:")
print(label_counts_testset)

Label Counts:
death_1year
0    14570
1     4230
Name: count, dtype: int64
X_train_tensor shape: torch.Size([5414, 92])
Y_train_tensor shape: torch.Size([5414])
X_val_tensor shape: torch.Size([1354, 92])
Y_val_tensor shape: torch.Size([1354])
X_test_tensor shape: torch.Size([3760, 92])
Y_test_tensor shape: torch.Size([3760])
Count of each label after undersampling:
death_1year
1    2719
0    2695
Name: count, dtype: int64
Count of each label in testset:
0    2914
1     846
Name: count, dtype: int64


In [9]:
# Ensure that all columns are encoded as needed and converted to PyTorch tensors

# Separate numerical and categorical features
X_train_num = torch.tensor(X_train_resampled[numeric_features].to_numpy(), dtype=torch.float32)
X_train_cat = torch.tensor(X_train_resampled[categorical_features].to_numpy(), dtype=torch.long)

X_test_num = torch.tensor(X_test[numeric_features].to_numpy(), dtype=torch.float32)
X_test_cat = torch.tensor(X_test[categorical_features].to_numpy(), dtype=torch.long)

# Target labels
Y_train_tensor = torch.tensor(Y_train_resampled.to_numpy(), dtype=torch.long)
Y_test_tensor = torch.tensor(Y_test.to_numpy(), dtype=torch.long)


In [10]:
print(X_train_num)
print(X_train_num.shape)

print(X_train_cat)
print(X_train_cat.shape)

tensor([[ 0.5826, -1.0166,  0.5710,  ..., -0.0359,  0.1061, -0.0194],
        [ 0.2142,  0.0081,  0.2421,  ...,  1.5111,  0.5907,  2.0411],
        [ 1.5650,  0.0081, -2.0604,  ..., -0.0359,  0.1061, -0.0194],
        ...,
        [ 0.5826,  0.6912,  0.2421,  ..., -0.0359,  0.1061, -0.0194],
        [ 1.5650,  0.0081, -0.0869,  ..., -1.0369,  1.0753,  0.2750],
        [-0.2770,  0.1789, -1.0736,  ...,  4.0592,  3.7408,  4.1016]])
torch.Size([5414, 66])
tensor([[1, 1, 0,  ..., 0, 0, 0],
        [1, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [1, 0, 1,  ..., 0, 0, 0],
        [0, 0, 1,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])
torch.Size([5414, 25])


In [11]:
import torch
import torch.nn.functional as F
from torch import nn, einsum

from einops import rearrange, repeat

# helpers

def exists(val):
    return val is not None

def default(val, d):
    return val if exists(val) else d

# classes

class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(x, **kwargs) + x

class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

# attention

class GEGLU(nn.Module):
    def forward(self, x):
        x, gates = x.chunk(2, dim = -1)
        return x * F.gelu(gates)

class FeedForward(nn.Module):
    def __init__(self, dim, mult = 4, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim * mult * 2),
            GEGLU(),
            nn.Dropout(dropout),
            nn.Linear(dim * mult, dim)
        )

    def forward(self, x, **kwargs):
        return self.net(x)

class Attention(nn.Module):
    def __init__(
        self,
        dim,
        heads = 8,
        dim_head = 16,
        dropout = 0.
    ):
        super().__init__()
        inner_dim = dim_head * heads
        self.heads = heads
        self.scale = dim_head ** -0.5

        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)
        self.to_out = nn.Linear(inner_dim, dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        h = self.heads
        q, k, v = self.to_qkv(x).chunk(3, dim = -1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v))
        sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        attn = sim.softmax(dim = -1)
        dropped_attn = self.dropout(attn)

        out = einsum('b h i j, b h j d -> b h i d', dropped_attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)', h = h)
        return self.to_out(out), attn

# transformer

class Transformer(nn.Module):
    def __init__(
        self,
        dim,
        depth,
        heads,
        dim_head,
        attn_dropout,
        ff_dropout
    ):
        super().__init__()
        self.layers = nn.ModuleList([])

        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout)),
                PreNorm(dim, FeedForward(dim, dropout = ff_dropout)),
            ]))

    def forward(self, x, return_attn = False):
        post_softmax_attns = []

        for attn, ff in self.layers:
            attn_out, post_softmax_attn = attn(x)
            post_softmax_attns.append(post_softmax_attn)

            x = x + attn_out
            x = ff(x) + x

        if not return_attn:
            return x

        return x, torch.stack(post_softmax_attns)
# mlp

class MLP(nn.Module):
    def __init__(self, dims, act = None):
        super().__init__()
        dims_pairs = list(zip(dims[:-1], dims[1:]))
        layers = []
        for ind, (dim_in, dim_out) in enumerate(dims_pairs):
            is_last = ind >= (len(dims_pairs) - 1)
            linear = nn.Linear(dim_in, dim_out)
            layers.append(linear)

            if is_last:
                continue

            act = default(act, nn.ReLU())
            layers.append(act)

        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        return self.mlp(x)

# main class

class TabTransformer(nn.Module):
    def __init__(
        self,
        *,
        categories,
        num_continuous,
        dim,
        depth,
        heads,
        dim_head = 16,
        dim_out = 1,
        mlp_hidden_mults = (4, 2),
        mlp_act = None,
        num_special_tokens = 2,
        continuous_mean_std = None,
        attn_dropout = 0.,
        ff_dropout = 0.,
        use_shared_categ_embed = True,
        shared_categ_dim_divisor = 8.   # in paper, they reserve dimension / 8 for category shared embedding
    ):
        super().__init__()
        assert all(map(lambda n: n > 0, categories)), 'number of each category must be positive'
        assert len(categories) + num_continuous > 0, 'input shape must not be null'

        # categories related calculations

        self.num_categories = len(categories)
        self.num_unique_categories = sum(categories)

        # create category embeddings table

        self.num_special_tokens = num_special_tokens
        total_tokens = self.num_unique_categories + num_special_tokens

        shared_embed_dim = 0 if not use_shared_categ_embed else int(dim // shared_categ_dim_divisor)

        self.category_embed = nn.Embedding(total_tokens, dim - shared_embed_dim)

        # take care of shared category embed

        self.use_shared_categ_embed = use_shared_categ_embed

        if use_shared_categ_embed:
            self.shared_category_embed = nn.Parameter(torch.zeros(self.num_categories, shared_embed_dim))
            nn.init.normal_(self.shared_category_embed, std = 0.02)

        # for automatically offsetting unique category ids to the correct position in the categories embedding table

        if self.num_unique_categories > 0:
            categories_offset = F.pad(torch.tensor(list(categories)), (1, 0), value = num_special_tokens)
            categories_offset = categories_offset.cumsum(dim = -1)[:-1]
            self.register_buffer('categories_offset', categories_offset)

        # continuous

        self.num_continuous = num_continuous

        if self.num_continuous > 0:
            if exists(continuous_mean_std):
                assert continuous_mean_std.shape == (num_continuous, 2), f'continuous_mean_std must have a shape of ({num_continuous}, 2) where the last dimension contains the mean and variance respectively'
            self.register_buffer('continuous_mean_std', continuous_mean_std)

            self.norm = nn.LayerNorm(num_continuous)

        # transformer

        self.transformer = Transformer(
            dim = dim,
            depth = depth,
            heads = heads,
            dim_head = dim_head,
            attn_dropout = attn_dropout,
            ff_dropout = ff_dropout
        )

        # mlp to logits

        input_size = (dim * self.num_categories) + num_continuous

        hidden_dimensions = [input_size * t for t in  mlp_hidden_mults]
        all_dimensions = [input_size, *hidden_dimensions, dim_out]

        self.mlp = MLP(all_dimensions, act = mlp_act)

    def forward(self, x_categ, x_cont, return_attn = False):
        xs = []

        assert x_categ.shape[-1] == self.num_categories, f'you must pass in {self.num_categories} values for your categories input'

        if self.num_unique_categories > 0:
            x_categ = x_categ + self.categories_offset

            categ_embed = self.category_embed(x_categ)

            if self.use_shared_categ_embed:
                shared_categ_embed = repeat(self.shared_category_embed, 'n d -> b n d', b = categ_embed.shape[0])
                categ_embed = torch.cat((categ_embed, shared_categ_embed), dim = -1)

            x, attns = self.transformer(categ_embed, return_attn = True)

            flat_categ = rearrange(x, 'b ... -> b (...)')
            xs.append(flat_categ)

        assert x_cont.shape[1] == self.num_continuous, f'you must pass in {self.num_continuous} values for your continuous input'

        if self.num_continuous > 0:
            if exists(self.continuous_mean_std):
                mean, std = self.continuous_mean_std.unbind(dim = -1)
                x_cont = (x_cont - mean) / std

            normed_cont = self.norm(x_cont)
            xs.append(normed_cont)

        x = torch.cat(xs, dim = -1)
        logits = self.mlp(x)

        if not return_attn:
            return logits

        return logits, attns

In [12]:
# Number of unique values for each categorical feature
category_sizes = [X_train_cat[:, i].max().item() + 1 for i in range(X_train_cat.shape[1])]
num_continuous = X_train_num.shape[1]  # 66 in this case (numeric_features)

# Model parameters
dim = 64  # embedding dimension, can be tuned
depth = 4  # number of Transformer layers, can be tuned
heads = 8  # number of attention heads, can be tuned
dim_head = 16  # dimension per head, can be tuned
dim_out = 2  # output dimension (e.g., binary classification with logits)
mlp_hidden_mults = (4, 2)  # scaling for MLP hidden layers
attn_dropout = 0.1
ff_dropout = 0.1

# Precomputed mean and std for continuous features (standardization)
# continuous_mean_std = torch.tensor([X_train_num.mean(dim=0), X_train_num.std(dim=0)]).T
# Precompute mean and std for continuous features (standardization)
mean = X_train_num.mean(dim=0)
std = X_train_num.std(dim=0)
continuous_mean_std = torch.stack((mean, std), dim=1)  # Shape will be (num_continuous, 2)


In [13]:
model = TabTransformer(
    categories=category_sizes,
    num_continuous=num_continuous,
    dim=dim,
    depth=depth,
    heads=heads,
    dim_head=dim_head,
    dim_out=dim_out,
    mlp_hidden_mults=mlp_hidden_mults,
    attn_dropout=attn_dropout,
    ff_dropout=ff_dropout,
    continuous_mean_std=continuous_mean_std
)


In [14]:
import torch.optim as optim

# Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()  # suitable for multi-class or binary classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100  # can be adjusted
batch_size = 32  # adjust as needed

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    # Mini-batch training
    for i in range(0, X_train_num.size(0), batch_size):
        # Get batch
        x_batch_cat = X_train_cat[i:i+batch_size]
        x_batch_num = X_train_num[i:i+batch_size]
        y_batch = Y_train_tensor[i:i+batch_size]

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        logits = model(x_batch_cat, x_batch_num)

        # Compute loss
        loss = criterion(logits, y_batch)
        epoch_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / (i // batch_size + 1)}")


Epoch 1/100, Loss: 0.713356594303075
Epoch 2/100, Loss: 0.5200731542180567
Epoch 3/100, Loss: 0.5029106126112096
Epoch 4/100, Loss: 0.4882618483375101
Epoch 5/100, Loss: 0.4742837017073351
Epoch 6/100, Loss: 0.45778900612803064
Epoch 7/100, Loss: 0.4313568378196043
Epoch 8/100, Loss: 0.4223144482163822
Epoch 9/100, Loss: 0.40751823739093895
Epoch 10/100, Loss: 0.3722338189973551
Epoch 11/100, Loss: 0.3495246813577764
Epoch 12/100, Loss: 0.3233927904902136
Epoch 13/100, Loss: 0.3064892978571794
Epoch 14/100, Loss: 0.2889961805742453
Epoch 15/100, Loss: 0.28576364681562955
Epoch 16/100, Loss: 0.24162552709526874
Epoch 17/100, Loss: 0.22543686401537236
Epoch 18/100, Loss: 0.2060361295719357
Epoch 19/100, Loss: 0.1917062449871617
Epoch 20/100, Loss: 0.17895610921413582
Epoch 21/100, Loss: 0.16187063295613316
Epoch 22/100, Loss: 0.1487095342236845
Epoch 23/100, Loss: 0.11857827944845399
Epoch 24/100, Loss: 0.09324323278577888
Epoch 25/100, Loss: 0.10457175356226371
Epoch 26/100, Loss: 0.081

In [15]:
model.eval()
with torch.no_grad():
    # Forward pass on test set
    test_logits = model(X_test_cat, X_test_num)

    # Get predictions
    test_preds = torch.argmax(test_logits, dim=1)  # Assuming a classification task
    accuracy = (test_preds == Y_test_tensor).float().mean()

    print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")


Test Accuracy: 71.68%


In [16]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import torch

def evaluate_model(model, X_test_num, X_test_cat, Y_test_tensor):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # Forward pass - only get the logits
        logits = model(X_test_cat, X_test_num, return_attn=False)  # Set return_attn to False

        # Get predicted classes
        _, predicted = torch.max(logits, dim=1)

    # Move data to CPU and convert to numpy arrays if necessary
    Y_test_np = Y_test_tensor.cpu().numpy()
    predicted_np = predicted.cpu().numpy()

    # Calculate metrics
    accuracy = accuracy_score(Y_test_np, predicted_np)
    precision = precision_score(Y_test_np, predicted_np, average='weighted')
    recall = recall_score(Y_test_np, predicted_np, average='weighted')
    f1 = f1_score(Y_test_np, predicted_np, average='weighted')

    # Display results
    print("Model Evaluation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(Y_test_np, predicted_np))

# Call the evaluation function
evaluate_model(model, X_test_num, X_test_cat, Y_test_tensor)


Model Evaluation Metrics:
Accuracy: 0.7168
Precision: 0.7954
Recall: 0.7168
F1 Score: 0.7379

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.71      0.80      2914
           1       0.43      0.74      0.54       846

    accuracy                           0.72      3760
   macro avg       0.66      0.72      0.67      3760
weighted avg       0.80      0.72      0.74      3760

