# Q3.2 - Simulating Label Scarcity
This notebook combines encoder pretraining, linear probing, and supervised LSTM models under label scarcity conditions.

In [2]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score
import torch
import torch.nn as nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from glob import glob
import matplotlib.pyplot as plt
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint


In [3]:
# def load_data():
#     os.chdir("/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data")
#     df_train = pd.read_parquet("set-a-filled.parquet")
#     df_val = pd.read_parquet("set-b-filled.parquet")
#     df_test = pd.read_parquet("set-c-filled.parquet")
    
#     #print(df_train.head())
#     #print(df_val.head())
#     #print(df_test.head())
    

#     labels_train = pd.read_csv("Outcomes-a.txt", sep=',')[['RecordID', 'In-hospital_death']]
#     labels_val = pd.read_csv("Outcomes-b.txt", sep=',')[['RecordID', 'In-hospital_death']]
#     labels_test = pd.read_csv("Outcomes-c.txt", sep=',')[['RecordID', 'In-hospital_death']]

#     return (df_train.drop(columns = ['In-hospital_death', 'ICUType']), labels_train), (df_val.drop(columns = ['In-hospital_death', 'ICUType']), labels_val), (df_test.drop(columns = ['In-hospital_death', 'ICUType']), labels_test)

# os.chdir("/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data")\
os.chdir("../../data")
# Load parquet data
df_a = pd.read_parquet("set-a-filled.parquet").drop(columns = ['ICUType'])
df_b = pd.read_parquet("set-b-filled.parquet").drop(columns = ['ICUType'])
df_c = pd.read_parquet("set-c-filled.parquet").drop(columns = ['ICUType'])

print("Train:", df_a.shape, "Val:", df_b.shape, "Test:", df_c.shape)



Train: (196000, 43) Val: (196000, 43) Test: (196000, 43)


In [6]:
class LSTMAutoencoder(pl.LightningModule):
    def __init__(self, input_size, hidden_size=64, num_layers=1, lr=1e-3):
        super().__init__()
        self.save_hyperparameters()
        self.encoder = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.decoder = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_size, input_size)
        self.loss_fn = nn.MSELoss()
        self.lr = lr

    def forward(self, x):
        _, (last_hidden_state, last_cell_state) = self.encoder(x)

        decoder_input = torch.zeros(x.size(0), x.size(1), self.hparams.hidden_size, device=x.device)

        decoder_output, _ = self.decoder(decoder_input, (last_hidden_state, last_cell_state))

        output_seq = self.output_layer(decoder_output)  # shape: (batch, seq_len, input_size)
        return output_seq

    def training_step(self, batch, batch_idx):
        input_seq, _ = batch
        output_seq = self(input_seq)
        loss = self.loss_fn(output_seq, input_seq)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        input_seq, _ = batch
        output_seq = self(input_seq)
        loss = self.loss_fn(output_seq, input_seq)
        self.log("val_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)


In [7]:
class TimeSeriesDataModule(pl.LightningDataModule):
    def __init__(
        self,
        feature_cols,
        batch_size=32,
        k_folds=None,  # set this to an int (e.g., 5) to activate k-fold
        fold_index=0    # index of current fold
    ):
        super().__init__()
        self.feature_cols = feature_cols
        self.batch_size = batch_size
        self.k_folds = k_folds
        self.fold_index = fold_index



    def preprocess_parquet_for_lstm(self, df, scaler=None, fit_scaler=False):
        labelname = 'In-hospital_death'
        

    
            
    
        # Sort and fill NaNs
        df = df.sort_values(["RecordID", "Time"])
        df[self.feature_cols] = df[self.feature_cols].fillna(0)
    
        # Replace -1 with 0 in binary categorical feature
        if "MechVent" in df.columns:
            df["MechVent"] = df["MechVent"].replace(-1, 0)
    
        # One-hot encode Gender, drop last column to avoid multicollinearity
        if "Gender" in df.columns:
            gender_dummies = pd.get_dummies(df["Gender"], prefix="Gender", dtype=float)
            gender_dummies = gender_dummies.iloc[:, :-1]  # drop last dummy column
            df = df.drop(columns=["Gender"])
            df = pd.concat([df, gender_dummies], axis=1)
    
        # Update feature columns after dummy encoding
        current_feature_cols = [col for col in df.columns if col in self.feature_cols or col.startswith("Gender_")]
    
        # Extract numerical columns for scaling
        numerical = [f for f in current_feature_cols if f not in ["MechVent", labelname] and not f.startswith("Gender_")]
    
        # Fit or reuse scaler
        if fit_scaler or scaler is None:
            scaler = StandardScaler()
            scaler.fit(df[numerical])
    
        # Apply scaling
        df[numerical] = scaler.transform(df[numerical])
    
        # Group by RecordID
        X, y = [], []
        for pid, group in df.groupby("RecordID"):
            
            group = group.sort_values("Time")
            X.append(group[current_feature_cols].values)
            label = group[labelname].iloc[0]
            # if label not in [0, 1]:
            #     raise ValueError(f"Unexpected label {label} for patient {pid}. Expected binary labels only.")
            y.append(int(label))
        #print(current_feature_cols)
        #print(pd.DataFrame(np.stack(X)).head())
        X_tensor = torch.tensor(np.stack(X)).float()
        y_tensor = torch.tensor(y).float()
    
        return X_tensor, y_tensor, scaler
        



    def setup(self, stage=None):
        # # Load training and validation sets and stack them
        # X_trainval_a, y_trainval_a, fitted_scaler = self.preprocess_parquet_for_lstm("a", fit_scaler=True)
        # X_trainval_b, y_trainval_b, _ = self.preprocess_parquet_for_lstm("b", scaler=fitted_scaler)

        # # Stack A and B
        # X_trainval = torch.cat([X_trainval_a, X_trainval_b], dim=0)
        # y_trainval = torch.cat([y_trainval_a, y_trainval_b], dim=0)

        # if self.k_folds:
        #     kf = KFold(n_splits=self.k_folds, shuffle=True, random_state=42)
        #     indices = list(kf.split(X_trainval))

        #     train_idx, val_idx = indices[self.fold_index]
        #     self.X_train, self.y_train = X_trainval[train_idx], y_trainval[train_idx]
        #     self.X_val, self.y_val     = X_trainval[val_idx], y_trainval[val_idx]
        # else:
        #     self.X_train, self.y_train = X_trainval_a, y_trainval_a
        #     self.X_val, self.y_val     = X_trainval_b, y_trainval_b

        # self.X_test, self.y_test, _ = self.preprocess_parquet_for_lstm("c", scaler=fitted_scaler)

        # self.train_dataset = TensorDataset(self.X_train, self.y_train)
        # self.val_dataset   = TensorDataset(self.X_val, self.y_val)
        # self.test_dataset  = TensorDataset(self.X_test, self.y_test)
        self.X_train, self.y_train, fitted_scaler = self.preprocess_parquet_for_lstm(df_a, fit_scaler = True)
        self.X_val, self.y_val , _    = self.preprocess_parquet_for_lstm(df_b, scaler = fitted_scaler)
        self.X_test, self.y_test , _  = self.preprocess_parquet_for_lstm(df_c, scaler = fitted_scaler)

        self.train_dataset = TensorDataset(self.X_train, self.y_train)
        self.val_dataset   = TensorDataset(self.X_val, self.y_val)
        self.test_dataset  = TensorDataset(self.X_test, self.y_test)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)



In [8]:
def extract_embeddings(model, dataloader):
    model.eval()
    embeddings, labels = [], []
    with torch.no_grad():
        for x, y in dataloader:
            _, (last_hidden_state, _) = model.encoder(x)
            z = last_hidden_state[-1]
            embeddings.append(z.cpu().numpy())
            labels.append(y.cpu().numpy())
    return np.vstack(embeddings), np.concatenate(labels)

def train_and_eval_probe(X_train, y_train, X_test, y_test):
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train, y_train)
    probs = clf.predict_proba(X_test)[:, 1]
    auroc = roc_auc_score(y_test, probs)
    auprc = average_precision_score(y_test, probs)
    print(f"Linear Probe:\n AuROC: {auroc:.4f},\n AuPRC: {auprc:.4f}")


In [9]:
class LSTMClassifier(pl.LightningModule):
    def __init__(self, input_size, hidden_size=64, num_layers=1, lr=1e-3):
        super().__init__()
        self.save_hyperparameters()
        self.lr = lr

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.classifier = nn.Linear(hidden_size, 1)
        self.loss_fn = nn.BCEWithLogitsLoss()

        # Store test outputs manually
        self.test_probs = []
        self.test_targets = []

        # Prediction threshold
        self.prediction_threshold = 0.5
        self.clear_after_testing = False

    def forward(self, x):
        # Run input through the LSTM
        _, (hidden_state, _) = self.lstm(x)  # lstm_output, (hidden_state, cell_state)

        # Take the last layer's hidden state (for stacked LSTM)
        last_hidden = hidden_state[-1]  # shape: (batch_size, hidden_dim)

        # Pass through the classification head
        logits = self.classifier(last_hidden)

        return logits

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x).squeeze(1)
        loss = self.loss_fn(logits, y.float())
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x).squeeze(1)
        loss = self.loss_fn(logits, y.float())
        preds = torch.sigmoid(logits) > 0.5
        acc = (preds.int() == y).float().mean()
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        return loss
        

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x).squeeze(1)
        probs = torch.sigmoid(logits)
        # print(torch.unique(log
        loss = self.loss_fn(logits, y.float())

        self.log("test_loss", loss)

        # Store results for epoch-end metrics
        self.test_probs.append(probs.detach().cpu())
        self.test_targets.append(y.detach().cpu())

        return loss 

    def on_test_epoch_end(self):
            probs = torch.cat(self.test_probs).cpu()
            targets = torch.cat(self.test_targets).cpu()
            preds = (probs > 0.5).int()
            
            # Convert both to int numpy arrays
            targets_np = targets.int().numpy()
            preds_np = preds.numpy()

            if self.clear_after_testing: 
                self.test_probs.clear()
                self.test_targets.clear()
    
            # --- Confusion Matrix ---
            cm = confusion_matrix(targets_np, preds_np)
            disp_cm = ConfusionMatrixDisplay(confusion_matrix=cm)
            disp_cm.plot(cmap='Blues')
            plt.title("Confusion Matrix")
            plt.show()
    
            # --- ROC Curve ---
            auc = roc_auc_score(targets_np, probs.numpy())
            RocCurveDisplay.from_predictions(targets, probs)
            plt.title(f"ROC Curve (AuROC = {auc:.3f})")
            plt.show()
    
            # --- Precision-Recall Curve ---
            auprc = average_precision_score(targets_np, probs.numpy())
            PrecisionRecallDisplay.from_predictions(targets, probs)
            plt.title(f"Precision-Recall Curve (AuPRC = {auprc:.3f})")
            plt.show()
    
            # --- Metrics ---
            acc = (preds == targets).float().mean()
            self.log("test_acc", acc)
            self.log("test_auroc", auc)
            self.log("test_auprc", auprc)
    
            print(f"\nTest Accuracy : {acc:.4f}")
            print(f"Test AuROC    : {auc:.4f}")
            print(f"Test AuPRC    : {auprc:.4f}")


    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        scheduler = {
            'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, verbose=True),
            'monitor': 'val_loss',
            'interval': 'epoch',
            'frequency': 1
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler}

    def tune_threshold_min_fnr(self):
        """
        Find the classification threshold that minimizes the False Negative Rate (FNR)
        and store it in self.prediction_threshold.
    
        Returns:
            best_thresh (float): Threshold with the lowest FNR
            best_fnr (float): Lowest observed FNR
            thresholds (np.ndarray): Array of tested thresholds
            fnrs (List[float]): FNRs corresponding to each threshold
        """
    
        assert hasattr(self, "test_probs") and hasattr(self, "test_targets"), \
            "test_probs and test_targets must be defined (run test first)."
    
        probs = torch.cat(self.test_probs).cpu().numpy()
        targets = torch.cat(self.test_targets).cpu().numpy()
    
        thresholds = np.linspace(0, 1, 100)
        best_thresh = 0.5
        best_fnr = 1.0
        fnrs = []
    
        for t in tqdm(thresholds):
            preds = (probs >= t).astype(int)
            tn, fp, fn, tp = confusion_matrix(targets, preds).ravel()
            fnr = fn / (fn + tp) if (fn + tp) > 0 else 1.0
            fnrs.append(fnr)
    
            if fnr < best_fnr:
                best_fnr = fnr
                best_thresh = t
    
        self.prediction_threshold = best_thresh
    
        return best_thresh, best_fnr, thresholds, fnrs


In [10]:
def get_callbacks(model_name):
    return [
        EarlyStopping(monitor='val_loss', patience=10, verbose=True, mode='min'),
        LearningRateMonitor(logging_interval='epoch'),
        ModelCheckpoint(
            monitor='val_loss',
            mode='min',
            save_top_k=1,
            verbose=True,
            filename=f'{model_name}-best-checkpoint',
            dirpath=os.path.join("lightning_logs", model_name, "checkpoints")
        )
    ]
def train_model(ModelClass, model_name, datamodule, input_size, k_folds=None, n_layers=5, h_size=64, return_model=False):
    if k_folds:
        print(f"Training {model_name} with {k_folds}-fold cross-validation...\n")
        trained_models = []

        for fold in range(k_folds):
            print(f"\n=== Fold {fold+1}/{k_folds} ===")

            # Re-initialize datamodule with current fold
            fold_dm = TimeSeriesDataModule(
                feature_cols=datamodule.feature_cols,
                batch_size=datamodule.batch_size,
                k_folds=k_folds,
                fold_index=fold
            )
            fold_dm.setup()

            model = ModelClass(input_size=input_size, hidden_size=h_size, num_layers=n_layers)
            fold_name = f"{model_name}-fold{fold}"

            trainer = pl.Trainer(
                max_epochs=20,
                callbacks=get_callbacks(fold_name),
                log_every_n_steps=10,
                accelerator="auto",
                devices="auto",
                default_root_dir=os.path.join("lightning_logs", fold_name)
            )

            trainer.fit(model, datamodule=fold_dm)

            if return_model:
                trained_models.append(model)

            # Optional test after each fold
            test_model(ModelClass, fold_name, fold_dm, model=model)

        if return_model:
            return trained_models

    else:
        print(f"Training {model_name} without cross-validation...")
        model = ModelClass(input_size=input_size, hidden_size=h_size, num_layers=n_layers)

        trainer = pl.Trainer(
            max_epochs=20,
            callbacks=get_callbacks(model_name),
            log_every_n_steps=10,
            accelerator="auto",
            devices="auto",
            default_root_dir=os.path.join("lightning_logs", model_name)
        )

        trainer.fit(model, datamodule=datamodule)

        if return_model:
            return model


def test_model(ModelClass, model_name, datamodule, model=None):
    print(f"Testing {model_name}...")

    if model is None:
        # Load best checkpoint if model is not provided
        ckpt_dir = os.path.join("lightning_logs", model_name, "checkpoints")
        ckpt_files = glob(os.path.join(ckpt_dir, "*.ckpt"))
        best_ckpt_path = ckpt_files[0] if ckpt_files else None
        print(f"Checkpoint files found: {ckpt_files}")

        if best_ckpt_path:
            print(f"Best checkpoint: {best_ckpt_path}")
            model = ModelClass.load_from_checkpoint(best_ckpt_path)
        else:
            print(f"No checkpoint found for model '{model_name}'. Skipping test.")
            return

    trainer = pl.Trainer(accelerator="auto", devices="auto")
    trainer.test(model, datamodule=datamodule)


In [11]:
def compute_embedding_and_train_and_eval_probe(datamodule, encoder_model):
    X_train_embed, y_train = extract_embeddings(encoder_model, datamodule.train_dataloader())
    X_test_embed, y_test = extract_embeddings(encoder_model, datamodule.test_dataloader())



    df_embeddings = pd.DataFrame(X_train_embed)
    df_embeddings["label"] = y_train
    df_embeddings.to_csv("patient_embeddings_train.csv", index=False)
    print("Embeddings saved to patient_embeddings_train.csv")


    df_embeddings = pd.DataFrame(X_test_embed)
    df_embeddings["label"] = y_test
    df_embeddings.to_csv("patient_embeddings_test.csv", index=False)
    print("Embeddings saved to patient_embeddings_test.csv")


    train_and_eval_probe(X_train_embed, y_train, X_test_embed, y_test)

    

In [12]:
def run_label_scarcity_experiment(datamodule, encoder_model, input_size,):
    sizes = [100, 500, 1000, 3999]

    X_train_embed, y_train = extract_embeddings(encoder_model, datamodule.train_dataloader())
    X_test_embed, y_test = extract_embeddings(encoder_model, datamodule.test_dataloader())

    
    

    collector = ResultCollector()

    for size in sizes:
        print(f"\nTraining size: {size}")
        idx = np.random.choice(len(X_train_embed), size, replace=False)

        # --- Linear Probe ---
        X_train_small = X_train_embed[idx]
        y_train_small = y_train[idx]
        clf = LogisticRegression(max_iter=1000)
        clf.fit(X_train_small, y_train_small)
        probs_lp = clf.predict_proba(X_test_embed)[:, 1]
        auroc_lp = roc_auc_score(y_test, probs_lp)
        auprc_lp = average_precision_score(y_test, probs_lp)
        print(f"Linear Probe:\n AuROC: {auroc_lp:.4f},\n AuPRC: {auprc_lp:.4f}")
        collector.add("Linear Probe", size, auroc_lp, auprc_lp)

        # --- LSTM Classifier ---
        lstm_model = train_model(LSTMClassifier, "lstm", datamodule, input_size, k_folds=k_folds, h_size = h_size, n_layers = n_layers, return_model=True)

        test_loader = datamodule.test_dataloader()
        probs = []
        labels = []
        with torch.no_grad():
            for x, y in test_loader:
                logits = lstm_model(x)
                probs.append(torch.sigmoid(logits))
                labels.append(y)
        y_pred = torch.cat(probs).numpy()
        y_true = torch.cat(labels).numpy()
        auroc = roc_auc_score(y_true, y_pred)
        auprc = average_precision_score(y_true, y_pred)
        print(f"LSTM:\n AuROC: {auroc:.4f},\n AuPRC: {auprc:.4f}")
        collector.add("LSTM", size, auroc, auprc)

    display(collector.to_df())


In [13]:
import pandas as pd

class ResultCollector:
    def __init__(self):
        self.results = []

    def add(self, model_type, train_size, auroc, auprc):
        self.results.append({
            "Model": model_type,
            "Train Size": train_size,
            "AuROC": auroc,
            "AuPRC": auprc
        })

    def to_df(self):
        return pd.DataFrame(self.results).sort_values(["Train Size", "Model"])

In [16]:
ID_vars = ["PatientID", "Time", "RecordID"]
stationary_vars = ["Age", "Gender", "Height"] #, "ICUType"]
dynamic_vars = df_a.columns.difference(stationary_vars + ID_vars + ["In-hospital_death"]).tolist()
feature_cols = dynamic_vars + stationary_vars

# Setup
k_folds = None
h_size = 64
n_layers = 1
input_size = len(feature_cols) + 1
datamodule = TimeSeriesDataModule(feature_cols=feature_cols, batch_size=64, k_folds = k_folds)

datamodule.setup()

autoencoder = LSTMAutoencoder(input_size=input_size)


compute_embedding_and_train_and_eval_probe(datamodule, autoencoder)
run_label_scarcity_experiment(datamodule, autoencoder, input_size)




Embeddings saved to patient_embeddings_train.csv
Embeddings saved to patient_embeddings_test.csv
Linear Probe:
 AuROC: 0.8265,
 AuPRC: 0.4662


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Training size: 100
Linear Probe:
 AuROC: 0.7533,
 AuPRC: 0.3702
Training lstm without cross-validation...


/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints exists and is not empty.

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | lstm       | LSTM              | 27.4 K | train
1 | classifier | Linear            | 65     | train
2 | loss_fn    | BCEWithLogitsLoss | 0      | train
---------------------------------------------------------
27.5 K    Trainable params
0         Non-trainable params
27.5 K    Total params
0.110     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.334
Epoch 0, global step 63: 'val_loss' reached 0.33401 (best 0.33401), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v43.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.308
Epoch 1, global step 126: 'val_loss' reached 0.30821 (best 0.30821), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v43.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.303
Epoch 2, global step 189: 'val_loss' reached 0.30272 (best 0.30272), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v43.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 3, global step 252: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 4, global step 315: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 5, global step 378: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 6, global step 441: 'val_loss' was not in top 1


Epoch 00007: reducing learning rate of group 0 to 5.0000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 7, global step 504: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 8, global step 567: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 9, global step 630: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 10, global step 693: 'val_loss' was not in top 1


Epoch 00011: reducing learning rate of group 0 to 2.5000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 11, global step 756: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.303. Signaling Trainer to stop.
Epoch 12, global step 819: 'val_loss' was not in top 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


LSTM:
 AuROC: 0.8188,
 AuPRC: 0.4721

Training size: 500
Linear Probe:
 AuROC: 0.7737,
 AuPRC: 0.3869
Training lstm without cross-validation...


/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints exists and is not empty.

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | lstm       | LSTM              | 27.4 K | train
1 | classifier | Linear            | 65     | train
2 | loss_fn    | BCEWithLogitsLoss | 0      | train
---------------------------------------------------------
27.5 K    Trainable params
0         Non-trainable params
27.5 K    Total params
0.110     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.324
Epoch 0, global step 63: 'val_loss' reached 0.32443 (best 0.32443), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v44.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.313
Epoch 1, global step 126: 'val_loss' reached 0.31339 (best 0.31339), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v44.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.301
Epoch 2, global step 189: 'val_loss' reached 0.30096 (best 0.30096), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v44.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.301
Epoch 3, global step 252: 'val_loss' reached 0.30061 (best 0.30061), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v44.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 4, global step 315: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 5, global step 378: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 6, global step 441: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 7, global step 504: 'val_loss' was not in top 1


Epoch 00008: reducing learning rate of group 0 to 5.0000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 8, global step 567: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 9, global step 630: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 10, global step 693: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 11, global step 756: 'val_loss' was not in top 1


Epoch 00012: reducing learning rate of group 0 to 2.5000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 12, global step 819: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.301. Signaling Trainer to stop.
Epoch 13, global step 882: 'val_loss' was not in top 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


LSTM:
 AuROC: 0.8110,
 AuPRC: 0.4648

Training size: 1000
Linear Probe:
 AuROC: 0.8071,
 AuPRC: 0.4584
Training lstm without cross-validation...


/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints exists and is not empty.

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | lstm       | LSTM              | 27.4 K | train
1 | classifier | Linear            | 65     | train
2 | loss_fn    | BCEWithLogitsLoss | 0      | train
---------------------------------------------------------
27.5 K    Trainable params
0         Non-trainable params
27.5 K    Total params
0.110     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.328
Epoch 0, global step 63: 'val_loss' reached 0.32808 (best 0.32808), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v45.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.308
Epoch 1, global step 126: 'val_loss' reached 0.30795 (best 0.30795), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v45.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.302
Epoch 2, global step 189: 'val_loss' reached 0.30188 (best 0.30188), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v45.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.301
Epoch 3, global step 252: 'val_loss' reached 0.30053 (best 0.30053), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v45.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 4, global step 315: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 5, global step 378: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 6, global step 441: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 7, global step 504: 'val_loss' was not in top 1


Epoch 00008: reducing learning rate of group 0 to 5.0000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 8, global step 567: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 9, global step 630: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 10, global step 693: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 11, global step 756: 'val_loss' was not in top 1


Epoch 00012: reducing learning rate of group 0 to 2.5000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 12, global step 819: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.301. Signaling Trainer to stop.
Epoch 13, global step 882: 'val_loss' was not in top 1
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


LSTM:
 AuROC: 0.7999,
 AuPRC: 0.4463

Training size: 3999
Linear Probe:
 AuROC: 0.8265,
 AuPRC: 0.4661
Training lstm without cross-validation...


/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints exists and is not empty.

  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | lstm       | LSTM              | 27.4 K | train
1 | classifier | Linear            | 65     | train
2 | loss_fn    | BCEWithLogitsLoss | 0      | train
---------------------------------------------------------
27.5 K    Trainable params
0         Non-trainable params
27.5 K    Total params
0.110     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                        | 0/? [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniforge/base/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |                                               | 0/? [00:00<?, ?it/s]

Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.335
Epoch 0, global step 63: 'val_loss' reached 0.33543 (best 0.33543), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v46.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.316
Epoch 1, global step 126: 'val_loss' reached 0.31618 (best 0.31618), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v46.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.307
Epoch 2, global step 189: 'val_loss' reached 0.30667 (best 0.30667), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v46.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.307
Epoch 3, global step 252: 'val_loss' reached 0.30665 (best 0.30665), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v46.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.304
Epoch 4, global step 315: 'val_loss' reached 0.30392 (best 0.30392), saving model to '/Users/damlaortac/Desktop/ML for HC/Project 1/ICU-TimeSeries-Mortality-Prediction/data/lightning_logs/lstm/checkpoints/lstm-best-checkpoint-v46.ckpt' as top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 5, global step 378: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 6, global step 441: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 7, global step 504: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 8, global step 567: 'val_loss' was not in top 1


Epoch 00009: reducing learning rate of group 0 to 5.0000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 9, global step 630: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 10, global step 693: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 11, global step 756: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 12, global step 819: 'val_loss' was not in top 1


Epoch 00013: reducing learning rate of group 0 to 2.5000e-04.


Validation: |                                             | 0/? [00:00<?, ?it/s]

Epoch 13, global step 882: 'val_loss' was not in top 1


Validation: |                                             | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.304. Signaling Trainer to stop.
Epoch 14, global step 945: 'val_loss' was not in top 1


LSTM:
 AuROC: 0.8000,
 AuPRC: 0.4399


Unnamed: 0,Model,Train Size,AuROC,AuPRC
1,LSTM,100,0.818771,0.472098
0,Linear Probe,100,0.753322,0.370153
3,LSTM,500,0.811018,0.464785
2,Linear Probe,500,0.773654,0.38691
5,LSTM,1000,0.799932,0.44632
4,Linear Probe,1000,0.807144,0.458378
7,LSTM,3999,0.800019,0.439871
6,Linear Probe,3999,0.826494,0.46606
