In [2]:
import pandas as pd
import torch
import torch.nn as nn
from torch.nn.functional import softmax
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from sksurv.linear_model import CoxPHSurvivalAnalysis
from losses import CensoredMSELoss
from utils import compute_time_to_event
import numpy as np
import logging
import hydra
from hydra import initialize, compose
from omegaconf import DictConfig, OmegaConf
import os
import logging
from model import TimeToDeath3DCNN
from sklearn.model_selection import train_test_split
from utils import LungCancerDataset
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import imageio.v3 as iio
from tqdm import tqdm

In [3]:
def train_model(model, train_dataset, batch_size, criterion, optimizer, writer, device, num_epochs, gamma, logger):
    """Train ``model`` on ``train_dataset`` and log the mean batch loss per epoch.

    For every batch the model produces an embedding and a probability
    threshold. The embedding is concatenated with the clinical variables, a
    survival estimator is fit on those features through
    ``model.fit_survival_estimator``, and its predicted survival functions are
    converted to survival times by ``compute_time_to_event`` before being
    scored by ``criterion``.

    Args:
        model: Network called as ``model(scans)`` returning
            ``(embedding, proba_thresh)`` and exposing
            ``fit_survival_estimator(features, events, times)``.
        train_dataset: Dataset whose items unpack to
            ``(scans, events, times, clinical_vars)``.
        batch_size: Batch size handed to the ``DataLoader``.
        criterion: Loss called as ``criterion(survival_times, events, times, gamma)``.
        optimizer: Optimizer stepping the model parameters.
        writer: Object with ``add_scalar(tag, value, step)``; receives
            ``"Loss/Train"`` once per epoch.
        device: Device the model and batches are moved to. If ``None``, CUDA is
            used when available, otherwise the CPU.
        num_epochs: Number of passes over ``train_dataset``.
        gamma: Extra argument forwarded unchanged to ``criterion``.
        logger: Logger receiving one ``info`` record per epoch.

    Returns:
        The same ``model`` object after training.
    """
    # Respect the device chosen by the caller; only auto-detect when none was given.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    model.to(device)
    model.train()

    with tqdm(range(num_epochs), desc="Epochs", leave=False) as pbar:
        for epoch in pbar:
            epoch_loss = 0
            for batch in dataloader:
                scans, events, times, clinical_vars = batch
                scans = scans.to(device)
                events = events.to(device)
                times = times.to(device)
                clinical_vars = clinical_vars.to(device)

                optimizer.zero_grad()
                embedding, proba_thresh = model(scans)

                # Features are detached and converted to a NumPy array before
                # they are handed to the survival estimator.
                # NOTE(review): because the embedding is detached here, gradients
                # can only reach the network through `proba_thresh`, and only if
                # `compute_time_to_event` and `criterion` are differentiable with
                # respect to it -- confirm this is the intended training signal.
                all_features = np.concatenate(
                    (embedding.detach().cpu().numpy(), clinical_vars.detach().cpu().numpy()),
                    axis=1,
                )
                survival_estimator = model.fit_survival_estimator(all_features, events, times)
                surv_funcs = survival_estimator.predict_survival_function(all_features)
                survival_times = compute_time_to_event(surv_funcs, thershold = proba_thresh)

                loss = criterion(survival_times, events, times, gamma)
                loss.backward()
                optimizer.step()

                batch_loss = loss.item()
                epoch_loss += batch_loss

                pbar.set_postfix_str(
                    f"Epoch {epoch} "
                    f"| Loss {batch_loss:.02f} "
                )

            # The printed value is the summed loss; the logged/written value is
            # the mean over batches.
            print(f"Epoch {epoch + 1}, Loss: {epoch_loss:.4f}")
            avg_loss = epoch_loss / len(dataloader)
            logger.info(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")
            writer.add_scalar("Loss/Train", avg_loss, epoch + 1)

    return model

In [4]:
def test_model(model, test_dataset, criterion, writer, device, epoch, batch_size, gamma, logger):
    """Evaluate ``model`` on ``test_dataset`` and return the mean batch loss.

    Mirrors the per-batch pipeline of training (embedding + clinical variables
    -> survival estimator -> survival times -> ``criterion``) with gradients
    disabled and the model in evaluation mode.

    Args:
        model: Network called as ``model(scans)`` returning
            ``(embedding, proba_thresh)`` and exposing
            ``fit_survival_estimator(features, events, times)``.
        test_dataset: Dataset whose items unpack to
            ``(scans, events, times, clinical_vars)``.
        criterion: Loss called as ``criterion(survival_times, events, times, gamma)``.
        writer: Object with ``add_scalar(tag, value, step)``; receives
            ``"Loss/Test"`` once.
        device: Device the model and batches are moved to. If ``None``, CUDA is
            used when available, otherwise the CPU.
        epoch: Step value recorded alongside the test loss in ``writer``.
        batch_size: Batch size handed to the ``DataLoader``.
        gamma: Extra argument forwarded unchanged to ``criterion``.
        logger: Logger receiving one ``info`` record with the mean loss.

    Returns:
        float: Sum of the batch losses divided by the number of batches.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Keep the model on the same device as the batches so this function does
    # not depend on the caller having moved it beforehand.
    model.to(device)
    model.eval()
    with torch.no_grad():
        total_loss = 0
        for batch in dataloader:
            scans, events, times, clinical_vars = batch
            scans = scans.to(device)
            events = events.to(device)
            times = times.to(device)
            clinical_vars = clinical_vars.to(device)

            embedding, proba_thresh = model(scans)
            # Tensors must be copied to host memory before `.numpy()`; without
            # `.cpu()` this raises for CUDA tensors.
            all_features = np.concatenate(
                (embedding.detach().cpu().numpy(), clinical_vars.detach().cpu().numpy()),
                axis=1,
            )
            # NOTE(review): the survival estimator is fit on each evaluation
            # batch (using its events/times) and then predicts on that same
            # batch -- confirm this is intended rather than reusing an
            # estimator fit on training data.
            survival_estimator = model.fit_survival_estimator(all_features, events, times)
            surv_funcs = survival_estimator.predict_survival_function(all_features)
            survival_times = compute_time_to_event(surv_funcs, thershold = proba_thresh)
            loss = criterion(survival_times, events, times, gamma)

            total_loss += loss.item()

        # The printed value is the summed loss; the logged/returned value is
        # the mean over batches.
        print(f"Test Loss: {total_loss:.4f}")
        avg_loss = total_loss / len(dataloader)
        logger.info(f"Test Loss: {avg_loss:.4f}")
        writer.add_scalar("Loss/Test", avg_loss, epoch)
        return avg_loss

In [71]:
# Compose the experiment configuration from `experiment_config.yaml`
# (config_path=".") and echo it as YAML so the run's settings are recorded
# in the cell output.
with initialize(config_path=".", version_base=None):
    cfg = compose(config_name="experiment_config.yaml")
print(OmegaConf.to_yaml(cfg))

project: Death3DCNN
experiment_name: exp1
hypothesis: '-'
in_channels: 1
out_channels_conv1: 16
out_channels_conv2: 32
out_channels_conv3: 64
output_dim_target: 50
kernel_conv: 3
kernel_pool: 2
dropout: 0.5
num_epochs: 10
batch_size: 1
lr: 0.001
gamma: 0.1
scans_path_train: /Users/VictoriaShevchenko/Documents/biohack/data/train
scans_path_test: /Users/VictoriaShevchenko/Documents/biohack/data/test
clinical_vars_path: /Users/VictoriaShevchenko/Documents/biohack/data/NSCLC-Radiomics-Lung1.clinical-version3-Oct-2019.csv
work_dir: .
results_dir: ./results



In [72]:
# Build the dataset from the configured scan directories and clinical CSV,
# requesting it with return_train=True.
dataset_train = LungCancerDataset(
    clinical_path=cfg.clinical_vars_path,
    scans_path_test=cfg.scans_path_test,
    scans_path_train=cfg.scans_path_train,
    return_train=True,
)

In [73]:
# Sanity check: display the shape of the loaded scans tensor.
# presumably laid out as (samples, channels, depth, height, width) -- TODO confirm
dataset_train.scans.shape

torch.Size([325, 1, 5, 256, 256])

In [19]:
def main(cfg=None):
    """Train and evaluate a ``TimeToDeath3DCNN`` for one experiment configuration.

    Args:
        cfg: Configuration object providing the attributes read below:
            ``results_dir``, ``experiment_name``, ``batch_size``, ``lr``,
            ``num_epochs``, ``in_channels``, ``out_channels_conv1/2/3``,
            ``kernel_conv``, ``kernel_pool``, ``dropout``, ``gamma``,
            ``scans_path_train``, ``scans_path_test`` and
            ``clinical_vars_path``. When omitted, the module-level ``cfg`` is
            looked up at call time.

    Returns:
        tuple: ``(model, avg_test_loss)`` -- the trained model and the mean
        test loss reported by ``test_model``.

    Raises:
        NameError: If ``cfg`` is omitted and no module-level ``cfg`` exists.
    """
    if cfg is None:
        # A `cfg=cfg` default would be frozen when this cell is executed, so a
        # configuration re-composed afterwards would be silently ignored.
        # Resolving the global at call time always uses the current one.
        try:
            cfg = globals()["cfg"]
        except KeyError:
            raise NameError(
                "name 'cfg' is not defined; compose the configuration first or pass it to main()"
            ) from None

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    results_dir = os.path.join(cfg.results_dir, cfg.experiment_name)
    os.makedirs(results_dir, exist_ok=True)

    # Seed the global NumPy and PyTorch generators so weight initialisation and
    # DataLoader shuffling are repeatable (the previous standalone RandomState
    # object was never used, so nothing was actually seeded).
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    batch_size = cfg.batch_size
    learning_rate = cfg.lr
    num_epochs = cfg.num_epochs
    in_channels = cfg.in_channels
    out_channels_conv1 = cfg.out_channels_conv1
    out_channels_conv2 = cfg.out_channels_conv2
    out_channels_conv3 = cfg.out_channels_conv3
    kernel_conv = cfg.kernel_conv
    kernel_pool = cfg.kernel_pool
    dropout = cfg.dropout
    gamma = cfg.gamma

    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
    )
    logger = logging.getLogger(__name__)

    model = TimeToDeath3DCNN(
        in_channels,
        out_channels_conv1,
        out_channels_conv2,
        out_channels_conv3,
        kernel_conv,
        kernel_pool,
        dropout)

    criterion = CensoredMSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate) # replace with ADOPT?

    train_dataset = LungCancerDataset(cfg.scans_path_train, cfg.scans_path_test, cfg.clinical_vars_path, return_train=True)
    test_dataset = LungCancerDataset(cfg.scans_path_train, cfg.scans_path_test, cfg.clinical_vars_path, return_train=False)

    writer = SummaryWriter(results_dir)
    try:
        model = train_model(model, train_dataset, batch_size, criterion, optimizer, writer, device, num_epochs, gamma, logger)
        # The test loss is recorded at step `num_epochs`, i.e. after the last
        # training epoch.
        avg_test_loss = test_model(model, test_dataset, criterion, writer, device, num_epochs, batch_size, gamma, logger)
    finally:
        # Flush pending events and release the event file even if training fails.
        writer.close()

    return model, avg_test_loss
    


In [20]:
# Re-compose the configuration from `experiment_config.yaml` (config_path=".")
# so the cells that follow see its current contents, and echo it as YAML.
with initialize(config_path=".", version_base=None):
    cfg = compose(config_name="experiment_config.yaml")
print(OmegaConf.to_yaml(cfg))

project: Death3DCNN
experiment_name: exp1
hypothesis: '-'
in_channels: 1
out_channels_conv1: 16
out_channels_conv2: 32
out_channels_conv3: 64
output_dim_target: 50
kernel_conv: 3
kernel_pool: 2
dropout: 0.5
num_epochs: 100
batch_size: 1
lr: 0.001
gamma: 0.1
scans_path_train: /Users/VictoriaShevchenko/Documents/biohack/data/train
scans_path_test: /Users/VictoriaShevchenko/Documents/biohack/data/test
clinical_vars_path: /Users/VictoriaShevchenko/Documents/biohack/data/NSCLC-Radiomics-Lung1.clinical-version3-Oct-2019.csv
work_dir: .
results_dir: ./results



In [None]:
# Entry point: run the full train/evaluate pipeline when this code is executed
# as the top-level module. main() is called without arguments, so it falls back
# to its default configuration; the returned (model, avg_test_loss) is discarded.
if __name__ == "__main__":
    main()

Epochs:   0%|          | 0/100 [00:00<?, ?it/s]