# Load dependencies
- Change oversample indices

In [None]:
#! pip install tabulate
#! pip install classification-models-3D
#! pip install keras_applications

In [None]:
import os
import h5py
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# from PIL import Image
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
# from scipy import ndimage
from scipy import ndimage
from scipy.special import expit, logit

from skimage import exposure
import sklearn.metrics as skm
from tabulate import tabulate

# Tensorflow/Keras
import tensorflow as tf
print(tf.__version__)
from tensorflow import keras
from keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from keras.utils import to_categorical
# from tensorflow.keras.applications.imagenet_utils import preprocess_input

#from classification_models_3D.tfkeras import Classifiers

# Own functions
from functions.plot_slices import plot_slices
# ontram functions
from k_ontram_functions.ontram import ontram
from k_ontram_functions.ontram_loss import ontram_loss
from k_ontram_functions.ontram_metrics import ontram_acc, ontram_auc
from k_ontram_functions.ontram_predict import predict_ontram, get_parameters

from functions.augmentation3d import zoom, rotate, flip, shift
from functions.augmentation3d_2modi import zoom2, rotate2, flip2, shift2

In [None]:
!python -V

In [None]:
# Define the path + output path:
# os.getcwd()
DIR = "/tf/notebooks/"
DATA_DIR = "hezo/stroke_bern/data/"
OUTPUT_DIR = "/tf/notebooks/hezo/stroke_bern/callbacks/ontrams_3d_resent_dwi_tmax/"
OUTPUT_DIR_DWI = "/tf/notebooks/hezo/stroke_bern/callbacks/ontrams_3d_resent_clinical_dwi/"

# Import images

In [None]:
with h5py.File(DIR + DATA_DIR + "data_bern_25_11_2020_preprocessed.h5", "r") as h5:
    X_dwi0 = h5["X_dwi"][:]
    X_tmax0 = h5["X_tmax"][:]
print(X_dwi0.shape, X_dwi0.min(), X_dwi0.max(), X_dwi0.mean(), X_dwi0.std())
print(X_tmax0.shape, X_tmax0.min(), X_tmax0.max(), X_tmax0.mean(), X_tmax0.std())

In [None]:
plot_slices(X_dwi0[0], 0, "axial", modality = "DWI")

In [None]:
plot_slices(X_tmax0[0], 0, "axial")

#### Adapt image size for ResNet

In [None]:
# Reshape the data to the correct dimension
# Target size per DWI volume: 128 x 128 x 20 voxels with a single channel.
dim = (128, 128, 20, 1)

# Output array with three channels per volume (the image model defined further
# down in this notebook is built with input_shape (128, 128, 20, 3)).
X_dwi = np.empty((len(X_dwi0), 128, 128, 20, 3))
for i in range(len(X_dwi0)):
    # Per-axis zoom factor = target size / current size.
    scaling_factor = [dim[0]/X_dwi0[i].shape[0], dim[1]/X_dwi0[i].shape[1], dim[2]/X_dwi0[i].shape[2], dim[3]/X_dwi0[i].shape[3]]
    # order = 1 -> linear interpolation.
    # NOTE(review): the zoomed volume presumably has one channel and is broadcast
    # into the three-channel slot by the assignment - confirm.
    X_dwi[i,:,:,:,:] = ndimage.zoom(X_dwi0[i], scaling_factor, order = 1)
# Copy channel 0 into channels 1 and 2 so that all three channels are identical.
X_dwi[:,:,:,:,1] = X_dwi[:,:,:,:,0]
X_dwi[:,:,:,:,2] = X_dwi[:,:,:,:,0]

# Visual check of the first resized volume.
plot_slices(X_dwi[0], 0, "axial", "DWI")

In [None]:
# Reshape the data to the correct dimension
# Target size per TMAX volume: 128 x 128 x 20 voxels with three channels.
dim = (128, 128, 20, 3)

X_tmax = np.empty((len(X_tmax0), 128, 128, 20, 3))
for i in range(len(X_tmax0)):
    # Per-axis zoom factor = target size / current size.
    # NOTE(review): unlike the DWI cell (dim[3] == 1 followed by explicit channel
    # copies), the channel axis here is itself rescaled to size 3 by zoom -
    # confirm that this is intended.
    scaling_factor = [dim[0]/X_tmax0[i].shape[0], dim[1]/X_tmax0[i].shape[1], dim[2]/X_tmax0[i].shape[2], dim[3]/X_tmax0[i].shape[3]]
    # order = 1 -> linear interpolation.
    X_tmax[i,:,:,:,:] = ndimage.zoom(X_tmax0[i], scaling_factor, order = 1)

# Visual check of the first resized volume.
plot_slices(X_tmax[0], 0, "axial")

In [None]:
# Combine the two modalities along a new trailing axis:
# index 0 of the last axis holds DWI, index 1 holds TMAX.
# np.stack adds the new axis itself, so the number of patients no longer has
# to be hard-coded (the previous reshape assumed exactly 222 volumes).
X = np.stack((X_dwi, X_tmax), axis = -1)
X.shape

In [None]:
del X_dwi0
del X_tmax0

# Import clinical and patient data

In [None]:
dat = pd.read_csv(DIR + DATA_DIR + "data_bern_25_11_2020_dwi.csv")
# dat.head(3)

In [None]:
# add image information to define the expert model as in the paper with Janne
dat1 = pd.read_csv(DIR + DATA_DIR + "data_wide_all_nihss.csv", sep = ",")
# dat1.head(3)

# check if the patient IDs between datasets match
# (the result is only printed; the assignments below run regardless of it)
print(all(dat.p_id.values == dat1.p_id.values))

# attach values 
# The first three columns are copied positionally (.values).
dat["S_Medm_rbf"] = dat1.S_Medm_rbf.values
dat["volume_adc"] = dat1.volume_adc.values
dat["volume_tar"] = dat1.volume_tar.values
# NOTE(review): assigned as a Series, i.e. aligned on the index rather than by
# position, unlike the three columns above - confirm both frames share the same index.
dat["infarct_side"] = dat1.infarct_side

In [None]:
# Size of the dataframe: same as the images
dat.shape

In [None]:
# Binary outcome: 1 when the 3-month mRS is at most 2, otherwise 0
# (rows where the comparison is False, including missing mRS, get 0).
dat["mrs_3months_binary"] = (dat.mrs_3months <= 2).astype("int64")

In [None]:
plt.hist(dat.mrs_3months, bins = 7)

In [None]:
plt.hist(dat.mrs_3months_binary, bins = 2)

#### Train, validation and test

In [None]:
# NAs?
[dat.age.isnull().sum(), 
 dat.nihss_bl.isnull().sum(),  
 dat.sys_bloodpressure_bl.isnull().sum(),
 dat.rf_diabetes.isnull().sum(), 
 dat.rf_hypertonia.isnull().sum(), 
 dat.rf_smoker.isnull().sum(),  
 dat.rf_tia_stroke.isnull().sum(), 
 dat.lyse.isnull().sum(),
 dat.time_to_groin_puncture.isnull().sum()]

In [None]:
# # simple imputation: replace all missing values with the mode of the column
# for column in dat.columns:
#     dat[column].fillna(dat[column].mode()[0], inplace=True)

In [None]:
# Use the imputed data from the last project.
# Files are numbered 1-5 on disk; the variables are numbered 0-4.
train0, train1, train2, train3, train4 = [
    pd.read_csv(DIR + DATA_DIR + "train_imputed_all" + str(file_no) + ".csv", sep = ",")
    for file_no in range(1, 6)
]

test0, test1, test2, test3, test4 = [
    pd.read_csv(DIR + DATA_DIR + "test_imputed_all" + str(file_no) + ".csv", sep = ",")
    for file_no in range(1, 6)
]

In [None]:
# # check if I can compare the datasets: works
# print(all(train0.age.values.round(2) == dat.age.values[train0.index-1].round(2)),
# all(train1.age.values.round(2) == dat.age.values[train1.index-1].round(2)),
# all(train2.age.values.round(2) == dat.age.values[train2.index-1].round(2)),
# all(train3.age.values.round(2) == dat.age.values[train3.index-1].round(2)),
# all(train4.age.values.round(2) == dat.age.values[train4.index-1].round(2)))
# 
# print(all(test0.age.values.round(2) == dat.age.values[test0.index-1].round(2)),
# all(test1.age.values.round(2) == dat.age.values[test1.index-1].round(2)),
# all(test2.age.values.round(2) == dat.age.values[test2.index-1].round(2)),
# all(test3.age.values.round(2) == dat.age.values[test3.index-1].round(2)),
# all(test4.age.values.round(2) == dat.age.values[test4.index-1].round(2)))

In [None]:
train = [train0, train1, train2, train3, train4]
test = [test0, test1, test2, test3, test4]

In [None]:
# get training and test indices for CV later to impute during CV
train_idxes0 = [train0.index.values, train1.index.values, train2.index.values, train3.index.values, train4.index.values]
test_idxes = [test0.index.values, test1.index.values, test2.index.values, test3.index.values, test4.index.values]

In [None]:
# Base seed; incremented by one per fold below (3004, 3005, ...).
my_seed = 3004

# define validation data
# For each of the 5 folds the original training indices are split into a
# training part and a validation part.
train_idxes = []
valid_idxes = []
for i in range(5):
    # train_test_split is called without random_state, so the global NumPy RNG
    # is seeded right before each call.
    np.random.seed(my_seed)
    # test_size is an absolute count: 15% of the fold's training indices, rounded down.
    train_idx, valid_idx = train_test_split(train_idxes0[i], test_size = int(len(train_idxes0[i])*0.15))
    valid_idxes.append(np.sort(valid_idx))
    train_idxes.append(np.sort(train_idx))
    my_seed += 1
# NOTE(review): after this loop `train_idx` and `valid_idx` still hold the
# (unsorted) split of the last fold; the training cells further down read
# `valid_idx` - make sure they use valid_idxes[f] instead.

# define datasets: train and validation
# Keep the unsplit tables in train_old and rebuild `train` / `valid` from them
# by selecting the rows whose index is in the corresponding index array.
train_old = train
train = []
valid = []
for f in range(5):
    train.append(train_old[f].loc[train_old[f].index.isin(train_idxes[f]),:])
    valid.append(train_old[f].loc[train_old[f].index.isin(valid_idxes[f]),:])

# get patient IDs
# The stored indices are shifted by -1 before looking up dat.p_id
# (presumably 1-based row numbers - TODO confirm).
for i in range(5):
    train[i] = train[i].assign(p_id = dat.p_id[train_idxes[i]-1].values)
    valid[i] = valid[i].assign(p_id = dat.p_id[valid_idxes[i]-1].values)
    test[i] = test[i].assign(p_id = dat.p_id[test_idxes[i]-1].values)

In [None]:
# check if indices overlap
# np.sort(train[0].index.values)
# np.sort(valid[0].index.values)
# np.sort(test[0].index.values)

In [None]:
# #  check if image data still corresponds to tables --> works
# for i in range(5):
#     print(all(train[i].age.values.round(2) == dat.age.values[train_idxes[i]-1].round(2)))
#     print(all(valid[i].age.values.round(2) == dat.age.values[valid_idxes[i]-1].round(2)))
#     print(all(test[i].age.values.round(2) == dat.age.values[test_idxes[i]-1].round(2)))

In [None]:
Y = np.array(dat.mrs_3months)
print(Y.shape)
Y = to_categorical(Y)
print(Y.shape)

# Define models

In [None]:
# Model for the intercept function: C = number of classes
def mod_baseline(C):
    """Build the network for the intercept function.

    The model takes a 1-dimensional input and applies a single linear Dense
    layer without bias that produces C - 1 outputs (C = number of classes).
    """
    intercept_layers = [
        keras.Input(shape = (1, )),
        keras.layers.Dense(C - 1, activation = "linear", use_bias = False),
    ]
    return keras.Sequential(intercept_layers, name = "mod_baseline")

# Model for linear shift terms
def mod_linear_shift(x):
    """Build the network for linear shift terms.

    `x` is the number of input features; they are mapped to one output by a
    single linear Dense layer without bias.
    """
    shift_layers = [
        keras.Input(shape = (x, )),
        keras.layers.Dense(1, activation = "linear", use_bias = False),
    ]
    return keras.Sequential(shift_layers, name = "mod_linear_shift")

# Model for complex shift terms
def mod_complex_shift(x):
    """Build the network for complex shift terms.

    `x` is the number of input features. Two hidden Dense layers with 8 ReLU
    units each are followed by a linear, bias-free Dense layer with one output.
    """
    shift_layers = [keras.Input(shape = (x, ))]
    shift_layers += [keras.layers.Dense(8, activation = "relu") for _ in range(2)]
    shift_layers.append(keras.layers.Dense(1, activation = "linear", use_bias = False))
    return keras.Sequential(shift_layers, name = "mod_complex_shift")

In [None]:
from classification_models_3D_master.classification_models_3D_master.classification_models_3D.tfkeras import Classifiers

In [None]:
# ResNet with pretrained weights
def img_model(nout, last_layer_activation = "linear"):
    """Build the 3D ResNet50 image model with a small dense head.

    NOTE: a function with the same name and body is defined again in the next
    cell (after a different `Classifiers` import); whichever cell runs last wins.

    Parameters
    ----------
    nout : number of units of the final Dense layer.
    last_layer_activation : activation applied after the final Dense layer.

    Returns
    -------
    keras.Model taking the ResNet50 input and returning the activated output.
    """
    # Classifiers.get returns the model constructor and a preprocessing
    # function; the latter is not used here.
    ResNet50, preprocess_input = Classifiers.get('resnet50')
    base_model = ResNet50(
        input_shape=(128, 128, 20, 3), 
        weights='imagenet', 
        include_top = False)

    # add a global average pooling layer and the dense part and define model
    x = base_model.output
    x = keras.layers.GlobalAveragePooling3D()(x)
    # fc1 / fc2 are created without an activation argument.
    x = keras.layers.Dense(128, name="fc1")(x)
    x = keras.layers.Dense(128, name = "fc2")(x)
    x = keras.layers.Dense(nout, name="output")(x)
    predictions = keras.layers.Activation(last_layer_activation, name='output_activation')(x)
    
    return keras.Model(inputs=base_model.input, outputs=predictions)
mod = img_model(1, "linear")
mod.summary()

In [None]:
from classification_models_3D.tfkeras import Classifiers

def img_model(nout, last_layer_activation = "linear"):
    """Build the 3D ResNet50 image model with a small dense head.

    The backbone is created for inputs of shape (128, 128, 20, 3) with
    weights='imagenet' and without its top. Its output is global-average
    pooled and passed through the Dense layers "fc1" (128), "fc2" (128) and
    "output" (nout), followed by the activation `last_layer_activation`.

    Returns a keras.Model from the backbone input to the activated output.
    """
    # The second element returned by Classifiers.get (preprocessing) is not needed.
    resnet_builder, _ = Classifiers.get('resnet50')
    backbone = resnet_builder(
        input_shape=(128, 128, 20, 3),
        weights='imagenet',
        include_top = False)

    # Pooling followed by the dense head; the layer order is kept because other
    # code addresses these layers by position from the end.
    features = keras.layers.GlobalAveragePooling3D()(backbone.output)
    for units, layer_name in ((128, "fc1"), (128, "fc2"), (nout, "output")):
        features = keras.layers.Dense(units, name = layer_name)(features)
    predictions = keras.layers.Activation(last_layer_activation, name='output_activation')(features)

    return keras.Model(inputs=backbone.input, outputs=predictions)
mod = img_model(1, "linear")
mod.summary()

In [None]:
# from keras import backend as K
# 
# def conv_part(in_):
#     x = layers.Convolution3D(32, kernel_size=(3, 3, 3), padding = 'same', activation = 'relu', 
#                              kernel_initializer = 'he_normal')(in_)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
#     x = layers.Convolution3D(32, kernel_size=(3, 3, 3), padding = 'same', activation = 'relu', 
#                              kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
#     x = layers.Convolution3D(64, kernel_size=(3, 3, 3), padding = 'same', activation = 'relu', 
#                              kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
#     x = layers.Convolution3D(64, kernel_size=(3, 3, 3), padding = 'same', activation = 'relu', 
#                              kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
#     x = layers.Convolution3D(64, kernel_size=(3, 3, 3), padding = 'same', activation = 'relu', 
#                              kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
#     x = layers.Flatten()(x)
#     return x
# 
# # input is supposed to be 6D: None, 2, 128, 128, 64, 3 [batch_size, DWI/tmax, pixelx, pixely, pixelz, color]
# def par_img_model(input_shape, output_shape, input_name, activation = "linear"):
#     in_ = keras.Input(shape = input_shape, name = input_name)
#     # split the input:
#     dwi_in_ = layers.Lambda(lambda x: x[:,:,:,:,:,0])(in_)
#     tmax_in_ = layers.Lambda(lambda x: x[:,:,:,:,:,1])(in_)
#     x_dwi = conv_part(dwi_in_)
#     x_tmax = conv_part(tmax_in_)
#     x = layers.concatenate((x_dwi, x_tmax))
#     x = layers.Dense(128, activation = 'relu', kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.Dropout(0.3)(x)
#     x = layers.Dense(128, activation = 'relu', kernel_initializer = 'he_normal')(x)
#     x = layers.BatchNormalization(center=True, scale=True)(x)
#     x = layers.Dropout(0.3)(x)
#     out_ = layers.Dense(output_shape, activation = activation, use_bias = False)(x) # activation = linear!
#     nn_im = keras.Model(inputs = in_, outputs = out_)
#     return nn_im

# ONTRAMs

In [None]:
# Hyperparameters for all models
C = 7

### Simple Intercept, Complex Shift: TMAX
Learn first to predict the outcome with TMAX images alone

In [None]:
folder_name = "SI_CSb_TMAX/"

## create folders: run only once
#os.mkdir(OUTPUT_DIR)
#os.mkdir(OUTPUT_DIR + folder_name)
#for i in range(5):
#    os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i))
#    for j in range(5):
#        os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i) + "/run" + str(j))

In [None]:
# Hyperparameters
batch_size = 8
toplayer_epochs = 40
alllayer_epochs = 100

In [None]:
# depends on the model
def train_preprocessing(data, label):
    """Augment one training example.

    `data` is an (intercept, volume) pair. The intercept is passed through
    unchanged; the volume goes through zoom, rotate, shift and flip, in that
    order. The label is returned as is.
    """
    intercept, volume = data[0], data[1]
    augmented = flip(shift(rotate(zoom(volume))))
    return (intercept, augmented), label

In [None]:
# Cross-validated training of the SI-CSb ONTRAM on TMAX images:
# for every fold, 5 independent runs are trained in two phases
# (image model frozen, then all layers with a smaller learning rate),
# and the test predictions of the best checkpoint are written to disk.
j = 0  # fold counter used in the output paths (runs in step with f)
my_seed = 1
# NaN-initialised so that a run that was not executed is recognisable in nll.csv
nll = np.full((5, 1), np.nan)

# for train_idx, test_idx in kf.split(X): # folds
for f in range(len(train_idxes)):

    train_idx = train_idxes[f]
    # FIX: use the validation indices of the current fold. Previously
    # `valid_idx` was never set here and silently referred to the variable left
    # over from the split cell (last fold), so the validation images/labels did
    # not match dat_valid and could overlap with this fold's training indices.
    valid_idx = valid_idxes[f]
    test_idx = test_idxes[f]

    # Load data for fold j ------------------------------------------------------

    # The stored indices are shifted by -1 to address the image/label arrays.
    X_train = X_tmax[train_idx-1]
    X_valid = X_tmax[valid_idx-1]
    X_test = X_tmax[test_idx-1]

    Y_train = Y[train_idx-1]
    Y_valid = Y[valid_idx-1]
    Y_test = Y[test_idx-1]

    dat_train = train[f]
    dat_valid = valid[f]
    dat_test = test[f]

    dat_train.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_train.csv", index = False)
    dat_valid.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_valid.csv", index = False)
    dat_test.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_test.csv", index = False)


    # Define datasets for ONTRAM ------------------------------------------------------

    # Each example is ((intercept input of ones, image), one-hot label).
    train_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_train),1]), X_train))
    train_labels = tf.data.Dataset.from_tensor_slices((Y_train))

    valid_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_valid),1]), X_valid))
    valid_labels = tf.data.Dataset.from_tensor_slices((Y_valid))

    test_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_test),1]), X_test))
    test_labels = tf.data.Dataset.from_tensor_slices((Y_test))

    train_loader = tf.data.Dataset.zip((train_data, train_labels))
    validation_loader = tf.data.Dataset.zip((valid_data, valid_labels))
    test_loader = tf.data.Dataset.zip((test_data, test_labels))

    # Loss and metric are built with a fixed batch size, hence drop_remainder.
    train_dataset = (train_loader.shuffle(len(X_train))
                     .map(train_preprocessing)
                     .batch(batch_size, drop_remainder = True))
    validation_dataset = (validation_loader.batch(batch_size, drop_remainder = True))
    # The whole test set is evaluated as one batch.
    test_dataset = (test_loader.batch(len(X_test)))


    # Training ---------------------------------------------------------------------

    for i in range(5):

        # Define model
        mbl = mod_baseline(C)
        mcs = img_model(1, "linear")

        # start to train the top layers
        # NOTE(review): this freezes every layer of the image model, including
        # its dense head (fc1/fc2/output) - confirm that this is intended.
        for layer in mcs.layers:
            layer.trainable = False
        model = ontram(mbl, mcs)

        # save weights of the best model
        checkpoint_filepath = OUTPUT_DIR + folder_name + "/fold" + str(j) + "/" + "run" + str(i) + "/"
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath,
                                                                       save_weights_only = True,
                                                                       monitor = "val_loss",
                                                                       mode = "min",
                                                                       save_best_only = True)

        # compile and train
        model.compile(optimizer = keras.optimizers.Adam(),
                    loss = ontram_loss(C, batch_size),
                    metrics = [ontram_acc(C, batch_size)])
        history_0 = model.fit(train_dataset,
                              validation_data = validation_dataset,
                              epochs = toplayer_epochs,
                              shuffle = True,
                              verbose = 2,
                              callbacks=[model_checkpoint_callback])

        # Load the weights and unfreeze the layers
        model.load_weights(checkpoint_filepath)
        for layer in model.layers:
            layer.trainable = True

        # recompile the model for the modifications to take effect
        # (`learning_rate` replaces the deprecated `lr` keyword)
        model.compile(optimizer = keras.optimizers.Adam(learning_rate=0.0001),
                      loss = ontram_loss(C, batch_size),
                      metrics = [ontram_acc(C, batch_size)])
        history_1 = model.fit(train_dataset,
                              validation_data = validation_dataset,
                              epochs = alllayer_epochs,
                              shuffle = True,
                              verbose = 2,
                              callbacks=[model_checkpoint_callback])

        # save the history of both phases
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        history = pd.concat([pd.DataFrame(history_0.history), pd.DataFrame(history_1.history)])
        history.to_csv(checkpoint_filepath + "history.csv", index = False)

        # Do predictions with the best checkpoint; the loss is rebuilt for the
        # test batch size because the test set is a single batch.
        model.load_weights(checkpoint_filepath)
        test_batch_size = len(X_test)
        model.compile(loss = ontram_loss(C, test_batch_size))
        preds = predict_ontram(model, data = test_dataset)

        # save predictions/parameters
        # cdf has C + 1 columns, pdf has C columns (8 and 7 for C = 7).
        cdf = pd.DataFrame(preds["cdf"])
        cdf.columns = ["y_pred" + str(k) for k in range(C + 1)]
        cdf["p_id"] = dat_test.p_id.values
        cdf["y_true"] = np.argmax(Y_test, axis=1)
        cdf.to_csv(checkpoint_filepath + "cdf.csv", index = False)

        pdf = pd.DataFrame(preds["pdf"])
        pdf.columns = ["y_pred" + str(k) for k in range(C)]
        pdf["p_id"] = dat_test.p_id.values
        pdf["y_true"] = np.argmax(Y_test, axis=1)
        pdf.to_csv(checkpoint_filepath + "pdf.csv", index = False)

        nll[i] = preds["nll"]

    pd.DataFrame(nll).to_csv(OUTPUT_DIR + folder_name + "/fold" + str(j) + "/nll.csv", index = False)

    j += 1

### Simple Intercept, Complex Shift: TMAX + DWI
Learn to combine the pretrained networks for TMAX and DWI images
- Load both models
- Remove the FC part for both models
- Learn a new FC part with weights fixed for the convolutional parts
- Fine-tune the whole model with smaller learning rate

In [None]:
folder_name = "SI_CSb_TMAX_DWI/"

# # create folders: run only once
# #os.mkdir(OUTPUT_DIR)
# os.mkdir(OUTPUT_DIR + folder_name)
# for i in range(5):
#     os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i))
#     for j in range(5):
#         os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i) + "/run" + str(j))

In [None]:
# Hyperparameters
batch_size = 8
toplayer_epochs = 40
alllayer_epochs = 100

In [None]:
def combined_img_model(nout, dir_dwi, dir_tmax, fix_conv_parts = True, last_layer_activation = "linear"):
    """Build a two-modality image model from two trained single-modality models.

    The input has shape (128, 128, 20, 3, 2); index 0 of the last axis is fed
    to the DWI branch and index 1 to the TMAX branch. Each branch is the image
    network of a single-modality ONTRAM, restored from `dir_dwi` / `dir_tmax`
    and cut at its global average pooling output. The two feature vectors are
    concatenated and passed through the Dense layers "fc1", "fc2", "fc3"
    (128 units each) and "output" (`nout` units), followed by the activation
    `last_layer_activation`.

    With `fix_conv_parts` set, all layers of both restored branches are made
    non-trainable. The module-level `C` is used to rebuild the ONTRAMs.
    """
    def restored_conv_part(weights_dir):
        # Rebuild a single-modality ONTRAM, load its checkpoint and keep the
        # image network up to layers[-5] (global average pooling output).
        single_modality = ontram(mod_baseline(C), img_model(1, "linear"))
        single_modality.load_weights(weights_dir).expect_partial()
        shift_net = single_modality.mod_shift[0]
        return keras.Model(shift_net.input, shift_net.layers[-5].output)

    stacked_in = keras.Input(shape = (128, 128, 20, 3, 2))
    # split the input along its last axis
    dwi_volume = layers.Lambda(lambda x: x[:,:,:,:,:,0])(stacked_in)
    tmax_volume = layers.Lambda(lambda x: x[:,:,:,:,:,1])(stacked_in)

    conv_part_dwi = restored_conv_part(dir_dwi)
    conv_part_tmax = restored_conv_part(dir_tmax)

    # optionally fix the convolutional parts
    if fix_conv_parts:
        for conv_part in (conv_part_dwi, conv_part_tmax):
            for layer in conv_part.layers:
                layer.trainable = False

    merged = keras.layers.concatenate((conv_part_dwi(dwi_volume), conv_part_tmax(tmax_volume)))
    for units, layer_name in ((128, "fc1"), (128, "fc2"), (128, "fc3"), (nout, "output")):
        merged = keras.layers.Dense(units, name=layer_name)(merged)
    predictions = keras.layers.Activation(last_layer_activation, name='output_activation')(merged)

    return keras.Model(inputs=stacked_in, outputs=predictions)

In [None]:
# checkpoint_filepath_dwi = OUTPUT_DIR_DWI + "SI_CSb/fold" + str(j) + "/" + "run" + str(i) + "/"
# checkpoint_filepath_tmax = OUTPUT_DIR + "SI_CSb_TMAX/fold" + str(j) + "/" + "run" + str(i) + "/"
# 
# model = combined_img_model(C, checkpoint_filepath_dwi, checkpoint_filepath_tmax, 
#                         fix_conv_parts = True, last_layer_activation = "linear")
# model.summary()

In [None]:
# depends on the model
def train_preprocessing(data, label):
    """Augment one two-modality training example.

    `data` is an (intercept, volume) pair. The intercept is passed through
    unchanged; the volume goes through rotate2, shift2 and flip2, in that
    order. The label is returned as is.
    """
    intercept, volume = data[0], data[1]
    # zoom2 is not applied in this variant
    #volume = zoom2(volume)
    augmented = flip2(shift2(rotate2(volume)))
    return (intercept, augmented), label

In [None]:
# Cross-validated training of the combined TMAX + DWI SI-CSb ONTRAM.
# As written, only folds 3 and 4 and runs 0 and 1 are processed; the
# single-modality checkpoints of the same fold/run are used as starting point.
j = 3  # fold counter used in the output paths (runs in step with f)
my_seed = 1
# NaN-initialised: only the runs executed below are filled in, so rows of
# runs that were not trained appear as NaN in nll.csv instead of
# uninitialised memory.
nll = np.full((5, 1), np.nan)

# for train_idx, test_idx in kf.split(X): # folds
#for f in range(len(train_idxes)):
for f in range(3,5):

    train_idx = train_idxes[f]
    # FIX: use the validation indices of the current fold. Previously
    # `valid_idx` was never set here and silently referred to a variable left
    # over from an earlier cell, so the validation images/labels did not match
    # dat_valid and could overlap with this fold's training indices.
    valid_idx = valid_idxes[f]
    test_idx = test_idxes[f]

    # Load data for fold j ------------------------------------------------------

    # The stored indices are shifted by -1 to address the image/label arrays.
    X_train = X[train_idx-1]
    X_valid = X[valid_idx-1]
    X_test = X[test_idx-1]

    Y_train = Y[train_idx-1]
    Y_valid = Y[valid_idx-1]
    Y_test = Y[test_idx-1]

    dat_train = train[f]
    dat_valid = valid[f]
    dat_test = test[f]

    dat_train.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_train.csv", index = False)
    dat_valid.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_valid.csv", index = False)
    dat_test.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_test.csv", index = False)


    # Define datasets for ONTRAM ------------------------------------------------------

    # Each example is ((intercept input of ones, stacked image), one-hot label).
    train_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_train),1]), X_train))
    train_labels = tf.data.Dataset.from_tensor_slices((Y_train))

    valid_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_valid),1]), X_valid))
    valid_labels = tf.data.Dataset.from_tensor_slices((Y_valid))

    test_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_test),1]), X_test))
    test_labels = tf.data.Dataset.from_tensor_slices((Y_test))

    train_loader = tf.data.Dataset.zip((train_data, train_labels))
    validation_loader = tf.data.Dataset.zip((valid_data, valid_labels))
    test_loader = tf.data.Dataset.zip((test_data, test_labels))

    # Loss and metric are built with a fixed batch size, hence drop_remainder.
    train_dataset = (train_loader.shuffle(len(X_train))
                     .map(train_preprocessing)
                     .batch(batch_size, drop_remainder = True))
    validation_dataset = (validation_loader.batch(batch_size, drop_remainder = True))
    # The whole test set is evaluated as one batch.
    test_dataset = (test_loader.batch(len(X_test)))


    # Training ---------------------------------------------------------------------

    for i in range(0,2):

        # Define combined model for DWI & TMAX
        checkpoint_filepath_dwi = OUTPUT_DIR_DWI + "SI_CSb/fold" + str(j) + "/" + "run" + str(i) + "/"
        checkpoint_filepath_tmax = OUTPUT_DIR + "SI_CSb_TMAX/fold" + str(j) + "/" + "run" + str(i) + "/"
        mcs_combined = combined_img_model(1, checkpoint_filepath_dwi, checkpoint_filepath_tmax,
                                          fix_conv_parts = True, last_layer_activation = "linear")

        # take the weights of the DWI baseline model
        mbl = mod_baseline(C)
        mcs = img_model(1, "linear")
        ontram_dwi = ontram(mbl, mcs)
        ontram_dwi.load_weights(checkpoint_filepath_dwi).expect_partial()

        # ontram model: restored DWI intercept model + combined image model
        model = ontram(ontram_dwi.mod_baseline, mcs_combined)

        # save weights of the best model
        checkpoint_filepath = OUTPUT_DIR + folder_name + "/fold" + str(j) + "/" + "run" + str(i) + "/"
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath,
                                                                       save_weights_only = True,
                                                                       monitor = "val_loss",
                                                                       mode = "min",
                                                                       save_best_only = True)

        # compile and train (convolutional parts fixed)
        model.compile(optimizer = keras.optimizers.Adam(),
                    loss = ontram_loss(C, batch_size),
                    metrics = [ontram_acc(C, batch_size)])
        history_0 = model.fit(train_dataset,
                              validation_data = validation_dataset,
                              epochs = toplayer_epochs,
                              shuffle = True,
                              verbose = 2,
                              callbacks=[model_checkpoint_callback])

        # Load the weights and unfreeze the layers
        model.load_weights(checkpoint_filepath)
        for layer in model.layers:
            layer.trainable = True

        # recompile the model for the modifications to take effect
        # (`learning_rate` replaces the deprecated `lr` keyword)
        model.compile(optimizer = keras.optimizers.Adam(learning_rate=0.0001),
                      loss = ontram_loss(C, batch_size),
                      metrics = [ontram_acc(C, batch_size)])
        history_1 = model.fit(train_dataset,
                              validation_data = validation_dataset,
                              epochs = alllayer_epochs,
                              shuffle = True,
                              verbose = 2,
                              callbacks=[model_checkpoint_callback])

        # save the history of both phases
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        history = pd.concat([pd.DataFrame(history_0.history), pd.DataFrame(history_1.history)])
        history.to_csv(checkpoint_filepath + "history.csv", index = False)

        # Do predictions with the best checkpoint; the loss is rebuilt for the
        # test batch size because the test set is a single batch.
        model.load_weights(checkpoint_filepath)
        test_batch_size = len(X_test)
        model.compile(loss = ontram_loss(C, test_batch_size))
        preds = predict_ontram(model, data = test_dataset)

        # save predictions/parameters
        # cdf has C + 1 columns, pdf has C columns (8 and 7 for C = 7).
        cdf = pd.DataFrame(preds["cdf"])
        cdf.columns = ["y_pred" + str(k) for k in range(C + 1)]
        cdf["p_id"] = dat_test.p_id.values
        cdf["y_true"] = np.argmax(Y_test, axis=1)
        cdf.to_csv(checkpoint_filepath + "cdf.csv", index = False)

        pdf = pd.DataFrame(preds["pdf"])
        pdf.columns = ["y_pred" + str(k) for k in range(C)]
        pdf["p_id"] = dat_test.p_id.values
        pdf["y_true"] = np.argmax(Y_test, axis=1)
        pdf.to_csv(checkpoint_filepath + "pdf.csv", index = False)

        nll[i] = preds["nll"]

    pd.DataFrame(nll).to_csv(OUTPUT_DIR + folder_name + "/fold" + str(j) + "/nll.csv", index = False)

    j += 1

### Simple intercept, Complex shift, Linear shift

In [None]:
# folder_name = "SI_LSx_CSb_TMAX_DWI/"
# IMG_MODEL_DIR = "SI_CSb_TMAX_DWI/"
# POLR_DIR = OUTPUT_DIR_DWI + "SI_LSx/"
# 
# ## create folders: run only once
# #os.mkdir(OUTPUT_DIR + folder_name)
# #for i in range(5):
# #    os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i))
# #    for j in range(5):
# #        os.mkdir(OUTPUT_DIR + folder_name + "fold" + str(i) + "/run" + str(j))

In [None]:
# # Hyperparameters
# batch_size = 8
# toplayer_epochs = 40
# alllayer_epochs = 100

In [None]:
# # function for preprocessing
# def train_preprocessing(data, label):
#     """Process training data."""
#     intercept = data[0] # intercept
#     volume = data[1] # shift: image
#     tabular = data[2] # shift: tabular
#     volume = zoom(volume)
#     volume = rotate(volume)
#     volume = shift(volume)
#     volume = flip(volume)
#     return (intercept, volume, tabular), label

In [None]:
# j = 0
# my_seed = 1
# nll = np.empty((5, 1))
# estimates = np.empty((5, 6+9))
# estimates_sd = np.empty((5, 6+9))
# 
# # for train_idx, test_idx in kf.split(X): # folds
# for f in range(len(train_idxes)):
#     
#     train_idx = train_idxes[f]
#     test_idx = test_idxes[f]
#     
#     # Load data for fold j ------------------------------------------------------
#     
#     # define datasets
#     X_tab_train = np.array([train[f].age, train[f].nihss_bl, train[f].sys_bloodpressure_bl, 
#                             train[f].rf_diabetes, train[f].rf_hypertonia, train[f].rf_smoker, 
#                             train[f].rf_tia_stroke, train[f].lyse, train[f].time_to_groin_puncture]).T
#     X_tab_valid = np.array([valid[f].age, valid[f].nihss_bl, valid[f].sys_bloodpressure_bl, 
#                             valid[f].rf_diabetes, valid[f].rf_hypertonia, valid[f].rf_smoker, 
#                             valid[f].rf_tia_stroke, valid[f].lyse, valid[f].time_to_groin_puncture]).T
#     X_tab_test = np.array([test[f].age, test[f].nihss_bl, test[f].sys_bloodpressure_bl, 
#                             test[f].rf_diabetes, test[f].rf_hypertonia, test[f].rf_smoker, 
#                             test[f].rf_tia_stroke, test[f].lyse, test[f].time_to_groin_puncture]).T
#     # normalize
#     sd = np.empty((9,))
#     for i in range(X_tab_train.shape[1]):
#         train_mean = np.mean(X_tab_train[:,i])
#         train_std = np.std(X_tab_train[:,i])
#         X_tab_train[:,i] = (X_tab_train[:,i] - train_mean) / train_std
#         X_tab_valid[:,i] = (X_tab_valid[:,i] - train_mean) / train_std
#         X_tab_test[:,i] = (X_tab_test[:,i] - train_mean) / train_std
#         sd[i] = train_std
#     
#     X_train = X[train_idx-1]
#     X_valid = X[valid_idx-1]
#     X_test = X[test_idx-1]
#     
#     Y_train = Y[train_idx-1]
#     Y_valid = Y[valid_idx-1]
#     Y_test = Y[test_idx-1]
#     
#     dat_train = train[f]
#     dat_valid = valid[f]
#     dat_test = test[f]
#     
#     dat_train.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_train.csv", index = False)
#     dat_valid.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_valid.csv", index = False)
#     dat_test.to_csv(OUTPUT_DIR +  folder_name + "fold" + str(j) + "/" + "dat_test.csv", index = False)  
#     
#     
#     # Define datasets for ONTRAM ------------------------------------------------------
#     
#     train_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_train),1]), X_train, X_tab_train))
#     train_labels = tf.data.Dataset.from_tensor_slices((Y_train))
#     
#     valid_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_valid),1]), X_valid, X_tab_valid))
#     valid_labels = tf.data.Dataset.from_tensor_slices((Y_valid))
#     
#     test_data = tf.data.Dataset.from_tensor_slices((np.ones(shape=[len(X_test),1]), X_test, X_tab_test))
#     test_labels = tf.data.Dataset.from_tensor_slices((Y_test))
#     
#     train_loader = tf.data.Dataset.zip((train_data, train_labels))
#     validation_loader = tf.data.Dataset.zip((valid_data, valid_labels))
#     test_loader = tf.data.Dataset.zip((test_data, test_labels))
#     
#     train_dataset = (train_loader.shuffle(len(X_train))
#                      .map(train_preprocessing)
#                      .batch(batch_size, drop_remainder = True))
#     validation_dataset = (validation_loader.batch(batch_size, drop_remainder = True))
#     test_dataset = (test_loader.batch(len(X_test), drop_remainder = True))
# 
#     # Training ---------------------------------------------------------------------
#     
#     for i in range(5):
#         
#         # folder to save weights 
# 
#         # define model and load weights from SI_LSx
#         mbl = mod_baseline(C)
#         mls = mod_linear_shift(X_tab_train.shape[1])
#         polr = ontram(mbl, mls)
#         polr.load_weights(POLR_DIR + "fold" + str(j) + "/run" + str(i) + "/model_weights.hdf5")
#         mcs = img_model(1, "linear")
#         
#         # start to train the top layers
#         for layer in mcs.layers:
#             layer.trainable = False
#         model = ontram(polr.mod_baseline, [mcs, polr.mod_shift[0]])
#         
#         # save weights of the best model
#         checkpoint_filepath = OUTPUT_DIR + folder_name + "/fold" + str(j) + "/" + "run" + str(i) + "/"
#         model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath = checkpoint_filepath,
#                                                                        save_weights_only = True,
#                                                                        monitor = "val_loss",
#                                                                        mode = "min",
#                                                                        save_best_only = True)
#         
#         # compile and train
#         model.compile(optimizer = keras.optimizers.Adam(),
#                     loss = ontram_loss(C, batch_size),
#                     metrics = [ontram_acc(C, batch_size)])
#         history_0 = model.fit(train_dataset,
#                               validation_data = validation_dataset,
#                               epochs = toplayer_epochs,
#                               shuffle = True,
#                               verbose = 2,
#                               callbacks=[model_checkpoint_callback])
#         
#         # Load the weights and unfreeze the layers
#         model.load_weights(checkpoint_filepath)
#         for layer in model.layers:
#             layer.trainable = True
#         
#         # recompile the model for the modifications to take effect
#         model.compile(optimizer = keras.optimizers.Adam(lr=0.0001), 
#                       loss = ontram_loss(C, batch_size),
#                       metrics = [ontram_acc(C, batch_size)])   
#         history_1 = model.fit(train_dataset,
#                               validation_data = validation_dataset,
#                               epochs = alllayer_epochs,
#                               shuffle = True,
#                               verbose = 2,
#                               callbacks=[model_checkpoint_callback])
#         
#         # save the history
#         pd.DataFrame(history_0.history).append(pd.DataFrame(history_1.history)).to_csv(checkpoint_filepath + "history.csv", index = False)
#         
#         # Do predictions
#         model.load_weights(checkpoint_filepath)
#         test_batch_size = len(X_test)
#         model.compile(loss = ontram_loss(C, test_batch_size))
#         preds = predict_ontram(model, data = test_dataset)
#         params = get_parameters(model)
#         
#         # save predictions/parameters
#         cdf = pd.DataFrame(preds["cdf"])
#         cdf.columns = ["y_pred" + str(i) for i in range(8)]
#         cdf["p_id"] = dat_test.p_id.values
#         cdf["y_true"] = np.argmax(Y_test, axis=1)
#         cdf.to_csv(checkpoint_filepath + "cdf.csv", index = False)  
#         
#         pdf = pd.DataFrame(preds["pdf"])
#         pdf.columns = ["y_pred" + str(i) for i in range(7)]
#         pdf["p_id"] = dat_test.p_id.values
#         pdf["y_true"] = np.argmax(Y_test, axis=1)
#         pdf.to_csv(checkpoint_filepath + "pdf.csv", index = False)
#         
#         nll[i] = preds["nll"]
#         estimates[i] = np.concatenate((params["intercept"][0][0][0], np.concatenate(params["shift"][0][1][0]))) 
#         estimates_sd[i] = np.concatenate((params["intercept"][0][0][0], np.concatenate(params["shift"][0][1][0])/sd)) 
#     
#     pd.DataFrame(nll).to_csv(OUTPUT_DIR + folder_name + "/fold" + str(j) + "/nll.csv", index = False)
#     
#     pd_estimates = pd.DataFrame(estimates)
#     pd_estimates.columns = ["intercept0", "intercept1", "intercept2", "intercept3", "intercept4", 
#                             "intercept5", "age", "nihss_bl", "sys_bloodpressure_bl", "rf_diabetes", 
#                             "rf_hypertonia", "rf_smoker", "rf_tia_stroke", "lyse", "time_to_groin_puncture"]
#     pd_estimates.to_csv(OUTPUT_DIR + folder_name + "/fold" + str(j) + "/estimates.csv", index = False)
#     
#     pd_estimates_sd = pd.DataFrame(estimates_sd)
#     pd_estimates_sd.columns = ["intercept0", "intercept1", "intercept2", "intercept3", "intercept4", 
#                               "intercept5", "age", "nihss_bl", "sys_bloodpressure_bl", "rf_diabetes", 
#                               "rf_hypertonia", "rf_smoker", "rf_tia_stroke", "lyse", "time_to_groin_puncture"]
#     pd_estimates_sd.to_csv(OUTPUT_DIR + folder_name + "/fold" + str(j) + "/estimates_sd.csv", index = False)
#     
#     j += 1

# Trafo ensembles
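Calculate the ensemble CDF per patient by averaging the transformation functions (the logits of the CDFs predicted by the five runs of each fold) and transforming the mean back with the inverse logit.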

In [None]:
# Model variants whose run-level predictions are ensembled and evaluated
# in the following cells.
folder_names = [
    "SI_CSb_TMAX",
    "SI_CSb_TMAX_DWI",
]

In [None]:
# Transformation ensemble per model variant and cross-validation fold:
# the CDFs predicted by the individual runs are averaged on the logit
# (transformation) scale and mapped back to probabilities with expit.
# For every fold three files are written next to the run folders:
# trafo_cdf.csv, trafo_pdf.csv and trafo_pdf_bin.csv.
n_runs = 5
cdf_cols = ["y_pred" + str(k) for k in range(8)]
pdf_cols = ["y_pred" + str(k) for k in range(7)]

for folder_name in folder_names:
    for f in range(5): # fold
        checkpoint_filepath = OUTPUT_DIR + folder_name + "/fold" + str(f) + "/"
        run_cdfs = [pd.read_csv(checkpoint_filepath + "run" + str(r) + "/cdf.csv")
                    for r in range(n_runs)]
        # Patient ids and true classes are taken from run 0.
        # NOTE(review): this assumes every run stores the test patients of a
        # fold in the same row order - confirm against the prediction code.
        cdf0 = run_cdfs[0]

        # Stack to (run, patient, cut point) and average over the run axis
        # explicitly. np.mean() on a DataFrame relies on pandas' implicit axis,
        # which reduces over both axes (returning a scalar) from pandas 2.0 on.
        # np.stack raises if the runs do not all have the same number of rows.
        run_logits = logit(np.stack([run[cdf_cols].to_numpy(dtype = float)
                                     for run in run_cdfs]))
        ensemble_cdf = expit(run_logits.mean(axis = 0))

        # Class probabilities are the increments of the CDF between cut points.
        ensemble_pdf = ensemble_cdf[:, 1:] - ensemble_cdf[:, :-1]
        y_pred = np.argmax(ensemble_pdf, axis = 1)
        y_pred_fav = np.sum(ensemble_pdf[:, :3], axis = 1) # mRS 0-2
        y_pred_unfav = np.sum(ensemble_pdf[:, 3:], axis = 1) # mRS 3-6
        y_pred_bin = np.where(y_pred_fav > 0.5, 1, 0)
        y_true_bin = np.where(cdf0.y_true <= 2, 1, 0)

        # os.path.join avoids the doubled "/" that plain concatenation produced,
        # because checkpoint_filepath already ends with a separator.
        trafo_cdf = pd.DataFrame(ensemble_cdf, columns = cdf_cols)
        trafo_cdf["p_id"] = cdf0.p_id
        trafo_cdf["y_true"] = cdf0.y_true
        trafo_cdf["y_pred"] = y_pred
        trafo_cdf.to_csv(os.path.join(checkpoint_filepath, "trafo_cdf.csv"), index = False)

        trafo_pdf = pd.DataFrame(ensemble_pdf, columns = pdf_cols)
        trafo_pdf["p_id"] = cdf0.p_id
        trafo_pdf["y_true"] = cdf0.y_true
        trafo_pdf["y_pred"] = y_pred
        trafo_pdf.to_csv(os.path.join(checkpoint_filepath, "trafo_pdf.csv"), index = False)

        # Binary view: 1 = favourable outcome (mRS 0-2), 0 = unfavourable.
        trafo_pdf_bin = pd.DataFrame({"y_pred1_fav": y_pred_fav,
                                      "y_pred0_unfav": y_pred_unfav,
                                      "y_pred": y_pred_bin,
                                      "y_true": y_true_bin,
                                      "p_id": cdf0.p_id})
        trafo_pdf_bin.to_csv(os.path.join(checkpoint_filepath, "trafo_pdf_bin.csv"), index = False)

In [None]:
# combine all test samples
for folder_name in folder_names:
    checkpoint_filepath = OUTPUT_DIR + folder_name 
    cdf0 = pd.read_csv(checkpoint_filepath + "/fold0/trafo_cdf.csv")
    cdf1 = pd.read_csv(checkpoint_filepath + "/fold1/trafo_cdf.csv")
    cdf2 = pd.read_csv(checkpoint_filepath + "/fold2/trafo_cdf.csv")
    cdf3 = pd.read_csv(checkpoint_filepath + "/fold3/trafo_cdf.csv")
    cdf4 = pd.read_csv(checkpoint_filepath + "/fold4/trafo_cdf.csv")
    cdf = pd.concat((cdf0, cdf1, cdf2, cdf3, cdf4))
    cdf.to_csv(OUTPUT_DIR + folder_name + "/test_cdf.csv", index = False)
    
    pdf0 = pd.read_csv(checkpoint_filepath + "/fold0/trafo_pdf.csv")
    pdf1 = pd.read_csv(checkpoint_filepath + "/fold1/trafo_pdf.csv")
    pdf2 = pd.read_csv(checkpoint_filepath + "/fold2/trafo_pdf.csv")
    pdf3 = pd.read_csv(checkpoint_filepath + "/fold3/trafo_pdf.csv")
    pdf4 = pd.read_csv(checkpoint_filepath + "/fold4/trafo_pdf.csv")
    pdf = pd.concat((pdf0, pdf1, pdf2, pdf3, pdf4))
    pdf.to_csv(OUTPUT_DIR + folder_name + "/test_pdf.csv", index = False)
    
    pdf_bin0 = pd.read_csv(checkpoint_filepath + "/fold0/trafo_pdf_bin.csv")
    pdf_bin1 = pd.read_csv(checkpoint_filepath + "/fold1/trafo_pdf_bin.csv")
    pdf_bin2 = pd.read_csv(checkpoint_filepath + "/fold2/trafo_pdf_bin.csv")
    pdf_bin3 = pd.read_csv(checkpoint_filepath + "/fold3/trafo_pdf_bin.csv")
    pdf_bin4 = pd.read_csv(checkpoint_filepath + "/fold4/trafo_pdf_bin.csv")
    pdf_bin = pd.concat((pdf_bin0, pdf_bin1, pdf_bin2, pdf_bin3, pdf_bin4))
    pdf_bin.to_csv(OUTPUT_DIR + folder_name + "/test_pdf_bin.csv", index = False)

# Results

In [None]:
# Performance of every model variant on the pooled test predictions.
# Each list starts with the metric name so that it can be handed to
# tabulate directly as a labelled table row; one value is appended per model.
nll = ["nll"]
acc = ["acc"]
qwk = ["qwk"]
nll_bin = ["nll_bin"]
acc_bin = ["acc_bin"]
bs = ["bs"]
auc = ["auc"]

class_labels = [0, 1, 2, 3, 4, 5, 6]
class_prob_cols = ["y_pred" + str(k) for k in class_labels]

for folder_name in folder_names:
    pdf = pd.read_csv(OUTPUT_DIR + folder_name + "/test_pdf.csv")
    pdf_bin = pd.read_csv(OUTPUT_DIR + folder_name + "/test_pdf_bin.csv")

    # Ordinal outcome. labels is passed explicitly so that log_loss maps the
    # probability columns to classes 0..6 even when a class does not occur in
    # y_true (it would otherwise infer the label set from y_true).
    nll.append(skm.log_loss(pdf.y_true, pdf[class_prob_cols], labels = class_labels))
    acc.append(skm.accuracy_score(pdf.y_true, pdf.y_pred))
    qwk.append(skm.cohen_kappa_score(pdf.y_true, pdf.y_pred, labels = class_labels, weights = "quadratic"))

    # Binary outcome (1 = favourable); probability columns ordered class 0, class 1.
    nll_bin.append(skm.log_loss(pdf_bin.y_true, pdf_bin[["y_pred0_unfav","y_pred1_fav"]], labels = [0, 1]))
    acc_bin.append(skm.accuracy_score(pdf_bin.y_true, pdf_bin.y_pred))
    # Probabilities are passed positionally because newer scikit-learn releases
    # renamed the keyword (y_prob -> y_proba); positional use works with both.
    bs.append(skm.brier_score_loss(pdf_bin.y_true, pdf_bin.y_pred1_fav, pos_label = 1))
    auc.append(skm.roc_auc_score(y_true = pdf_bin.y_true, y_score = pdf_bin.y_pred1_fav))

In [None]:
# Show the collected ordinal NLL row: the metric name followed by one value
# per entry of folder_names.
nll

In [None]:
# Ordinal outcome
header = [" ", "SI_CSb_TMAX", "SI_CSb_TMAX_DWI"]
data = [nll, acc, qwk]
print(tabulate(data, headers = header, tablefmt = "grid"))

In [None]:
# Binary outcome
header = [" ", "SI_CSb_TMAX", "SI_CSb_TMAX_DWI"]
data = [nll_bin, acc_bin, bs, auc]
print(tabulate(data, headers = header, tablefmt = "grid"))

In [None]:
# Estimates
folder_names = ["SI_LSx", "SI_LSx_Expert", "SI_LSx_CSb"]
model_est = []
for folder_name in folder_names:
    cv_est = []
    for f in range(5):
        checkpoint_filepath = OUTPUT_DIR + folder_name + "/fold" + str(f) + "/"
        est = pd.read_csv(checkpoint_filepath + "estimates.csv")
        cv_est.append(est)
    cv_est = pd.concat(cv_est)
    model_est.append(cv_est)

In [None]:
# Column-wise mean of the stacked per-fold estimates, one column per model.
# The table is built from every entry of model_est instead of the hard-coded
# indices 0-2, so it keeps working when the number of models changes.
estimates = pd.concat([pd.DataFrame(m.mean()) for m in model_est], axis = 1)
estimates.columns = folder_names
estimates