# Example Setups for Model Creation

The following scripts show the structure of the code to be run for specific parts of the survival modelling pipeline. These are not meant to be functional, but are created to give guidance on how to set up the pipeline to run for tuning, training, and evaluation. This code assumes that the specific preprocessing steps have been followed. 

## Hyperparameter Tuning (Unimodal)

In [None]:
# Unimodal Hyperparameter Tuning
import pandas as pd
from all_purpose_hp_tuning import HyperparameterTuner
from joblib import Parallel, delayed

data_root_path = "path/to/parent_dir_of_survival_data" #Structure within this folder outlined in the main README.md

#Specify models you would like to tune on which data. 
#If only one entry in each list, this will tune one model. 
#If multiple entries in lists, this will tune all possible model combinations. 
hp_tuner = HyperparameterTuner(root_path = data_root_path,
                             model_types = ['cph'], #Pick the type of model you would like to create. Supports cph, ssvm_ker (kernel ssvm), and rsf
                             surv_types = ['OS', 'HDFS'], #Corresponds to the prefix(es) in the file names describing clinical endpoint
                             feature_types = ['clin', 'rad'], #The types of features used to create separate unimodal models (these are in the corresponding file names)
                             combat_types = ['0', '1'] #This parameter was for a separate experiment not included in the paper. Just checks for additional prefix in file name.
                             )
hp_tuner.generate_run_params() #Creates parameter grids for each model requested

#Run hyperparameter tuning with parallelization to tune multiple models at once.
#One job is dispatched per row of hp_tuner.run_parameters.
hp_tuning_results = Parallel(n_jobs = 4, verbose = 10)(delayed(hp_tuner.run_grid_hp_tuning)(param_idx) for param_idx in range(hp_tuner.run_parameters.shape[0]))

#Combine all model results into one dataframe for further modelling.
#Each entry is unpacked as a (model, result) pair; only the result frame is kept.
#Concatenating once (instead of growing a dataframe inside a loop) avoids re-copying
#the accumulated rows on every iteration.
result_frames = [result for _, result in hp_tuning_results]
hp_tuner_results = pd.concat(result_frames, axis = 0) if result_frames else pd.DataFrame() #Empty dataframe if nothing was tuned

hp_tuner_results #Display the combined hyperparameter tuning results

## Hyperparameter Tuning (Multimodal)

In [None]:
import pandas as pd
from all_purpose_hp_tuning import HyperparameterTuner
from joblib import Parallel, delayed

#Unimodal model hyperparameter tuning should have been executed prior to running this
#This will, however, create the unimodal models so you don't have to save the outputted risks separately

data_root_path = "path/to/parent_dir_of_survival_data" #Structure within this folder outlined in the main README.md
parameter_path = "path/to/parent_dir_of_survival_data/Late_Fusion_Param_Grids/Late_Param_Grid.csv" #Contains all parameter grids in json format, one parameter grid per row

param_grid = pd.read_csv(parameter_path) #Read in parameter grid data

#Specify models and data you would like to use for late fusion
#Here, model types refers to the model used to combine the risk scores of two or more unimodal models. 
#Mirrors the setup for the unimodal hyperparameter tuning (more descriptions of the variables in that section)
late_hp = HyperparameterTuner(root_path = data_root_path, 
                              model_types = ['cph'], 
                              surv_types = ['OS', 'HDFS'], 
                              feature_types = ['risk'], 
                              combat_types = ['0'])
late_hp.generate_run_params()

#Run late fusion tuning in parallel, one job per row of late_hp.run_parameters.
#The same parameter grid dataframe is passed to every job.
late_hp_results = Parallel(n_jobs = 2, verbose = 10)(delayed(late_hp.run_late_grid_hp_tuning)(param_idx, param_grid) for param_idx in range(late_hp.run_parameters.shape[0]))

#Combine all late fusion results into one dataframe.
#Concatenating once (instead of growing a dataframe inside a loop) avoids re-copying
#the accumulated rows on every iteration.
late_result_frames = list(late_hp_results)
hp_tuner_results = pd.concat(late_result_frames, axis = 0) if late_result_frames else pd.DataFrame() #Empty dataframe if nothing was tuned

hp_tuner_results #Visualize output of multimodal hyperparameter tuning

## Unimodal Model Training and Evaluation

In [None]:
from survival_model import SurvivalModel

#Prerequisite: hyperparameter tuning has been run and the hyperparameter tuning file is accessible in the data structure

data_root_path = "path/to/parent_dir_of_survival_data" #Structure within this folder outlined in the main README.md

#Settings for a single model. To train multiple models, loop over each row of
#information in the hyperparameter tuning file and build one settings dict per row.
model_settings = {
    'model_type': 'cph', #Can also be rsf or ssvm_ker
    'root_path': data_root_path,
    'surv_type': 'OS',
    'feature_type': 'rad',
    'combat': '0',
    'feat_filt': 'Clin_', #regex term to filter features to subset containing this string
}
curr_model = SurvivalModel.create_model(**model_settings)

#Fit the model configured above, then bootstrap to estimate performance
curr_model.train()
curr_model.bootstrap(iter_num = 500) #500 bootstrap iterations
curr_model.calc_conf(conf_interval = 0.95) #Confidence interval of the performance metrics

curr_model.med_test_U #Median Uno's C testing performance. Other metrics are accessible (see calc_conf)

## Late Fusion Model Training and Evaluation

In [None]:
import pandas as pd #Needed for pd.read_csv below; this cell did not import it on its own

from survival_model import LateFusionModel

#Assumes hyperparameter tuning has been run and the hyperparameter tuning file is accessible in the data structure

data_root_path = "path/to/parent_dir_of_survival_data" #Structure within this folder outlined in the main README.md
parameter_path = "path/to/parent_dir_of_survival_data/Late_Fusion_Param_Grids/Late_Param_Grid.csv" #Contains all parameter grids in json format, one parameter grid per row

param_grid = pd.read_csv(parameter_path) #Read in parameter grid data

#This is to train one model. If you would like to train multiple models, this can be done by looping over each row of
#information in the late fusion hyperparameter tuning file. 

#Format is similar to the unimodal training, with a similar training setup for late fusion hyperparameter tuning.
#For more information on input variables, please refer to those sections.
lf_mod = LateFusionModel(root_path = data_root_path, 
                        model_type = 'cph', 
                        surv_type = 'OS',  
                        combat = '0')

#The same parameter grid is passed both when setting the first layer and when training
lf_mod.set_first_layer(parameter_grid = param_grid)
lf_mod.train(late_params = param_grid)
lf_mod.bootstrap(iter_num = 500) #Number of bootstrap iterations to run
lf_mod.calc_conf(conf_interval = 0.95) #The same metrics available for unimodal modelling are also available here