# 1. Setup

In [1]:
install.packages("ggbeeswarm")
install.packages("ggridges")
install.packages("patchwork")
install.packages("ggbump")
install.packages("cowplot")
install.packages("svglite")
install.packages("survRM2")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘beeswarm’, ‘vipor’


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [2]:
library(ggplot2)
library(dplyr)
library(repr)
library(ggbeeswarm)
library(ggridges)
library(patchwork)
library(ggbump)
library(cowplot)
library(svglite)
library(survival)
library(survRM2)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘cowplot’


The following object is masked from ‘package:patchwork’:

    align_plots




# 2. Loading data

## 2.1 Preparing timings data

In [7]:
# Collect every timings file from the results folder into
#   merged_timings_dataframes : named list, one data frame per endpoint
#   final_timings_df          : all endpoints row-bound into one data frame
# Metadata (model_type, endpoint, combo_name, cv_split) is parsed from the
# "_"-separated file name.

# Define directory where files are stored
dir <- "Benchmarking/Results"

# List all files in the directory
files <- system(paste0("dx ls ", dir), intern = TRUE)

# Keep timings files only. The dot is escaped so it matches a literal ".".
# The pattern is intentionally not anchored with "$": the loop below also
# handles listing entries of the form "<name> : <file-id>".
timings_files <- files[grepl("_timings\\.tsv", files)]

# Per-endpoint lists of per-file data frames. They are bound once after the
# loop instead of growing a data frame with rbind() on every iteration.
timings_by_endpoint <- list()

# Loop through each timings file to download and process
for (perf_entry in timings_files) {
  # Split entry to extract filename and file ID (if available)
  parts <- strsplit(perf_entry, " : ", fixed = TRUE)[[1]]
  file_name <- parts[1]
  file_id <- if (length(parts) > 1) parts[2] else NA_character_

  # Download by ID if available, otherwise by path; always write to
  # `file_name`. "-f" overwrites a stale local copy left by an interrupted run.
  remote <- if (!is.na(file_id)) file_id else paste0(dir, "/", file_name)
  download_cmd <- paste(
    "dx download", shQuote(remote), "-o", shQuote(file_name), "-f"
  )
  status <- system(download_cmd)
  if (status != 0L) {
    stop("Download failed (exit status ", status, "): ", download_cmd,
         call. = FALSE)
  }

  # Read the file into a dataframe, then remove the local copy right away so
  # it is cleaned up even if a later step fails
  perf_data <- read.csv(file_name, sep = "\t", header = TRUE)
  unlink(file_name)

  # Ensure all expected timing columns are present; missing ones become NA
  # (rep() keeps this valid for a file with zero data rows)
  for (time_col in c("cv_time", "refit_time", "fit_time")) {
    if (!time_col %in% colnames(perf_data)) {
      perf_data[[time_col]] <- rep(NA, nrow(perf_data))
    }
  }

  # 'once_time' takes 'fit_time' where it is available, otherwise 'refit_time'
  perf_data$once_time <- ifelse(
    !is.na(perf_data$fit_time),
    perf_data$fit_time,
    perf_data$refit_time
  )

  # Extract metadata from the filename:
  #   part 1 = model type, part 2 = endpoint, parts 3..(n-2) = combo name,
  #   part n-1 = CV split (with the text "cvsplit" removed)
  name_parts <- strsplit(file_name, "_", fixed = TRUE)[[1]]
  model_type <- name_parts[1]
  endpoint <- name_parts[2]
  combo_name <- paste(name_parts[3:(length(name_parts) - 2)], collapse = "_")
  cv_split <- sub("cvsplit", "", name_parts[length(name_parts) - 1])

  # Add metadata columns to dataframe
  perf_data$model_type <- model_type
  perf_data$endpoint <- endpoint
  perf_data$combo_name <- combo_name
  perf_data$cv_split <- as.numeric(cv_split)

  # Queue this file's rows under its endpoint
  timings_by_endpoint[[endpoint]] <- c(
    timings_by_endpoint[[endpoint]],
    list(perf_data)
  )
}

# One data frame per endpoint
merged_timings_dataframes <- lapply(
  timings_by_endpoint,
  function(frames) do.call(rbind, frames)
)

# Combine all endpoint-specific dataframes into one
final_timings_df <- do.call(rbind, merged_timings_dataframes)

In [9]:
# Collect every cross-validation results file from the results folder into
#   merged_cvresults_dataframes : named list, one data frame per model type
# Metadata (model_type, endpoint, combo_name, cv_split) is parsed from the
# "_"-separated file name.

# Define directory where files are stored
dir <- "Benchmarking/Results"

# List all files in the directory
files <- system(paste0("dx ls ", dir), intern = TRUE)

# Keep cvresults files only. The dot is escaped so it matches a literal ".".
# NOTE(review): because of the "$" anchor, listing entries that carry a
# " : <file-id>" suffix are not selected here, so the by-ID download branch
# below is not reached for them -- confirm this is intended.
cvresults_files <- files[grepl("_cvresults\\.tsv$", files)]

# Per-model lists of per-file data frames. They are bound once after the
# loop instead of growing a data frame with rbind() on every iteration.
cvresults_by_model <- list()

# Loop through each cvresults file to download and process
for (perf_entry in cvresults_files) {
  # Split entry to extract filename and file ID (if available)
  parts <- strsplit(perf_entry, " : ", fixed = TRUE)[[1]]
  file_name <- parts[1]
  file_id <- if (length(parts) > 1) parts[2] else NA_character_

  # Download by ID if available, otherwise by path; always write to
  # `file_name`. "-f" overwrites a stale local copy left by an interrupted run.
  remote <- if (!is.na(file_id)) file_id else paste0(dir, "/", file_name)
  download_cmd <- paste(
    "dx download", shQuote(remote), "-o", shQuote(file_name), "-f"
  )
  status <- system(download_cmd)
  if (status != 0L) {
    stop("Download failed (exit status ", status, "): ", download_cmd,
         call. = FALSE)
  }

  # Read the file into a dataframe, then remove the local copy right away so
  # it is cleaned up even if a later step fails
  perf_data <- read.csv(file_name, sep = "\t", header = TRUE)
  unlink(file_name)

  # Extract metadata from the filename:
  #   part 1 = model type, part 2 = endpoint, parts 3..(n-2) = combo name,
  #   part n-1 = CV split (with the text "cvsplit" removed)
  name_parts <- strsplit(file_name, "_", fixed = TRUE)[[1]]
  model_type <- name_parts[1]
  endpoint <- name_parts[2]
  combo_name <- paste(name_parts[3:(length(name_parts) - 2)], collapse = "_")
  cv_split <- sub("cvsplit", "", name_parts[length(name_parts) - 1])

  # Add metadata columns to dataframe
  perf_data$model_type <- model_type
  perf_data$endpoint <- endpoint
  perf_data$combo_name <- combo_name
  perf_data$cv_split <- as.numeric(cv_split)

  # Queue this file's rows under its model type
  cvresults_by_model[[model_type]] <- c(
    cvresults_by_model[[model_type]],
    list(perf_data)
  )
}

# One data frame per model type
merged_cvresults_dataframes <- lapply(
  cvresults_by_model,
  function(frames) do.call(rbind, frames)
)

# Now, for example, if you want to access the dataframe for Lasso model
# merged_cvresults_dataframes[["Lasso"]]

# Print available dataframes
print(names(merged_cvresults_dataframes))

[1] "DL"       "EN"       "Lasso"    "lightGBM" "RangerRF" "Ridge"    "XGB"     


In [6]:
# Preview the first rows of the merged cross-validation results stored under "DL"
head(merged_cvresults_dataframes[["DL"]])

Unnamed: 0_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_batch_size,param_dropouts,param_epochs,param_layer_sizes,param_lr,param_optimizer,⋯,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,model_type,endpoint,combo_name,cv_split
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<int>,<chr>,<dbl>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<chr>,<dbl>
1,128.9612,51.36538,0.8708953,0.011411927,50000,0.2,100,"[16, 16]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8134457,0.8265159,0.8136075,0.8210208,0.006129052,3,DL,AD,agesex_cvsplit,1
2,124.3105,21.32025,0.8671031,0.009589296,50000,0.2,100,"[64, 64]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8138917,0.820587,0.8127075,0.8199784,0.005871267,9,DL,AD,agesex_cvsplit,1
3,103.078,22.3075,0.8700817,0.012674805,50000,0.2,100,"[256, 256]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8131611,0.8260845,0.8134062,0.8209329,0.006251598,7,DL,AD,agesex_cvsplit,1
4,115.1342,25.81461,0.8485882,0.030894234,50000,0.2,100,"[16, 16, 16]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8145598,0.8256028,0.8129548,0.8209827,0.005930038,6,DL,AD,agesex_cvsplit,1
5,116.3338,28.40631,0.8718509,0.014735395,50000,0.2,100,"[64, 64, 64]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8144269,0.8253893,0.8129557,0.8209879,0.005981434,5,DL,AD,agesex_cvsplit,1
6,122.641,46.28896,0.8678499,0.008623257,50000,0.2,100,"[256, 256, 256]",0.001,<class 'torch.optim.adam.Adam'>,⋯,0.8140672,0.8265761,0.8132722,0.821108,0.006085599,2,DL,AD,agesex_cvsplit,1


## 2.2 Downloading prepared performance data

In [13]:
# Download the prepared performance table and load it into `results`.
# "-f" lets the cell be re-run when the local file already exists; a failed
# download stops here instead of silently reading a stale local copy.
download_status <- system(
  "dx download Benchmarking/Plots/results_perf_2.tsv -o downloaded_results_perf_2.tsv -f"
)
if (download_status != 0L) {
  stop("Download of results_perf_2.tsv failed (exit status ",
       download_status, ")", call. = FALSE)
}
results <- read.table("downloaded_results_perf_2.tsv", sep = "\t", header = TRUE)
head(results)

Unnamed: 0_level_0,cv_split,model_type,endpoint,combo_name,c_index_train,c_index_test,delta_c,uno_c,chisq_20,sensitivity_20,⋯,specificity_10,fpr_10,fnr_10,logrank_p_val_10,hr_10,hr_lower_ci_10,hr_upper_ci_10,hr_pval_10,rmst_diff_10,rmst_pval_10
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1,Cox,AD,agesex,0.8215526,0.8183378,0.00321479,0.834979,444.84132,0.627027,⋯,0.8981106,0.1018894,0.6,0.0,6.328312,5.139642,7.791892,0.0,-0.02685881,1.890388e-07
2,1,Cox,AD,clinicalrisk,0.8425921,0.8309559,0.011636174,0.8515589,499.24051,0.6324324,⋯,0.9024579,0.09754211,0.5810811,0.0,7.361114,5.986615,9.051191,0.0,-0.02866641,2.50203e-08
3,1,Cox,AD,everything,0.8831058,0.868338,0.014767849,0.8833318,861.59381,0.772973,⋯,0.9035656,0.09643439,0.4378378,0.0,13.278892,10.811507,16.30938,0.0,-0.04146502,1.4501e-11
4,1,Cox,AD,pmh,0.8309989,0.8185526,0.01244631,0.8331651,468.93041,0.6324324,⋯,0.9021862,0.09781382,0.6162162,0.0,6.307689,5.114863,7.778691,0.0,-0.0240315,1.633003e-06
5,1,Cox,AD,prs_metabolomics,0.871631,0.8644288,0.007202283,0.8702061,830.30141,0.7648649,⋯,0.9035656,0.09643439,0.4378378,0.0,13.138751,10.698022,16.136328,0.0,-0.04200113,3.9719e-12
6,1,Cox,BC,agesex,0.5356915,0.5444747,-0.008783186,0.544717,12.79255,0.253012,⋯,0.8699096,0.1300904,0.8443003,0.003991972,1.272724,1.079583,1.500419,0.004080671,-0.0401091,0.0495248


## 2.3 Merge in timing data & Recoding

In [31]:
# Prepare the timings table for the join: drop the trailing "_cvsplit" from
# combo_name and keep only the join keys plus once_time.
final_timings_df <- final_timings_df %>%
  mutate(combo_name = sub("_cvsplit$", "", combo_name)) %>%
  select(
    all_of(c("cv_split", "model_type", "endpoint", "combo_name", "once_time"))
  )

# Attach once_time to every row of `results` (rows without timings get NA)
results_with_timings <- left_join(
  results,
  final_timings_df,
  by = c("cv_split", "model_type", "endpoint", "combo_name")
)

In [32]:
# Recoding: replace short codes with descriptive labels.
# Each lookup vector maps code -> label; values without an entry are retained
# unchanged via `.default`.
endpoint_labels <- c(
  "CVD" = "Cardiovascular Disease",
  "AD"  = "Alzheimer's Disease",
  "BC"  = "Breast Cancer"
)

model_type_labels <- c(
  "Cox"      = "Cox",
  "DL"       = "Deep Learning",
  "EN"       = "Elastic Net",
  "Lasso"    = "Lasso",
  "lightGBM" = "Gradient Boosting Machine",
  "RangerRF" = "Random Forest",
  "Ridge"    = "Ridge",
  "XGB"      = "XGBoost"
)

combo_name_labels <- c(
  "agesex"           = "Age & Sex",
  "clinicalrisk"     = "Clinical Risk",
  "everything"       = "Complete",
  "pmh"              = "Past Medical History",
  "prs_metabolomics" = "PRS & Metabolomics"
)

# `!!!` splices each lookup vector into recode() as old = new arguments
results_with_timings <- results_with_timings %>%
  mutate(
    endpoint   = recode(endpoint, !!!endpoint_labels, .default = endpoint),
    model_type = recode(model_type, !!!model_type_labels, .default = model_type),
    combo_name = recode(combo_name, !!!combo_name_labels, .default = combo_name)
  )

# 3. Statistical analysis

H0: There is no difference in model performance (C-index) between the two compared models.

H1: Model performance (C-index) differs between the two compared models (tested with a paired t-test).

Our data:
- we assume approximately normally distributed data, justified by the central limit theorem
- the outcome (C-index) is continuous
- results within one CV split are dependent (paired across models), whereas the CV splits are independent of each other
- one block is one combination of CV split, endpoint, and combo_name

We adjust the p-values for multiple testing with the Benjamini-Hochberg (BH) false discovery rate (FDR) procedure.

In [44]:
library(tidyr)
library(dplyr)

In [43]:
# Inspect the last rows of the merged and recoded results table
tail(results_with_timings, n = 6L)

cv_split,endpoint,combo_name,model_type,c_index_train,c_index_test,delta_c,delta_c_to_cox,uno_c,chisq_20,⋯,fpr_10,fnr_10,logrank_p_val_10,hr_10,hr_lower_ci_10,hr_upper_ci_10,hr_pval_10,rmst_diff_10,rmst_pval_10,once_time
<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<dbl>
5,Breast Cancer,PRS & Metabolomics,Gradient Boosting Machine,0.6849192,0.6589368,0.025982442,-0.0015253947,0.6623627,239.2905,⋯,0.09399619,0.7648699,0.0,2.89635,2.515687,3.334613,0.0,-0.2821607,0.0,6.927794
5,Cardiovascular Disease,Age & Sex,Gradient Boosting Machine,0.6810059,0.6778891,0.003116781,-0.0002997331,0.6806534,1178.2954,⋯,0.08790774,0.7827144,0.0,2.832121,2.63877,3.039639,0.0,-0.5077832,0.0,12.692038
5,Cardiovascular Disease,Clinical Risk,Gradient Boosting Machine,0.7399726,0.7170231,0.022949508,-0.0002310683,0.7168103,1832.6252,⋯,0.08362619,0.7537577,0.0,3.48583,3.257632,3.730014,0.0,-0.6701776,0.0,50.685602
5,Cardiovascular Disease,Complete,Gradient Boosting Machine,0.7453464,0.7336246,0.011721862,-0.0047379094,0.7318651,2450.7524,⋯,0.08164629,0.7360743,0.0,3.876356,3.628085,4.141616,0.0,-0.7765789,0.0,26.013983
5,Cardiovascular Disease,Past Medical History,Gradient Boosting Machine,0.7033125,0.6961937,0.007118851,-0.000250128,0.6979892,1513.994,⋯,0.0886502,0.7692308,0.0,3.057882,2.85334,3.277085,0.0,-0.5771894,0.0,25.197383
5,Cardiovascular Disease,PRS & Metabolomics,Gradient Boosting Machine,0.721037,0.7121185,0.008918526,-0.0077835707,0.7119615,1798.1195,⋯,0.08355195,0.7530946,0.0,3.446669,3.221269,3.687841,0.0,-0.6769595,0.0,13.102193


In [45]:
# Keep only the block identifiers and the three metrics used in the tests
subsetted_results <- results_with_timings %>%
  select(
    cv_split, model_type, endpoint, combo_name,
    c_index_test, once_time, delta_c_to_cox
  )

## 3.1 parametric test

In [42]:
# Install the statistics packages only when they are missing, so re-running
# the cell does not reinstall them. requireNamespace() checks availability
# without attaching the package.
for (pkg in c("tidyverse", "car", "afex", "emmeans")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(emmeans)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘bit’, ‘bit64’, ‘blob’, ‘cellranger’, ‘ids’, ‘vroom’, ‘tzdb’, ‘progress’, ‘broom’, ‘conflicted’, ‘dbplyr’, ‘dtplyr’, ‘forcats’, ‘googledrive’, ‘googlesheets4’, ‘haven’, ‘hms’, ‘modelr’, ‘readr’, ‘readxl’, ‘reprex’


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘rbibutils’, ‘Deriv’, ‘microbenchmark’, ‘Rdpack’, ‘doBy’, ‘boot’, ‘minqa’, ‘nloptr’, ‘reformulas’, ‘carData’, ‘Formula’, ‘nnet’, ‘pbkrtest’, ‘lme4’


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘lmerTest’, ‘reshape2’


Loading required package: emmeans

“there is no package called ‘emmeans’”
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependency ‘estimability’


Welcome to emmeans.
Caution: You lose important informat

In [46]:
# Paired t-tests on c_index_test between all model pairs within one
# endpoint / combo_name group (pairs are matched by cv_split), BH-adjusted
# across all pairs, then reduced to the contrasts that start with "Cox - ".

# Load required libraries
library(dplyr)
library(tidyr)

# Assuming `subsetted_results` is your main dataframe containing the data
# Define endpoints and combo_names; labels must match the recoded values
# ("Complete" is the recoded label of "everything")
endpoints <- c("Alzheimer's Disease", "Breast Cancer", "Cardiovascular Disease")
combo_names <- c(
  "Age & Sex", "Clinical Risk", "Complete",
  "Past Medical History", "PRS & Metabolomics"
)

# Loop over each endpoint and combo_name
for (endpointt in endpoints) {
  # NOTE(review): restricts the run to a single endpoint; looks like a
  # debugging filter -- remove to test all endpoints
  if (endpointt != "Breast Cancer") next
  for (combo_namee in combo_names) {
    # NOTE(review): restricts the run to a single combo; see note above
    if (combo_namee != "Age & Sex") next

    # One row per cv_split, one column per model (columns sorted by name)
    wide_df <- subsetted_results %>%
      filter(endpoint == endpointt, combo_name == combo_namee) %>%
      select(cv_split, model_type, c_index_test) %>%
      pivot_wider(
        names_from = model_type,
        values_from = c_index_test,
        names_sort = TRUE
      )

    # Extract model names
    model_names <- setdiff(names(wide_df), "cv_split") # excluding "cv_split"
    num_models <- length(model_names)
    if (num_models < 2) {
      message("Skipping ", endpointt, " / ", combo_namee,
              ": fewer than two models available")
      next
    }

    # All unordered model pairs, one pair per column: (1,2), (1,3), ..., (2,3), ...
    model_pairs <- combn(model_names, 2)
    pairwise_names <- paste(model_pairs[1, ], "-", model_pairs[2, ])

    # Paired t-test on matched pairs of data for each model pair
    p_values <- vapply(
      seq_len(ncol(model_pairs)),
      function(k) {
        t.test(
          wide_df[[model_pairs[1, k]]],
          wide_df[[model_pairs[2, k]]],
          paired = TRUE
        )$p.value
      },
      numeric(1)
    )

    # Apply BH FDR adjustment
    adjusted_p_values <- p.adjust(p_values, method = "BH")

    # Create a dataframe for the results
    results_test_df <- data.frame(
      contrast = pairwise_names,
      p_value = p_values,
      adj_p_value = adjusted_p_values
    )

    # Filter the results to include only those contrasts containing "Cox - "
    filtered_results_test_df <- results_test_df %>%
      filter(grepl("Cox - ", contrast, fixed = TRUE))

    # Print the context and results
    print(paste("This is the test for", endpointt, "and", combo_namee))
    print(filtered_results_test_df)
  }
}

[1] "This is the test for Breast Cancer and Age & Sex"
                         contrast     p_value adj_p_value
1             Cox - Deep Learning 0.405977296  0.44657503
2               Cox - Elastic Net         NaN         NaN
3 Cox - Gradient Boosting Machine 0.026759995  0.07358999
4                     Cox - Lasso         NaN         NaN
5             Cox - Random Forest 0.049806799  0.09131247
6                     Cox - Ridge         NaN         NaN
7                   Cox - XGBoost 0.005106012  0.02808307


In [56]:
# Builds `overview_df`: for every endpoint / combo_name group, paired t-tests
# (paired by cv_split) between all model pairs for three metrics --
# c_index_test, once_time (reported as "fitting_time") and delta_c_to_cox --
# with raw and BH-adjusted p-values per metric and group.

# Load required libraries
library(dplyr)
library(tidyr)

# Define endpoints and combo_names (adjust as needed)
endpoints <- c("Alzheimer's Disease", "Breast Cancer", "Cardiovascular Disease")
combo_names <- c(
  "Age & Sex", "Clinical Risk", "Complete",
  "Past Medical History", "PRS & Metabolomics"
)

# p-value of a paired t-test between x and y. Returns NA (with a warning)
# when fewer than two complete pairs exist, so that one metric with missing
# values does not abort the whole analysis.
paired_p_value <- function(x, y) {
  if (length(x) != length(y) || sum(!is.na(x) & !is.na(y)) < 2) {
    warning("Paired t-test skipped: fewer than two complete pairs",
            call. = FALSE)
    return(NA_real_)
  }
  t.test(x, y, paired = TRUE)$p.value
}

# Initialize a list to store the group-wise results.
all_overview_results <- list()

for (endpointt in endpoints) {
  for (combo_namee in combo_names) {
    # Filter the data and reshape using pivot_wider: one row per cv_split and
    # one column per metric/model combination ("<metric>_<model>").
    wide_df <- subsetted_results %>%
      filter(endpoint == endpointt, combo_name == combo_namee) %>%
      select(cv_split, model_type, c_index_test, once_time, delta_c_to_cox) %>%
      pivot_wider(names_from = model_type,
                  values_from = c(c_index_test, once_time, delta_c_to_cox))

    # Determine the models based on the c_index_test column names and
    # remove the prefix to get the actual model names.
    model_cols <- grep("^c_index_test_", names(wide_df), value = TRUE)
    model_names <- sub("^c_index_test_", "", model_cols)
    num_models <- length(model_names)

    # An empty or single-model group has nothing to compare
    if (num_models < 2) {
      message("Skipping ", endpointt, " / ", combo_namee,
              ": fewer than two models available")
      next
    }

    # All unordered model pairs in the order (1,2), (1,3), ..., (2,3), ...
    model_pairs <- combn(model_names, 2)
    model1_vec <- model_pairs[1, ]
    model2_vec <- model_pairs[2, ]
    contrasts_text <- paste(model1_vec, "-", model2_vec)

    # p-values of all pairwise comparisons for the metric whose wide columns
    # start with `prefix`
    pairwise_p_values <- function(prefix) {
      vapply(
        seq_along(model1_vec),
        function(k) {
          paired_p_value(
            wide_df[[paste0(prefix, model1_vec[k])]],
            wide_df[[paste0(prefix, model2_vec[k])]]
          )
        },
        numeric(1)
      )
    }

    # Paired t-tests: one for c_index_test, one for fitting time (once_time),
    # and one for delta_c_to_cox.
    # NOTE(review): in the displayed output the delta_c_to_cox p-values equal
    # the c_index_test p-values; presumably delta_c_to_cox is c_index_test
    # shifted by the Cox value of the same block, which leaves the paired
    # differences unchanged -- confirm whether this test adds information.
    ci_p_values    <- pairwise_p_values("c_index_test_")
    ft_p_values    <- pairwise_p_values("once_time_")
    delta_p_values <- pairwise_p_values("delta_c_to_cox_")

    # Apply Benjamini-Hochberg FDR adjustment separately for each metric.
    ci_adj    <- p.adjust(ci_p_values, method = "BH")
    ft_adj    <- p.adjust(ft_p_values, method = "BH")
    delta_adj <- p.adjust(delta_p_values, method = "BH")

    # Create a temporary data frame with these results and add context columns.
    tmp <- data.frame(
      contrast                         = contrasts_text,
      c_index_test_p_value             = ci_p_values,
      c_index_test_adj_p_value         = ci_adj,
      fitting_time_p_value             = ft_p_values,
      fitting_time_adj_p_value         = ft_adj,
      delta_c_to_cox_p_value           = delta_p_values,
      delta_c_to_cox_adj_p_value       = delta_adj,
      model1                           = model1_vec,
      model2                           = model2_vec,
      endpoint                         = endpointt,
      combo_name                       = combo_namee,
      stringsAsFactors                 = FALSE
    )

    # Save the temporary dataframe into our results list.
    all_overview_results[[paste(endpointt, combo_namee, sep = "_")]] <- tmp
  }
}

# Combine all temporary data frames into one overview dataframe.
overview_df <- do.call(rbind, all_overview_results)

# Subset the final dataframe to contain only the desired columns.
overview_df <- overview_df %>%
  select(endpoint, combo_name, model1, model2,
         c_index_test_p_value, c_index_test_adj_p_value,
         fitting_time_p_value, fitting_time_adj_p_value,
         delta_c_to_cox_p_value, delta_c_to_cox_adj_p_value)

rownames(overview_df) <- NULL

# (Optional) Display the overview dataframe.
# print(overview_df)

In [57]:
# Show the first rows of the pairwise-comparison overview
head(overview_df, n = 6L)

Unnamed: 0_level_0,endpoint,combo_name,model1,model2,c_index_test_p_value,c_index_test_adj_p_value,fitting_time_p_value,fitting_time_adj_p_value,delta_c_to_cox_p_value,delta_c_to_cox_adj_p_value
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Alzheimer's Disease,Age & Sex,Cox,Deep Learning,0.565964339,0.660709736,0.004658834,0.0072800809,0.565964339,0.660709736
2,Alzheimer's Disease,Age & Sex,Cox,Elastic Net,0.360286822,0.523461353,1.80149e-05,0.0001110025,0.360286822,0.523461353
3,Alzheimer's Disease,Age & Sex,Cox,Lasso,0.253667492,0.444374966,1.769116e-05,0.0001110025,0.253667492,0.444374966
4,Alzheimer's Disease,Age & Sex,Cox,Random Forest,0.684823031,0.737501726,0.6951426,0.6951425658,0.684823031,0.737501726
5,Alzheimer's Disease,Age & Sex,Cox,Ridge,0.373900966,0.523461353,0.1217926,0.1364076863,0.373900966,0.523461353
6,Alzheimer's Disease,Age & Sex,Cox,XGBoost,0.001216688,0.004866752,5.425605e-05,0.0002170242,0.001216688,0.004866752


In [58]:
# Install openxlsx only when it is missing (avoids a reinstall on every run),
# then attach it.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library(openxlsx)

# Write the overview of p-values to an Excel workbook in the working directory
write.xlsx(overview_df, file = "p_values_df_2.xlsx")

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [60]:
# Upload the workbook to the project and fail loudly if `dx upload` reports
# an error, instead of ignoring its exit status.
uplcmd <- "dx upload p_values_df_2.xlsx --path Benchmarking/p_values_df_2.xlsx"
upload_status <- system(uplcmd)
if (upload_status != 0L) {
  stop("Upload failed (exit status ", upload_status, "): ", uplcmd,
       call. = FALSE)
}