In [1]:
# Make the held-out review output directory the working directory.
# run_glmnet_models() builds its CSV paths from the bare `omics` prefix, so
# every read and write below resolves relative to this directory.
# NOTE(review): hard-coded absolute path; adjust when running elsewhere.
setwd("/projects/unutmaz-lab/ruoyun/MECFS/all_tps/codes/AI/output/heldout_review/heldout_review")

In [2]:
# Fit two caret/glmnet classifiers for one dataset prefix -- main effects only,
# and main effects plus all pairwise interactions -- then score the held-out
# samples with both models.
#
# Args:
#   omics: File-name prefix (character). The function reads
#     <omics>_train.csv and <omics>_heldout.csv (first column = row names,
#     transposed after reading so the CSV rows become predictor columns), plus
#     <omics>_y_prob.csv and <omics>_y_prob_held_out.csv. The training label
#     table must contain a 0/1 column named "y_true".
#
# Returns:
#   A list with `model_no_int` and `model_with_int` (the fitted caret `train`
#   objects) and `updated_y`, the training label table with the
#   cross-validated class1 probabilities of the best tuning parameters stored
#   in "glmnet_y_pred" and "glmnet-int_y_pred".
#
# Side effects:
#   Overwrites <omics>_y_prob_held_out.csv with the two prediction columns
#   added, and attaches caret, glmnet and dplyr to the session.
run_glmnet_models <- function(omics) {
  # Load required packages (or ensure they're loaded beforehand)
  library(caret)
  library(glmnet)
  library(dplyr)

  # ---- Local helpers -------------------------------------------------------

  # Read a feature table and transpose it so predictors end up in columns.
  read_features <- function(path) {
    read.csv(path, header = TRUE, row.names = 1) %>%
      t()
  }

  # Read a label/probability table.
  # check.names = FALSE keeps headers such as "glmnet-int_y_pred" intact. The
  # default would rewrite them to "glmnet.int_y_pred", so a table written by
  # this function and read again would end up with a stale, renamed duplicate
  # next to the freshly added column.
  read_labels <- function(path) {
    read.csv(path, header = TRUE, check.names = FALSE)
  }

  # Fit one glmnet model under the shared resampling scheme. The seed is
  # reset before every fit so both models are tuned on the same CV folds.
  # `savePredictions` is configured once, in trainControl().
  fit_glmnet <- function(rhs) {
    set.seed(123)
    train(
      as.formula(paste("y ~", rhs)),
      data       = data,
      method     = "glmnet",
      family     = "binomial",
      metric     = "ROC",
      trControl  = ctrl,
      tuneLength = 10,
      preProcess = c("center", "scale")
    )
  }

  # Keep only the resampled predictions made with the selected alpha/lambda.
  # Exact comparison is intended: bestTune is taken from the same tuning grid
  # that labels the rows of model$pred.
  best_cv_predictions <- function(model) {
    best  <- model$bestTune
    preds <- model$pred
    hit   <- which(preds$alpha == best$alpha & preds$lambda == best$lambda)
    preds[hit, , drop = FALSE]
  }

  # Bring a new feature matrix into the X1..Xn naming used by the models.
  # Columns are matched to the training features by their original names when
  # possible; otherwise the previous positional behaviour is kept, with a
  # warning, because a reordered file would be scored against the wrong
  # coefficients.
  to_model_columns <- function(new_X) {
    new_names <- colnames(new_X)
    if (!is.null(new_names) && all(feature_names %in% new_names)) {
      new_X <- new_X[, feature_names, drop = FALSE]
    } else {
      warning(
        "Held-out feature names do not match the training features; ",
        "falling back to positional matching.",
        call. = FALSE
      )
    }
    colnames(new_X) <- paste0("X", seq_len(ncol(new_X)))
    new_X
  }

  # ---- 1. Read the training data -------------------------------------------
  X <- read_features(paste0(omics, "_train.csv"))
  y <- read_labels(paste0(omics, "_y_prob.csv"))

  if (!"y_true" %in% names(y)) {
    stop("'", omics, "_y_prob.csv' has no 'y_true' column.", call. = FALSE)
  }
  if (nrow(y) != nrow(X)) {
    stop(
      "Training features have ", nrow(X), " samples but the label table has ",
      nrow(y), " rows.",
      call. = FALSE
    )
  }

  # ---- 2. Prepare the modelling data frame ---------------------------------
  # Remember the original feature names for aligning the held-out set, then
  # switch to X1, X2, ... so the names are safe inside a formula.
  feature_names <- colnames(X)
  model_names   <- paste0("X", seq_len(ncol(X)))
  colnames(X)   <- model_names

  data <- cbind(y["y_true"], X)
  colnames(data) <- c("y", model_names)

  # caret needs a factor with valid R names as levels when classProbs = TRUE.
  data$y <- factor(data$y, levels = c(0, 1), labels = c("class0", "class1"))
  if (anyNA(data$y)) {
    stop("'y_true' must contain only 0 and 1.", call. = FALSE)
  }

  # ---- 3. Resampling set-up: 10-fold CV, keep every held-out prediction ----
  ctrl <- trainControl(
    method = "cv",
    number = 10,
    classProbs = TRUE,
    summaryFunction = twoClassSummary,
    savePredictions = "all"
  )

  # ---- 4. Fit both models and collect their CV probabilities ---------------
  main_effects <- paste(model_names, collapse = " + ")

  # 4a. Model WITHOUT interaction terms
  model_no_int <- fit_glmnet(main_effects)
  cv_no_int    <- best_cv_predictions(model_no_int)
  y[cv_no_int$rowIndex, "glmnet_y_pred"] <- cv_no_int$class1

  # 4b. Model WITH all pairwise interaction terms
  model_with_int <- fit_glmnet(paste0("(", main_effects, ")^2"))
  cv_with_int    <- best_cv_predictions(model_with_int)
  y[cv_with_int$rowIndex, "glmnet-int_y_pred"] <- cv_with_int$class1

  # 5. The updated training table is returned but deliberately not written
  #    back to disk (the write below is disabled).
  #write.csv(y, paste0(omics, "_y_prob.csv"), row.names = FALSE)

  # ---- 6. Score the held-out samples ---------------------------------------
  holdout_X <- to_model_columns(read_features(paste0(omics, "_heldout.csv")))
  holdout_y <- read_labels(paste0(omics, "_y_prob_held_out.csv"))

  if (nrow(holdout_y) != nrow(holdout_X)) {
    stop(
      "Held-out features have ", nrow(holdout_X),
      " samples but the held-out label table has ", nrow(holdout_y), " rows.",
      call. = FALSE
    )
  }

  holdout_df <- data.frame(holdout_X)
  holdout_probs_no_int   <- predict(model_no_int, newdata = holdout_df, type = "prob")
  holdout_probs_with_int <- predict(model_with_int, newdata = holdout_df, type = "prob")

  holdout_y[, "glmnet_y_pred"]     <- holdout_probs_no_int$class1
  holdout_y[, "glmnet-int_y_pred"] <- holdout_probs_with_int$class1

  # Overwrites the held-out table in place with the two new columns.
  write.csv(holdout_y, paste0(omics, "_y_prob_held_out.csv"), row.names = FALSE)

  # ---- 7. Return both models and the updated training table ----------------
  list(
    model_no_int = model_no_int,
    model_with_int = model_with_int,
    updated_y = y
  )
}


In [3]:
# Run both glmnet models for the "specie" prefix: reads specie_train.csv,
# specie_y_prob.csv and specie_heldout.csv, and rewrites
# specie_y_prob_held_out.csv with the new prediction columns.
results <- run_glmnet_models("specie")

Loading required package: ggplot2

Loading required package: lattice

Loading required package: Matrix

Loaded glmnet 4.1-4


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X116”
“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:

“These variables have zero variances: X116, X1:X116, X2:X116, X3:X116, X4:X116, X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X7:X116, X8:X116, X9:X116, X10:X116, X11:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X15:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X18:X116, X19:X59, X19:X116, X20:X116, X21:X116, X22:X116, X23:X116, X24:X116, X25:X116, X26:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X96, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X29:X116, X30:X116, X31:X116, X32:X116, X33:X116, X34:X116, X35:X60, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X38:X116, X39:X116, X40:X60, X40:X116, X41:X116, X42:X86, X42:X116, X43:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X45:X60, X45:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X49:X116, X50:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X60, X53:X74, X53:X79, X53:X116, X54:

“These variables have zero variances: X116, X1:X116, X2:X116, X3:X116, X4:X116, X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X7:X116, X8:X116, X9:X116, X10:X116, X11:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X15:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X18:X116, X19:X59, X19:X116, X20:X116, X21:X116, X22:X116, X23:X116, X24:X116, X25:X116, X26:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X96, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X29:X116, X30:X116, X31:X116, X32:X116, X33:X116, X34:X116, X35:X60, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X38:X116, X39:X116, X40:X60, X40:X116, X41:X116, X42:X86, X42:X116, X43:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X45:X60, X45:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X49:X116, X50:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X60, X53:X74, X53:X79, X53:X116, X54:

“These variables have zero variances: X116, X1:X116, X2:X116, X3:X116, X4:X116, X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X7:X116, X8:X116, X9:X116, X10:X116, X11:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X15:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X18:X116, X19:X59, X19:X116, X20:X116, X21:X116, X22:X116, X23:X116, X24:X116, X25:X116, X26:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X96, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X29:X116, X30:X116, X31:X116, X32:X116, X33:X116, X34:X116, X35:X60, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X38:X116, X39:X116, X40:X60, X40:X116, X41:X116, X42:X86, X42:X116, X43:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X45:X60, X45:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X49:X116, X50:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X60, X53:X74, X53:X79, X53:X116, X54:

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X86, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X86, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X74, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X86, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X78, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X113, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X86, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X86, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X74, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X86, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X78, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X113, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X74, X6:X97, X6:X99, X6:X116, X8:X116, X11:X60, X12:X60, X12:X77, X12:X116, X13:X60, X13:X97, X13:X116, X14:X77, X14:X116, X15:X60, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X60, X19:X116, X20:X60, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X113, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X73, X46:X78, X46:X79, X46:X86, X46:X97, X46:X99, X46:X102, X46:X111, X46:X116, X47:X60, X47:X73, X47:X116, X48:X60, X48:X99, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X56:X60, X57:X60, X57:X73, X57:X86, X57:X97, X57:X99, X57:X116, X58:X99, X58:X111, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X74, X6:X97, X6:X99, X6:X116, X8:X116, X11:X60, X12:X60, X12:X77, X12:X116, X13:X60, X13:X97, X13:X116, X14:X77, X14:X116, X15:X60, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X60, X19:X116, X20:X60, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X113, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X73, X46:X78, X46:X79, X46:X86, X46:X97, X46:X99, X46:X102, X46:X111, X46:X116, X47:X60, X47:X73, X47:X116, X48:X60, X48:X99, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X56:X60, X57:X60, X57:X73, X57:X86, X57:X97, X57:X99, X57:X116, X58:X99, X58:X111, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X74, X6:X97, X6:X99, X6:X116, X8:X116, X11:X60, X12:X60, X12:X77, X12:X116, X13:X60, X13:X97, X13:X116, X14:X77, X14:X116, X15:X60, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X60, X19:X116, X20:X60, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X113, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X73, X46:X78, X46:X79, X46:X86, X46:X97, X46:X99, X46:X102, X46:X111, X46:X116, X47:X60, X47:X73, X47:X116, X48:X60, X48:X99, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X56:X60, X57:X60, X57:X73, X57:X86, X57:X97, X57:X99, X57:X116, X58:X99, X58:X111, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X77, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X18:X77, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X27:X118, X28:X57, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X74, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X77, X48:X116, X51:X59, X51:X60, X51:X77, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X9

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X77, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X18:X77, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X27:X118, X28:X57, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X74, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X77, X48:X116, X51:X59, X51:X60, X51:X77, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X9

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X86, X19:X116, X24:X116, X27:X59, X27:X77, X27:X78, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X78, X28:X116, X30:X60, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X59, X52:X86, X52:X102, X52:X116, X53:X74, X53:X116, X55:X60, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X96, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X9

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X86, X19:X116, X24:X116, X27:X59, X27:X77, X27:X78, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X78, X28:X116, X30:X60, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X59, X52:X86, X52:X102, X52:X116, X53:X74, X53:X116, X55:X60, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X96, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X9

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X86, X19:X116, X24:X116, X27:X59, X27:X77, X27:X78, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X78, X28:X116, X30:X60, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X59, X52:X86, X52:X102, X52:X116, X53:X74, X53:X116, X55:X60, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X96, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X9

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X79, X12:X116, X13:X96, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X97, X16:X116, X17:X77, X17:X97, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X87, X28:X97, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X96, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X79, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X79, X12:X116, X13:X96, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X97, X16:X116, X17:X77, X17:X97, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X87, X28:X97, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X96, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X79, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X113, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X59, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X46:X118, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X59, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X100, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X113, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X59, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X46:X118, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X59, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X100, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X113, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X59, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X46:X118, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X59, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X100, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X79, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X79, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X74, X46:X77, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X77, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X10

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X79, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X79, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X74, X46:X77, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X77, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X10

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X76, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X51, X46:X76, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X76, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X51, X46:X76, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X76, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X51, X46:X76, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X73, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X103, X60:X105, X60

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X73, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X103, X60:X105, X60

“These variables have zero variances: X5:X60, X5:X116, X6:X60, X6:X97, X6:X116, X8:X116, X12:X60, X12:X77, X12:X116, X13:X97, X13:X116, X14:X77, X14:X116, X16:X74, X16:X86, X16:X116, X17:X77, X17:X116, X18:X60, X19:X59, X19:X116, X24:X116, X27:X59, X27:X77, X27:X79, X27:X86, X27:X97, X27:X99, X27:X116, X28:X59, X28:X116, X30:X116, X34:X116, X35:X86, X35:X116, X36:X116, X37:X60, X37:X74, X37:X77, X37:X116, X39:X116, X40:X60, X42:X86, X42:X116, X44:X46, X44:X59, X44:X60, X44:X73, X44:X77, X44:X79, X44:X86, X44:X97, X44:X116, X46:X78, X46:X79, X46:X86, X46:X99, X46:X102, X46:X111, X46:X116, X47:X116, X48:X60, X48:X116, X51:X60, X51:X116, X52:X86, X52:X116, X53:X74, X53:X116, X55:X116, X57:X60, X57:X73, X57:X86, X57:X99, X57:X116, X58:X116, X59:X60, X59:X73, X59:X74, X59:X76, X59:X86, X59:X87, X59:X97, X59:X99, X59:X111, X59:X113, X59:X114, X60:X61, X60:X74, X60:X76, X60:X77, X60:X78, X60:X79, X60:X86, X60:X87, X60:X91, X60:X97, X60:X99, X60:X101, X60:X102, X60:X103, X60:X105, X60:X111, X6

In [3]:
# Same pipeline for the "omics" prefix (omics_*.csv files). `results` is
# reassigned, so the models from the previous call are no longer referenced.
results <- run_glmnet_models("omics")

Loading required package: ggplot2

Loading required package: lattice

Loading required package: Matrix

Loaded glmnet 4.1-4


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [5]:
# Same pipeline for the "immune" prefix (immune_*.csv files); replaces the
# previous value of `results`.
results <- run_glmnet_models("immune")

In [13]:
# Same pipeline for the "quest" prefix (quest_*.csv files); replaces the
# previous value of `results`.
# NOTE(review): this cell's execution count (13) is higher than that of the
# cell redefining run_glmnet_models (8), so it presumably ran the redefined
# version with max_features -- confirm before comparing results.
results <- run_glmnet_models("quest")

“from glmnet C++ code (error code -45); Convergence for 45th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned”
“from glmnet C++ code (error code -46); Convergence for 46th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned”
“from glmnet C++ code (error code -47); Convergence for 47th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned”
“from glmnet C++ code (error code -45); Convergence for 45th lambda value not reached after maxit=100000 iterations; solutions for larger lambdas returned”


In [7]:
# Run the glmnet pipeline on the files prefixed "metabolome" (e.g.
# metabolome_train.csv, metabolome_y_prob.csv); `results` is reassigned to this
# run's return value.
results <- run_glmnet_models("metabolome")

In [8]:
run_glmnet_models <- function(omics, max_features = 300) {
  # Train elastic-net (glmnet) classifiers on one dataset, with and without
  # pairwise interaction terms, after a two-step feature filter, and score the
  # optional held-out set with both final models.
  #
  # Args:
  #   omics:        File-name prefix. Reads "<omics>_train.csv" (first column
  #                 used as row names; transposed after reading so that rows
  #                 are samples and columns are features) and
  #                 "<omics>_y_prob.csv" (must contain a 0/1 column `y_true`).
  #   max_features: Maximum number of features kept after ranking by
  #                 univariate AUC.
  #
  # Returns:
  #   A list with `model_no_int` and `model_with_int` (caret train objects),
  #   `updated_y` (the training outcome table plus out-of-fold class1
  #   probabilities in `glmnet_y_pred` and `glmnet-int_y_pred`), and
  #   `selected_features` (original names of the retained features, ordered
  #   by decreasing univariate AUC).
  #
  # Side effects:
  #   When both "<omics>_heldout.csv" and "<omics>_y_prob_held_out.csv" exist,
  #   the latter is overwritten with the two prediction columns added.

  # -----------------------------
  # Load required packages
  # -----------------------------
  library(caret)
  library(glmnet)
  library(dplyr)
  library(pROC)  # for computing univariate AUC

  # -----------------------------
  # 1. Read the training data
  # -----------------------------
  X <- read.csv(paste0(omics, "_train.csv"), header = TRUE, row.names = 1) %>%
    t()  # transpose so rows = samples, cols = features

  y <- read.csv(paste0(omics, "_y_prob.csv"), header = TRUE)

  if (!"y_true" %in% colnames(y)) {
    stop("Column 'y_true' not found in ", omics, "_y_prob.csv", call. = FALSE)
  }
  if (nrow(X) != nrow(y)) {
    stop(
      "Sample count mismatch: ", nrow(X), " rows in ", omics, "_train.csv vs ",
      nrow(y), " rows in ", omics, "_y_prob.csv",
      call. = FALSE
    )
  }

  # -----------------------------
  # 2. Feature selection
  # -----------------------------
  # NOTE(review): both filters below use all training rows (and, for the AUC
  # ranking, all training labels) before cross-validation starts, so the
  # out-of-fold predictions in step 5/6 are not independent of the selection
  # step. Confirm this is acceptable for the downstream analysis.

  # 2.1 Remove near-zero-variance features
  nzv_info <- nearZeroVar(X, saveMetrics = TRUE)
  keep_nzv <- rownames(nzv_info)[!nzv_info$nzv]
  X_nzv    <- X[, keep_nzv, drop = FALSE]

  message("Removed ", sum(nzv_info$nzv), " near-zero-variance features. Remaining: ",
          ncol(X_nzv))

  # 2.2 Rank by univariate AUC (needs a numeric 0/1 outcome vector)
  y_numeric <- as.numeric(as.character(y$y_true))

  # vapply (not sapply) so the result is always a named numeric vector.
  auc_values <- vapply(
    colnames(X_nzv),
    function(fname) {
      r <- pROC::roc(response = y_numeric, predictor = X_nzv[, fname],
                     quiet = TRUE)
      as.numeric(pROC::auc(r))
    },
    numeric(1)
  )

  # Sort features by AUC (descending) and keep at most max_features
  feature_ranking <- sort(auc_values, decreasing = TRUE)
  if (length(feature_ranking) > max_features) {
    feature_ranking <- feature_ranking[seq_len(max_features)]
  }
  keep_features <- names(feature_ranking)

  message("Selected top ", length(keep_features), " features by univariate AUC.")

  # Final reduced training matrix, columns in AUC-ranked order
  X_sel <- X_nzv[, keep_features, drop = FALSE]

  # -----------------------------
  # 3. Prepare data for caret
  # -----------------------------
  # Positional aliases X1, X2, ... keep the formulas syntactically valid;
  # model_names[i] always stands for keep_features[i].
  model_names <- paste0("X", seq_len(ncol(X_sel)))
  colnames(X_sel) <- model_names

  data <- cbind(y["y_true"], X_sel)
  colnames(data) <- c("y", model_names)

  # Convert y to factor for classification
  data$y <- factor(data$y, levels = c(0, 1), labels = c("class0", "class1"))

  # -----------------------------
  # 4. Define caret trainControl
  # -----------------------------
  ctrl <- trainControl(
    method = "cv",
    number = 10,
    classProbs = TRUE,
    summaryFunction = twoClassSummary,
    savePredictions = "all"
  )

  # Fit one glmnet model for the given formula. The seed is reset before
  # every fit so both models see the same fold assignment. savePredictions
  # is configured in `ctrl`; it is not an argument of train() itself.
  fit_glmnet <- function(form) {
    set.seed(123)
    train(
      form,
      data       = data,
      method     = "glmnet",
      family     = "binomial",
      metric     = "ROC",
      trControl  = ctrl,
      tuneLength = 10,
      preProcess = c("center", "scale")
    )
  }

  # Out-of-fold predictions restricted to the selected alpha/lambda pair.
  # Exact comparison is intended: bestTune holds values from the same tuning
  # grid that is recorded in model$pred.
  oof_at_best <- function(model) {
    best <- model$bestTune
    subset(model$pred, alpha == best$alpha & lambda == best$lambda)
  }

  main_terms <- paste(model_names, collapse = " + ")

  # -----------------------------
  # 5. Model WITHOUT interaction
  # -----------------------------
  model_no_int <- fit_glmnet(as.formula(paste("y ~", main_terms)))

  pred_df_no_int <- oof_at_best(model_no_int)
  y[pred_df_no_int$rowIndex, "glmnet_y_pred"] <- pred_df_no_int$class1

  # -----------------------------
  # 6. Model WITH interaction
  # -----------------------------
  # Warning: (X1 + X2 + ...)^2 adds every pairwise product, i.e. p * (p - 1) / 2
  # extra terms for p features, so this grows quickly with max_features.
  model_with_int <- fit_glmnet(as.formula(paste0("y ~ (", main_terms, ")^2")))

  pred_df_with_int <- oof_at_best(model_with_int)
  y[pred_df_with_int$rowIndex, "glmnet-int_y_pred"] <- pred_df_with_int$class1

  # -----------------------------
  # 7. Write updated training y
  # -----------------------------
  # Intentionally disabled; the updated table is returned as `updated_y`.
  #write.csv(y, paste0(omics, "_y_prob.csv"), row.names = FALSE)

  # -----------------------------
  # 8. Predict on held-out set
  # -----------------------------
  holdout_path   <- paste0(omics, "_heldout.csv")
  holdout_y_path <- paste0(omics, "_y_prob_held_out.csv")

  if (file.exists(holdout_path) && file.exists(holdout_y_path)) {
    holdout_X <- read.csv(holdout_path, header = TRUE, row.names = 1) %>%
      t()
    holdout_y <- read.csv(holdout_y_path, header = TRUE)

    # Every feature the models were trained on must be present; otherwise the
    # positional aliases below would no longer line up with training.
    missing_feats <- setdiff(keep_features, colnames(holdout_X))
    if (length(missing_feats) > 0) {
      stop(
        length(missing_feats), " selected feature(s) missing from ",
        holdout_path, ", e.g. ",
        paste(utils::head(missing_feats, 5), collapse = ", "),
        call. = FALSE
      )
    }

    # Select by name in the TRAINING order (keep_features), not in the order
    # the columns happen to appear in the held-out file, so that alias Xi
    # refers to the same feature here as it did during training.
    holdout_X <- holdout_X[, keep_features, drop = FALSE]
    colnames(holdout_X) <- model_names

    # build data frame for predict()
    holdout_df <- data.frame(holdout_X)

    # predict probabilities from final models
    holdout_probs_no_int   <- predict(model_no_int, newdata = holdout_df,
                                      type = "prob")
    holdout_probs_with_int <- predict(model_with_int, newdata = holdout_df,
                                      type = "prob")

    # attach predictions to holdout_y
    holdout_y[, "glmnet_y_pred"]     <- holdout_probs_no_int$class1
    holdout_y[, "glmnet-int_y_pred"] <- holdout_probs_with_int$class1

    # write updated holdout .csv (overwrites the input outcome file)
    write.csv(holdout_y, holdout_y_path, row.names = FALSE)
  } else {
    message("Held-out files not found or not provided: ", holdout_path, " / ", holdout_y_path)
  }

  # -----------------------------
  # 9. Return results
  # -----------------------------
  list(
    model_no_int      = model_no_int,
    model_with_int    = model_with_int,
    updated_y         = y,
    selected_features = keep_features
  )
}


In [9]:
# Run the pipeline on the files prefixed "kegg" (e.g. kegg_train.csv,
# kegg_y_prob.csv) with the default max_features; `results` is reassigned to
# this run's return value.
results <- run_glmnet_models("kegg")

Type 'citation("pROC")' for a citation.


Attaching package: ‘pROC’


The following objects are masked from ‘package:stats’:

    cov, smooth, var


Removed 0 near-zero-variance features. Remaining: 3959

Selected top 300 features by univariate AUC.

