# Prep SLURM commands

In [1]:
getwd()

In [2]:
setwd("../")

In [3]:
#' Generate Chunk Ranges with Fixed Number of Chunks
#'
#' This function divides a range into a specified number of chunks, with each chunk
#' having approximately the same size.
#'
#' @param start The starting point of the range to be divided.
#' @param end The ending point of the range to be divided.
#' @param num_chunks The fixed number of chunks to divide the range into.
#'
#' @return A matrix where each row represents a chunk, with the first column being
#'         the start of the chunk and the second column being the end of the chunk.
#' @examples
#' chunk_fixed_n(1, 100, 5)
#' @export
chunk_fixed_n <- function(start, end, num_chunks) {
  chunk_size <- ceiling((end - start + 1) / num_chunks)
  chunk_ranges <- sapply(1:num_chunks, function(i) {
    chunk_start <- start + (i - 1) * chunk_size
    chunk_end <- min(chunk_start + chunk_size - 1, end)
    c(chunk_start, chunk_end)
  })
  return(chunk_ranges)
}

#' Generate Chunk Ranges with Fixed Chunk Size
#'
#' This function divides a range into chunks up to a given maximum size, dynamically
#' determining the number of chunks based on the range and maximum chunk size.
#'
#' @param start The starting point of the range to be divided.
#' @param end The ending point of the range to be divided.
#' @param max_chunk_size The maximum size that each chunk can have.
#'
#' @return A matrix where each row represents a chunk, with the first column being
#'         the start of the chunk and the second column being the end of the chunk.
#'         The last chunk may be smaller than `max_chunk_size` to fit the range.
#' @examples
#' chunk_fixed_size(1, 100, 20)
#' @export
chunk_fixed_size <- function(start, end, max_chunk_size) {
  if (length(start) > 1 || length(end) > 1) {
    stop("start and end must be single values")
  }
  if (start > end || max_chunk_size <= 0) {
    stop("Invalid arguments: ensure start <= end and max_chunk_size > 0")
  }
  
  num_chunks <- ceiling((end - start + 1) / max_chunk_size)
  chunk_ranges <- matrix(nrow = num_chunks, ncol = 2)
  
  for (i in 1:num_chunks) {
    chunk_start <- start + (i - 1) * max_chunk_size
    chunk_end <- min(chunk_start + max_chunk_size - 1, end)
    chunk_ranges[i, ] <- c(chunk_start, chunk_end)
  }
  
  return(t(chunk_ranges))
}

generate_slurm_script <- function(args, tag_pt1, tag_pt2, mem_per_cpu = "16G") {
  cpus_per_task <- if (is.null(args$num_cores) || args$num_cores == "all") {
    "#SBATCH --exclusive\n"
  } else {
    paste0("#SBATCH --cpus-per-task=", args$num_cores, "\n")
  }

  # Always set mem_per_cpu flag, even in exclusive mode
  mem_allocation <- paste0("#SBATCH --mem-per-cpu=", mem_per_cpu, "\n")

  args_string <- paste("--", names(args), "=", args, sep = "", collapse = " ")
  args_string <- paste(args_string, " --tag=", tag_pt1, "-", tag_pt2, sep = "")

  slurm_script <- paste0(
    "#!/bin/bash\n",
    if (cluster_specific_parameters) paste0(
      "#SBATCH --partition=", partition, "\n",
      "#SBATCH -A ", acct, "\n",
      "#SBATCH --time=", time, "\n"
    ),
    if (!is.null(nodes) && !is.null(ntasks_per_node)) paste0(
      "#SBATCH --nodes=", nodes, "\n",
      "#SBATCH --ntasks-per-node=", ntasks_per_node, "\n"
    ),
    cpus_per_task,
    mem_allocation,
    "#SBATCH --output=slurm_output_", tag_pt1, "-", tag_pt2, ".out\n",
    "#SBATCH --job-name=", tag_pt1, "-", tag_pt2, "\n",
    if (module_load_conda) "module load conda\n",
    "conda activate mwas\n",
    "echo 'Executing Rscript with arguments: Rscript scripts/CLI.R ", args_string, "'\n",
    "Rscript scripts/CLI.R ", args_string, "\n"
  )

  return(list(script = slurm_script, path = paste0("slurm_scripts/", tag_pt1, "-", tag_pt2, ".sh")))
}

In [4]:
# Cluster-specific parameters
cluster_specific_parameters <- TRUE # Example condition
acct <- "jhu152"
time <- "24:00:00"
partition <- "shared"

module_load_conda <- FALSE

nodes <- 1
ntasks_per_node <- 1

# Overwrite flag (set by user)
overwrite <- FALSE

In [4]:
# chunk_ranges <- chunk_fixed_size(files$first_meth_index_with_SNP_coverage[i],
#                                  files$last_meth_index_with_SNP_coverage[i],
#                                  1000)
                         

# chunk_ranges <- chunk_ranges[1:2,1:2]

chunk_ranges <- chunk_fixed_n(1, 10000, 5)

In [6]:
library(data.table)

In [7]:
matched_df <- fread("../CpGWAS/scripts/09-OUT_matched_SNP_meth_cov.csv")

In [8]:
matched_df

Chr,SNP_data,methylation_data,last_meth_value_with_SNP_coverage,first_meth_value_with_SNP_coverage,last_meth_index_with_SNP_coverage,first_meth_index_with_SNP_coverage,population,region,cov_file
<int>,<chr>,<chr>,<int>,<int>,<int>,<int>,<chr>,<chr>,<chr>
1,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda,248918358,1069461,2202702,8982,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
2,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda,241863783,10001,2019984,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
3,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda,198099789,11602,1538467,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
4,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda,189877411,69399,1387731,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
5,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda,181172584,44104,1409038,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
6,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda,170619093,192453,1412543,1138,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
7,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr7.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr7_AA.rda,159334659,49742,1490198,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
8,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr8.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr8_AA.rda,145078546,196751,1225856,1483,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
9,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr9.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr9_AA.rda,136933101,164314,1064218,2159,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv
10,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr10.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr10_AA.rda,133625493,45719,1288155,1,AA,caud,/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/AA_caud.csv


In [6]:
# Constant Arguments Setup
constant_args_df <- data.frame(
  outdir = "./output/500k_window/",
  snp_data_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen",
  methylation_data_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda",
  cov = "/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv",
  verbose = FALSE,
  lambda_choice = "1se",
  alphas = 0.5,
  allow_inefficient_parallelization = FALSE,
  n_fold = 5,
  window_sizes = "500000",
  #window_sizes = "500000",
  save_evaluation_results_each_fold = FALSE,
  save_glmnet_object = FALSE,
  omit_folds_with_na_r = TRUE,
  methInput_rds_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/chr1_dfplc_all_methylation_10k_samples_a3.rds"
)

# Varying parameters
cv_eval_modes <- c("dynamic")
cores_per_alphas <- c("all") #NA)  # Include NA to signify the default value should be used
num_cores_options <- c(1)

for (chunk_range in 1:ncol(chunk_ranges)) {
  constant_args_df$chunk1 <- chunk_ranges[1, chunk_range]
  constant_args_df$chunk2 <- chunk_ranges[2, chunk_range]
    # Loop through each combination
    for (cv_eval_mode in cv_eval_modes) {
      for (cores_per_alpha in cores_per_alphas) {
        for (num_cores in num_cores_options) {
          # Update constant_args_df for the current combination
          constant_args_df$cv_eval_mode <- cv_eval_mode
          constant_args_df$num_cores <- num_cores
          if (!is.na(cores_per_alpha)) {
            constant_args_df$cores_per_alpha <- cores_per_alpha
          } else {
            constant_args_df$cores_per_alpha <- NULL
          }

          # Generate tags
          snp_base <- tools::file_path_sans_ext(basename(constant_args_df$snp_data_path))
          meth_base <- tools::file_path_sans_ext(basename(constant_args_df$methylation_data_path))
          datetime_str <- format(Sys.time(), "%Y%m%d-%H%M%S")
          tag_pt1 <- paste(snp_base, meth_base, format(constant_args_df$chunk1, scientific = FALSE), format(constant_args_df$chunk2, scientific = FALSE), cv_eval_mode, paste0(num_cores, "corestotal"), ifelse(is.na(cores_per_alpha), "defaultcore", paste0(cores_per_alpha, "corepera")), sep = "-")
          tag_pt2 <- datetime_str

          # Check for existing file
          existing_files <- list.files(path = constant_args_df$outdir, pattern = paste0("^", tag_pt1, ".*\\.rds$"), full.names = TRUE)
          if (!overwrite && length(existing_files) > 0) {
            message("File with tag ", tag_pt1, " already exists. Skipping...")
            next
          }

          # Generate and print SLURM script
          script_info <- generate_slurm_script(constant_args_df, tag_pt1, tag_pt2,
                                               mem_per_cpu = "16G")

          cat(script_info$script)

          writeLines(script_info$script, script_info$path)

          # Submit the SLURM job using the sbatch command
          system(paste("sbatch", script_info$path))

          # Implement job submission limits and intervals if necessary
          sleeptime <- 120
          #Sys.sleep(sleeptime)
          message(paste0("Sleeping for ", sleeptime, " seconds"))
        }
      }
    }
}

#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-1-2000-dynamic-1corestotal-allcorepera-20240404-110552.out
#SBATCH --job-name=libd_chr1-chr1_all-1-2000-dynamic-1corestotal-allcorepera-20240404-110552
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/expa

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-1-2000-dynamic-12corestotal-allcorepera-20240404-110552.out
#SBATCH --job-name=libd_chr1-chr1_all-1-2000-dynamic-12corestotal-allcorepera-20240404-110552
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/e

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-2001-4000-dynamic-1corestotal-allcorepera-20240404-110552.out
#SBATCH --job-name=libd_chr1-chr1_all-2001-4000-dynamic-1corestotal-allcorepera-20240404-110552
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-2001-4000-dynamic-12corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-2001-4000-dynamic-12corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_p

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-4001-6000-dynamic-1corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-4001-6000-dynamic-1corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-4001-6000-dynamic-12corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-4001-6000-dynamic-12corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_p

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-6001-8000-dynamic-1corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-6001-8000-dynamic-1corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-6001-8000-dynamic-12corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-6001-8000-dynamic-12corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_p

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-8001-10000-dynamic-1corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-8001-10000-dynamic-1corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_pa

Sleeping for 120 seconds



#!/bin/bash
#SBATCH --partition=shared
#SBATCH -A jhu152
#SBATCH --time=24:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-8001-10000-dynamic-12corestotal-allcorepera-20240404-110553.out
#SBATCH --job-name=libd_chr1-chr1_all-8001-10000-dynamic-12corestotal-allcorepera-20240404-110553
conda activate mwas
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/500k_window/ --snp_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/libd_chr1.pgen --methylation_data_path=/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/expanse/lustre/projects/jhu152/naglemi/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=500000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds

Sleeping for 120 seconds



In [7]:
tag_pt1

## Define parameters

In [8]:
# # Constant Arguments Setup
# constant_args_df <- data.frame(
#   outdir = "./output/",
#   chunk1 = 1000000,
#   chunk2 = 1001000,
#   snp_data_path = "/dcs04/lieber/statsgen/mnagle/mwas/gwas/libd_chr1.pgen",
#   methylation_data_path = "/dcs04/lieber/statsgen/mnagle/mwas/pheno/dlpfc/out/chr1_AA.rda",
#   #snp_data_path = "/Users/michaelnagle/code/mwas/gwas/libd_chr1.pgen",
#   #methylation_data_path = "/Users/michaelnagle/code/mwas/pheno/dlpfc/out/chr1_AA.rda",  
#   verbose = FALSE,
#   lambda_choice = "1se",
#   alphas = "0.25,0.5,0.75,1",
#   #num_cores = "all",
#   allow_inefficient_parallelization = FALSE,
#   n_fold = 5,
#   window_sizes = "1000,2000,5000,10000,20000,50000,100000,500000",
#   #tag = format(Sys.time(), "%Y%m%d-%H%M%S"),
#   save_evaluation_results_each_fold = FALSE,
#   save_glmnet_object = FALSE,
#   cv_eval_mode = "dynamic"
# )

# # Convert alphas to a comma-separated string
# #constant_args_df$alphas <- sapply(constant_args_df$alphas, function(x) paste(x, collapse = ","))


# # Varying parameters
# cv_eval_modes <- c("static", "dynamic")
# cores_per_alphas <- c("all", "1")

## Generate and deploy SLURM calls

In [9]:
# generate_slurm_script <- function(args, tag) {
#   # Check if cores_per_alpha is set and not NULL
#   if (!is.null(args$cores_per_alpha) && args$cores_per_alpha == "all") {
#     mem_allocation <- "0"
#     cpus_per_task <- "#SBATCH --exclusive\n"
#   } else {
#     mem_allocation <- "16G"
#     cpus_per_task <- "#SBATCH --cpus-per-task=1\n"
#   }

#   args_string <- paste("--", names(args), "=", args, sep = "", collapse = " ")
#   args_string <- paste(args_string, " --tag=", tag, sep = "")

#   slurm_script <- paste(
#     "#!/bin/bash\n",
#     cpus_per_task,
#     "#SBATCH --mem=", mem_allocation, "\n",
#     "#SBATCH --output=slurm_output_", tag, ".out\n",
#     "#SBATCH --job-name=", tag, "\n",
#     "Rscript scripts/CLI.R ", args_string, "\n",
#     sep = ""
#   )

#   return(slurm_script)
# }

# # Varying parameters
# cv_eval_modes <- c("static", "dynamic")
# cores_per_alphas <- c("1", NA)  # Include NA to signify the default value should be used

# # Loop through each combination
# for (cv_eval_mode in cv_eval_modes) {
#   for (cores_per_alpha in cores_per_alphas) {
#     # Update constant_args_df for the current combination
#     constant_args_df$cv_eval_mode <- cv_eval_mode
#     # Use an if-statement to decide whether to assign the value or leave it to default
#     if (!is.na(cores_per_alpha)) {
#       constant_args_df$cores_per_alpha <- cores_per_alpha
#     } else {
#       constant_args_df$cores_per_alpha <- NULL  # Setting it to NULL to use the function's default
#     }

#     # Generate tag with consideration for NA
#     snp_base <- tools::file_path_sans_ext(basename(constant_args_df$snp_data_path))
#     meth_base <- tools::file_path_sans_ext(basename(constant_args_df$methylation_data_path))
#     datetime_str <- format(Sys.time(), "%Y%m%d-%H%M%S")
#     cores_tag <- ifelse(is.na(cores_per_alpha), "defaultcore", paste0(cores_per_alpha, "core"))
#     tag <- paste(snp_base, meth_base, cv_eval_mode, cores_tag, datetime_str, sep = "-")

#     # Generate and print SLURM script
#     slurm_script <- generate_slurm_script(constant_args_df, tag)
#     cat(slurm_script, "\n\n")

#     slurm_script_path <- paste0("slurm_scripts/", tag, ".sh")  # Define path to save SLURM script

      
#     # Save SLURM script to a file
#     writeLines(slurm_script, slurm_script_path)

#     # Submit the SLURM job using the sbatch command
#     system(paste("sbatch", slurm_script_path))

#     # Implement job submission limits and intervals if necessary
#     # For example, to wait for 1 second between submissions:
#     Sys.sleep(42)
#     message("ZZZZzzzz....")
#   }
# }
