# Prep SLURM commands

In [1]:
# Show the current working directory before it is changed in the next cell.
getwd()

In [2]:
# Move up one directory. The relative paths used further down ("./output/",
# "slurm_scripts/", "scripts/CLI.R") are resolved against this location.
# NOTE(review): presumably the parent is the project root -- confirm.
setwd("../")

In [3]:
# Split the inclusive range [start, end] into consecutive, non-overlapping
# chunks of (at most) equal size.
#
# Args:
#   start:      first index of the range (inclusive).
#   end:        last index of the range (inclusive).
#   num_chunks: requested number of chunks; a single number >= 1.
#
# Returns:
#   A numeric matrix with 2 rows and one column per chunk. Row 1 holds the
#   chunk starts, row 2 the (inclusive) chunk ends. Fewer than `num_chunks`
#   columns are returned when the range cannot fill that many chunks, and a
#   2 x 0 matrix is returned when `end < start`.
generate_chunk_ranges <- function(start, end, num_chunks) {
  stopifnot(
    is.numeric(num_chunks),
    length(num_chunks) == 1,
    !is.na(num_chunks),
    num_chunks >= 1
  )

  # Nothing to split: return an empty 2-row matrix instead of bogus ranges.
  if (end < start) {
    return(matrix(numeric(0), nrow = 2))
  }

  chunk_size <- ceiling((end - start + 1) / num_chunks)
  chunk_starts <- start + (seq_len(num_chunks) - 1) * chunk_size

  # Rounding the chunk size up can leave trailing chunks that begin past
  # `end`; drop them so every returned chunk satisfies start <= end.
  chunk_starts <- chunk_starts[chunk_starts <= end]
  chunk_ends <- pmin(chunk_starts + chunk_size - 1, end)

  # unname() keeps the result free of dimnames, like the sapply() version.
  unname(rbind(chunk_starts, chunk_ends))
}

# Split indices 1..10000 into 5 chunks. The resulting 2-row matrix
# (row 1 = chunk starts, row 2 = chunk ends) drives the job-submission
# loop further down, one column per job.
chunk_ranges <- generate_chunk_ranges(start = 1, end = 10000, num_chunks = 5)
print(chunk_ranges)

     [,1] [,2] [,3] [,4]  [,5]
[1,]    1 2001 4001 6001  8001
[2,] 2000 4000 6000 8000 10000


In [4]:
# List the contents of the directory that holds the covariate file and the
# methylation .rds input referenced in the argument setup below.
list.files("/dcs04/lieber/statsgen/mnagle/mwas/")

In [5]:
# Constant Arguments Setup
# One-row data frame of arguments shared by every submitted job. Each column
# is rendered by generate_slurm_script() as a `--<column>=<value>` flag for
# scripts/CLI.R, in column order. The submission loop adds chunk1, chunk2,
# cv_eval_mode, num_cores and (optionally) cores_per_alpha as extra columns.
constant_args_df <- data.frame(
  outdir = "./output/",
  snp_data_path = "/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen",
  methylation_data_path = "/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda",
  cov = "/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv",
  verbose = FALSE,
  lambda_choice = "1se",
  # Alternative: a comma-separated grid of alphas passed as one string.
  #alphas = "0,0.25,0.5,0.75,1",
  alphas = 0.5,
  allow_inefficient_parallelization = FALSE,
  n_fold = 5,
  window_sizes = "10000",
  # Alternative: a larger window.
  #window_sizes = "500000",
  save_evaluation_results_each_fold = FALSE,
  save_glmnet_object = FALSE,
  omit_folds_with_na_r = TRUE,
  methInput_rds_path = "/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylation_10k_samples_a3.rds"
)

# Varying parameters
# The submission loop creates one job per combination of the vectors below
# (and per chunk).
cv_eval_modes <- c("dynamic")
# Add NA to this vector to also submit jobs that omit --cores_per_alpha, so
# that the default value is used.
cores_per_alphas <- c("all")
num_cores_options <- c(1)

# Build the text of an sbatch script that runs scripts/CLI.R with the given
# arguments, together with the path the script should be written to.
#
# Args:
#   args:        one-row data frame (or named list of length-1 values); every
#                element is rendered as a `--<name>=<value>` flag. If it has
#                no `num_cores` element, or `num_cores` is "all", the job
#                requests an exclusive node instead of a CPU count.
#   tag_pt1:     first part of the job tag (describes the parameter set).
#   tag_pt2:     second part of the job tag (e.g. a timestamp).
#   mem_per_cpu: value for `#SBATCH --mem-per-cpu`.
#
# Returns:
#   A list with `script` (the script text as a single string) and `path`
#   ("slurm_scripts/<tag_pt1>-<tag_pt2>.sh"). Nothing is written to disk.
generate_slurm_script <- function(args, tag_pt1, tag_pt2, mem_per_cpu = "13G") {
  # Render one argument value as text. Numbers are written in fixed notation
  # so that e.g. 200000 does not reach the command line as "2e+05".
  format_arg_value <- function(value) {
    if (is.numeric(value)) {
      format(value, scientific = FALSE, trim = TRUE, digits = 15)
    } else {
      as.character(value)
    }
  }

  # `[[` instead of `$` avoids partial matching against longer names.
  num_cores <- args[["num_cores"]]
  cpus_per_task <- if (is.null(num_cores) || num_cores == "all") {
    "#SBATCH --exclusive\n"
  } else {
    paste0("#SBATCH --cpus-per-task=", format_arg_value(num_cores), "\n")
  }

  # Always set mem_per_cpu flag, even in exclusive mode
  mem_allocation <- paste0("#SBATCH --mem-per-cpu=", mem_per_cpu, "\n")

  job_tag <- paste0(tag_pt1, "-", tag_pt2)

  arg_values <- vapply(args, format_arg_value, character(1))
  args_string <- paste0("--", names(args), "=", arg_values, collapse = " ")
  args_string <- paste0(args_string, " --tag=", job_tag)

  slurm_script <- paste0(
    "#!/bin/bash\n",
    cpus_per_task,
    mem_allocation,
    "#SBATCH --output=slurm_output_", job_tag, ".out\n",
    "#SBATCH --job-name=", job_tag, "\n",
    "module load conda\n",
    "conda activate mwas\n",
    # `export` is required for the Rscript child process to inherit TMPDIR.
    "export TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp\n",
    "echo 'Executing Rscript with arguments: Rscript scripts/CLI.R ", args_string, "'\n",
    "Rscript scripts/CLI.R ", args_string, "\n"
  )

  list(
    script = slurm_script,
    path = paste0("slurm_scripts/", job_tag, ".sh")
  )
}

# Overwrite flag (set by user). When FALSE, a parameter combination is
# skipped if an .rds result carrying its tag already exists in the output
# directory.
overwrite <- FALSE

# Submit one SLURM job per chunk and per combination of the varying
# parameters. constant_args_df is updated in place for each combination and
# passed as the argument set for that job.
for (chunk_range in seq_len(ncol(chunk_ranges))) {
  constant_args_df$chunk1 <- chunk_ranges[1, chunk_range]
  constant_args_df$chunk2 <- chunk_ranges[2, chunk_range]

  # Loop through each combination
  for (cv_eval_mode in cv_eval_modes) {
    for (cores_per_alpha in cores_per_alphas) {
      for (num_cores in num_cores_options) {
        # Update constant_args_df for the current combination
        constant_args_df$cv_eval_mode <- cv_eval_mode
        constant_args_df$num_cores <- num_cores
        if (is.na(cores_per_alpha)) {
          # Dropping the column omits the --cores_per_alpha flag entirely.
          constant_args_df$cores_per_alpha <- NULL
        } else {
          constant_args_df$cores_per_alpha <- cores_per_alpha
        }

        # Generate tags
        snp_base <- tools::file_path_sans_ext(
          basename(constant_args_df$snp_data_path)
        )
        meth_base <- tools::file_path_sans_ext(
          basename(constant_args_df$methylation_data_path)
        )
        cores_per_alpha_tag <- if (is.na(cores_per_alpha)) {
          "defaultcore"
        } else {
          paste0(cores_per_alpha, "corepera")
        }
        tag_pt1 <- paste(
          snp_base,
          meth_base,
          format(constant_args_df$chunk1, scientific = FALSE),
          format(constant_args_df$chunk2, scientific = FALSE),
          cv_eval_mode,
          paste0(num_cores, "corestotal"),
          cores_per_alpha_tag,
          sep = "-"
        )
        tag_pt2 <- format(Sys.time(), "%Y%m%d-%H%M%S")

        # Check for existing results. The tag is matched as a literal
        # substring of the file name: it is not a regular expression, and
        # characters such as "." or "+" in the base names must not be
        # interpreted as one.
        output_files <- list.files(
          path = constant_args_df$outdir,
          full.names = TRUE
        )
        has_tag <- grepl(tag_pt1, basename(output_files), fixed = TRUE)
        is_rds <- grepl("\\.rds", output_files)
        existing_files <- output_files[has_tag & is_rds]
        if (!overwrite && length(existing_files) > 0) {
          message("File with tag ", tag_pt1, " already exists. Skipping...")
          next
        }

        # Generate and print SLURM script
        script_info <- generate_slurm_script(
          constant_args_df, tag_pt1, tag_pt2,
          mem_per_cpu = "16G"
        )

        cat(script_info$script)

        # writeLines() cannot create missing directories, so make sure the
        # target directory exists first.
        dir.create(
          dirname(script_info$path),
          showWarnings = FALSE,
          recursive = TRUE
        )
        writeLines(script_info$script, script_info$path)

        # Submit the SLURM job using the sbatch command and surface failures
        # instead of silently continuing.
        submit_status <- system(paste("sbatch", shQuote(script_info$path)))
        if (submit_status != 0) {
          warning(
            "sbatch exited with status ", submit_status,
            " for ", script_info$path,
            call. = FALSE
          )
        }

        # Implement job submission limits and intervals if necessary
        #Sys.sleep(sample(c(1:21), 1))
        message("ZZZZzzzz....")
      }
    }
  }
}


#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-1-2000-dynamic-1corestotal-allcorepera-20240403-124319.out
#SBATCH --job-name=libd_chr1-chr1_all-1-2000-dynamic-1corestotal-allcorepera-20240403-124319
module load conda
conda activate mwas
TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/ --snp_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen --methylation_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=10000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylation_10k_

ZZZZzzzz....



#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-2001-4000-dynamic-1corestotal-allcorepera-20240403-124319.out
#SBATCH --job-name=libd_chr1-chr1_all-2001-4000-dynamic-1corestotal-allcorepera-20240403-124319
module load conda
conda activate mwas
TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/ --snp_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen --methylation_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=10000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylatio

ZZZZzzzz....



#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-4001-6000-dynamic-1corestotal-allcorepera-20240403-124319.out
#SBATCH --job-name=libd_chr1-chr1_all-4001-6000-dynamic-1corestotal-allcorepera-20240403-124319
module load conda
conda activate mwas
TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/ --snp_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen --methylation_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=10000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylatio

ZZZZzzzz....



#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-6001-8000-dynamic-1corestotal-allcorepera-20240403-124319.out
#SBATCH --job-name=libd_chr1-chr1_all-6001-8000-dynamic-1corestotal-allcorepera-20240403-124319
module load conda
conda activate mwas
TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/ --snp_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen --methylation_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=10000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylatio

ZZZZzzzz....



#!/bin/bash
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=16G
#SBATCH --output=slurm_output_libd_chr1-chr1_all-8001-10000-dynamic-1corestotal-allcorepera-20240403-124320.out
#SBATCH --job-name=libd_chr1-chr1_all-8001-10000-dynamic-1corestotal-allcorepera-20240403-124320
module load conda
conda activate mwas
TMPDIR=/dcs04/lieber/statsgen/mnagle/mwas/CpGWAS/temp_temp
echo 'Executing Rscript with arguments: Rscript scripts/CLI.R --outdir=./output/ --snp_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/gwas/libd_chr1.pgen --methylation_data_path=/dcs04/lieber/statsgen/shizhong/michael/mwas/pheno/dlpfc/out/chr1_all.rda --cov=/dcs04/lieber/statsgen/mnagle/mwas/full_covariates/all_dlpfc.csv --verbose=FALSE --lambda_choice=1se --alphas=0.5 --allow_inefficient_parallelization=FALSE --n_fold=5 --window_sizes=10000 --save_evaluation_results_each_fold=FALSE --save_glmnet_object=FALSE --omit_folds_with_na_r=TRUE --methInput_rds_path=/dcs04/lieber/statsgen/mnagle/mwas/chr1_dfplc_all_methylat

ZZZZzzzz....

