# Generate job submission script for various computing environments

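This workflow writes a job script that wraps a `sos run` command so that it can be submitted to a cluster scheduler. The `farnam` step below provides a SLURM (`#SBATCH`) version of the script.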

In [23]:
[global]
# Job script file to be written to
parameter: to_script = path
# The workflow file to execute
parameter: workflow_file = path
# Path to job template
parameter: template_file = path
# Command arguments for the wrapped `sos run` call, one option per line
parameter: args = str

# Turn the multi-line argument text into a single shell command that spans
# several lines. Every line is stripped and blank lines are dropped, then the
# pieces are joined with " \" + newline + four spaces of indentation. The
# explicit space before the backslash matters: the shell removes a
# backslash-newline pair entirely, so without surrounding whitespace two
# consecutive options would be glued into one word. splitlines() also copes
# with "\r\n" line endings.
args = ' \\\n    '.join(line.strip() for line in args.splitlines() if line.strip())

In [29]:
[farnam]
# Write a SLURM batch script to `to_script`. The script requests one node with
# 4 CPUs, 16G of memory and a 3-day time limit, then runs `workflow_file` with
# the user-supplied `args`, using `template_file` as the SoS configuration and
# the `farnam` queue. Job name and SLURM output/error files are named after
# `to_script` without its extension.
# The paths placed on the command line use the `q` converter so that they are
# shell-quoted when they contain spaces or other special characters.
report: output = to_script, expand = True

    #!/bin/bash
    #SBATCH --partition general
    #SBATCH --nodes 1
    #SBATCH --ntasks-per-node 1
    #SBATCH --cpus-per-task 4
    #SBATCH --mem 16G
    #SBATCH --time 3-0:00:00
    #SBATCH --job-name {to_script:n}
    #SBATCH --output {to_script:n}-%J.out
    #SBATCH --error {to_script:n}-%J.log

    sos run {workflow_file:aq} \
        {args} \
        -c {template_file:aq} -q farnam -J 40 \
        &> {to_script:nq}.log

## Example

First, set the bash variables that hold the pipeline inputs:

In [30]:
# Set the bash variables
# Each variable below is substituted into the matching option of `cmd_args`
# in the next cell. Values starting with ~ are left unquoted so that the
# shell expands them to the home directory at assignment time.

# Passed as --cwd
cwd=~/scratch60/region_extract
# Passed as --region-file
region_file=~/scratch60/plink-clumping/asthma.sumstats_INT_BMI.sumstats.clumped_region_modified
# Passed as --pheno-path
pheno_path=/SAY/dbgapstg/scratch/UKBiobank/phenotype_files/pleiotropy_R01/phenotypesforanalysis/normalized_phenotypes/UKB_caucasians_BMIwaisthip_AsthmaAndT2D_INT-BMI_withagesex_041720
# Passed as --geno-path
geno_path=~/scratch60/plink-clumping/chr7_region/bgenfilepath.txt
# Passed as --bgen-sample-path
bgen_sample_file=/SAY/dbgapstg/scratch/UKBiobank/genotype_files/ukb39554_imputeddataset/ukb32285_imputedindiv.sample
# Passed as --sumstats-path
sumstats_path=/SAY/dbgapstg/scratch/UKBiobank/results/BOLTLMM_results/results_imputed_data/INT-BMI/ukb_imp_v3.UKB_caucasians_BMIwaisthip_AsthmaAndT2D_INT-BMI_withagesex_041720.BoltLMM.snp_stats.all_chr.gz
# Passed as --unrelated-samples
unrelated_samples=/SAY/dbgapstg/scratch/UKBiobank/genotype_files/pleiotropy_geneticfiles/unrelated_n307259/UKB_unrelatedcauc_phenotypes_asthmat2dbmiwaisthip_agesex_waisthipratio_040620
# Passed as --format-config-path
format_config_path=~/project/UKBB_GWAS_DEV/data/boltlmm_template.yml
# Passed as --job-size
job_size=10

Then, using the bash variables above, construct the command arguments for the pipeline:

In [31]:
# Collect the pipeline options in one multi-line string, one option per line.
# Double quotes let the shell substitute the variables set earlier while the
# line breaks are kept as part of the value.
cmd_args="
    --cwd $cwd 
    --region-file $region_file 
    --pheno-path $pheno_path 
    --geno-path $geno_path 
    --bgen-sample-path $bgen_sample_file 
    --sumstats-path $sumstats_path 
    --format-config-path $format_config_path 
    --unrelated-samples $unrelated_samples 
    --job-size $job_size
    -s build
"

Finally, generate the pipeline submission script,

In [32]:
# Run the `farnam` step of Get_Job_Script.ipynb to write the batch script
# named by --to-script. "$cmd_args" is quoted so the whole multi-line option
# string reaches --args as a single argument.
sos run Get_Job_Script.ipynb farnam \
    --workflow-file ~/project/UKBB_GWAS_DEV/workflow/Region_Extraction.ipynb \
    --to-script 070120-sos-INT-BMI-region.sbatch \
    --template-file ../farnam.yml \
    --args "$cmd_args"

INFO: Running farnam: 
INFO: farnam is completed.
INFO: Workflow farnam (ID=23148c69b66ef62a) is executed successfully with 1 completed step.


## Result

In [33]:
%preview 070120-sos-INT-BMI-region.sbatch -l -1


#!/bin/bash
#SBATCH --partition general
#SBATCH --nodes 1
#SBATCH --ntasks-per-node 1
#SBATCH --cpus-per-task 4
#SBATCH --mem 16G
#SBATCH --time 3-0:00:00
#SBATCH --job-name 070120-sos-INT-BMI-region
#SBATCH --output 070120-sos-INT-BMI-region-%J.out
#SBATCH --error 070120-sos-INT-BMI-region-%J.log

sos run /home/gw/project/UKBB_GWAS_DEV/workflow/Region_Extraction.ipynb \
    --cwd /home/gw/scratch60/region_extract \
    --region-file /home/gw/scratch60/plink-clumping/asthma.sumstats_INT_BMI.sumstats.clumped_region_modified \
    --pheno-path /SAY/dbgapstg/scratch/UKBiobank/phenotype_files/pleiotropy_R01/phenotypesforanalysis/normalized_phenotypes/UKB_caucasians_BMIwaisthip_AsthmaAndT2D_INT-BMI_withagesex_041720 \
    --geno-path /home/gw/scratch60/plink-clumping/chr7_region/bgenfilepath.txt \
    --bgen-sample-path /SAY/dbgapstg/scratch/UKBiobank/genotype_files/ukb39554_imputeddataset/ukb32285_imputedindiv.sample \
    --sumstats-path /SAY/dbgapstg/scratch/UKBiobank/results/BOLTLMM_re