# Scripts for the annotation of 500K WES in the RAP system

## Create the annotation file with ANNOVAR for the 500K exomes

In [3]:
# Write the job script that runs the annovar workflow (annovar.ipynb) on the
# ukb23158 chr1-22 .bim variant file.
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/
annovar_sos=~/project/bioworkflows/variant-annotation/annovar.ipynb
# Built from annovar_dir (its trailing slash is stripped first); the file name
# carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir%/}/ukb23157_500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
bfiles=~/UKBiobank/RAP/ukb23158_c1_22_variants.bim
# Output prefix is the .bim file name without its extension.
name_prefix=$(basename "$bfiles" .bim)
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $bfiles
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix $name_prefix
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg[0m is [32mcompleted[0m.
INFO: [32mcsg[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/ukb23157_500Kexomes_annotation_2023-08-28.sbatch[0m
INFO: Workflow csg (ID=w246207202af7fa33) is executed successfully with 1 completed step.


# Regenerate the annotation job scripts, one per chromosome, from the new pvar files generated for the 500K WES data in the RAP system

In [4]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 1 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=1
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr1_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w1bb60b674888b8a5) is executed successfully with 1 completed step.


# Do the annotation with the new VEP pipeline

In [22]:
# Write the job script that runs the VEP annotation workflow (vep.ipynb) on
# the chromosome 1 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input and the script file name are
# derived from it.
chrom=1
tpl_file=~/project/bioworkflows/admin/csg.yml
vep_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
vep_sos=~/project/bioworkflows/variant-annotation/vep.ipynb
# Script name carries today's date (YYYY-MM-DD).
vep_sbatch="${vep_dir}/chr${chrom}_pgen500KWES_anno_vep_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar

# Reference data, cache and container passed to the workflow.
human_ancestor=/mnt/vast/hpc/csg/data_public/vep/human_ancestor.fa.gz
conservation_file=/mnt/vast/hpc/csg/data_public/vep/loftee.sql
gerp_bigwig=/mnt/vast/hpc/csg/data_public/vep/gerp_conservation_scores.homo_sapiens.GRCh38.bw
cadd_snps=/mnt/vast/hpc/csg/data_public/cadd/whole_genome_SNVs_inclAnno.tsv.gz
cadd_indels=/mnt/vast/hpc/csg/data_public/cadd/gnomad.genomes.r3.0.indel.tsv.gz
cache_version=110
clinvar_db=/mnt/vast/hpc/csg/data_public/clinvar/clinvar_20231028.vcf.gz
dir_cache=/mnt/vast/hpc/csg/data_public/vep
container=/mnt/vast/hpc/csg/containers/rare_variation_apptainer_latest.sif
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
# NOTE(review): unlike the annovar cells, this string does not start with a
# workflow name - confirm that running vep.ipynb's default workflow is intended.
# NOTE(review): a .pvar file is passed as --vcf - confirm the workflow accepts it.
vep_args="
    --cwd $vep_dir
    --vcf $pvar_files
    --human_ancestor $human_ancestor
    --conservation_file $conservation_file
    --gerp_bigwig $gerp_bigwig
    --cadd_snps $cadd_snps
    --cadd_indels $cadd_indels
    --cache_version $cache_version
    --clinvar_db $clinvar_db
    --dir_cache $dir_cache
    --no-output-vcf
    --job_size 1
    --walltime $walltime
    --mem $mem
    --container $container
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$vep_sos" \
    --to-script "$vep_sbatch" \
    --args "$vep_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr1_pgen500KWES_anno_vep_2023-11-01.sbatch[0m
INFO: Workflow csg_mamba (ID=wdd4fce22ef76a4b0) is executed successfully with 1 completed step.


In [5]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 2 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=2
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr2_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wd11a0694f160764b) is executed successfully with 1 completed step.


In [6]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 3 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=3
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr3_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w91a619f5b7fa5eb0) is executed successfully with 1 completed step.


In [7]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 4 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=4
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr4_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w9d8f641546b60bed) is executed successfully with 1 completed step.


In [8]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 5 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=5
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr5_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wd9ba3c4f5e9d9d2e) is executed successfully with 1 completed step.


In [9]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 6 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=6
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr6_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w907da99513e20715) is executed successfully with 1 completed step.


In [10]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 7 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=7
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr7_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wffac8df3f43dc6a7) is executed successfully with 1 completed step.


In [11]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 8 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=8
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr8_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w66d4fec5d863106d) is executed successfully with 1 completed step.


In [12]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 9 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=9
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr9_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wcb548cbf71f5766e) is executed successfully with 1 completed step.


In [13]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 10 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=10
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr10_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w682210e09023c559) is executed successfully with 1 completed step.


In [14]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 11 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=11
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr11_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w9fd0da83d47c1fa5) is executed successfully with 1 completed step.


In [15]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 12 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=12
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr12_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w7f5a432965d82dd8) is executed successfully with 1 completed step.


In [16]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 13 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=13
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr13_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w154529b122f013c8) is executed successfully with 1 completed step.


In [17]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 14 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=14
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr14_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w2b8ea0f24aa69c3a) is executed successfully with 1 completed step.


In [18]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 15 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=15
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr15_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w379a566b42058287) is executed successfully with 1 completed step.


In [19]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 16 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=16
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr16_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w4edd46f7aac30c9d) is executed successfully with 1 completed step.


In [20]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 17 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=17
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr17_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w0d30a2fa6d09d355) is executed successfully with 1 completed step.


In [21]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 18 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=18
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr18_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wd38a864a42ee732c) is executed successfully with 1 completed step.


In [22]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 19 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=19
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr19_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w9e23dc3d91b6f40c) is executed successfully with 1 completed step.


In [23]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 20 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=20
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr20_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w96103edc501896ae) is executed successfully with 1 completed step.


In [24]:
# Write the job script that runs the annovar-rap workflow on the
# chromosome 21 pvar file (ukb23157 500K exomes).
# Set the chromosome once; the pvar input, --name_prefix and the script
# file name are all derived from it.
chrom=21
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
# Script name carries today's date (YYYY-MM-DD).
annovar_sbatch="${annovar_dir}/chr${chrom}_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c${chrom}.concat.pvar
walltime="60h"
mem="30G"

# Argument string handed to Get_Job_Script.ipynb through --args.
annovar_args="annovar
    --cwd $annovar_dir
    --bim_name $pvar_files
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb
    --job_size 1
    --build 'hg38'
    --name_prefix c${chrom}
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

# Expansions are quoted so each value reaches sos as a single argument.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr21_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=w64af5edc2ca7cab2) is executed successfully with 1 completed step.


In [27]:
# Build the dated sbatch script for the `annovar` step of annovar-rap.ipynb on chromosome 22.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Job-script template and workflow notebook handed to Get_Job_Script.ipynb.
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb

# Directory passed as --cwd; the generated .sbatch file is written into the same directory.
annovar_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation
annovar_sbatch="${annovar_dir}/chr22_pgen500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"

# Chromosome 22 pvar file, passed through the workflow's --bim_name option.
pvar_files=~/UKBiobank/RAP/pgen_files/ukb23157_c22.concat.pvar

# Values forwarded as --walltime and --mem.
walltime=60h
mem=30G

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
annovar_args="annovar
    --cwd ${annovar_dir} 
    --bim_name ${pvar_files} 
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb 
    --job_size 1 
    --build 'hg38' 
    --name_prefix c22
    --walltime ${walltime}
    --mem ${mem}
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "${tpl_file}" \
    --workflow-file "${annovar_sos}" \
    --to-script "${annovar_sbatch}" \
    --args "${annovar_args}"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/chr22_pgen500Kexomes_annotation_2023-10-18.sbatch[0m
INFO: Workflow csg_mamba (ID=wdcb9aeacf1c4afee) is executed successfully with 1 completed step.


## Create the anno_file, set_list_file and mask_files needed for the burden test

In [5]:
# Build the dated sbatch script for the `burden_files` step of annovar-rap.ipynb on chromosome 22.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Directory passed as --cwd; the generated .sbatch file is written into the same directory.
burden_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files
anno_sbatch_burden="${burden_dir}/ukb23157_500Kexomes_burdenfiles_$(date +"%Y-%m-%d").sbatch"

# Inputs: the chr22 multianno CSV and the matching bim file.
annotated_file_hg38=~/UKBiobank/results/ukb23157_500Kexomes_annovar/chr22.hg38.hg38_multianno.csv
bim_name=~/UKBiobank/RAP/chr22.bim
job_size=1
name_prefix='c22'

# Workflow notebook, job-script template and container image.
anno_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
tpl_file=~/project/bioworkflows/admin/csg.yml
container_annovar=~/containers/gatk4-annovar.sif

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
anno_args="burden_files
    --cwd $burden_dir
    --annotated_file $annotated_file_hg38
    --bim_name $bim_name
    --name_prefix $name_prefix
    --job_size $job_size
    --container_annovar $container_annovar
"

# Each option value is quoted and every continuation backslash is preceded by a space,
# so the command no longer depends on the next line's indentation to separate arguments.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$anno_sos" \
    --to-script "$anno_sbatch_burden" \
    --args "$anno_args"


INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files/ukb23157_500Kexomes_burdenfiles_2023-09-20.sbatch[0m
INFO: Workflow csg_mamba (ID=w3b02eb833862c33c) is executed successfully with 1 completed step.



In [8]:
# Build the dated sbatch script for the `burden_files` step of annovar-rap.ipynb on chromosomes 1-22.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Directory passed as --cwd; the generated .sbatch file is written into the same directory.
burden_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files
anno_sbatch_burden="${burden_dir}/ukb23157_c1_22_500Kexomes_burdenfiles_$(date +"%Y-%m-%d").sbatch"

# Space-separated list of the 22 per-chromosome multianno CSVs.
# Brace expansion runs before tilde expansion, so every generated word gets its ~ expanded.
annotated_file_hg38=$(echo ~/UKBiobank/results/ukb23157_500Kexomes_annovar/chr{1..22}.hg38.hg38_multianno.csv)
bim_name=~/UKBiobank/RAP/ukb23158_c1_22_variants.bim
job_size=1
name_prefix='c1_c22'

# Workflow notebook, job-script template and container image.
anno_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
tpl_file=~/project/bioworkflows/admin/csg.yml
container_annovar=~/containers/gatk4-annovar.sif

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
anno_args="burden_files
    --cwd $burden_dir
    --annotated_file $annotated_file_hg38
    --bim_name $bim_name
    --name_prefix $name_prefix
    --job_size $job_size
    --container_annovar $container_annovar
"

# Each option value is quoted and every continuation backslash is preceded by a space,
# so the command no longer depends on the next line's indentation to separate arguments.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$anno_sos" \
    --to-script "$anno_sbatch_burden" \
    --args "$anno_args"


INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files/ukb23157_c1_22_500Kexomes_burdenfiles_2023-09-22.sbatch[0m
INFO: Workflow csg_mamba (ID=we7b7eb07f5915300) is executed successfully with 1 completed step.



In [1]:
# Build the dated sbatch script for the `burden_files` step of annovar-rap.ipynb on chromosome X,
# combining the ANNOVAR multianno CSV with the VEP/CADD/gnomAD formatted annotation.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Directory passed as --cwd; the generated .sbatch file is written into the same directory.
burden_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files
anno_sbatch_burden="${burden_dir}/VEP_ukb23157_cX_500KWES_burdenfiles_$(date +"%Y-%m-%d").sbatch"

# Annotation inputs and the matching bim file for chromosome X.
annovar_anno=~/UKBiobank/results/ukb23157_500Kexomes_annovar/chrx.hg38.hg38_multianno.csv
vep_anno=~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/ukb23157_cX.concat.VEP.CADD_gnomAD.formatted.tsv.gz
bim_name=~/UKBiobank/RAP/chrx.bim
job_size=1
name_prefix='cx'

# Workflow notebook, job-script template and container image.
anno_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
tpl_file=~/project/bioworkflows/admin/csg.yml
container_annovar=~/containers/gatk4-annovar.sif

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
anno_args="burden_files
    --cwd $burden_dir
    --annovar_anno $annovar_anno
    --vep_anno $vep_anno
    --bim_name $bim_name
    --name_prefix $name_prefix
    --job_size $job_size
    --container_annovar $container_annovar
"

# Each option value is quoted and every continuation backslash is preceded by a space,
# so the command no longer depends on the next line's indentation to separate arguments.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$anno_sos" \
    --to-script "$anno_sbatch_burden" \
    --args "$anno_args"


INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files/VEP_ukb23157_cX_500KWES_burdenfiles_2023-11-06.sbatch[0m
INFO: Workflow csg_mamba (ID=w748686bf2cfbe372) is executed successfully with 1 completed step.


## Create burden_file using annovar and VEP annotation

In [4]:
# Build the dated sbatch script for the `burden_files` step of annovar-rap.ipynb on chromosomes 3-22,
# combining the ANNOVAR multianno CSVs with the VEP/CADD/gnomAD formatted annotations.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Directory passed as --cwd; the generated .sbatch file is written into the same directory.
burden_dir=~/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files/CADD_annotation
anno_sbatch_burden="${burden_dir}/VEP_ukb23157_c3_22_500KWES_burdenfiles_$(date +"%Y-%m-%d").sbatch"

# Space-separated per-chromosome lists, all generated over the same chr3..22 range.
# Brace expansion runs before tilde expansion, so every generated word gets its ~ expanded.
annovar_anno=$(echo ~/UKBiobank/results/ukb23157_500Kexomes_annovar/chr{3..22}.hg38.hg38_multianno.csv)
vep_anno=$(echo ~/UKBiobank/results/ukb23157_500Kexomes_annovar/pgen_annotation/ukb23157_c{3..22}.concat.VEP.CADD_gnomAD.formatted.tsv.gz)
bim_name=$(echo ~/UKBiobank/RAP/chr{3..22}.bim)
job_size=1
name_prefix='c3-22'

# Workflow notebook, job-script template and container image.
anno_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb
tpl_file=~/project/bioworkflows/admin/csg.yml
container_annovar=~/containers/gatk4-annovar.sif

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
anno_args="burden_files
    --cwd $burden_dir
    --annovar_anno $annovar_anno
    --vep_anno $vep_anno
    --bim_name $bim_name
    --name_prefix $name_prefix
    --job_size $job_size
    --container_annovar $container_annovar
"

# Each option value is quoted and every continuation backslash is preceded by a space,
# so the command no longer depends on the next line's indentation to separate arguments.
sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$anno_sos" \
    --to-script "$anno_sbatch_burden" \
    --args "$anno_args"


INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/results/ukb23157_500Kexomes_annovar/burden_files/CADD_annotation/VEP_ukb23157_c3_22_500KWES_burdenfiles_2023-11-06.sbatch[0m
INFO: Workflow csg_mamba (ID=w50822144d7bcc813) is executed successfully with 1 completed step.


## Annotation of significant results from the univariate analysis (RAP)

### Haid

In [1]:
# Build the dated sbatch script that annotates the Haid variants listed in Haid_pval5e-08.txt.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.
tpl_file=~/project/bioworkflows/admin/csg.yml
# Run in the Haid results directory, matching the Hdiff/Hnoise/Hboth cells; the sbatch script
# and the input table already live here. Previously --cwd pointed at the exome-wide annotation
# directory (~/UKBiobank/results/ukb23157_500Kexomes_annovar/).
cwd=~/UKBiobank/RAP/results/autosomal/univariate/Haid/
# NOTE(review): the Hdiff/Hnoise/Hboth cells use annovar-rap.ipynb with the same arguments;
# confirm whether annovar.ipynb is intentional for Haid before re-running.
annovar_sos=~/project/bioworkflows/variant-annotation/annovar.ipynb
annovar_sbatch=~/UKBiobank/RAP/results/autosomal/univariate/Haid/Haid_500Kexomes_annotation_$(date +"%Y-%m-%d").sbatch
# Table of variants to annotate, passed through the workflow's --bim_name option.
sumstat_file=~/UKBiobank/RAP/results/autosomal/univariate/Haid/Haid_pval5e-08.txt
walltime="60h"
mem="30G"

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
annovar_args="annovar
    --cwd $cwd 
    --bim_name $sumstat_file
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb 
    --job_size 1 
    --build 'hg38' 
    --name_prefix Haid_sig_variants
    --walltime $walltime
    --mem $mem
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "$tpl_file" \
    --workflow-file "$annovar_sos" \
    --to-script "$annovar_sbatch" \
    --args "$annovar_args"

Traceback (most recent call last):
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/site-packages/calysto_bash/__main__.py", line 2, in <module>
    from .kernel import BashKernel
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/site-packages/calysto_bash/kernel.py", line 6, in <module>
    from metakernel import MetaKernel
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/site-packages/metakernel/__init__.py", line 2, in <module>
    from ._metakernel import (
  File "/mnt/mfs/hgrcgrid/homes/dmc2245/micromamba/envs/pisces-rabbit/lib/python3.10/site-packages/metake

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/RAP/results/autosomal/univariate/Haid/Haid_500Kexomes_annotation_2023-09-27.sbatch[0m
INFO: Workflow csg_mamba (ID=wd9ae5ebb8b5e68e7) is executed successfully with 1 completed step.



### Hdiff

In [2]:
# Build the dated sbatch script that annotates the Hdiff variants listed in Hdiff_pval5e-08.txt
# with the `annovar` step of annovar-rap.ipynb.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Job-script template and workflow notebook handed to Get_Job_Script.ipynb.
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb

# Trait results directory (keeps its trailing slash); the script and input table live in it.
cwd=~/UKBiobank/RAP/results/autosomal/univariate/Hdiff/
annovar_sbatch="${cwd}Hdiff_500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
# Table of variants to annotate, passed through the workflow's --bim_name option.
sumstat_file="${cwd}Hdiff_pval5e-08.txt"

# Values forwarded as --walltime and --mem.
walltime=60h
mem=30G

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
annovar_args="annovar
    --cwd ${cwd} 
    --bim_name ${sumstat_file}
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb 
    --job_size 1 
    --build 'hg38' 
    --name_prefix Hdiff_sig_variants
    --walltime ${walltime}
    --mem ${mem}
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "${tpl_file}" \
    --workflow-file "${annovar_sos}" \
    --to-script "${annovar_sbatch}" \
    --args "${annovar_args}"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/RAP/results/autosomal/univariate/Hdiff/Hdiff_500Kexomes_annotation_2023-09-28.sbatch[0m
INFO: Workflow csg_mamba (ID=w2a12aa29b0a54831) is executed successfully with 1 completed step.



### Hnoise

In [3]:
# Build the dated sbatch script that annotates the Hnoise variants listed in Hnoise_pval5e-08.txt
# with the `annovar` step of annovar-rap.ipynb.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Job-script template and workflow notebook handed to Get_Job_Script.ipynb.
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb

# Trait results directory (keeps its trailing slash); the script and input table live in it.
cwd=~/UKBiobank/RAP/results/autosomal/univariate/Hnoise/
annovar_sbatch="${cwd}Hnoise_500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
# Table of variants to annotate, passed through the workflow's --bim_name option.
sumstat_file="${cwd}Hnoise_pval5e-08.txt"

# Values forwarded as --walltime and --mem.
walltime=60h
mem=30G

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
annovar_args="annovar
    --cwd ${cwd} 
    --bim_name ${sumstat_file}
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb 
    --job_size 1 
    --build 'hg38' 
    --name_prefix Hnoise_sig_variants
    --walltime ${walltime}
    --mem ${mem}
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "${tpl_file}" \
    --workflow-file "${annovar_sos}" \
    --to-script "${annovar_sbatch}" \
    --args "${annovar_args}"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/RAP/results/autosomal/univariate/Hnoise/Hnoise_500Kexomes_annotation_2023-09-28.sbatch[0m
INFO: Workflow csg_mamba (ID=wd9e9c63aa988089a) is executed successfully with 1 completed step.



### Hboth

In [4]:
# Build the dated sbatch script that annotates the Hboth variants listed in Hboth_pval5e-08.txt
# with the `annovar` step of annovar-rap.ipynb.
# Nothing is submitted here: Get_Job_Script.ipynb only writes the script named by --to-script.

# Job-script template and workflow notebook handed to Get_Job_Script.ipynb.
tpl_file=~/project/bioworkflows/admin/csg.yml
annovar_sos=~/project/bioworkflows/variant-annotation/annovar-rap.ipynb

# Trait results directory (keeps its trailing slash); the script and input table live in it.
cwd=~/UKBiobank/RAP/results/autosomal/univariate/Hboth/
annovar_sbatch="${cwd}Hboth_500Kexomes_annotation_$(date +%Y-%m-%d).sbatch"
# Table of variants to annotate, passed through the workflow's --bim_name option.
sumstat_file="${cwd}Hboth_pval5e-08.txt"

# Values forwarded as --walltime and --mem.
walltime=60h
mem=30G

# Argument string forwarded verbatim through --args (variables are expanded now, in this shell).
annovar_args="annovar
    --cwd ${cwd} 
    --bim_name ${sumstat_file}
    --humandb /mnt/vast/hpc/csg/isabelle/REF/humandb
    --xref_path /mnt/vast/hpc/csg/isabelle/REF/humandb 
    --job_size 1 
    --build 'hg38' 
    --name_prefix Hboth_sig_variants
    --walltime ${walltime}
    --mem ${mem}
    --container_annovar /mnt/mfs/statgen/containers/gatk4-annovar.sif
"

sos run ~/project/UKBB_GWAS_dev/admin/Get_Job_Script.ipynb csg_mamba \
    --template-file "${tpl_file}" \
    --workflow-file "${annovar_sos}" \
    --to-script "${annovar_sbatch}" \
    --args "${annovar_args}"

INFO: Running [32mcsg_mamba[0m: Configuration for Columbia csg partition cluster
INFO: [32mcsg_mamba[0m is [32mcompleted[0m.
INFO: [32mcsg_mamba[0m output:   [32m/home/dmc2245/UKBiobank/RAP/results/autosomal/univariate/Hboth/Hboth_500Kexomes_annotation_2023-09-28.sbatch[0m
INFO: Workflow csg_mamba (ID=w257421ebaa5dd1ce) is executed successfully with 1 completed step.

