This repository has been archived by the owner on Oct 5, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
255 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Parameters this module reads. They are only referenced here (no default is assigned), | ||
// so their values have to be supplied from outside this file. | ||
params.recalibrated_vcf_filename | ||
params.indel_filter_level | ||
params.snp_filter_level | ||
params.use_allele_specific_annotations | ||
|
||
// Runs GATK ApplyVQSR twice: the INDEL recalibration is applied to the input VCF, then the | ||
// SNP recalibration is applied to that intermediate file to produce the final VCF. | ||
// Reads params.recalibrated_vcf_filename, params.indel_filter_level, | ||
// params.snp_filter_level and params.use_allele_specific_annotations. | ||
process GATK_APPLY_RECALIBRATION { | ||
    container "us.gcr.io/broad-gatk/gatk:4.1.1.0" | ||
|
||
    input: | ||
    // The *_index inputs are never named on the command line; they are declared so that | ||
    // they are staged alongside the files they index. | ||
    path(input_vcf) | ||
    path(input_vcf_index) | ||
    path(indels_recalibration) | ||
    path(indels_recalibration_index) | ||
    path(indels_tranches) | ||
    path(snps_recalibration) | ||
    path(snps_recalibration_index) | ||
    path(snps_tranches) | ||
|
||
    output: | ||
    // NOTE(review): the .tbi output presumes the file name ends in .vcf.gz — confirm. | ||
    path("${params.recalibrated_vcf_filename}") | ||
    path("${params.recalibrated_vcf_filename}.tbi") | ||
|
||
    script: | ||
    // The filter levels and the output name live in params; they are not process inputs, | ||
    // so they must be referenced as params.* inside the script. | ||
    """ | ||
    set -euo pipefail | ||
    gatk --java-options -Xms5g ApplyVQSR \ | ||
    -O tmp.indel.recalibrated.vcf \ | ||
    -V ${input_vcf} \ | ||
    --recal-file ${indels_recalibration} \ | ||
    ${params.use_allele_specific_annotations ? '--use-allele-specific-annotations' : ''} \ | ||
    --tranches-file ${indels_tranches} \ | ||
    --truth-sensitivity-filter-level ${params.indel_filter_level} \ | ||
    --create-output-variant-index true \ | ||
    -mode INDEL | ||
    gatk --java-options -Xms5g ApplyVQSR \ | ||
    -O ${params.recalibrated_vcf_filename} \ | ||
    -V tmp.indel.recalibrated.vcf \ | ||
    --recal-file ${snps_recalibration} \ | ||
    ${params.use_allele_specific_annotations ? '--use-allele-specific-annotations' : ''} \ | ||
    --tranches-file ${snps_tranches} \ | ||
    --truth-sensitivity-filter-level ${params.snp_filter_level} \ | ||
    --create-output-variant-index true \ | ||
    -mode SNP | ||
    """ | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Runs GATK CollectVariantCallingMetrics on a VCF against a dbSNP VCF, restricted to the | ||
// given interval list, and emits the detail and summary metrics files. | ||
// Reads params.metrics_file_prefix as the prefix of both output files. | ||
// NOTE(review): no container is declared here, so gatk has to be available some other way. | ||
process GATK_COLLECT_VARIANT_CALLING_METRICS { | ||
|
||
    input: | ||
    // The index files are not named on the command line; they are declared so that they | ||
    // are staged next to the VCFs they belong to. | ||
    path(input_vcf) | ||
    path(input_vcf_index) | ||
    path(dbsnp_vcf) | ||
    path(dbsnp_vcf_index) | ||
    path(interval_list) | ||
    path(ref_dict) | ||
|
||
    output: | ||
    path("${params.metrics_file_prefix}.variant_calling_detail_metrics") | ||
    path("${params.metrics_file_prefix}.variant_calling_summary_metrics") | ||
|
||
    script: | ||
    // --OUTPUT must use the same parameter as the output block above, otherwise the | ||
    // declared output files are never produced under the expected names. | ||
    """ | ||
    set -euo pipefail | ||
    gatk --java-options -Xms6g CollectVariantCallingMetrics \ | ||
    --INPUT ${input_vcf} \ | ||
    --DBSNP ${dbsnp_vcf} \ | ||
    --SEQUENCE_DICTIONARY ${ref_dict} \ | ||
    --OUTPUT ${params.metrics_file_prefix} \ | ||
    --THREAD_COUNT 8 \ | ||
    --TARGET_INTERVALS ${interval_list} | ||
    """ | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Name of the gathered tranches file. Only referenced here (no default is assigned), so | ||
// the value has to be supplied from outside this file. | ||
params.output_filename | ||
|
||
// Gathers several tranches files into one with GATK GatherTranches. | ||
// The script copies every path listed in a file-of-filenames into ./tranches with gsutil, | ||
// giving up after RETRY_LIMIT failed attempts, and then passes the local copies to | ||
// GatherTranches. The result is written to the file named by params.output_filename. | ||
process GATK_GATHER_TRANCHES { | ||
    container "us.gcr.io/broad-gotc-prod/gatk4-joint-genotyping:1.3.0-1527875152" | ||
|
||
    input: | ||
    path(tranches) | ||
|
||
    output: | ||
    path("${params.output_filename}") | ||
|
||
    shell: | ||
    // NOTE: This assumes that gsutil is installed | ||
    // NOTE(review): write_lines is not defined in this file and is not a Nextflow built-in | ||
    // (it is a WDL function) — confirm that a helper with this name exists which writes the | ||
    // paths to a file and returns that file's name. | ||
    tranches_lines = write_lines(tranches) | ||
|
||
    ''' | ||
    set -euo pipefail | ||
    tranches_fofn=!{tranches_lines} | ||
    # Jose says: | ||
    # Cromwell will fall over if we have it try to localize tens of thousands of files, | ||
    # so we manually localize files using gsutil. | ||
    # Using gsutil also lets us parallelize the localization, which (as far as we can tell) | ||
    # PAPI doesn't do. | ||
    # This is here to deal with the JES bug where commands may be run twice | ||
    rm -rf tranches | ||
    mkdir tranches | ||
    RETRY_LIMIT=5 | ||
    count=0 | ||
    until cat $tranches_fofn | /root/google-cloud-sdk/bin/gsutil -m cp -L cp.log -c -I tranches/; do | ||
    sleep 1 | ||
    ((count++)) && ((count >= $RETRY_LIMIT)) && break | ||
    done | ||
    if [ "$count" -ge "$RETRY_LIMIT" ]; then | ||
    echo 'Could not copy all the tranches from the cloud' && exit 1 | ||
    fi | ||
    # Map each listed path to its basename under ./tranches | ||
    cat $tranches_fofn | rev | cut -d '/' -f 1 | rev | awk '{print "tranches/" $1}' > inputs.list | ||
    # The output name is a pipeline parameter, not a process variable. | ||
    /usr/gitc/gatk --java-options -Xms6g GatherTranches \ | ||
    --input inputs.list \ | ||
    --output !{params.output_filename} | ||
    ''' | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Placeholder process: it declares no inputs, no outputs and no script yet. | ||
process GATK_GATHER_VARIANT_CALLING_METRICS { | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Concatenates VCF shards into a single VCF with GATK GatherVcfsCloud and indexes the | ||
// result with tabix. | ||
// Input: the VCF shards to gather (input_vcfs). | ||
// Outputs: the file named by params.output_vcf_name and its .tbi index. | ||
process GATK_GATHER_VCFS { | ||
    container "us.gcr.io/broad-gatk/gatk:4.1.1.0" | ||
|
||
    input: | ||
    // The script referenced input_vcfs without declaring it; it has to be a process input. | ||
    path(input_vcfs) | ||
|
||
    output: | ||
    path("${params.output_vcf_name}") | ||
    path("${params.output_vcf_name}.tbi") | ||
|
||
    script: | ||
    // One "--input <file>" pair per shard, joined into a single argument string. | ||
    input_vcfs_str = input_vcfs.collect { "--input ${it}" }.join(' ') | ||
|
||
    """ | ||
    set -euo pipefail | ||
    # --ignore-safety-checks makes a big performance difference so we include it in our invocation. | ||
    # This argument disables expensive checks that the file headers contain the same set of | ||
    # genotyped samples and that files are in order by position of first record. | ||
    gatk --java-options -Xms6g GatherVcfsCloud \ | ||
    --ignore-safety-checks \ | ||
    --gather-type BLOCK \ | ||
    ${input_vcfs_str} \ | ||
    --output ${params.output_vcf_name} | ||
    tabix ${params.output_vcf_name} | ||
    """ | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Base name (without the .vcf.gz extension) of the output VCF. Only referenced here (no | ||
// default is assigned), so the value has to be supplied from outside this file. | ||
params.base_output_name | ||
|
||
// Restricts a VCF to the sites listed in a haplotype database: the database is converted | ||
// to an interval list with awk, then GATK SelectVariants is run with those intervals. | ||
// Outputs: <params.base_output_name>.vcf.gz and its .tbi index. | ||
// NOTE(review): no container is declared here, so gatk has to be available some other way. | ||
process GATK_SELECT_FINGERPRINT_SITE_VARIANTS { | ||
|
||
    input: | ||
    path(input_vcf) | ||
    path(haplotype_database) | ||
|
||
    output: | ||
    path("${params.base_output_name}.vcf.gz") | ||
    path("${params.base_output_name}.vcf.gz.tbi") | ||
|
||
    shell: | ||
|
||
    ''' | ||
    set -euo pipefail | ||
    # Header lines starting with @ are passed through, the #CHROM line is dropped, and every | ||
    # other line becomes a one-base interval built from its first two columns. | ||
    # awk's field separator variable is FS (IFS is not an awk variable and had no effect). | ||
    function hdb_to_interval_list() { | ||
    awk 'BEGIN{FS="\t";OFS="\t";} $0~"^@"{print;next;} $0~"#CHROM"{next;} {print $1,$2,$2,"+","interval-"NR}' "$1" | ||
    } | ||
    hdb_to_interval_list !{haplotype_database} > hdb.interval_list | ||
    gatk --java-options -Xms6g SelectVariants \ | ||
    --variant !{input_vcf} \ | ||
    --intervals hdb.interval_list \ | ||
    --output !{params.base_output_name}.vcf.gz | ||
    ''' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
nextflow.enable.dsl = 2 | ||
|
||
// Parameters this module reads. They are only referenced here (no default is assigned), | ||
// so their values have to be supplied from outside this file. | ||
params.model_report_filename | ||
params.recalibration_filename | ||
params.tranches_filename | ||
params.use_allele_specific_annotations | ||
|
||
// Builds the SNP recalibration model with GATK VariantRecalibrator from a sites-only VCF | ||
// and the hapmap, omni, 1000G and dbSNP resource VCFs. | ||
// Outputs: the recalibration file named by params.recalibration_filename, its .idx index, | ||
// and the tranches file named by params.tranches_filename. | ||
process GATK_SNPS_VARIANT_RECALIBRATOR { | ||
    container "us.gcr.io/broad-gatk/gatk:4.1.1.0" | ||
|
||
    input: | ||
    // The *_index inputs are never named on the command line; they are declared so that | ||
    // they are staged alongside the VCFs they index. | ||
    path(sites_only_variant_filtered_vcf) | ||
    path(sites_only_variant_filtered_vcf_index) | ||
    path(hapmap_resource_vcf) | ||
    path(omni_resource_vcf) | ||
    path(one_thousand_genomes_resource_vcf) | ||
    path(dbsnp_resource_vcf) | ||
    path(hapmap_resource_vcf_index) | ||
    path(omni_resource_vcf_index) | ||
    path(one_thousand_genomes_resource_vcf_index) | ||
    path(dbsnp_resource_vcf_index) | ||
|
||
    output: | ||
    path("${params.recalibration_filename}") | ||
    path("${params.recalibration_filename}.idx") | ||
    path("${params.tranches_filename}") | ||
|
||
    shell: | ||
    // NOTE(review): these values used to be referenced as bare names that nothing in this | ||
    // file defines. They are resolved from params / task below; the names | ||
    // recalibration_tranche_values, recalibration_annotation_values and max_gaussians are | ||
    // assumptions — confirm them against the calling workflow. | ||
    tranche_str = params.recalibration_tranche_values.join(" -tranche ") | ||
    an_str = params.recalibration_annotation_values.join(" -an ") | ||
    // NOTE(review): default of 6 is an assumption — confirm. | ||
    max_gaussians = params.max_gaussians ?: 6 | ||
    // Leave 1 GB of headroom below the task memory; fall back to 6 (the -Xms value the | ||
    // sibling modules hard-code) when no memory directive is set. | ||
    java_mem = task.memory ? Math.max(task.memory.toGiga() - 1, 1) : 6 | ||
    // NOTE(review): presumably params.model_report_filename is the optional input model | ||
    // report — confirm. The extra arguments are only passed when it is set. | ||
    model_report = params.model_report_filename ?: '' | ||
    model_report_arg = model_report ? '--input-model $MODEL_REPORT --output-tranches-for-scatter' : '' | ||
|
||
    ''' | ||
    set -euo pipefail | ||
    MODEL_REPORT=!{model_report} | ||
    gatk --java-options -Xms!{java_mem}g VariantRecalibrator \ | ||
    -V !{sites_only_variant_filtered_vcf} \ | ||
    -O !{params.recalibration_filename} \ | ||
    --tranches-file !{params.tranches_filename} \ | ||
    --trust-all-polymorphic \ | ||
    -tranche !{tranche_str} \ | ||
    -an !{an_str} \ | ||
    !{params.use_allele_specific_annotations ? '--use-allele-specific-annotations' : ''} \ | ||
    -mode SNP \ | ||
    !{model_report_arg} \ | ||
    --max-gaussians !{max_gaussians} \ | ||
    -resource:hapmap,known=false,training=true,truth=true,prior=15 !{hapmap_resource_vcf} \ | ||
    -resource:omni,known=false,training=true,truth=true,prior=12 !{omni_resource_vcf} \ | ||
    -resource:1000G,known=false,training=true,truth=false,prior=10 !{one_thousand_genomes_resource_vcf} \ | ||
    -resource:dbsnp,known=true,training=false,truth=false,prior=7 !{dbsnp_resource_vcf} | ||
    ''' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters