-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ambarish_Kumar_SOP-GATK-SAR-CoV-2.cwl
153 lines (133 loc) · 4.18 KB
/
Ambarish_Kumar_SOP-GATK-SAR-CoV-2.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env cwl-runner
# Document header: CWL version and process class.
cwlVersion: v1.0
class: Workflow

# Steps in this workflow assign inputs through `valueFrom`, which a
# Workflow only permits when StepInputExpressionRequirement is declared.
requirements:
  - class: StepInputExpressionRequirement

# Free-text description of the workflow (literal block, newlines kept).
doc: |
  Author: AMBARISH KUMAR er.ambarish@gmail.com & ambari73_sit@jnu.ac.in
  This is a proposed standard operating procedure for genomic variant detection using GATK4.
  It is hoped to be effective and useful for getting SARS-CoV-2 genome variants.
  It uses Illumina RNASEQ reads and genome sequence.
# Workflow inputs: the reference genome and two read files.
# `format` values are EDAM ontology terms (the `edam:` prefix is declared
# under `$namespaces`).
inputs:
  # Reference genome sequence.
  sars_cov_2_reference_genome:
    type: File
    format: edam:format_1929  # FASTA

  # "Left" reads; passed to the aligner as `filelist`.
  rnaseq_left_reads:
    type: File
    format: edam:format_1930  # FASTQ

  # "Right" reads; passed to the aligner as `filelist_mates`.
  rnaseq_right_reads:
    type: File
    format: edam:format_1930  # FASTQ
# Processing steps. Execution order is determined by the data links
# (`<step>/<output>` sources), not by the order in which steps are listed.
steps:
  # Build a Bowtie 2 index of the reference under the base name "sars-cov-2".
  index_reference_genome_with_bowtie2:
    run: ../tools/bowtie2/bowtie2_build.cwl
    in:
      reference_in: sars_cov_2_reference_genome
      bt2_index_base:
        valueFrom: "sars-cov-2"
    out: [indices]

  # Align both read files against the Bowtie 2 index; result is written
  # to sars-cov-2.sam.
  align_rnaseq_reads_to_genome:
    run: ../tools/bowtie2/bowtie2_align.cwl
    in:
      indices_file: index_reference_genome_with_bowtie2/indices
      filelist: rnaseq_left_reads
      filelist_mates: rnaseq_right_reads
      output_filename:
        valueFrom: sars-cov-2.sam
    out: [output]

  # Index the reference FASTA with the samtools faidx tool wrapper.
  index_reference_genome_with_samtools:
    run: ../tools/samtools/samtools_faidx.cwl
    in:
      sequences: sars_cov_2_reference_genome
    out: [sequences_with_index]

  # Add a Picard sequence dictionary to the already-indexed reference.
  # The resulting file is the reference used by every GATK step below.
  create_sequence_dictionary:
    run: ../tools/picard/picard_CreateSequenceDictionary.cwl
    in:
      REFERENCE: index_reference_genome_with_samtools/sequences_with_index
    out: [sequences_with_dictionary]

  # Assign read-group fields to the alignments and coordinate-sort them.
  update_read_group:
    run: ../tools/picard/picard_AddOrReplaceReadGroups.cwl
    in:
      INPUT: align_rnaseq_reads_to_genome/output
      OUTPUT:
        valueFrom: sars-cov-2-newreadgroups.bam
      RGID:
        valueFrom: "1"  # quoted so it stays a string, not an integer
      RGLB:
        valueFrom: 445_LIB
      RGPL:
        valueFrom: illumina
      RGSM:
        valueFrom: RNA
      RGPU:
        valueFrom: illumina
      SORT_ORDER:
        valueFrom: coordinate
    out: [sequences_with_new_read_group]

  # Duplicate handling with the Picard markdup tool wrapper.
  mark_duplicates:
    run: ../tools/picard/picard_markdup.cwl
    in:
      bam_sorted: update_read_group/sequences_with_new_read_group
    out: [bam_duprem]

  # Run GATK SplitNCigarReads on the duplicate-processed alignments.
  split_alignments:
    run: ../tools/GATK/GATK-SplitNCigarReads.cwl
    in:
      reference: create_sequence_dictionary/sequences_with_dictionary
      reads: mark_duplicates/bam_duprem
      output_filename:
        valueFrom: sars-cov-2-mutantsplit.bam
      # read_filter:  # Not available in GATK4
      #   valueFrom: ReassignOneMappingQuality
    out: [output]

  # Index the split BAM so the variant caller can consume it.
  index_split_alignments:
    run: ../tools/samtools/samtools_index.cwl
    in:
      bam_sorted: split_alignments/output
    out: [bam_sorted_indexed]

  # Call variants with GATK HaplotypeCaller; result is sars-cov-2-mutant.vcf.
  call_plausible_haplotypes_and_detect_variants:
    run: ../tools/GATK/GATK-HaplotypeCaller.cwl
    in:
      reference: create_sequence_dictionary/sequences_with_dictionary
      input: index_split_alignments/bam_sorted_indexed
      output_filename:
        valueFrom: sars-cov-2-mutant.vcf
    out: [output]

  # Run GATK VariantFiltration on the raw calls.
  # NOTE(review): "filer" in this step id looks like a typo for "filter".
  # The id is kept as-is because other steps reference it by name.
  filer_out_low_quality_variants:
    run: ../tools/GATK/GATK-VariantFiltration.cwl
    in:
      reference: create_sequence_dictionary/sequences_with_dictionary
      variant: call_plausible_haplotypes_and_detect_variants/output
      output_filename:
        valueFrom: sars-cov-2-mutantfilter.vcf
    out: [output]

  # Extract INDEL records from the filtered calls.
  select_indels:
    run: ../tools/GATK/GATK-SelectVariants.cwl
    in:
      reference: create_sequence_dictionary/sequences_with_dictionary
      variant: filer_out_low_quality_variants/output
      select_type_to_include:
        valueFrom: INDEL
      output_filename:
        valueFrom: sars-cov-2-indel.vcf
    out: [output]

  # Extract SNP records from the filtered calls.
  select_snps:
    run: ../tools/GATK/GATK-SelectVariants.cwl
    in:
      reference: create_sequence_dictionary/sequences_with_dictionary
      variant: filer_out_low_quality_variants/output
      select_type_to_include:
        valueFrom: SNP
      output_filename:
        # Was "sars-cov-2-indel.vcf" (copy-paste from select_indels), which
        # mislabelled the SNP file and gave both workflow outputs the same
        # basename.
        valueFrom: sars-cov-2-snp.vcf
    out: [output]
# Workflow outputs: the two SelectVariants results, one per variant type.
outputs:
  # File produced by the `select_indels` step.
  indels:
    type: File
    outputSource: select_indels/output

  # File produced by the `select_snps` step.
  snps:
    type: File
    outputSource: select_snps/output
# Prefix used by the `edam:format_*` identifiers on the workflow inputs.
$namespaces:
  edam: http://edamontology.org/

# Ontology document listed so the `edam:` terms can be resolved.
$schemas:
  - http://edamontology.org/EDAM_1.18.owl