/
Snakefile_NGS
105 lines (85 loc) · 4.31 KB
/
Snakefile_NGS
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
configfile: "configs/software.yaml"
configfile: "configs/base.yaml"
configfile: "configs/config.NGS.yaml"
include: "subworkflows/util.py"
# parse config values
GRAPH = config['graph']
MAPPER = config['mapper']
BCFTOOLS = exe_check_ret_abs_path(config['bcftools'])
VG = exe_check_ret_abs_path(config["vg"])
BGZIP = exe_check_ret_abs_path(config["bgzip"])
TABIX = exe_check_ret_abs_path(config["tabix"])
ZWORKDIR = config['workdir']
ZTMPDIR = 'tmp'
workdir: ZWORKDIR
include: "subworkflows/callSV.py"
SAMPLES = list_prepaire_files(config['sample_reads_list_file'])
if config['split_chr']==True:
CHRS = gfa2chrs( GRAPH + '.gfa')
#CHRS = rgfa2chrs( GRAPH + '.rgfa')
if config['mode'] == 'genotype':
rule all:
input:
expand('4.realign/2.2.filter_maf1.{chrm}.vcf.gz', chrm=CHRS),
'5.final_result/1.final_mergechr.pav.all.vcf.gz',
'5.final_result/1.final_mergechr.pav.sv.vcf.gz',
'5.final_result/2.final_mergechr.all.vcf.gz',
'5.final_result/2.final_mergechr.sv.vcf.gz',
elif config['mode'] == 'augment':
rule all:
input:
expand('7.aug_merge_rawvcf/2.merge_rawvcf.{chrm}.vcf.gz', chrm=CHRS),
expand('6.aug_dp/3.vcf_with_dp.{chrm}/{sample}.sorted.vcf.gz.tbi', sample=SAMPLES, chrm=CHRS),
expand('8.aug_realign/6.filter_maf1.{chrm}.vcf.gz', chrm=CHRS),
'9.aug_final_result/5.thin3.final_mergechr.all.vcf.gz', # Augment output InDel VCF
'9.aug_final_result/6.filter_maf1.final_mergechr.all.vcf.gz', # Augment output PAV VCF
'9.aug_final_result/6.filter_maf1.final_mergechr.all.sv.vcf.gz', # Augment output PAV VCF
elif config['split_chr']==False:
CHRS=['all']
if config['mode'] == 'genotype':
rule all:
input:
'5.final_result/1.final.all.all.vcf.gz',
'5.final_result/1.final.all.all.pav.vcf.gz',
'5.final_result/2.final.all.sv.vcf.gz',
elif config['mode'] == 'augment':
rule all:
input:
'9.aug_final_result/1.x.split.all.sv.vcf.gz', # Augment output SV VCF
'9.aug_final_result/2.SNP.split.all.snp.vcf.gz', # Augment output SV VCF
'9.aug_final_result/7.PAV.all.sv.vcf.gz', # Augment output PAV VCF
if config['mode'] == 'genotype':
include: "subworkflows/panpopbase.py"
elif config['mode'] == 'augment':
include: "subworkflows/panpopbase.py"
include: "subworkflows/panpopaug.py"
else:
raise ValueError('mode must be genotype or augment')
for dir in [ZTMPDIR]:
if not os.path.exists(dir):
os.makedirs(dir)
rule cleanall:
params:
rmfiles=expand('{graph}.{ext}', graph=GRAPH, ext=['xg', 'snarls', 'gcsa', 'gcsa.lcp', 'ids.mapping', 'dist', 'trivial.snarls', 'gfa.fa', 'min', 'snarls', 'giraffe.gbz', 'pg']) +
[ZTMPDIR] +
["2.callSV", "3.merge_rawvcf", "4.realign", "5.final_result" ] +
["6.aug_dp", "7.aug_merge_rawvcf", "8.aug_realign", "9.aug_final_result"] +
["2.callSV.aug.DPinfos.txt"] + ["2.callSV.DPinfos.txt"] +
["logs"]
shell:
"rm -rf {params.rmfiles}"
# call variants in the graph
rule callSV:
input:
expand('2.callSV/{sample}/{sample}-{graph}.{map}.q{minq}.call.ext.vcf.gz', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ),
expand('2.callSV/{sample}/{sample}-{graph}.{map}.q{minq}.call.ext.vcf.gz.tbi', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ),
#expand('2.callSV/{sample}/{sample}-{graph}.{map}.gam.avgdp', sample=SAMPLES, graph=GRAPH, map=MAPPER)
expand('2.callSV/{sample}/{sample}-{graph}.{map}.q{minq}.pack.DPinfo', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ)
# call variants in the graph
rule callSVaug:
input:
expand('2.callSV/{sample}/{sample}-{graph}.{map}.aug.q{minq}.call.ext.vcf.gz', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ),
expand('2.callSV/{sample}/{sample}-{graph}.{map}.aug.q{minq}.call.ext.vcf.gz.tbi', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ),
#expand('2.callSV/{sample}/{sample}-{graph}.{map}.aug.gam.avgdp', sample=SAMPLES, graph=GRAPH, map=MAPPER)
expand('2.callSV/{sample}/{sample}-{graph}.{map}.aug.q{minq}.pack.DPinfo', sample=SAMPLES, graph=GRAPH, map=MAPPER, minq=MINQ)