-
Notifications
You must be signed in to change notification settings - Fork 40
/
postalign_xcor.bds
133 lines (91 loc) · 4.39 KB
/
postalign_xcor.bds
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env bds
#vim: syntax=java
include "species.bds"
include "module_template.bds"
help == postalign bed/tagalign settings
fraglen0 := false help (LEGACY PARAM) Set predefined fragment length as zero for cross corr. analysis (add -speak=0 to run_spp.R).
speak_xcor := -1 help Set user-defined cross-corr. peak strandshift (-speak= in run_spp.R). Use -1 to disable (default: -1).
max_ppsize_xcor := "" help R stack size (R parameter --max-ppsize=; between 5000 and 5000000) for cross corr. analysis.
extra_param_xcor := "" help Set extra parameters for run_spp.R (cross-corr. analysis only).
mem_xcor := "15G" help Max. memory for cross-corr. analysis (default: 15G).
grp_color_xcor := "yellowgreen"
init_postalign_xcor()
void init_postalign_xcor() {
fraglen0 = get_conf_val_bool( fraglen0, ["fraglen0"] )
speak_xcor = get_conf_val_int( speak_xcor, ["speak_xcor"] )
extra_param_xcor= get_conf_val( extra_param_xcor, ["extra_param_xcor"] )
mem_xcor = get_conf_val( mem_xcor, ["mem_xcor"] )
max_ppsize_xcor = get_conf_val( max_ppsize_xcor, ["max_ppsize_xcor"] )
// backward compatibility
if ( speak_xcor == -1 && fraglen0 ) speak_xcor = 0
print("\n\n== postalign cross-corr. analysis settings\n")
print( "Max. memory for UNIX shuf\t\t\t: $mem_shuf\n")
print( "User-defined cross-corr. peak strandshift\t: $speak_xcor\n")
print( "Extra parameters for cross-corr. analysis\t: $extra_param_xcor\n")
print( "Max. memory for cross-corr. analysis\t\t: $mem_xcor\n")
print( "Stack size for cross-corr. analysis\t\t:$max_ppsize_xcor\n")
}
string subsample_tag_PE_for_xcor( string tag, int nlines, bool non_mito, string o_dir, string group ) {
prefix := replace_dir( rm_ext( tag, ["tagAlign","tag","bed"] ), o_dir )
nreads_per_mill := metric_prefix( nlines )
subsampled_tag := "$prefix."+(non_mito?"no_chrM.":"")+"$nreads_per_mill.R1.tagAlign.gz"
non_mito_param := non_mito ? "grep -v \"chrM\" | " : ""
joined := "$prefix.joined" // temporary file
joined_subsampled := "$prefix.joined.subsampled" // temporary file
in := [ tag ]
out := subsampled_tag
taskName:= "subsample_tag_PE_4_xcor " + group
mem := get_res_mem(mem_shuf,1)
wait_par( cpus )
tid := task( out<-in ) {
sys $shcmd_init
// join consecutive two lines into one
sys zcat $tag | sed 'N;s/\n/\t/' > $joined
//# Shuffle and split temporary combined file into 2 equal parts
//# Will produce $PR_PREFIX00 and $PR_PREFIX01
sys cat $joined | $non_mito_param shuf -n $nlines --random-source=$tag > $joined_subsampled
//# Subsample tagAlign file
sys awk 'BEGIN{OFS="\t"}{print $1,$2,$3,$4,$5,$6}' $joined_subsampled | \
gzip -nc > $subsampled_tag
sys rm -f $joined $joined_subsampled
sys $shcmd_finalize
}
register_par( tid, cpus )
add_task_to_graph( in, out, group )
return out
}
string[] xcor( string tag, string o_dir, string group, int nth_xcor ) {
// misc.
prefix := replace_dir( rm_ext( tag, ["tagAlign","tag","bed"] ), o_dir )
xcor_score := "$prefix.cc.qc"
xcor_plot := "$prefix.cc.plot.pdf"
param_speak := speak_xcor > -1 ? "-speak=$speak_xcor" : ""
extra_param := max_ppsize_xcor ? "--max-ppsize=$max_ppsize_xcor " : ""
in := [ tag ]
out := [ xcor_score, xcor_plot ]
taskName:= "xcor " + group
cpus := (nth_xcor==1) ? -1 : nth_xcor; mem := get_res_mem(mem_xcor,nth_xcor);
wait_par( cpus )
tid := task( out<-in ) {
sys $shcmd_init
// # if phantompeakqualtools is an old version, use run_spp_nodups.R. new version has run_spp.R only
sys if [[ $(which run_spp_nodups.R 2> /dev/null | wc -l || echo) == "1" ]]; then RUN_SPP=$(which run_spp_nodups.R); \
else RUN_SPP=$(which run_spp.R); \
fi
//# CCSCORE FILE format
//# Filename <tab> numReads <tab> estFragLen <tab> correstFragLen <tab> PhantomPeak <tab> corrphantomPeak <tab> argmincorr <tab> mincorr <tab> phantomPeakCoef <tab> relPhantomPeakCoef <tab> QualityTag
sys Rscript $extra_param ${RUN_SPP} -rf \
-c=$tag -p=$nth_xcor \
-filtchr=chrM -savp=$xcor_plot -out=$xcor_score $param_speak $extra_param_xcor
sys sed -r 's/,[^\t]+//g' $xcor_score > $xcor_score.tmp
sys mv $xcor_score.tmp $xcor_score
sys $shcmd_finalize
}
register_par( tid, cpus )
add_task_to_graph( in, out, group, "XCOR", grp_color_xcor )
return out
}
string get_fraglen( string xcor_score ) { // get FRAGLEN (3rd column of cc score file) for spp(-speak=$FRAGLEN)
cols := xcor_score.read().split("\t")
return cols[2]
}