/
run_reademption_analysis.sh
executable file
·126 lines (107 loc) · 2.54 KB
/
run_reademption_analysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/bin/sh
# This script perform the actual RNA-Seq analysis with READemption.
main(){
READEMPTION_FOLDER=READemption_analysis
FTP_SOURCE=ftp://ftp.ncbi.nih.gov/genomes/Bacteria/Salmonella_enterica_serovar_Typhimurium_SL1344_uid86645/
SEQ_FOLDER=$READEMPTION_FOLDER/input/reference_sequences/
set_up_analysis_folder
get_genome_fasta_files
get_gff_files
add_read_libraries
run_read_alignment
build_coverage_files
run_gene_quanti
run_deseq_analysis
run_viz_align
run_viz_gene_quanti
run_viz_deseq
}
set_up_analysis_folder(){
reademption create $READEMPTION_FOLDER
}
get_genome_fasta_files(){
cat <<EOF > bin/mod_fasta_head.py
import sys
import shutil
input_fh = open(sys.argv[1])
header = input_fh.readline()
if len(header.split()[0].split("|")) != 5:
sys.stderr.write("Unexprected fasta header: \"%s\"\n" % header[:-1])
sys.exit(2)
tmp_file_path = sys.argv[1] + "_TMP"
output_fh = open(tmp_file_path, "w")
genome_accession = header.split("|")[3]
new_header = ">%s %s" % (genome_accession, header[1:-1])
output_fh.write(new_header + "\n")
output_fh.write("".join(input_fh.readlines()))
shutil.move(tmp_file_path, sys.argv[1])
EOF
wget -cP $SEQ_FOLDER $FTP_SOURCE/*fna
for FILE in $(ls $SEQ_FOLDER/* | grep fna)
do
NEW_FILE_NAME=$(echo $FILE | sed "s/.fna/.fa/")
mv $FILE $NEW_FILE_NAME
python bin/mod_fasta_head.py $NEW_FILE_NAME
done
}
get_gff_files(){
wget -cP $READEMPTION_FOLDER/input/annotations/ \
$FTP_SOURCE/*gff
}
add_read_libraries(){
SOURCE=RNA_Seq_data/output/fasta_subsampled/
for SAMPLE in \
InSPI2_R1.fa.bz2 \
InSPI2_R2.fa.bz2 \
LSP_R1.fa.bz2 \
LSP_R2.fa.bz2
do
ln -sf ../../../$SOURCE/${SAMPLE} $READEMPTION_FOLDER/input/reads/${SAMPLE}
done
}
run_read_alignment(){
reademption \
align \
-p 16 \
-a 95 \
-l 12 \
--poly_a_clipping \
$READEMPTION_FOLDER
}
build_coverage_files(){
reademption \
coverage \
-p 16 \
$READEMPTION_FOLDER
}
run_gene_quanti(){
reademption \
gene_quanti \
-p 16 \
-a \
--features gene \
$READEMPTION_FOLDER
}
run_deseq_analysis(){
reademption \
deseq \
-l InSPI2_R1.fa.bz2,InSPI2_R2.fa.bz2,LSP_R1.fa.bz2,LSP_R2.fa.bz2 \
-c InSPI2,InSPI2,LSP,LSP \
$READEMPTION_FOLDER
}
run_viz_align(){
reademption \
viz_align \
$READEMPTION_FOLDER
}
run_viz_gene_quanti(){
reademption \
viz_gene_quanti \
$READEMPTION_FOLDER
}
run_viz_deseq(){
reademption \
viz_deseq \
$READEMPTION_FOLDER
}
main