/
fastq-dump_to_featureCounts.sh
128 lines (107 loc) · 3.09 KB
/
fastq-dump_to_featureCounts.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#! /bin/bash
# Defining script arguments using getopts...
#
# parse_options reads the command-line options and stores their values in
# global variables used by the rest of the script:
#   -i <id>    -> SRR
#   -a <file>  -> ANNOTATION
#   -f <file>  -> FASTA
#   -p <n>     -> PROCESSORS
#
# Arguments: the script's command line ("$@")
# Returns:   0 on success; 2 after printing a message to stderr when an
#            unknown option is given or an option is missing its argument
parse_options() {
  local opt
  # Restart getopts scanning from the first argument on every call
  OPTIND=1
  # The leading ':' selects getopts' silent mode, so the ':' and '?' cases
  # below are responsible for reporting bad input.
  while getopts ':i:a:f:p:' opt; do
    case "$opt" in
      i) SRR=$OPTARG ;;
      a) ANNOTATION=$OPTARG ;;
      f) FASTA=$OPTARG ;;
      p) PROCESSORS=$OPTARG ;;
      :)
        printf 'Error: option -%s requires an argument\n' "$OPTARG" >&2
        return 2
        ;;
      \?)
        printf 'Error: unknown option -%s\n' "$OPTARG" >&2
        return 2
        ;;
    esac
  done
  return 0
}
parse_options "$@" || exit 2
# When the first positional argument is empty (the script was started without
# arguments), list the supported options on stdout and stop with status 0.
if [[ -z "$1" ]]; then
  printf '%s\n' \
    "-i Sequence Read Archive Run ID (SRR...)" \
    "-a Reference genome annotation file" \
    "-f Reference genome multi-FASTA file" \
    "-p Number of processors/threads to use"
  exit 0
fi
# Beginning the main body of the script
#
# Pipeline, run in the current working directory:
#   fastq-dump -> bowtie2-build -> bowtie2 -> samtools view -> samtools sort
#   -> featureCounts
#
# Inputs (globals set by the option parsing earlier in the script):
#   SRR         run ID passed to fastq-dump; also prefixes the output names
#   ANNOTATION  annotation file passed to featureCounts -a
#   FASTA       reference FASTA file passed to bowtie2-build
#   PROCESSORS  thread count for bowtie2/samtools/featureCounts (default: 1)
#
# Outputs: bowtie2 index files, <SRR>_<fasta>.sam, <SRR>_<fasta>.bam,
#          sorted_<SRR>_<fasta>.bam and feature_counts_<SRR>_<fasta>.txt,
#          where <fasta> is the file name of FASTA without its directory.
#
# Each tool's exit status is checked; the script stops with status 1 at the
# first failure instead of handing missing files to the next tool.
#
# The sleep commands ("sleep 1s", "sleep 2s") slow down the script to make
# the output more readable in real-time

# Print an error message on stderr and abort the script with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print the heading given as $1, then list the working directory, with the
# pauses that keep the output readable.
show_files() {
  echo "$1"
  sleep 1s
  ls
  sleep 2s
}

# Fail fast on missing inputs, before the (potentially long) download starts
[[ -n "${SRR:-}" ]] || die "no SRR run ID given (-i)"
[[ -n "${ANNOTATION:-}" ]] || die "no annotation file given (-a)"
[[ -n "${FASTA:-}" ]] || die "no reference FASTA file given (-f)"
[[ -r "$ANNOTATION" ]] || die "cannot read annotation file: $ANNOTATION"
[[ -r "$FASTA" ]] || die "cannot read reference FASTA file: $FASTA"

# Without -p the tools would receive an empty thread count and misparse the
# following flag, so fall back to a single thread.
threads=${PROCESSORS:-1}
[[ "$threads" =~ ^[1-9][0-9]*$ ]] \
  || die "number of processors (-p) must be a positive integer, got: $threads"

# Derive every generated file name from the FASTA file name only, so that a
# FASTA given with a directory (e.g. refs/genome.fa) still yields valid names.
fasta_name=${FASTA##*/}
index_prefix="bowtie2_${fasta_name}"
reads_1="${SRR}_pass_1.fastq.gz"
reads_2="${SRR}_pass_2.fastq.gz"
sam_file="${SRR}_${fasta_name}.sam"
bam_file="${SRR}_${fasta_name}.bam"
sorted_bam="sorted_${SRR}_${fasta_name}.bam"
counts_file="feature_counts_${SRR}_${fasta_name}.txt"

printf "\n"
echo "==========================================================================="
echo "fastq-dump_to_featureCounts.sh"
echo "Script started: $(date)"
sleep 1s
echo "SRR ID: $SRR"
sleep 1s
echo "Reference genome annotation: $ANNOTATION"
sleep 1s
echo "Reference genome multi-FASTA file: $FASTA"
echo "==========================================================================="
sleep 1s
printf "\n"
show_files "Listing files in directory ..."

echo "Downloading compressed FASTQ reads using fastq-dump... ~~~~~~~~~~~~~~~~~~~~"
# The download is retried every 10 seconds until fastq-dump succeeds; there is
# no retry limit, so interrupt the script if the run ID itself is wrong.
until fastq-dump --gzip --skip-technical --readids --read-filter pass \
  --dumpbase --split-3 --clip "$SRR"; do
  echo "fastq-dump failed, retrying in 10 seconds..."
  sleep 10s
done
sleep 1s
show_files "Listing files in directory after running fastq-dump..."

# The alignment below is paired-end (-1/-2), so both mate files must exist
[[ -f "$reads_1" && -f "$reads_2" ]] \
  || die "expected paired-end reads $reads_1 and $reads_2 were not produced"

echo "Indexing reference genome FASTA file using bowtie2-build ~~~~~~~~~~~~~~~~~~"
sleep 2s
bowtie2-build "$FASTA" "$index_prefix" || die "bowtie2-build failed"
sleep 1s
show_files "Listing files in directory after running bowtie2-build..."

echo "Aligning reads to reference genome using bowtie2 ~~~~~~~~~~~~~~~~~~~~~~~~~~"
sleep 2s
bowtie2 -p "$threads" --no-unal -x "$index_prefix" \
  -1 "$reads_1" -2 "$reads_2" \
  -S "$sam_file" || die "bowtie2 alignment failed"
sleep 1s
show_files "Listing files in directory after running bowtie2..."

echo "Converting alignment from SAM to BAM format using samtools view ~~~~~~~~~~~"
sleep 2s
samtools view -@ "$threads" -Sb "$sam_file" > "$bam_file" \
  || die "samtools view failed"
sleep 1s
show_files "Listing files in directory after running samtools view..."

echo "Sorting the BAM file using samtools sort ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
sleep 2s
samtools sort -@ "$threads" "$bam_file" -o "$sorted_bam" \
  || die "samtools sort failed"
sleep 1s
show_files "Listing files in directory after running samtools sort..."

echo "Generating count table using featureCounts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
sleep 2s
featureCounts -p -s 2 -T "$threads" -a "$ANNOTATION" \
  -o "$counts_file" \
  "$sorted_bam" || die "featureCounts failed"
sleep 1s
show_files "Listing files in directory after running featureCounts..."

echo "Results written to $counts_file"
sleep 2s
echo "Head of $counts_file"
sleep 2s
head -- "$counts_file"
sleep 2s
echo "Tail of $counts_file"
sleep 2s
tail -- "$counts_file"
sleep 2s
echo "Script finished: $(date)"
sleep 2s