-
Notifications
You must be signed in to change notification settings - Fork 1
/
pipeline_bismark_and_erne_small.sh~
executable file
·72 lines (41 loc) · 2.59 KB
/
pipeline_bismark_and_erne_small.sh~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#pipeline with erne (http://erne.sourceforge.net/)
#and bismark (http://www.bioinformatics.babraham.ac.uk/projects/bismark/) using bowtie 2 (http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)
#edit with the actual fasta files to be used in the experiments
wd=/home/nicola/workspace/datasets/alignments/erne-vs-bismark-small #working directory
fasta=/home/nicola/workspace/datasets/genomes/50000/50000.fasta
#fasta=/home/nicola/workspace/datasets/genomes/hg19/ucsc.hg19.fasta
temp_dir=~/workspace/datasets/alignments/bismark/temp/ #bismark temp folder
#genome_folder=~/workspace/datasets/genomes/hg19/bismark/ #bismark genome folder
genome_folder=~/workspace/datasets/genomes/50000/bismark/
#erne_ref=/home/nicola/workspace/datasets/genomes/hg19/ucsc.hg19.ebm
erne_ref=/home/nicola/workspace/datasets/genomes/50000/50000.ebm
erne_bs5_benchmark=$wd/erne_bs5_benchmark.txt
erne_meth_benchmark=$wd/erne_meth_benchmark.txt
bismark_aligner_benchmark=$wd/bismark_aligner_benchmark.txt
bismark_caller_benchmark=$wd/bismark_caller_benchmark.txt
query1=$wd/query_1.fastq
query2=$wd/query_2.fastq
# 1) simulate dataset
#rm wd/*
./testcase-bs-aligner-default.sh $fasta $wd
# 2) run erne
#align with the BS-aligner erne-bs5
/usr/bin/time -v erne-bs5 --reference $erne_ref --query1 $query1 --query2 $query2 --output $wd/erne_bs5_out.bam --threads 4 2> $erne_bs5_benchmark
#call methylation with the caller erne-meth, generating methylation annotations in bismark format
/usr/bin/time -v erne-meth --fasta $fasta --input $wd/erne_bs5_out.bam --output-prefix $wd/erne_meth_out --annotations-bismark 2> $erne_meth_benchmark
# 3) run bismark
#align with the BS-aligner bismark
/usr/bin/time -v bismark --bowtie2 $genome_folder -1 $query1 -2 $query2 -o $wd --temp_dir $temp_dir -p 2 --bam 2> $bismark_aligner_benchmark
samfile=${query1}_bismark_bt2_pe.bam
#call methylation with the bismark caller, generating methylation annotations in bismark format
/usr/bin/time -v bismark_methylation_extractor -p --output $wd --gzip --CX --genome_folder $genome_folder $samfile 2> $bismark_caller_benchmark
#remove all useless files generated by bismark
rm ${query1}_bismark_bt2_pe.bedGraph $wd/*.txt.gz ${query1}_bismark_bt2_pe.M-bias.txt
# 4) print stats
min_cov=2
max_diff=0.2
#count number of calls that are 20% from the correct value (ERNE) and are covered with at least min_cov bases
printf "ERNE results:\n"
./compare_meth_annotations_cytosine_format.sh $wd/erne_meth_out_bismark.bed $wd/ $max_diff $min_cov
printf "\nBISMARK results:\n"
./compare_meth_annotations_cov_format.sh ${query1}_bismark_bt2_pe.bismark.cov $wd/ $max_diff $min_cov