-
Notifications
You must be signed in to change notification settings - Fork 0
/
assembly_bonito.sh
executable file
·52 lines (43 loc) · 1.77 KB
/
assembly_bonito.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
set -e
if [ "$#" -ne 3 ]; then
echo "Illegal number of parameters, see usage in script"
exit 1
fi
# Usage ./basecall_assembly.sh FASTQFILENAME DIRNAME GENOME_SIZE
# where FASTQFILENAME is the basecalled fastq file
# DIRNAME is the place where the assemblies are written to
# and GENOME_SIZE is expected genome size (set to 5.5m for K.p.)
# NOTE: paths to other tools are hardcoded
# The script performs the following steps:
# 1. assembly with flye
# 2. Polishing using Racon (use rebaler that does multiple rounds)
# 3. Medaka consensus on the Racon-polished assembly
FASTQNAME=$1
DIRNAME=$2
GENOME_SIZE=$3
FLYE=$WORKINGDIR/Flye-2.7.1/bin/flye
REBALER=$WORKINGDIR/Rebaler-0.2.0/rebaler-runner.py
RACON_PATH=$WORKINGDIR/racon-v1.4.13/build/bin/
MINIMAP2_PATH=$WORKINGDIR/minimap2-2.17/
MEDAKA_VENV_PATH=$WORKINGDIR/medaka/venv/bin/activate
MEDAKA_BONITO=$WORKINGDIR/bonito_medaka/oukeesfjc6406t5po0x2hlw97lnelkyl.hdf5
LOSSY_COMP_ENV_PATH=$WORKINGDIR/lossy_comp_env/bin/activate
# create DIRNAME if doesn't already exist
mkdir -p $DIRNAME
# fix empty lines which cause error by replacing them with single N
sed 's/^$/N/' $FASTQNAME > $FASTQNAME.tmp.fastq
# 1. assembly with flye
mkdir $DIRNAME/flye/
$FLYE --nano-raw $FASTQNAME.tmp.fastq --genome-size $GENOME_SIZE --out-dir $DIRNAME/flye/ --threads 12
# 2. polishing with rebaler
# add racon and minimap2 to PATH
PATH=$PATH:$RACON_PATH
PATH=$PATH:$MINIMAP2_PATH
$REBALER --threads 8 $DIRNAME/flye/assembly.fasta $FASTQNAME.tmp.fastq > $DIRNAME/rebaler.fasta
# 3. medaka consensus
# first go into virtual environment
source $MEDAKA_VENV_PATH
medaka_consensus -i $FASTQNAME.tmp.fastq -d $DIRNAME/rebaler.fasta -o $DIRNAME/medaka -t 12 -m $MEDAKA_BONITO
source $LOSSY_COMP_ENV_PATH # reactivate original venv
rm $FASTQNAME.tmp.fastq