-
Notifications
You must be signed in to change notification settings - Fork 0
/
basecall_bonito.sh
executable file
·33 lines (27 loc) · 1.07 KB
/
basecall_bonito.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
# Usage: ./basecall_bonito.sh FAST5DIRNAME FASTQFILENAME
#   FAST5DIRNAME   directory with fast5 files
#   FASTQFILENAME  desired name for the basecalled fastq file
#
# Performs basecalling with bonito, converts the resulting FASTA to FASTQ with
# seqtk (filling in fake quality values), then post-processes the FASTQ with
# fix_fastq_bonito.py.
# NOTE: paths to bonito, seqtk and the scripts directory are hardcoded below,
# as is the CUDA device passed to bonito.
#
# `set -u` is deliberately not enabled: the script sources a virtualenv
# activate script, and such scripts may reference unset variables
# (NOTE(review): confirm against the bonito env before tightening this).
set -e

if [ "$#" -ne 2 ]; then
  # Diagnostics go to stderr so they never end up in redirected output.
  echo "Illegal number of parameters, see usage in script" >&2
  exit 1
fi

DIRNAME=$1
FASTQNAME=$2
BONITO_PATH="/raid/shubham/nanopore_lossy_compression/bonito/"
SEQTK="/raid/shubham/nanopore_lossy_compression/seqtk/seqtk"
SCRIPT_PATH="/raid/shubham/nanopore_lossy_compression/lossy_compression_evaluation/scripts"

# Intermediate files, created next to the requested output file.
TMP_FASTA="$FASTQNAME.tmp.fasta"
TMP_FASTQ="$FASTQNAME.tmp"

# Remove intermediates on every exit path; with `set -e` a failing step would
# otherwise abort the script before the files are deleted.
cleanup() {
  rm -f -- "$TMP_FASTA" "$TMP_FASTQ"
}

# basecall with bonito (inside its virtualenv)
# shellcheck disable=SC1090,SC1091
source "$BONITO_PATH/env/bin/activate"
# Installed only now, right before the first intermediate file is created.
trap cleanup EXIT
bonito basecaller --half --device cuda:4 dna_r9.4.1 "$DIRNAME" > "$TMP_FASTA"
deactivate

# convert fasta to fastq by filling in fake quality values; written straight
# to the temporary FASTQ so the final name only ever receives the fixed file
"$SEQTK" seq -F '#' "$TMP_FASTA" > "$TMP_FASTQ"
rm -- "$TMP_FASTA"

# fix fastq file issue (seqtk leaves empty fasta lines as it is which causes
# errors later); reads the temporary FASTQ and writes the final FASTQ
"$SCRIPT_PATH/fix_fastq_bonito.py" "$TMP_FASTQ" "$FASTQNAME"
rm -- "$TMP_FASTQ"