-
Notifications
You must be signed in to change notification settings - Fork 3
/
nanopolish_phase_reads_run.sh
executable file
·46 lines (40 loc) · 1.69 KB
/
nanopolish_phase_reads_run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/bin/bash
#PBS -q small
# Usage: qsub -F "/path/to/masked_genome.fasta /path/to/unmasked_genome.fasta /path/to/reads.fasta /path/to/variants.vcf [/path/to/alignment.bam]" nanopolish_phase_reads_run.sh
#
# NB: nanopolish relies on being able to access the fast5 files that created your fastq.
# /path/to/reads.fasta (or fastq) must be generated using either `poretools` or `nanopolish extract`
# The paths contained in the fasta header must be accessible (without symlinks!) from ~/tmp
# This is done using `rsync -a /path/to/fast5 ~/tmp/path/to/fast5`
# It is advised to call `nanopolish extract` with relative paths, not absolute paths, for this reason
set -x
cd $PBS_O_WORKDIR
MASKED_GENOME=$(realpath $1)
UNMASKED_GENOME=$(realpath $2)
FASTA=$(realpath $3)
VCF=$(realpath $4)
if [ "$#" -gt 4 ]; then
BAM=$(realpath $5)
fi
if [ $(echo $FASTA | grep -c -e "a$") -gt 0 ]; then
FMT="fasta"
elif [ $(echo $FASTA | grep -c -e "q$") -gt 0 ]; then
FMT="fastq"
else
echo "ERROR: $FASTA format not recognised"
exit 1
fi
N=10000
TMP_DIR="$PBS_O_HOME/tmp/$(dirname $FASTA)"
SCRIPTS_DIR=$PBS_O_HOME/nanopore-scripts
mkdir -p $TMP_DIR
cd $TMP_DIR
if [ ! -f $(basename $FASTA).1.$FMT ]; then
if [ "$#" -gt 4 ]; then
python $SCRIPTS_DIR/split_bam_and_fasta.py -b $BAM -f $FASTA --prefix $(basename $FASTA) --bam-suffix fastq.sorted.bam -n $N
else
python $SCRIPTS_DIR/split_fasta.py $FASTA $N
fi
fi
ARRAY_ID=$(qsub -F "$MASKED_GENOME $UNMASKED_GENOME $FASTA $VCF $TMP_DIR" -t 1-$(ls -1 $TMP_DIR/$(basename $FASTA).*.$FMT | wc -l) $SCRIPTS_DIR/nanopolish_phase_reads.sh)
qsub -W "depend=afteranyarray:$ARRAY_ID" -F "$MASKED_GENOME $UNMASKED_GENOME $FASTA $VCF $TMP_DIR" $SCRIPTS_DIR/nanopolish_phase_reads_clean.sh