-
Notifications
You must be signed in to change notification settings - Fork 17
/
phase.sh
executable file
·49 lines (35 loc) · 3.1 KB
/
phase.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
HIC=alignment/hic/split
VCF=alignment/pacbioccs/vcf
REF=$1
SAMPLE=$2
SCAFFOLDS=`cut -d$'\t' -f1 ${REF}.fai`
export HIC
export VCF
export SAMPLE
export REF
export LD_LIBRARY_PATH=/usr/local/lib/
set -ex
echo HapCUT2 phasing
\time -v parallel 'extractHAIRS --bam $HIC/hic.{}.bam --hic 1 --VCF $VCF/pacbioccs.{}.filtered.vcf --out hapcut2/hic.{}.frag --maxIS 30000000 2> hapcut2/extractHAIRS.{}.log' ::: $SCAFFOLDS 2> extractHAIRS.log
\time -v parallel 'HAPCUT2 --fragments hapcut2/hic.{}.frag --VCF $VCF/pacbioccs.{}.filtered.vcf --output hapcut2/hic.{}.hap --hic 1 2> hapcut2/HAPCUT2.{}.log' ::: $SCAFFOLDS 2> HAPCUT2.log
parallel "cut -d$'\t' -f1-11 hapcut2/hic.{}.hap > hapcut2/hic.{}.hap.cut" ::: $SCAFFOLDS
parallel 'whatshap hapcut2vcf $VCF/pacbioccs.{}.filtered.vcf hapcut2/hic.{}.hap.cut -o hapcut2/hic.{}.phased.vcf' ::: $SCAFFOLDS
echo WhatsHap phasing
\time -v parallel 'whatshap phase --reference $REF $VCF/pacbioccs.{}.filtered.vcf hapcut2/hic.{}.phased.vcf alignment/pacbioccs/split/pacbioccs.{}.bam -o whatshap/pacbioccs.hic.{}.whatshap.phased.vcf 2> whatshap/whatshap.{}.log' ::: $SCAFFOLDS 2> whatshap.phase.log
parallel 'bgzip -c whatshap/pacbioccs.hic.{}.whatshap.phased.vcf > whatshap/pacbioccs.hic.{}.whatshap.phased.vcf.gz' ::: $SCAFFOLDS
parallel 'tabix -p vcf whatshap/pacbioccs.hic.{}.whatshap.phased.vcf.gz' ::: $SCAFFOLDS
parallel 'whatshap haplotag --reference $REF whatshap/pacbioccs.hic.{}.whatshap.phased.vcf.gz alignment/pacbioccs/split/pacbioccs.{}.bam -o haplotag/$SAMPLE.pacbioccs.hic.{}.haplotag.bam 2> haplotag/haplotag.{}.log' ::: $SCAFFOLDS 2> haplotag.log
echo Assemble partitions
parallel "samtools view haplotag/$SAMPLE.pacbioccs.hic.{}.haplotag.bam | grep "HP:i:1" | awk '{print \">\"\$1\"\n\"\$10}' > haplotag/{}-SCAFF-H1.fasta" ::: $SCAFFOLDS
parallel "samtools view haplotag/$SAMPLE.pacbioccs.hic.{}.haplotag.bam | grep "HP:i:2" | awk '{print \">\"\$1\"\n\"\$10}' > haplotag/{}-SCAFF-H2.fasta" ::: $SCAFFOLDS
parallel "samtools view haplotag/$SAMPLE.pacbioccs.hic.{}.haplotag.bam | grep -v "HP:" | awk '{print \">\"\$1\"\n\"\$10}' > haplotag/{}-SCAFF-untagged.fasta" ::: $SCAFFOLDS
exit
[ -d largestBlock_vcf ] || mkdir -p largestBlock_vcf
[ -d largestBlock_haplotagBAM ] || mkdir -p largestBlock_haplotagBAM
for CHR in $SCAFFOLDS; do
PS=`grep -v '^#' whatshap/pacbioccs.hic.${CHR}.whatshap.phased.vcf | grep 'PS' | cut -d':' -f13 | sort | uniq -cd | sort -k1nr | head -1 | awk '{print $2}'`
grep -E "^#|$PS$" whatshap/pacbioccs.hic.${CHR}.whatshap.phased.vcf > largestBlock_vcf/pacbioccs.hic.${CHR}.phased.largestBlock.vcf
done
parallel 'bgzip -c largestBlock_vcf/pacbioccs.hic.{}.phased.largestBlock.vcf > largestBlock_vcf/pacbioccs.hic.{}.phased.largestBlock.vcf.gz' ::: $SCAFFOLDS
parallel 'tabix -p vcf largestBlock_vcf/pacbioccs.hic.{}.phased.largestBlock.vcf.gz' ::: $SCAFFOLDS
parallel 'whatshap haplotag --reference $REF largestBlock_vcf/pacbioccs.hic.{}.phased.largestBlock.vcf.gz alignment/pacbioccs/split/pacbioccs.{}.bam -o largestBlock_haplotagBAM/${SAMPLE}.pacbioccs.hic.{}.largestBlock.haplotag.bam 2> largestBlock_haplotagBAM/haplotag.{}.log' ::: $SCAFFOLDS 2> haplotag.largestBlock.log