/
HIV1predict.sh
209 lines (191 loc) · 8.5 KB
/
HIV1predict.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/bin/bash
#### Usage || Help menu
echo > usage.help
echo " This script runs the workflow. Use FASTA for the monomer of" >> usage.help
echo " HIV-1 protease (subtype B) in one line." >> usage.help
echo " The script needs Python/Modeller, Reduce, VMD, Lovoalign, and IsoMIF to work." >> usage.help
echo >> usage.help
echo " Example of FASTA file:" >> usage.help
echo " > Consensus HIV-1 PR (subtype B)" >> usage.help
echo " PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF" >> usage.help
echo >> usage.help
echo " sh HIV1predict.sh [HELP]" >> usage.help
echo " sh HIV1predict.sh [IN] <FASTA file> [PDB] <PDB file> [OUT] <output file>" >> usage.help
echo >> usage.help
echo " [HELP] -h shows this help text" >> usage.help
echo " [IN] -i input filename (FASTA format; one sequence only)" >> usage.help
echo " [PDB] -p input own PDB file instead of 1NH0_ref; optional" >> usage.help
echo " (assumes that the HIV-1 protease monomer is in chain A)" >> usage.help
echo " [OUT] -o output filename (outputs PDB format)" >> usage.help
echo >> usage.help
echo " Examples:" >> usage.help
echo " ./HIV1predict.sh -p 3DJK -o output.pdb -i input.fasta" >> usage.help
echo " sh HIV1predict.sh -i test -p 1NH0_ref -o test" >> usage.help
echo " sh HIV1predict.sh -i test.fasta -o test_output.pdb" >> usage.help
usage=$(cat usage.help)
#### Get info from options selected
while getopts i:o:p:h option
do
case "${option,,}" in
i) inp=${OPTARG%.fasta};;
o) out=${OPTARG%.pdb};;
p) pdb=${OPTARG%.pdb};;
h) echo "$usage" && rm usage.help && exit 0;;
esac
done
shift $(( OPTIND -1 ))
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" # source directory of the script
spinner=1 ; spinnerloop="/-\|" ; echo -n ' '
#### Folders where you can find the apps' scripts
reducedir=/applic/reduce/reduce-3.23
vmddir=/applic/VMD/vmd-1.9.3/bin
isomifdir=/applic/IsoMif/IsoMif_150311
lovodir=/applic/lovoalign/lovoalign-16.342/bin
#### Check apps
if [[ -z $reducedir ]] ; then echo "Check Reduce directory" && rm usage.help && exit 1
elif [[ -z $vmddir ]] ; then echo "Check VMD directory" && rm usage.help && exit 1
elif [[ -z $lovodir ]] ; then echo "Check LovoAlign directory" && rm usage.help && exit 1
elif [[ -z $isomifdir ]] ; then echo "Check IsoMIF directory" && rm usage.help && exit 1
else echo "Directories OK."
echo
fi
#### Let's start
if [[ -n $(ls ${inp}.fasta) ]] ; then input=${inp}.fasta ;
else input=${inp} ; fi
output=${out}.pdb
if [[ -z $pdb ]] ; then pdbfile="${DIR}"/1NH0_ref.pdb ; ref=1NH0_ref.pdb
else
if [[ -n $(ls ${pdb}.pdb) ]] ; then pdbfile=${pdb}.pdb
elif [[ -n $(ls ${pdb}) ]] ; then pdbfile=${pdb}
else wget "https://files.rcsb.org/download/${pdb}.pdb" -O ${pdb}.pdb ; pdbfile=${pdb}.pdb ; fi
fi
#### New working directory
currdir=$(pwd)
inpf=${input##*/}
outf=${output##*/}
pdbf=${pdbfile##*/}
workdir=${inpf%%.fasta}
mkdir -p ./$workdir && cp $input ${workdir}/$inpf && cp $pdbfile ${workdir}/$pdbf
cd ${workdir}/
#### Prepare the FASTA input in one line
## Is there more than one FASTA inside file?
check=$(grep ">" $inpf | wc -l)
if [[ $check -gt 1 ]] ; then
sh "${DIR}"/separateFASTA.sh $inpf
ls -I $inpf > seqfasta.tmp
echo " The input had more than one FASTA. Please run this script again,"
echo " using the new separate FASTA files."
echo
grep " _[[:digit:]]*.fasta" seqfasta.tmp
echo
rm seqfasta.tmp
rm usage.help
exit 0
fi
##
#### Get the FASTA sequence in one line
tail -c1 $inpf | read -r _ || echo >> $inpf
seqfasta=""
sed '1d' $inpf | while read sequence
do
seqfasta=$seqfasta$sequence
echo $seqfasta > seq.tmp
done
sed -i '2,$d' $inpf && cat seq.tmp >> $inpf
#sed -i '2,$d' $inpf && seq99=$(echo $seqfasta | colrm 100) && echo $seq99 >> $inpf # to keep only 99 aminoacids (beware of insertions!!)
#### Prepare PDB_ref
if [[ $pdbf != 1NH0_ref.pdb ]]
then
ref=${pdbf%%.pdb}_ref.pdb
mkdir refTMP ; cd refTMP/
cp ../$pdbf .
echo ">"${pdbf%.pdb} > ref.fasta
# Get PDB to FASTA
awk '/^ATOM/ && $3 == "CA" && $5 == "A" {print $4}' $pdbf > ref.tmp
while read aa
do
if [[ ${#aa} -gt 3 ]] ; then if [[ ${aa:0:1} == "A" ]] ; then echo ${aa:1} >> ref-fasta.tmp ; fi
else echo $aa >> ref-fasta.tmp ; fi
done < ref.tmp
cat ref-fasta.tmp | tr '\n' ' ' | sed 's/ALA/A/g;s/CYS/C/g;s/ASP/D/g;s/GLU/E/g;s/PHE/F/g;s/GLY/G/g;s/HIS/H/g;s/ILE/I/g;s/LYS/K/g;s/LEU/L/g;s/MET/M/g;s/ASN/N/g;s/PRO/P/g;s/GLN/Q/g;s/ARG/R/g;s/SER/S/g;s/THR/T/g;s/VAL/V/g;s/TRP/W/g;s/TYR/Y/g' | sed 's/ //g' >> ref.fasta
echo >> ref.fasta
sh "${DIR}"/pattern2consensus.sh >> build_ref.log
sh "${DIR}"/pattern-HIVp.sh $pdbf ref_pattern >> build_ref.log
mv *_ref_pattern/$(ls *_ref_pattern/ --sort=time | head -n 1) ../$ref
cd .. ; rm -r refTMP/
echo " Reference model finished."
echo
fi
#### Build model
echo -n " Adding mutations... "
while [ ! -f ${outf%%.pdb}.pdb ] ; do printf "\b${spinnerloop:spinner++%${#spinnerloop}:1}" ; sleep 0.3 ; done &
#while [ ! -f ${outf%%.pdb}.pdb ]; do echo -e -n '|\r\c' ; sleep 0.3 ; echo -e -n '/\r\c' ; sleep 0.3 ; echo -e -n '-\r\c' ; sleep 0.3 ; echo -e -n '\\\r\c' ; sleep 0.3 ; done &
sh "${DIR}"/mutations-consensus.sh >> build_model.log
sh "${DIR}"/pattern-HIVp.sh $ref *_pattern >> build_model.log
mv *ref_*_pattern/$(ls *ref_*_pattern/ --sort=time | head -n 1) ./${outf%%.pdb}.pdb # most recent
rm -r *ref_*_pattern/
echo
echo " Structural model finished."
echo
#### Prepare reference for IsoMIF
echo -n " Starting IsoMIF... "
while [ ! -f isomif_models/*.isomif ] ; do printf "\b${spinnerloop:spinner++%${#spinnerloop}:1}" ; sleep 0.3 ; done &
mkdir isomif_ref && cd isomif_ref
${reducedir}/reduce ../$ref > ${ref%%.pdb}_H.pdb 2>> mif_ref.log # add H
#### VMD - write centreref.tmp
echo 'set pdb [lindex $argv 0]' > centreref.tmp
echo 'set out [lindex $argv 1]' >> centreref.tmp
echo 'mol new $pdb' >> centreref.tmp
echo 'set A [atomselect top all]' >> centreref.tmp
echo 'set minus_com [vecsub {0.0 0.0 0.0} [measure center $A]]' >> centreref.tmp
echo '$A moveby $minus_com' >> centreref.tmp
echo '$A writepdb $out' >> centreref.tmp
echo 'quit' >> centreref.tmp
####
${vmddir}/vmd -dispdev none -e centreref.tmp -args ${ref%%.pdb}_H.pdb ${ref%%.pdb}_final.pdb &>> mif_ref.log
${isomifdir}/getcleft_linux_x86_64 -p ${ref%%.pdb}_final.pdb -s -t 1 -k 1 -o ${ref%%.pdb}_final &>> mif_ref.log # getCleft
${isomifdir}/mif_linux_x86_64 -p ${ref%%.pdb}_final.pdb -g ${ref%%.pdb}_final_sph_1.pdb -t ${ref%%.pdb}_final &>> mif_ref.log # obtain MIFs for the chosen cleft
perl ${isomifdir}/mifView.pl -m *.mif -o ./ # file for PyMol
# hydrophobic - cyan aromatic - orange
# Hbond donor - blue Hbond acceptor - red
# (+) charge - green (-) charge - magenta
rm centreref.tmp
cd ..
#### IsoMIF
mkdir isomif_models && cd isomif_models
referencedir=../isomif_ref
${reducedir}/reduce ../${outf%%.pdb}.pdb > ${outf%%.pdb}_H.pdb 2>> mif_models.log # add H
${lovodir}/lovoalign -p1 ${outf%%.pdb}_H.pdb -p2 ${referencedir}/${ref%%.pdb}_final.pdb -o ${outf%%.pdb}_final.pdb &>> mif_models.log # align model and reference
${isomifdir}/mif_linux_x86_64 -p ${outf%%.pdb}_final.pdb -g ${referencedir}/*_final_sph_1.pdb -t ${outf%%.pdb}_mif &>> mif_models.log
${isomifdir}/isomif_linux_x86_64 -p1 ${referencedir}/*_final.mif -p2 ${outf%%.pdb}_mif.mif -s 1 -c 1 -d 1.0 -w -o grid1_d10_ &>> isomif.log
perl ${isomifdir}/mifView.pl -m *.mif -o ./ # file for PyMol
perl ${isomifdir}/isoMifView.pl -m *.isomif -o ./ -g 1 # file for PyMol
# hydrophobic - cyan aromatic - orange
# Hbond donor - blue Hbond acceptor - red
# (+) charge - green (-) charge - magenta
cd ..
echo
echo " IsoMIF calculated."
echo
#### Resistant? Susceptible?
isomif=$(ls isomif_models/*.isomif)
alltanimoto=$(grep TANI $isomif | tr -s " " "\n" | grep TANI -n | awk -F ":" '{print $1}')
for coln in $alltanimoto
do
tcol=$(grep TANI $isomif | awk '{print $'$coln'}')
if [[ $tcol == "TANI" ]] ; then tanicol=$(( $coln + 1 )) ; tani=$(grep TANI $isomif | awk '{print $'$tanicol'}') ; fi
done
disscoef=$(awk -v n1=1 -v n2=$tani -v OFMT="%.4f" 'BEGIN{print n1-n2}')
if [[ $disscoef > 0.0603 ]] ; then result=resistant ; else result=susceptible ; fi
echo " Your "$inpf" has a dissimilarity of "$disscoef" compared to"
echo " the susceptible reference."
echo " This sequence is likely "$result" to most PIs."
echo
#### Time and Clean-up
times &>> time.tmp
tail -n1 time.tmp | awk '{print $1}'
echo
rm *.tmp
cd $currdir
rm usage.help
exit 0