-
Notifications
You must be signed in to change notification settings - Fork 1
/
raxml.py
37 lines (35 loc) · 1.29 KB
/
raxml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import fileinput
import os
import subprocess
import sys
from datetime import datetime

from Bio import SeqIO
# --- User configuration ------------------------------------------------------
# Directory containing all the FASTA files.
path = ""
# File that receives the concatenated alignment.
alignment = ""

# Fail with a readable message instead of a traceback when the placeholders
# above have not been filled in or point at nothing.
if not path or not alignment:
    raise SystemExit("Set `path` and `alignment` at the top of this script before running it.")
if not os.path.isdir(path):
    raise SystemExit("FASTA directory not found: %s" % path)

# The script changes the working directory further down; absolute paths keep
# pointing at the same locations afterwards.
path = os.path.abspath(path)
alignment = os.path.abspath(alignment)

# NOTE(review): append mode extends an existing alignment file rather than
# replacing it, so running the script twice adds every sequence a second time.
# Delete the file between runs, or switch to 'w' if overwriting is intended.
raxml_input_file = open(alignment, 'a')
# Print the length of every record in every FASTA file in `path`, one
# "<file name> <length>" line per record, so that sequences of unexpected
# length can be spotted before the files are concatenated.
fasta_dir = os.path.abspath(path)
# The original script switched into the FASTA directory here; later steps
# (including the raxmlHPC call) run from it, so the switch is kept.
os.chdir(fasta_dir)
# Sorted so the report comes out in the same order on every run.
for fasta_name in sorted(os.listdir(fasta_dir)):
    fasta_path = os.path.join(fasta_dir, fasta_name)
    if not os.path.isfile(fasta_path):
        # Sub-directories cannot be parsed as FASTA; skip them.
        continue
    # SeqIO.parse is given the file name and manages the handle itself.
    for rec in SeqIO.parse(fasta_path, 'fasta'):
        print("%s %s" % (fasta_name, len(rec)))
# Relabel each FASTA file with an ID taken from its file name, then append
# the file's contents to the alignment file.

# Header text that gets replaced by the per-file ID.
PLACEHOLDER_HEADER = "gi|444893469|emb|AL123456.3|"

fasta_dir = os.path.abspath(path)
# Resolved once so the alignment file can be recognised if it lives in `path`.
alignment_realpath = os.path.realpath(raxml_input_file.name)
# Kept from the original script: later steps run from the FASTA directory.
os.chdir(fasta_dir)
try:
    # Sorted so the sequences are written in the same order on every run.
    for fasta_name in sorted(os.listdir(fasta_dir)):
        fasta_path = os.path.join(fasta_dir, fasta_name)
        if not os.path.isfile(fasta_path):
            continue
        if os.path.realpath(fasta_path) == alignment_realpath:
            # The in-place rewrite below would replace the alignment file
            # underneath the handle that is still open for writing.
            continue

        # ID = file name without extension, up to the first underscore.
        sample_id = os.path.splitext(fasta_name)[0].split('_')[0]

        # Rewrite the file in place: with inplace=True everything written to
        # stdout inside the loop becomes the new file content.
        for line in fileinput.input(fasta_path, inplace=True):
            sys.stdout.write(line.replace(PLACEHOLDER_HEADER, sample_id))

        # Append the relabelled file, normalising each line to end in exactly
        # one newline with no surrounding whitespace.
        with open(fasta_path) as fasta_file:
            for line in fasta_file:
                raxml_input_file.write("%s\n" % line.strip())
finally:
    # Flush the alignment to disk even if one of the files could not be read.
    raxml_input_file.close()
# Run RAxML on the concatenated alignment and report how long it took.

# Value passed to raxmlHPC's -n option.
# NOTE(review): the original comment described this as a path to an output
# tree file, but the RAxML manual documents -n as a run name used as the
# suffix of the output files (an output directory is set with -w) -- confirm.
out_name = ""
if not out_name:
    raise SystemExit("Set `out_name` before running RAxML.")

start_time = datetime.now()
# Pass the alignment *path*: the file object itself would be formatted as
# its repr ("<closed file ...>"), which is not a usable argument.
inp_fasta = alignment
raxml_command = [
    "raxmlHPC",
    "-s", inp_fasta,
    "-n", out_name,
    "-m", "GTRCAT",
    "-p", "12345",
]
# An argument list (no shell) keeps paths with spaces or shell
# metacharacters intact.
try:
    exit_code = subprocess.call(raxml_command)
except OSError as err:
    raise SystemExit("Could not start raxmlHPC: %s" % err)
end_time = datetime.now()
print('Duration: {}'.format((end_time - start_time)))

if exit_code != 0:
    # Surface a failed run to the caller instead of ignoring it.
    sys.stderr.write("raxmlHPC exited with status %s\n" % exit_code)
    sys.exit(exit_code)