In [1]:
import pandas as pd
import ete3
import re
import os, sys

In [3]:
# Tab-separated metadata table; its first column becomes the row index.
# Later cells look rows up by tree leaf name and read the 'taxid' column.
proteo = pd.read_csv('/work/nif/proteo_info.tab', sep='\t', index_col=0)
# NCBI taxonomy handle used below for lineage, rank and name lookups.
ncbi = ete3.NCBITaxa()

In [9]:
# Export the nif concatenated-gene tree as a FigTree-readable NEXUS file:
#   * a `taxa` block whose labels carry NCBI taxonomy annotations
#     (tax_class / tax_order / tax_family / tax_species, when available);
#   * a `trees` block whose internal nodes carry UFBoot / aLRT annotations.
# Depends on `proteo` and `ncbi` created in an earlier cell.
tree_file   = '/work/nif/nifHDKENB_concat/concatenated_partitions.treefile'
tree        = ete3.Tree(tree_file, format=1)
branch_names = {}  # not used in this cell; kept in case another cell reads it

# Text mode ('w', not 'wb'): everything written below is str, which a
# binary-mode handle rejects on Python 3. The `with` block also guarantees the
# file is closed if one of the lookups below raises.
with open('%s.figTree' % tree_file, 'w') as out:
    out.write("#NEXUS\nbegin taxa;\n\tdimensions ntax=%i;\n\ttaxlabels\n" % len(tree))

    for node in tree.traverse():
        if node.is_leaf():
            # Raises KeyError if the leaf name is missing from the `proteo` index.
            taxid = proteo.loc[node.name, 'taxid']
            # Invert the {taxid: rank} mapping into {rank: taxid}.
            lineage = {rank: tid
                       for tid, rank in ncbi.get_rank(ncbi.get_lineage(taxid)).items()}
            lineage_names = ncbi.get_taxid_translator(lineage.values())

            # Only ranks actually present in this leaf's lineage are annotated.
            comment = ['tax_%s="%s"' % (rank, lineage_names[lineage[rank]])
                       for rank in ['class', 'order', 'family', 'species']
                       if rank in lineage]
            out.write('\t%s [&%s]\n' % (node.name, ' '.join(comment)))

        elif node.name:
            # Internal labels are taken to be "<aLRT>/<UFBoot>"; a label without
            # exactly one '/' raises ValueError here.
            aLRT, UFBoot = node.name.split('/')
            node.name = '[&UFBoot=%.2f,aLRT=%.2f]' % (float(UFBoot), float(aLRT))

    # The substitution below restores the FigTree annotation syntax, on the
    # assumption that tree.write() emits the labels set above with '[', '=',
    # ',' and ']' replaced by '_' (that is the form the pattern matches).
    newick_text = tree.write(format=1)
    newick_text = re.sub(r'_&UFBoot_(\d+\.\d\d)_aLRT_(\d+\.\d\d)_',
                         r'[&UFBoot=\1,aLRT=\2]',
                         newick_text)
    out.write(';\nend;\n')
    out.write('begin trees;\n\ttree tree_1 = [&R] %s\nend;' % newick_text)

In [27]:
# Export the reference species tree, pruned to the leaves of `tree`, as a
# FigTree-readable NEXUS file:
#   * a `taxa` block whose labels carry NCBI taxonomy annotations
#     (tax_class / tax_order / tax_family / tax_species, when available);
#   * a `trees` block whose internal nodes carry a `bootstrap` annotation.
# Depends on `proteo`, `ncbi` and `tree` created in earlier cells.
tree_file   = '/work/nif/ramulu_gene_families/RAxML_fastTreeSH_Support.all_ref_genes_concat-support2'
sp_tree     = ete3.Tree(tree_file, format=1)
# Keep only the taxa that are also leaves of `tree`; prune() modifies sp_tree
# in place.
sp_tree.prune(tree.get_leaf_names(), preserve_branch_length=True)
branch_names = {}  # not used in this cell; kept in case another cell reads it

# Text mode ('w', not 'wb'): everything written below is str, which a
# binary-mode handle rejects on Python 3. The `with` block also guarantees the
# file is closed if one of the lookups below raises.
with open('%s.figTree' % tree_file, 'w') as out:
    out.write("#NEXUS\nbegin taxa;\n\tdimensions ntax=%i;\n\ttaxlabels\n" % len(sp_tree))

    for node in sp_tree.traverse():
        if node.is_leaf():
            # Raises KeyError if the leaf name is missing from the `proteo` index.
            taxid = proteo.loc[node.name, 'taxid']
            # Invert the {taxid: rank} mapping into {rank: taxid}.
            lineage = {rank: tid
                       for tid, rank in ncbi.get_rank(ncbi.get_lineage(taxid)).items()}
            lineage_names = ncbi.get_taxid_translator(lineage.values())

            # Only ranks actually present in this leaf's lineage are annotated.
            comment = ['tax_%s="%s"' % (rank, lineage_names[lineage[rank]])
                       for rank in ['class', 'order', 'family', 'species']
                       if rank in lineage]
            out.write('\t%s [&%s]\n' % (node.name, ' '.join(comment)))

        elif node.name:
            # The internal label read from the file is reused verbatim as the
            # support value.
            node.name = '[&bootstrap=%s]' % node.name

    # The substitution below restores the FigTree annotation syntax, on the
    # assumption that sp_tree.write() emits the labels set above with '[', '='
    # and ']' replaced by '_' (that is the form the pattern matches).
    # Decimal labels are accepted as well as integers, so a fractional support
    # value is not left in its mangled form.
    newick_text = sp_tree.write(format=1)
    newick_text = re.sub(r'_&bootstrap_(\d+(?:\.\d+)?)_',
                         r'[&bootstrap=\1]',
                         newick_text)
    out.write(';\nend;\n')
    out.write('begin trees;\n\ttree tree_1 = [&R] %s\nend;' % newick_text)

In [26]:
# Display the `support` attribute of the second child of the species-tree root.
# This cell's execution count (In [26]) is lower than that of the cell that
# builds `sp_tree` (In [27]), so the value shown predates that cell's last run.
# NOTE(review): `sp_tree` is loaded with format=1 and its internal labels are
# used as node names above, so the 1.0 shown is presumably a default rather
# than a value parsed from the file — confirm against the ete3 documentation.
sp_tree.children[1].support

1.0