In [49]:
import re, os, sys
import dendropy as dp
import pandas as pd
import numpy as np

In [50]:
# Names of every entry in virus_trees/ whose file name contains 'tree'.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep tr_files[0] and the order of the summary rows reproducible across runs.
tr_files = sorted(name for name in os.listdir('virus_trees/') if 'tree' in name)

In [124]:
# Load the first listed tree file as Newick (kept as `tr` for interactive inspection).
tr = dp.Tree.get_from_path(os.path.join('virus_trees/', tr_files[0]), 'newick')

In [127]:
def get_node_support(tr, schema = 'newick'):
    """Collect the numeric support value stored on each node of a tree.

    Nodes are visited in post-order (``tr.postorder_node_iter()``).

    Parameters
    ----------
    tr : tree object
        Must provide ``postorder_node_iter()``; presumably a ``dendropy.Tree``
        (that is what the caller in this notebook passes).
    schema : str, default 'newick'
        Where the support value is read from:

        * ``'newick'`` -- the node's ``label``; nodes whose label is empty or
          ``None`` are skipped.
        * ``'nexus'``  -- the ``value`` of the node's first annotation; nodes
          without annotations are skipped.

    Returns
    -------
    list of float
        One entry per node that carries a support value, in post-order.

    Raises
    ------
    ValueError
        If ``schema`` is neither ``'newick'`` nor ``'nexus'``, or if a stored
        value cannot be converted with ``float()``.
    """
    if schema == 'newick':
        return [float(n.label) for n in tr.postorder_node_iter() if n.label]

    if schema == 'nexus':
        return [
            float(n.annotations[0].value)
            for n in tr.postorder_node_iter()
            if len(n.annotations) > 0
        ]

    # Previously an unknown schema fell through and returned None, which only
    # surfaced later as a confusing failure in the caller's numpy statistics.
    raise ValueError(
        "unsupported schema %r: expected 'newick' or 'nexus'" % (schema,)
    )

In [137]:
# Build one summary row per tree file: mean node support, its 2.5th and 97.5th
# percentiles, and the number of taxa in the tree's taxon namespace.
#
# Rows are collected first and the DataFrame is built once at the end. The
# previous version pre-allocated an uninitialised float frame with np.empty and
# filled it through DataFrame.ix, which was removed in pandas 1.0 and forced
# string file names into a float column.
out_columns = ['file_name', 'mean_aLRT', 'lower_aLRT', 'upper_aLRT', 'ntaxa']
out_records = []

for f in tr_files:
    tree_path = 'virus_trees/' + f
    try:
        tr_temp = dp.Tree.get_from_path(tree_path, 'newick')
        node_support_temp = get_node_support(tr_temp, 'newick')
    except Exception:
        # Reading as Newick (or converting its node labels to float) failed,
        # so retry the same file as NEXUS. `Exception` rather than a bare
        # `except` so KeyboardInterrupt / SystemExit still stop the loop.
        tr_temp = dp.Tree.get_from_path(tree_path, 'nexus')
        node_support_temp = get_node_support(tr_temp, 'nexus')

    if len(node_support_temp) > 0:
        mean_support = np.mean(node_support_temp)
        lower_support, upper_support = np.percentile(node_support_temp, [2.5, 97.5])
    else:
        # No node carried a support value: record NaN explicitly instead of
        # relying on numpy's behaviour for empty input.
        mean_support = lower_support = upper_support = np.nan

    out_records.append((
        f,
        mean_support,
        lower_support,
        upper_support,
        len(tr_temp.taxon_namespace),  # stored as an integer count
    ))

out_frame = pd.DataFrame(out_records, columns=out_columns)

In [143]:
# Write the per-file support summary to CSV, omitting the row index.
out_frame.to_csv(path_or_buf='virus_trees_aLRT_stats.csv', index=False)