In [1]:
import json
import importlib.util
import pandas as pd
# NOTE(review): absolute, machine-specific location; must be edited before running elsewhere.
baltic_path = '/Users/jort/coding/baltic/baltic/baltic.py' # path to baltic.py file (https://github.com/evogytis/baltic)

def load_module(name, path):
    '''Import a Python source file from an explicit path and return the module.

    Args:
        name: module name to give the loaded module.
        path: filesystem path of the source file to execute.

    Returns:
        The module object after its code has been executed.

    Raises:
        ImportError: if no import spec (or loader) can be created for `path`,
            e.g. because the file suffix is not a recognised module type.
    '''
    module_spec = importlib.util.spec_from_file_location(name, path)
    # spec_from_file_location returns None when no loader matches the file;
    # fail here with a clear message rather than with an AttributeError below.
    if module_spec is None or module_spec.loader is None:
        raise ImportError(f'cannot load module {name!r} from {path!r}')
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module

# Load baltic from baltic_path and expose it under the alias `bt`.
bt = load_module('bt', baltic_path)

def bt_read_in_tree_json(input_tree):
    '''Parse a JSON tree file and build the matching baltic tree.

    Returns a tuple of (parsed JSON object, baltic tree).
    '''
    # Tells baltic where to find the attributes it needs in the JSON;
    # per the original note, height and name are required at a minimum.
    attribute_lookup = {
        'absoluteTime': lambda node: node.traits['node_attrs']['num_date']['value'],
        'name': 'name',
    }

    with open(input_tree) as handle:
        parsed_json = json.load(handle)

    # loadJSON returns a pair; only the tree is handed back to the caller.
    tree, _metadata = bt.loadJSON(parsed_json, attribute_lookup)
    return parsed_json, tree

In [2]:
# Lookup keyed by clade label. Each value is another clade label or None;
# presumably the value is the parent clade and None marks a clade with no
# recorded parent -- TODO confirm.
# NOTE(review): most values equal the key's dotted prefix, but a few do not;
# those entries are flagged inline so they can be confirmed as intentional.
clade_relationships = {
    '0': None,
    '1': None,
    '1.1': '1',
    '1.1.1': '1.1',
    '1.1.2': '1.1',
    '2.4': None,
    '2.5': None,
    '2.1.2': '2.1.1',
    '2.1.3': '2.1.2',
    '2.1.1': None,
    '2.1.3.1': '2.1.2',  # NOTE(review): label prefix is 2.1.3, mapped to 2.1.2 -- confirm
    '2.1.3.2': '2.1.2',  # NOTE(review): label prefix is 2.1.3, mapped to 2.1.2 -- confirm
    '2.1.3.2a': '2.1.3.2',
    '2.1.3.2b': '2.1.3.2',
    '2.1.3.3': '2.1.2',  # NOTE(review): label prefix is 2.1.3, mapped to 2.1.2 -- confirm
    '2.2': None,
    '2.2.2': '2.2',
    '2.2.1': '2.2',
    '2.2.1.1': '2.2.1',
    '2.2.1.1a': '2.2.1.1',
    '2.2.1.2': '2.2.1',
    '2.2.2.1': '2.2',  # NOTE(review): label prefix is 2.2.2, mapped to 2.2 -- confirm
    '2.3.1': None,
    '2.3.2': None,
    '2.3.2.1': '2.3.2',
    '2.3.2.1a': '2.3.2.1',
    '2.3.2.1b': '2.3.2.1',
    '2.3.2.1c': '2.3.2.1',
    '2.3.2.1d': '2.3.2.1',
    '2.3.2.1e': '2.3.2.1',
    '2.3.2.1f': '2.3.2.1',
    '2.3.2.1g': '2.3.2.1',
    '2.3.3': None,
    '2.3.4': None,
    '2.3.4.1': '2.3.4',
    '2.3.4.2': '2.3.4',
    '2.3.4.3': '2.3.4',
    '2.3.4.4': '2.3.4',
    '2.3.4.4-like': '2.3.4.4',
    '2.3.4.4a': '2.3.4.4',
    '2.3.4.4b': '2.3.4.4',
    '2.3.4.4c': '2.3.4.4',
    '2.3.4.4d': '2.3.4.4',
    '2.3.4.4e': '2.3.4.4',
    '2.3.4.4f': '2.3.4.4',
    '2.3.4.4g': '2.3.4.4',
    '2.3.4.4h': '2.3.4.4',
    '3': None,
    '4': None,
    '7': None,
    '6': None,
    '7.1': '7',
    '7.2': '7',
    '5': None,
    '9': None,
    '8': None
}

In [3]:
# Tree JSON to analyse.
json_file = '/Users/jort/coding/h5-clades/nextstrain-build-2.3.4.4/auspice/flu_avian_h5nx_ha.json'

json_tree, bt_tree = bt_read_in_tree_json(json_file)

# Group the tree's tips by the value of their 'clade' trait.
clade_bt_leaves = {}
for tip in bt_tree.getExternal():
    clade_bt_leaves.setdefault(tip.traits['clade'], []).append(tip)

# For every clade, the common ancestor of all tips carrying that clade label ...
bt_lcas = {
    label: bt_tree.commonAncestor(tips)
    for label, tips in clade_bt_leaves.items()
}

# ... and the mutations stored on that ancestor's branch.
bt_lcas_mutations = {
    label: ancestor.traits['branch_attrs']['mutations']
    for label, ancestor in bt_lcas.items()
}

# One (clade, gene, site, alt) tuple per output line; the first tuple is the
# header line, which is why the file is written with header=False below.
rows = [('clade', 'gene', 'site', 'alt')]

# (key in the mutations dict, character that marks a mutation to be skipped)
mutation_groups = (('nuc', 'N'), ('HA', 'X'))

for label, branch_mutations in bt_lcas_mutations.items():
    # Clades with a recorded relationship get a 'clade' row pointing at it;
    # '2.3.4.4' itself is deliberately left without one.
    if label != '2.3.4.4' and clade_relationships[label] is not None:
        rows.append((label, 'clade', clade_relationships[label], ''))

    for gene, skip_char in mutation_groups:
        if gene not in branch_mutations:
            continue
        for change in branch_mutations[gene]:
            # Drop mutations containing the skip character or a gap.
            if skip_char in change or '-' in change:
                continue
            # e.g. 'A123T' -> site '123', alt 'T'
            rows.append((label, gene, change[1:-1], change[-1]))

# Per-column views of the same rows, kept as separate lists.
clade_data, muttype_data, mutsite_data, mut_data = (list(column) for column in zip(*rows))

df = pd.DataFrame(rows)

df.to_csv('/Users/jort/Desktop/test_muts.tsv', sep="\t", index=False, header=False)


Tree height: 12.214898
Tree length: 785.738768
annotations present

Numbers of objects in tree: 2314 (1001 nodes and 1313 leaves)

