## Enrich the annotations on the JSON trees

This code adds two quick enhancements to the existing trees. First, it annotates the internal nodes of the tree with the details of their composite organ types, which can be useful when exploring the tree. Second, it adds aggregate lengths for caterpillar branches. I tried using these lengths to make the caterpillar branches more angled, but the result looked really poor. The annotations are kept for later use.

In [1]:
import json

In [122]:
# Read the raw JSON text. The encoding is given explicitly so the result does not
# depend on the platform's default text encoding (JSON is conventionally UTF-8).
with open('ADR1_tree.json', 'r', encoding='utf-8') as myfile:
    data = myfile.read()

# Parse once the file has been closed; json_tree holds the parsed top-level JSON value.
json_tree = json.loads(data)

In [132]:
# Recursively traverse the JSON tree, setting the organ proportions at all internal nodes.
# If this is an internal node, add up the proportions returned by a call to each child. If
# this is a leaf node, just return its organ proportion.
#
# The recursion depth is bounded by Python's recursion limit (this function is not
# tail-recursive, and Python would not optimize it if it were), but this ran fine on ADR1,
# so I think we're ok.
def aggregate_organ_proportions(node) -> dict:
    """Annotate ``node`` and all of its descendants with ``aggregated_proportions``.

    Args:
        node: a tree node dict. A node with a ``'children'`` key is treated as
            internal; any other node must carry an ``'organProportions'`` dict.

    Returns:
        The dict stored under ``node['aggregated_proportions']``. For an internal
        node this maps each key found in its descendants to the sum of the
        (float-converted) values; for a leaf it is a copy of ``organProportions``.

    Raises:
        KeyError: if a node has neither ``'children'`` nor ``'organProportions'``.
    """
    if 'children' in node:
        aggregated_proportions = {}
        for child in node['children']:
            for key, value in aggregate_organ_proportions(child).items():
                aggregated_proportions[key] = aggregated_proportions.get(key, 0.0) + float(value)
    else:
        # Copy rather than alias: otherwise the annotation and the leaf's own
        # 'organProportions' would be the same object, and an in-place edit of
        # one would silently change the other.
        aggregated_proportions = dict(node['organProportions'])

    node['aggregated_proportions'] = aggregated_proportions
    return aggregated_proportions

# kick off the aggregation at the root node, json_tree[0]
final_props = aggregate_organ_proportions(node=json_tree[0])

# each organ's total should come out at ~1.0, allowing for accumulated floating-point error
print(final_props)

{'Gills': 1.0, 'Post intestine': 0.9999999999999998, 'Heart': 1.0, 'NC': 1.0, 'DHC': 1.0, 'Intestinal bulb': 1.0, 'Brain': 0.9999999999999999, 'Cardiomyocytes': 1.0, 'Right eye': 1.0, 'Left eye': 1.0, 'Blood': 1.0}


In [124]:
# Set the aggregate length of each caterpillar (spine) node. A caterpillar run starts at
# the first node with 'is_spine': true and continues until we hit a node where it is false.
def set_caterpillar_length(node, current_length) -> float:
    """Annotate spine nodes below ``node`` with ``spine_length``.

    Args:
        node: a tree node dict with an ``'is_spine'`` flag; spine nodes must
            also carry a numeric ``'length'``. ``'children'`` is optional.
        current_length: the length carried into this node by the caller.

    Returns:
        For a non-spine node, ``current_length`` unchanged (nothing is stored on
        the node). For a spine node, the value stored in ``node['spine_length']``:
        ``current_length + node['length']`` plus the largest value returned by
        its children.

    Note:
        A spine node passes ``current_length`` through to its children and a
        non-spine node restarts its children at 0, so when the traversal is
        started with 0 every recursive call also receives 0.
    """
    children = node.get('children', [])

    if not node['is_spine']:
        # Not part of a spine: any spine further down starts counting from zero.
        for child in children:
            set_caterpillar_length(child, 0)
        return current_length

    # A spine node without children behaves as if its only child were a
    # non-spine node (which would return current_length), instead of raising
    # on max() of an empty sequence.
    longest_below = max(
        (set_caterpillar_length(child, current_length) for child in children),
        default=current_length,
    )
    node['spine_length'] = current_length + node['length'] + longest_below
    return node['spine_length']

# run the spine annotation from the root node with a starting length of zero
final_props = set_caterpillar_length(node=json_tree[0], current_length=0)

# original note: should be ~1.0 (the run recorded in this notebook printed ~0.85)
print(final_props)

0.8524754818354243


In [131]:
# Serialize the annotated tree (sorted keys, 4-space indent) and write it out with a
# trailing newline. The `with` block closes the file even if the write raises.
json_text = json.dumps(json_tree, sort_keys=True, indent=4)
with open("ADR1_tree_rich.json", "w") as output_file:
    output_file.write(json_text + "\n")

### The code below was written to correct for blood alleles, but that correction has already been done


In [77]:
# Convert our JSON back to Newick so that we can run ete3's distance calculation. As a side
# effect, record the leaf names plus each leaf's organ type and organ proportion in the
# module-level containers below.
leaf_names = []
leaf_organ_types = {}
leaf_organ_proportions = {}
def to_newick(node) -> str:
    """Return the Newick encoding of the subtree rooted at ``node`` (without the final ';').

    Args:
        node: a tree node dict with a ``'length'``. A node with a ``'children'``
            key is treated as internal; any other node is a leaf and must carry
            ``'name'`` and a one-entry ``'organProportions'`` dict.

    Returns:
        ``"(child,child,...):length"`` for an internal node, ``"name:length"``
        for a leaf.

    Side effects:
        For every leaf visited, appends its name to ``leaf_names`` and records
        the single key / value of its ``organProportions`` in
        ``leaf_organ_types`` / ``leaf_organ_proportions``.
    """
    if 'children' in node:
        child_str = [to_newick(child) for child in node['children']]
        # One pair of parentheses per internal node. A second, outer pair would
        # denote an additional single-child node with no branch length of its
        # own, adding an extra node to the parsed tree for every internal node.
        return "(" + ",".join(child_str) + "):" + str(node['length'])

    name = node['name']
    leaf_names.append(name)
    assert len(node['organProportions']) == 1
    organ, proportion = next(iter(node['organProportions'].items()))
    leaf_organ_types[name] = organ
    leaf_organ_proportions[name] = proportion
    return name + ":" + str(node['length'])

# encode the whole tree from the root and add the terminating semicolon
newick_str = f"{to_newick(json_tree[0])};"


In [90]:
from ete3 import Tree
# build an ete3 Tree from the Newick string held in newick_str
ete_tree = Tree(newick_str)

In [99]:
# distinct organ types recorded for the leaves; the bare name displays the set in the notebook
organ_set = {*leaf_organ_types.values()}
organ_set

{'Blood',
 'Brain',
 'Cardiomyocytes',
 'DHC',
 'Gills',
 'Heart',
 'Intestinal bulb',
 'Left eye',
 'NC',
 'Post intestine',
 'Right eye'}

In [106]:
# We want a specific organ order (the one currently used in the paper), so spell it out
# as a list rather than relying on the unordered set.
organs = [
    'Blood',
    'Brain',
    'Left eye',
    'Right eye',
    'Gills',
    'Intestinal bulb',
    'Post intestine',
    'Cardiomyocytes',
    'Heart',
    'NC',
    'DHC',
]

In [110]:
import numpy as np

# organ-by-organ accumulator; rows and columns follow the order of `organs`
dist_mat = np.zeros((len(organs), len(organs)))
for leaf1 in leaf_names:
    for leaf2 in leaf_names:
        # only handle pairs where leaf1 sorts strictly before leaf2, so each
        # pair of distinct names is processed once and self-pairs are skipped
        if not leaf1 < leaf2:
            continue
        common = ete_tree.get_common_ancestor([leaf1, leaf2])
        # distance from each leaf up to the pair's common ancestor
        dist1 = ete_tree.get_distance(leaf1, common)
        dist2 = ete_tree.get_distance(leaf2, common)
        org1 = organs.index(leaf_organ_types[leaf1])
        org2 = organs.index(leaf_organ_types[leaf2])
        # leaf1's distance goes in [org1, org2], leaf2's in [org2, org1]
        dist_mat[org1, org2] += dist1
        dist_mat[org2, org1] += dist2

KeyboardInterrupt: 