In [1]:
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor
from keyname import keyname as kn
from matplotlib import colors as mpl_colors
from teeplot import teeplot as tp

import Bio
import alifedata_phyloinformatics_convert as apc
import csv
import glob
import itertools
import json
import networkx as nx
import numpy as np
import pandas as pd
import scipy as sci
import seaborn as sns
import urllib.request as request
import io
from copy import deepcopy

from etlib.SortableTree.SortableTree import SortableTree, to_tril, fix_branch_length
from etlib.PlotTools.PlotTools import enable_pretty_graphing, set_color
# Apply the plotting defaults provided by etlib.PlotTools before any figure is
# drawn (presumably matplotlib/seaborn styling -- confirm in etlib).
enable_pretty_graphing()

In [2]:
# Map each stint id to its morph label. The CSV header is written as
# "stint, morph", so the second column name (and every value in it) carries a
# leading space; the column is addressed as ' morph' and the first character
# of each value is sliced off.
morphs = dict(
    (stint, morph[1:])
    for stint, morph in pd.read_csv('morphs_by_stint.csv')[['stint', ' morph']].itertuples(
        index=False,
        name=None,
    )
)

In [3]:
# Load the numeric matrix used for clustering below.
matrix = np.loadtxt("data/distance_matrix.matrix")

# Load the per-stint tag metadata stored alongside the matrix.
with open("data/stint_tags.json", 'r') as tags_file:
    stint_tags = json.load(tags_file)

In [4]:
# scipy's `linkage` treats ANY 2-D input as a set of observation vectors and
# clusters on the Euclidean distance between rows. To cluster on the distances
# actually stored in the file, the square matrix must first be converted to
# condensed (1-D, upper-triangle) form.
# NOTE(review): assumes data/distance_matrix.matrix is a square, symmetric
# matrix with a zero diagonal (as its name suggests); `squareform` raises
# ValueError otherwise instead of silently producing a wrong tree.
# The linkage method is left at scipy's default, as before.
linkage_matrix = sci.cluster.hierarchy.linkage(
    sci.spatial.distance.squareform(matrix)
)

In [5]:
# Turn the scipy linkage matrix into a dendropy tree whose leaves are labelled
# "<stint> (<morph>)".
dendropy_tree = apc.scipy_linkage_matrix_to_dendropy_tree(
    linkage_matrix,
    leaf_labels=[
        '{} ({})'.format(str(leaf_index), morphs[leaf_index])
        # a linkage matrix has one row per merge, i.e. one fewer than leaves
        for leaf_index in range(1 + len(linkage_matrix))
    ],
)

In [6]:
# Convert the dendropy tree into a DataFrame via
# alifedata_phyloinformatics_convert; this is the intermediate form handed to
# the Biopython conversion in the following cell.
df = apc.dendropy_tree_to_alife_dataframe(dendropy_tree)

In [7]:
# Rebuild the phylogeny as a Biopython tree. The second argument names the
# DataFrame column(s) to carry over (presumably exposed as attributes on each
# clade, since later cells read `.label` -- TODO confirm against apc docs).
tree = apc.alife_dataframe_to_biopython_tree(df, ['label'])

In [8]:
# Re-wrap the Biopython tree's root clade in the project's SortableTree class.
new_tree = SortableTree(root=tree.clade)
# NOTE(review): fix_inner_nodes is defined in etlib.SortableTree; presumably it
# normalizes internal nodes in place -- confirm there before relying on it.
new_tree.fix_inner_nodes()

In [9]:
def fix_tree(root):
    """Copy each descendant's ``label`` onto its ``name``, in place.

    Every node below ``root`` is visited; a node whose ``label`` is truthy has
    its ``name`` overwritten with that label, while nodes with an empty or
    ``None`` label keep their current name. ``root`` itself is never modified,
    only its descendants.

    The traversal uses an explicit stack rather than recursion so that very
    deep (chain-like) trees, which hierarchical clustering can produce, do not
    hit Python's recursion limit.

    Args:
        root: a node that yields its direct children when iterated; each
            descendant must expose ``label`` and ``name`` attributes.

    Returns:
        None. The tree is mutated in place.
    """
    pending = [root]
    while pending:
        parent = pending.pop()
        for child in parent:
            if child.label:
                child.name = child.label
            # Visit this child's own children later; visit order does not
            # matter because each node is updated independently.
            pending.append(child)
# Copy labels onto names for every clade below the root clade (fix_tree does
# not touch the clade it is handed, only that clade's descendants).
fix_tree(new_tree.clade)

In [10]:
# Serialize the relabelled tree as PhyloXML. Phylo.write returns the number of
# trees written, which is the value shown as this cell's output.
Phylo.write(new_tree, "data/scipy_linkage_tree.xml", "phyloxml")

1

In [11]:
# tp.tee(
#     Phylo.draw,
#     new_tree,
#     label_func=lambda x: x.label if x.is_terminal() else None,
#     branch_labels=None,
#     label_colors=lambda x: set_color(x),
#     xlabel=["point mutation distance"],
#     ylabel=[""],
#     yticks={"ticks": [0]},
#     grid={"visible": True, "which": "both", "axis": "x", "color": "gainsboro", "linewidth": "0.5"},
#     do_show=False,
#     teeplot_subdir="scipy_linkage_tree"
# )

In [12]:
# no_outliers_tree = deepcopy(new_tree)
# no_outliers_tree.prune(target=lambda x: x.label == '0 (a)')
# no_outliers_tree.prune(target=lambda x: x.label == '1 (b)')

In [13]:
# tp.tee(
#     Phylo.draw,
#     no_outliers_tree,
#     label_func=lambda x: x.label if x.is_terminal() else None,
#     branch_labels=None,
#     label_colors=lambda x: set_color(x),
#     xlabel=["point mutation distance"],
#     ylabel=[""],
#     yticks={"ticks": [0]},
#     grid={"visible": True, "which": "both", "axis": "x", "color": "gainsboro", "linewidth": "0.5"},
#     do_show=False,
#     teeplot_subdir="scipy_linkage_tree_no_outliers"
# )