In [1]:
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceMatrix, DistanceTreeConstructor
from keyname import keyname as kn
from matplotlib import colors as mpl_colors
from teeplot import teeplot as tp

import Bio
import alifedata_phyloinformatics_convert as apc
import csv
import glob
import itertools
import json
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import networkx as nx
import numpy as np
import pandas as pd
import scipy as sci
import seaborn as sns
import urllib.request as request
import io
from copy import deepcopy


from etlib.SortableTree.SortableTree import SortableTree, find_leaves, to_tril
from etlib.PlotTools.PlotTools import enable_pretty_graphing
# Run the project's plotting setup helper once, right after the imports.
# NOTE(review): its exact effect is defined in etlib.PlotTools — presumably
# matplotlib/seaborn styling; confirm before relying on it.
enable_pretty_graphing()

In [2]:
# Load the three inputs used by this notebook: a stint -> morph lookup, the
# pairwise distance matrix, and the tags recorded for each stint.
morphs = {
    # The CSV column is literally ' morph' (leading space); [1:] drops the
    # first character of each value — presumably that same stray space.
    record['stint']: record[' morph'][1:]
    for record in pd.read_csv('morphs_by_stint.csv').to_dict(orient='records')
}
matrix = np.loadtxt("data/distance_matrix.matrix")
with open("data/stint_tags.json", 'r') as f:
    stint_tags = json.load(f)

In [3]:
# Parsimony scorer constructed with no arguments, i.e. without an explicit
# cost matrix.
# NOTE(review): per the Biopython docs a matrix-less scorer uses the Fitch
# algorithm — confirm this is the intended scoring.
scorer = Phylo.TreeConstruction.ParsimonyScorer()

In [4]:
# Tree searcher driven by `scorer`.
# NOTE(review): "NNI" is nearest-neighbor interchange per the Biopython docs —
# confirm the search strategy is what is wanted here.
searcher = Phylo.TreeConstruction.NNITreeSearcher(scorer)

In [5]:
# Build a neighbor-joining tree from the pairwise distances.
# Taxa are labelled "0" .. "N-1" where N is the number of stints; this assumes
# the matrix rows are in that same order — TODO confirm against the producer
# of distance_matrix.matrix.
dm = DistanceMatrix(
    names=list(map(str, range(len(stint_tags)))),
    # to_tril converts the transposed square matrix to the lower-triangular
    # form DistanceMatrix takes (helper lives in etlib.SortableTree).
    matrix=to_tril(matrix.T),
)
tree = DistanceTreeConstructor().nj(dm)

In [6]:
# For every stint, concatenate its tags into one long bit string: each tag is
# rendered in binary without the "0b" prefix and left-padded to 64 characters.
bigtags_per_stint = {
    stint: ''.join(bin(tag)[2:].zfill(64) for tag in tags)
    for stint, tags in stint_tags.items()
}

In [7]:
# Assemble PHYLIP-style text: a "<record count> <sequence length>" header line
# (the length is taken from stint '0'), followed by one line per stint made of
# the stint id zero-padded to 10 characters and immediately its bit string.
align_str = (
    f"{len(bigtags_per_stint)} {len(bigtags_per_stint['0'])}\n"
    + "".join(
        str(stint).zfill(10) + str(bigtag) + "\n"
        for stint, bigtag in bigtags_per_stint.items()
    )
)

In [8]:
# Wrap the PHYLIP text in an in-memory file object so it can be handed to a
# parser that expects a handle. Note this rebinds `f`, which earlier referred
# to the (by now closed) stint_tags.json file.
f = io.StringIO(align_str)

In [9]:
from Bio import AlignIO

# Parse the PHYLIP text held in `f` into a single alignment object.
# The handle is rewound first: AlignIO.read consumes it, so without the seek a
# second run of this cell would start at end-of-file, find no records and
# raise instead of rebuilding `aln`.
f.seek(0)
aln = AlignIO.read(f, 'phylip')
aln

<<class 'Bio.Align.MultipleSeqAlignment'> instance (101 records of length 2240) at 7f441d878820>

In [10]:
def rename_tree(root):
    """Zero-pad integer-like node names to 10 characters, in place.

    Visits ``root`` and every node reachable through ``.clades``. A node whose
    ``name`` converts with ``int()`` is renamed to that integer's decimal form
    left-padded with zeros to width 10; any other name (including ``None``) is
    left untouched.

    Args:
        root: node exposing a ``name`` attribute and an iterable ``clades``.

    Returns:
        None. The nodes are modified in place.
    """
    pending = [root]
    while pending:
        node = pending.pop()
        try:
            node.name = str(int(node.name)).zfill(10)
        except (ValueError, TypeError):
            # Non-numeric or missing name: keep it as it is.
            pass
        # Visit order is irrelevant because each rename is independent.
        pending.extend(node.clades)

In [11]:
# Work on a copy so the neighbor-joining `tree` is left unmodified, then
# zero-pad its numeric node names to 10 characters — the same padding applied
# to the record names written into align_str.
fixed_tree = deepcopy(tree)
rename_tree(fixed_tree.clade)

In [12]:
# Parsimony tree constructor using `searcher`, with the renamed
# neighbor-joining tree passed as the second argument.
# NOTE(review): Biopython documents that argument as the starting tree for
# the search — confirm.
constructor = Phylo.TreeConstruction.ParsimonyTreeConstructor(searcher, fixed_tree)

In [13]:
# Build the parsimony tree for the alignment.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so
# `pars_tree` was never produced in that session and the cells that follow
# were not executed. Consider timing this step and caching its result.
pars_tree = constructor.build_tree(aln)

KeyboardInterrupt: 

In [None]:
# Wrap the root clade of a private copy of pars_tree in a SortableTree, so
# that pars_tree itself is never touched by the calls below.
sorted_pars_tree = SortableTree(deepcopy(pars_tree).clade)
# Both helpers come from etlib.SortableTree; presumably the first repairs
# internal-node labels and the second orders sibling clades — TODO confirm.
sorted_pars_tree.fix_inner_nodes()
sorted_pars_tree.sort_tree(reverse=True)

In [None]:
# Ladderized variant, made on its own copy so sorted_pars_tree keeps its
# current ordering.
ladderized_pars_tree = deepcopy(sorted_pars_tree)
ladderized_pars_tree.ladderize()

In [None]:
# Rerooted variant, again on its own copy of sorted_pars_tree.
# The outgroup is the node named "0000000002" — presumably stint 2 after
# zero-padding; NOTE(review): confirm why this stint is the outgroup.
rerooted_pars_tree = deepcopy(sorted_pars_tree)
rerooted_pars_tree.root_with_outgroup({"name": "0000000002"})
# Re-apply the same descending sort that was used for sorted_pars_tree.
rerooted_pars_tree.sort_tree(reverse=True)

In [None]:
# store parsimony trees
# Each variant is written as phyloXML to its own file under data/. The writes
# are driven from one table rather than four copy-pasted calls, which also
# keeps the cell from echoing the return value of the final write. The loop
# variable is deliberately not called `tree`, which is the neighbor-joining
# tree defined earlier in the notebook.
for tree_variant, out_path in (
    (pars_tree, "data/parsimony_tree.xml"),
    (sorted_pars_tree, "data/sorted_parsimony_tree.xml"),
    (ladderized_pars_tree, "data/ladderized_parsimony_tree.xml"),
    (rerooted_pars_tree, "data/rerooted_parsimony_tree.xml"),
):
    Phylo.write(tree_variant, out_path, "phyloxml")