In [None]:
import os
import subprocess
import pandas as pd

import warnings
from ete3 import Tree, TreeStyle, NodeStyle, TextFace, AttrFace, faces
from IPython.display import Image, display

warnings.filterwarnings("ignore", category=UserWarning, module="ete3")

from jacksonii_analyses import vcf_parser, clustering
from jacksonii_analyses.plotting import *

from importlib import reload

reload(vcf_parser)

# Make sure the output folders for phylogeny files and figures exist.
for out_dir in ("../data/phylo", "../data/figs"):
    os.makedirs(out_dir, exist_ok=True)

# Concatenated SNP alignment written by this notebook, and the PHYLIP file
# (same name plus ".varsites.phy") that the tree is ultimately inferred from.
base_alignment = "../data/phylo/snp_concat.fasta"
cleaned_alignment = "../data/phylo/snp_concat.fasta.varsites.phy"
def iqtree_cmd(fasta_path):
    """Build the argument list used to run IQ-TREE on one alignment.

    Args:
        fasta_path: Path of the alignment file passed to ``-s``.

    Returns:
        A list of strings suitable for ``subprocess.run``: the ``iqtree``
        executable, the alignment (``-s``), the model ``GTR+G+ASC`` (``-m``),
        ``-bb 1000``, ``-nt AUTO`` and finally ``--redo``.
    """
    flag_values = (
        ("-s", fasta_path),
        ("-m", "GTR+G+ASC"),
        ("-bb", "1000"),
        ("-nt", "AUTO"),
    )
    argv = ["iqtree"]
    for flag, value in flag_values:
        argv.extend((flag, value))
    argv.append("--redo")
    return argv

In [None]:
def _lookup_or_default(mapping: dict, key, default):
    """Return ``mapping[key]``, or ``default`` if the key is absent or maps to None/NaN.

    Dictionaries built from DataFrame columns can carry float NaN for rows
    without a value; treating those as missing keeps "nan" out of the figure.
    """
    value = mapping.get(key, default)
    if value is None or (isinstance(value, float) and value != value):
        return default
    return value


def _style_tree_with_leaf_labels(tree, color_map: dict, label_map: dict, position: str):
    """Shared implementation: thicken branches and attach one text label per leaf."""
    for node in tree.traverse():
        nstyle = NodeStyle()
        nstyle["size"] = 0  # Hide node circles
        nstyle["hz_line_width"] = 3  # Increase horizontal line width
        nstyle["vt_line_width"] = 3  # Increase vertical line width
        if node.is_leaf():
            leaf_label = TextFace(
                _lookup_or_default(label_map, node.name, node.name),
                fsize=8,
                fgcolor=_lookup_or_default(color_map, node.name, "black"),
                ftype="Sans",
                fstyle="italic",
                bold=False,
            )
            node.add_face(leaf_label, column=0, position=position)
        node.set_style(nstyle)
    return tree


def apply_basic_node_style_to_tree(tree, color_map: dict, label_map: dict):
    """Style every node of ``tree`` and label leaves right next to their branch.

    Args:
        tree: Tree whose nodes are styled in place.
        color_map: Leaf name -> label colour; leaves without a usable entry
            are drawn in black.
        label_map: Leaf name -> label text; leaves without a usable entry
            keep their own name.

    Returns:
        The same ``tree`` object, for convenient reassignment.
    """
    # "branch-right" is the position ``add_face`` uses when none is given.
    return _style_tree_with_leaf_labels(tree, color_map, label_map, position="branch-right")


def apply_aligned_node_style_to_tree(tree, color_map: dict, label_map: dict):
    """Same as ``apply_basic_node_style_to_tree`` but with leaf labels in the aligned column.

    Args:
        tree: Tree whose nodes are styled in place.
        color_map: Leaf name -> label colour; leaves without a usable entry
            are drawn in black.
        label_map: Leaf name -> label text; leaves without a usable entry
            keep their own name.

    Returns:
        The same ``tree`` object, for convenient reassignment.
    """
    return _style_tree_with_leaf_labels(tree, color_map, label_map, position="aligned")

def layout(node):
    """Layout callback: show each leaf's ``name`` attribute in the aligned column.

    Internal nodes are left untouched.
    """
    if not node.is_leaf():
        return
    name_face = AttrFace("name", fsize=30)
    faces.add_face_to_node(name_face, node, 0, position="aligned")

In [None]:
# Population assignment per sample, plus the plotting colour of each population.
pops = clustering.read_populations("../data/samples/populations.txt")
pops["colormap"] = pops["populations_clean"].map(map_colors)

# Samples to leave out of the tree (listed in the admixture results).
admixed_sample_set = pd.read_csv("../data/var/admixture/admixed_individuals.csv")

# Short location names used as tip labels.
sample_loc = pd.read_csv("../data/samples/samples_short_loc.csv")

# Turn the "sample" index level into a column, drop the admixed samples and
# renumber the remaining rows.
pops = (
    pops
    .reset_index("sample")
    .loc[lambda frame: ~frame["sample"].isin(admixed_sample_set["sample"])]
    .reset_index(drop=True)
)

In [None]:
# Attach the short location labels to every retained sample.
loc_columns = ["abbreviated_loc", "sample_loc"]
pops = (
    pops
    # Re-running this cell would otherwise merge the same columns again and
    # leave "_x"/"_y" suffixed duplicates, so drop any copies from a previous
    # run first. On a fresh kernel this is a no-op.
    .drop(columns=loc_columns, errors="ignore")
    .merge(
        sample_loc[["sample", *loc_columns]],
        on="sample",
        how="left",  # keep samples that have no location entry
    )
)
pops.head()

Prepare the SNP matrix as a FASTA input for IQ-TREE.

In [None]:
# Export the concatenated SNP alignment once; later runs reuse the file.
# The path comes from `base_alignment` so this cell and the IQ-TREE cell
# always refer to the same file.
if not os.path.exists(base_alignment):
    vcf_parser.vcf_to_snp_fasta(
        vcf_path="../data/var/filtered_variants.vcf.gz",
        output=base_alignment,
        drop_samples=admixed_sample_set["sample"].tolist(),
    )

The first IQ-TREE run below 👇 is expected to fail. It still writes a cleaned PHYLIP file (`snp_concat.fasta.varsites.phy`), which the second run then uses as its input.

In [None]:
# The tree file sits next to the cleaned alignment as "<alignment>.treefile";
# skip the whole (slow) inference when it is already there.
if not os.path.exists(cleaned_alignment + ".treefile"):
    # Pass 1 on the raw alignment: expected to abort, but as a by-product it
    # writes the cleaned PHYLIP file used in pass 2. Its exit status is
    # therefore deliberately not checked.
    base_run = subprocess.run(iqtree_cmd(base_alignment), capture_output=True, text=True)
    if not os.path.exists(cleaned_alignment):
        raise RuntimeError(
            f"IQ-TREE did not produce {cleaned_alignment!r} from {base_alignment!r} "
            f"(exit status {base_run.returncode}).\n"
            f"STDOUT (tail):\n{base_run.stdout[-2000:]}\n"
            f"STDERR (tail):\n{base_run.stderr[-2000:]}"
        )

    # Pass 2 on the cleaned alignment: this is the run that yields the tree.
    result = subprocess.run(iqtree_cmd(cleaned_alignment), capture_output=True, text=True)

    print("STDOUT:\n", result.stdout)
    print("STDERR:\n", result.stderr)

    # Fail here rather than later when the missing tree file is opened.
    if result.returncode != 0:
        raise RuntimeError(
            f"IQ-TREE failed on {cleaned_alignment!r} (exit status {result.returncode}); "
            "see the output printed above."
        )

Plot the resulting concatenated tree.

In [None]:
treefile = "../data/phylo/snp_concat.fasta.varsites.phy.treefile"
pops_idx = pops.set_index("sample")

# Load the tree, root it at the midpoint and ladderize it.
t = Tree(treefile, format=1)
midpoint_outgroup = t.get_midpoint_outgroup()
t.set_outgroup(midpoint_outgroup)
t.ladderize(direction=1)

# Copy before any faces are attached so each variant is styled independently.
tb = t.copy()
tba = t.copy()

# All three variants share the tip labels; only the coloured one gets a colour map.
sample_labels = pops_idx["sample_loc"].to_dict()

# Coloured tip labels (one colour per population).
t = apply_basic_node_style_to_tree(
    t,
    color_map=pops_idx["colormap"].to_dict(),
    label_map=sample_labels,
)

# Black tip labels next to the branch tips.
tb = apply_basic_node_style_to_tree(tb, color_map={}, label_map=sample_labels)

# Black tip labels in the aligned column.
tba = apply_aligned_node_style_to_tree(tba, color_map={}, label_map=sample_labels)


# Write each population's name above the branch leading to its clade.
tree_leaf_names = set(t.get_leaf_names())
for pop, color in map_colors.items():
    in_pop = pops_idx["populations_clean"] == pop
    # Only tips that are actually in the tree can be looked up.
    samples = [name for name in pops_idx.index[in_pop] if name in tree_leaf_names]
    if not samples:
        # No retained sample for this population: nothing to label.
        continue
    if len(samples) == 1:
        # With a single target, get_common_ancestor() also counts the node it
        # is called on (here the root), so the label would land on the root.
        # Label the tip itself instead.
        clade_node = t.get_leaves_by_name(samples[0])[0]
    else:
        clade_node = t.get_common_ancestor(samples)
    clade_node.add_face(
        TextFace(pop, fsize=8, fgcolor=color, ftype="Sans", bold=False),
        column=0,
        position="branch-top"
    )

# Rendering options shared by all tree figures.
ts = TreeStyle()
ts.mode = "r"
ts.show_leaf_name = False  # tip labels are drawn by the TextFaces added above
ts.scale = 4000
ts.show_scale = True

# Guide lines that connect branch tips to aligned labels. The line type is an
# integer code (0 = solid, 1 = dashed, 2 = dotted); a string such as "dotted"
# is not one of the recognised codes.
ts.extra_branch_line_type = 2
ts.extra_branch_line_color = "black"

In [None]:
# Coloured tree as PNG (shown inline below), black-label tree as SVG.
display_png = "../data/figs/concat-tree-display.png"
t.render(display_png, tree_style=ts)
tb.render("../data/figs/concat-tree-black.svg", tree_style=ts)
display(Image(filename=display_png))

In [None]:
# Tip-aligned tree in both formats; the PNG is shown inline below.
aligned_svg = "../data/figs/concat-tree-black-tip-aligned.svg"
aligned_png = "../data/figs/concat-tree-black-tip-aligned.png"
for out_path in (aligned_svg, aligned_png):
    tba.render(out_path, tree_style=ts)
display(Image(filename=aligned_png))