# BEAST Analysis Notebook

---

# 0. SETUP

In [1]:
import os
import pandas as pd
import seaborn as sns
from Bio import Phylo, AlignIO
from functions import *

## Paths

In [2]:
# ------------------------------------------
# Project root
# The location of the project results is machine-specific. Set the
# PLAGUE_PROJECT_DIR environment variable to point the notebook at another
# location; when it is unset, the original hard-coded location is used.
# Trailing slashes are removed because the paths below are built by
# concatenating "/..." suffixes.
project_dir         = os.environ.get(
    "PLAGUE_PROJECT_DIR",
    "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main",
).rstrip("/")

# Directory that is scanned for BEAST summary trees (*.tre) and that receives
# the edited trees and JSON exports written by this notebook.
tree_dir            = project_dir + "/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock"
# Tab-separated sample metadata (read in the IMPORT section).
metadata_path       = project_dir + "/iqtree/all/chromosome/full/filter5/filter-taxa/metadata.tsv"
# Directory that receives most_recent_sampling_dates.tsv.
beast_dir           = project_dir + "/beast/all/chromosome/clade"

# Auspice configuration plus the color and lat/lon tables handed to the
# Auspice export. Despite the "out_" prefix, this notebook reads these files.
auspice_config_path = project_dir + "/config/auspice_config.json"
out_path_colors     = project_dir + "/augur/all/chromosome/full/filter5/colors.tsv"
out_path_latlon     = project_dir + "/augur/all/chromosome/full/filter5/latlon.tsv"

# ------------------------------------------
# Alignment
# Comma-separated constant-site counts and the SNP alignment (FASTA); both are
# used to compute the total sequence length.
constant_sites_path = project_dir + "/snippy_multi/all/chromosome/full/snippy-multi.constant_sites.txt"
aln_path            = project_dir + "/iqtree/all/chromosome/full/filter5/filter-sites/snippy-multi.snps.aln"


## Variables

In [3]:
# Show at most 10 rows when a dataframe is displayed, but never hide columns.
pd.set_option("display.max_rows", 10, "display.max_columns", None)

# ------------------------------------------
# Branch label -> list of "branch_minor" metadata values that belong to it.
# The keys are iterated in this order throughout the notebook (one tree and
# one dataframe per key), so the order is preserved deliberately.
# NOTE(review): "0.PE4m" used to be listed twice under "0.PE". The duplicate
# had no effect on membership tests and has been removed; please confirm that
# no other minor branch was meant to be listed in its place.
BRANCH_LIST = {
    "1.ORI" : ["1.ORI1", "1.ORI2", "1.ORI3"],
    "1.IN": ["1.IN1","1.IN2","1.IN3"],
    "1.ANT": ["1.ANT1"],
    "1.PRE" : ["1.PRE0","1.PRE1", "1.PRE2", "1.PRE3"],
    "2.MED": ["2.MED0", "2.MED1","2.MED2","2.MED3" ],
    "2.ANT": ["2.ANT1","2.ANT2","2.ANT3" ],
    "4.ANT": ["4.ANT1" ],
    "3.ANT": ["3.ANT1", "3.ANT2" ],
    "0.ANT": ["0.ANT1", "0.ANT2","0.ANT3","0.ANT5"],
    "0.ANT4" : ["0.ANT4"],
    "0.PE": ["0.PE2", "0.PE4m", "0.PE4t", "0.PE4a", "0.PE5", "0.PE7", "0.PE8", "0.PE10"],
    "0.PRE": ["0.PRE1", "0.PRE2"],
}

# NOTE(review): NUM_STATES is not referenced by any cell visible in this
# notebook; it may be consumed by code outside of it.
NUM_STATES = 10

# Placeholder written wherever a value is missing.
NO_DATA_CHAR = "NA"
# Indentation used when writing JSON files.
JSON_INDENT=2

# ------------------------------------------
# Alignment
# Total sequence length = sum of the comma-separated integer counts in the
# constant-sites file + number of columns in the SNP alignment.
with open(constant_sites_path) as infile:
    data = infile.read().strip().split(",")
constant_sites = sum(int(count) for count in data)

aln = AlignIO.read(aln_path, "fasta")
variant_sites = aln.get_alignment_length()
SEQ_LEN = constant_sites + variant_sites

# Posteriors are rescaled to percent before being compared to this threshold;
# values at or above it are labelled as HIGH support.
POSTERIOR_THRESH = 95

---

# 1. IMPORT

## Metadata

In [4]:
# Load the tab-separated sample metadata, index it by its first column and
# replace missing values with the NO_DATA_CHAR placeholder.
metadata_df = pd.read_csv(metadata_path, sep='\t')
index_column = metadata_df.columns[0]
metadata_df = metadata_df.set_index(index_column).fillna(NO_DATA_CHAR)

display(metadata_df)

Unnamed: 0_level_0,strain,date,date_bp,country,province,country_lat,country_lon,province_lat,province_lon,biovar,branch_major,branch_minor,biosample_accession,biosample_comment,branch_number,continent,date_mean,date_bp_mean,date_err,lat,lon,host_human
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Reference,CO92,1992,-29,United States of America,Colorado,39.783730,-100.445882,38.7252,-105.608,Orientalis,1.ORI,1.ORI1,SAMEA1705942,KEEP: Assembly Modern Reference,1,North America,1992.0,29.0,0.0,38.725178,-105.607716,Human
GCA_009909635.1_ASM990963v1_genomic,9_10,1923.0,-98,Russia,Rostov Oblast,64.686314,97.745306,47.6222,40.7958,Medievalis,2.MED,2.MED1,SAMN13632815,KEEP: Assembly Modern,2,Europe,1923.0,98.0,0.0,47.622245,40.795794,Human
GCA_009669545.1_ASM966954v1_genomic,42126,2006.0,-15,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722925,KEEP: Assembly Modern,0,Asia,2006.0,15.0,0.0,42.480495,85.463346,Non-Human
GCA_009669555.1_ASM966955v1_genomic,42123,2005.0,-16,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722924,KEEP: Assembly Modern,0,Asia,2005.0,16.0,0.0,42.480495,85.463346,Non-Human
GCA_009669565.1_ASM966956v1_genomic,42118,2005.0,-16,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722923,KEEP: Assembly Modern,0,Asia,2005.0,16.0,0.0,42.480495,85.463346,Non-Human
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAMEA7313243_45,Azov38,[1400:1700],[-621:-321],Russia,Rostov Oblast,64.686314,97.745306,47.6222,40.7958,Second Pandemic,1.PRE,1.PRE1,SAMEA7313243_45,KEEP: SRA Ancient Combined Record,1,Europe,1550.0,471.0,150.0,47.622245,40.795794,Human
SAMEA7313246_49,Gdansk8,[1400:1700],[-621:-321],Poland,Pomeranian Voivodeship,52.215933,19.134422,54.2456,18.1099,Second Pandemic,1.PRE,1.PRE1,SAMEA7313246_49,KEEP: SRA Ancient Combined Record,1,Europe,1550.0,471.0,150.0,54.245560,18.109900,Human
SAMEA6651390,AGU010,[1435:1477],[-586:-544],Lithuania,Vilnius County,55.350000,23.750000,54.8227,25.2495,Second Pandemic,1.PRE,1.PRE1,SAMEA6651390,KEEP: SRA Ancient,1,Europe,1456.0,565.0,21.0,54.822692,25.249534,Human
SAMEA6637004,AGU025,[1441:1612],[-580:-409],Lithuania,Vilnius County,55.350000,23.750000,54.8227,25.2495,Second Pandemic,1.PRE,1.PRE1,SAMEA6637004,KEEP: SRA Ancient,1,Europe,1526.5,494.5,85.5,54.822692,25.249534,Human


## Most Recent Sampling Date

In [5]:
# For every branch, find the largest "date_mean" among its samples. The value
# is written to a two-column TSV (branch, date) and kept in mrsd_dict, keyed
# by branch label.
out_path = os.path.join(beast_dir, "most_recent_sampling_dates.tsv")
mrsd_dict = {}

with open(out_path, "w") as outfile:
    for branch, minor_branches in BRANCH_LIST.items():
        in_branch = metadata_df["branch_minor"].isin(minor_branches)
        branch_df = metadata_df[in_branch]
        max_date = max(branch_df["date_mean"])
        outfile.write("{}\t{}\n".format(branch, max_date))
        mrsd_dict[branch] = max_date

## Colors

In [6]:
# Nested color lookup built from the headerless three-column colors table:
# colors_dict[state][value] -> color
colors_dict = {}

colors_df = pd.read_csv(out_path_colors, sep='\t', header=None)
colors_df.columns = ["state", "value", "color"]

for state in set(colors_df["state"]):
    # Rows of the table that describe this state
    state_df = colors_df[colors_df["state"] == state]
    colors_dict[state] = dict(zip(state_df["value"], state_df["color"]))

print(colors_dict)

{'country': {'Russia': '#8000ff', 'Lithuania': '#7215ff', 'Estonia': '#652afe', 'Germany': '#573ffd', 'China': '#4a53fc', 'Azerbaijan': '#3c66fa', 'Armenia': '#2f79f7', 'Georgia': '#228bf4', 'Mongolia': '#149df1', 'Tajikistan': '#07adee', 'Kyrgyzstan': '#07bcea', 'England': '#14c9e5', 'Spain': '#22d5e0', 'France': '#2fe0db', 'Nepal': '#3cead5', 'India': '#4af1d0', 'Kazakhstan': '#57f7c9', 'Turkmenistan': '#65fcc3', 'Uzbekistan': '#72febc', 'Iran': '#80ffb4', 'Italy': '#8dfead', 'Poland': '#9afca5', 'Switzerland': '#a8f79d', 'Norway': '#b5f194', 'The Netherlands': '#c3ea8b', 'Kenya': '#d0e083', 'Uganda': '#ddd579', 'Democratic Republic of the Congo': '#ebc970', 'Vietnam': '#f8bc66', 'Myanmar': '#ffad5d', 'United States of America': '#ff9d53', 'Peru': '#ff8b49', 'Canada': '#ff793f', 'Indonesia': '#ff6634', 'Madagascar': '#ff532a', 'Zimbabwe': '#ff3f20', 'Algeria': '#ff2a15', 'Brazil': '#ff150b', 'Bolivia': '#ff0000', '?': '#969696'}, 'branch_major': {'0.PRE': '#8000ff', '0.PE': '#4c4ffc'

## Tree Files

In [7]:
def _branch_for_filename(filename, branches):
    """Return the branch label that a tree filename refers to, or None.

    Branch labels can be prefixes of one another (e.g. "0.ANT" and "0.ANT4"),
    so a plain substring test would assign the "0.ANT4" file to both branches.
    The longest label contained in the filename is therefore taken as the
    match.

    Parameters
    ----------
    filename : str
        Name of a file in the tree directory.
    branches : iterable of str
        Candidate branch labels.

    Returns
    -------
    str or None
        The longest label found in ``filename``; None when no label occurs.
    """
    matches = [label for label in branches if label in filename]
    return max(matches, key=len) if matches else None


# Construct a dictionary to hold the trees
tree_dict = {}

for branch in BRANCH_LIST:
    print("Branch:", branch)
    tree_dict[branch] = {}
    # Sorted so the result does not depend on the directory listing order
    for filename in sorted(os.listdir(tree_dir)):
        if not filename.endswith(".tre"): continue
        # Skip files that belong to a different (possibly longer) branch label
        if _branch_for_filename(filename, BRANCH_LIST) != branch: continue
        filepath = os.path.join(tree_dir, filename)

        # Add tree files to dict
        tree_dict[branch]["tree_file_raw"] = filepath
        tree_dict[branch]["tree_file_edit"] = os.path.join(tree_dir, branch + ".nex")
        tree_dict[branch]["sample_rename"] = {}

        # Read in raw tree to deal with dashes
        with open(tree_dict[branch]["tree_file_raw"],  "r") as infile:
            with open(tree_dict[branch]["tree_file_edit"], "w") as outfile:
                raw_tree = infile.read()
                # Remove quotations if they exist
                raw_tree = raw_tree.replace("'","")

                # Iterate over the lines of the file
                for line in raw_tree.split("\n"):

                    # Lines containing "TREE" are written unchanged; on every
                    # other line dashes are replaced by underscores
                    if "TREE" not in line and "-" in line:

                        # A line without any spaces is treated as a bare
                        # sample name: remember how to restore its dashes
                        if len(line.split(" ")) == 1:
                            name_dashes = line.strip()
                            name_no_dashes = name_dashes.replace("-","_")
                            tree_dict[branch]["sample_rename"][name_no_dashes] = name_dashes
                        line = line.replace("-","_")

                    outfile.write(line + "\n")

        # Read in edited tree
        trees = Phylo.parse(tree_dict[branch]["tree_file_edit"], "nexus")
        # There should be only 1 tree
        for t in trees:
            tree_dict[branch]["tree"] = t
            tree_dict[branch]["tree"].ladderize(reverse=False)
            break

        # Rename sample names back to with dashes
        for c in tree_dict[branch]["tree"].find_clades():
            if c.name in tree_dict[branch]["sample_rename"]:
                c.name = tree_dict[branch]["sample_rename"][c.name]

            # Strip the date suffix (everything after the last underscore)
            if c.name:
                c.name = "_".join(c.name.split("_")[0:-1])

        # Rename internal nodes
        node_i = 0
        for c in tree_dict[branch]["tree"].find_clades():
            if not c.name:
                c.name = "NODE{}".format(node_i)
                node_i += 1

    # Later cells expect a tree for every branch; make a missing one visible
    if "tree" not in tree_dict[branch]:
        print("WARNING: no tree was loaded for branch", branch)
    

Branch: 1.ORI
Branch: 1.IN
Branch: 1.ANT
Branch: 1.PRE
Branch: 2.MED
Branch: 2.ANT
Branch: 4.ANT
Branch: 3.ANT
Branch: 0.ANT
Branch: 0.ANT4
Branch: 0.PE
Branch: 0.PRE


## Add Tree Data to Dataframe

### Get comments from the first internal node that is not the root

In [8]:
# For every branch:
#   1. build a per-branch copy of the metadata, restricted to samples that
#      are present in the tree;
#   2. create one placeholder column per parameter found in the comment of
#      the first internal node that is not the root;
#   3. add any of those parameters that the root's comment lacks to the root
#      comment, using zero placeholders.
# TREE_PARAMETERS records the parameter names of the first branch processed.
TREE_PARAMETERS = None

for branch in tree_dict:
    print(branch)

    tree = tree_dict[branch]["tree"]
    # Independent copy, so edits never touch metadata_df
    df = metadata_df[metadata_df["branch_minor"].isin(BRANCH_LIST[branch])].copy()

    # Check if a sample was missed :(
    # A set gives constant-time membership tests
    tree_samples = {c.name for c in tree.find_clades()}
    for sample in list(df.index):
        if sample not in tree_samples:
            df.drop(sample, inplace=True)
            print("Dropping {} from the dataframe.".format(sample))

    root_comment_dict = parse_comment(tree.root.comment)

    for c in tree.find_clades():
        if c.is_terminal():
            continue

        if c != tree.root:
            comment_dict = parse_comment(c.comment)
            for parameter in comment_dict:
                # Initialize parameter values
                if "range" in parameter or "95%" in parameter:
                    # One separate [NA, NA] list per row, so rows never share
                    # the same list object
                    df[parameter] = [[NO_DATA_CHAR, NO_DATA_CHAR] for _ in range(len(df))]
                    root_comment_dict.setdefault(parameter, '{0,0}')
                else:
                    df[parameter] = [NO_DATA_CHAR] * len(df)
                    root_comment_dict.setdefault(parameter, '0')

            # Only the first non-root internal node is inspected
            break

    # Update the roots comment to include missing values
    new_comment_list = [
        "{}={}".format(parameter, value)
        for parameter, value in root_comment_dict.items()
    ]
    new_comment = "[&" + ",".join(new_comment_list) + "]"
    tree.root.comment = new_comment
    print(root_comment_dict)
    print(new_comment_list)

    if not TREE_PARAMETERS:
        TREE_PARAMETERS = list(root_comment_dict)

    tree_dict[branch]["df"] = df
    #display(df)

1.ORI
{'height_95%_HPD': '{105.49669870945002,214.40203122544787}', 'length': '0.0', 'posterior': '1.0', 'height_median': '140.51015101320417', 'height_range': '{96.56887471108806,409.75872259127294}', 'height': '148.81157814395854', 'length_range': '{0,0}', 'rate_95%_HPD': '{0,0}', 'length_95%_HPD': '{0,0}', 'rate_range': '{0,0}', 'rate': '0', 'rate_median': '0', 'length_median': '0'}
['height_95%_HPD={105.49669870945002,214.40203122544787}', 'length=0.0', 'posterior=1.0', 'height_median=140.51015101320417', 'height_range={96.56887471108806,409.75872259127294}', 'height=148.81157814395854', 'length_range={0,0}', 'rate_95%_HPD={0,0}', 'length_95%_HPD={0,0}', 'rate_range={0,0}', 'rate=0', 'rate_median=0', 'length_median=0']
1.IN
{'height_95%_HPD': '{76.51154966645281,359.64722751430185}', 'length': '0.0', 'posterior': '1.0', 'height_median': '158.79244103045914', 'height_range': '{64.03240410762909,1346.3719576228896}', 'height': '183.34686921189493', 'length_range': '{0,0}', 'rate_95%_

### Parse Tree Comments

In [9]:
# For every branch, walk all nodes of its tree and copy the values stored in
# each node's comment into the branch dataframe (one row per node name). From
# those values the loop also derives: the node type, a support category, the
# calendar date of the node with its interval, and rate / branch length
# expressed via the total sequence length.
for branch in tree_dict:
    print(branch)
    
    tree = tree_dict[branch]["tree"]
    df = tree_dict[branch]["df"]
    
    # Initialize new columns
    df["timetree_num_date"] = [NO_DATA_CHAR] * len(df) # Calendar date of height
    df["timetree_num_date_confidence"] = [[NO_DATA_CHAR, NO_DATA_CHAR ]] * len(df)
    df["rate_sub_year"] = [NO_DATA_CHAR] * len(df)
    df["branch_length_sub"] = [NO_DATA_CHAR] * len(df)
    df["branch_support_conf_category"] = [NO_DATA_CHAR] * len(df)
    df["branch_support_conf_char"] = [NO_DATA_CHAR] * len(df)
    
    for c in tree.find_clades():
        
        # --------------------------------------
        # Get node type
        node_type = "internal"
        if c.is_terminal():
            node_type = "terminal"
        # NOTE(review): rows are addressed by node name; for names that are
        # not in the index yet (internal nodes) this assignment presumably
        # appends a new row - confirm against the pandas version in use.
        df.at[c.name, "node_type"] = node_type
        
        # --------------------------------------
        # Parse comments
        comment_dict = parse_comment(c.comment)
        for parameter,val in comment_dict.items():

            # Ranges should be parsed into list
            # (values look like "{low,high}": strip the braces, split on ",")
            if "range" in parameter or "95%" in parameter:
                val = [float(v) for v in val.strip("{}").split(",")]
            # Multiply the posterior by 100 to be comparable to UFboot
            elif parameter == "posterior":
                val = float(val) * 100
            else:
                val = float(val)
            df.at[c.name, parameter] = val
            
            
        # Tips don't have posteriors, set to 0
        if c.is_terminal():
            posterior = 0
            df.at[c.name, "posterior"] = posterior
            comment_dict["posterior"] = posterior
            
        # Set confidence category for posterior/branch support
        # HIGH (marked "*") when the posterior, in percent, reaches
        # POSTERIOR_THRESH; LOW (no marker) otherwise.
        posterior = df["posterior"][c.name]
        branch_support_conf_category = "LOW"
        branch_support_conf_char = ""
        if posterior >= POSTERIOR_THRESH:
            branch_support_conf_category = "HIGH"
            branch_support_conf_char = "*"
        df.at[c.name,"branch_support_conf_category"] = branch_support_conf_category
        df.at[c.name,"branch_support_conf_char"] = branch_support_conf_char

        # Check for missing parameters
        # For example, the most recent sample will be missing height uncertainty
        # Missing values are filled with 0 (or [0,0] for intervals) and
        # reported together with the sample date and the branch's most
        # recent sampling date.
        for parameter in TREE_PARAMETERS:
            if parameter not in comment_dict:
                if "range" in parameter or "95%" in parameter:
                    val = [0,0]
                else:
                    val = 0
                df.at[c.name, parameter] = val
                comment_dict[parameter] = val
                    
                print(c.name, parameter, df["date"][c.name], mrsd_dict[branch])
            
        # Convert height to calendar dates
        height = df["height"][c.name]
        height_95_hpd = df["height_95%_HPD"][c.name]
        
        # The sample with the most recent date does not have a height 95% HPD
        if height_95_hpd[0] == NO_DATA_CHAR:
            height_95_hpd = [0,0]
            df.at[c.name, "height_95%_HPD"] = height_95_hpd
            
        # Calendar date = most recent sampling date of the branch - height.
        # The interval bounds swap places: the larger height gives the
        # earlier calendar date.
        height_calendar = mrsd_dict[branch] - height
        height_95_hpd_calendar = [mrsd_dict[branch] -  height_95_hpd[1], mrsd_dict[branch] -  height_95_hpd[0]]
    
        df.at[c.name, "timetree_num_date"] = height_calendar
        df.at[c.name, "timetree_num_date_confidence"] = height_95_hpd_calendar
        
        # Add another measure of substitution rate
        # (the rate multiplied by the total sequence length SEQ_LEN;
        # presumably the rate is per site per year - TODO confirm)
        rate_sub_year = df["rate"][c.name] * SEQ_LEN
        df.at[c.name, "rate_sub_year"] = rate_sub_year
        
        # Measure branch length in substitutions
        # (rate over the whole sequence multiplied by the branch length)
        branch_length_sub = rate_sub_year * df["length"][c.name]
        df.at[c.name, "branch_length_sub"] = branch_length_sub
    
    # Cells left empty (e.g. metadata of nodes that are not samples) get the
    # NO_DATA_CHAR placeholder
    df.fillna(NO_DATA_CHAR, inplace=True)

1.ORI
GCA_001693595.1_ASM169359v1_genomic height_95%_HPD 2016.0 2016.0
GCA_001693595.1_ASM169359v1_genomic height_median 2016.0 2016.0
GCA_001693595.1_ASM169359v1_genomic height_range 2016.0 2016.0
1.IN
GCA_015158475.1_ASM1515847v1_genomic height_95%_HPD 2008.0 2008.0
GCA_015158475.1_ASM1515847v1_genomic height_median 2008.0 2008.0
GCA_015158475.1_ASM1515847v1_genomic height_range 2008.0 2008.0
1.ANT
GCA_000169615.1_ASM16961v1_genomic height_95%_HPD 2004.0 2004.0
GCA_000169615.1_ASM16961v1_genomic height_median 2004.0 2004.0
GCA_000169615.1_ASM16961v1_genomic height_range 2004.0 2004.0
1.PRE
SAMEA7313236_38 height_95%_HPD [1762:1773] 1767.5
SAMEA7313236_38 height_median [1762:1773] 1767.5
SAMEA7313236_38 height_range [1762:1773] 1767.5
2.MED
2.ANT
4.ANT
3.ANT
GCA_013391945.1_ASM1339194v1_genomic height_95%_HPD 2017.0 2017.0
GCA_013391945.1_ASM1339194v1_genomic height_median 2017.0 2017.0
GCA_013391945.1_ASM1339194v1_genomic height_range 2017.0 2017.0
0.ANT
NODE75 height_95%_HPD nan 201

## Reduced Dataframe for Auspice

In [10]:
# Options: list the columns that can be selected for the Auspice export.
# The dataframe of the last branch is shown. It is selected explicitly rather
# than through the loop variable `branch` left over from the previous cell,
# so this cell does not depend on hidden state.
example_branch = list(tree_dict)[-1]
print(tree_dict[example_branch]["df"].columns)

Index(['strain', 'date', 'date_bp', 'country', 'province', 'country_lat',
       'country_lon', 'province_lat', 'province_lon', 'biovar', 'branch_major',
       'branch_minor', 'biosample_accession', 'biosample_comment',
       'branch_number', 'continent', 'date_mean', 'date_bp_mean', 'date_err',
       'lat', 'lon', 'host_human', 'length_range', 'rate_95%_HPD',
       'length_95%_HPD', 'length', 'posterior', 'height_median', 'rate_range',
       'height_range', 'height_95%_HPD', 'rate', 'rate_median',
       'length_median', 'height', 'timetree_num_date',
       'timetree_num_date_confidence', 'rate_sub_year', 'branch_length_sub',
       'branch_support_conf_category', 'branch_support_conf_char',
       'node_type'],
      dtype='object')


In [11]:
# Columns carried over into the reduced, per-branch Auspice dataframe
columns = [
    # Node type is mandatory
    "node_type",
    # Draw Divergence Tree
    "length",
    # Draw Time Tree
    "timetree_num_date",
    "timetree_num_date_confidence",
    # Geo
    "country",
    "province",
    "country_lat",
    "country_lon",
    "province_lat",
    "province_lon",
    # Stats
    "posterior",
    "branch_support_conf_category",
    "branch_support_conf_char",
    "rate",
    "rate_sub_year",
    "branch_length_sub",
    # Text descriptions
    "strain",
    "branch_major",
    "biovar",
    "host_human",
    "date_mean",
    "date_bp_mean",
]

for branch in tree_dict:
    print(branch)

    # Independent copy of the selected columns
    auspice_df = tree_dict[branch]["df"][columns].copy()

    # Round For Pretty Numbers (rounded to whole numbers, stored as floats)
    auspice_df["posterior"] = [float(round(p)) for p in list(auspice_df["posterior"])]
    # The raw "length" column is kept; its rounded values go to a new column
    auspice_df["branch_length_time"] = [float(round(l)) for l in list(auspice_df["length"])]
    auspice_df["branch_length_sub"] = [float(round(l)) for l in list(auspice_df["branch_length_sub"])]
    auspice_df["rate_sub_year"] = [float(round(s)) for s in list(auspice_df["rate_sub_year"])]

    # Switch to the column names used in the export
    auspice_df.rename(
        columns={
            "length": "branch_length",
            "posterior": "branch_support",
            "rate": "rate_sub",
        },
        inplace=True,
    )

    # Add blank column
    auspice_df["blank"] = [" "] * len(auspice_df)

    tree_dict[branch]["auspice_df"] = auspice_df
    #display(tree_dict[branch]["auspice_df"])

1.ORI
1.IN
1.ANT
1.PRE
2.MED
2.ANT
4.ANT
3.ANT
0.ANT
0.ANT4
0.PE
0.PRE


---

# Export

## Time Tree

In [12]:
# Write each branch's tree twice: as Nexus, after metadata_to_comment has been
# applied with the branch dataframe, and as Newick with all node comments
# removed. A deep copy is edited so the tree kept in tree_dict is untouched.
for branch, branch_data in tree_dict.items():
    out_timetree = copy.deepcopy(branch_data["tree"])

    # Nexus
    metadata_to_comment(out_timetree, branch_data["df"])
    out_timetree_nex_path = os.path.join(tree_dir, branch + ".timetree.nex")
    Phylo.write(out_timetree, out_timetree_nex_path, "nexus")

    # Newick, without comments
    for clade in out_timetree.find_clades():
        clade.comment = None

    out_timetree_nwk_path = os.path.join(tree_dir, branch + ".timetree.nwk")
    Phylo.write(out_timetree, out_timetree_nwk_path, "newick")

## Augur

In [13]:
# Build the augur node data of every branch from its tree and reduced
# dataframe, print the first node as a spot check and write the result to
# "<branch>_augur.json" in the tree directory.
for branch, branch_data in tree_dict.items():

    augur_dict = augur_export(
        tree_path=None,
        aln_path=None,
        tree=branch_data["tree"],
        tree_df=branch_data["auspice_df"],
        color_keyword_exclude=["geometry"],
        type_convert={
            "branch_number": (lambda x: str(x)),
        },
    )
    branch_data["augur_dict"] = augur_dict

    # Spot check: show the entry of the first node
    first_taxa = list(augur_dict["nodes"])[0]
    print(augur_dict["nodes"][first_taxa])

    out_path_augur_json = os.path.join(tree_dir, branch + "_augur.json" )
    utils.write_json(data=augur_dict, file_name=out_path_augur_json, indent=JSON_INDENT)
    branch_data["augur_json_path"] = out_path_augur_json

{'node_type': 'internal', 'branch_length': 0.0, 'num_date': 1867.1884218560415, 'num_date_confidence': [1801.597968774552, 1910.50330129055], 'country': 'NA', 'province': 'NA', 'country_lat': 'NA', 'country_lon': 'NA', 'province_lat': 'NA', 'province_lon': 'NA', 'branch_support': 100.0, 'branch_support_conf_category': 'HIGH', 'branch_support_conf_char': '*', 'rate_sub': 0.0, 'rate_sub_year': 0.0, 'branch_length_sub': 0.0, 'strain': 'NA', 'branch_major': 'NA', 'biovar': 'NA', 'host_human': 'NA', 'date_mean': 'NA', 'date_bp_mean': 'NA', 'branch_length_time': 0.0, 'blank': ' '}
{'node_type': 'internal', 'branch_length': 0.0, 'num_date': 1824.653130788105, 'num_date_confidence': [1648.352772485698, 1931.4884503335472], 'country': 'NA', 'province': 'NA', 'country_lat': 'NA', 'country_lon': 'NA', 'province_lat': 'NA', 'province_lon': 'NA', 'branch_support': 100.0, 'branch_support_conf_category': 'HIGH', 'branch_support_conf_char': '*', 'rate_sub': 0.0, 'rate_sub_year': 0.0, 'branch_length_su

## Auspice

In [14]:
# Build, adjust, write and validate the Auspice JSON of every branch.
for branch in tree_dict:

    # Store the color (0.ANT4 is drawn with the color of 0.ANT)
    color_key = "0.ANT" if branch == "0.ANT4" else branch
    branch_major_color = colors_dict["branch_major"][color_key]

    auspice_dict = auspice_export(
        tree=tree_dict[branch]["tree"],
        augur_json_paths=tree_dict[branch]["augur_json_path"],
        auspice_config_path=auspice_config_path,
        auspice_colors_path=out_path_colors,
        auspice_latlons_path=out_path_latlon,
        )

    label_col = list(tree_dict[branch]["auspice_df"])
    print(label_col)

    # Recursively add branch attrs
    branch_attributes(
        tree_dict=auspice_dict["tree"],
        sub_dict=auspice_dict["tree"],
        df=tree_dict[branch]["auspice_df"],
        label_col=label_col,
        )

    # Last manual changes
    # A deep copy is iterated while the scales are set on the original, so no
    # dictionary is modified while it is being looped over.
    auspice_dict_copy = copy.deepcopy(auspice_dict)
    for i, coloring in enumerate(auspice_dict_copy["meta"]["colorings"]):
        for entry in coloring.values():
            # Node type as internal or terminal
            if entry == "node_type":
                auspice_dict["meta"]["colorings"][i]['scale'] = [['internal', '#FFFFFF'], ['terminal', branch_major_color]]
            # Confidence category
            if "conf_category" in entry:
                auspice_dict["meta"]["colorings"][i]['scale'] = [['LOW', '#FFFFFF'], ['HIGH', branch_major_color]]
            # Host Human binary
            if "host_human" in entry:
                auspice_dict["meta"]["colorings"][i]['scale'] = [['Human', '#CBB742'], ['Non-Human', "#60B6F2"], ['NA', "#D6D6D6"]]

    # Write outputs - For Local Rendering
    out_path_auspice_local_json = os.path.join(tree_dir, branch + ".json" )
    utils.write_json(data=auspice_dict, file_name=out_path_auspice_local_json, indent=JSON_INDENT, include_version=False)
    export_v2.validate_data_json(out_path_auspice_local_json)
    print("Validation successful for local JSON.\n")

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']






























Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.ORI.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']






























Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.IN.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxe























































Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.PRE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projec





























['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/2.MED.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', '





























Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/2.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']






















































Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/4.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relax































Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.ANT.json'...
Validating that the JSON is internally consistent...




















































Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.ANT4.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation su







['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'branch_support', 'branch_support_conf_category', 'branch_support_conf_char', 'rate_sub', 'rate_sub_year', 'branch_length_sub', 'strain', 'branch_major', 'biovar', 'host_human', 'date_mean', 'date_bp_mean', 'branch_length_time', 'blank']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.PE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'b



























