---
# 1. SETUP

## Modules

In [1]:
import os
from Bio import Phylo, AlignIO
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import colors, gridspec, lines
import scipy
import math
import numpy as np
import geopandas
import datetime
import shapely
import copy
from geopy import distance
import geopy
import cartopy.crs as ccrs
from augur import utils, export_v2
import subprocess
import itertools

from functions import *

## Paths

In [2]:
try:
    WILDCARDS = snakemake.wildcards
    project_dir = os.getcwd()
except NameError:
    WILDCARDS = ["all", "chromosome", "full", "5"]
    project_dir = os.path.dirname(os.path.dirname(os.getcwd()))
    
results_dir = os.path.join(project_dir, "results/")

READS_ORIGIN = WILDCARDS[0]
LOCUS_NAME = WILDCARDS[1]
PRUNE = WILDCARDS[2]
MISSING_DATA = WILDCARDS[3]

AUSPICE_PREFIX = "plague-phylogeography-projects_main_{}".format(PRUNE)

In [3]:
# Manual override
# results_dir = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main"

In [4]:
iqtree_dir    = os.path.join(results_dir, "iqtree/all/chromosome/{}/filter{}/".format(PRUNE, MISSING_DATA))
lsd_dir       = os.path.join(results_dir, "lsd/all/chromosome/{}/filter{}/".format(PRUNE, MISSING_DATA))
mugration_dir = os.path.join(results_dir, "mugration/all/chromosome/{}/filter{}/".format(PRUNE, MISSING_DATA))
snippy_dir    = os.path.join(results_dir, "snippy_multi/all/chromosome/{}/filter{}/".format(PRUNE, MISSING_DATA))

# ------------------------------------------
# Trees
divtree_path              = iqtree_dir    + "filter-taxa/iqtree.treefile"
timetree_path             = lsd_dir       + "lsd.timetree.nex"

# ------------------------------------------
# Alignment
constant_sites_path   = results_dir + "snippy_multi/all/chromosome/full/snippy-multi.constant_sites.txt"
aln_path              = iqtree_dir + "filter-sites/snippy-multi.snps.aln"

# ------------------------------------------
# Metadata

auspice_config_path          = results_dir + "config/auspice_config.json"
metadata_path                = iqtree_dir    + "filter-taxa/metadata.tsv"

    
# ------------------------------------------
# Output
augur_dir   = os.path.join(results_dir, "augur/all/chromosome/{}/filter{}/ml".format(PRUNE, MISSING_DATA))
auspice_dir = os.path.join(results_dir, "auspice/all/chromosome/{}/filter{}/ml".format(PRUNE, MISSING_DATA))

if not os.path.exists(augur_dir):
    print(augur_dir)
    subprocess.run(["mkdir", "-p", augur_dir]) 
    
if not os.path.exists(auspice_dir):
    print(auspice_dir)
    subprocess.run(["mkdir", "-p", auspice_dir]) 

## Variables

In [5]:
pd.set_option("display.max_rows", 10, "display.max_columns", None)

NO_DATA_CHAR = "NA"
UNKNOWN_CHAR = "?"
JSON_INDENT=2
CURRENT_YEAR = datetime.datetime.utcnow().year
MUG_CONF = 0.95
ALPHA = 0.05

# ------------------------------------------
# Alignment
with open(constant_sites_path) as infile:
    data = infile.read().strip().split(",")
    constant_sites = sum([int(count) for count in data])

aln = AlignIO.read(aln_path, "fasta")
variant_sites = len(aln[0].seq)
SEQ_LEN = constant_sites + variant_sites

# ------------------------------------------
# Geo
CRS = "epsg:3857"
CRS_EPSG = ccrs.epsg('3857')
world_polygons = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))


MUG_ATTRIBUTE_LIST = [
    "country",
    "province",
    "continent",
    "host_order",
    "population",
]

In [6]:
mug_dict = {}

for attr in MUG_ATTRIBUTE_LIST:
    mug_dict[attr] = {}
    mug_dict[attr]["tree_path"] = mugration_dir + attr + ".nex".format(MISSING_DATA)
    mug_dict[attr]["conf_path"] = mugration_dir + attr + "_confidence.csv"
    mug_dict[attr]["states_path"] = mugration_dir + attr + "_states.csv"

---

# 2. IMPORT

## Mugration Confidence Tables

In [7]:
states_files = [mug_dict[attr]["states_path"] for attr in mug_dict]
conf_files = [mug_dict[attr]["conf_path"] for attr in mug_dict]

# ----------------------------------------
# Convert character states to numbers
for mugstates,mugconf in zip(states_files, conf_files):
    node_i = 0
    conf_basename = os.path.splitext(mugconf)[0]
    conf_outname = conf_basename + "_tidy.csv"
    i = 0
    state_dict = {}
    with open(mugstates, "r") as infile:
        read_line = infile.readline()
        while read_line:
            split_line = read_line.strip().split(",")
            state = split_line[1]
            if state == "nan":
                state = "NA"
            if state == UNKNOWN_CHAR:
                state_dict[-1] = state
            else:
                state_dict[i] = state
                i += 1
            read_line = infile.readline()
    # Swap first to last place
    final_i = len(state_dict) - 1   
    state_dict[final_i] = "?"
    # Remove old first
    state_dict.pop(-1)

    with open(mugconf, "r") as infile:            
        i = 0
        read_line = infile.readline()
        header = read_line.strip().split(", ")
        header[0] = "name"

        data = ""
        for i in range(0, len(header) - 1):
            char = header[i + 1]
            header[i + 1] = state_dict[i]
        header = ",".join(header)
        read_line = infile.readline()            
        while read_line:
            # Rename internal nodes
            split_line = read_line.split(", ")
            node_name = split_line[0]
            if "NODE" in node_name:
                node_name = "NODE{}".format(node_i)
                node_i += 1
            new_line = ",".join([node_name] + split_line[1:])
            data += new_line
            read_line = infile.readline()
    
    with open(conf_outname, "w") as outfile:
        outfile.write(header + "\n")
        outfile.write(data)

# ----------------------------------------
# Import the tidy files as dataframes

for attr in mug_dict:
    conf_path = mug_dict[attr]["conf_path"]
    tidy_path = os.path.splitext(conf_path)[0] + "_tidy.csv"
    df = pd.read_csv(tidy_path, sep=',', index_col = 0)
    mug_dict[attr]["conf_df"] = df

## Trees

In [8]:
tree_paths = [divtree_path, timetree_path] + [mug_dict[attr]["tree_path"] for attr in mug_dict]

for tree_path in tree_paths:
    tree_ext = os.path.splitext(tree_path)[1]
    if tree_ext == ".nwk" or tree_ext == ".newick" or tree_ext == ".treefile":
        tree = Phylo.read(tree_path, "newick")
    elif tree_ext == ".nex" or tree_ext == ".nexus":
        parse_trees = Phylo.parse(tree_path, "nexus")
        for parse_tree in parse_trees:
            clades = [c for c in parse_tree.find_clades()]
            if len(clades) > 1:
                break
        tree = parse_tree
    tree.ladderize(reverse=False)
    
    if tree_path == divtree_path:
        print("divtree:", os.path.basename(tree_path))
        divtree = tree
    elif tree_path == timetree_path:
        print("timetree:", os.path.basename(tree_path))        
        timetree = tree   
        
    # Check against mugration attributes
    for attr in mug_dict:
        if tree_path == mug_dict[attr]["tree_path"]:
            print(attr + ":", os.path.basename(tree_path))
            mug_dict[attr]["tree"] = tree         

divtree: iqtree.treefile
timetree: lsd.timetree.nex
country: country.nex
province: province.nex
continent: continent.nex
host_order: host_order.nex
population: population.nex


## Genomic Metadata

In [9]:
metadata_df = pd.read_csv(metadata_path, sep='\t')
metadata_df.set_index(metadata_df.columns[0], inplace=True)
metadata_df.fillna(NO_DATA_CHAR, inplace=True)

## Populations

In [10]:
population_list = []

for t in divtree.get_terminals():
    population = metadata_df["population"][t.name]
    if population not in population_list:
        population_list.append(population)        
        
print(population_list)

['0.PRE', '0.PE', '0.ANT', '0.ANT4', '3.ANT', '4.ANT', '2.ANT', '2.MED', '1.PRE', '1.ANT', '1.IN', '1.ORI']


## Most Recent Sampling Date for BEAST

In [11]:
out_path = os.path.join(augur_dir, "most_recent_sampling_dates.tsv")
mrsd_dict = {}

with open(out_path, "w") as outfile:
    for population in population_list:
        df = metadata_df[metadata_df["population"] == population]
        max_date = max(df["date_mean"])
        print(population, max_date)
        outfile.write("{}\t{}\n".format(population, max_date))
        mrsd_dict[population] = max_date   

0.PRE -1686.0
0.PE 2014.0
0.ANT 2019.0
0.ANT4 765.0
3.ANT 2017.0
4.ANT 2015.0
2.ANT 2008.0
2.MED 2018.0
1.PRE 1767.5
1.ANT 2004.0
1.IN 2008.0
1.ORI 2016.0


---
# 3. MERGE METADATA

## Trees

In [12]:
# Check if they're synchronized
node_i = 0

trees = [divtree, timetree] + [mug_dict[attr]["tree"] for attr in mug_dict]
divtree_num_clades = len([c for c in divtree.find_clades()])
divtree_tip_names = [t.name for t in divtree.get_terminals()]

# Check for synchrony
# Note, no checking for internal order yet
for tree in trees:
    clades = [c for c in tree.find_clades()]
    tip_names = [t.name for t in tree.get_terminals()]
    if (tip_names != divtree_tip_names) or (len(clades) != divtree_num_clades):
        print("ERROR. Trees are not synchronized.")               


# Rename internal nodes
for tree in trees:        
    node_i = 0
    for c in tree.find_clades():
        if not c.name or "NODE" in c.name:
            c.name = "NODE{}".format(node_i)
            node_i += 1

# Extract metadata from divtree
for c in divtree.find_clades():
    
    # ---------------
    # Node type
    node_type = "internal"
    if c.is_terminal():
        node_type = "terminal"
    metadata_df.at[c.name, "node_type"] = node_type

    # ---------------
    # Branch Length
    branch_length = c.branch_length
    if not branch_length:
        branch_length = 0
    metadata_df.at[c.name, "branch_length"] = branch_length    

    # ---------------
    # Branch Support
    # Give terminal nodes 0% confidence
    branch_support = c.confidence
    if c.is_terminal():
        branch_support = 0
    # Internal nodes without confidence can be 0
    elif not c.confidence:
        branch_support = 0
    metadata_df.at[c.name, "branch_support"] = branch_support

    # Confidence category
    branch_support_conf_char = ""
    if branch_support >= 95 and not c.is_terminal():
        branch_support_conf_char = "*"
    metadata_df.at[c.name, "branch_support_conf_char"] = branch_support_conf_char
    if branch_support >= 95:
        metadata_df.at[c.name, "branch_support_conf_category"] = "HIGH"
    else:
        metadata_df.at[c.name, "branch_support_conf_category"] = "LOW"

# Extract metadata from timetree
for c in timetree.find_clades():                    
    if c.comment:
        comment_dict = parse_comment(c.comment)
        for param,val in comment_dict.items():
            rename_param = "timetree_" + param
            metadata_df.at[c.name, rename_param] = val

# Extract metadata from timetree
for attr in mug_dict:
    tree = mug_dict[attr]["tree"]
    
    for c in tree.find_clades():
        if c.comment:
            comment_dict = parse_comment(c.comment)
            for param,val in comment_dict.items():
                rename_param = "mugration_" + param
                metadata_df.at[c.name, rename_param] = val 
            
metadata_df.fillna(NO_DATA_CHAR, inplace=True)
display(metadata_df)       

Unnamed: 0_level_0,strain,date,date_bp,country,province,country_lat,country_lon,province_lat,province_lon,biovar,branch_major,branch_minor,biosample_accession,biosample_comment,branch_number,continent,date_mean,date_bp_mean,date_err,lat,lon,host_human,sequencing_technology,assembly_method,host_raw,host_order,population_color,population,geometry,root_rtt_dist,clade_rtt_dist,population_rtt_dist,node_type,branch_length,branch_support,branch_support_conf_char,branch_support_conf_category,timetree_date,timetree_CI_height,timetree_CI_date,mugration_country,mugration_province,mugration_continent,mugration_host_order,mugration_population
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
Reference,CO92,1992,-29,United States of America,Colorado,39.7837,-100.446,38.7252,-105.608,Orientalis,1.ORI,1.ORI1,SAMEA1705942,KEEP: Assembly Modern Reference,1,North America,1992,29,0,38.7252,-105.608,Human,,,Human,Human,#ff0000,1.ORI,POINT (-105.607716 38.7251776),7.31686e-05,,6.3815e-06,terminal,4.004600e-06,0.0,,LOW,-29,,,United States of America,Colorado,North America,Human,1.ORI
GCA_009909635.1_ASM990963v1_genomic,9_10,1923.0,-98,Russia,Rostov Oblast,64.6863,97.7453,47.6222,40.7958,Medievalis,2.MED,2.MED1,SAMN13632815,KEEP: Assembly Modern,2,Europe,1923,98,0,47.6222,40.7958,Human,IonTorrent,Newbler v. 2.6,Homo sapiens,Human,#c5e88a,2.MED,POINT (40.7957942 47.6222451),7.30501e-05,,9.6582e-06,terminal,2.120100e-06,0.0,,LOW,-98,,,Russia,Rostov Oblast,Europe,Human,2.MED
GCA_009669545.1_ASM966954v1_genomic,42126,2006.0,-15,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722925,KEEP: Assembly Modern,0,Asia,2006,15,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.41847e-05,,1.15566e-05,terminal,0.000000e+00,0.0,,LOW,-15,,,China,Xinjiang,Asia,Rodentia,0.ANT
GCA_009669555.1_ASM966955v1_genomic,42123,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722924,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Frontopsylla elatoides,Siphonaptera,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.47035e-05,,1.20754e-05,terminal,2.356000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Siphonaptera,0.ANT
GCA_009669565.1_ASM966956v1_genomic,42118,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722923,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.4939e-05,,1.23109e-05,terminal,4.711000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Rodentia,0.ANT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE595,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.207000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Human,1.ORI
NODE596,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.356000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Rodentia,1.ORI
NODE597,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.921000e-07,46.0,,LOW,-78.3728,"{50.8548,116.815}","{-116.815,-50.8548}",Peru,Cajamarca,South America,Rodentia,1.ORI
NODE598,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,5.010000e-08,34.0,,LOW,-70.382,"{40.845,106.6}","{-106.6,-40.845}",Peru,Cajamarca,South America,Human,1.ORI


## Add Mugration Confidence and Handle Root

In [13]:
df_list = [mug_dict[attr]["conf_df"] for attr in mug_dict]

for df,attr in zip(df_list, MUG_ATTRIBUTE_LIST):
    for sample in df.index:
        max_state = ""
        max_val = 0        
        for state,val in zip(df.columns, df.loc[sample]):
            if val >= max_val:
                max_val = val
                max_state = state
                
        metadata_df.at[sample,"mugration_" + attr] = max_state      
        metadata_df.at[sample,"mugration_" + attr + "_confidence"] = round(max_val,2)
        
        #if the attr is country or province, extend this confidence to lat and lon
        if attr == "country" or attr == "province":
            metadata_df.at[sample,"mugration_" + attr + "_lat_confidence"] = max_val
            metadata_df.at[sample,"mugration_" + attr + "_lon_confidence"] = max_val

display(metadata_df.loc["NODE0"])

strain                                     NA
date                                       NA
date_bp                                    NA
country                                    NA
province                                   NA
                                       ...   
mugration_province_lat_confidence    0.561953
mugration_province_lon_confidence    0.561953
mugration_continent_confidence           0.52
mugration_host_order_confidence          0.93
mugration_population_confidence           0.5
Name: NODE0, Length: 54, dtype: object

## Latitude and Longtitude

In [14]:
latlon_country_df = pd.DataFrame()
latlon_province_df = pd.DataFrame()

df_list = [latlon_country_df, latlon_province_df]
attr_list = ["country", "province"]
conf_df_list = [mug_dict["country"]["conf_df"], mug_dict["province"]["conf_df"]]

# Create a mapping of geo name to lat,lon
for df,attr in zip(df_list, attr_list):
    # Get data
    for rec in metadata_df.iterrows():
        node_name = rec[0]
        node_type = rec[1]["node_type"]
        name = rec[1][attr]
        country = rec[1]["country"]
        
        if node_type == "internal":
            continue
        
        if attr == "province" and name == NO_DATA_CHAR and node_type == "terminal" and country != "Russia":
            # Use country instead
            name = rec[1]["country"]
            lat = rec[1]["country_lat"]
            lon = rec[1]["country_lon"]
        else:
            lat = rec[1][attr + "_lat"]
            lon = rec[1][attr + "_lon"]             


        if name not in df.index:
            df.at[name, "lat"] = lat
            df.at[name, "lon"] = lon
            df.at[name, "size"] = 1
        else:
            df["size"][name] += 1

#display(latlon_country_df)
#display(latlon_province_df)


# Update lat lon for mugration
for latlon_df, conf_df, attr in zip(df_list, conf_df_list, attr_list):
    for rec in metadata_df.iterrows():
        sample = rec[0]
        name = rec[1]["mugration_" + attr]  
        node_type = rec[1]["node_type"]  
        country = rec[1]["country"]  
        
        if attr == "province" and rec[1][attr]  == NO_DATA_CHAR and node_type == "terminal" and country != "Russia":
            # Use country instead
            name = rec[1]["country"]
            metadata_df.at[sample,"mugration_" + attr] = name
        
        lat = latlon_df["lat"][name]
        lon = latlon_df["lon"][name]
        
                
        metadata_df.at[sample, "mugration_" + attr +"_lat"] = lat
        metadata_df.at[sample, "mugration_" + attr +"_lon"] = lon
        
        
# Mapping file for auspice
out_path_latlon = os.path.join(augur_dir, "latlon.tsv")

# Countries
with open(out_path_latlon, "w") as outfile:
    for country in latlon_country_df.index:
        lat = str(latlon_country_df["lat"][country])
        lon = str(latlon_country_df["lon"][country])
        outfile.write("country" + "\t" + country + "\t" + lat + "\t" + lon + "\n")
# Provinces
with open(out_path_latlon, "a") as outfile:
    for province in latlon_province_df.index:
        lat = str(latlon_province_df["lat"][province])
        lon = str(latlon_province_df["lon"][province])
        outfile.write("province" + "\t" + province + "\t" + lat + "\t" + lon + "\n")
        
#display(metadata_df[metadata_df["continent"] == "Africa"])

## Add Sub  and Spread Rate

In [15]:
# 1. Get Branch Lengths in Num Substitutions

for c in divtree.find_clades():
    branch_length_sub = c.branch_length * SEQ_LEN
    branch_length_time = timetree.common_ancestor(c.name).branch_length
    walk_path = [divtree.root] + divtree.get_path(c)
    # Root
    if len(walk_path) == 1:
        parent = walk_path[0]
    else:
        parent = walk_path[-2]


    # 1. Rate as subs/site/year
    rate_sub = c.branch_length / branch_length_time  if branch_length_time != 0 else 0
    # 2. Rate as subs/year
    rate_sub_year = branch_length_sub / branch_length_time  if branch_length_time != 0 else 0
    
    metadata_df.at[c.name,"branch_length_sub"] = branch_length_sub
    metadata_df.at[c.name,"branch_length_time"] = branch_length_time
    metadata_df.at[c.name,"rate_sub"] = rate_sub
    metadata_df.at[c.name,"rate_sub_year"] = rate_sub_year

metadata_df.fillna(NO_DATA_CHAR, inplace=True)
display(metadata_df)

Unnamed: 0_level_0,strain,date,date_bp,country,province,country_lat,country_lon,province_lat,province_lon,biovar,branch_major,branch_minor,biosample_accession,biosample_comment,branch_number,continent,date_mean,date_bp_mean,date_err,lat,lon,host_human,sequencing_technology,assembly_method,host_raw,host_order,population_color,population,geometry,root_rtt_dist,clade_rtt_dist,population_rtt_dist,node_type,branch_length,branch_support,branch_support_conf_char,branch_support_conf_category,timetree_date,timetree_CI_height,timetree_CI_date,mugration_country,mugration_province,mugration_continent,mugration_host_order,mugration_population,mugration_country_confidence,mugration_country_lat_confidence,mugration_country_lon_confidence,mugration_province_confidence,mugration_province_lat_confidence,mugration_province_lon_confidence,mugration_continent_confidence,mugration_host_order_confidence,mugration_population_confidence,mugration_country_lat,mugration_country_lon,mugration_province_lat,mugration_province_lon,branch_length_sub,branch_length_time,rate_sub,rate_sub_year
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1
Reference,CO92,1992,-29,United States of America,Colorado,39.7837,-100.446,38.7252,-105.608,Orientalis,1.ORI,1.ORI1,SAMEA1705942,KEEP: Assembly Modern Reference,1,North America,1992,29,0,38.7252,-105.608,Human,,,Human,Human,#ff0000,1.ORI,POINT (-105.607716 38.7251776),7.31686e-05,,6.3815e-06,terminal,4.004600e-06,0.0,,LOW,-29,,,United States of America,Colorado,North America,Human,1.ORI,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,39.783730,-100.445882,38.725178,-105.607716,16.935846,162.52800,2.463945e-08,0.104203
GCA_009909635.1_ASM990963v1_genomic,9_10,1923.0,-98,Russia,Rostov Oblast,64.6863,97.7453,47.6222,40.7958,Medievalis,2.MED,2.MED1,SAMN13632815,KEEP: Assembly Modern,2,Europe,1923,98,0,47.6222,40.7958,Human,IonTorrent,Newbler v. 2.6,Homo sapiens,Human,#c5e88a,2.MED,POINT (40.7957942 47.6222451),7.30501e-05,,9.6582e-06,terminal,2.120100e-06,0.0,,LOW,-98,,,Russia,Rostov Oblast,Europe,Human,2.MED,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,64.686314,97.745306,47.622245,40.795794,8.966111,878.89200,2.412242e-09,0.010202
GCA_009669545.1_ASM966954v1_genomic,42126,2006.0,-15,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722925,KEEP: Assembly Modern,0,Asia,2006,15,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.41847e-05,,1.15566e-05,terminal,0.000000e+00,0.0,,LOW,-15,,,China,Xinjiang,Asia,Rodentia,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,0.000000,9.23246,0.000000e+00,0.000000
GCA_009669555.1_ASM966955v1_genomic,42123,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722924,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Frontopsylla elatoides,Siphonaptera,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.47035e-05,,1.20754e-05,terminal,2.356000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Siphonaptera,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,0.996375,24.38400,9.662073e-09,0.040862
GCA_009669565.1_ASM966956v1_genomic,42118,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722923,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.4939e-05,,1.23109e-05,terminal,4.711000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Rodentia,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,1.992328,35.90130,1.312209e-08,0.055495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE595,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.207000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Human,1.ORI,1.0,0.999999,0.999999,1.00,0.999873,0.999873,1.0,0.55,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.933362,0.00000,0.000000e+00,0.000000
NODE596,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.356000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Rodentia,1.ORI,1.0,0.999999,0.999999,1.00,0.999998,0.999998,1.0,0.61,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.996375,0.00000,0.000000e+00,0.000000
NODE597,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.921000e-07,46.0,,LOW,-78.3728,"{50.8548,116.815}","{-116.815,-50.8548}",Peru,Cajamarca,South America,Rodentia,1.ORI,1.0,0.999999,0.999999,1.00,0.999999,0.999999,1.0,0.56,1.0,-6.869970,-75.045851,-6.250000,-78.833333,1.235320,33.20350,8.797265e-09,0.037204
NODE598,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,5.010000e-08,34.0,,LOW,-70.382,"{40.845,106.6}","{-106.6,-40.845}",Peru,Cajamarca,South America,Human,1.ORI,1.0,0.999999,0.999999,1.00,0.999874,0.999874,1.0,0.60,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.211878,41.19420,1.216191e-09,0.005143


## Colors

In [27]:
out_path_colors = os.path.join(augur_dir, "colors.tsv")

colors_dict = {}
neutral_color = "#9A9A9A"

host_manual_color = {
    # Rodent is baseline
    'rodentia': neutral_color, 
    'human': '#d62728', 
    # Other mammals
    'marsupialia': "#1f77b4",
    'artiodactyla': "#1f77b4", 
    'carnivora': "#1f77b4", 
    'lagomorpha': "#1f77b4", 
    # Insects
    'siphonaptera': "#2ca02c", 
    'phthiraptera': "#2ca02c", 
    'ixodida': "#2ca02c", 
    'lepidoptera': "#2ca02c", 
    # Other
    'na': neutral_color,     
}

for attr in mug_dict:
    # Create the color map
    colors_dict[attr] = {}
    for t in divtree.get_terminals():
        attr_val = metadata_df[attr][t.name].lower()
        
        # Initialize stats and values
        if attr_val not in colors_dict[attr] and attr_val != NO_DATA_CHAR:
            colors_dict[attr][attr_val] = ""

    if attr != "host_order":
        # Create the custom color map (pyplot)
        cmap = plt.get_cmap("rainbow", len(colors_dict[attr]))
        # Convert the color map to a list of RGB values
        cmaplist = [cmap(i) for i in range(cmap.N)]
        # Convert RGB values to hex colors
        attr_hex = [colors.to_hex(col) for col in cmaplist]
    
    else:
        attr_hex = []
        for attr_val in colors_dict[attr]:
            attr_col = host_manual_color[attr_val]
            attr_hex.append(attr_col)
    
    # Assign colors to value
    for attr_val, attr_col in zip(colors_dict[attr], attr_hex):
        colors_dict[attr][attr_val] = attr_col   
        
    # Add unknown
    colors_dict[attr][UNKNOWN_CHAR] = "#969696"
    
print(colors_dict)

with open(out_path_colors, "w") as outfile:
    for attr_key in colors_dict:
        for attr_val in colors_dict[attr_key]:
            outfile.write(str(attr_key) + "\t" + str(attr_val) + "\t" + str(colors_dict[attr_key][attr_val]) + "\n")

{'country': {'russia': '#8000ff', 'lithuania': '#7215ff', 'estonia': '#652afe', 'germany': '#573ffd', 'china': '#4a53fc', 'azerbaijan': '#3c66fa', 'armenia': '#2f79f7', 'georgia': '#228bf4', 'mongolia': '#149df1', 'tajikistan': '#07adee', 'kyrgyzstan': '#07bcea', 'england': '#14c9e5', 'spain': '#22d5e0', 'france': '#2fe0db', 'nepal': '#3cead5', 'india': '#4af1d0', 'kazakhstan': '#57f7c9', 'turkmenistan': '#65fcc3', 'uzbekistan': '#72febc', 'iran': '#80ffb4', 'italy': '#8dfead', 'poland': '#9afca5', 'switzerland': '#a8f79d', 'norway': '#b5f194', 'the netherlands': '#c3ea8b', 'kenya': '#d0e083', 'uganda': '#ddd579', 'democratic republic of the congo': '#ebc970', 'vietnam': '#f8bc66', 'myanmar': '#ffad5d', 'united states of america': '#ff9d53', 'peru': '#ff8b49', 'canada': '#ff793f', 'indonesia': '#ff6634', 'madagascar': '#ff532a', 'zimbabwe': '#ff3f20', 'algeria': '#ff2a15', 'brazil': '#ff150b', 'bolivia': '#ff0000', '?': '#969696'}, 'province': {'krasnoyarsk krai': '#8000ff', 'panevezys

DEPRECATED: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



## Parse Tree Into Dataframe

In [17]:
x_posns = get_x_positions(divtree)
y_posns = get_y_positions(divtree)

# Initialize 
# Date lower and upper error bars for tips
metadata_df["date_lower"] = [NO_DATA_CHAR] * len(metadata_df)
metadata_df["date_upper"] = [NO_DATA_CHAR] * len(metadata_df)
    
for c in divtree.find_clades():
    # ------------------------------------------------------------
    # Coordinates for the divergence tree
    coord_x = [value for key,value in x_posns.items() if key.name == c.name][0]
    coord_y = [value for key,value in y_posns.items() if key.name == c.name][0]
    metadata_df.at[c.name, 'divtree_coord_x'] = coord_x
    metadata_df.at[c.name, 'divtree_coord_y'] = coord_y
    
    # ------------------------------------------------------------
    # Root to tip distance (same as div x)
    metadata_df.at[c.name, "rtt_dist"] = coord_x 

    # ------------------------------------------------------------
    # Date lower and upper error bars for tips
    date_mean = metadata_df["date_mean"][c.name] 
    if date_mean != NO_DATA_CHAR:
        metadata_df.at[c.name, "date_lower"] = date_mean - metadata_df["date_err"][c.name]
        metadata_df.at[c.name, "date_upper"] = date_mean + metadata_df["date_err"][c.name]

x_posns = get_x_positions(timetree)
y_posns = get_y_positions(timetree)


for c in timetree.find_clades():

    coord_x = [value for key,value in x_posns.items() if key.name == c.name][0]
    coord_y = [value for key,value in y_posns.items() if key.name == c.name][0]
    metadata_df.at[c.name, 'timetree_coord_x'] = coord_x
    metadata_df.at[c.name, 'timetree_coord_y'] = coord_y
    
    timetree_date = int(float(metadata_df["timetree_date"][c.name]))
    timetree_date_calendar = CURRENT_YEAR + timetree_date
    metadata_df.at[c.name, 'timetree_date_calendar'] = timetree_date_calendar
    
    timetree_CI_date = metadata_df["timetree_CI_date"][c.name]
    if timetree_CI_date == NO_DATA_CHAR:
        timetree_CI_lower_err = 0
        timetree_CI_upper_err = 0
    else:
        timetree_CI_split = [int(float(d)) for d in timetree_CI_date.strip("{}").split(",")]
        timetree_CI_calendar = [CURRENT_YEAR + d for d in timetree_CI_split]
        timetree_CI_lower_err = timetree_date_calendar - timetree_CI_calendar[0]
        timetree_CI_upper_err = timetree_CI_calendar[1] - timetree_date_calendar
    metadata_df.at[c.name,"timetree_CI_lower_err"] = timetree_CI_lower_err
    metadata_df.at[c.name,"timetree_CI_upper_err"] = timetree_CI_upper_err   

display(metadata_df)

Unnamed: 0_level_0,strain,date,date_bp,country,province,country_lat,country_lon,province_lat,province_lon,biovar,branch_major,branch_minor,biosample_accession,biosample_comment,branch_number,continent,date_mean,date_bp_mean,date_err,lat,lon,host_human,sequencing_technology,assembly_method,host_raw,host_order,population_color,population,geometry,root_rtt_dist,clade_rtt_dist,population_rtt_dist,node_type,branch_length,branch_support,branch_support_conf_char,branch_support_conf_category,timetree_date,timetree_CI_height,timetree_CI_date,mugration_country,mugration_province,mugration_continent,mugration_host_order,mugration_population,mugration_country_confidence,mugration_country_lat_confidence,mugration_country_lon_confidence,mugration_province_confidence,mugration_province_lat_confidence,mugration_province_lon_confidence,mugration_continent_confidence,mugration_host_order_confidence,mugration_population_confidence,mugration_country_lat,mugration_country_lon,mugration_province_lat,mugration_province_lon,branch_length_sub,branch_length_time,rate_sub,rate_sub_year,date_lower,date_upper,divtree_coord_x,divtree_coord_y,rtt_dist,timetree_coord_x,timetree_coord_y,timetree_date_calendar,timetree_CI_lower_err,timetree_CI_upper_err
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
Reference,CO92,1992,-29,United States of America,Colorado,39.7837,-100.446,38.7252,-105.608,Orientalis,1.ORI,1.ORI1,SAMEA1705942,KEEP: Assembly Modern Reference,1,North America,1992,29,0,38.7252,-105.608,Human,,,Human,Human,#ff0000,1.ORI,POINT (-105.607716 38.7251776),7.31686e-05,,6.3815e-06,terminal,4.004600e-06,0.0,,LOW,-29,,,United States of America,Colorado,North America,Human,1.ORI,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,39.783730,-100.445882,38.725178,-105.607716,16.935846,162.52800,2.463945e-08,0.104203,1992,1992,0.000073,509.00,0.000073,5249.006080,509.00,1992.0,0.0,0.0
GCA_009909635.1_ASM990963v1_genomic,9_10,1923.0,-98,Russia,Rostov Oblast,64.6863,97.7453,47.6222,40.7958,Medievalis,2.MED,2.MED1,SAMN13632815,KEEP: Assembly Modern,2,Europe,1923,98,0,47.6222,40.7958,Human,IonTorrent,Newbler v. 2.6,Homo sapiens,Human,#c5e88a,2.MED,POINT (40.7957942 47.6222451),7.30501e-05,,9.6582e-06,terminal,2.120100e-06,0.0,,LOW,-98,,,Russia,Rostov Oblast,Europe,Human,2.MED,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,64.686314,97.745306,47.622245,40.795794,8.966111,878.89200,2.412242e-09,0.010202,1923,1923,0.000073,339.00,0.000073,5180.006106,339.00,1923.0,0.0,0.0
GCA_009669545.1_ASM966954v1_genomic,42126,2006.0,-15,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722925,KEEP: Assembly Modern,0,Asia,2006,15,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.41847e-05,,1.15566e-05,terminal,0.000000e+00,0.0,,LOW,-15,,,China,Xinjiang,Asia,Rodentia,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,0.000000,9.23246,0.000000e+00,0.000000,2006,2006,0.000054,152.00,0.000054,5263.005722,152.00,2006.0,0.0,0.0
GCA_009669555.1_ASM966955v1_genomic,42123,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722924,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Frontopsylla elatoides,Siphonaptera,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.47035e-05,,1.20754e-05,terminal,2.356000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Siphonaptera,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,0.996375,24.38400,9.662073e-09,0.040862,2005,2005,0.000055,171.00,0.000055,5262.005752,171.00,2005.0,0.0,0.0
GCA_009669565.1_ASM966956v1_genomic,42118,2005.0,-16,China,Xinjiang,35.0001,105,42.4805,85.4633,Antiqua,0.ANT,0.ANT1,SAMN07722923,KEEP: Assembly Modern,0,Asia,2005,16,0,42.4805,85.4633,Non-Human,Illumina Hiseq 2000,SOAPdenovo v. 2.04,Citellus undulatus,Rodentia,#238af5,0.ANT,POINT (85.46334640000001 42.4804953),5.4939e-05,,1.23109e-05,terminal,4.711000e-07,0.0,,LOW,-16,,,China,Xinjiang,Asia,Rodentia,0.ANT,1.0,1.000000,1.000000,1.00,1.000000,1.000000,1.0,1.00,1.0,35.000074,104.999927,42.480495,85.463346,1.992328,35.90130,1.312209e-08,0.055495,2005,2005,0.000055,173.00,0.000055,5262.005752,173.00,2005.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE595,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.207000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Human,1.ORI,1.0,0.999999,0.999999,1.00,0.999873,0.999873,1.0,0.55,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.933362,0.00000,0.000000e+00,0.000000,,,0.000072,598.25,0.000072,5166.429640,598.25,1910.0,26.0,22.0
NODE596,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.356000e-07,13.0,,LOW,-111.576,"{89.1935,137.589}","{-137.589,-89.1935}",Peru,Cajamarca,South America,Rodentia,1.ORI,1.0,0.999999,0.999999,1.00,0.999998,0.999998,1.0,0.61,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.996375,0.00000,0.000000e+00,0.000000,,,0.000072,596.75,0.000072,5166.429640,596.75,1910.0,26.0,22.0
NODE597,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,2.921000e-07,46.0,,LOW,-78.3728,"{50.8548,116.815}","{-116.815,-50.8548}",Peru,Cajamarca,South America,Rodentia,1.ORI,1.0,0.999999,0.999999,1.00,0.999999,0.999999,1.0,0.56,1.0,-6.869970,-75.045851,-6.250000,-78.833333,1.235320,33.20350,8.797265e-09,0.037204,,,0.000073,597.50,0.000073,5199.633140,597.50,1943.0,38.0,28.0
NODE598,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,internal,5.010000e-08,34.0,,LOW,-70.382,"{40.845,106.6}","{-106.6,-40.845}",Peru,Cajamarca,South America,Human,1.ORI,1.0,0.999999,0.999999,1.00,0.999874,0.999874,1.0,0.60,1.0,-6.869970,-75.045851,-6.250000,-78.833333,0.211878,41.19420,1.216191e-09,0.005143,,,0.000072,599.75,0.000072,5207.623840,599.75,1951.0,36.0,30.0


## Create Custom Columns

In [18]:
for c in divtree.get_terminals():
    strain = metadata_df["strain"][c.name]
    country = metadata_df["country"][c.name]
    province = metadata_df["country"][c.name]
    date = metadata_df["date"][c.name]
    if type(date) == str:
        date = date.strip("[]")
    metadata_df.at[c.name,"country_date_strain"] = "{} ({}) {}".format(country, date, strain)
    metadata_df.at[c.name,"province_date_strain"] = "{} ({}) {}".format(province, date, strain)
    
metadata_df.fillna(NO_DATA_CHAR, inplace=True)

---
# 4. EXPORT

## Time Tree

In [19]:
out_timetree = copy.deepcopy(timetree)

for c in out_timetree.find_clades():
    if not c.confidence:
        c.confidence = 0

metadata_to_comment(out_timetree, metadata_df)    
out_timetree_nex_path = os.path.join(augur_dir, "all.timetree.nex")
Phylo.write(out_timetree, out_timetree_nex_path, "nexus")

for c in out_timetree.find_clades():
    c.comment = None

out_timetree_nwk_path = os.path.join(augur_dir, "all.timetree.nwk")
Phylo.write(out_timetree, out_timetree_nwk_path, "newick")

1

# Divergence Tree

In [20]:
out_divtree = copy.deepcopy(divtree)

for c in out_divtree.find_clades():
    if not c.confidence:
        c.confidence = 0

metadata_to_comment(out_divtree, metadata_df)    
out_divtree_nex_path = os.path.join(augur_dir, "all.divtree.nex")
Phylo.write(out_divtree, out_divtree_nex_path, "nexus", format_branch_length="%1.10f")

for c in out_divtree.find_clades():
    c.comment = None

out_divtree_nwk_path = os.path.join(augur_dir, "all.divtree.nwk")
Phylo.write(out_divtree, out_divtree_nwk_path, "newick", format_branch_length="%1.10f")

1

## Metadata

In [21]:
out_path_metadata = out_divtree_nwk_path = os.path.join(augur_dir, "metadata.tsv")
metadata_df.to_csv(out_path_metadata, sep="\t", index=True)

## Augur

### Reduced Dataframe

In [22]:
# Remember, order mug attrs when dealing with confidence!

columns = [
    # Draw Divergence Tree
    "branch_length",
    "branch_length_sub",
    # Draw Time Tree
    "branch_length_time",
    "timetree_date_calendar",
    "date_mean",
    "date_err",
    "date_bp_mean",    
    # Branch Support
    "branch_support",
    "branch_support_conf_category",  
    "branch_support_conf_char",  
    # Filters
    "node_type",
    "branch_number",    
    # Text Description
    "biosample_accession",
    "strain",
    "country_date_strain",
    "province_date_strain",
    # Stats
    "root_rtt_dist",
    "population_rtt_dist",
    "rate_sub",
    "rate_sub_year", 
]

for attr in mug_dict:
    mug_colname = "mugration_" + attr
    conf_colname = "mugration_" + attr + "_confidence"
    
    lat_colname = attr + "_lat"
    lon_colname = attr + "_lon"
    if lat_colname in metadata_df.columns:
        lat_colname = "mugration_" + lat_colname
        lon_colname = "mugration_" + lon_colname
        columns.append(lat_colname)
        columns.append(lon_colname)
        
    columns.append(mug_colname)
    columns.append(conf_colname)
    

# Edit df
auspice_df = copy.copy(metadata_df[columns])
auspice_df["timetree_num_date_confidence"] = [NO_DATA_CHAR] * len(auspice_df)
auspice_df["node_name"] = list(auspice_df.index)
auspice_df["blank"] = [" "] * len(auspice_df)

# Add mugration confidence categories
for attr in mug_dict:
    colname = attr + "_conf_category"
    for rec in auspice_df.iterrows():
        sample = rec[0]
        conf = auspice_df["mugration_" + attr + "_confidence"][sample]
        category = "LOW"
        if conf >= 0.95:
            category = "HIGH"
        auspice_df.at[sample, colname] = category

# Date formating
for rec in metadata_df.iterrows():
    sample = rec[0]
    date = rec[1]["timetree_date_calendar"]
    date_lower = float(date) - rec[1]["timetree_CI_lower_err"]
    date_upper = float(date) + rec[1]["timetree_CI_upper_err"]
    date_confidence = [date_lower, date_upper]
    auspice_df.at[sample,"timetree_num_date_confidence"] = date_confidence
    
    # Rounding
    auspice_df.at[sample, "branch_length_sub"] = round(auspice_df["branch_length_sub"][sample])
    auspice_df.at[sample, "branch_length_time"] = round(auspice_df["branch_length_time"][sample])

        
# Rename columns
auspice_df.rename(columns={"timetree_date_calendar": "timetree_num_date"}, inplace=True)
auspice_df.rename(columns={"mugration_country_lat": "country_lat"}, inplace=True)
auspice_df.rename(columns={"mugration_country_lon": "country_lon"}, inplace=True)
auspice_df.rename(columns={"mugration_province_lat": "province_lat"}, inplace=True)
auspice_df.rename(columns={"mugration_province_lon": "province_lon"}, inplace=True)
display(auspice_df)

Unnamed: 0_level_0,branch_length,branch_length_sub,branch_length_time,timetree_num_date,date_mean,date_err,date_bp_mean,branch_support,branch_support_conf_category,branch_support_conf_char,node_type,branch_number,biosample_accession,strain,country_date_strain,province_date_strain,root_rtt_dist,population_rtt_dist,rate_sub,rate_sub_year,country_lat,country_lon,mugration_country,mugration_country_confidence,province_lat,province_lon,mugration_province,mugration_province_confidence,mugration_continent,mugration_continent_confidence,mugration_host_order,mugration_host_order_confidence,mugration_population,mugration_population_confidence,timetree_num_date_confidence,node_name,blank,country_conf_category,province_conf_category,continent_conf_category,host_order_conf_category,population_conf_category
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1
Reference,4.004600e-06,17.0,163.0,1992.0,1992,0,29,0.0,LOW,,terminal,1,SAMEA1705942,CO92,United States of America (1992) CO92,United States of America (1992) CO92,7.31686e-05,6.3815e-06,2.463945e-08,0.104203,39.783730,-100.445882,United States of America,1.0,38.725178,-105.607716,Colorado,1.00,North America,1.0,Human,1.00,1.ORI,1.0,"[1992.0, 1992.0]",Reference,,HIGH,HIGH,HIGH,HIGH,HIGH
GCA_009909635.1_ASM990963v1_genomic,2.120100e-06,9.0,879.0,1923.0,1923,0,98,0.0,LOW,,terminal,2,SAMN13632815,9_10,Russia (1923.0) 9_10,Russia (1923.0) 9_10,7.30501e-05,9.6582e-06,2.412242e-09,0.010202,64.686314,97.745306,Russia,1.0,47.622245,40.795794,Rostov Oblast,1.00,Europe,1.0,Human,1.00,2.MED,1.0,"[1923.0, 1923.0]",GCA_009909635.1_ASM990963v1_genomic,,HIGH,HIGH,HIGH,HIGH,HIGH
GCA_009669545.1_ASM966954v1_genomic,0.000000e+00,0.0,9.0,2006.0,2006,0,15,0.0,LOW,,terminal,0,SAMN07722925,42126,China (2006.0) 42126,China (2006.0) 42126,5.41847e-05,1.15566e-05,0.000000e+00,0.000000,35.000074,104.999927,China,1.0,42.480495,85.463346,Xinjiang,1.00,Asia,1.0,Rodentia,1.00,0.ANT,1.0,"[2006.0, 2006.0]",GCA_009669545.1_ASM966954v1_genomic,,HIGH,HIGH,HIGH,HIGH,HIGH
GCA_009669555.1_ASM966955v1_genomic,2.356000e-07,1.0,24.0,2005.0,2005,0,16,0.0,LOW,,terminal,0,SAMN07722924,42123,China (2005.0) 42123,China (2005.0) 42123,5.47035e-05,1.20754e-05,9.662073e-09,0.040862,35.000074,104.999927,China,1.0,42.480495,85.463346,Xinjiang,1.00,Asia,1.0,Siphonaptera,1.00,0.ANT,1.0,"[2005.0, 2005.0]",GCA_009669555.1_ASM966955v1_genomic,,HIGH,HIGH,HIGH,HIGH,HIGH
GCA_009669565.1_ASM966956v1_genomic,4.711000e-07,2.0,36.0,2005.0,2005,0,16,0.0,LOW,,terminal,0,SAMN07722923,42118,China (2005.0) 42118,China (2005.0) 42118,5.4939e-05,1.23109e-05,1.312209e-08,0.055495,35.000074,104.999927,China,1.0,42.480495,85.463346,Xinjiang,1.00,Asia,1.0,Rodentia,1.00,0.ANT,1.0,"[2005.0, 2005.0]",GCA_009669565.1_ASM966956v1_genomic,,HIGH,HIGH,HIGH,HIGH,HIGH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE595,2.207000e-07,1.0,0.0,1910.0,,,,13.0,LOW,,internal,,,,,,,,0.000000e+00,0.000000,-6.869970,-75.045851,Peru,1.0,-6.250000,-78.833333,Cajamarca,1.00,South America,1.0,Human,0.55,1.ORI,1.0,"[1884.0, 1932.0]",NODE595,,HIGH,HIGH,HIGH,LOW,HIGH
NODE596,2.356000e-07,1.0,0.0,1910.0,,,,13.0,LOW,,internal,,,,,,,,0.000000e+00,0.000000,-6.869970,-75.045851,Peru,1.0,-6.250000,-78.833333,Cajamarca,1.00,South America,1.0,Rodentia,0.61,1.ORI,1.0,"[1884.0, 1932.0]",NODE596,,HIGH,HIGH,HIGH,LOW,HIGH
NODE597,2.921000e-07,1.0,33.0,1943.0,,,,46.0,LOW,,internal,,,,,,,,8.797265e-09,0.037204,-6.869970,-75.045851,Peru,1.0,-6.250000,-78.833333,Cajamarca,1.00,South America,1.0,Rodentia,0.56,1.ORI,1.0,"[1905.0, 1971.0]",NODE597,,HIGH,HIGH,HIGH,LOW,HIGH
NODE598,5.010000e-08,0.0,41.0,1951.0,,,,34.0,LOW,,internal,,,,,,,,1.216191e-09,0.005143,-6.869970,-75.045851,Peru,1.0,-6.250000,-78.833333,Cajamarca,1.00,South America,1.0,Human,0.60,1.ORI,1.0,"[1915.0, 1981.0]",NODE598,,HIGH,HIGH,HIGH,LOW,HIGH


### JSON

In [23]:
augur_dict = augur_export(
    tree_path=None, 
    aln_path=None,  
    tree=divtree, 
    tree_df=auspice_df, 
    color_keyword_exclude=["geometry"],
    type_convert = {
        "branch_number" : (lambda x : str(x))
    },
)

print(augur_dict["nodes"]["Reference"])

out_path_augur_json = os.path.join(augur_dir, "all.json" )
utils.write_json(data=augur_dict, file_name=out_path_augur_json, indent=JSON_INDENT)

{'branch_length': 4.0046e-06, 'branch_length_sub': 17.0, 'branch_length_time': 163.0, 'num_date': 1992.0, 'date_mean': 1992.0, 'date_err': 0.0, 'date_bp_mean': 29.0, 'branch_support': 0.0, 'branch_support_conf_category': 'LOW', 'branch_support_conf_char': '', 'node_type': 'terminal', 'branch_number': '1.0', 'biosample_accession': 'SAMEA1705942', 'strain': 'CO92', 'country_date_strain': 'United States of America (1992) CO92', 'province_date_strain': 'United States of America (1992) CO92', 'root_rtt_dist': 7.316859999999999e-05, 'population_rtt_dist': 6.3815e-06, 'rate_sub': 2.4639446741484545e-08, 'rate_sub_year': 0.1042026349355188, 'country_lat': 39.7837304, 'country_lon': -100.4458825, 'country': 'United States of America', 'country_confidence': {'United States of America': 1.0}, 'province_lat': 38.7251776, 'province_lon': -105.607716, 'province': 'Colorado', 'province_confidence': {'Colorado': 1.0}, 'continent': 'North America', 'continent_confidence': {'North America': 1.0}, 'host_

## Auspice

### JSON

In [28]:
#import sys, importlib
#importlib.reload(sys.modules['functions'])
#from functions import auspice_export, branch_attributes

auspice_dict = auspice_export(
    tree=divtree,
    augur_json_paths=[out_path_augur_json], 
    auspice_config_path=auspice_config_path, 
    auspice_colors_path=out_path_colors,
    auspice_latlons_path=out_path_latlon, 
    )


label_col = list(auspice_df.columns)

# Recursively add branch attrs
branch_attributes(
    tree_dict=auspice_dict["tree"], 
    sub_dict=auspice_dict["tree"], 
    df=auspice_df,
    label_col=label_col,
    )

# Last manual changes
auspice_dict_copy = copy.deepcopy(auspice_dict)
for i in range(0, len(auspice_dict_copy["meta"]["colorings"])):
    coloring = auspice_dict_copy["meta"]["colorings"][i]
    for key in coloring:
        # Node type as internal or terminal
        if coloring[key] == "node_type":
            auspice_dict["meta"]["colorings"][i]['scale'] = [['internal', '#FFFFFF'], ['terminal', '#000000']]
            #print(auspice_dict["meta"]["colorings"][i])
        # Confidence category
        if "conf_category" in coloring[key]:
            auspice_dict["meta"]["colorings"][i]['scale'] = [['LOW', '#FFFFFF'], ['HIGH', '#000000']]
            #print(auspice_dict["meta"]["colorings"][i])
        # Host Human binary
        #if "host_human" in coloring[key]:
        #    auspice_dict["meta"]["colorings"][i]['scale'] = [['Human', '#CBB742'], ['Non-Human', "#60B6F2"], ['NA', "#D6D6D6"]]

        
# Write outputs - For Local Rendering
out_path_auspice_local_json = os.path.join(auspice_dir, "all.json" )
utils.write_json(data=auspice_dict, file_name=out_path_auspice_local_json, indent=JSON_INDENT, include_version=False)
export_v2.validate_data_json(out_path_auspice_local_json)
print("Validation successful for local JSON.\n")

#out_path_auspice_remote_json = os.path.join(auspice_dir, "{}_all.json".format(AUSPICE_PREFIX))
#utils.write_json(data=auspice_dict, file_name=out_path_auspice_local_json, indent=JSON_INDENT, include_version=False)
#export_v2.validate_data_json(out_path_auspice_local_json)
#print("Validation successful for local JSON.\n")

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.





Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/all.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.





## Subtrees

In [29]:
#import sys, importlib
#importlib.reload(sys.modules['functions'])
#from functions import extract_subtree

for population in population_list:
    timetree_copy = copy.deepcopy(timetree)
    
    # Create the subtree df
    df = metadata_df[metadata_df["population"] == population]
    color = df["population_color"][0]
    
    #if len(subtree_df) < 2: continue
    print(population, color)

    df.sort_values("timetree_coord_y", inplace=True)
    tips = df[df["node_type"] == "terminal"]
    internals = df[df["node_type"] == "internal"]
    
    # ----------------------------------------------
    # Subtree Timetree
    subtree = extract_subtree(
        tree=timetree_copy, 
        tips=list(tips.index), 
        df=metadata_df, 
        color_branches=False)
    
    metadata_to_comment(subtree, metadata_df)   


    out_subtree_nex_path = os.path.join(augur_dir, "{}.timetree.nex".format(population))
    Phylo.write(subtree, out_subtree_nex_path, "nexus")
    out_subtree_nwk_path = os.path.join(augur_dir, "{}.timetree.nwk".format(population))
    Phylo.write(subtree, out_subtree_nwk_path, "newick")
    
    # ----------------------------------------------
    # Subtree divtree
    divtree_copy = copy.deepcopy(divtree)
    subtree_div = extract_subtree(
        tree=divtree_copy, 
        tips=list(tips.index), 
        df=metadata_df, 
        color_branches=False)
    
    metadata_to_comment(subtree_div, metadata_df)   
    out_subtree_path = os.path.join(augur_dir, "{}.divtree.nex".format(population))
    Phylo.write(subtree, out_subtree_path, "nexus", format_branch_length="%1.10f")

    #fig, ax = plt.subplots(1)
    #Phylo.draw(subtree, axes=ax, show_confidence=False, label_func = lambda x: '', do_show=False)
    
    augur_dict = augur_export(
        tree_path=None, 
        aln_path=None,  
        tree=subtree, 
        tree_df=auspice_df, 
        color_keyword_exclude=["geometry"],
        type_convert = {
            "branch_number" : (lambda x : str(x))
        },
    ) 
        
    out_path_augur_json = os.path.join(augur_dir, "{}.json".format(population) )
    utils.write_json(data=augur_dict, file_name=out_path_augur_json, indent=JSON_INDENT)

    auspice_dict = auspice_export(
        tree=subtree,
        augur_json_paths=[out_path_augur_json], 
        auspice_config_path=auspice_config_path, 
        auspice_colors_path=out_path_colors,
        auspice_latlons_path=out_path_latlon, 
        )    
    
    label_col = list(auspice_df.columns)

    # Recursively add branch attrs
    branch_attributes(
        tree_dict=auspice_dict["tree"], 
        sub_dict=auspice_dict["tree"], 
        df=auspice_df,
        label_col=label_col,
        )    
    
    # Last manual changes
    auspice_dict_copy = copy.deepcopy(auspice_dict)
    for i in range(0, len(auspice_dict_copy["meta"]["colorings"])):
        coloring = auspice_dict_copy["meta"]["colorings"][i]
        for key in coloring:
            # Node type as internal or terminal
            if coloring[key] == "node_type":
                auspice_dict["meta"]["colorings"][i]['scale'] = [['internal', '#FFFFFF'], ['terminal', color]]
                #print(auspice_dict["meta"]["colorings"][i])
            # Confidence category
            if "conf_category" in coloring[key]:
                auspice_dict["meta"]["colorings"][i]['scale'] = [['LOW', '#FFFFFF'], ['HIGH', color]]
                #print(auspice_dict["meta"]["colorings"][i]) 
    
    # Write outputs - For Local Rendering
    out_path_auspice_local_json = os.path.join(auspice_dir, "{}.json".format(population) )
    utils.write_json(data=auspice_dict, file_name=out_path_auspice_local_json, indent=JSON_INDENT, include_version=False)
    export_v2.validate_data_json(out_path_auspice_local_json)
    print("Validation successful for local JSON.\n")

0.PRE #8000ff


DEPRECATED: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/0.PRE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0.PE #5148fc
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.





Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/0.PE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

0.ANT #238af5



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.





Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/0.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

0.ANT4 #0cc1e8



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/0.ANT4.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


3.ANT #3ae8d7
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/3.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.





4.ANT #68fcc1



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/4.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2.ANT #97fca7
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/2.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

2.MED #c5e88a



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.





Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/2.MED.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

1.PRE #f3c16a



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/1.PRE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

1.ANT #ff8a48



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/1.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

1.IN #ff4824




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/1.IN.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

1.ORI #ff0000



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/config/auspice_config.json'...
Validation success.





Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography/results/auspice/all/chromosome/full/filter5/ml/1.ORI.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.



