# BEAST Analysis Notebook

---

# 0. SETUP

In [1]:
import os
import pandas as pd
import seaborn as sns
from Bio import Phylo, AlignIO
from functions import *

## Paths

In [2]:
log_dir  = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade"
tree_dir = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock"
metadata_path = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/iqtree/all/chromosome/full/filter5/filter-taxa/metadata.tsv"
beast_dir = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade"

auspice_config_path = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json"
out_path_colors = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/augur/all/chromosome/full/filter5/colors.tsv"
out_path_latlon = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/augur/all/chromosome/full/filter5/latlon.tsv"

# ------------------------------------------
# Alignment
constant_sites_path   = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/snippy_multi/all/chromosome/full/snippy-multi.constant_sites.txt"
aln_path              = "/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/iqtree/all/chromosome/full/filter5/filter-sites/snippy-multi.snps.aln"

## Variables

In [3]:
# ------------------------------------------
BRANCH_LIST = {
    "1.ORI" : ["1.ORI1", "1.ORI2", "1.ORI3"],
    "1.IN": ["1.IN1","1.IN2","1.IN3"],  
    "1.ANT": ["1.ANT1"], 
    "1.PRE" : ["1.PRE0","1.PRE1", "1.PRE2", "1.PRE3"],
    "2.MED": ["2.MED0", "2.MED1","2.MED2","2.MED3" ],      
    "2.ANT": ["2.ANT1","2.ANT2","2.ANT3" ],    
    "4.ANT": ["4.ANT1" ],       
    "3.ANT": ["3.ANT1", "3.ANT2" ],  
    "0.ANT": ["0.ANT1", "0.ANT2","0.ANT3","0.ANT5"],         
    "0.ANT4" : ["0.ANT4"], 
    "0.PE": ["0.PE2", "0.PE4m", "0.PE4m", "0.PE4t", "0.PE4a", "0.PE5", "0.PE7", "0.PE8", "0.PE10"],   
    "0.PRE": ["0.PRE1", "0.PRE2"],        
}

NUM_STATES = 10

NO_DATA_CHAR = "NA"
JSON_INDENT=2

# ------------------------------------------
# Alignment
with open(constant_sites_path) as infile:
    data = infile.read().strip().split(",")
    constant_sites = sum([int(count) for count in data])

aln = AlignIO.read(aln_path, "fasta")
variant_sites = len(aln[0].seq)
SEQ_LEN = constant_sites + variant_sites

---

# 1. IMPORT

## Metadata

In [4]:
metadata_df = pd.read_csv(metadata_path, sep='\t')
metadata_df.set_index(metadata_df.columns[0], inplace=True)
metadata_df.fillna(NO_DATA_CHAR, inplace=True)

display(metadata_df)

Unnamed: 0_level_0,strain,date,date_bp,country,province,country_lat,country_lon,province_lat,province_lon,biovar,...,biosample_accession,biosample_comment,branch_number,continent,date_mean,date_bp_mean,date_err,lat,lon,host_human
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Reference,CO92,1992,-29,United States of America,Colorado,39.783730,-100.445882,38.7252,-105.608,Orientalis,...,SAMEA1705942,KEEP: Assembly Modern Reference,1,North America,1992.0,29.0,0.0,38.725178,-105.607716,Human
GCA_009909635.1_ASM990963v1_genomic,9_10,1923.0,-98,Russia,Rostov Oblast,64.686314,97.745306,47.6222,40.7958,Medievalis,...,SAMN13632815,KEEP: Assembly Modern,2,Europe,1923.0,98.0,0.0,47.622245,40.795794,Human
GCA_009669545.1_ASM966954v1_genomic,42126,2006.0,-15,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,...,SAMN07722925,KEEP: Assembly Modern,0,Asia,2006.0,15.0,0.0,42.480495,85.463346,Non-Human
GCA_009669555.1_ASM966955v1_genomic,42123,2005.0,-16,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,...,SAMN07722924,KEEP: Assembly Modern,0,Asia,2005.0,16.0,0.0,42.480495,85.463346,Non-Human
GCA_009669565.1_ASM966956v1_genomic,42118,2005.0,-16,China,Xinjiang,35.000074,104.999927,42.4805,85.4633,Antiqua,...,SAMN07722923,KEEP: Assembly Modern,0,Asia,2005.0,16.0,0.0,42.480495,85.463346,Non-Human
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SAMEA7313243_45,Azov38,[1400:1700],[-621:-321],Russia,Rostov Oblast,64.686314,97.745306,47.6222,40.7958,Second Pandemic,...,SAMEA7313243_45,KEEP: SRA Ancient Combined Record,1,Europe,1550.0,471.0,150.0,47.622245,40.795794,Human
SAMEA7313246_49,Gdansk8,[1400:1700],[-621:-321],Poland,Pomeranian Voivodeship,52.215933,19.134422,54.2456,18.1099,Second Pandemic,...,SAMEA7313246_49,KEEP: SRA Ancient Combined Record,1,Europe,1550.0,471.0,150.0,54.245560,18.109900,Human
SAMEA6651390,AGU010,[1435:1477],[-586:-544],Lithuania,Vilnius County,55.350000,23.750000,54.8227,25.2495,Second Pandemic,...,SAMEA6651390,KEEP: SRA Ancient,1,Europe,1456.0,565.0,21.0,54.822692,25.249534,Human
SAMEA6637004,AGU025,[1441:1612],[-580:-409],Lithuania,Vilnius County,55.350000,23.750000,54.8227,25.2495,Second Pandemic,...,SAMEA6637004,KEEP: SRA Ancient,1,Europe,1526.5,494.5,85.5,54.822692,25.249534,Human


## Most Recent Sampling Date

In [5]:
out_path = os.path.join(beast_dir, "most_recent_sampling_dates.tsv")
mrsd_dict = {}

with open(out_path, "w") as outfile:
    for branch in BRANCH_LIST:
        branch_df = metadata_df[metadata_df["branch_minor"].isin(BRANCH_LIST[branch])]
        max_date = max(branch_df["date_mean"])
        outfile.write("{}\t{}\n".format(branch, max_date))
        mrsd_dict[branch] = max_date

## Colors

In [6]:
colors_dict = {}

colors_df = pd.read_csv(out_path_colors, sep='\t', header=None)
colors_df.columns = ["state", "value", "color"]

for state in set(colors_df["state"]):
    state_df = colors_df[colors_df["state"] == state]    
    colors_dict[state] = {}
    
    for value,color in zip(state_df["value"], state_df["color"]):
        colors_dict[state][value] = color

print(colors_dict)

{'province': {'Krasnoyarsk Krai': '#8000ff', 'Panevezys County': '#7b07ff', 'Pärnu maakond': '#760eff', 'Irkutsk Oblast': '#7215ff', 'Bavaria': '#6d1dff', 'Altai Krai': '#6924fe', 'Qinghai': '#642bfe', 'Republic of Dagestan': '#6032fe', 'Goranboy District': '#5b39fd', 'Syunik Province': '#5740fd', 'Shirak Province': '#5247fc', 'Gegharkunik Province': '#4d4dfc', 'Samtskhe-Javakheti': '#4954fb', 'Shahbuz Rayon': '#445bfb', 'Samara Oblast': '#4062fa', 'Sečuán': '#3b68f9', 'Bayankhongor': '#376ff9', 'Inner Mongolia': '#3275f8', 'Sughd Province': '#2e7bf7', 'Talas Region': '#2982f6', 'Altai Republic': '#2488f5', 'Govi-Altai': '#208ef4', 'Övörkhangai': '#1b94f3', 'Ömnögovi': '#1799f2', 'Bayan-Ölgii': '#129ff1', 'Khovd': '#0ea5ef', 'Xinjiang': '#09aaee', 'Issyk-Kul Region': '#05afed', 'East of England': '#00b4ec', 'Valencia Community': '#05b9ea', 'Centre-Loire Valley': '#09bee9', 'Osh Region': '#0ec3e7', 'Naryn Region': '#12c7e6', 'Gansu': '#17cce4', 'Hovsgel': '#1bd0e3', 'Zavkhan Province': 

## Tree Files

In [7]:
# Construct a dictionary to hold the trees
tree_dict = {}

for branch in BRANCH_LIST:
    print("Branch:", branch)
    tree_dict[branch] = {} 
    for filename in os.listdir(tree_dir):
        if not filename.endswith(".tre"): continue
        filepath = os.path.join(tree_dir, filename)
        if branch in filename:            
            # Add tree files to dict
            tree_dict[branch]["tree_file_raw"] = filepath 
            tree_dict[branch]["tree_file_edit"] = os.path.join(tree_dir, branch + ".nex")
            tree_dict[branch]["sample_rename"] = {}
            
            # Read in raw tree to deal with dashes
            with open(tree_dict[branch]["tree_file_raw"],  "r") as infile:                    
                with open(tree_dict[branch]["tree_file_edit"], "w") as outfile:           
                    raw_tree = infile.read()
                    # Remove quotations if they exist
                    raw_tree = raw_tree.replace("'","")
                    
                    # Split into lines to iterate over
                    raw_tree_lines = raw_tree.split("\n")
                    
                    # By default don't parse a line for dashes
                    taxa_line = False
                    
                    for line in raw_tree_lines: 
                                
                        if "TREE" not in line and "-" in line:
                            
                            if len(line.split(" ")) == 1:
                                name_dashes = line.strip()
                                name_no_dashes = name_dashes.replace("-","_")
                                tree_dict[branch]["sample_rename"][name_no_dashes] = name_dashes
                            line = line.replace("-","_")
                                
                        outfile.write(line + "\n")
            
            # Read in edited tree
            trees = Phylo.parse(tree_dict[branch]["tree_file_edit"], "nexus")
            # There should be only 1 tree
            for t in trees:
                tree_dict[branch]["tree"] = t
                tree_dict[branch]["tree"].ladderize(reverse=False)
                break

            # Rename sample names back to with dashes
            for c in tree_dict[branch]["tree"].find_clades():
                if c.name in tree_dict[branch]["sample_rename"]:
                    orig_name = c.name
                    c.name = tree_dict[branch]["sample_rename"][c.name]
                    #print("Rename:", orig_name, c.name)
                    
                # Strip the date suffix
                if c.name:
                    c.name = "_".join(c.name.split("_")[0:-1])

Branch: 1.ORI
Branch: 1.IN
Branch: 1.ANT
Branch: 1.PRE
Branch: 2.MED
Branch: 2.ANT
Branch: 4.ANT
Branch: 3.ANT
Branch: 0.ANT
Branch: 0.ANT4
Branch: 0.PE
Branch: 0.PRE


## Add Tree Data to Dataframe

In [8]:
# Initialize new columns

metadata_df["node_type"] = [NO_DATA_CHAR] * len(metadata_df)
metadata_df["branch_length"] = [NO_DATA_CHAR] * len(metadata_df)
metadata_df["rate"] = [NO_DATA_CHAR] * len(metadata_df)
metadata_df["rate_subs"] = [NO_DATA_CHAR] * len(metadata_df)
metadata_df["timetree_num_date_confidence"] = [[0,0]] * len(metadata_df)

init_data = {col: [NO_DATA_CHAR] for col in metadata_df.columns}

for branch in BRANCH_LIST:
    tree = tree_dict[branch]["tree"]

    df = copy.deepcopy(metadata_df[metadata_df["branch_minor"].isin(BRANCH_LIST[branch])])
    
    node_i = 0
    
    # Get comment headers for df
    root_comment_dict = parse_comment(tree.root.comment)
    for parameter in root_comment_dict:
        df[parameter] = [NO_DATA_CHAR] * len(df)
    
    for c in tree.find_clades():

        # Rename internal nodes
        if not c.name:
            c.name = "NODE{}".format(node_i)
            node_i += 1

        # Initialize metadata for internal nodes
        if "NODE" in c.name:
            init_data["sample"] = c.name
            data_row = pd.DataFrame(init_data)
            data_row.set_index("sample", inplace=True)
            df = df.append(data_row)
            
        # Set node type
        if "NODE" in c.name:
            node_type = "internal"                    
        else:
            node_type = "terminal"
           
        # Parse comments
        comment_dict = parse_comment(c.comment)
        for parameter,val in comment_dict.items():
            df.at[c.name, parameter] = val
            
        # Get calendar date for node
        calendar_date = mrsd_dict[branch] - float(comment_dict["height"])
        df.at[c.name, "timetree_num_date"] = calendar_date
        
        # Rate
        if df["rate"][c.name] == NO_DATA_CHAR:
            df.at[c.name, "rate"] = 0
        else:
            df.at[c.name, "rate"] = float(df["rate"][c.name])
            
        rate = df["rate"][c.name]
        rate_subs = rate * SEQ_LEN
        df.at[c.name, "rate_subs"] = rate_subs
        
        
        # Convert
        height = comment_dict["height"]
        df.at[c.name, "height"] = float(height)
        if "height_95%_HPD" in comment_dict:
            height_95_hpd = comment_dict["height_95%_HPD"]
            height_95_hpd_list = [float(h) for h in height_95_hpd.strip("{}").split(",")]
            calendar_95_hpd = [mrsd_dict[branch] - h for h in height_95_hpd_list]
            df.at[c.name, "timetree_num_date_confidence"] = [calendar_95_hpd[1], calendar_95_hpd[0]]
        
        # Add data
        df.at[c.name, "node_type"] = node_type
        df.at[c.name, "branch_length"] = c.branch_length

        
        
    # Replace NAs for inconsistent parameters
    df.fillna(NO_DATA_CHAR, inplace=True)
    tree_dict[branch]["df"] = copy.deepcopy(df)

    
    #display(tree_dict[branch]["df"])


## Reduced Dataframe for Auspice

In [9]:
# OPtions
print(tree_dict[branch]["df"].columns)

Index(['strain', 'date', 'date_bp', 'country', 'province', 'country_lat',
       'country_lon', 'province_lat', 'province_lon', 'biovar', 'branch_major',
       'branch_minor', 'biosample_accession', 'biosample_comment',
       'branch_number', 'continent', 'date_mean', 'date_bp_mean', 'date_err',
       'lat', 'lon', 'host_human', 'node_type', 'branch_length', 'rate',
       'rate_subs', 'timetree_num_date_confidence', 'height_95%_HPD', 'length',
       'posterior', 'height_median', 'height_range', 'height',
       'timetree_num_date', 'length_range', 'rate_95%_HPD', 'length_95%_HPD',
       'rate_median', 'length_median', 'rate_range'],
      dtype='object')


In [10]:
for branch in tree_dict:
    
    columns = [
        # Node type is mandatory
        "node_type",        
        # Draw Divergence Tree
        "branch_length",
        # Draw Time Tree
        "timetree_num_date",
        "timetree_num_date_confidence",    
        "rate",
        "rate_subs",
        # Geo
        "country",
        "province",
        "country_lat",
        "country_lon",
        "province_lat",
        "province_lon",        
        # Text descriptions
        "strain",
        "branch_major",
        "biovar",
        "host_human",
        "date",
        "date_bp",        
    ]
    
    auspice_df = copy.copy(tree_dict[branch]["df"][columns])
    auspice_df.rename(columns={"rate": "rate_beast"}, inplace=True)
    auspice_df.rename(columns={"rate_subs": "rate_subs_beast"}, inplace=True)
    tree_dict[branch]["auspice_df"] = auspice_df
    display(tree_dict[branch]["auspice_df"])

Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Reference,terminal,17.516688,1992.000000,"[1992.0, 1992.0]",3.553717e-07,1.502902,United States of America,Colorado,39.7837,-100.446,38.7252,-105.608,CO92,1.ORI,Orientalis,Human,1992,-29
GCA_000834775.1_ASM83477v1_genomic,terminal,9.482611,1967.000000,"[1967.0, 1967.0]",3.123540e-07,1.320976,United States of America,Arizona,39.7837,-100.446,34.3953,-111.763,Dodson,1.ORI,Orientalis,Human,1967.0,-54
GCA_000834335.1_ASM83433v1_genomic,terminal,4.917912,1954.000000,"[1954.0, 1954.0]",2.122180e-07,0.897491,United States of America,California,39.7837,-100.446,36.7015,-118.756,Shasta,1.ORI,Orientalis,Human,1954.0,-67
GCA_000169635.1_ASM16963v1_genomic,terminal,32.332497,2005.000000,"[2005.0, 2005.0]",1.026717e-07,0.434209,Madagascar,,-18.925,46.4416,,,MG05-1020,1.ORI,Orientalis,Human,2005.0,-16
GCA_000170275.1_ASM17027v1_genomic,terminal,18.461812,1991.000000,"[1991.0, 1991.0]",8.033665e-08,0.339752,China,Yunnan,35.0001,105,25,102,F1991016,1.ORI,Orientalis,Non-Human,1991.0,-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE111,internal,0.194478,2004.241372,"[2000.475519086657, 2007.5041318052265]",1.016505e-07,0.429890,,,,,,,,,,,,
NODE112,internal,0.929602,2006.999750,"[2003.8227971142646, 2009.6153588063687]",1.311277e-07,0.554552,,,,,,,,,,,,
NODE113,internal,0.351704,2005.846571,"[2002.70281589384, 2008.677776298874]",1.653129e-07,0.699124,,,,,,,,,,,,
NODE114,internal,2.391180,2006.938556,"[2004.3050370690294, 2009.1217091576173]",1.153553e-07,0.487849,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_000324785.1_ASM32478v1_genomic,terminal,7.304944,1979.000000,"[1979.0, 1979.0]",2.189105e-07,0.925794,China,Yunnan,35.0001,105,25,102,E1979001,1.IN,Antiqua,Non-Human,1979.0,-42
GCA_000022845.1_ASM2284v1_genomic,terminal,3.860503,1976.000000,"[1976.0, 1976.0]",3.444121e-07,1.456553,China,Tibet,35.0001,105,29.8556,90.875,Z176003,1.IN,Antiqua,Non-Human,1976.0,-45
GCA_000022825.1_ASM2282v1_genomic,terminal,31.757595,1982.000000,"[1982.0, 1982.0]",2.079253e-06,8.793364,China,Yunnan,35.0001,105,25,102,D182038,1.IN,Antiqua,Non-Human,1982.0,-39
GCA_000022805.1_ASM2280v1_genomic,terminal,70.350981,2006.000000,"[2006.0, 2006.0]",1.477245e-07,0.624742,China,Yunnan,35.0001,105,25,102,D106004,1.IN,Antiqua,Non-Human,2006.0,-15
GCA_000325465.1_ASM32546v1_genomic,terminal,9.419908,1957.000000,"[1957.0, 1957.0]",2.958604e-07,1.251223,China,Yunnan,35.0001,105,25,102,YN472,1.IN,Antiqua,Non-Human,1957.0,-64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE33,internal,8.520717,1972.797250,"[1957.7256089593875, 1986.4898330318686]",5.422988e-08,0.229343,,,,,,,,,,,,
NODE34,internal,3.206524,1981.521412,"[1973.0440875505783, 1988.6716159399791]",2.454978e-07,1.038234,,,,,,,,,,,,
NODE35,internal,9.000596,1987.215157,"[1980.308465360717, 1993.659574537487]",1.360054e-07,0.575180,,,,,,,,,,,,
NODE36,internal,1.179186,1990.707595,"[1985.2879987060362, 1995.205487889126]",1.092851e-07,0.462177,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_000169615.1_ASM16961v1_genomic,terminal,51.265788,2004.0,"[0, 0]",6.444323e-08,0.272537,Uganda,,1.53336,32.2167,,,UG05-0454,1.ANT,Antiqua,Human,2004.0,-17.0
GCA_003798225.1_ASM379822v1_genomic,terminal,2.045508,1954.0,"[1954.0, 1954.0]",4.756261e-07,2.011469,Democratic Republic of the Congo,,-2.98143,23.8223,,,FDAARGOS_601,1.ANT,Antiqua,,1954.0,-67.0
GCA_000835005.1_ASM83500v1_genomic,terminal,236.991771,1985.0,"[1985.0, 1985.0]",6.444323e-08,0.272537,Kenya,Nairobi,1.44197,38.4314,-1.30317,36.8261,Nairobi,1.ANT,Antiqua,,1985.0,-36.0
GCA_000013825.1_ASM1382v1_genomic,terminal,12.265788,1965.0,"[1965.0, 1965.0]",1.15907e-07,0.490182,Democratic Republic of the Congo,,-2.98143,23.8223,,,Antiqua,1.ANT,Antiqua,Human,1965.0,-56.0
NODE0,internal,0.0,1758.910228,"[1644.8345812291716, 1855.0439441412525]",0.0,0.0,,,,,,,,,,,,
NODE1,internal,203.946262,1950.327725,"[1943.1927174315974, 1953.999874244536]",6.444323e-08,0.272537,,,,,,,,,,,,
NODE2,internal,0.77972,1954.969051,"[1948.8644061878467, 1964.7555744433698]",2.51785e-07,1.064824,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
SAMEA5818830,terminal,0.080411,1560.000000,"[1560.0, 1560.0]",3.588157e-08,0.151747,Switzerland,Nidwalden,46.8133,8.44495,46.9428,8.41198,STN021,1.PRE,Second Pandemic,Human,[1485:1635],[-536:-386]
SAMEA5818829,terminal,1.728035,1560.000000,"[1560.0, 1560.0]",3.372235e-08,0.142615,Switzerland,Nidwalden,46.8133,8.44495,46.9428,8.41198,STN020,1.PRE,Second Pandemic,Human,[1485:1635],[-536:-386]
SAMEA5818828,terminal,1.728035,1560.000000,"[1560.0, 1560.0]",3.454374e-08,0.146089,Switzerland,Nidwalden,46.8133,8.44495,46.9428,8.41198,STN019,1.PRE,Second Pandemic,Human,[1485:1635],[-536:-386]
SAMEA5818826,terminal,0.023473,1560.000000,"[1560.0, 1560.0]",3.387151e-08,0.143246,Switzerland,Nidwalden,46.8133,8.44495,46.9428,8.41198,STN014,1.PRE,Second Pandemic,Human,[1485:1635],[-536:-386]
SAMEA5818825,terminal,0.080411,1560.000000,"[1560.0, 1560.0]",1.428955e-07,0.604319,Switzerland,Nidwalden,46.8133,8.44495,46.9428,8.41198,STN013,1.PRE,Second Pandemic,Human,[1485:1635],[-536:-386]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE34,internal,2.944394,1631.712419,"[1614.2625699793778, 1647.1824752526363]",5.042076e-08,0.213234,,,,,,,,,,,,
NODE35,internal,80.544459,1690.388727,"[1655.2814046941703, 1716.0895381907403]",1.671410e-08,0.070686,,,,,,,,,,,,
NODE36,internal,17.041200,1708.538003,"[1693.4645716880063, 1719.1929733173438]",5.247133e-08,0.221906,,,,,,,,,,,,
NODE37,internal,1.765094,1716.336286,"[1708.8120020077208, 1720.7597334210177]",3.043283e-08,0.128703,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_009909635.1_ASM990963v1_genomic,terminal,13.009036,1923.000000,"[1923.0, 1923.0]",1.678153e-07,0.709707,Russia,Rostov Oblast,64.6863,97.7453,47.6222,40.7958,9_10,2.MED,Medievalis,Human,1923.0,-98
GCA_009296005.1_ASM929600v1_genomic,terminal,0.469889,1953.000000,"[1953.0, 1953.0]",8.264115e-08,0.349498,Russia,Chechnya,64.6863,97.7453,43.3976,45.6985,C-25,2.MED,Medievalis,Non-Human,1953.0,-68
GCA_008630485.1_ASM863048v1_genomic,terminal,3.250880,1997.000000,"[1996.9999999999998, 1997.0]",7.262572e-07,3.071413,Russia,Kabardino-Balkaria,64.6863,97.7453,43.4428,43.4205,C-742,2.MED,Medievalis,Non-Human,1997.0,-24
GCA_008630435.1_ASM863043v1_genomic,terminal,26.020483,1996.000000,"[1996.0, 1996.0]",6.860444e-07,2.901349,Russia,Karachay-Cherkessia,64.6863,97.7453,43.7368,41.7268,C-719,2.MED,Medievalis,Non-Human,1996.0,-25
GCA_008630395.1_ASM863039v1_genomic,terminal,4.468999,1984.000000,"[1983.9999999999998, 1984.0]",5.818262e-07,2.460600,Russia,Republic of Dagestan,64.6863,97.7453,43.0883,47.1499,C-528,2.MED,Medievalis,Non-Human,1984.0,-37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE110,internal,6.738293,1988.877114,"[1981.978551608946, 1995.1945509657712]",1.440738e-07,0.609302,,,,,,,,,,,,
NODE111,internal,9.988554,2004.492834,"[1995.7386247694087, 2008.9988057840776]",7.567338e-08,0.320030,,,,,,,,,,,,
NODE112,internal,2.784600,1991.215981,"[1985.5990761703022, 1996.0199169572434]",5.944240e-08,0.251388,,,,,,,,,,,,
NODE113,internal,0.788837,1993.385854,"[1988.484805318999, 1996.8918607345145]",1.672413e-07,0.707280,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_000834275.1_ASM83427v1_genomic,terminal,8.145852,1940.000000,"[1940.0, 1940.0]",3.959404e-06,16.744707,China,Heilongjiang,35.0001,105,48,128,Harbin35,2.ANT,Antiqua,Human,1940.0,-81
GCA_002005285.1_ASM200528v1_genomic,terminal,0.144253,1924.000000,"[1924.0, 1924.0]",3.881345e-07,1.641459,India,Maharashtra,22.3511,78.6677,18.9068,75.6742,195/P,2.ANT,Antiqua,Human,[1898:1950],[-123:-71]
GCA_001601285.1_ASM160128v1_genomic,terminal,46.915533,1970.000000,"[1970.0, 1970.0]",3.295522e-08,0.139371,Russia,Zabaykalsky Krai,64.6863,97.7453,52.2485,115.956,I-1996,2.ANT,Antiqua,Non-Human,1970.0,-51
GCA_000475135.2_YPS03_v2_genomic,terminal,12.756693,2002.000000,"[2002.0, 2002.0]",1.322908e-06,5.594709,India,Himachal Pradesh,22.3511,78.6677,31.8168,77.3493,S3,2.ANT,Antiqua,Human,2002.0,-19
GCA_000834885.1_ASM83488v1_genomic,terminal,76.145852,2008.000000,"[2008.0, 2008.0]",2.034777e-07,0.860527,China,Heilongjiang,35.0001,105,48,128,Nicholisk 41,2.ANT,Antiqua,,2008.0,-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE48,internal,1.014033,1992.403938,"[1987.0633803733706, 1995.9881444470643]",4.731970e-08,0.200120,,,,,,,,,,,,
NODE49,internal,7.487647,1996.398445,"[1989.6421990442122, 2001.937263084358]",2.958198e-08,0.125105,,,,,,,,,,,,
NODE50,internal,1.437066,1996.889808,"[1989.3174213737238, 2001.9925075131018]",2.068752e-08,0.087490,,,,,,,,,,,,
NODE51,internal,1.531915,2000.047771,"[1992.943027659938, 2004.9950608365348]",2.862714e-08,0.121067,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_009363195.1_ASM936319v1_genomic,terminal,1.242061,1977.0,"[1977.0, 1977.0]",3.121084e-07,1.319937,Russia,Tuva Republic,64.6863,97.7453,51.4017,93.8583,I-2638,4.ANT,Antiqua,Non-Human,1977.0,-44.0
GCA_009295945.1_ASM929594v1_genomic,terminal,0.13836,1984.0,"[1984.0, 1984.0]",7.191196e-07,3.041227,Russia,Tuva Republic,64.6863,97.7453,51.4017,93.8583,I-3113,4.ANT,Antiqua,Non-Human,1984.0,-37.0
GCA_003074255.1_ASM307425v1_genomic,terminal,24.687046,2012.0,"[2012.0, 2012.0]",5.791297e-08,0.24492,Russia,Altai Republic,64.6863,97.7453,50.7114,86.8572,1454,4.ANT,Antiqua,Non-Human,2012.0,-9.0
GCA_003074315.1_ASM307431v1_genomic,terminal,3.13836,1987.0,"[1987.0, 1987.0]",7.964326e-08,0.336819,Russia,Tuva Republic,64.6863,97.7453,51.4017,93.8583,I-3223,4.ANT,Antiqua,Non-Human,1987.0,-34.0
GCA_003074235.1_ASM307423v1_genomic,terminal,3.891382,1988.0,"[1988.0, 1988.0]",1.957728e-06,8.279425,Mongolia,Bayan-Ölgii,46.825,103.85,48.547,89.8549,I-3244,4.ANT,Antiqua,Non-Human,1988.0,-33.0
GCA_003074215.1_ASM307421v1_genomic,terminal,0.718675,2015.0,"[2015.0, 2015.0]",3.394977e-08,0.143577,Russia,Altai Republic,64.6863,97.7453,50.7114,86.8572,349,4.ANT,Antiqua,Non-Human,2015.0,-6.0
GCA_003074195.1_ASM307419v1_genomic,terminal,0.718675,2015.0,"[2015.0, 2015.0]",6.296806e-08,0.266298,Russia,Altai Republic,64.6863,97.7453,50.7114,86.8572,337,4.ANT,Antiqua,,2015.0,-6.0
GCA_003074155.1_ASM307415v1_genomic,terminal,1.670902,2015.0,"[2015.0, 2015.0]",3.472378e-08,0.14685,Russia,Altai Republic,64.6863,97.7453,50.7114,86.8572,338,4.ANT,Antiqua,Non-Human,2015.0,-6.0
GCA_003086015.1_ASM308601v1_genomic,terminal,36.242061,2012.0,"[2012.0, 2012.0]",2.691015e-08,0.113806,Russia,Tuva Republic,64.6863,97.7453,51.4017,93.8583,M-1944,4.ANT,Antiqua,Non-Human,2012.0,-9.0
GCA_003085915.1_ASM308591v1_genomic,terminal,8.25314,2014.0,"[2014.0, 2014.0]",2.957504e-07,1.250757,Russia,Altai Republic,64.6863,97.7453,50.7114,86.8572,517,4.ANT,Antiqua,Human,2014.0,-7.0


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_000325305.1_ASM32530v1_genomic,terminal,4.884258,1998.0,"[1998.0, 1998.0]",5.196203e-08,0.219753,Mongolia,Govi-Altai,46.825,103.85,45.3463,95.9326,MGJZ9,3.ANT,Antiqua,Non-Human,1998.0,-23.0
GCA_000325285.1_ASM32528v1_genomic,terminal,4.504057,1997.0,"[1997.0, 1997.0]",4.693787e-07,1.985049,Mongolia,Dornogovi,46.825,103.85,44.418,109.778,MGJZ7,3.ANT,Antiqua,Non-Human,1997.0,-24.0
GCA_000325265.1_ASM32526v1_genomic,terminal,4.504057,1997.0,"[1997.0, 1997.0]",1.792326e-07,0.757992,Mongolia,Dornogovi,46.825,103.85,44.418,109.778,MGJZ6,3.ANT,Antiqua,Non-Human,1997.0,-24.0
GCA_000325245.1_ASM32524v1_genomic,terminal,3.242909,1980.0,"[1980.0, 1980.0]",4.604132e-06,19.471327,Mongolia,Govi-Altai,46.825,103.85,45.3463,95.9326,MGJZ3,3.ANT,Antiqua,Non-Human,1980.0,-41.0
GCA_000325205.1_ASM32520v1_genomic,terminal,6.884258,2000.0,"[2000.0, 2000.0]",1.895825e-07,0.801763,Mongolia,Bayan-Ölgii,46.825,103.85,48.547,89.8549,MGJZ11,3.ANT,Antiqua,Non-Human,2000.0,-21.0
GCA_000324325.1_ASM32432v1_genomic,terminal,12.036414,1961.0,"[1961.0, 1961.0]",3.970867e-07,1.679319,China,Gansu,35.0001,105.0,38.0,102.0,CMCC71001,3.ANT,Antiqua,Non-Human,1961.0,-60.0
GCA_000323765.1_ASM32376v1_genomic,terminal,9.721393,1976.0,"[1976.0, 1976.0]",3.723379e-07,1.574654,China,Gansu,35.0001,105.0,38.0,102.0,C1976001,3.ANT,Antiqua,Non-Human,1976.0,-45.0
GCA_000323525.1_ASM32352v1_genomic,terminal,22.721393,1989.0,"[1989.0, 1989.0]",4.556191e-08,0.192686,China,Gansu,35.0001,105.0,38.0,102.0,71021,3.ANT,Antiqua,Non-Human,1989.0,-32.0
GCA_000323505.1_ASM32350v1_genomic,terminal,47.714694,2004.0,"[2004.0, 2004.0]",4.412225e-08,0.186597,China,Qinghai,35.0001,105.0,35.4071,95.9521,7,3.ANT,Antiqua,Human,2004.0,-17.0
GCA_013391945.1_ASM1339194v1_genomic,terminal,51.326956,2017.0,"[0, 0]",4.333042e-08,0.183249,Mongolia,Zavkhan Province,46.825,103.85,48.3444,96.6954,3256,3.ANT,Antiqua,Non-Human,2017.0,-4.0


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_009669545.1_ASM966954v1_genomic,terminal,5.410463,2006.000000,"[2006.0, 2006.0000000000002]",1.570504e-08,0.066418,China,Xinjiang,35.0001,105,42.4805,85.4633,42126,0.ANT,Antiqua,Non-Human,2006.0,-15
GCA_009669555.1_ASM966955v1_genomic,terminal,10.677114,2005.000000,"[2004.9999999999998, 2005.0000000000002]",3.697239e-08,0.156360,China,Xinjiang,35.0001,105,42.4805,85.4633,42123,0.ANT,Antiqua,Non-Human,2005.0,-16
GCA_009669565.1_ASM966956v1_genomic,terminal,2.117711,2005.000000,"[2005.0, 2005.0000000000002]",5.413330e-08,0.228935,China,Xinjiang,35.0001,105,42.4805,85.4633,42118,0.ANT,Antiqua,Non-Human,2005.0,-16
GCA_009669605.1_ASM966960v1_genomic,terminal,2.117711,2005.000000,"[2004.9999999999998, 2005.0]",3.525254e-08,0.149086,China,Xinjiang,35.0001,105,42.4805,85.4633,42117,0.ANT,Antiqua,Non-Human,2005.0,-16
GCA_009669625.1_ASM966962v1_genomic,terminal,13.851436,2005.000000,"[2005.0, 2005.0]",1.210787e-08,0.051205,China,Xinjiang,35.0001,105,42.4805,85.4633,42116,0.ANT,Antiqua,Non-Human,2005.0,-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE97,internal,2.563553,1995.240889,"[1987.4546828191246, 1998.99577465624]",2.856819e-08,0.120818,,,,,,,,,,,,
NODE98,internal,2.375887,1998.403159,"[1991.859927842856, 2000.997431030353]",2.572350e-08,0.108787,,,,,,,,,,,,
NODE99,internal,1.208979,1993.725629,"[1984.0716057453021, 2000.136072806417]",1.762941e-08,0.074557,,,,,,,,,,,,
NODE100,internal,3.754150,1996.486777,"[1989.4158198927937, 2000.9262507455383]",2.582290e-08,0.109208,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
SAMEA5661390,terminal,2.897253,478.0,"[477.9999999999999, 478.0000000000001]",1.459943e-06,6.17424,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,UNT004,0.ANT,Antiqua,Human,[419:537],[-1602:-1484]
SAMEA5661389,terminal,16.676053,516.0,"[515.9999999999999, 516.0000000000001]",3.461309e-09,0.014638,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,UNT003,0.ANT,Antiqua,Human,[433:599],[-1588:-1422]
SAMEA5661385,terminal,24.099582,517.0,"[516.9999999999999, 517.0000000000001]",7.160345e-07,3.02818,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,PET004,0.ANT,Antiqua,Human,[434:600],[-1587:-1421]
SAMEA5661384,terminal,96.812017,521.0,"[520.9999999999999, 521.0000000000001]",1.858864e-08,0.078613,Spain,Valencia Community,39.3261,-4.83798,39.682,-0.765441,VAL001,0.ANT,Antiqua,Human,[432:610],[-1589:-1411]
SAMEA5661372,terminal,6.632183,765.0,"[0, 0]",9.442931e-08,0.399351,France,Centre-Loire Valley,46.6034,1.88833,47.549,1.73241,LSD020,0.ANT,Antiqua,Human,[650:880],[-1371:-1141]
SAMEA5661367,terminal,6.632183,765.0,"[0, 0]",1.2834e-07,0.542762,France,Centre-Loire Valley,46.6034,1.88833,47.549,1.73241,LSD001,0.ANT,Antiqua,Human,[650:880],[-1371:-1141]
SAMEA5661365,terminal,13.841901,575.0,"[574.9999999999999, 575.0000000000001]",3.400581e-08,0.143814,England,East of England,52.531,-1.26491,52.22,0.487578,EDI004,0.ANT,Antiqua,Human,[500:650],[-1521:-1371]
SAMEA5661364,terminal,13.841901,575.0,"[574.9999999999999, 575.0000000000001]",1.977523e-07,0.836314,England,East of England,52.531,-1.26491,52.22,0.487578,EDI003,0.ANT,Antiqua,Human,[500:650],[-1521:-1371]
SAMEA5661363,terminal,2.289918,555.5,"[555.4999999999999, 555.5000000000001]",2.059089e-08,0.087081,England,East of England,52.531,-1.26491,52.22,0.487578,EDI001,0.ANT,Antiqua,Human,[474:637],[-1547:-1384]
SAMEA5661360,terminal,12.176053,511.5,"[511.4999999999999, 511.5000000000001]",1.884688e-07,0.797053,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,DIT003,0.ANT,Antiqua,Human,[428:595],[-1593:-1426]


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GCA_008630455.1_ASM863045v1_genomic,terminal,29.2825,2011,"[2011.0, 2011.0]",3.0165e-08,0.127571,Russia,Republic of Dagestan,64.6863,97.7453,43.0883,47.1499,C-826,0.PE,Caucasica,Non-Human,2011.0,-10
GCA_008630415.1_ASM863041v1_genomic,terminal,6.28249,1988,"[1988.0, 1988.0]",5.68933e-07,2.40607,Russia,Republic of Dagestan,64.6863,97.7453,43.0883,47.1499,C-672,0.PE,Caucasica,Non-Human,1988.0,-33
GCA_006546745.1_ASM654674v1_genomic,terminal,23.105,1990,"[1990.0, 1990.0]",1.54868e-07,0.654951,Mongolia,Bayan-Ölgii,46.825,103.85,48.547,89.8549,I-3269,0.PE,Ulegeica,Non-Human,1990.0,-31
GCA_006546725.1_ASM654672v1_genomic,terminal,23.8653,1986,"[1986.0, 1986.0]",1.12831e-07,0.477172,Mongolia,Govi-Altai,46.825,103.85,45.3463,95.9326,I-3196,0.PE,Ulegeica,Non-Human,1986.0,-35
GCA_006546665.1_ASM654666v1_genomic,terminal,20.4987,1987,"[1987.0, 1987.0]",1.64816e-07,0.697021,Mongolia,Khovd,46.825,103.85,48.0552,91.2673,I-3193,0.PE,Ulegeica,Non-Human,1987.0,-34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NODE79,internal,9.1079,1984.11,"[1977.0886858553683, 1992.827346687587]",7.64583e-08,0.32335,,,,,,,,,,,,
NODE80,internal,4.37041,1991.53,"[1981.778606751268, 1999.4364370064268]",3.8192e-07,1.61518,,,,,,,,,,,,
NODE81,internal,3.51422,1986.39,"[1978.653294424603, 1994.975657130774]",9.264e-08,0.391784,,,,,,,,,,,,
NODE82,internal,0.377691,1991.58,"[1983.1396651849832, 1999.9104904895198]",6.44544e-07,2.72584,,,,,,,,,,,,


Unnamed: 0_level_0,node_type,branch_length,timetree_num_date,timetree_num_date_confidence,rate_beast,rate_subs_beast,country,province,country_lat,country_lon,province_lat,province_lon,strain,branch_major,biovar,host_human,date,date_bp
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
SAMEA104233048,terminal,21.790965,-1944.5,"[0, 0]",2.660027e-08,0.112495,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,Post6,0.PRE,Bronze Age,Human,[-2007:-1882],[-4028:-3903]
SAMEA104233049,terminal,50.342757,-2272.0,"[-2272.0, -2272.0]",2.153232e-07,0.910623,Germany,Bavaria,51.0834,10.4234,48.9468,11.4039,1343UnTal85,0.PRE,Bronze Age,Human,[-2396:-2148],[-4417:-4169]
SAMEA104233047,terminal,236.800707,-2457.0,"[-2457.0, -2457.0]",1.069577e-08,0.045233,Estonia,Pärnu maakond,58.7524,25.3319,58.3195,24.3026,KunilaII,0.PRE,Bronze Age,Human,[-2574:-2340],[-4595:-4361]
SAMEA104233046,terminal,221.689649,-2546.5,"[-2546.5, -2546.5]",1.120606e-08,0.047392,Lithuania,Panevezys County,55.35,23.75,55.9156,25.0312,Gyvakarai1,0.PRE,Bronze Age,Human,[-2621:-2472],[-4642:-4493]
SAMEA3541827,terminal,9.325354,-2776.5,"[-2776.5, -2776.5]",1.13125e-06,4.784166,Russia,Krasnoyarsk Krai,64.6863,97.7453,63.3234,97.098,RISE509,0.PRE,Bronze Age,Human,[-2876:-2677],[-4897:-4698]
SAMEA3541826,terminal,280.290965,-1686.0,"[0, 0]",7.449197e-08,0.315034,Russia,Altai Krai,64.6863,97.7453,52.6932,82.6931,RISE505,0.PRE,Bronze Age,Human,[-1746:-1626],[-3767:-3647]
SAMEA6812877,terminal,150.119559,-2518.0,"[-2518.0, -2518.0]",8.381519e-09,0.035446,Russia,Irkutsk Oblast,64.6863,97.7453,56.637,104.719,GLZ002,0.PRE,Bronze Age,Human,[-2569:-2467],[-4590:-4488]
SAMEA6812876,terminal,1.619559,-2666.5,"[-2666.5, -2666.5]",5.942839e-07,2.513285,Russia,Irkutsk Oblast,64.6863,97.7453,56.637,104.719,GLZ001,0.PRE,Bronze Age,Human,[-2838:-2495],[-4859:-4516]
NODE0,internal,0.0,-2852.091245,"[-3000.1689872318607, -2776.5095208400344]",0.0,0.0,,,,,,,,,,,,
NODE1,internal,17.635705,-2765.279235,"[-2843.5010841256944, -2683.4217622470696]",5.861306e-07,2.478804,,,,,,,,,,,,


---

# Plot Rates

In [11]:
"""rates = [float(rate) for rate in list(tree_dict["1.ORI"]["df"]["rate"]) if rate != NO_DATA_CHAR]
print(sum(rates) / len(rates))
sns.histplot(rates)"""


'rates = [float(rate) for rate in list(tree_dict["1.ORI"]["df"]["rate"]) if rate != NO_DATA_CHAR]\nprint(sum(rates) / len(rates))\nsns.histplot(rates)'

---

# Export

## Time Tree

In [12]:
for branch in tree_dict:
    out_timetree = copy.deepcopy(tree_dict[branch]["tree"])

    metadata_to_comment(out_timetree, tree_dict[branch]["df"])    
    out_timetree_nex_path = os.path.join(tree_dir, branch + ".timetree.nex")
    Phylo.write(out_timetree, out_timetree_nex_path, "nexus")

    for c in out_timetree.find_clades():
        c.comment = None

    out_timetree_nwk_path = os.path.join(tree_dir, branch + ".timetree.nwk")
    Phylo.write(out_timetree, out_timetree_nwk_path, "newick")

## Augur

In [13]:
for branch in tree_dict:
    
    augur_dict = augur_export(
        tree_path=None, 
        aln_path=None,  
        tree=tree_dict[branch]["tree"], 
        tree_df=tree_dict[branch]["auspice_df"], 
        color_keyword_exclude=["geometry"],
        type_convert = {
            "branch_number" : (lambda x : str(x))
        },
    )
    
    tree_dict[branch]["augur_dict"] = augur_dict
    
    first_taxa = list(augur_dict["nodes"].keys())[0]
    print(augur_dict["nodes"][first_taxa])

    out_path_augur_json = os.path.join(tree_dir, branch + "_augur.json" )
    utils.write_json(data=tree_dict[branch]["augur_dict"], file_name=out_path_augur_json, indent=JSON_INDENT)
    tree_dict[branch]["augur_json_path"] = out_path_augur_json

{'node_type': 'internal', 'branch_length': 0.0, 'num_date': 1867.1884218560415, 'num_date_confidence': [1801.597968774552, 1910.50330129055], 'rate_beast': 0.0, 'rate_subs_beast': 0.0, 'country': 'NA', 'province': 'NA', 'country_lat': 'NA', 'country_lon': 'NA', 'province_lat': 'NA', 'province_lon': 'NA', 'strain': 'NA', 'branch_major': 'NA', 'biovar': 'NA', 'host_human': 'NA', 'date': 'NA', 'date_bp': 'NA'}
{'node_type': 'internal', 'branch_length': 0.0, 'num_date': 1824.653130788105, 'num_date_confidence': [1648.352772485698, 1931.4884503335472], 'rate_beast': 0.0, 'rate_subs_beast': 0.0, 'country': 'NA', 'province': 'NA', 'country_lat': 'NA', 'country_lon': 'NA', 'province_lat': 'NA', 'province_lon': 'NA', 'strain': 'NA', 'branch_major': 'NA', 'biovar': 'NA', 'host_human': 'NA', 'date': 'NA', 'date_bp': 'NA'}
{'node_type': 'internal', 'branch_length': 0.0, 'num_date': 1758.9102280437203, 'num_date_confidence': [1644.8345812291716, 1855.0439441412525], 'rate_beast': 0.0, 'rate_subs_be

## Auspice

In [14]:
for branch in tree_dict:
    
    # Store the color
    if branch == "0.ANT4":
        branch_major_color = colors_dict["branch_major"]["0.ANT"]
    else:
        branch_major_color = colors_dict["branch_major"][branch]

    auspice_dict = auspice_export(
        tree=tree_dict[branch]["tree"],
        augur_json_paths=tree_dict[branch]["augur_json_path"], 
        auspice_config_path=auspice_config_path, 
        auspice_colors_path=out_path_colors,
        auspice_latlons_path=out_path_latlon, 
        )


    label_col = list(tree_dict[branch]["auspice_df"])
    print(label_col)

    # Recursively add branch attrs
    branch_attributes(
        tree_dict=auspice_dict["tree"], 
        sub_dict=auspice_dict["tree"], 
        df=tree_dict[branch]["auspice_df"],
        label_col=label_col,
        )
    
    
    # Last manual changes
    auspice_dict_copy = copy.deepcopy(auspice_dict)
    for i in range(0, len(auspice_dict_copy["meta"]["colorings"])):
        coloring = auspice_dict_copy["meta"]["colorings"][i]
        for key in coloring:
            # Node type as internal or terminal
            if coloring[key] == "node_type":
                auspice_dict["meta"]["colorings"][i]['scale'] = [['internal', '#FFFFFF'], ['terminal', branch_major_color]]
                #print(auspice_dict["meta"]["colorings"][i])
            # Confidence category
            if "conf_category" in coloring[key]:
                auspice_dict["meta"]["colorings"][i]['scale'] = [['LOW', '#FFFFFF'], ['HIGH', branch_major_color]]
                #print(auspice_dict["meta"]["colorings"][i])

    # Write outputs - For Local Rendering
    out_path_auspice_local_json = os.path.join(tree_dir, branch + ".json" )
    utils.write_json(data=auspice_dict, file_name=out_path_auspice_local_json, indent=JSON_INDENT, include_version=False)
    export_v2.validate_data_json(out_path_auspice_local_json)
    print("Validation successful for local JSON.\n")

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']








































Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.ORI.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.








































['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.IN.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']












































































Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/1.PRE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of







































Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/2.MED.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.








































['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/2.ANT.json'...












































































Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/4.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confid







































Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.ANT.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confid











































































Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.ANT4.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/config/auspice_config.json'...
Validation success.
['node_type', 'branch_length', 'timetree_num_date', 'timetree_num_date_confidence', 'rate_beast', 'rate_subs_beast', 'country', 'province', 'country_lat', 'country_lon', 'province_lat', 'province_lon', 'strain', 'branch_major', 'biovar', 'host_human', 'date', 'date_bp']
Validating produced JSON
Validating schema of '/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade/summary_trees_noHyperPrior/relaxed_clock/0.PE.json'...
Validating that the JSON is internally consistent...
Validation successful for local JSON.

Validating schema of





































