# human cases vs. circulation times

This is a copy of the original notebook, but meant for running on test files and messing things up. What I learned from running these test files: 

1. I should not be using `tree.setAbsoluteTime(date_most_recent_tip)`. This messes things up when I enumerate subtype trees, whereas just using the `absoluteTime` already calculated for tips and nodes works totally fine across subtrees, subtype trees, and full trees. 
2. I was messing up the calculation of circulation times because of the absolute time thing, but I was also not adding the root of each subtree into the `dates` list. So I added in a line in the main script to pull the root time and add it to the date list. 

With those 2 fixes, this is now performing as expected on the test trees. 

In [1]:
import glob
import re,copy, imp
import pandas as pd 
import numpy as np

# for this to work, you will need to download the most recent version of baltic, available here 
bt = imp.load_source('baltic', '/Users/lmoncla/src/baltic/baltic/baltic.py')

%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.patheffects as path_effects
import matplotlib.lines as mlines
from matplotlib.font_manager import FontProperties
import matplotlib.colors as clr
import textwrap as textwrap
from textwrap import wrap

import time
from io import StringIO

import rpy2
%load_ext rpy2.ipython

In [2]:
from datetime import date

# Date on which the notebook is run, as a YYYY-MM-DD string.
current_date = date.today().isoformat()

In [3]:
# Hex colour assigned to each host category.
domestic_color = "#4E83AE"  # domestic hosts
wild_color = "#CEB540"      # wild hosts
human_color = "#DE4428"     # human hosts

In [4]:
def get_taxa_lines(tree_path):
    """Return every non-tree line of a ``.trees`` file as a single string.

    A line is kept when it does not contain the substring ``state``
    (case-insensitive); the kept lines are concatenated in file order with
    their line endings intact.

    Args:
        tree_path: path of the ``.trees`` file to read.

    Returns:
        str: the concatenated lines (empty string if nothing is kept).
    """
    # NOTE(review): the filter is a plain substring match, so any header line
    # that happens to contain "state" (e.g. inside a taxon name) is dropped
    # as well -- confirm this is acceptable for the files in use.
    # Plain "r" already applies universal-newline handling; the old "rU" mode
    # was removed in Python 3.11 and raises ValueError there.
    with open(tree_path, "r") as infile:
        return "".join(line for line in infile if "state" not in line.lower())

In [5]:
def get_burnin_value(tree_path, burnin_percent):
    """Return the number of trees to discard as burn-in.

    Trees are counted as the lines of the file that contain the substring
    ``state`` (case-insensitive).

    Args:
        tree_path: path of the ``.trees`` file to read.
        burnin_percent: multiplier applied directly to the tree count, so it
            must be given as a fraction (e.g. ``0.1`` for 10%).

    Returns:
        The tree count multiplied by ``burnin_percent`` (not rounded, so it
        is a float whenever ``burnin_percent`` is a float).
    """
    # Plain "r" already applies universal-newline handling; the old "rU" mode
    # was removed in Python 3.11 and raises ValueError there.
    with open(tree_path, "r") as infile:
        numtrees = sum(1 for line in infile if "state" in line.lower())

    return numtrees * burnin_percent

In [6]:
"""enumerate host transitions on the tree"""

def retrieve_subtrees(tree, trait, target_host, stop_state):
    """Collect the subtrees that start where the trait switches into ``target_host``.

    Branches are visited in order of increasing ``height``. Whenever a branch
    is in ``target_host`` while its parent is in a different state, a subtree
    rooted at that branch is extracted with ``tree.subtree``; traversal
    continues for as long as the branches' states are in ``stop_state``.

    Trait values are normalised (lower-cased, spaces replaced by underscores)
    before any comparison, so ``target_host`` and the entries of
    ``stop_state`` must be given in that normalised form.

    Side effect: ``tree.root.traits[trait]`` is overwritten with
    ``'ancestor'``.

    Args:
        tree: tree object providing ``root``, ``Objects`` and ``subtree(...)``.
        trait: key of the trait to read from each branch's ``traits`` dict.
        target_host: normalised trait value whose introductions are enumerated.
        stop_state: collection of normalised trait values within which
            subtree traversal continues.

    Returns:
        dict: ``{target_host: [(parent_state, subtree), ...]}`` where
        ``parent_state`` is the normalised state of the parent branch.
    """
    def normalised_state(branch):
        # lower-case, underscore-separated trait value of a branch
        return branch.traits[trait].lower().replace(" ", "_")

    def within_stop_state(branch):
        # traversal condition handed to tree.subtree
        return normalised_state(branch) in stop_state

    ## give root node some trait value that's different from what the actual
    ## tree root has, so it registers as a switch
    tree.root.traits[trait] = 'ancestor'

    subtype_trees = {target_host: []}

    for k in sorted(tree.Objects, key=lambda x: x.height):
        kp = k.parent  # parent branch of k

        kloc = normalised_state(k)
        # a parent without the trait is treated as the ancestral state
        if trait in kp.traits:
            kploc = normalised_state(kp)
        else:
            kploc = 'ancestor'

        # only a switch *into* the target host starts a new subtree
        if kloc == kploc or kloc != target_host:
            continue

        subtree = tree.subtree(k, traverse_condition=within_stop_state)
        if subtree is None:
            continue

        subtree.traverse_tree()
        subtree.sortBranches()
        subtype_trees[target_host].append((kploc, subtree))

    return subtype_trees

In [52]:
def output_cluster_times(subtype_trees, traitName):
    """Summarise each subtree by its circulation time and number of human tips.

    For every subtree the dates considered are the root's ``absoluteTime``,
    each internal node's ``absoluteTime`` and, for each tip, the second
    ``|``-separated field of its ``numName`` parsed as a float. The
    circulation time is the span between the earliest and latest of these.
    Progress is printed along the way.

    Args:
        subtype_trees: mapping of deme -> list of ``(source_state, subtree)``
            pairs.
        traitName: key of the host trait in each tip's ``traits`` dict.

    Returns:
        dict: ``{subtree number: {circulation time: number of tips whose
        trait equals "human"}}``, with subtrees numbered from 1.
    """
    cluster_times = {}

    for deme_subtrees in subtype_trees.values():
        # NOTE: the results are rebuilt for every deme, so only the clusters
        # of the last deme iterated end up in the returned dict.
        cluster_times = {}

        for subtree_number, entry in enumerate(deme_subtrees, start=1):
            print("subtree", subtree_number)
            tree = entry[1]  # entry is (source state, subtree); only the subtree is used

            # the subtree's root counts towards the date range as well
            root_time = tree.root.absoluteTime
            print(root_time)
            dates = [root_time]
            hosts = []

            for branch in tree.Objects:
                if branch.branchType == "leaf":
                    # tip date is taken from the tip name, not from absoluteTime
                    when = float(branch.numName.split("|")[1])
                    print(branch.numName, when)
                    hosts.append(branch.traits[traitName])
                elif branch.branchType == "node":
                    when = branch.absoluteTime
                    print("node", when)
                dates.append(when)

            earliest, latest = min(dates), max(dates)
            date_range = latest - earliest
            print(earliest, latest, date_range)

            cluster_times[subtree_number] = {date_range: hosts.count("human")}

    return cluster_times

## Run on trees file

In [66]:
# Configuration for a run on a single test subtree.
burnin = 0
test_trees = "beast-runs/2021-07-26-mascot-3deme-skyline-with-mig-fixed-log/with-extra-logger/test-subtree-2.trees"
taxa_lines = get_taxa_lines(test_trees)

# Trait key to read: "typeTrait" for a .trees file, "max" for the MCC tree.
trait = "typeTrait"
target_host = "domestic"
# Hosts within which subtree traversal continues: we stay within poultry
# (domestic) while also keeping human tips.
stop_state = ["domestic", "human"]

  


In [67]:
# bt.loadNexus is given the file path, not a file handle, so the file does
# not need to be opened separately here.
tree = bt.loadNexus(test_trees)
# NOTE: tree.setAbsoluteTime(date_most_recent_tip) is deliberately not called;
# see the notes at the top of the notebook.

# enumerate subtrees
subtype_trees = retrieve_subtrees(tree, trait, target_host, stop_state)

cluster_times = output_cluster_times(subtype_trees, trait)

## the format here is {subtree #: {circulation time: number of human tips}}
print(cluster_times)


subtree 1
2006.4634706438358
A/Chicken/Turkey-Batman/09rs2842-92/2007|2007.085|2007-02-XX|Avian|West_Asia|Turkey||EPI293952|chicken|domestic 2007.085
2006.4634706438358 2007.085 0.6215293561642738
subtree 2
2007.4632536438357
node 2007.8000456438358
A/chicken/WestBengal/239022/2010|2010.03|2010-01-12|Avian|South_Asia|India|CY061302|EPI287692|chicken|domestic 2010.03
node 2008.0162026438359
A/chicken/India/88761/2008|2008.56|2008-07-24|Avian|South_Asia|India|MK392388|EPI1508899|chicken|domestic 2008.56
A/chicken/India/96880/2008|2008.178|2008-03-06|Avian|South_Asia|India|CY111121|EPI438894|chicken|domestic 2008.178
2007.4632536438357 2010.03 2.5667463561642307
subtree 3
2007.4632536438357
A/chicken/India/81890/2008|2008.055|2008-01-21|Avian|South_Asia|India|CY111073|EPI438846|chicken|domestic 2008.055
2007.4632536438357 2008.055 0.5917463561643217
subtree 4
2007.8944786438358
A/chicken/WestBengal/106181/2008|2008.347|2008-05-07|Avian|South_Asia|India|GU083626|EPI287578|chicken|domestic 

In [68]:
# Check whether a tip gets the same absoluteTime across tree files:
# this cell prints it for the test subtree file.
test_subtree = "beast-runs/2021-07-26-mascot-3deme-skyline-with-mig-fixed-log/with-extra-logger/test-subtree-2.trees"

tree = bt.loadNexus(test_subtree)

# print the absoluteTime of the one tip being compared
for k in tree.Objects:
    if k.branchType == "leaf" and k.numName == "A/Chicken/Turkey-Batman/09rs2842-92/2007|2007.085|2007-02-XX|Avian|West_Asia|Turkey||EPI293952|chicken|domestic":
        print(k.absoluteTime)

2007.0845616438357


In [69]:
# Check whether a tip gets the same absoluteTime across tree files:
# this cell prints it for the full test tree file (the variable is still
# called test_subtree, but it points at test-full-tree.trees).
test_subtree = "beast-runs/2021-07-26-mascot-3deme-skyline-with-mig-fixed-log/with-extra-logger/test-full-tree.trees"

tree = bt.loadNexus(test_subtree)

# print the absoluteTime of the one tip being compared
for k in tree.Objects:
    if k.branchType == "leaf" and k.numName == "A/Chicken/Turkey-Batman/09rs2842-92/2007|2007.085|2007-02-XX|Avian|West_Asia|Turkey||EPI293952|chicken|domestic":
        print(k.absoluteTime)

2007.0854002602741
