## Load in modules

In [3]:
import pandas as pd
import math
import datetime as dt
import dendropy
import tqdm
import json

## Process date formats

In [4]:
def get_date_str(dec_date):
    """Convert a decimal year into a ``'YYYY-MM-DD'`` date string.

    The integer part of ``dec_date`` selects the calendar year and the
    fractional part is interpreted as the elapsed fraction of that year.
    The fraction is scaled by the real length of the year (365 or 366 days),
    so dates that fall in leap years are not shifted towards January.

    Parameters
    ----------
    dec_date : float
        Decimal year, e.g. ``2009.453425``.

    Returns
    -------
    str
        The corresponding calendar date formatted as ``%Y-%m-%d``.
    """
    year = int(dec_date)
    # Count the days between 1 January and 31 December (inclusive) rather than
    # assuming a fixed 365-day year; this yields 366 for leap years.
    days_in_year = (dt.date(year, 12, 31) - dt.date(year, 1, 1)).days + 1
    date = dt.datetime(year, 1, 1) + dt.timedelta(days=(dec_date % 1) * days_in_year)

    return date.strftime('%Y-%m-%d')


## Read and process MCC tree

In [22]:
## load the time tree from its NEXUS file
time_treefile = 'analyses/phylogenetics/DENV1_Americas_timetree.tree'
tree = dendropy.Tree.get(path=time_treefile, schema='nexus')
most_recent_dec_date = 2023.69589  # not referenced again within this cell

## label every node visited in preorder with a running integer (1, 2, 3, ...)
count = 0
for count, node in enumerate(tree.preorder_node_iter(), start=1):
    node.label = count

## collect one record per edge that has a node at its older end
edge_info = []
for edge in tqdm.tqdm(tree.postorder_edge_iter()):
    if not edge.tail_node:
        # nothing sits at the older end of this edge, so there is no pair to record
        continue

    ## NOTE: the naming below is flipped relative to dendropy:
    ## "head" here is dendropy's tail_node (the older node) and
    ## "tail" here is dendropy's head_node (the younger node)
    head_node = edge.tail_node
    head_dec_date = float(head_node.annotations['num_date'].value)
    head_date = get_date_str(head_dec_date)

    tail_node = edge.head_node
    tail_dec_date = float(tail_node.annotations['num_date'].value)
    tail_date = get_date_str(tail_dec_date)

    head_country = head_node.annotations['country'].value
    tail_country = tail_node.annotations['country'].value

    edge_info.append(dict(
        head_node=head_node.label,
        tail_node=tail_node.label,
        length=edge.length,
        head_country=head_country,
        tail_country=tail_country,
        head_date=head_date,
        head_dec_date=head_dec_date,
        tail_date=tail_date,
        tail_dec_date=tail_dec_date,
    ))


2087it [00:00, 163751.73it/s]


In [23]:
# Turn the list of per-edge records into a table (one row per recorded edge)
# and display it as the cell output.
tree_edge_df = pd.DataFrame(data=edge_info)
tree_edge_df


Unnamed: 0,head_node,tail_node,length,head_country,tail_country,head_date,head_dec_date,tail_date,tail_dec_date
0,4,5,0.000000,India,India,2009-06-15,2009.453425,2009-06-15,2009.453425
1,4,6,0.000000,India,India,2009-06-15,2009.453425,2009-06-15,2009.453425
2,3,4,50.853314,Thailand,India,1958-08-08,1958.600111,2009-06-15,2009.453425
3,10,11,4.539218,Cambodia,Cambodia,1997-11-30,1997.914207,2002-06-15,2002.453425
4,10,12,7.539218,Cambodia,Cambodia,1997-11-30,1997.914207,2005-06-15,2005.453425
...,...,...,...,...,...,...,...,...,...
2081,929,1010,0.862436,Colombia,Colombia,1994-01-22,1994.060204,1994-12-03,1994.922640
2082,926,929,4.557635,Colombia,Colombia,1989-07-03,1989.502569,1994-01-22,1994.060204
2083,568,926,25.454878,East_Timor,Colombia,1964-01-18,1964.047691,1989-07-03,1989.502569
2084,2,568,20.458200,East_Timor,East_Timor,1943-08-04,1943.589490,1964-01-18,1964.047691


In [24]:
# Save the edge table as a tab-separated file, leaving out the row index.
tree_edge_df.to_csv(
    'analyses/phylogenetics/DENV1_Americas_timetree_df.tsv',
    index=False,
    sep='\t',
)


In [25]:
# Write the tree object back to disk in Newick format.
tree.write(
    schema='newick',
    path='analyses/phylogenetics/DENV1_Americas_timetree_labelled.tree',
)
