In [2]:
import copy
import dendropy
import matplotlib.pyplot as plt
import numpy as np
import os, sys
import pandas as pd
import seaborn as sns
import config_readwrite as crw
import plot_params as pp
pp.fonts()

name = os.path.join(os.path.dirname(os.getcwd()), "config")
config, cfn = crw.read_config(name)

section="seed"
NEWICK = config[section]["newick_4223"]
RE = config[section]["results"]

if os.path.exists(RE) is False:
    os.mkdir(RE)
    
PRUNED_IDS="/wynton/home/ahituv/fongsl/nullomers/results/lock/clustalo/designed.15mer.4223/pruned_ids.txt" # write here

config[section]["pruned_ids"] = PRUNED_IDS

crw.write(config, cfn)

# viz tree

In [2]:
dtree=dendropy.Tree.get(path=NEWICK, schema="newick")

print(dtree.as_ascii_plot())

                                                      /-- 15-firstorder.547218 
                                                     /+                        
                                                     ||/- 15-firstorder.1725791
                                                     |\+                       
                                                    /+ |/ 15-firstorder.1803695
                                                    || \+                      
                                                    ||  \ 15-firstorder.1760343
                                                   /+|                         
                                                   ||\--- 15-firstorder.1466069
                                                   ||                          
         /-----------------------------------------+|   / 15-firstorder.1997898
         |                                         |\---+                      
         |                              

# prune tree

## function to prune tree
    - based on min edge distance, randomly select one from leaves with shorter edge distances

In [3]:
# # tree pruning
def getNodesPruneTree(tree, min_):
    """
    select ids to keep where edge length is longer than some minimum edge length.
    
    input
        tree (dendropy tree obj) - tree to parse through
        min_ (float) - minimum edge length to prune from. 
        
    method
        1. make empty lists 
            (1) ids - to collect nodes w/ edge lengths long enough to keep, 
            (2)  too_short - a temporary list with ids that are closely related. This gets reset often. 
        2. parse through nodes in tree
        3. prune from external leafs, not internal nodes (internal node taxon = None), count the leafs
        4. keep node taxon if node edge length is greater than min edge length
        5. assess if too_short list has any taxa (too_short length will be greater than one)
            5.1 If so, randomly pick one id from the too short list
        6. reset the too_short list for the next subtree w short edge lengths
        
        7. if subtree has edge lengths shorter than min edge length, add the taxon to the too_short list. 
            One of the edges will be randomly selected in step 5.1 or step 8. 
        8. If last leaf on the tree and edge length is too short, randomly select one of the leaves
    
    return
        len(ids) (int) - count of ids that meet minimum edge length requirements. 
        ids (list) - list of the ids that meet min edge len reqs. 
    
    """
    
    #1
    ids, too_short = [], []
    leaf_count=0
    print("pruning branches less than", min_)
    
    #2
    for nd in tree:
        
        # 3
        if nd.taxon !=None:  # lots of internal nodes named none
            leaf_count+=1
            #4
            if float(nd.edge_length) >= float(min_):  # keep trees w/ edge len greater than min edge length.  
                ids.append(nd.taxon) # retain the taxon.
            
                #5
                if len(too_short) >1:

                    #5.1
                    ids.append(list(np.random.choice(too_short, size=1))[0])
                #6
                too_short = [] # reset the too short
        
            #7
            elif float(nd.edge_length) < float(min_):  # else, the edges are short 
            
                # add to the too short list. One taxon will be randomly selected in step 5.1 or 8
                too_short.append(nd.taxon)
            
        #8 if this is the last leaf on the tree
        if leaf_count ==tree.__len__() and len(too_short)>1:
            ids.append(list(np.random.choice(too_short, size=1))[0])
            
            
    return len(ids), ids

## Rough prune
- traverse tree to find the min number of leaves trying different edge distances (branch length sizes). 

### copy original tree

In [4]:
prune_tree = copy.deepcopy(dtree)

###  w step size = 0.1

In [5]:
for min_ in np.arange(0.1,1, 0.1):

    count, ids = getNodesPruneTree(prune_tree, min_)
    print(min_, len(ids))
    if count<260:
        print("done!", min_, len(ids))
        break

pruning branches less than 0.1
0.1 2356
pruning branches less than 0.2
0.2 726
pruning branches less than 0.30000000000000004
0.30000000000000004 110
done! 0.30000000000000004 110


## Fine prune 
- traverse tree again using a finer branch length step size
### step-size = 0.01

In [7]:
id_dict = {}
for min_ in np.arange(0.22,1, 0.01):

    count, ids = getNodesPruneTree(prune_tree, min_)
    id_dict[min_]=ids
    print(min_, len(ids))
    if count<=261:
        print("done!", min_, len(ids))
        break

pruning branches less than 0.22
0.22 546
pruning branches less than 0.23
0.23 448
pruning branches less than 0.24000000000000002
0.24000000000000002 357
pruning branches less than 0.25
0.25 268
pruning branches less than 0.26
0.26 217
done! 0.26 217


# Final prune
## edge distance = 0.25, N=268 taxa

In [8]:
# should take 4 minutes
min_ = 0.25
count, ids = getNodesPruneTree(prune_tree, min_)

pruning branches less than 0.25


In [9]:
print(len(ids))

268


## write just ids

In [41]:
with open(PRUNED_IDS, "w") as writer:
    for i in ids:
        writer.write(f"{i}\n")
writer.close()

# PRUNE! 

In [11]:
prune_tree.retain_taxa(ids)
print(#dtree.as_string(schema='newick'), 
      prune_tree.__len__())

268


In [12]:
print(prune_tree.as_ascii_plot())

                                                      /-- 15-firstorder.1501054
                    /---------------------------------+                        
                    |                                 \-- 15-firstorder.1719464
                    |                                                          
                    |                              /----- 15-firstorder.561071 
                    |  /---------------------------+                           
                    |  |                           |  /-- 15-firstorder.1688854
                    |  |                           \--+                        
                    |  |                              \-- 15-firstorder.172367 
                    |  |                                                       
                    |  |                      /---------- 15-firstorder.331252 
                    |  |                      |                                
                    |  |                

## write tree

In [23]:
pruned_tree = os.path.join(RE, f"pruned-{len(ids)}.15mer.fo.null.pam.edgelen.{min_}.newick")
prune_tree.write(
    path=pruned_tree,
    schema="newick",
    )

# make svg

In [34]:
svg = os.path.join(os.path.splitext(pruned_tree)[0]+".pdf")
cmd_image =["xvfb-run ete3 view --image",  
            svg,
            "-t",
        pruned_tree]
print(" ".join(cmd_image))
#os.system(" ".join(cmd_image))

xvfb-run ete3 view --image /wynton/home/ahituv/fongsl/nullomers/results/lock/clustalo/pruned-268.15mer.fo.null.pam.edgelen.0.25.pdf -t /wynton/home/ahituv/fongsl/nullomers/results/lock/clustalo/pruned-268.15mer.fo.null.pam.edgelen.0.25.newick


In [None]:
from svglib.svglib import svg2rlg
from reportlab.graphics import renderPDF

# export pruned, full pdf

In [32]:
drawing = svg2rlg(svg)
out = os.path.join(RE, "pruned.pdf")
renderPDF.drawToFile(drawing, out)

In [33]:
svg_full="/wynton/home/ahituv/fongsl/nullomers/results/lock/clustalo/designed.15mer.4223/clustalo_default-none-none-fasttree_full/SEED_15mer.firstorder.pam.purine.nohomopoly.GC.morethan.1bp_related.fa.final_tree.png.svg"

SVG(filename=svg_full)

drawing = svg2rlg(svg_full)
out = os.path.join(RE, "full.pdf")
renderPDF.drawToFile(drawing, out)