In [None]:
import pandas as pd
from ete3 import TreeStyle, Tree, faces
from pathlib import Path

# Load data

In [10]:
# CCA-adding enzyme presence/absence tables, one CSV per superphylum.
# The files are read in the order listed and unpacked into their own names.
asgard_cca_enzyme, eury_cca_enzyme, tack_cca_enzyme, dpann_cca_enzyme = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./download/CCA_asgard_presence_absence.csv",
        "./download/CCA_Euryarchaeota_presence_absence.csv",
        "./download/CCA_TACK_presence_absence.csv",
        "./download/CCA_DPANN_presence_absence.csv",
    )
)

# tRNA tables that are later checked for an encoded CCA tail
canonical_trna = pd.read_csv("./data_sets/canonical_tRNAs.csv")
# Refolded non-canonical tRNAs, reduced to one row per (name, id, superphyla)
non_canonical_trna = pd.read_csv(
    "./non_canon_refolding_results/refolded.csv"
).drop_duplicates(subset=["name", "id", "superphyla"])

In [11]:
# Display the de-duplicated non-canonical tRNA table as the cell output
non_canonical_trna

Unnamed: 0.1,Unnamed: 0,superphyla,id,name,seq,refolded_sec_struct,tRNA_structure_abstracted_refolded,stem_count_refolded,canonical_refolded
0,0,Asgard,3045163_7,JASEJC010000005.trna5,GGGGCAUUGGUGUAAUGGUAGCACCCCUGGUCUGGCACCuAGGAGG...,(((((....((((.......)))).(((((........)))))..(...,(.(.).(.).(.)).,4,tRNA_4-stems
1,1,Euryarchaeota,2026739_441,JAENZL010000013.trna4,GCCAAGAUGGCGGAGCGGCUACGCAAUCGCCUGCAGAGCGAUACCa...,((((((((.((...(((((.........))).))...)).)).......,(.(.(..).)..(.).).,4,tRNA_out
2,2,Euryarchaeota,2026739_441,JAENZL010000028.trna1,AGCCCGGUAGUGUAGUGGUcaAUCAUGCGGGACUCUGACUCCCGCA...,((((((((.................(((((((.......)))))))...,(.(.).(.).).,3,tRNA_out
3,3,Asgard,2053489_161,JAGXOA010000009.trna3,GCAGGUGUAGCCUAGCUGGUAAGACGCUGGCCUUGAGAGCCUGUGC...,(((((((..(((.(((.(......))))))).(((((..((((.((...,(.(.(.(.))).(.(.(.).).).).,7,tRNA_out
4,4,Asgard,2053489_161,JAGXOA010000027.trna3,GCGGGAAUGGCUGAGUGGUaAAAGCAAUAGAUUCAAAAUCUAUCCU...,((((((...(((..........))).(((((((...)))))))(((...,(.(.).(.)(.).(.).).,5,tRNA_out
...,...,...,...,...,...,...,...,...,...
113,113,Asgard,2053491_86,JAGXKG010000001.trna9,GCUGGGAUCUCCUAGCCuGGUAUGGAGCCAGCCUGCUAAGCUGGUG...,(((((((..(((...........)))((((((.......))))))(...,(.(.)(.)(.).(.)).,5,tRNA_out
114,114,Asgard,2053491_86,JAGXKG010000001.trna14,GCGGCCGUGGUCUAGCAUGGUUAGGACUGAAGCUUCCCAAGCUUCC...,((((((((.(........((...((((((((((((...)))))))....,(.(.(.(..).)).).,4,tRNA_out
115,115,Asgard,2053491_86,JAGXKG010000001.trna21,GCCGAGGUAGCCAAGCCcGGCcaACGGCGACGGACUCAAGAUCCGU...,(((((((..(((......)))..((((.(((((.........))))...,(.(.).(.(.).).(.).).,5,tRNA_out
116,116,Asgard,2053491_86,JAGXKG010000001.trna26,GAAUGUAGUCcGGCccAGCAUGGGGGACUCUCGAUCCUCCGACCCG...,...(((...((((((........(((((....(((((........)...,.(.(.(.(.).)).).,4,tRNA_out


In [None]:
# Flag canonical tRNAs that carry an encoded, unpaired CCA tail:
# the dot-bracket structure must end in '...' and the sequence in 'CCA'.
# The flag is stored as an integer (1 = both conditions hold, 0 = otherwise).
canon_tail_unpaired = canonical_trna['dot_bracket_sec_str'].str.endswith('...')
canon_tail_is_cca = canonical_trna['seq'].str.endswith('CCA')
canonical_trna['cca_encoded'] = (canon_tail_unpaired & canon_tail_is_cca).astype(int)

In [None]:
# Flag refolded non-canonical tRNAs that carry an encoded, unpaired CCA tail:
# the refolded dot-bracket structure must end in '...' and the sequence in 'CCA'.
# The flag is stored as an integer (1 = both conditions hold, 0 = otherwise).
non_canon_tail_unpaired = non_canonical_trna['refolded_sec_struct'].str.endswith('...')
non_canon_tail_is_cca = non_canonical_trna['seq'].str.endswith('CCA')
non_canonical_trna['cca_encoded'] = (
    non_canon_tail_unpaired & non_canon_tail_is_cca
).astype(int)

In [19]:
# Find corresponding value in column 'cca_encoded' when searching in column 'name'
def get_cca_encoded_by_name(trna_name, df):
    """Return the ``cca_encoded`` flag of the tRNA called ``trna_name``.

    Parameters
    ----------
    trna_name : str
        Value matched exactly against the ``name`` column.
    df : str
        Selector for the table to search (despite the parameter name it is
        not a DataFrame): ``"canon"`` searches the global ``canonical_trna``
        and ``"non_canon"`` searches the global ``non_canonical_trna``.

    Returns
    -------
    The ``cca_encoded`` value of the first row whose ``name`` matches, or
    ``None`` when no row matches.

    Raises
    ------
    ValueError
        If ``df`` is neither ``"canon"`` nor ``"non_canon"``. A mistyped
        selector previously fell through and returned ``None``, which is
        indistinguishable from "tRNA not found".
    """
    # Only the selected table is touched, so the other global does not need
    # to exist yet when this is called.
    if df == "canon":
        table = canonical_trna
    elif df == "non_canon":
        table = non_canonical_trna
    else:
        raise ValueError(
            f"Unknown table selector {df!r}; expected 'canon' or 'non_canon'"
        )

    # Select just the flag column for the matching rows; names may repeat,
    # in which case the first occurrence wins.
    matches = table.loc[table['name'] == trna_name, 'cca_encoded']
    if matches.empty:
        return None
    return matches.iloc[0]
    

In [13]:
# For each superphylum: (enzyme presence/absence table, name of the column
# that holds the genome identifier in that table)
superphylum_df_map = dict(
    Asgard=(asgard_cca_enzyme, 'Accession'),
    Euryarchaeota=(eury_cca_enzyme, 'Header'),
    TACK=(tack_cca_enzyme, 'Header'),
    DPANN=(dpann_cca_enzyme, 'Header'),
)

In [22]:
# Directory that receives the rendered tree images (created if missing)
output_dir = Path('tree_pics')
output_dir.mkdir(parents=True, exist_ok=True)

# Every result tree below locarna_results that sits in <name>.out/results/
tree_files = list(Path('locarna_results').rglob('*.out/results/result.tree'))

# Leaf labels start with '<superphylum>__'; collect the distinct prefixes
# over all trees and sort them so the colour assignment below is stable
superphyla = sorted({
    leaf.name.split('__')[0]
    for tree_file in tree_files
    for leaf in Tree(str(tree_file)).iter_leaves()
})

# Pair the sorted superphyla with the palette entries in order; superphyla
# beyond the palette length get no entry in color_map
palette = ['#7b3294','#c2a5cf','#a6dba0','#008837']
color_map = dict(zip(superphyla, palette))

def heatmap_layout(node):
    """ETE layout function that decorates leaves with CCA information.

    Internal nodes are left untouched. For a leaf whose label has the form
    ``<superphylum>__<identifier>__<...>__<tRNA name>`` the function

    * rewrites ``node.name`` to ``<superphylum>_<identifier>_<tRNA name>``
      (with ``_recovered_fullconstr`` appended when the original label
      contains ``recovered``),
    * colours the node by superphylum via ``color_map`` (``#333333`` when the
      superphylum has no entry), and
    * adds three aligned text columns (CCA1, CCA2, encoded CCA) showing a
      bullet for a value of ``1`` and blank padding for ``0`` or missing data.

    A layout function can run more than once for the same node (for example
    when the same tree object is drawn twice). The raw label is therefore
    parsed only on the first call and cached on the node, because the
    rewritten ``node.name`` no longer contains the ``__`` separators. Faces
    are attached with ``faces.add_face_to_node`` rather than the permanent
    ``node.add_face`` so they do not pile up across redraws.

    Raises
    ------
    IndexError
        If a leaf label has fewer than four ``__``-separated fields.
    """
    if not node.is_leaf():
        return

    # Parse the raw label once; later calls reuse the cached fields.
    fields = getattr(node, "_heatmap_leaf_fields", None)
    if fields is None:
        raw_label = node.name
        parts = raw_label.split('__')
        superphylum = parts[0]
        identifier = parts[1]
        name = parts[3].replace("\\","")
        recovered = "recovered" in raw_label
        fields = (superphylum, identifier, name, recovered)
        node._heatmap_leaf_fields = fields
        node.name = (
            f"{superphylum}_{identifier}_{name}_recovered_fullconstr"
            if recovered
            else f"{superphylum}_{identifier}_{name}"
        )
    superphylum, identifier, name, recovered = fields

    # Node marker and background share the superphylum colour
    color = color_map.get(superphylum, "#333333")
    node.img_style["fgcolor"] = color
    node.img_style["size"] = 10
    node.img_style["bgcolor"] = color
    node.color = color

    # CCA1 / CCA2: enzyme presence for this genome, None when the superphylum
    # has no table or the identifier is not listed in it
    cca1 = cca2 = None
    enzyme_df, id_col = superphylum_df_map.get(superphylum, (None, None))
    if enzyme_df is not None:
        hits = enzyme_df[enzyme_df[id_col] == identifier]
        if not hits.empty:
            cca1 = int(hits.iloc[0]['CCA1'])
            cca2 = int(hits.iloc[0]['CCA2'])

    # Encoded CCA: recovered tRNAs are looked up in the non-canonical table
    cca3 = get_cca_encoded_by_name(name, df="non_canon" if recovered else "canon")

    # One aligned text cell per value, in columns 1..3
    for column, val in enumerate((cca1, cca2, cca3), start=1):
        if val == 1:
            cell_text = "   \u2022   "
        elif val == 0:
            cell_text = "        "
        else:
            cell_text = "       "
        cell_face = faces.TextFace(cell_text, fsize=15, tight_text=False, fstyle="bold")
        faces.add_face_to_node(cell_face, node, column=column, position="aligned")
            

# Legend header for each heatmap column: (label, extra TextFace options)
legend_columns = (
    ("CCA1", {"tight_text": True}),
    ("     CCA2   ", {}),
    ("Encoded\n    CCA", {}),
)

# Render every tree to tree_pics/<name>.png, where <name> is the tree's
# '<name>.out' directory without the 4-character '.out' suffix
for tree_file in tree_files:
    t = Tree(str(tree_file))

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.layout_fn = heatmap_layout
    for legend_col, (legend_text, face_options) in enumerate(legend_columns):
        ts.legend.add_face(
            faces.TextFace(legend_text, fsize=10, **face_options),
            column=legend_col,
        )

    name = tree_file.parents[1].name[:-4]
    out_path = output_dir / f"{name}.png"
    t.render(str(out_path), tree_style=ts)
    print(f"Saved tree to: {out_path}")


Saved tree to: tree_pics/GAT.png
Saved tree to: tree_pics/GTC.png
Saved tree to: tree_pics/GCC.png
Saved tree to: tree_pics/GCA.png
Saved tree to: tree_pics/CGG.png
Saved tree to: tree_pics/CAC.png
Saved tree to: tree_pics/TAC.png
Saved tree to: tree_pics/GTA.png
Saved tree to: tree_pics/GGT.png
Saved tree to: tree_pics/GCT.png
Saved tree to: tree_pics/TAA.png
Saved tree to: tree_pics/TTC.png
Saved tree to: tree_pics/TGT.png
Saved tree to: tree_pics/TGC.png
Saved tree to: tree_pics/GGG.png
Saved tree to: tree_pics/GGA.png
Saved tree to: tree_pics/TTG.png
Saved tree to: tree_pics/TCC.png
Saved tree to: tree_pics/CCG.png
Saved tree to: tree_pics/TGG_included_recovered_fullconst.png
Saved tree to: tree_pics/CAA.png
Saved tree to: tree_pics/GTT.png
Saved tree to: tree_pics/CTG.png
Saved tree to: tree_pics/GAA.png
Saved tree to: tree_pics/CCT.png
Saved tree to: tree_pics/GGC.png
Saved tree to: tree_pics/GAG.png
Saved tree to: tree_pics/CTC.png
Saved tree to: tree_pics/CAT.png
Saved tree to: