In [1]:
#ete kernel
import pandas as pd
import matplotlib.pyplot as plt
import difflib
import os
import re
from ete3 import ClusterTree, TreeStyle, ProfileFace, Tree
from ete3.treeview.faces import add_face_to_node
import loess
from loess import loess_1d
from collections import defaultdict 
import numpy as np
import pickle

import math

os.chdir("/Users/claireleblanc/Documents/grad_school/staller_lab/Evolution_stuff/activity_heatmaps/gene_tree_heatmaps/")



# Prepare Data

In [2]:
# Read in activity data
seq_data = pd.read_csv("FullOrthologDF_20240930_from_pickle.csv", index_col=0)
seq_data

Unnamed: 0,SpeciesName,Seq,Length,WxxLF_loc,SmoothedActivites,LinearCharge,LinearHydrophobicityKD,SmoothedActivitesLoess
0,Sordariomycetes_jgi|Acral2|2019554|gm1.4974_g,MALRIEVYNRIESSTASTALQRQDLRYTFRSNARAASGQANANYQA...,2928,1006,[50849.75653537 50849.75653537 50849.75653537 ...,[ 0. 0. 0.2 ... -0.6 0. 0. ],[0. 0. 0.66666667 ... 0.273333...,"[34049.34344014826, 34049.34344014826, 34049.3..."
1,Sordariomycetes_jgi|Acral2|2027520|fgenesh1_pg...,MWLVVRAGPSPLLQDLAARCHIDGMSMPLLHFDPPDFPLTGVALGI...,455,327,[ 50311.68363476 50311.68363476 50311.683634...,[ 0. 0. 0. 0.2 0.2 0.2 0.2 0.2 0. ...,[0. 0. 0.79333333 0.65111111 0...,"[58366.833469168625, 58366.833469168625, 58366..."
2,Sordariomycetes_jgi|Acral2|2034848|fgenesh1_kg...,MSITELDDFTGFEGGASTAYSSPGAPAVFDLPGASNHVGTISPQDL...,222,94,[ 64062.65418229 64062.65418229 75414.667853...,[ 0. 0. -0.2 -0.2 -0.4 -0.6 -0.6 -0.4 -0.4 ...,[0. 0. 0.53111111 0.57333333 0...,"[36846.26635132608, 36846.26635132608, 36846.2..."
3,Sordariomycetes_jgi|Acral2|2034849|fgenesh1_kg...,MSITGNYNQHFGAAGIISSHNYVLSELDDFTGFEGGASTAYSSPGA...,243,115,[103421.45065119 103421.45065119 103421.450651...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.6 0.48 0...,"[98417.43715391349, 98417.43715391349, 98417.4..."
5,Sordariomycetes_jgi|Acral2|2047914|estExt_Gene...,MADTCGGSTPLKNFSQYGSQDRSLQQDRVVHGFHGSAAAGPSTFRS...,2943,1021,[49761.33963355 49761.33963355 49761.33963355 ...,[ 0. 0. -0.2 ... -0.6 0. 0. ],[0. 0. 0.54444444 ... 0.273333...,"[36959.46539862907, 36959.46539862907, 36959.4..."
...,...,...,...,...,...,...,...,...
1207,Cimm_XP_012214147.1_CoccidioidesImmitisRS,MSTSNLPLDIGTLLDLSTDQFVEDLGSSSHSSLLDQDQLDQLINFN...,242,108,[179044.50242288 179044.50242288 179044.502422...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.41333333 0.45555556 0...,"[215199.18805174268, 215199.18805174268, 21519..."
1208,Cpos_XP_003070205.1_CoccidioidesPosadasiiC735,MSTSNLPLGMVSLSASAVRLVANQRPDIGTLLDLSTDQYVEDLGSS...,260,126,[ 56309.23879651 56309.23879651 56309.238796...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.41333333 0.45555556 0...,"[6138.0, 6138.0, 6138.0, 6138.0, 6138.0, 6138...."
1209,Mory_EHA48851.1_MagnaportheOryzae70_15,MNNTSDLGLDDFTAFGGGASAFPSPAMPGVFDIASTTASTMGTVSP...,239,101,[165405.40017749 165405.40017749 165405.400177...,[ 0. 0. 0. -0.2 -0.2 -0.2 -0.2 -0.4 -0.4 ...,[0. 0. 0.35333333 0.23333333 0...,"[156903.4915015713, 156903.4915015713, 156903...."
1210,Nfis_EAW24893.1_NeosartoryaFischeriNRRL181,MSTPNIAQDMPDFFGLPSNDFGDDFELSTEPTMLSPNQIPTGLMAV...,251,109,[148604.20780985 148604.20780985 148604.207809...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.39555556 0.45333333 0...,"[177960.75802414527, 177960.75802414527, 17796..."


In [3]:
# Function to find the location of the WxxLF motif
def find_WLF(s):
    p = re.compile("W..LF")
    for m in p.finditer(s):
        return m.start()
    return -1

In [4]:
# Get location of all WxxLF motifs
seq_data.loc[:, "Location_WxxLF"] = seq_data["Seq"].apply(find_WLF)

# Getting the WxxLF motif that is the furthest into a sequence, all other sequence will be aligned to this
align_to = max(seq_data["Location_WxxLF"])
align_to

1413

In [5]:
# Calculate padding i.e. how much "sequence" needs to be added to the front to align all the WxxLF motifs
seq_data.loc[:,"pad_by"] = list(seq_data["Location_WxxLF"] * -1 + align_to)
seq_data

Unnamed: 0,SpeciesName,Seq,Length,WxxLF_loc,SmoothedActivites,LinearCharge,LinearHydrophobicityKD,SmoothedActivitesLoess,Location_WxxLF,pad_by
0,Sordariomycetes_jgi|Acral2|2019554|gm1.4974_g,MALRIEVYNRIESSTASTALQRQDLRYTFRSNARAASGQANANYQA...,2928,1006,[50849.75653537 50849.75653537 50849.75653537 ...,[ 0. 0. 0.2 ... -0.6 0. 0. ],[0. 0. 0.66666667 ... 0.273333...,"[34049.34344014826, 34049.34344014826, 34049.3...",1006,407
1,Sordariomycetes_jgi|Acral2|2027520|fgenesh1_pg...,MWLVVRAGPSPLLQDLAARCHIDGMSMPLLHFDPPDFPLTGVALGI...,455,327,[ 50311.68363476 50311.68363476 50311.683634...,[ 0. 0. 0. 0.2 0.2 0.2 0.2 0.2 0. ...,[0. 0. 0.79333333 0.65111111 0...,"[58366.833469168625, 58366.833469168625, 58366...",327,1086
2,Sordariomycetes_jgi|Acral2|2034848|fgenesh1_kg...,MSITELDDFTGFEGGASTAYSSPGAPAVFDLPGASNHVGTISPQDL...,222,94,[ 64062.65418229 64062.65418229 75414.667853...,[ 0. 0. -0.2 -0.2 -0.4 -0.6 -0.6 -0.4 -0.4 ...,[0. 0. 0.53111111 0.57333333 0...,"[36846.26635132608, 36846.26635132608, 36846.2...",94,1319
3,Sordariomycetes_jgi|Acral2|2034849|fgenesh1_kg...,MSITGNYNQHFGAAGIISSHNYVLSELDDFTGFEGGASTAYSSPGA...,243,115,[103421.45065119 103421.45065119 103421.450651...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.6 0.48 0...,"[98417.43715391349, 98417.43715391349, 98417.4...",115,1298
5,Sordariomycetes_jgi|Acral2|2047914|estExt_Gene...,MADTCGGSTPLKNFSQYGSQDRSLQQDRVVHGFHGSAAAGPSTFRS...,2943,1021,[49761.33963355 49761.33963355 49761.33963355 ...,[ 0. 0. -0.2 ... -0.6 0. 0. ],[0. 0. 0.54444444 ... 0.273333...,"[36959.46539862907, 36959.46539862907, 36959.4...",1021,392
...,...,...,...,...,...,...,...,...,...,...
1207,Cimm_XP_012214147.1_CoccidioidesImmitisRS,MSTSNLPLDIGTLLDLSTDQFVEDLGSSSHSSLLDQDQLDQLINFN...,242,108,[179044.50242288 179044.50242288 179044.502422...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.41333333 0.45555556 0...,"[215199.18805174268, 215199.18805174268, 21519...",108,1305
1208,Cpos_XP_003070205.1_CoccidioidesPosadasiiC735,MSTSNLPLGMVSLSASAVRLVANQRPDIGTLLDLSTDQYVEDLGSS...,260,126,[ 56309.23879651 56309.23879651 56309.238796...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.41333333 0.45555556 0...,"[6138.0, 6138.0, 6138.0, 6138.0, 6138.0, 6138....",126,1287
1209,Mory_EHA48851.1_MagnaportheOryzae70_15,MNNTSDLGLDDFTAFGGGASAFPSPAMPGVFDIASTTASTMGTVSP...,239,101,[165405.40017749 165405.40017749 165405.400177...,[ 0. 0. 0. -0.2 -0.2 -0.2 -0.2 -0.4 -0.4 ...,[0. 0. 0.35333333 0.23333333 0...,"[156903.4915015713, 156903.4915015713, 156903....",101,1312
1210,Nfis_EAW24893.1_NeosartoryaFischeriNRRL181,MSTPNIAQDMPDFFGLPSNDFGDDFELSTEPTMLSPNQIPTGLMAV...,251,109,[148604.20780985 148604.20780985 148604.207809...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.39555556 0.45333333 0...,"[177960.75802414527, 177960.75802414527, 17796...",109,1304


# Get activities for overlapping Tiles

In [6]:
# Lists are saved kind of weird in current dataframe, this formats them nicely
def read_list(s):
    if '...' in s:
        return None
    ls = s.strip().replace('[','').replace(']','').replace(',',"").split()
    ls = [float(n) for n in ls]
    return ls

In [7]:
positional_activity_df = []

# Read all the activities
for ls in seq_data["SmoothedActivitesLoess"]:
    positional_activity_df.append(read_list(ls))

# Make the tree

In [8]:
seq_data = seq_data.reset_index()

In [9]:
seq_data

Unnamed: 0,index,SpeciesName,Seq,Length,WxxLF_loc,SmoothedActivites,LinearCharge,LinearHydrophobicityKD,SmoothedActivitesLoess,Location_WxxLF,pad_by
0,0,Sordariomycetes_jgi|Acral2|2019554|gm1.4974_g,MALRIEVYNRIESSTASTALQRQDLRYTFRSNARAASGQANANYQA...,2928,1006,[50849.75653537 50849.75653537 50849.75653537 ...,[ 0. 0. 0.2 ... -0.6 0. 0. ],[0. 0. 0.66666667 ... 0.273333...,"[34049.34344014826, 34049.34344014826, 34049.3...",1006,407
1,1,Sordariomycetes_jgi|Acral2|2027520|fgenesh1_pg...,MWLVVRAGPSPLLQDLAARCHIDGMSMPLLHFDPPDFPLTGVALGI...,455,327,[ 50311.68363476 50311.68363476 50311.683634...,[ 0. 0. 0. 0.2 0.2 0.2 0.2 0.2 0. ...,[0. 0. 0.79333333 0.65111111 0...,"[58366.833469168625, 58366.833469168625, 58366...",327,1086
2,2,Sordariomycetes_jgi|Acral2|2034848|fgenesh1_kg...,MSITELDDFTGFEGGASTAYSSPGAPAVFDLPGASNHVGTISPQDL...,222,94,[ 64062.65418229 64062.65418229 75414.667853...,[ 0. 0. -0.2 -0.2 -0.4 -0.6 -0.6 -0.4 -0.4 ...,[0. 0. 0.53111111 0.57333333 0...,"[36846.26635132608, 36846.26635132608, 36846.2...",94,1319
3,3,Sordariomycetes_jgi|Acral2|2034849|fgenesh1_kg...,MSITGNYNQHFGAAGIISSHNYVLSELDDFTGFEGGASTAYSSPGA...,243,115,[103421.45065119 103421.45065119 103421.450651...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.6 0.48 0...,"[98417.43715391349, 98417.43715391349, 98417.4...",115,1298
4,5,Sordariomycetes_jgi|Acral2|2047914|estExt_Gene...,MADTCGGSTPLKNFSQYGSQDRSLQQDRVVHGFHGSAAAGPSTFRS...,2943,1021,[49761.33963355 49761.33963355 49761.33963355 ...,[ 0. 0. -0.2 ... -0.6 0. 0. ],[0. 0. 0.54444444 ... 0.273333...,"[36959.46539862907, 36959.46539862907, 36959.4...",1021,392
...,...,...,...,...,...,...,...,...,...,...,...
497,1207,Cimm_XP_012214147.1_CoccidioidesImmitisRS,MSTSNLPLDIGTLLDLSTDQFVEDLGSSSHSSLLDQDQLDQLINFN...,242,108,[179044.50242288 179044.50242288 179044.502422...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.41333333 0.45555556 0...,"[215199.18805174268, 215199.18805174268, 21519...",108,1305
498,1208,Cpos_XP_003070205.1_CoccidioidesPosadasiiC735,MSTSNLPLGMVSLSASAVRLVANQRPDIGTLLDLSTDQYVEDLGSS...,260,126,[ 56309.23879651 56309.23879651 56309.238796...,[ 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,[0. 0. 0.41333333 0.45555556 0...,"[6138.0, 6138.0, 6138.0, 6138.0, 6138.0, 6138....",126,1287
499,1209,Mory_EHA48851.1_MagnaportheOryzae70_15,MNNTSDLGLDDFTAFGGGASAFPSPAMPGVFDIASTTASTMGTVSP...,239,101,[165405.40017749 165405.40017749 165405.400177...,[ 0. 0. 0. -0.2 -0.2 -0.2 -0.2 -0.4 -0.4 ...,[0. 0. 0.35333333 0.23333333 0...,"[156903.4915015713, 156903.4915015713, 156903....",101,1312
500,1210,Nfis_EAW24893.1_NeosartoryaFischeriNRRL181,MSTPNIAQDMPDFFGLPSNDFGDDFELSTEPTMLSPNQIPTGLMAV...,251,109,[148604.20780985 148604.20780985 148604.207809...,[ 0. 0. 0. 0. 0. 0. -0.2 -0.2 -0.2 ...,[0. 0. 0.39555556 0.45333333 0...,"[177960.75802414527, 177960.75802414527, 17796...",109,1304


In [10]:
# Pad the activities so that everything lines up
positional_activity_df_padded = []
for i in range(len(positional_activity_df)):
    pad_by = seq_data.loc[i, "pad_by"]
    sub_list = positional_activity_df[i]

    # Case where there is no activity data
    if sub_list == None:
        seq_data = seq_data.drop(i)
        continue
    
    # Put -1 in from of the current data
    new_list = [-1]*pad_by + list(sub_list)
    positional_activity_df_padded.append(new_list)

In [11]:
# Making padded activity into a dataframe
activity_position_matrix = pd.DataFrame(positional_activity_df_padded)
seq_data["SpeciesName"] = ["'" + s + "'" for s in seq_data["SpeciesName"]]
activity_position_matrix.index = seq_data["SpeciesName"]
activity_position_matrix = activity_position_matrix.fillna(0)

# Write to matrix to make it easier to read in for the correct format for ete
activity_position_matrix.to_csv("position_activities.tsv", sep='\t')

In [12]:
# Read in activity data tsv as string for ete
with open("position_activities.tsv",'r') as f:
    matrix = f.readlines()
matrix = ''.join(matrix)

# Formatting required by ete to read in the heatmap data
matrix = '#' + matrix
matrix = matrix.replace("SpeciesName","Names")

## Random code that modifies the ete functionality

In [13]:
# Replacing some of ete's built in functions with our own so that we can use the colors/formatting that we want

def get_color_gradient(self,colorscheme='Reds'):
    from PyQt5 import QtGui
    import matplotlib.colors as colors
    import matplotlib.cm as cmx
    cmap0 = colors.LinearSegmentedColormap.from_list('', ['white', 'darkblue'])
    cNorm  = colors.Normalize(vmin=0, vmax=1)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cmap0)
    color_scale = []
    for scale in np.linspace(0, 1, 201):
        # rgba = scalarMap.to_rgba(scale)
        # hex_color = '#%02x%02x%02x' % (int(rgba[0] * 255), int(rgba[1] * 255), int(rgba[2] * 255))
        # # hex_color = '#%02x%02x%02x' %scalarMap.to_rgba(scale)[:3]
        [r,g,b,a] = scalarMap.to_rgba(scale, bytes=True)
        color_scale.append( QtGui.QColor( r, g, b, a ) )

    print(color_scale)
    return color_scale

# Replacing the get_color_gradient method with our custom method
ProfileFace.get_color_gradient = get_color_gradient

def draw_heatmap_profile(self):
    try:
        from numpy import isfinite as _isfinite, ceil
    except ImportError:
        pass
    else:
        isfinite = lambda n: n and _isfinite(n)

    from PyQt5.QtGui import QColor, QBrush, QPainter, QPixmap
    from PyQt5.QtCore import QRectF

    # Calculate vector
    vector = self.node.profile
    deviation = self.node.deviation
    # If no vector, skip
    if vector is None:
        return

    colors = self.get_color_gradient()

    leaves = self.node.get_leaves()

    vlength = len(vector)
    # pixels per array position
    img_height = self.height * len(leaves)
    profile_width = self.width
    profile_height= img_height

    x_alpha = float( profile_width / (len(vector)) )

    # Creates a pixmap
    self.pixmap = QPixmap(self.width, img_height)
    self.pixmap.fill(QColor("white"))
    p = QPainter(self.pixmap)

    x2 = 0
    y  = 0
    y_step = self.height
    for leaf in leaves:
        mean_vector = leaf.profile
        deviation_vector = leaf.deviation
        # Draw heatmap
        for pos in range(vlength):
            # first and second X pixel positions
            x1 = x2
            x2 = x1 + x_alpha
            dev1 = self.fit_to_scale(deviation_vector[pos])
            mean1 = self.fit_to_scale(mean_vector[pos])
            # Set heatmap color
            # if not np.isfinite(mean1):
            #     customColor = QColor("white")
            if (mean_vector[pos] == -1.0) or (math.isnan(mean1)):
                # color_index = abs(int(ceil(((self.center_v-mean1)*100)/(self.max_value-self.center_v))))
                customColor = QColor('white') # Color of the padding values
            elif mean1>self.center_v:
                color_index = abs(int(ceil(((self.center_v-mean1)*100)/(self.max_value-self.center_v))))
                customColor = colors[100 + color_index]
            elif mean1<self.center_v:
                color_index = abs(int(ceil(((self.center_v-mean1)*100)/(self.min_value-self.center_v))))
                customColor = colors[100 - color_index]
            else:
                # color_index = abs(int(ceil(((self.center_v-mean1)*100)/(self.max_value-self.center_v))))
                customColor = colors[100]

            # Fill bar with custom color
            p.fillRect(QRectF(x1, y, x_alpha, y_step), QBrush(customColor))
        y+= y_step
        x2 = 0
    p.end()

# Replacing the draw_heatmap_profile with our custom function
ProfileFace.draw_heatmap_profile = draw_heatmap_profile

# Making the tree

In [14]:
# Read in gene tree
t = ClusterTree('Phylogeny_test.tree', text_array=matrix)

[688] leaf names could not be mapped to the matrix rows.


In [15]:
# Find species not in the tree
set(seq_data["SpeciesName"]).difference(set(t.get_leaf_names()))

{"'Agos_ADL012C'", "'Cpar_CPAG02317'"}

In [16]:
# Lots of things in the tree that we don't have data for
set(t.get_leaf_names()).difference(set(seq_data["SpeciesName"]))

{"'Blastocladiomycota_jgi|Catan2|1451137|fgenesh1_kg.199___17___Locus3917v1rpkm44.08'",
 "'Blastocladiomycota_jgi|Catan2|1519271|estExt_Genemark1.C_1990010'",
 "'Blastocladiomycota_jgi|Catan2|248926|Catan1.CE87754_8246'",
 "'Dothideomycetes_jgi|Boeex1|291205|gw1.12.88.1'",
 "'Dothideomycetes_jgi|Boeex1|295363|gw1.12.746.1'",
 "'Dothideomycetes_jgi|Boeex1|319687|e_gw1.12.534.1'",
 "'Dothideomycetes_jgi|Boeex1|320571|e_gw1.12.746.1'",
 "'Dothideomycetes_jgi|Boeex1|343812|estExt_Genewise1.C_12_t10429'",
 "'Dothideomycetes_jgi|Boeex1|343813|estExt_Genewise1.C_12_t10430'",
 "'Dothideomycetes_jgi|Boeex1|366995|estExt_Genewise1Plus.C_12_t10424'",
 "'Dothideomycetes_jgi|Boeex1|366996|estExt_Genewise1Plus.C_12_t10425'",
 "'Dothideomycetes_jgi|Boeex1|366997|estExt_Genewise1Plus.C_12_t10426'",
 "'Dothideomycetes_jgi|Boeex1|403055|fgenesh1_kg.12___411___TRINITY_DN4084_c1_g1_i1'",
 "'Dothideomycetes_jgi|Boeex1|403056|fgenesh1_kg.12___412___TRINITY_DN4084_c1_g2_i1'",
 "'Dothideomycetes_jgi|Boeex1|42

In [17]:
# Custom defining the proteins we want to look at
yeast_clade = ["'Pezizomycetes_jgi|Ascim1|361388|estExt_Genemark1.C_290083'",
"'Pezizomycetes_jgi|Ascim1|325756|gm1.5756_g'",
"'Pezizomycetes_jgi|Ascim1|93291|CE93290_30315'",
"'Pezizomycetes_jgi|Ascim1|305880|fgenesh1_pg.29___83'",
"'Pezizomycetes_jgi|Ascim1|93296|CE93295_26347'",
"'Pezizomycetes_jgi|Ascim1|375912|estExt_fgenesh1_pg.C_290082'",
"'Pezizomycetes_jgi|Ascim1|270849|fgenesh1_kg.29___262___combest_scaffold_29_93286'",
"'Pezizomycetes_jgi|Ascim1|270848|fgenesh1_kg.29___261___combest_scaffold_29_93295'",
"'Pezizomycetes_jgi|Ascim1|93287|CE93286_27569'",
"'Orbiliomycetes_tr|S8AP85|S8AP85_DACHA'",
"'Saccharomycotina_jgi|Ascru1|80718|fgenesh1_pm.5___288'",
"'Saccharomycotina_jgi|Ascru1|70154|fgenesh1_pg.5___335'",
"'Saccharomycotina_jgi|Ascru1|13320|estExt_Genemark1.C_50328'",
"'Saccharomycotina_jgi|Ascru1|7885|gm1.2627_g'",
"'Saccharomycotina_jgi|Ascru1|91103|estExt_fgenesh1_pm.C_50288'",
"'Saccharomycotina_jgi|Ascru1|144377|CE50837_107504'",
"'Saccharomycotina_jgi|Ascru1|85934|estExt_fgenesh1_pg.C_50334'",
"'Saccharomycotina_jgi|Ascru1|144399|CE50859_98448'",
"'Saccharomycotina_jgi|Ascru1|75671|fgenesh1_kg.5___329___Locus242v1rpkm674.40'",
"'Saccharomycotina_jgi|Babin1|9317|gm1.4150_g'",
"'Saccharomycotina_jgi|Babin1|52491|estExt_Genewise1.C_10_t10408'",
"'Saccharomycotina_jgi|Babin1|52490|estExt_Genewise1.C_10_t10407'",
"'Saccharomycotina_jgi|Babin1|86272|CE17951_206468'",
"'Saccharomycotina_jgi|Babin1|65280|estExt_Genewise1Plus.C_10_t10405'",
"'Saccharomycotina_jgi|Babin1|65279|estExt_Genewise1Plus.C_10_t10404'",
"'Saccharomycotina_jgi|Babin1|65281|estExt_Genewise1Plus.C_10_t10406'",
"'Saccharomycotina_jgi|Babin1|177293|estExt_fgenesh1_pg.C_100169'",
"'Saccharomycotina_jgi|Babin1|52492|estExt_Genewise1.C_10_t10409'",
"'Saccharomycotina_jgi|Babin1|86320|CE17999_230112'",
"'Saccharomycotina_jgi|Babin1|168276|fgenesh1_pg.10___170'",
"'Saccharomycotina_jgi|Babin1|14697|estExt_Genemark1.C_100161'",
"'Saccharomycotina_jgi|Canar1|10128|gm1.5176_g'",
"'Saccharomycotina_jgi|Canar1|239270|estExt_Genewise1Plus.C_260003'",
"'Saccharomycotina_jgi|Canar1|214968|estExt_fgenesh1_pg.C_260001'",
"'Saccharomycotina_jgi|Canar1|25521|fgenesh1_pg.26___1'",
"'Saccharomycotina_jgi|Canar1|239268|estExt_Genewise1Plus.C_260001'",
"'Saccharomycotina_jgi|Canar1|108071|CE77240_21014'",
"'Saccharomycotina_jgi|Canar1|239269|estExt_Genewise1Plus.C_260002'",
"'Saccharomycotina_jgi|Canar1|19997|estExt_Genemark1.C_260001'",
"'Saccharomycotina_jgi|Canar1|226940|estExt_Genewise1.C_260001'",
"'Saccharomycotina_jgi|Canar1|226942|estExt_Genewise1.C_260003'",
"'Saccharomycotina_jgi|Canar1|226941|estExt_Genewise1.C_260002'",
"'Saccharomycotina_jgi|Canar1|204347|e_gw1.26.106.1'",
"'Saccharomycotina_jgi|Canar1|204295|e_gw1.26.105.1'",
"'Saccharomycotina_jgi|Hanpo2|22315|estExt_Genemark1.C_7_t10399'",
"'Saccharomycotina_jgi|Hanpo2|68939|estExt_Genewise1Plus.C_7_t20267'",
"'Saccharomycotina_jgi|Hanpo2|17514|fgenesh1_kg.7___202___isotig00615'",
"'Saccharomycotina_jgi|Hanpo2|68937|estExt_Genewise1Plus.C_7_t20265'",
"'Saccharomycotina_jgi|Hanpo2|28228|estExt_fgenesh1_kg.C_70202'",
"'Saccharomycotina_jgi|Hanpo2|28229|estExt_fgenesh1_kg.C_70203'",
"'Saccharomycotina_jgi|Hanpo2|60720|estExt_Genewise1.C_7_t20276'",
"'Saccharomycotina_jgi|Hanpo2|10524|gm1.4782_g'",
"'Saccharomycotina_jgi|Hanpo2|14894|fgenesh1_pg.7___313'",
"'Saccharomycotina_jgi|Hanpo2|35718|estExt_fgenesh1_pm.C_70253'",
"'Saccharomycotina_jgi|Hanpo2|68938|estExt_Genewise1Plus.C_7_t20266'",
"'Saccharomycotina_jgi|Hanpo2|25651|fgenesh1_pm.7___256'",
"'Saccharomycotina_jgi|Hanpo2|35717|estExt_fgenesh1_pm.C_70252'",
"'Saccharomycotina_jgi|Hanpo2|60718|estExt_Genewise1.C_7_t20274'",
"'Saccharomycotina_jgi|Hanpo2|17515|fgenesh1_kg.7___203___isotig00614'",
"'Saccharomycotina_jgi|Hanpo2|60719|estExt_Genewise1.C_7_t20275'",
"'Saccharomycotina_jgi|Hanpo2|32440|estExt_fgenesh1_pg.C_70308'",
"'Saccharomycotina_jgi|Hanpo2|25645|fgenesh1_pm.7___250'",
"'Saccharomycotina_jgi|Metbi1|42927|e_gw1.4.684.1'",
"'Saccharomycotina_jgi|Metbi1|32464|fgenesh1_kg.4___246___Locus2325v4rpkm10.55'",
"'Saccharomycotina_jgi|Metbi1|199455|CE110116_84150'",
"'Saccharomycotina_jgi|Metbi1|32465|fgenesh1_kg.4___247___Locus2325v3rpkm19.18'",
"'Saccharomycotina_jgi|Metbi1|12459|fgenesh1_pg.4___228'",
"'Saccharomycotina_jgi|Metbi1|20326|gw1.4.684.1'",
"'Saccharomycotina_jgi|Metbi1|44260|e_gw1.4.1332.1'",
"'Saccharomycotina_jgi|Metbi1|78834|fgenesh1_pm.4___197'",
"'Saccharomycotina_jgi|Metbi1|44286|e_gw1.4.18.1'",
"'Saccharomycotina_jgi|Metbi1|26673|gw1.4.1332.1'",
"'Saccharomycotina_jgi|Metbi1|14228|gw1.4.18.1'",
"'Clus_CLUG05303'",
"'Dhan_DEHA2B13662g'",
"'Cgui_PGUG01812.1'",
"'Saccharomycotina_jgi|Spapa3|150094|estExt_genewise1plus.C_30355'",
"'Saccharomycotina_jgi|Spapa3|150096|estExt_genewise1plus.C_30357'",
"'Saccharomycotina_jgi|Spapa3|60169|fgenesh1_kg.3___122___isotig04412'",
"'Saccharomycotina_jgi|Spapa3|49766|fgenesh1_pg.3___121'",
"'Saccharomycotina_jgi|Spapa3|54831|fgenesh1_pm.3___110'",
"'Saccharomycotina_jgi|Spapa3|135643|genewise1plus.e_gw0.3.1134.1'",
"'Saccharomycotina_jgi|Spapa3|65855|estExt_fgenesh1_pg.C_30121'",
"'Saccharomycotina_jgi|Spapa3|70803|estExt_fgenesh1_pm.C_30110'",
"'Saccharomycotina_jgi|Spapa3|137153|genewise1plus.e_gw0.3.1138.1'",
"'Saccharomycotina_jgi|Spapa3|150095|estExt_genewise1plus.C_30356'",
"'Cten_EGV62856.1_CandidaTenuisATCC10573'",
"'Ctro_CTRG02060.3'",
"'Cmal_EMG49335.1_CandidaMaltosaXu316'",
"'Calb_orf19.1358'",
"'Cdub_XP_002418875.1_CandidaDubliniensisCD36'",
"'Cpar_CPAG02317'",
"'Cort_emb_CCG25633.1_CandidaOrthopsilosis'",
"'Lelo_LELG02543'",
"'Saccharomycotina_sp|P03069|GCN4_YEAST'",
"'GCN4_P03069_S000000735'",
"'Smik_smik283_g1.1'",
"'Sbay_sbayc645_g44.1'",
"'Suva_5.73'",
"'Skud_5.91'",
"'Spar_spar361_g1.1'",
"'Cgla_CAGL0L02475g'",
"'KNAG0L00890'",
"'Scas_Scas663.23'",
"'Ndai_NDAI0A08680'",
"'Karf_KAFR0L00510'",
"'Kwal_Kwal55.20854'",
"'Klth_KLTH0F12760g'",
"'Sklu_SAKL0F05082g'",
"'Klac_KLLA0D14113g'",
"'Agos_ADL012C'",
"'Agos_NP_984085.2_AshbyaGossypiiATCC10895'",
"'Aace_AGO11762.1_SaccharomycetaceaeAshbyaAceri'",
"'Ecym_4497'",
"'Tbla_TBLA0A00980'",
"'Tpha_TPHA0L00680'",
"'Kpol_487.1_s487'",
"'Zyro_ZYRO0D09174g'",
"'Tdel_TDEL0H02940'",
"'Saccharomycotina_jgi|Wican1|37412|estExt_Genewise1.C_1_t10265'",
"'Saccharomycotina_jgi|Wican1|48663|estExt_Genewise1Plus.C_1_t10262'",
"'Saccharomycotina_jgi|Wican1|102581|estExt_fgenesh1_pg.C_1_t10149'",
"'Saccharomycotina_jgi|Wican1|75895|fgenesh1_pg.1___150'",
"'Saccharomycotina_jgi|Wican1|37414|estExt_Genewise1.C_1_t10267'",
"'Saccharomycotina_jgi|Wican1|48664|estExt_Genewise1Plus.C_1_t10263'",
"'Saccharomycotina_jgi|Wican1|66049|estExt_Genemark1.C_1_t10154'",
"'Saccharomycotina_jgi|Wican1|66051|estExt_Genemark1.C_1_t10156'",
"'Saccharomycotina_jgi|Wican1|59775|gm1.156_g'",
"'Saccharomycotina_jgi|Wican1|48665|estExt_Genewise1Plus.C_1_t10264'",
"'Saccharomycotina_jgi|Wican1|37413|estExt_Genewise1.C_1_t10266'",
"'Kpas_emb_CCA37254.1_KomagataellaPastorisCBS7435'",
"'Saccharomycotina_jgi|Nadfu1|49963|gm1.1604_g'",
"'Saccharomycotina_jgi|Nadfu1|64653|estExt_Genemark1.C_2_t20134'",
"'Saccharomycotina_jgi|Nadfu1|45620|fgenesh1_kg.2___451___isotig02372'",
"'Saccharomycotina_jgi|Nadfu1|40691|fgenesh1_pg.2___628'",
"'Saccharomycotina_jgi|Nadfu1|81990|estExt_fgenesh1_kg.C_2_t10451'",
"'Saccharomycotina_jgi|Nadfu1|77498|estExt_fgenesh1_pg.C_2_t20126'",
"'Saccharomycotina_jgi|Nadfu1|73376|estExt_fgenesh1_pm.C_2_t10470'",
"'Saccharomycotina_jgi|Nadfu1|69321|fgenesh1_pm.2___474'",
"'Saccharomycotina_jgi|Nadfu1|51114|gm1.2755_g'",
"'Saccharomycotina_jgi|Nadfu1|65686|estExt_Genemark1.C_4_t10334'",
"'Saccharomycotina_jgi|Nadfu1|78516|estExt_fgenesh1_pg.C_4_t10321'",
"'Saccharomycotina_jgi|Nadfu1|41826|fgenesh1_pg.4___328'",
"'Saccharomycotina_jgi|Nadfu1|82792|estExt_fgenesh1_kg.C_40241'",
"'Saccharomycotina_jgi|Nadfu1|70209|fgenesh1_pm.4___244'",
"'Saccharomycotina_jgi|Nadfu1|74196|estExt_fgenesh1_pm.C_40241'",
"'Saccharomycotina_jgi|Nadfu1|78517|estExt_fgenesh1_pg.C_4_t10322'",
"'Saccharomycotina_jgi|Nadfu1|46454|fgenesh1_kg.4___241___isotig01220'",
"'Saccharomycotina_jgi|Nadfu1|65687|estExt_Genemark1.C_4_t10335'",
"'Saccharomycotina_jgi|Yarlip1|99120|estExt_Genewise1.C_4_t10451'",
"'Saccharomycotina_jgi|YarliYB419|129386|fgenesh1_pg.68___18'",
"'Saccharomycotina_jgi|Yarlip1|124888|fgenesh1_kg.4___364___TRINITY_DN11188_c0_g1_i1'",
"'Saccharomycotina_jgi|Yarlip1|146445|estExt_fgenesh1_pm.C_40183'",
"'Saccharomycotina_jgi|YarliYB419|174829|estExt_fgenesh1_pg.C_680018'",
"'Saccharomycotina_jgi|Yarlip1|112453|estExt_Genewise1Plus.C_4_t10448'",
"'Saccharomycotina_jgi|Yarlip1|167611|estExt_Genemark1.C_4_t10211'",
"'Saccharomycotina_jgi|YarliYB419|113289|estExt_Genewise1Plus.C_680052'",
"'Saccharomycotina_jgi|YarliYB419|168878|estExt_fgenesh1_pm.C_680019'",
"'Saccharomycotina_jgi|YarliYB419|138573|fgenesh1_kg.68___35___TRINITY_DN11188_c0_g1_i1'",
"'Saccharomycotina_jgi|YarliYB419|153186|estExt_Genemark1.C_680019'",
"'Saccharomycotina_jgi|YarliYB419|147040|gm1.3601_g'",
"'Saccharomycotina_jgi|Yarlip1|134677|fgenesh1_pg.4___215'",
"'Saccharomycotina_jgi|YarliYB419|98471|estExt_Genewise1.C_680055'",
"'Saccharomycotina_jgi|YarliYB419|62155|gw1.68.29.1'",
"'Saccharomycotina_jgi|Yarlip1|85218|e_gw1.4.1110.1'",
"'Saccharomycotina_jgi|Yarlip1|152061|estExt_fgenesh1_pg.C_4_t10213'",
"'Saccharomycotina_jgi|Yarlip1|140783|fgenesh1_pm.4___183'",
"'Saccharomycotina_jgi|YarliYB419|83403|e_gw1.68.29.1'",
"'Saccharomycotina_jgi|Yarlip1|74382|gw1.4.1110.1'",
"'Saccharomycotina_jgi|YarliYB419|159115|fgenesh1_pm.68___19'",
"'Ylip_YALI0E27742g'",
"'Saccharomycotina_jgi|YarliYB419|40002|CE40001_57872'",
"'Saccharomycotina_jgi|Yarlip1|34948|CE34947_58199'",
"'Saccharomycotina_jgi|Yarlip1|158041|gm1.2114_g'",
"'Saccharomycotina_jgi|Wicdo1|218662|gm1.821_g'",
"'Saccharomycotina_jgi|Wicdo1|211464|fgenesh1_pg.1___627'",
"'Saccharomycotina_jgi|Wicdo1|22255|CE22254_178106'",
"'Saccharomycotina_jgi|Wicdo1|232561|estExt_fgenesh1_pg.C_1_t20127'",
"'Saccharomycotina_jgi|Wicdo1|214978|fgenesh1_kg.1___652___TRINITY_DN556_c2_g1_i1'",
"'Saccharomycotina_jgi|Wicdo1|224924|estExt_Genemark1.C_1_t20321'",
"'Saccharomycotina_jgi|Canca1|2660|gm1.2660_g'",
"'Saccharomycotina_jgi|Canca1|113519|CE42623_187453'",
"'Saccharomycotina_jgi|Canca1|113794|CE42898_208536'",
"'Saccharomycotina_jgi|Canca1|23981|e_gw1.2.2068.1'",
"'Saccharomycotina_jgi|Canca1|26257|e_gw1.2.1537.1'",
"'Saccharomycotina_jgi|Canca1|57326|fgenesh1_pm.2___827'",
"'Saccharomycotina_jgi|Canca1|31751|fgenesh1_kg.2___783___Locus179v4rpkm25.84_PRE'",
"'Saccharomycotina_jgi|Canca1|44568|fgenesh1_pg.2___1019'",
"'Saccharomycotina_jgi|Canca1|24716|e_gw1.2.3118.1'",
"'Saccharomycotina_jgi|Yarlip1|160666|gm1.4739_g'",
"'Saccharomycotina_jgi|YarliYB419|152014|estExt_Genemark1.C_350066'",
"'Saccharomycotina_jgi|YarliYB419|164091|MIX2386_5467_72'",
"'Saccharomycotina_jgi|YarliYB419|145830|gm1.2391_g'",
"'Saccharomycotina_jgi|Yarlip1|170133|estExt_Genemark1.C_140006'",
"'Saccharomycotina_jgi|YarliYB419|173630|estExt_fgenesh1_pg.C_350065'",
"'Saccharomycotina_jgi|YarliYB419|164089|MIX2384_5467_49'",
"'Saccharomycotina_jgi|YarliYB419|128135|fgenesh1_pg.35___65'",
"'Saccharomycotina_jgi|Yarlip1|154613|estExt_fgenesh1_pg.C_140006'",
"'Saccharomycotina_jgi|Yarlip1|162915|MIX860_5478_49'",
"'Saccharomycotina_jgi|Yarlip1|137369|fgenesh1_pg.14___6'",
"'Blastocladiomycota_jgi|Catan2|1506241|gm1.11555_g'",
"'Blastocladiomycota_jgi|Catan2|1519271|estExt_Genemark1.C_1990010'",
"'Blastocladiomycota_jgi|Catan2|1466814|fgenesh1_pg.199___9'",
"'Blastocladiomycota_jgi|Catan2|1451137|fgenesh1_kg.199___17___Locus3917v1rpkm44.08'",
"'Blastocladiomycota_jgi|Catan2|1097078|CE97078_6759'",
"'Blastocladiomycota_jgi|Catan2|248926|Catan1.CE87754_8246'",
"'Saccharomycotina_jgi|Lipst1_1|2531|gm1.2531_g'",
"'Saccharomycotina_jgi|Lipst1_1|257493|CE136376_15285'",
"'Saccharomycotina_jgi|Lipst1_1|26804|estExt_Genemark1.C_4_t20003'",
"'Saccharomycotina_jgi|Lipst1_1|116845|estExt_fgenesh1_pm.C_4_t20007'",
"'Saccharomycotina_jgi|Lipst1_1|110526|estExt_fgenesh1_pg.C_4_t10458'",
"'Saccharomycotina_jgi|Lipst1_1|70220|fgenesh1_kg.4___682___Locus1781v1rpkm103.27'",
"'Saccharomycotina_jgi|Lipst1_1|62564|fgenesh1_pg.4___460'",
"'Saccharomycotina_jgi|Lipst1_1|103845|fgenesh1_pm.4___507'"]


In [18]:
t = ClusterTree('Phylogeny_test.tree', text_array=matrix)

# Prune the tree to only contain the species we are interested in
leafs = t.get_leaf_names()
clade_leaves = [leaf for leaf in leafs if leaf in matrix]
clade_leaves = [leaf for leaf in clade_leaves if leaf in yeast_clade]
t.prune(clade_leaves)

array =  t.arraytable

# Calculates some stats on the matrix. Needed to establish the color gradients.
matrix_dist = [i for r in range(len(array.matrix))\
               for i in array.matrix[r] if np.isfinite(i)]
matrix_max = np.max(matrix_dist)
matrix_min = np.min(matrix_dist)
matrix_avg = matrix_min+((matrix_max-matrix_min)/2)


def mylayout(node):
    # Creates a profile face that will represent node's profile as a heatmap
    profileFace  = ProfileFace(matrix_max, 0, matrix_avg, \
                                            200, 5, "heatmap", colorscheme=0)

    # If node is a leaf, add the heatmap
    if node.is_leaf():
        # And a line profile
        add_face_to_node(profileFace, node, 0, aligned=True)
        node.img_style["size"]=0

# Use my layout to visualize the tree
ts = TreeStyle()
ts.layout_fn = mylayout
# t.show(tree_style=ts)


[688] leaf names could not be mapped to the matrix rows.


[<PyQt5.QtGui.QColor object at 0x17ab019b0>, <PyQt5.QtGui.QColor object at 0x17ab01860>, <PyQt5.QtGui.QColor object at 0x17ab01a20>, <PyQt5.QtGui.QColor object at 0x17ab01a90>, <PyQt5.QtGui.QColor object at 0x17ab01b00>, <PyQt5.QtGui.QColor object at 0x17ab01b70>, <PyQt5.QtGui.QColor object at 0x17ab01be0>, <PyQt5.QtGui.QColor object at 0x17ab01c50>, <PyQt5.QtGui.QColor object at 0x17ab01cc0>, <PyQt5.QtGui.QColor object at 0x17ab01d30>, <PyQt5.QtGui.QColor object at 0x17ab01da0>, <PyQt5.QtGui.QColor object at 0x17ab01e10>, <PyQt5.QtGui.QColor object at 0x17ab01e80>, <PyQt5.QtGui.QColor object at 0x17ab01ef0>, <PyQt5.QtGui.QColor object at 0x17ab01f60>, <PyQt5.QtGui.QColor object at 0x17ab01fd0>, <PyQt5.QtGui.QColor object at 0x17ab02040>, <PyQt5.QtGui.QColor object at 0x17ab020b0>, <PyQt5.QtGui.QColor object at 0x17ab02120>, <PyQt5.QtGui.QColor object at 0x17ab02190>, <PyQt5.QtGui.QColor object at 0x17ab02200>, <PyQt5.QtGui.QColor object at 0x17ab02270>, <PyQt5.QtGui.QColor object at 0

2024-11-20 16:36:16.626 python[62241:496872] +[IMKClient subclass]: chose IMKClient_Legacy
2024-11-20 16:36:16.626 python[62241:496872] +[IMKInputSession subclass]: chose IMKInputSession_Legacy


In [19]:
t.render("yeast_gene_tree_heatmaps_loess_smoothing.pdf", tree_style=ts)

[<PyQt5.QtGui.QColor object at 0x17b787230>, <PyQt5.QtGui.QColor object at 0x17b7871c0>, <PyQt5.QtGui.QColor object at 0x17b787310>, <PyQt5.QtGui.QColor object at 0x17b787380>, <PyQt5.QtGui.QColor object at 0x17b7873f0>, <PyQt5.QtGui.QColor object at 0x17b787460>, <PyQt5.QtGui.QColor object at 0x17b7874d0>, <PyQt5.QtGui.QColor object at 0x17b787540>, <PyQt5.QtGui.QColor object at 0x17b7875b0>, <PyQt5.QtGui.QColor object at 0x17b787620>, <PyQt5.QtGui.QColor object at 0x17b787690>, <PyQt5.QtGui.QColor object at 0x17b787700>, <PyQt5.QtGui.QColor object at 0x17b787770>, <PyQt5.QtGui.QColor object at 0x17b7877e0>, <PyQt5.QtGui.QColor object at 0x17b787850>, <PyQt5.QtGui.QColor object at 0x17b7878c0>, <PyQt5.QtGui.QColor object at 0x17b787930>, <PyQt5.QtGui.QColor object at 0x17b7879a0>, <PyQt5.QtGui.QColor object at 0x17b787a10>, <PyQt5.QtGui.QColor object at 0x17b787a80>, <PyQt5.QtGui.QColor object at 0x17b787af0>, <PyQt5.QtGui.QColor object at 0x17b787b60>, <PyQt5.QtGui.QColor object at 0

{'nodes': [[0.5, 383.576416015625, 4.5, 387.576416015625, 0, None],
  [21.49081402384551, 30.5, 25.49081402384551, 34.5, 1, None],
  [107.19709102946032, 20.0, 111.19709102946032, 24.0, 2, None],
  [110.19713867143219, 11.0, 114.19713867143219, 15.0, 3, None],
  [18.821212011190475,
   736.65283203125,
   22.821212011190475,
   740.65283203125,
   8,
   None],
  [30.741377962162296,
   670.3056640625,
   34.7413779621623,
   674.3056640625,
   9,
   None],
  [53.6478444372392, 609.611328125, 57.6478444372392, 613.611328125, 10, None],
  [66.24244934729782, 543.72265625, 70.24244934729782, 547.72265625, 11, None],
  [72.81939346666584, 470.4453125, 76.81939346666584, 474.4453125, 12, None],
  [83.46192286635862, 335.890625, 87.46192286635862, 339.890625, 13, None],
  [95.81272755896998, 126.1484375, 99.81272755896998, 130.1484375, 14, None],
  [158.17250395074575, 68.0, 162.17250395074575, 72.0, 15, None],
  [161.1725515927176, 59.0, 165.1725515927176, 63.0, 16, None],
  [101.2525504360