In [1]:
from ete3 import TreeStyle
from ete3 import PhyloTree
from ete3 import Tree, faces, TreeStyle
from PyQt5       import QtCore
from PyQt5.QtWidgets import QGraphicsRectItem
from PyQt5.QtGui import QColor, QPen, QBrush

from PyQt5.QtWidgets import QGraphicsSimpleTextItem
from PyQt5.QtGui import QFont

from random import random

import pandas as pd

import os
import re
# NOTE: this changes the kernel's working directory to the parent folder, and
# the relative paths used later in the notebook (e.g. "gcn4_paper_stuff/...")
# depend on it. It is NOT idempotent: re-running this cell without restarting
# the kernel moves up one more directory each time.
os.chdir("..")



# Yeast Gene Order Browser (YGOB) alignment

In [2]:
# Background colours: every symbol gets a white cell, except F which is black.
# The key string lists the one-letter symbols in the same order as before.
_aabgcolors = dict.fromkeys("ARNDCQEGHILKMFPSTWYVBZX.-", "#FFFFFF")
_aabgcolors["F"] = "#000000"

# Foreground (letter) colours: the inverse, so F is drawn white on black and
# every other symbol black on white.
_aafgcolors = dict.fromkeys("ARNDCQEGHILKMFPSTWYVBZX.-", "#000000")
_aafgcolors["F"] = "#FFFFFF"

# Code from the ete package, modified to use the colors we set above
class MySequenceFace(faces.StaticItemFace):
    """Face that draws a molecular sequence as a row of coloured letter cells.

    Adapted from the sequence face of the ete package so that, for amino-acid
    sequences, the colour tables defined in this notebook (``_aafgcolors`` and
    ``_aabgcolors``) are used by default.

    :argument seq: sequence string to be drawn
    :argument "aa" seqtype: type of sequence; "aa" selects the amino-acid
        colour defaults, any other value selects the nucleotide defaults
    :argument 10 fsize: font size
    :argument None fg_colors: dictionary mapping each sequence character to the
        colour of its letter
    :argument None bg_colors: dictionary mapping each sequence character to the
        colour of its cell background
    :argument None codon: string holding the reverse translation of the
        amino-acid sequence (three characters per residue)
    :argument 11 col_w: width of a column; a letter is only drawn when the
        column width is at least the font size
    :argument 3 alt_col_w: width used for the columns listed in special_col
    :argument None special_col: list of ``[low, high]`` bounds; column index
        ``i`` uses alt_col_w when ``low < i <= high``
    :argument False interactive: when true, each cell item gets a ``codon``
        attribute describing the residue, its index and (if given) its codon

    Characters that are missing from the colour dictionaries are drawn in
    black on a white background instead of raising ``KeyError``.
    """
    def __init__(self, seq, seqtype="aa", fsize=10,
                 fg_colors=None, bg_colors=None,
                 codon=None, col_w=11, alt_col_w=3,
                 special_col=None, interactive=False):
        self.seq         = seq
        self.codon       = codon
        self.fsize       = fsize
        self.style       = seqtype
        self.col_w       = float(col_w)
        self.alt_col_w   = float(alt_col_w)
        self.special_col = special_col if special_col else []
        self.width       = 0 # will store the width of the whole sequence
        self.interact    = interactive

        if self.style == "aa":
            if not fg_colors:
                fg_colors = _aafgcolors
            if not bg_colors:
                bg_colors = _aabgcolors
        else:
            # This notebook defines no nucleotide palettes, so fall back to the
            # ones that ship with ete's faces module (previously a NameError).
            # NOTE(review): these are private names of ete3.treeview.faces.
            if not fg_colors:
                fg_colors = faces._ntfgcolors
            if not bg_colors:
                bg_colors = faces._ntbgcolors

        self.fg_col = self.__init_col(fg_colors)
        self.bg_col = self.__init_col(bg_colors)

        # Brushes used for characters absent from the colour dictionaries
        # (e.g. '*', 'U' or lowercase letters).
        self._unknown_fg = QBrush(QColor("#000000"))
        self._unknown_bg = QBrush(QColor("#FFFFFF"))

        # for future?
        self.row_h       = 13.0

        # The container starts with zero width; update_items() computes the
        # real width once the letter cells are created.
        super(MySequenceFace,
              self).__init__(QGraphicsRectItem(0, 0, self.width, self.row_h))

    def __init_col(self, color_dic):
        """Convert a {character: colour} dict into {character: QBrush}.

        Brushes are built once here to speed up the drawing of coloured
        rectangles and characters.
        """
        return {car: QBrush(QColor(color)) for car, color in color_dic.items()}

    def update_items(self):
        """Create one rectangle (and, if it fits, one letter) per residue."""
        seq_width = 0
        nopen = QPen(QtCore.Qt.NoPen)
        font = QFont("Courier", self.fsize)
        for i, letter in enumerate(self.seq):
            # Columns inside any special_col range use the alternative width.
            in_special = any(m[0] < i <= m[1] for m in self.special_col)
            width = self.alt_col_w if in_special else self.col_w

            rectItem = QGraphicsRectItem(0, 0, width, self.row_h,
                                         parent=self.item)
            rectItem.setX(seq_width) # to give correct X to children item
            rectItem.setBrush(self.bg_col.get(letter, self._unknown_bg))
            rectItem.setPen(nopen)
            if self.interact:
                if self.codon:
                    rectItem.codon = '%s, %d: %s' % (self.seq[i], i,
                                                     self.codon[i*3:i*3+3])
                else:
                    rectItem.codon = '%s, %d' % (self.seq[i], i)
            # write letter if enough space
            if width >= self.fsize:
                text = QGraphicsSimpleTextItem(letter, parent=rectItem)
                text.setFont(font)
                text.setBrush(self.fg_col.get(letter, self._unknown_fg))
                # Center text according to rectItem size
                bounds = text.boundingRect()
                text.setPos((width - bounds.width())/2,
                            (self.row_h - bounds.height())/2)
            seq_width += width
        self.width = seq_width

In [3]:
# Get a subset of the alignment
def subset_alignment(alignment_file, start, end):
    """Return a column slice of a FASTA alignment as FASTA-formatted text.

    :argument alignment_file: path to a FASTA file; sequences may span
        several lines
    :argument start: first column to keep (Python slice start, 0-based)
    :argument end: column to stop before (Python slice end, exclusive)
    :returns: a string with, for every record, its header line followed by
        one line holding ``sequence[start:end]``

    Each header is kept together with its own sequence, so a record with an
    empty sequence no longer shifts the sequences of the records after it.
    Lines that appear before the first ``>`` header are ignored.
    """
    records = []  # (header, [sequence chunks]) in file order

    with open(alignment_file, "r") as handle:
        for raw_line in handle:
            if raw_line.startswith(">"):
                records.append((raw_line.strip(), []))
            elif records:
                records[-1][1].append(raw_line.strip())

    # Write it to a string in fasta format
    fasta_lines = []
    for header, chunks in records:
        fasta_lines.append(header + "\n")
        fasta_lines.append("".join(chunks)[start:end] + "\n")

    return "".join(fasta_lines)


In [7]:
# Layout function (adds things to node) to add the alignment
def test_layout_phylo_aa(node):
    '''
    layout for CodemlTree
    '''
    if hasattr(node, "collapsed"):
        if node.collapsed == 1:
            node.img_style["draw_descendants"]= False
    if node.is_leaf:
        if hasattr (node, "sequence"):
            seqface =  MySequenceFace(node.sequence, "aa",
                                      fsize=10,
                                      col_w=11, interactive=False)
            faces.add_face_to_node(seqface, node, 1, aligned=True)


alignment_file = "gcn4_paper_stuff/gain_loss_Fs/YGOB_preComputed_MSA.fasta"


# Keep alignment columns 149..222 (Python slice 149:223) of every sequence.
fasta_txt = subset_alignment(alignment_file, 149, 223)

tree = PhyloTree("gcn4_paper_stuff/gain_loss_Fs/ygob_gene_tree.tree")
tree.link_to_alignment(alignment=fasta_txt, alg_format="fasta")

# Define your start and end positions
start_position = 30
end_position = 55

# Create the TextFace objects for the start and end positions
start_annotation = faces.TextFace(f"{start_position}", fsize=10, fgcolor="black")
end_annotation = faces.TextFace(f"{end_position}", fsize=10, fgcolor="black")

leafs = tree.get_leaf_names()
# Keep only the leaves that link_to_alignment() matched to a sequence.
# The previous `leaf in fasta_txt` check was a substring test on the whole
# FASTA text, so a leaf name could also match inside a sequence or as part of
# a longer header.
clade_leaves = [leaf.name for leaf in tree.get_leaves()
                if hasattr(leaf, "sequence")]
tree.prune(clade_leaves)

first_leaf = tree.get_leaves()[0]

ts = TreeStyle()
ts.layout_fn = test_layout_phylo_aa
# Add the start annotation to the first leaf (end_annotation is created above
# but is not attached to the tree here).
first_leaf.add_face(start_annotation, column=0, position="aligned")

tree.show(tree_style=ts)
# tree.render("gcn4_paper_stuff/gain_loss_Fs/ygob_MSA_gene_tree.pdf", tree_style=ts)

Warnning: [3] terminal nodes could not be found in the alignment.


In [8]:
def test_layout_phylo_aa(node):
    '''
    layout for CodemlTree
    '''
    if hasattr(node, "collapsed"):
        if node.collapsed == 1:
            node.img_style["draw_descendants"]= False
    if node.is_leaf:
        if hasattr (node, "sequence"):
            seqface =  MySequenceFace(node.sequence, "aa",
                                      fsize=10,
                                      col_w=11, interactive=False)
            faces.add_face_to_node(seqface, node, 1, aligned=True)


alignment_file = "gcn4_paper_stuff/gain_loss_Fs/ygob_MSA_species_names.fasta"


# Keep alignment columns 149..222 (Python slice 149:223) of every sequence.
fasta_txt = subset_alignment(alignment_file, 149, 223)

tree = PhyloTree("gcn4_paper_stuff/gain_loss_Fs/ygob_species_tree.tree")
tree.link_to_alignment(alignment=fasta_txt, alg_format="fasta")

# Define your start and end positions
start_position = 30
end_position = 55

# Create the TextFace objects for the start and end positions
start_annotation = faces.TextFace(f"{start_position}", fsize=10, fgcolor="black")
end_annotation = faces.TextFace(f"{end_position}", fsize=10, fgcolor="black")

leafs = tree.get_leaf_names()
# Keep only the leaves that link_to_alignment() matched to a sequence.
# The previous `leaf in fasta_txt` check was a substring test on the whole
# FASTA text, so a leaf name could also match inside a sequence or as part of
# a longer header.
clade_leaves = [leaf.name for leaf in tree.get_leaves()
                if hasattr(leaf, "sequence")]
tree.prune(clade_leaves)

first_leaf = tree.get_leaves()[0]

ts = TreeStyle()
ts.layout_fn = test_layout_phylo_aa
# Add the start annotation to the first leaf (end_annotation is created above
# but is not attached to the tree here).
first_leaf.add_face(start_annotation, column=0, position="aligned")

tree.show(tree_style=ts)
# tree.render("gcn4_paper_stuff/gain_loss_Fs/ygob_MSA_species_tree.pdf", tree_style=ts)

Warnning: [3] terminal nodes could not be found in the alignment.
