In [None]:
import pymysql.cursors
from Bio.Align.Applications import MuscleCommandline
from Bio import AlignIO
from Bio.Phylo.Applications import FastTreeCommandline
from Bio import Phylo
%matplotlib inline
import scipy.cluster.hierarchy as cl
import scipy.spatial.distance as ssd
import matplotlib
import matplotlib.pyplot as plt


In [None]:
class sea_inphinity():
    """Query helper for a phage MySQL database plus a small phylogeny pipeline.

    The class reads the `pham`, `gene` and `phage` tables and can turn the
    genes of one pham into a tree: FASTA export -> MUSCLE alignment ->
    FastTree -> dendrogram.

    Attributes:
        db_name: name of the database passed to `pymysql.connect`.
        connection: the open pymysql connection (set by `connect`).
        verbose: when True, `print_` writes its argument to stdout.
        tree: tree read by `compute_tree` (None until it has run).
        leaves: terminal clade names collected by `draw_tree` (empty until
            it has run).
    """

    def __init__(self, db_name, host='localhost', user='root', password=''):
        """Store the settings and open the database connection.

        Args:
            db_name: name of the database to open.
            host: database host (defaults to the previously hard-coded value).
            user: database user (defaults to the previously hard-coded value).
            password: database password (defaults to the previously
                hard-coded value).
        """
        self.db_name = db_name
        self._db_params = {'host': host, 'user': user, 'password': password}
        self.verbose = True
        self.tree = None
        self.leaves = []
        self.connect()

    def print_(self, string):
        """Print `string` only when `self.verbose` is true."""
        if self.verbose:
            print(string)

    def connect(self):
        """Open the pymysql connection and store it in `self.connection`.

        This method used to be called `connection`, but the attribute of the
        same name replaced it on the instance as soon as it ran; the attribute
        name is kept because every query method reads it.
        """
        self.connection = pymysql.connect(host=self._db_params['host'],
                                          user=self._db_params['user'],
                                          password=self._db_params['password'],
                                          db=self.db_name,
                                          charset='utf8mb4',
                                          cursorclass=pymysql.cursors.DictCursor)

    def simple_execute(self, sql, verbose):
        """Execute one SQL statement, commit, and return `cursor.execute`'s result.

        Args:
            sql: statement text, executed as-is (no parameters are bound).
            verbose: when true, the result is also passed to `print_`.
        """
        with self.connection.cursor() as cursor:
            res = cursor.execute(sql)

        # Commit before returning; an early return used to skip this call.
        self.connection.commit()

        if verbose:
            self.print_(res)

        return res

    def get_list_pham(self, limit):
        """Select rows of the `pham` table and return the executed cursor.

        Args:
            limit: maximum number of rows, or -1 for no limit.

        Returns:
            The cursor, positioned on the first row so the caller can still
            iterate over the result after the optional verbose printing.
        """
        cur = self.connection.cursor()

        if limit == -1:
            cur.execute("SELECT * FROM pham")
        else:
            cur.execute("SELECT * FROM pham LIMIT %s", (int(limit),))

        if self.verbose:
            rows = cur.fetchall()
            for row in rows:
                self.print_(row)
            # Printing consumed the rows; rewind so the returned cursor is usable.
            # scroll() raises IndexError on an empty result, hence the guard.
            if rows:
                cur.scroll(0, mode='absolute')

        return cur

    def get_list_name_pham(self, limit):
        """Return the distinct `name` values of the `pham` table as a list.

        Args:
            limit: maximum number of names, or -1 for no limit.
        """
        cur = self.connection.cursor()

        if limit == -1:
            cur.execute("SELECT name FROM pham GROUP BY name")
        else:
            cur.execute("SELECT name FROM pham GROUP BY name LIMIT %s", (int(limit),))

        return [row['name'] for row in cur.fetchall()]

    def get_specific_pham(self, name):
        """Return an executed cursor over the `GeneID` rows of pham `name`."""
        cur = self.connection.cursor()

        cur.execute("SELECT GeneID FROM pham WHERE name = %s", (name,))

        return cur

    def get_list_genes(self, gene_ids, verbose):
        """Return the `gene` rows whose `GeneID` is in `gene_ids`.

        Args:
            gene_ids: iterable of gene identifiers.
            verbose: when true, each row is also passed to `print_`.

        Returns:
            A list of row dicts; empty when `gene_ids` is empty.
        """
        gene_ids = list(gene_ids)
        if not gene_ids:
            # "IN ()" is not valid SQL, and no row can match anyway.
            return []

        cur = self.connection.cursor()

        # One placeholder per id so the driver escapes every value.
        placeholders = ", ".join(["%s"] * len(gene_ids))
        sql = "SELECT * FROM gene WHERE GeneID IN (%s)" % placeholders
        cur.execute(sql, tuple(gene_ids))

        # Fetch once, then print: iterating the cursor first would leave
        # nothing for fetchall() to return.
        rows = list(cur.fetchall())
        if verbose:
            for row in rows:
                self.print_(row)

        return rows

    def get_phage(self, phage_id, verbose):
        """Return the `phage` rows whose `PhageID` equals `phage_id`.

        Args:
            phage_id: phage identifier to look up.
            verbose: when true, each row is also passed to `print_`.

        Returns:
            A list of row dicts.
        """
        cur = self.connection.cursor()

        cur.execute("SELECT * FROM phage WHERE PhageID = %s", (phage_id,))

        # Fetch once, then print (see get_list_genes).
        rows = list(cur.fetchall())
        if verbose:
            for row in rows:
                self.print_(row)

        return rows

    def get_phage_from_gene_id(self, gene_id):
        """Return the phage row referenced by the gene `gene_id`.

        Raises:
            IndexError: when the gene or its phage is not found.
        """
        genes = self.get_list_genes([gene_id], False)
        if not genes:
            raise IndexError("No gene found for GeneID %r" % (gene_id,))

        phages = self.get_phage(genes[0]['PhageID'], False)
        if not phages:
            raise IndexError("No phage found for GeneID %r" % (gene_id,))

        return phages[0]

    def build_tree(self, pham):
        """Run the whole pipeline for one pham: genes, FASTA, MUSCLE, tree, plot."""
        genes = self.get_genes_from_a_pham(pham)
        self.create_fasta(genes)
        self.align_muscle()
        self.compute_tree()
        self.draw_tree()

    def get_genes_from_a_pham(self, pham):
        """Return the `gene` rows of every gene listed for pham `pham`."""
        pham_rows = self.get_specific_pham(pham).fetchall()
        for row in pham_rows:
            self.print_(row)

        gene_ids = [row['GeneID'] for row in pham_rows]
        return self.get_list_genes(gene_ids, False)

    def create_fasta(self, genes):
        """Write `genes` to 'fasta.fa', one two-line record per gene.

        Each record is a '>GeneID - Name' header line followed by the value of
        the gene's 'translation' field.
        """
        print('Creation of the FASTA file')
        self.print_("Number of Genes: %d" % (len(genes)))

        # The context manager closes the file even if a row lacks a key.
        with open("fasta.fa", "w") as fasta:
            for gene in genes:
                description = ">%s - %s" % (gene['GeneID'], gene['Name'])
                translation = gene['translation']

                self.print_(description)
                self.print_(translation)

                fasta.write(description)
                fasta.write('\n')
                fasta.write(translation)
                fasta.write('\n')

    def align_muscle(self, muscle_loc=r'/home/pa/work/muscle3.8.31_i86linux64'):
        """Align 'fasta.fa' with MUSCLE and write the result to 'out.aln'.

        Args:
            muscle_loc: path of the MUSCLE executable; change if necessary.
        """
        print('Alignment with MUSCLE')

        muscle_cline = MuscleCommandline(cmd=muscle_loc, input='fasta.fa',
                                         out='out.aln', clwstrict=True)
        muscle_cline()

        muscle_align = AlignIO.read('out.aln', 'clustal')
        self.print_(muscle_align)

    def compute_tree(self):
        """Build a tree from 'out.aln' with FastTree and load it into `self.tree`.

        The alignment is first converted to 'intermediate.phy'
        (phylip-relaxed); FastTree writes 'tree.tre', which is read back as
        Newick.
        """
        print('Compute tree')
        AlignIO.convert('out.aln', 'clustal', 'intermediate.phy', 'phylip-relaxed')

        cmd_fasttree = r'fasttree'
        fasttree_cmdline = FastTreeCommandline(cmd=cmd_fasttree, fastest=True,
                                               input='intermediate.phy', out='tree.tre')
        out_log, err_log = fasttree_cmdline()

        self.print_('Out Log:')
        self.print_(out_log)

        self.print_('Error Log')
        self.print_(err_log)

        self.tree = Phylo.read('tree.tre', 'newick')

    def draw_tree(self):
        """Draw a dendrogram of `self.tree`'s leaves and save it to 'tree.png'.

        The pairwise leaf distances taken from the tree are clustered with
        average linkage; the leaf names are kept in `self.leaves`.
        """
        print('Draw tree')
        self.leaves = [str(clade) for clade in self.tree.get_terminals()]

        # Full square matrix of tree distances between every pair of leaves.
        dmat = [[self.tree.distance(leaf_a, leaf_b) for leaf_b in self.leaves]
                for leaf_a in self.leaves]

        Z = cl.linkage(ssd.squareform(dmat), method='average', metric='euclidean')
        fig = plt.figure(num=None, figsize=(30, 25), dpi=250)
        cl.dendrogram(Z, labels=self.leaves, color_threshold=0.06,
                      leaf_rotation=90, leaf_font_size=10)

        # Save before show(): the inline backend closes the figure on show(),
        # so saving afterwards writes an empty image.
        fig.savefig('tree.png')
        plt.show()

    def print_informations_on_phage(self, gene_id):
        """Print gene ID, phage ID, phage name and host strain for `gene_id`."""
        phage = self.get_phage_from_gene_id(gene_id)
        print("******************************** \
              \nGene ID: %s \nPhage ID: %s \nPhage Name: %s \nHostStrain: %s \
              \n********************************" \
              % (gene_id, phage['PhageID'], phage['Name'], phage['HostStrain']))

    def print_informations_on_phages(self, gene_ids):
        """Call `print_informations_on_phage` for every id in `gene_ids`."""
        for gene_id in gene_ids:
            self.print_informations_on_phage(gene_id)

    
# Create the helper for the 'sea' database.
# NOTE(review): this rebinds the name `sea_inphinity` from the class to the
# instance, so the class can no longer be referenced by name after this line.
sea_inphinity = sea_inphinity('sea')

In [None]:
# Turn off the optional print_ output, then run the full pipeline
# (genes -> FASTA -> MUSCLE -> FastTree -> dendrogram) for pham '2799'.
sea_inphinity.verbose = False
sea_inphinity.build_tree('2799')

In [None]:
# Query at most 5 pham rows, then every distinct pham name (-1 means no limit).
# Only the value of the last expression is displayed by the notebook.
sea_inphinity.get_list_pham(5)
sea_inphinity.get_list_name_pham(-1)

In [None]:
# Show the terminal clades of the tree loaded by the last build_tree call.
print(sea_inphinity.tree.get_terminals())

In [None]:
# NOTE(review): these lines look like terminal clades copied from printed
# output and kept as notes. `Clade` is not imported in the visible cells, so
# running this cell would presumably raise NameError -- confirm.
Clade(branch_length=0.02764, name='663557_12')
Clade(branch_length=0.0, name='Backyardigan-DRAFT_gp12')
Clade(branch_length=0.0, name='Wile-DRAFT_gp11')

In [None]:
# NOTE(review): these lines look like terminal clades copied from printed
# output and kept as notes. `Clade` is not imported in the visible cells, so
# running this cell would presumably raise NameError -- confirm.
Clade(branch_length=0.00623, name='Vix-DRAFT_gp11')
Clade(branch_length=0.0, name='Microwolf-DRAFT_gp14')
Clade(branch_length=0.0, name='205870_14')

In [None]:
# Look up the gene row for one GeneID, asking for per-row printing as well.
print(sea_inphinity.get_list_genes(['Vix-DRAFT_gp11'], True))

In [None]:
# Print gene ID, phage ID, phage name and host strain for the phage of this gene.
sea_inphinity.print_informations_on_phage('Vix-DRAFT_gp11')

In [None]:
# Print the phage information for each of these three gene IDs.
sea_inphinity.print_informations_on_phages(['Vix-DRAFT_gp11', 'Microwolf-DRAFT_gp14', '205870_14'])

In [None]:
# Print the phage information for each of these three gene IDs.
sea_inphinity.print_informations_on_phages(['663557_12', 'Backyardigan-DRAFT_gp12', 'Wile-DRAFT_gp11'])

In [None]:
# Print the phage information for every leaf name stored by the last draw_tree call.
sea_inphinity.print_informations_on_phages(sea_inphinity.leaves)

In [None]:
# Print every distinct pham name (-1 means no limit).
print(sea_inphinity.get_list_name_pham(-1))

In [None]:
# Turn off the optional print_ output, run the full pipeline for pham '1639',
# then print the phage information for every leaf of the resulting tree.
sea_inphinity.verbose = False
sea_inphinity.build_tree('1639')
sea_inphinity.print_informations_on_phages(sea_inphinity.leaves)