In [1]:
from Bio.Blast import NCBIXML
from Bio.Align.Applications import ClustalOmegaCommandline
from Bio import Phylo, AlignIO
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
import os

def extract_sequences_from_blast(blast_file):
    """Extract the subject sequence of every HSP in a BLAST XML file.

    Each HSP becomes one FASTA record. The first HSP seen for a given hit ID
    uses the hit ID itself as the header; every later HSP with the same hit
    ID gets a ``_hsps<N>`` suffix (N starting at 2) to tell them apart.

    Args:
        blast_file: Path to a BLAST XML output file.

    Returns:
        A list of strings, one per HSP in file order. Each string is a
        complete FASTA record: a ``>``-prefixed header line followed by the
        HSP subject sequence line, both newline-terminated.
    """
    sequences = []
    id_counts = {}

    # The parser yields records while reading, so the handle has to stay open
    # for the whole loop; the ``with`` block guarantees it is closed after.
    with open(blast_file) as handle:
        for result in NCBIXML.parse(handle):
            for alignment in result.alignments:
                hit_id = alignment.hit_id
                for hsp in alignment.hsps:
                    count = id_counts.get(hit_id, 0) + 1
                    id_counts[hit_id] = count
                    # Only repeated occurrences of a hit ID get the suffix.
                    if count == 1:
                        sequence_id = hit_id
                    else:
                        sequence_id = f"{hit_id}_hsps{count}"
                    sequences.append(f">{sequence_id}\n{hsp.sbjct}\n")

    return sequences

def save_fasta(sequences, fasta_file_path):
    """Write pre-formatted FASTA records to a file.

    Args:
        sequences: Iterable of strings, written one after another exactly as
            given (no separators or newlines are added).
        fasta_file_path: Destination path; the file is created or truncated.
    """
    with open(fasta_file_path, "w") as out:
        for record in sequences:
            out.write(record)

def run_clustal_omega(fasta_file_path, output_file_path, clustalomega_exe_path):
    """Run Clustal Omega to build a multiple sequence alignment.

    Args:
        fasta_file_path: Path of the FASTA file with the input sequences.
        output_file_path: Path where the alignment is written.
        clustalomega_exe_path: Path to the Clustal Omega executable.

    The command is built with ``force=True`` and a confirmation message is
    printed once the call returns.
    """
    options = {
        "infile": fasta_file_path,
        "outfile": output_file_path,
        "force": True,
    }
    # Calling the command-line wrapper object is what launches the program.
    ClustalOmegaCommandline(clustalomega_exe_path, **options)()
    print(f"Alignment saved to {output_file_path}")

def calculate_distances(aligned_file):
    """Compute the distance matrix for an alignment.

    Args:
        aligned_file: The alignment object passed to
            ``DistanceCalculator.get_distance`` (despite the name, the caller
            in this file passes a parsed alignment, not a path).

    Returns:
        The distance matrix produced with the ``"identity"`` model.
    """
    return DistanceCalculator("identity").get_distance(aligned_file)

def build_phylogenetic_tree(dm, gene):
    """Build a UPGMA tree from a distance matrix and save it as Newick.

    Args:
        dm: Distance matrix handed to ``DistanceTreeConstructor.upgma``.
        gene: Gene label used to name the output file
            (``<gene>_phylogenetic_tree.newick``).

    The tree is written to disk and a confirmation message is printed;
    nothing is returned.
    """
    output_path = f"{gene}_phylogenetic_tree.newick"
    upgma_tree = DistanceTreeConstructor().upgma(dm)
    Phylo.write(upgma_tree, output_path, "newick")
    print(f"Phylogenetic tree saved to {output_path}")

def process_gene(gene, blast_file, clustalomega_exe_path):
    """Run the full pipeline for one gene.

    Steps: extract HSP subject sequences from the BLAST XML file, align them
    with Clustal Omega, compute the distance matrix of the alignment, and
    build and save the phylogenetic tree.

    Args:
        gene: Gene label used to name the output files.
        blast_file: Path to the BLAST XML file for this gene.
        clustalomega_exe_path: Path to the Clustal Omega executable.

    Output files: ``<gene>_aligned.fasta`` (the alignment) plus whatever
    ``build_phylogenetic_tree`` writes.
    """
    import tempfile  # local import: only needed for the scratch input file

    sequences = extract_sequences_from_blast(blast_file)
    aligned_file_path = f"{gene}_aligned.fasta"

    # Keep the unaligned input in a scratch file rather than writing it to the
    # "_aligned" path and asking the aligner to read and overwrite that same
    # file. This way "<gene>_aligned.fasta" only ever holds a real alignment,
    # even if the aligner fails.
    with tempfile.TemporaryDirectory() as work_dir:
        unaligned_file_path = os.path.join(work_dir, f"{gene}_unaligned.fasta")
        save_fasta(sequences, unaligned_file_path)
        run_clustal_omega(unaligned_file_path, aligned_file_path, clustalomega_exe_path)

    # Read the alignment back as an alignment object.
    alignment = AlignIO.read(aligned_file_path, "fasta")

    dm = calculate_distances(alignment)

    build_phylogenetic_tree(dm, gene)

def main():
    """Process every configured gene in turn.

    For each gene label the BLAST input is expected at ``<gene>_blast.xml``;
    the Clustal Omega executable path is hard-coded below.
    """
    clustalomega_exe_path = r"C:\Program Files\clustal-omega-1.2.2-win64\clustalo.exe"

    for gene in ("glgA", "glgB", "glgD", "93290188"):
        process_gene(gene, f"{gene}_blast.xml", clustalomega_exe_path)

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Alignment saved to glgA_aligned.fasta
Phylogenetic tree saved to glgA_phylogenetic_tree.newick
Alignment saved to glgB_aligned.fasta
Phylogenetic tree saved to glgB_phylogenetic_tree.newick
Alignment saved to glgD_aligned.fasta
Phylogenetic tree saved to glgD_phylogenetic_tree.newick
Alignment saved to 93290188_aligned.fasta
Phylogenetic tree saved to 93290188_phylogenetic_tree.newick
