In [34]:
from Bio import Entrez, SeqIO, SearchIO
from Bio.Blast import NCBIWWW, NCBIXML

def search_sequences(gene_name, db="nucleotide", retmax=10):
    handle = Entrez.esearch(db=db, term=gene_name, retmax=retmax)
    record = Entrez.read(handle)
    handle.close()
    ids = record["IdList"]

    handle = Entrez.efetch(db=db, id=ids, rettype="gb", retmode="text")
    seq_records = list(SeqIO.parse(handle, "genbank"))
    handle.close()
    return seq_records
    
def run_blast(sequence, program="blastn", database="nr", evalue=0.001):
    result_handle = NCBIWWW.qblast(program, database, sequence.format("fasta"), expect=evalue)
    blast_record = NCBIXML.read(result_handle)
    result_handle.close()
    return blast_record

def analyze_blast_results(blast_record):
    significant_hits = []
    for alignment in blast_record.alignments:
        for hsp in alignment.hsps:
            if hsp.expect < 0.001:
                significant_hits.append({
                    "title": alignment.title,
                    "length": alignment.length,
                    "e-value": hsp.expect,
                    "identity": hsp.identities / hsp.align_length * 100
                })
    return significant_hits

def main():
    gene_name = "SO785_RS05220"
    seq_records = search_sequences(gene_name)
    
    with open(f"{gene_name}_sequences.gb", "w") as output_file:
        SeqIO.write(seq_records, output_file, "genbank")   

    print("Anotações e Características:")
    for record in seq_records:
        print(f"ID: {record.id}")
        print(f"Descrição: {record.description}")

        blast_record = run_blast(record)
        significant_hits = analyze_blast_results(blast_record)
        
        print("\nResultados Significativos do BLAST:")
        for hit in significant_hits:
            print(f"Título: {hit['title']}")
            print(f"Tamanho do alinhamento: {hit['length']}")
            print(f"E-value: {hit['e-value']:.3e}")
            print(f"Identidade: {hit['identity']:.2f}%")
            print("-" * 80)

if __name__ == "__main__":
    main()

Anotações e Características:
ID: NZ_CP139575.1
Descrição: Lactobacillus acidophilus strain ATCC 4356 chromosome, complete genome


UndefinedSequenceError: Sequence content is undefined