In [7]:
from Bio import Entrez, SeqIO

Entrez.email = "pg45861@uminho.pt"

def search_sequences(gene_name, db="nucleotide", retmax=10):
    """Search an Entrez database and download the hits as GenBank records.

    Args:
        gene_name: Query term passed to ESearch as ``term``.
        db: Name of the Entrez database used for both the search and the fetch.
        retmax: Maximum number of IDs requested from ESearch.

    Returns:
        A list with the records parsed from the fetched GenBank text, or an
        empty list when the search returns no IDs.
    """
    handle = Entrez.esearch(db=db, term=gene_name, retmax=retmax)
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even if the search response cannot be parsed.
        handle.close()

    ids = record["IdList"]
    if not ids:
        # Nothing matched: skip EFetch, which would be called with no IDs.
        return []

    handle = Entrez.efetch(db=db, id=ids, rettype="gb", retmode="text")
    try:
        return list(SeqIO.parse(handle, "genbank"))
    finally:
        handle.close()

# Query term sent to Entrez; also used as the prefix of the output file name.
gene_name = "SO785_RS05220"
seq_records = search_sequences(gene_name)

# Save every retrieved record to a single GenBank file.
output_file = open(f"{gene_name}_sequences.gb", "w")
with output_file:
    SeqIO.write(seq_records, output_file, "genbank")

def show_annotations(seq_records):
    """Print a report of each record's identity, annotations and cross-references.

    For every record this prints its ``id``, ``name`` and ``description``,
    then one line per entry of ``annotations``, then one line per item of
    ``dbxrefs``, followed by a 40-dash separator.

    Args:
        seq_records: Iterable of records exposing ``id``, ``name``,
            ``description``, ``annotations`` (a mapping) and ``dbxrefs``.
    """
    separator = "-" * 40
    for entry in seq_records:
        print(f"ID: {entry.id}")
        print(f"Nome: {entry.name}")
        print(f"Descrição: {entry.description}")

        print("Anotações:")
        annotations = entry.annotations
        for field in annotations:
            print(f"  {field}: {annotations[field]}")

        print("\nReferências externas:")
        for xref in entry.dbxrefs:
            print(f"  {xref}")

        print(separator)

# Print the record-level annotations and cross-references of the fetched records.
show_annotations(seq_records)

def analize_features(seq_records):
    """Print the type, location and qualifiers of every feature of each record.

    Each record starts with its ``id`` and ends with a 40-dash separator.
    Features whose ``qualifiers`` are empty print only type and location.

    Args:
        seq_records: Iterable of records exposing ``id`` and ``features``;
            each feature exposes ``type``, ``location`` and ``qualifiers``.
    """
    separator = "-" * 40
    for entry in seq_records:
        print(f"ID: {entry.id}")
        for feat in entry.features:
            print(f"Tipo de Feature: {feat.type}")
            print(f"Localização: {feat.location}")
            # Falsy qualifiers (empty or missing) produce no qualifier lines.
            for name, content in (feat.qualifiers or {}).items():
                print(f"  {name}: {content}")
        print(separator)

# Print every feature (type, location, qualifiers) of the fetched records.
analize_features(seq_records)

def external_references(seq_records):
    """Group the ``db_xref`` qualifiers of all features by database name.

    Each ``db_xref`` value is split on its first colon only, so identifiers
    that themselves contain a colon (e.g. ``HGNC:HGNC:1100``) are kept whole
    instead of raising ``ValueError``. Values without any colon are skipped
    because no database name can be derived from them.

    Args:
        seq_records: Iterable of records exposing ``features``; each feature
            exposes a ``qualifiers`` mapping.

    Returns:
        A dict mapping each database name to the list of identifiers found
        for it, in encounter order (duplicates are kept).
    """
    references = {}
    for record in seq_records:
        for feature in record.features:
            for ref in feature.qualifiers.get("db_xref", []):
                database, separator, identifier = ref.partition(":")
                if not separator:
                    # Malformed entry without "database:identifier" shape.
                    continue
                references.setdefault(database, []).append(identifier)
    return references

# Collect the feature-level db_xref identifiers grouped by database and
# print them, one database header followed by its identifiers.
references = external_references(seq_records)
for db, ids in references.items():
    print(f"Base de Dados: {db}")
    # `ref_id` rather than `id` so the builtin is not shadowed at module level.
    for ref_id in ids:
        print(f"  ID: {ref_id}")



ID: NZ_CP139575.1
Nome: NZ_CP139575
Descrição: Lactobacillus acidophilus strain ATCC 4356 chromosome, complete genome
Anotações:
  molecule_type: DNA
  topology: circular
  data_file_division: CON
  date: 09-DEC-2024
  accessions: ['NZ_CP139575']
  sequence_version: 1
  keywords: ['RefSeq']
  source: Lactobacillus acidophilus
  organism: Lactobacillus acidophilus
  taxonomy: ['Bacteria', 'Bacillati', 'Bacillota', 'Bacilli', 'Lactobacillales', 'Lactobacillaceae', 'Lactobacillus']
  references: [Reference(title='The Complete Genome Sequence of Probiotic Lactobacillus acidophilus ATCC 9224 isolated from sour milk', ...), Reference(title='Direct Submission', ...)]
  comment: REFSEQ INFORMATION: The reference sequence is identical to
CP139575.1.
Bacteria and source DNA available from Anand Kumar Bikini Atoll Rd,
SM-30, Los Alamos National Lab, Los Alamos, NM.
The annotation was added by the NCBI Prokaryotic Genome Annotation
Pipeline (PGAP). Information about PGAP can be found here:
https:/