In [1]:
from Bio import Entrez

def count_rnaseq_records(email, term="Illumina MiSeq RNASeq"):
    """
    Conta o número total de registros de RNASeq no NCBI para uma plataforma de sequenciamento especificada.

    Parâmetros:
    - email: Endereço de email a ser fornecido ao NCBI.
    - term: Termo de busca para o Entrez do NCBI (padrão é para Illumina MiSeq e RNASeq).

    Retorna:
    - Número total de registros disponíveis para o termo de busca.
    """
    
    Entrez.email = email
    handle = Entrez.esearch(db="sra", term=term)
    record = Entrez.read(handle)
    handle.close()
    
    return int(record["Count"])


In [2]:
email   = "marcos.aires@fiocruz.br"
total = count_rnaseq_records(email)
print(f"Total de registros encontrados: {total}")

Total de registros encontrados: 5072


In [3]:
from Bio import Entrez

def fetch_rnaseq_data(email, term="Illumina MiSeq RNASeq", retmax=10):
    """
    Fetches raw RNASeq data from NCBI using the specified sequencing platform.

    Parameters:
    - email: Email address to be provided to NCBI.
    - term: Search term for NCBI's Entrez (default is for Illumina MiSeq and RNASeq).
    - retmax: Maximum number of records to retrieve.

    Returns:
    - List of records with accession numbers, titles, and other metadata.
    """
    Entrez.email = email
    handle = Entrez.esearch(db="sra", term=term, retmax=retmax)
    record = Entrez.read(handle)
    handle.close()
    
    id_list = record["IdList"]
    if not id_list:
        print("No records found for the provided search term.")
        return []
    
    handle = Entrez.esummary(db="sra", id=",".join(id_list))
    summary_records = Entrez.read(handle)
    handle.close()
    
    results = []
    for rec in summary_records:
        accession = rec.get("Accession", "N/A")
        title = rec.get("Title", "N/A")
        update_date = rec.get("UpdateDate", "N/A")
        results.append({
            "Accession": accession,
            "Title": title,
            "UpdateDate": update_date
        })
    
    return results


In [4]:
# Example usage:
email   = "marcos.aires@fiocruz.br"
records = fetch_rnaseq_data(email)
for rec in records:
    print(rec)

{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/25'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/25'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/13'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}
{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}


In [5]:
records

[{'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/25'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/25'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/13'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'},
 {'Accession': 'N/A', 'Title': 'N/A', 'UpdateDate': '2023/09/07'}]