In [None]:
# Notebook form inputs read by the fetching code further down.
# Search term handed to search_pubmed() as the PubMed query.
query = 'life span' # @param {type:"string"}
# Cap on the number of results; the fetching code compares its running total
# against this value and slices the collected ID list with it.
max_results = 10 # @param {type:"integer"}

In [None]:
# @title Fetching paper details

import requests
import time
from tqdm import tqdm

def search_pubmed(query, retmax=100, retstart=0, timeout=30):
    """Run one PubMed search request against the E-utilities ESearch endpoint.

    Args:
        query: Search term, sent as the ``term`` request parameter.
        retmax: Maximum number of IDs requested for this page.
        retstart: Offset of the first ID to return (``0`` starts at the
            beginning of the result list).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        A ``(idlist, count)`` tuple taken from the ``esearchresult`` object
        of the JSON response. ``count`` is passed through exactly as the API
        delivered it; callers in this file convert it with ``int()``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the JSON body has no ``esearchresult`` section.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": retmax,
        "retstart": retstart,
        "retmode": "json",
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    search_result = response.json()["esearchresult"]
    return search_result["idlist"], search_result["count"]

def fetch_pubmed_details(id_list, timeout=30):
    """Fetch summary records for a batch of PubMed IDs via ESummary.

    Args:
        id_list: Iterable of PubMed IDs; each item is converted with
            ``str()`` before being joined into the ``id`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response. The printing code in this file reads its
        ``"result"`` mapping, which holds a ``"uids"`` entry plus one entry
        per ID. For an empty ``id_list`` no request is made and an empty
        result of that same shape is returned.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    ids = ",".join(str(uid) for uid in id_list)
    if not ids:
        # Nothing to look up: skip the network round-trip entirely.
        return {"result": {"uids": []}}

    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    params = {
        "db": "pubmed",
        "id": ids,
        "retmode": "json",
    }
    # NOTE(review): IDs travel in the query string of a GET request; keep
    # batches modest (this file uses 100) so the URL stays a sane length.
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP failures explicitly rather than as a JSON decode error.
    response.raise_for_status()
    return response.json()

# Example usage: look up `query` on PubMed, collect at most `max_results`
# IDs, fetch their summaries and print a short citation for each record.
# `query` and `max_results` must already be defined when this runs.

#query = "cancer immunotherapy"
total_results, fetched_ids = 0, []
batch_size = 100
#max_results = 500

# Fetch results in batches
# The first request serves two purposes: it reports the total hit count
# (needed to size the progress bar) and it is the first page of IDs, so the
# same page is never requested twice.
target = max(max_results, 0)
ids, count = search_pubmed(query, retmax=min(batch_size, target), retstart=0)
# Never aim for more than the search actually matched.
target = min(int(count), target)
with tqdm(total=target) as pbar:
    while ids and total_results < target:
        # Trim the page so the overall cap is respected exactly.
        ids = ids[:target - total_results]
        fetched_ids.extend(ids)
        # Advance by what was really received, not by the nominal batch size,
        # so the offset and the progress bar stay accurate on short pages.
        total_results += len(ids)
        pbar.update(len(ids))
        if total_results >= target:
            break
        # Sleep to respect rate limits
        time.sleep(0.34)  # Approximately 3 requests per second
        ids, count = search_pubmed(
            query,
            retmax=min(batch_size, target - total_results),
            retstart=total_results,
        )

# Fetch details in batches
# fetched_ids is already capped at `target`, so every slice below stays
# within the requested number of results.
all_details = []
for start in tqdm(range(0, len(fetched_ids), batch_size), desc="Fetching Details"):
    batch_ids = fetched_ids[start:start + batch_size]
    details = fetch_pubmed_details(batch_ids)
    all_details.append(details)
    # Sleep to respect rate limits
    time.sleep(0.34)

# Printing the fetched details
for details in all_details:
    for uid, detail in details["result"].items():
        # "uids" is the index list of the response, not a paper record.
        if uid == "uids":
            continue
        url = f"https://pubmed.ncbi.nlm.nih.gov/{uid}/"
        # A record may lack some fields; fall back to a placeholder instead
        # of aborting the whole listing with a KeyError.
        authors = ", ".join(
            author.get("name", "N/A") for author in detail.get("authors", [])
        )
        print(f"Title: {detail.get('title', 'N/A')}")
        print(f"Source: {detail.get('source', 'N/A')}")
        print(f"PubDate: {detail.get('pubdate', 'N/A')}")
        print(f"Authors: {authors}")
        print(f"URL: {url}")
        print("------")

100it [00:00, 141.26it/s]
Fetching Details: 100%|██████████| 1/1 [00:02<00:00,  2.06s/it]

Title: Clinical evaluation of droplet digital PCR in the early identification of suspected sepsis patients in the emergency department: a prospective observational study.
Source: Front Cell Infect Microbiol
PubDate: 2024
Authors: Jiang S, Zhao D, Wang C, Liu X, Yang Q, Bao X, Dong T, Li G, Gu Y, Ye Y, Sun B, Xu S, Zhou X, Fan L, Tang L
URL: https://pubmed.ncbi.nlm.nih.gov/38895732/
------
Title: Digital determinants of health as a way to address multilevel complex causal model in the promotion of Digital health equity and the prevention of digital health inequities: A scoping review.
Source: J Public Health Res
PubDate: 2024 Jan
Authors: Petretto DR, Carrogu GP, Gaviano L, Berti R, Pinna M, Petretto AD, Pili R
URL: https://pubmed.ncbi.nlm.nih.gov/38895154/
------
Title: Radiation-resistant bacteria in desiccated soil and their potentiality in applied sciences.
Source: Front Microbiol
PubDate: 2024
Authors: Khan A, Liu G, Zhang G, Li X
URL: https://pubmed.ncbi.nlm.nih.gov/38894973/
----


