# Basic PubMed Search

Let's test a simple search using the Entrez API.

In [1]:
import requests
import time
import json  # for pretty printing results

In [2]:
def search_pubmed(query="ketogenic diet", max_results=5):
    """Basic PubMed search following Entrez guidelines"""
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    
    params = {
        'db': 'pubmed',
        'term': query,
        'retmax': max_results,
        'retmode': 'json',
        'usehistory': 'y'
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None

## Try a basic search

In [3]:
# Let's search and see the full response
result = search_pubmed("ketogenic diet", 5)

if result:
    print("Full API Response:")
    print(json.dumps(result, indent=2))
    
    print("\nFound Papers:")
    for pmid in result['esearchresult']['idlist']:
        print(f"PMID: {pmid}")

Full API Response:
{
  "header": {
    "type": "esearch",
    "version": "0.3"
  },
  "esearchresult": {
    "count": "5721",
    "retmax": "5",
    "retstart": "0",
    "querykey": "1",
    "webenv": "MCID_678670852a015c3c3307265f",
    "idlist": [
      "39805722",
      "39801794",
      "39801479",
      "39801107",
      "39796579"
    ],
    "translationset": [
      {
        "from": "ketogenic diet",
        "to": "\"diet, ketogenic\"[MeSH Terms] OR (\"diet\"[All Fields] AND \"ketogenic\"[All Fields]) OR \"ketogenic diet\"[All Fields] OR (\"ketogenic\"[All Fields] AND \"diet\"[All Fields])"
      }
    ],
    "querytranslation": "\"diet, ketogenic\"[MeSH Terms] OR (\"diet\"[All Fields] AND \"ketogenic\"[All Fields]) OR \"ketogenic diet\"[All Fields] OR (\"ketogenic\"[All Fields] AND \"diet\"[All Fields])"
  }
}

Found Papers:
PMID: 39805722
PMID: 39801794
PMID: 39801479
PMID: 39801107
PMID: 39796579


## Try a more specific search

In [4]:
# Search with a more specific query
specific_query = "ketogenic diet[Title/Abstract] AND clinical trial[Publication Type]"
result = search_pubmed(specific_query, 5)

if result:
    print(f"Total results found: {result['esearchresult']['count']}")
    print("\nPaper IDs:")
    for pmid in result['esearchresult']['idlist']:
        print(f"PMID: {pmid}")

Total results found: 291

Paper IDs:
PMID: 39771001
PMID: 39617882
PMID: 39602868
PMID: 39599743
PMID: 39543202


**Get** Specific paper information

In [5]:
# Function to get paper details
def get_paper_details(pmid):
    """Get details for a specific paper using esummary"""
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    
    params = {
        'db': 'pubmed',
        'id': pmid,
        'retmode': 'json'
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return None

# Let's get details for the first paper (39805722)
result = get_paper_details("39805722")

if result:
    print("Paper Details:")
    print(json.dumps(result, indent=2))
    
    # Let's also extract some key fields
    paper_data = result['result']['39805722']
    print("\nKey Information:")
    print(f"Title: {paper_data.get('title', 'No title')}")
    print(f"Authors: {', '.join(author.get('name', '') for author in paper_data.get('authors', []))}")
    print(f"Journal: {paper_data.get('source', 'No journal')}")
    print(f"Date: {paper_data.get('pubdate', 'No date')}")
    if 'elocationid' in paper_data:
        print(f"DOI: {paper_data['elocationid']}")

Paper Details:
{
  "header": {
    "type": "esummary",
    "version": "0.3"
  },
  "result": {
    "uids": [
      "39805722"
    ],
    "39805722": {
      "uid": "39805722",
      "pubdate": "2024 Dec 20",
      "epubdate": "2024 Dec 20",
      "source": "Nutr Metab Cardiovasc Dis",
      "authors": [
        {
          "name": "Pala B",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Pennazzi L",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Nardoianni G",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Rubattu SD",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Volpe M",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Colao AM",
          "authtype": "Author",
          "clusterid": ""
        },
        {
          "name": "Barbato

In [6]:
# Get details for multiple papers
pmids = ["39805722", "39801794", "39801479"]

for pmid in pmids:
    print(f"\n{'='*80}\nChecking paper {pmid}:")
    result = get_paper_details(pmid)
    
    if result and 'result' in result:
        paper_data = result['result'][pmid]
        print(f"\nTitle: {paper_data.get('title', 'No title')[:100]}...")  # First 100 chars of title
        print(f"Date fields:")
        print(f"  pubdate: {paper_data.get('pubdate', 'Not found')}")
        print(f"  epubdate: {paper_data.get('epubdate', 'Not found')}")
        print(f"  sortpubdate: {paper_data.get('sortpubdate', 'Not found')}")
        print(f"Journal: {paper_data.get('source', 'No journal')}")
        
        # Check if it has an abstract
        if "Has Abstract" in paper_data.get('attributes', []):
            print("Has abstract: Yes")
        else:
            print("Has abstract: No")


Checking paper 39805722:

Title: Very low-calorie ketogenic diet reduces central blood pressure and cardiometabolic risk in post-meno...
Date fields:
  pubdate: 2024 Dec 20
  epubdate: 2024 Dec 20
  sortpubdate: 2024/12/20 00:00
Journal: Nutr Metab Cardiovasc Dis
Has abstract: Yes

Checking paper 39801794:

Title: Outcomes of dietary interventions in the prevention and progression of Parkinson's disease: A litera...
Date fields:
  pubdate: 2024
  epubdate: 2024 Dec 30
  sortpubdate: 2024/12/30 00:00
Journal: AIMS Neurosci
Has abstract: Yes

Checking paper 39801479:

Title: NORSE secondary to anti-GAD65 antibody-positive encephalitis treated with novel adjunctive rapid tit...
Date fields:
  pubdate: 2025 Jan 13
  epubdate: 2025 Jan 13
  sortpubdate: 2025/01/13 00:00
Journal: Epilepsia Open
Has abstract: Yes


In [7]:
import requests
from typing import Dict, List, Optional
import time

def get_papers(query: str, max_results: int = 5) -> List[Dict]:
    """
    Search PubMed and get paper details.
    Returns a list of papers with key information.
    """
    # First search for papers
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    search_params = {
        'db': 'pubmed',
        'term': query,
        'retmax': max_results,
        'retmode': 'json'
    }
    
    papers = []
    try:
        # Get paper IDs
        search_response = requests.get(search_url, params=search_params)
        search_data = search_response.json()
        pmids = search_data['esearchresult']['idlist']
        
        # Get details for each paper
        summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
        for pmid in pmids:
            # Respect rate limits
            time.sleep(0.34)  # ~3 requests per second
            
            summary_params = {
                'db': 'pubmed',
                'id': pmid,
                'retmode': 'json'
            }
            
            summary_response = requests.get(summary_url, params=summary_params)
            paper_data = summary_response.json()['result'][pmid]
            
            # Extract key information
            paper = {
                'pmid': pmid,
                'title': paper_data.get('title', ''),
                'authors': [author['name'] for author in paper_data.get('authors', [])],
                'journal': paper_data.get('source', ''),
                'date': paper_data.get('sortpubdate', '').split()[0],  # Just get YYYY/MM/DD part
                'doi': paper_data.get('elocationid', '').replace('doi: ', '')
            }
            papers.append(paper)
            
    except Exception as e:
        print(f"Error: {str(e)}")
    
    return papers

# Test it out
if __name__ == "__main__":
    papers = get_papers("ketogenic diet clinical trial")
    
    print("\nFound Papers:")
    for paper in papers:
        print(f"\nTitle: {paper['title']}")
        print(f"Authors: {', '.join(paper['authors'])}")
        print(f"Journal: {paper['journal']}")
        print(f"Date: {paper['date']}")
        print(f"DOI: {paper['doi']}")
        print("-" * 80)


Found Papers:

Title: Development and Pragmatic Randomized Controlled Trial of Healthy Ketogenic Diet Versus Energy-Restricted Diet on Weight Loss in Adults with Obesity.
Authors: Lim SL, Tay M, Ang SM, Wai SN, Ong KW, Neo WJ, Yap QV, Chan YH, Khoo CM
Journal: Nutrients
Date: 2024/12/19
DOI: 10.3390/nu16244380
--------------------------------------------------------------------------------

Title: Does the Ketogenic Diet Mediate Inflammation Markers in Obese and Overweight Adults? A Systematic Review and Meta-Analysis of Randomized Clinical Trials.
Authors: Rondanelli M, Gasparri C, Pirola M, Barrile GC, Moroni A, Sajoux I, Perna S
Journal: Nutrients
Date: 2024/11/22
DOI: 10.3390/nu16234002
--------------------------------------------------------------------------------

Title: The effects of portfolio moderate-carbohydrate and ketogenic diets on anthropometric indices, metabolic status, and hormonal levels in overweight or obese women with polycystic ovary syndrome: a randomized cont

In [8]:
import requests
from typing import Dict, List, Optional
import time
import xml.etree.ElementTree as ET

def get_papers(query: str, max_results: int = 5) -> List[Dict]:
    """
    Search PubMed and get paper details including abstracts.
    Returns a list of papers with key information and findings.
    """
    # First search for papers
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    search_params = {
        'db': 'pubmed',
        'term': query,
        'retmax': max_results,
        'retmode': 'json'
    }
    
    papers = []
    try:
        # Get paper IDs
        search_response = requests.get(search_url, params=search_params)
        search_data = search_response.json()
        pmids = search_data['esearchresult']['idlist']
        
        # Get metadata for each paper
        summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
        efetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        
        for pmid in pmids:
            # Respect rate limits
            time.sleep(0.34)
            
            # Get paper metadata
            summary_params = {
                'db': 'pubmed',
                'id': pmid,
                'retmode': 'json'
            }
            
            summary_response = requests.get(summary_url, params=summary_params)
            paper_data = summary_response.json()['result'][pmid]
            
            # Get full abstract using efetch
            time.sleep(0.34)  # Rate limit for second request
            efetch_params = {
                'db': 'pubmed',
                'id': pmid,
                'retmode': 'xml',
                'rettype': 'abstract'
            }
            
            efetch_response = requests.get(efetch_url, params=efetch_params)
            abstract_xml = efetch_response.text
            
            # Parse the XML to get structured abstract if available
            try:
                root = ET.fromstring(abstract_xml)
                abstract_sections = []
                
                # Try to get structured abstract sections
                abstract_element = root.find(".//Abstract")
                if abstract_element is not None:
                    for child in abstract_element:
                        if child.tag == 'AbstractText':
                            label = child.get('Label', '')
                            text = child.text or ''
                            if label:
                                abstract_sections.append(f"{label}: {text}")
                            else:
                                abstract_sections.append(text)
                
                abstract = "\n".join(abstract_sections) if abstract_sections else "No abstract available"
                
            except ET.ParseError:
                abstract = "Error parsing abstract"
            
            # Get publication types to identify study type
            pub_types = paper_data.get('pubtype', [])
            
            # Extract key information
            paper = {
                'pmid': pmid,
                'title': paper_data.get('title', ''),
                'authors': [author['name'] for author in paper_data.get('authors', [])],
                'journal': paper_data.get('source', ''),
                'date': paper_data.get('sortpubdate', '').split()[0],  # Just get YYYY/MM/DD part
                'doi': paper_data.get('elocationid', '').replace('doi: ', ''),
                'abstract': abstract,
                'publication_types': pub_types,
                'keywords': paper_data.get('keywords', [])
            }
            papers.append(paper)
            
    except Exception as e:
        print(f"Error: {str(e)}")
    
    return papers

# Test it out
if __name__ == "__main__":
    # Use a more specific query to get relevant studies
    query = """("ketogenic diet"[Title/Abstract]) AND 
              (clinical trial[Publication Type] OR 
               systematic review[Publication Type] OR 
               meta-analysis[Publication Type])"""
              
    papers = get_papers(query)
    
    print(f"\nFound {len(papers)} Papers:")
    for paper in papers:
        print("\n" + "="*80)
        print(f"Title: {paper['title']}")
        print(f"Authors: {', '.join(paper['authors'])}")
        print(f"Journal: {paper['journal']}")
        print(f"Date: {paper['date']}")
        print(f"Study Type: {', '.join(paper['publication_types'])}")
        print(f"DOI: {paper['doi']}")
        print("\nAbstract:")
        print(paper['abstract'])
        if paper['keywords']:
            print("\nKeywords:", ', '.join(paper['keywords']))
        print("="*80)


Found 5 Papers:

Title: Therapeutic Potential of Ketogenic Interventions for Autosomal-Dominant Polycystic Kidney Disease: A Systematic Review.
Authors: Li D, Dawson J, Gunton JE
Journal: Nutrients
Date: 2024/12/31
Study Type: Journal Article, Review
DOI: 10.3390/nu17010145

Abstract:
BACKGROUND: Recent findings have highlighted that abnormal energy metabolism is a key feature of autosomal-dominant polycystic kidney disease (ADPKD). Emerging evidence suggests that nutritional ketosis could offer therapeutic benefits, including potentially slowing or even reversing disease progression. This systematic review aims to synthesise the literature on ketogenic interventions to evaluate the impact in ADPKD.
METHODS: A systematic search was conducted in Medline, Embase, and Scopus using relevant Medical Subject Headings (MeSH) and keywords. Studies assessing ketogenic interventions in the management of ADPKD in both human and animal models were selected for data extraction and analysis.
RESULT

In [10]:
from dataclasses import dataclass
from typing import List, Optional, Dict
import requests
import time
import xml.etree.ElementTree as ET

@dataclass
class StudyFindings:
    """Structured representation of a research paper's findings"""
    pmid: str
    title: str
    study_type: str
    date: str
    journal: str
    doi: Optional[str]
    background: Optional[str]
    methods: Optional[str]
    results: Optional[str]
    conclusions: Optional[str]
    keywords: List[str]
    intervention: Optional[str]  # e.g., "ketogenic diet", "Mediterranean diet"
    population: Optional[str]    # study population characteristics
    outcomes: List[str]         # measured outcomes
    key_findings: List[str]     # main results/conclusions

def extract_section_text(abstract_element: ET.Element, section_label: str) -> Optional[str]:
    """Extract text from a specific section of a structured abstract"""
    for section in abstract_element.findall(".//AbstractText"):
        if section.get('Label', '').upper() == section_label.upper():
            return section.text
    return None

def parse_abstract_sections(abstract_xml: str) -> Dict[str, str]:
    """Parse XML abstract into structured sections"""
    sections = {
        'background': None,
        'methods': None,
        'results': None,
        'conclusions': None,
        'unstructured': None
    }
    
    try:
        root = ET.fromstring(abstract_xml)
        abstract_element = root.find(".//Abstract")
        
        if abstract_element is not None:
            # Try to get structured sections
            sections['background'] = extract_section_text(abstract_element, 'BACKGROUND')
            sections['methods'] = extract_section_text(abstract_element, 'METHODS')
            sections['results'] = extract_section_text(abstract_element, 'RESULTS')
            sections['conclusions'] = extract_section_text(abstract_element, 'CONCLUSIONS')
            
            # If no structured sections found, get full text
            if not any(sections.values()):
                full_text = []
                for text_element in abstract_element.findall(".//AbstractText"):
                    if text_element.text:
                        full_text.append(text_element.text)
                sections['unstructured'] = " ".join(full_text)
    
    except ET.ParseError as e:
        print(f"Error parsing abstract XML: {e}")
    
    return sections

def extract_study_findings(paper_data: Dict, abstract_sections: Dict) -> StudyFindings:
    """Extract key findings and metadata from paper data"""
    
    # Extract intervention type from title and abstract
    intervention = None
    if "ketogenic" in paper_data.get('title', '').lower():
        intervention = "ketogenic diet"
    elif "mediterranean" in paper_data.get('title', '').lower():
        intervention = "mediterranean diet"
    
    # Try to identify population from methods or background
    population = None
    methods_text = abstract_sections.get('methods', '')
    if methods_text and 'participants' in methods_text.lower():
        # Extract text around "participants" mention
        start = methods_text.lower().find('participants')
        population = methods_text[start:start+150] if start != -1 else None
    
    # Extract outcomes from results section
    outcomes = []
    results_text = abstract_sections.get('results', '')
    if results_text:
        # Simple sentence splitting for key findings
        sentences = results_text.split('. ')
        outcomes = [s.strip() for s in sentences if any(keyword in s.lower() 
                   for keyword in ['significant', 'improved', 'reduced', 'increased'])]
    
    return StudyFindings(
        pmid=paper_data.get('uid', ''),
        title=paper_data.get('title', ''),
        study_type=', '.join(paper_data.get('pubtype', [])),
        date=paper_data.get('sortpubdate', '').split()[0],
        journal=paper_data.get('source', ''),
        doi=paper_data.get('elocationid', '').replace('doi: ', ''),
        background=abstract_sections.get('background'),
        methods=abstract_sections.get('methods'),
        results=abstract_sections.get('results'),
        conclusions=abstract_sections.get('conclusions'),
        keywords=paper_data.get('keywords', []),
        intervention=intervention,
        population=population,
        outcomes=outcomes,
        key_findings=[abstract_sections.get('conclusions', '')] if abstract_sections.get('conclusions') else []
    )

def get_paper_findings(query: str, max_results: int = 5) -> List[StudyFindings]:
    """Get structured findings from papers matching the query"""
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    findings = []
    
    try:
        # Search for papers
        search_params = {
            'db': 'pubmed',
            'term': query,
            'retmax': max_results,
            'retmode': 'json'
        }
        search_response = requests.get(f"{base_url}/esearch.fcgi", params=search_params)
        pmids = search_response.json()['esearchresult']['idlist']
        
        for pmid in pmids:
            time.sleep(0.34)  # Rate limit
            
            # Get paper details
            summary_params = {'db': 'pubmed', 'id': pmid, 'retmode': 'json'}
            summary_response = requests.get(f"{base_url}/esummary.fcgi", params=summary_params)
            paper_data = summary_response.json()['result'][pmid]
            
            # Get abstract
            time.sleep(0.34)
            abstract_params = {'db': 'pubmed', 'id': pmid, 'retmode': 'xml', 'rettype': 'abstract'}
            abstract_response = requests.get(f"{base_url}/efetch.fcgi", params=abstract_params)
            
            # Parse and structure the findings
            abstract_sections = parse_abstract_sections(abstract_response.text)
            findings.append(extract_study_findings(paper_data, abstract_sections))
    
    except Exception as e:
        print(f"Error fetching papers: {e}")
    
    return findings

# Test the enhanced version
if __name__ == "__main__":
    query = """("ketogenic diet"[Title/Abstract]) AND 
              (clinical trial[Publication Type] OR 
               systematic review[Publication Type] OR 
               meta-analysis[Publication Type])"""
              
    findings = get_paper_findings(query)
    
    for finding in findings:
        print("\n" + "="*80)
        print(f"Title: {finding.title}")
        print(f"Type: {finding.study_type}")
        print(f"Journal: {finding.journal} ({finding.date})")
        
        if finding.intervention:
            print(f"\nIntervention: {finding.intervention}")
        if finding.population:
            print(f"Population: {finding.population}")
            
        if finding.key_findings:
            print("\nKey Findings:")
            for finding in finding.key_findings:
                print(f"- {finding}")
                
        if finding.conclusions:
            print("\nMeasured Outcomes:")
            for outcome in finding.outcomes:
                print(f"- {outcome}")
        print("="*80)


Title: Therapeutic Potential of Ketogenic Interventions for Autosomal-Dominant Polycystic Kidney Disease: A Systematic Review.
Type: Journal Article, Review
Journal: Nutrients (2024/12/31)

Intervention: ketogenic diet

Key Findings:
- Human studies are promising; however, they have been limited by small sample sizes and short durations. Larger, long-term trials are needed to assess the efficacy, adherence, and safety of ketogenic diets in people with ADPKD.


AttributeError: 'str' object has no attribute 'conclusions'