<a href="https://colab.research.google.com/github/mitchgraves/pubmed-digest-prototype/blob/main/pubmed_research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from xml.etree import ElementTree as ET

import openai
import requests

# Set up OpenAI API key. Prefer the OPENAI_API_KEY environment variable so the
# secret does not have to be written into (and shared with) the notebook; the
# placeholder below is only a fallback and must be replaced if the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-key-here")

# Function to search PubMed
def search_pubmed(query, start_date, end_date, max_results=15, timeout=30):
    """Search PubMed through the NCBI E-utilities ESearch endpoint.

    Args:
        query: Comma-separated keywords. Surrounding whitespace is stripped,
            empty entries are dropped, and the remaining terms are joined
            with ``OR``.
        start_date: Lower bound of the date range, formatted ``YYYY/MM/DD``.
        end_date: Upper bound of the date range, formatted ``YYYY/MM/DD``.
        max_results: Maximum number of article IDs to request.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of PubMed ID strings. The list is empty when the query has no
        usable terms, the request does not return HTTP 200, or nothing matches.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    # Normalise the keyword list so "a, b" and "a,b" produce the same term and
    # a blank query never reaches the API.
    terms = [term.strip() for term in query.split(",") if term.strip()]
    if not terms:
        print("No search terms provided.")
        return []
    formatted_query = " OR ".join(terms)

    # NOTE(review): no `datetype` is sent, so the date field the range applies
    # to is whatever ESearch defaults to -- confirm it matches the intended
    # "publication date" semantics.
    search_params = {
        "db": "pubmed",
        "term": formatted_query,
        "mindate": start_date,
        "maxdate": end_date,
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance",  # Sort results by relevance
    }
    search_response = requests.get(base_url, params=search_params, timeout=timeout)
    # Mirror fetch_pubmed_abstracts: report a failed request instead of trying
    # to decode an error page as JSON.
    if search_response.status_code != 200:
        print("Error searching PubMed.")
        return []
    search_data = search_response.json()

    article_ids = search_data.get("esearchresult", {}).get("idlist", [])
    if not article_ids:
        print("No articles found for the given query and time period.")
        return []

    return article_ids


# Function to fetch PubMed abstracts using PubMed IDs
def fetch_pubmed_abstracts(article_ids, timeout=30):
    """Fetch title, abstract, date, journal and URL for the given PubMed IDs.

    Args:
        article_ids: Iterable of PubMed IDs (as returned by ``search_pubmed``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts with the keys ``title``, ``abstract``, ``pub_date``,
        ``source`` and ``url``, one per ``PubmedArticle`` element in the
        response. The list is empty when no IDs are given or the request does
        not return HTTP 200.
    """
    # Nothing to look up: avoid sending an EFetch request with an empty id list.
    if not article_ids:
        return []

    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    fetch_params = {
        "db": "pubmed",
        "id": ",".join(map(str, article_ids)),  # Comma-separated article IDs
        "retmode": "xml",
    }
    fetch_response = requests.get(fetch_url, params=fetch_params, timeout=timeout)
    if fetch_response.status_code != 200:
        print("Error fetching article details.")
        return []

    root = ET.fromstring(fetch_response.content)
    return [_parse_pubmed_article(article) for article in root.findall(".//PubmedArticle")]


def _element_text(element):
    """Return all text inside *element*, including text nested in child tags."""
    # `.text` alone stops at the first child tag (e.g. <i>, <sub>) and is None
    # when the element starts with one; itertext() walks the whole subtree.
    return "".join(element.itertext()).strip()


def _format_pub_date(pub_date):
    """Build a display date from a ``PubDate`` element (or ``None``)."""
    if pub_date is None:
        return "No publication date available"

    day = pub_date.findtext("Day", "")
    month = pub_date.findtext("Month", "")
    year = pub_date.findtext("Year", "")
    if month and day:
        return f"{month} {day}, {year}"
    if month:
        # Keep the month when only the day is missing.
        return f"{month} {year}".strip()
    # Fall back to the free-text MedlineDate when no Year element is present.
    return year or pub_date.findtext("MedlineDate", "") or "No publication date available"


def _parse_pubmed_article(article):
    """Convert one ``PubmedArticle`` element into the digest's article dict."""
    title_element = article.find(".//ArticleTitle")
    title = _element_text(title_element) if title_element is not None else ""

    # Collect every non-empty abstract section, then join them into one text.
    section_texts = (_element_text(section) for section in article.findall(".//AbstractText"))
    abstract_sections = [text for text in section_texts if text]
    full_abstract = "\n".join(abstract_sections) if abstract_sections else "No abstract available"

    pmid = article.findtext(".//PMID", "No PubMed ID available")
    if pmid != "No PubMed ID available":
        url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    else:
        url = "No URL available"

    return {
        "title": title or "No title available",
        "abstract": full_abstract,
        "pub_date": _format_pub_date(article.find(".//PubDate")),
        "source": article.findtext(".//Journal/Title", "No source available"),
        "url": url,
    }


# Function to generate a weekly digest using OpenAI
def generate_digest(articles, field_of_research, *, model="gpt-3.5-turbo",
                    max_tokens=700, temperature=0.3):
    """Ask an OpenAI chat model to write a narrative digest of the articles.

    Args:
        articles: List of dicts that each provide ``title`` and ``abstract``.
        field_of_research: Research field named in the user prompt.
        model: Chat model to call.
        max_tokens: Upper bound on the length of the generated digest.
        temperature: Sampling temperature passed to the model.

    Returns:
        The digest text produced by the model, or a fixed notice when
        ``articles`` is empty (no API call is made in that case).
    """
    # With nothing to summarise the model would have to invent content, so
    # skip the API call entirely.
    if not articles:
        return "No articles available to summarize."

    # Concatenate abstracts into a single string for the prompt
    combined_abstracts = "\n\n".join(
        f"Title: {article['title']}\nAbstract: {article['abstract']}" for article in articles
    )
    # Report the real number of papers instead of a hard-coded "15".
    article_count = len(articles)

    system_prompt = f'''
You are a highly skilled assistant with expertise in medical research and a strong ability to communicate complex topics clearly and engagingly. Your writing should be in the style of a seasoned medical journalist, combining precision with depth, while making complex concepts accessible.

You specialize in synthesizing the latest research, highlighting key findings, and providing clear, concise summaries. When summarizing research articles, you should:
- Provide a narrative-style overview that connects ideas across multiple sources.
- Focus on the most impactful and innovative findings.
- Write as though explaining the research to a colleague who is knowledgeable but pressed for time.

Your responses should strike a balance between thoroughness and brevity, ensuring that they provide an insightful and digestible summary of recent developments in the field.'''
    user_prompt = f'''
I am a busy doctor who needs to stay up-to-date on the latest research in: {field_of_research}.

Please help me by creating a clear and informative news digest highlighting the most important and relevant studies published recently in this field. The digest should:
- Focus on key themes and groundbreaking findings, making connections between related studies.
- Be written in a narrative style, as though you are an expert researcher summarizing your findings to a colleague.
- Present a concise yet informative summary, providing enough context for the reader to understand the significance of the findings without being overwhelmed by excessive details.

Here are the top {article_count} PubMed papers and abstracts published in the last week:
{combined_abstracts}
'''

    # Call the OpenAI chat completions API to get a response
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # The message content can be absent; always hand callers a string.
    return response.choices[0].message.content or ""

# Integrated Example Usage
if __name__ == "__main__":
    query = "lung cancer,lung cancer screening"  # Use comma to separate multiple keywords
    start_date = "2024/10/01"  # Format: YYYY/MM/DD
    end_date = "2024/12/09"    # Format: YYYY/MM/DD

    # Step 1: Search for articles
    article_ids = search_pubmed(query, start_date, end_date)

    if article_ids:
        # Step 2: Fetch detailed abstracts
        articles_with_abstracts = fetch_pubmed_abstracts(article_ids)

        # Only summarise when details were actually retrieved; the fetch step
        # returns an empty list on error, and a digest built from nothing
        # would be invented by the model.
        if articles_with_abstracts:
            # Step 3: Generate Weekly Digest
            field_of_research = "lung cancer screening"
            digest = generate_digest(articles_with_abstracts, field_of_research)

            # Step 4: Output the digest
            print("\nWeekly Digest of Recent Research:")
            print(digest)
        else:
            print("No article details could be retrieved; skipping digest generation.")



Weekly Digest of Recent Research:
In the rapidly evolving landscape of lung cancer screening, recent research has yielded significant insights and updates that are critical for clinicians and researchers alike. Here, I summarize the most impactful findings from the latest studies, highlighting key themes and breakthroughs that may inform your practice and understanding of lung cancer management.

### Key Findings in Lung Cancer Screening

1. **Advancements in Screening Techniques and Management**:
   The Dutch-Belgian NELSON trial has provided robust data on managing lung nodules detected through low-dose CT (LDCT) screening. The trial emphasizes the importance of nodule volumetric assessment and growth rate in distinguishing malignant from benign nodules. This approach could refine screening protocols and improve patient outcomes by reducing unnecessary interventions for benign nodules while ensuring timely action for those that are malignant.

2. **Barriers to Effective Screening**:

In [None]:
%%capture
# Install pinned versions of openai and httpx. --force-reinstall reinstalls the
# packages even if they are already present; --quiet reduces pip's output, and
# the %%capture cell magic above captures whatever output remains.
!pip install openai==1.55.3 httpx==0.27.2 --force-reinstall --quiet

In [None]:
# Send signal 9 (SIGKILL on POSIX) to the current process, ending it at once.
# NOTE(review): presumably this forces the notebook runtime to restart so the
# packages reinstalled by the pip cell are picked up -- confirm.
import os
os.kill(os.getpid(), 9)