# PubMed Research Agent
This notebook allows you to query PubMed for peer-reviewed articles on clinical and disease-related topics (e.g., 'candidate drugs targeting mutation E545K in PI3K') and summarizes abstracts using a lightweight DistilBERT model. Enter a query, review results, and choose to ask another question.

In [None]:
# Install dependencies
!pip install biopython transformers torch requests pandas ipywidgets

In [None]:
# Import libraries
from Bio import Entrez
from transformers import pipeline
import pandas as pd
import torch
import re
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output

# Set up Entrez with your email (replace with your actual email).
# NCBI asks for a contact address on every E-utilities request.
Entrez.email = "your.email@example.com"  # Required by NCBI

# Check for GPU availability.
# transformers pipelines take a CUDA device index, or -1 to run on CPU.
device = 0 if torch.cuda.is_available() else -1
print(f"Using device: {'GPU' if device == 0 else 'CPU'}")

# Initialize lightweight summarizer.
# The summarization pipeline needs a sequence-to-sequence checkpoint.
# "distilbert-base-uncased" is encoder-only and cannot be loaded for this
# task, so a distilled BART checkpoint fine-tuned for summarization is used.
try:
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=device,
    )
except Exception as e:
    # Without a summarizer nothing below can work, so stop here.
    print(f"Error loading model: {e}")
    import sys
    sys.exit(1)

def search_pubmed(query, max_results=5):
    """Search PubMed and return the PMIDs matching the query.

    Args:
        query: Search term passed to ``Entrez.esearch``.
        max_results: Maximum number of PMIDs to request (``retmax``).

    Returns:
        The ``IdList`` entry of the parsed search response, or an empty list
        when the query is blank or the request/parsing fails (the error is
        printed, not raised).
    """
    # Nothing to search for: skip the network round-trip entirely.
    if not query or not query.strip():
        return []
    try:
        handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
        try:
            record = Entrez.read(handle)
        finally:
            # Release the connection even when parsing the response fails.
            handle.close()
        return record["IdList"]
    except Exception as e:
        print(f"Error searching PubMed: {e}")
        return []

def fetch_article_details(pmids):
    """Fetch article details (title, abstract, authors) for given PMIDs.

    Args:
        pmids: Sequence of PMID strings, e.g. the list returned by
            ``search_pubmed``.

    Returns:
        A list of dicts with ``"title"``, ``"abstract"`` and ``"authors"``
        keys. The list is empty when ``pmids`` is empty or the request
        fails; records that cannot be parsed are reported and skipped.
    """
    articles = []
    if not pmids:
        return articles
    try:
        handle = Entrez.efetch(db="pubmed", id=",".join(pmids), retmode="xml")
        try:
            records = Entrez.read(handle)
        finally:
            # Release the connection even when parsing the response fails.
            handle.close()
        for article in records["PubmedArticle"]:
            try:
                citation = article["MedlineCitation"]["Article"]
                title = citation["ArticleTitle"]

                # AbstractText is a list; structured abstracts carry one
                # entry per section, so join them all instead of keeping
                # only the first. An absent or empty list yields "".
                sections = citation.get("Abstract", {}).get("AbstractText", [])
                abstract = " ".join(str(section) for section in sections)

                # Group/consortium authors have no LastName/Initials; fall
                # back to CollectiveName and drop entries that end up blank.
                names = []
                for author in citation.get("AuthorList", []):
                    name = (
                        author.get("LastName", "") + " " + author.get("Initials", "")
                    ).strip()
                    names.append(name or author.get("CollectiveName", ""))
                authors = ", ".join(name for name in names if name)

                articles.append({"title": title, "abstract": abstract, "authors": authors})
            except Exception as e:
                print(f"Error processing article: {e}")
        return articles
    except Exception as e:
        # Return whatever was collected before the failure.
        print(f"Error fetching articles: {e}")
        return articles

def summarize_text(text, max_length=150):
    """Summarize text with the module-level ``summarizer`` pipeline.

    Args:
        text: Text to summarize, typically an article abstract.
        max_length: Upper bound on the generated summary length, forwarded
            to the pipeline.

    Returns:
        The generated summary, ``"No abstract available."`` for empty or
        whitespace-only input, or ``"Summary not generated."`` when the
        pipeline raises (the error is printed, not raised).
    """
    if not text or not text.strip():
        return "No abstract available."
    try:
        summary = summarizer(
            text,
            max_length=max_length,
            # Keep the lower bound from exceeding a small caller-chosen max.
            min_length=min(30, max_length),
            do_sample=False,
            # Let the tokenizer cut over-long input at the model's token
            # limit; slicing the string by characters discards far more
            # text than the limit requires.
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        print(f"Error summarizing text: {e}")
        return "Summary not generated."

def process_query(query):
    """Run the search, fetch and summarize steps for one query.

    Returns a list of row dicts with ``Title``, ``Authors``, ``Abstract``
    and ``Summary`` keys, or ``None`` (after printing a notice) when the
    search finds no PMIDs or no article details could be retrieved.
    """

    def to_row(entry):
        # Summarize first, then assemble the row in column order.
        digest = summarize_text(entry["abstract"])
        return {
            "Title": entry["title"],
            "Authors": entry["authors"],
            "Abstract": entry["abstract"],
            "Summary": digest,
        }

    # Swap the "XXXX" placeholder for a generic phrase before searching.
    search_term = re.sub(r"XXXX", "specific mutation", query)

    id_list = search_pubmed(search_term)
    if not id_list:
        print("No articles found.")
        return None

    fetched = fetch_article_details(id_list)
    if not fetched:
        print("No articles retrieved.")
        return None

    return [to_row(entry) for entry in fetched]

# Interactive query interface: an output area, two buttons and a text box.
# "Ask Another Question" starts disabled.
output = widgets.Output()
continue_button = widgets.Button(
    description="Ask Another Question",
    disabled=True,
)
submit_button = widgets.Button(description="Submit Query")
query_input = widgets.Text(
    description='Query:',
    placeholder='Enter query (e.g., candidate drugs targeting mutation E545K in PI3K)',
    value='',
    layout={'width': '600px'},
)

def on_submit_clicked(b):
    """Handle a click on the "Submit Query" button.

    Reads the query box, runs ``process_query``, writes the results to a CSV
    under ``/content``, starts a browser download of that file, prints a
    per-article summary into the output area and enables the
    "Ask Another Question" button.

    Args:
        b: The clicked button (unused).
    """
    with output:
        clear_output()
        query = query_input.value.strip()
        if not query:
            print("Please enter a query.")
            return
        print(f"Processing query: {query}")
        results = process_query(query)
        if not results:
            # process_query has already printed why nothing came back.
            return

        df = pd.DataFrame(results)
        # The query is free text and may contain '/', ':' or other
        # characters that are invalid in, or change the meaning of, a file
        # path. Keep word characters and hyphens; map the rest to '_'.
        safe_stub = re.sub(r"[^\w-]", "_", query[:20])
        output_file = f"/content/pubmed_results_{safe_stub}.csv"
        df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")
        files.download(output_file)

        for number, row in enumerate(results, start=1):
            print(f"\nArticle {number}:")
            print(f"Title: {row['Title']}")
            print(f"Authors: {row['Authors']}")
            print(f"Summary: {row['Summary']}")
        continue_button.disabled = False

def on_continue_clicked(b):
    """Reset the form so a new query can be entered.

    Clears the output area, disables the "Ask Another Question" button,
    empties the query box and prints a prompt. ``b`` is the clicked button
    and is not used.
    """
    with output:
        clear_output()
        continue_button.disabled = True
        query_input.value = ""
        print("Enter a new query.")

# Attach each button to its click handler, then render the controls.
for button, handler in (
    (submit_button, on_submit_clicked),
    (continue_button, on_continue_clicked),
):
    button.on_click(handler)

display(query_input, submit_button, continue_button, output)