In [2]:
import openai
from openai import OpenAI
import chromadb
import os
from chromadb.config import Settings
from typing import List
import json
from dotenv import load_dotenv
load_dotenv()

from Bio import Entrez

# Credentials are read from environment variables; load_dotenv() is called above
# so they can come from a local .env file. os.getenv returns None when a variable
# is unset, so a missing entry only shows up later, at the first API call.
openai.api_key = os.getenv('OPENAI_API_KEY')
# Contact e-mail attached to Bio.Entrez before any Entrez request is made.
Entrez.email = os.getenv('EMAIL')

In [81]:
#translate free text query into a query formatted for PubMed
def translate_query(user_query:str) -> str:
    """Translate a free-text question into a PubMed search string.

    The question is sent to the chat model together with a system prompt that
    asks for a JSON object containing ``mesh_terms``, ``publication_date`` and
    ``pubmed_query``. Only the ``pubmed_query`` value is returned.

    Args:
        user_query: The user's natural-language question.

    Returns:
        The PubMed query string produced by the model.

    Raises:
        ValueError: If the model reply does not contain a parseable JSON object.
        KeyError: If the JSON object has no usable ``pubmed_query`` entry.
    """
    system_prompt = """
    You are helping with a pubmed query.  Take the user input and extract the search terms of interest
    and convert it into a PubMed compatible search using MeSH terms as well as any publication dates if 
    the user has input those.  Return ***ONLY valid JSON*** in the reponse.  Do not explain, format or decorate
    the output.  The JSON should be formatted as follows with the key:value pairs
    {mesh_terms: a list of biomedical terms from the query as MeSH terms where possible,
    publication_date: formatted as a date range YYYY-MM-DD for the start and end dates.  Or, if no dates are input, default to January 1, 2022-December 31, 2024
    pubmed_query: a properly formatted PubMed query search string using [MeSH Terms], [All Fields], and [Publication - Date]
    """
    user_prompt = f"""User's natural language query: {user_query}"""
    
    response = openai_client.responses.create(
        model = 'gpt-4o-mini',
        input = [
            {
                'role': 'system',
                'content': system_prompt,
            },
            {
                'role':'user',
                'content':user_prompt,
            }
        ],
        temperature = 0
    )
    raw_text = response.output[0].content[0].text

    # Models sometimes wrap the JSON in a ```json fence or add a stray sentence
    # despite the instructions, so parse only the outermost {...} span.
    start = raw_text.find('{')
    end = raw_text.rfind('}')
    if start == -1 or end < start:
        raise ValueError(f'Model reply does not contain a JSON object: {raw_text!r}')
    try:
        parsed = json.loads(raw_text[start:end + 1])
    except json.JSONDecodeError as exc:
        raise ValueError(f'Model reply is not valid JSON: {raw_text!r}') from exc

    pubmed_query = parsed.get('pubmed_query')
    if not isinstance(pubmed_query, str) or not pubmed_query.strip():
        # Fail here rather than sending an empty/None term to PubMed later.
        raise KeyError('pubmed_query')
    return pubmed_query.strip()
    

In [4]:
# Keep the abstract cache on disk so it survives kernel restarts. Passing only
# `persist_directory` through Settings leaves the client in-memory, which makes
# the "cache" empty again on every fresh run.
chroma_client = chromadb.PersistentClient(path = './chroma_db')
collection = chroma_client.get_or_create_collection('pubmed_cache')

# Shared OpenAI client used by the embedding, query-translation and summary helpers.
openai_client = OpenAI()

def get_embedding(text: str) -> List[float]:
    """Embed ``text`` with the `text-embedding-3-small` model and return the vector."""
    embedding_response = openai_client.embeddings.create(
        input = text,
        model = 'text-embedding-3-small',
    )
    # A single input was sent, so the vector of interest is the first data item.
    first_item = embedding_response.data[0]
    return first_item.embedding

In [5]:
#get the articles and metadata using the prompt created above
def parse_pub_date(pub_date):
    """Format a PubMed ``PubDate`` mapping as ``'YYYY - MM - DD'``.

    Args:
        pub_date: Mapping that may contain ``Year``, ``Month``, ``Day`` or,
            for records without a structured date, ``MedlineDate``.

    Returns:
        ``'<year> - <month> - <day>'`` when ``Year`` is present. Month and day
        default to ``'01'``; month abbreviations/names such as ``Jan`` are
        converted to two-digit numbers and numeric parts are zero-padded.
        When only ``MedlineDate`` is present that free-text value is returned
        unchanged. Otherwise ``'Not Available'``.
    """
    if 'Year' not in pub_date:
        # Some records only carry a free-text date such as '2023 Nov-Dec'.
        medline_date = str(pub_date.get('MedlineDate', '')).strip()
        return medline_date or 'Not Available'

    month_abbreviations = (
        'jan', 'feb', 'mar', 'apr', 'may', 'jun',
        'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    )

    year = pub_date['Year']
    month = str(pub_date.get('Month', '01')).strip()
    day = str(pub_date.get('Day', '01')).strip()

    if month.isdigit():
        month = month.zfill(2)
    elif month[:3].lower() in month_abbreviations:
        # Normalise 'Jan' / 'January' so it matches the numeric '01' default.
        month = f'{month_abbreviations.index(month[:3].lower()) + 1:02d}'
    if day.isdigit():
        day = day.zfill(2)

    return f'{year} - {month} - {day}'

def _format_author(author):
    """Return a display name for one AuthorList entry (personal name, else CollectiveName)."""
    personal_name = ' '.join(
        part for part in (author.get('ForeName', ''), author.get('LastName', '')) if part
    )
    return personal_name or author.get('CollectiveName', '')


def _read_and_close(handle):
    """Parse an Entrez XML handle and always close it afterwards."""
    try:
        return Entrez.read(handle)
    finally:
        handle.close()


def get_articles(query, n_results = 5):
    """Search PubMed and return metadata for the matching articles.

    Args:
        query: PubMed search string passed to ``Entrez.esearch``.
        n_results: Maximum number of PMIDs to request (``retmax``).

    Returns:
        A list of dicts with the keys ``PMID``, ``Title``, ``Authors``,
        ``Abstract``, ``Journal``, ``Keywords``, ``URL`` and
        ``Publication Date``. Empty when the search matches nothing.
    """
    search_record = _read_and_close(
        Entrez.esearch(db = 'pubmed', term = query, retmax = n_results)
    )
    id_list = search_record['IdList']
    if not id_list:
        # Nothing matched; skip the fetch entirely instead of sending an empty id.
        return []

    # One efetch for all PMIDs instead of one network round-trip per article.
    fetched = _read_and_close(
        Entrez.efetch(db = 'pubmed', id = ','.join(id_list), retmode = 'xml')
    )

    results = []
    for record in fetched['PubmedArticle']:
        citation = record['MedlineCitation']
        article = citation['Article']
        # The PMID is taken from the record itself because the fetch is batched.
        pmid = str(citation['PMID'])

        title = article.get('ArticleTitle', 'Title Not Available')
        if 'Abstract' in article:
            abstract = ' '.join(str(part) for part in article['Abstract'].get('AbstractText', []))
        else:
            abstract = ''

        # Drop blank names so a collective-only author list cannot yield ' '
        # and so the fallback text is used when no name is available at all.
        author_names = [_format_author(a) for a in article.get('AuthorList', [])]
        authors_list = ', '.join(name for name in author_names if name) or 'Authors Not Available'

        journal = article['Journal'].get('Title', 'Journal Not Available')
        keywords = ', '.join(
            str(k['DescriptorName']) for k in citation.get('MeshHeadingList', [])
        ) or 'Keyword Not Available'
        pub_date = parse_pub_date(article['Journal']['JournalIssue']['PubDate'])
        url = f"https://www.ncbi.nlm.nih.gov/pubmed/{pmid}"

        results.append({
            'PMID':pmid,
            'Title':title,
            'Authors':authors_list,
            'Abstract':abstract,
            'Journal':journal,
            'Keywords':keywords,
            'URL':url,
            'Publication Date':pub_date,
        })

    return results
    
#def create_string(result): #need to convert the results above to a long string to input into the LLM
#
#    text = ' '.join(f"{result[i].get('Title', '')} {result[i].get('Abstract', '')} {result[i].get('Keywords', '')} {result[i].get('Journal', '')} {result[i].get('Publication Date', '')}" for i in range(len(result)))
#
#    return text


In [11]:
query = translate_query('I want to know whats going on about robotic hernia repairs in 2024')

In [12]:
print(query)

(Hernia, Inguinal/surgery[MeSH Terms] OR robotic hernia repair[All Fields]) AND (2024[Publication Date])


In [8]:
def extract_abstracts(results):
    """Concatenate every result's 'Abstract' into one string.

    The abstracts are joined back-to-back with no separator, and the combined
    text is returned wrapped in a single-element list.
    """
    merged_text = ''.join(entry['Abstract'] for entry in results)
    return [merged_text]

In [14]:
results = get_articles(query)


In [16]:
print(results)
print(len(results))

[{'PMID': '39948954', 'Title': '[A Case of Appendiceal Pseudomyxoma Peritonei Diagnosed by Inguinal Hernia and Treated with Multidisciplinary Treatment].', 'Authors': 'Akira Sogawa, Takashi Matsunaga, Yasumitsu Oe, Toru Imagami, Byonggu An, Nobuyuki Takao, Takeshi Togawa, Akiyoshi Mizumoto, Shizuki Takemura, Yutaka Yonemura', 'Abstract': 'Pseudomyxoma peritonei (PMP)is a clinical condition caused by ruptured mucinous tumors like appendix and ovaries, and large amounts of mucus fill the abdominal cavity. We experienced a case of PMP presented with an inguinal hernia. A 62-year-old man complained of a bulge in the left inguinal region. The results of the test was PMP caused by ruptured appendiceal myxoma. After intraperitoneal chemotherapy, we performed surgery. We performed right hemicolectomy, low anterior resection, peritoneum excision of the pelvic space, right diaphragm resection, omentectomy, cholecystectomy and splenectomy. Finally, hyperthermia chemotherapy was performed. Microsc

In [25]:
cache_abstracts(results)

In [24]:
#cache the results in our db
def cache_abstracts(results):
    """Embed article abstracts and store them in the Chroma collection.

    Args:
        results: Iterable of article dicts as produced by ``get_articles``
            (keys ``PMID``, ``Abstract``, ``Title``, ``Journal``, ``Authors``,
            ``Publication Date``, ``Keywords``).

    Returns:
        None. Articles without abstract text are skipped, and nothing is sent
        to the collection when no article is left to store.
    """
    documents = []
    ids = []
    embeddings = []
    metadatas = []
    seen_ids = set()

    for result in results:
        abstract = (result.get('Abstract') or '').strip()
        if not abstract:
            # There is nothing to embed for an article without an abstract.
            continue

        pmid = str(result['PMID'])
        if pmid in seen_ids:
            # The collection requires unique ids within one batch.
            continue
        seen_ids.add(pmid)

        documents.append(abstract)
        ids.append(pmid)
        embeddings.append(get_embedding(abstract))
        metadatas.append({
            'title':result['Title'],
            'journal':result['Journal'],
            'authors':result['Authors'],
            'publication date':result['Publication Date'],
            'keywords':result['Keywords'],
        })

    if not ids:
        return

    # upsert rather than add: the same PMIDs come back across queries and across
    # re-runs of this cell, and should refresh the cached entry.
    collection.upsert(
        documents = documents,
        ids = ids,
        embeddings = embeddings,
        metadatas = metadatas,
    )

In [80]:
#take user query as above and query the vector DB first to see if there's anything similar
#def get_combined_context(user_query):
#    query = translate_query(user_query)
#    embedding = get_embedding(user_query)
#
#    rag_results = collection.query(query_embeddings = [embedding], n_results = 5)
#    rag_docs = rag_results['documents'] if rag_results['documents'] else []
#
#    pubmed_results = get_articles(query)
#    pubmed_docs = extract_abstracts(pubmed_results) #list format
#    
#    return rag_results
######
#need to finish this
######

In [19]:
r = get_combined_context('robotic hernia repair')

NameError: name 'get_combined_context' is not defined

In [20]:
def vector_db_lookup(query_embedding, n_res = 5):
    """Query the cached collection for the abstracts nearest to ``query_embedding``.

    Returns a list of dicts with the keys 'abstract', 'metadata' and 'source'
    (always 'RAG'), or an empty list when the query returns no documents.
    """
    hits = collection.query(query_embeddings = [query_embedding], n_results = n_res)
    if not hits['documents']:
        return []

    # A single query embedding was sent, so only the first result list is relevant.
    matches = []
    for text, meta in zip(hits['documents'][0], hits['metadatas'][0]):
        matches.append({
            'abstract':text,
            'metadata':meta,
            'source':'RAG',
        })
    return matches

In [None]:
def query_handler(user_query):
    """Run the retrieval steps for one user question (work in progress).

    Embeds the question, translates it into a PubMed query, looks up similar
    cached abstracts, fetches fresh PubMed articles and caches them. The
    combine and summarise steps are not written yet, so nothing is returned.
    """
    question_vector = get_embedding(user_query)
    search_string = translate_query(user_query)

    # Cached neighbours are looked up before the new articles are added.
    rag_results = vector_db_lookup(question_vector)

    fresh_articles = get_articles(search_string)
    cache_abstracts(fresh_articles)

    # TODO: get combined context

    # TODO: send to llm for summary
    
    

In [26]:
query_embedding = get_embedding('robotic inguinal hernia')

output = vector_db_lookup(query_embedding)

In [73]:
print(output[0]['metadata'])

{'keywords': 'Humans, Female, Hernia, Inguinal, Pregnancy, Robotic Surgical Procedures, Laparoscopy, Adult, Pregnancy, Ectopic, Salpingectomy, Herniorrhaphy', 'publication date': '2024 - 01 - 01', 'journal': 'CRSLS : MIS case reports from SLS', 'authors': 'Nashali Ferrara, Michael Scutella, Hetal Lad, Tiffany Hsiung, Yasmin Abedin, Arpit Amin', 'title': 'Robotic Laparoscopic Assisted Treatment of Inguinal Hernia Containing an Incarcerated Ectopic Pregnancy.'}


In [38]:
print(results[0]['Title'])

[A Case of Appendiceal Pseudomyxoma Peritonei Diagnosed by Inguinal Hernia and Treated with Multidisciplinary Treatment].


In [98]:
#getting the output formatted to feed the LLM
def format_pubmed_articles(pubmed_results):
    """Render PubMed article dicts as markdown-style text for the LLM prompt.

    Args:
        pubmed_results: List of article dicts with the keys ``Title``,
            ``Authors``, ``Journal`` and ``Abstract``.

    Returns:
        One string with a block per article. Every block ends with ``---`` and
        a newline so consecutive blocks, and other text appended afterwards,
        do not run into each other. Empty string for an empty list.
    """
    blocks = []
    for entry in pubmed_results:
        # Fields are built line by line so the source indentation does not
        # leak into the prompt text.
        blocks.append("\n".join([
            "###Article source: Pubmed",
            f"**Title:** {entry.get('Title', '')}",
            f"**Authors:** {entry.get('Authors', '')}",
            f"**Journal:** {entry.get('Journal', '')}",
            f"**Abstract:** {entry.get('Abstract', '')}",
            "---",
        ]))
    return ''.join(block + "\n" for block in blocks)

In [99]:
#getting the output formatted to feed the LLM
def format_rag_output(rag_results):
    """Render vector-store hits as markdown-style text for the LLM prompt.

    Args:
        rag_results: List of dicts with the keys ``abstract``, ``metadata``
            (a dict with ``title``, ``authors``, ``journal``) and ``source``.

    Returns:
        One string with a block per hit. Every block ends with ``---`` and a
        newline so consecutive blocks, and other text appended afterwards, do
        not run into each other. Empty string for an empty list.
    """
    blocks = []
    for hit in rag_results:
        # The source label is read from the hit being formatted; a hit stored
        # without metadata is rendered with blank fields.
        meta = hit.get('metadata') or {}
        blocks.append("\n".join([
            f"###Article(Source: {hit.get('source', 'Unknown')})",
            f"**Title:** {meta.get('title', '')}",
            f"**Authors:** {meta.get('authors', '')}",
            f"**Journal:** {meta.get('journal', '')}",
            f"**Abstract:** {hit['abstract']}",
            "---",
        ]))
    return ''.join(block + "\n" for block in blocks)

In [100]:
rag_out = format_rag_output(output)

In [101]:
pm_out = format_pubmed_articles(results)

In [120]:
def summarize(rag_pubmed_output):
    """Ask the chat model for per-article summaries plus a short overall overview.

    ``rag_pubmed_output`` is the formatted article text; it is sent as the user
    message and the text of the model's first output item is returned.
    """
    system_prompt = """
    You are a helpful assistant tasked with summarizing scientific articles for a scientist or healthcare professional
    Include the source where possible.  You can use specialized medical jargon and assume that the reader understands.
    Summarize each article highlighting the main points, any statistically significant or clinically significant results and the author's main conclusions.
    Then, at the end, provide 3-4 sentences that give a broad overview of the articles together.
    """

    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': rag_pubmed_output},
    ]
    completion = openai_client.responses.create(
        model = 'gpt-4o-mini',
        input = messages,
        temperature = 0,
    )
    first_output = completion.output[0]
    return first_output.content[0].text
    

In [121]:
total_output = rag_out + pm_out
res = summarize(total_output)

In [122]:
print(res)

### Article Summaries

1. **Robotic Laparoscopic Assisted Treatment of Inguinal Hernia Containing an Incarcerated Ectopic Pregnancy**
   - **Authors:** Nashali Ferrara et al.
   - **Journal:** CRSLS: MIS case reports from SLS
   - **Summary:** This case report discusses a rare instance of an adult female with a unilateral incarcerated inguinal hernia containing ectopic pregnancy. The patient underwent laparoscopic lysis of adhesions, reduction of the ectopic pregnancy, and unilateral salpingectomy, followed by robotic-assisted transabdominal preperitoneal inguinal hernia repair (rTAPP-IHR). The authors conclude that minimally invasive techniques can be safely employed in such complex cases, highlighting the feasibility and safety of robotic-assisted surgery in this context.

2. **Total Extra Peritoneal Repair of Inguinal Hernia under General Anesthesia Versus Spinal Anesthesia**
   - **Authors:** Udita Mishra, Ghanashyam Thapa
   - **Journal:** Journal of Nepal Health Research Council
