In [8]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from Bio import Entrez

Set openai API keys

In [9]:
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

Test LLM

In [10]:
llm = ChatOpenAI(model='gpt-4o-mini',temperature=0)

Generar search queries

In [11]:
symptoms = "Ayer entrené en el gimnasio y tengo un hormigueo que me empieza en la parte baja de la espalda y baja por la pierna derecha."

pm_search_prompt = 'Eres un investigador avanzado en fisioterapia. Tu tarea es buscar papers en PubMed para encontrar un \
                    diagnóstico y tratamiento de fisioterapia para los siguientes síntomas: "{symptoms}" \
                    Dime 10 entradas de búsqueda que pondrías en la web de pubmed para encontrar papers sobre los síntomas. \
                    No emplees combinaciones como OR o AND, dime únicamente las palabras clave de búsqueda en ingles.\
                    Devuelve únicamente las entradas de búsqueda como una lista con el siguiente formato: [entry1,entry2,...], no digas nada más.'

pm_search_template = ChatPromptTemplate.from_template(pm_search_prompt)

Function to search pubmed articles

In [12]:
def generate_search(pm_search_template,symptoms,llm):
    pm_search_llm = pm_search_template | llm
    search_entries = pm_search_llm.invoke({'symptoms':symptoms})
    return search_entries.content
search_entries = generate_search(pm_search_template,symptoms,llm).replace('[','').replace(']','').replace("'","").split(', ')
print((search_entries))

['lower back pain', 'tingling leg', 'lumbar radiculopathy', 'sciatica', 'physical therapy treatment', 'nerve compression', 'exercise-induced pain', 'rehabilitation exercises', 'myofascial pain syndrome', 'neurological symptoms']


Search in pubmed API

In [13]:
def search(entry):
    Entrez.email = 'oscarvallslozano@gmail.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='5',
                            retmode='xml',
                            term=entry)
    results = Entrez.read(handle)
    return results

def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'oscarvallslozano@gmail.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results


results = search(search_entries[0])
id_list = results['IdList']
papers = fetch_details(id_list)
for i, paper in enumerate(papers['PubmedArticle']):
        print("{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))


1) Neck Pain and Lower Back Pain.
2) Chronic Lower Back Pain in Weight Lifters: Epidemiology, Evaluation, and Management.
3) Clinical outcomes and cost-effectiveness of massage chair therapy versus basic physiotherapy in lower back pain patients: A randomized controlled trial.
4) Acute Lumbar Back Pain.
5) Treatment and ergonomics training of work-related lower back pain and body posture problems for nurses.


In [16]:
for paper in papers['PubmedArticle']:
    paper_id = paper['MedlineCitation']['PMID']
    paper_title = paper['MedlineCitation']['Article']['ArticleTitle']
    abstract_text = paper['MedlineCitation']['Article']['Abstract']['AbstractText']
    if not os.path.isfile(f'./docs/{paper_id}.txt'):
        with open(file=f'./docs/{paper_id}.txt',mode='w') as f:
            f.write(f"Title: {paper_title}\n")
            f.write(f"Abstract: {abstract_text}")
    else:
        continue

In [20]:
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader("./docs",glob="**/*.txt")
docs = loader.load()

In [21]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

19

In [22]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [24]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

diagnosis_template = """
You are a senior physiotherapist and you received a patient with the following {symptoms}. You have found the following scientific evidence related to the patient's case: {context}
Your task is to evaluate the case and suggest possible diagnosis and treatments to apply to the patient.
"""

rag_chain = ("context": retriever)



retrieved_docs = retriever.invoke()

len(retrieved_docs)

TypeError: BaseRetriever.invoke() missing 1 required positional argument: 'input'