In [16]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from Bio import Entrez

Set OpenAI API key

In [17]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    # Fail early with an actionable message: assigning None to os.environ
    # would otherwise raise an opaque "str expected, not NoneType" TypeError.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
# Re-export the key so libraries that read it from os.environ can find it.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

Test LLM

In [18]:
# Chat model used by the chains in this notebook; temperature=0 keeps
# sampling randomness to a minimum.
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0,
)

Generate search queries

In [19]:
# Free-text description of the patient's symptoms (Spanish); it is substituted
# into the {symptoms} placeholder of the prompt below.
symptoms = "Ayer entrené en el gimnasio y tengo un hormigueo que me empieza en la parte baja de la espalda y baja por la pierna derecha."

# Prompt (Spanish) asking the model for 10 English PubMed search entries,
# returned as a bracketed list "[entry1,entry2,...]" with no extra text.
# NOTE: this is a single-quoted string joined with backslash continuations, so
# the leading indentation of every continued line is part of the prompt text.
pm_search_prompt = 'Eres un investigador avanzado en fisioterapia. Tu tarea es buscar papers en PubMed para encontrar un \
                    diagnóstico y tratamiento de fisioterapia para los siguientes síntomas: "{symptoms}" \
                    Dime 10 entradas de búsqueda que pondrías en la web de pubmed para encontrar papers sobre los síntomas. \
                    No emplees combinaciones como OR o AND, dime únicamente las palabras clave de búsqueda en ingles.\
                    Devuelve únicamente las entradas de búsqueda como una lista con el siguiente formato: [entry1,entry2,...], no digas nada más.'

# Chat prompt template built from the text above; it is later piped into the LLM.
pm_search_template = ChatPromptTemplate.from_template(pm_search_prompt)

Function to generate PubMed search queries with the LLM

In [24]:
def generate_search(pm_search_template,symptoms,llm):
    """Ask the LLM for search entries matching the given symptoms.

    Pipes ``pm_search_template`` into ``llm``, invokes the resulting chain with
    ``{'symptoms': symptoms}`` and returns the ``content`` attribute of the
    reply (the raw, unparsed text produced by the model).
    """
    reply = (pm_search_template | llm).invoke({'symptoms': symptoms})
    return reply.content
def _parse_search_entries(raw_reply):
    """Convert the LLM reply (expected shape ``[entry1,entry2,...]``) to a list of strings.

    The parsing is deliberately tolerant of the variations a chat model may
    produce: entries separated by "," with or without a following space,
    single- or double-quoted entries, and text surrounding the bracketed list.
    Empty entries are dropped.
    """
    start, end = raw_reply.find('['), raw_reply.rfind(']')
    # Keep only what is inside the outermost brackets when both are present.
    inner = raw_reply[start + 1:end] if start != -1 and end > start else raw_reply
    # Strip whitespace, stray brackets and wrapping quotes from each entry;
    # apostrophes inside an entry are preserved.
    cleaned = (piece.strip(" \t\r\n[]'\"") for piece in inner.split(','))
    return [entry for entry in cleaned if entry]


search_entries = _parse_search_entries(generate_search(pm_search_template,symptoms,llm))
print(search_entries)

['lower back pain', 'tingling leg', 'lumbar radiculopathy', 'sciatica', 'physical therapy treatment', 'nerve compression', 'exercise-induced pain', 'rehabilitation exercises', 'myofascial pain syndrome', 'neurological symptoms']


Search the PubMed API

In [25]:
def search(entry, retmax=5):
    """Search PubMed for ``entry`` and return the parsed ESearch result.

    Args:
        entry: Search term sent to PubMed.
        retmax: Maximum number of ids to return (defaults to 5, the value
            that was previously hard-coded).

    Returns:
        The structure produced by ``Entrez.read``; callers read the matching
        PubMed ids from its ``'IdList'`` key.
    """
    # Contact e-mail is taken from the ENTREZ_EMAIL environment variable.
    Entrez.email = os.getenv('ENTREZ_EMAIL')
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax=str(retmax),
                            retmode='xml',
                            term=entry)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the underlying connection, even if parsing fails.
        handle.close()

def fetch_details(id_list):
    """Fetch the full PubMed records for the given ids.

    Args:
        id_list: Iterable of PubMed ids (as returned under ``'IdList'`` by
            ``search``).

    Returns:
        The structure produced by ``Entrez.read``; callers iterate over its
        ``'PubmedArticle'`` key. For an empty ``id_list`` no request is made
        and ``{'PubmedArticle': []}`` is returned.
    """
    if not id_list:
        # A search with no hits yields no ids; skip the request instead of
        # sending efetch an empty id parameter.
        return {'PubmedArticle': []}
    ids = ','.join(str(pmid) for pmid in id_list)
    # Contact e-mail is taken from the ENTREZ_EMAIL environment variable.
    Entrez.email = os.getenv('ENTREZ_EMAIL')
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    try:
        return Entrez.read(handle)
    finally:
        # Always release the underlying connection, even if parsing fails.
        handle.close()


# Demo run: search PubMed with the first generated entry, fetch the matching
# records and list their titles, numbered from 1.
results = search(search_entries[0])
id_list = results['IdList']
papers = fetch_details(id_list)
for rank, paper in enumerate(papers['PubmedArticle'], start=1):
    title = paper['MedlineCitation']['Article']['ArticleTitle']
    print(f"{rank}) {title}")


1) Neck Pain and Lower Back Pain.
2) Chronic Lower Back Pain in Weight Lifters: Epidemiology, Evaluation, and Management.
3) Clinical outcomes and cost-effectiveness of massage chair therapy versus basic physiotherapy in lower back pain patients: A randomized controlled trial.
4) Acute Lumbar Back Pain.
5) Treatment and ergonomics training of work-related lower back pain and body posture problems for nurses.


In [26]:
# Persist every fetched paper as ./docs/<PMID>.txt (title + abstract) so the
# documents can be loaded from disk afterwards. Existing files are left untouched.
os.makedirs('./docs', exist_ok=True)  # open() does not create missing directories
for paper in papers['PubmedArticle']:
    citation = paper['MedlineCitation']
    article = citation['Article']
    paper_id = citation['PMID']
    paper_title = article['ArticleTitle']
    # Not every PubMed record carries an abstract; treat a missing one as
    # empty instead of raising KeyError.
    abstract_sections = article.get('Abstract', {}).get('AbstractText', [])
    if isinstance(abstract_sections, str):
        abstract_sections = [abstract_sections]
    # Biopython parses AbstractText as a list of sections; join them so the
    # file holds plain text rather than the repr of a Python list.
    abstract_text = ' '.join(str(section) for section in abstract_sections)
    doc_path = f'./docs/{paper_id}.txt'
    if os.path.isfile(doc_path):
        continue
    # Explicit UTF-8 so non-ASCII characters do not depend on the platform default.
    with open(file=doc_path, mode='w', encoding='utf-8') as f:
        f.write(f"Title: {paper_title}\n")
        f.write(f"Abstract: {abstract_text}")

In [27]:
from langchain_community.document_loaders import DirectoryLoader

# Load every .txt file found under ./docs (recursive glob) as documents.
loader = DirectoryLoader(
    "./docs",
    glob="**/*.txt",
)
docs = loader.load()

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 1000 characters with a
# 200-character overlap; add_start_index=True is passed to the splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

# Last expression of the cell: shows how many chunks were produced.
len(all_splits)

19

In [29]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Build a Chroma vector store from the chunks, embedding them with OpenAIEmbeddings.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
)

In [42]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Symptoms used for this example run. They live in their own variable so that
# (a) the evidence is retrieved for the same text that is placed in the prompt
# and (b) the notebook-level `symptoms` defined earlier is not overwritten,
# which would make re-running cells give different results.
case_symptoms = "lower back pain and stiffness"

# Similarity retriever returning the 10 closest chunks from the vector store.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

# Prompt for the diagnosis assistant; expects {symptoms} and {context}.
diagnosis_template = """
You are a physiotherapist diagnosis assistance system and you received a patient with the following symptoms:{symptoms}. 
You have found the following scientific evidence related to the patient's case: {context}
Your task is to evaluate the case and suggest possible diagnosis and treatments to apply to the patient.
This suggestions will be read by the physiotherapist to be able to take faster decisions prior to the session with the patient.
The answer should be formatted with the following items:
    * Diagnosis: Propose injuries or dolencies that could be diagnosed by the patient judging by their symptoms and evidence found.
                 Propose different injuries sorted by increased severity.
                 If the injuries are severe, also suggest a way to be diagnosed or discarded by the physiotherapist.
    * Treatment: Propose some treatments that may be helpful to apply during the physiotherapy session.
    * Exercises: Suggest exercises that the patient shoud be doing after the session to recover or prevent further injuries.
Answer in spanish with long and developed reasoning.
"""

# Retrieve the evidence for the same symptoms that go into the prompt.
retrieved_docs = retriever.invoke(case_symptoms)

# Print the number of retrieved documents
print(f"Number of retrieved documents: {len(retrieved_docs)}")

# Create the prompt template
prompt = PromptTemplate(
    input_variables=["symptoms", "context"],
    template=diagnosis_template
)

# Create the RAG chain.
# NOTE(review): LLMChain is deprecated in recent LangChain releases in favour
# of `prompt | llm`; it is kept here so that `result['text']` keeps working.
rag_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=False
)

# Concatenate the retrieved chunks into one context string and run the chain.
context = "\n".join(doc.page_content for doc in retrieved_docs)
result = rag_chain.invoke({"symptoms": case_symptoms, "context": context})

print(result['text'])


Number of retrieved documents: 10
**Diagnóstico:**

Basado en los síntomas de dolor y rigidez en la parte baja de la espalda, así como en la evidencia científica revisada, se pueden considerar las siguientes posibles lesiones o dolencias, ordenadas por gravedad:

1. **Esguince muscular o de ligamentos**: Esta es una de las causas más comunes de dolor lumbar, especialmente en personas que levantan pesas. Puede ser resultado de un levantamiento inadecuado o de un esfuerzo excesivo. Se puede diagnosticar a través de una evaluación clínica que incluya la historia del paciente y un examen físico.

2. **Síndrome facetario lumbar**: Este síndrome se refiere al dolor que proviene de las articulaciones facetarias de la columna. Es común en levantadores de pesas debido a la carga repetitiva en la columna. La evaluación clínica y la palpación de las articulaciones facetarias pueden ayudar a confirmar este diagnóstico.

3. **Hernia de disco**: Esta condición ocurre cuando el material del disco int