In [1]:
from langchain_community.document_loaders import DirectoryLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
import os

In [2]:
def load_document(source_doc_path, chunk_size=5000, chunk_overlap=1000, k=3, model="llama3"):
    """Index the ``*.txt`` files in a directory and return a similarity retriever.

    The files are loaded with ``TextLoader``, split into overlapping chunks,
    embedded with ``OllamaEmbeddings`` and stored in a ``Chroma`` vector store.

    Args:
        source_doc_path: Directory that is searched for ``*.txt`` files.
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.
        k: Number of chunks the returned retriever is asked to fetch per query.
        model: Ollama model name used to compute the embeddings.

    Returns:
        The retriever built from the vector store, or ``None`` if any step
        failed (the error is printed rather than raised).
    """
    try:
        loader = DirectoryLoader(source_doc_path, glob="*.txt", loader_cls=TextLoader)
        pages = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap, add_start_index=True
        )
        all_splits = text_splitter.split_documents(pages)
        if not all_splits:
            # Fail with an explicit message instead of handing an empty corpus
            # to the vector store; the handler below turns this into None.
            raise ValueError(f"no text found in *.txt files under {source_doc_path!r}")

        # Created only once there is something to embed.
        embeddings = OllamaEmbeddings(model=model)
        vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": k})
        print("Document loaded and processed.")
        return retriever
    except Exception as e:
        # Best-effort by design: callers check for None instead of catching.
        print(f"An error occurred while loading the document: {e}")
        return None

In [3]:
class LCEL_RAG:
    """Retrieval-augmented question answering over a directory of text notes.

    On construction the documents under ``txt_path`` are indexed via
    ``load_document`` and an LCEL chain (retrieve -> prompt -> chat model ->
    string parser) is assembled. If indexing fails, the instance is still
    created and ``invoke`` returns a failure message instead of raising.
    """

    def __init__(self, txt_path):
        """Index the documents at ``txt_path`` and build the answer chain."""
        self.txt_path = txt_path
        self.retriever = load_document(self.txt_path)
        self.setup_pipeline()

    def setup_pipeline(self):
        """Create the prompt, model, parser and chain used by ``invoke``.

        When ``self.retriever`` is ``None`` no chain is built and
        ``self.prompt``, ``self.model``, ``self.output_parser`` and
        ``self.chain`` are all set to ``None``.
        """
        self.template = """You are an internal medicine physician conducting a review of a patient's electronic health record. You need to find and summarize relevant and accurate information given in the text of clinical notes relating to the care they recieved at the hospital during. 
            Using the information given in the context provided, answer the question about the patient's medical records. Respond only to the question asked, response should be concise and relevant to the question. 
            If the answer cannot be deduced from the context, respond that the information can not be deduced from the information provided.
            context: {context}
            question: {question}
"""
        if self.retriever is None:
            # A None retriever cannot be wrapped in RunnableParallel; building
            # the chain anyway would raise inside __init__ and make the
            # failure message in invoke() unreachable.
            self.prompt = None
            self.model = None
            self.output_parser = None
            self.chain = None
            return

        self.prompt = ChatPromptTemplate.from_template(self.template)
        self.model = ChatOllama(model="llama3")
        self.output_parser = StrOutputParser()
        # The incoming question is sent both to the retriever (to fetch the
        # context) and, unchanged, to the prompt's {question} slot.
        setup_and_retrieval = RunnableParallel(
            {"context": self.retriever, "question": RunnablePassthrough()}
        )

        self.chain = setup_and_retrieval | self.prompt | self.model | self.output_parser

    def invoke(self, question):
        """Answer ``question`` from the indexed documents.

        Returns the chain's string output, or ``"Failed to load document."``
        when no retriever/chain is available.
        """
        if self.retriever is None or self.chain is None:
            return "Failed to load document."
        return self.chain.invoke(question)

In [4]:
# Fixed set of chart-review questions. Each string is passed verbatim to
# LCEL_RAG.invoke(), one at a time, by the cells below.
questions = [
    "Given the HPI, chief complaint, and initial ICD diagnosis, why was the patient admitted to the hospital?",
    "What symptoms, medical condition, or other reason caused the patient to come to or be brought into the hospital?",
    "Given the History of Present Illness, summarize the patient's major medical conditions and the reason they were admitted to the hospital.",
    "Given the history of present illness, admission and discharge diagnosis, what medical conditions or symptoms was this patient treated for in the hospital?",
    "Given the history of present illness, summarize the treatments and diagnostic workup.",
    "Given the History of Present illness, what was the initial diagnostic work up and pertinent results?",
    "What medical conditions were confirmed with diagnostic workup including radiology notes, laboratory, microbiology culture, and other studies?",
    "What medical conditions were ruled out with diagnostic workup?",
    "Based on the admission medication list, what other existing or ongoing medical conditions did the patient have?",
    "Given the radiology notes, what radiology, imaging or other studies that were performed and for what indication? List the radiological and imaging studies. Summarize the type of study, the indication, and impressions from each study.",
    "Given the medical conditions and lab work, what are the pertinent positives and negatives relating to the patient's symptoms and diagnostic workup?",
    "Given the pertinent results, were there any blood culture or microbiology studies? What were the results? Did they grow anything? If so, what was the organism and what antibiotics was it sensitive to?",
    "Given the HPI, what is the differential diagnosis for the presenting symptoms or chief complaint? What is the recommended diagnostic work up?",
    "Given the admission and discharge medication lists if they are present in the note, what were the changes to the patient's medications? What medications were new? What medications were stopped?",
    "Given the history of present illness, what medications or IV therapies were used to treat the patient during this hospital admission?",
    "What were the major surgical, invasive, or diagnostic, or therapeutic procedures?",
    "What were the acute and chronic medical conditions the patient was treated for in the hospital?",
    "For each acute medical condition, what symptoms did the patient present with? What was the severity?",
    "For each acute medical condition, what was the diagnostic work up and related results?",
    "For each acute medical condition, what were the treatments?",
    "Given the Transitional issues, what are the instructions for the receiving provider? What is the recommended follow up? Are there pending results?",
    "What diagnosis did the patient receive? Cluster these with the acute medical conditions that were identified.",
    "What were the pertinent physical exam findings and notable vital signs related to the acute medical conditions?"
]


In [6]:
# Build the RAG pipeline once and smoke-test it with the first question.
txt_path = './masked_data'  # Directory whose *.txt files are loaded and indexed
rag = LCEL_RAG(txt_path)
answer = rag.invoke(questions[0])
print(answer)

Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2


Document loaded and processed.
Based on the information provided, the patient was admitted to the hospital due to LLE cellulitis, which is a serious infection that required IV antibiotics (Duonebs and CTX 1g IV) and close monitoring. The initial vitals in the ED showed concerning signs of infection, including a fever of 97.9°C and a WBC count of 12.5. Imaging studies, including LLE US and CXR, did not show evidence of deep vein thrombosis (DVT), but did reveal mild cardiomegaly and vascular congestion, which may have contributed to the patient's symptoms.

Given the severity of the infection and the patient's multiple comorbidities, including diabetes, hypertension, chronic obstructive pulmonary disease (COPD), and osteoporosis, admission to the hospital was likely necessary to provide IV antibiotics, monitor the patient's condition closely, and address any potential complications that may arise.


In [7]:
# Ask every question in order and keep each answer paired with its question.
response_list = []
for question in questions:
    response = rag.invoke(question)
    response_dict = {'question': question, 'answer': response}
    response_list.append(response_dict)
print(response_list)

Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 3 is greater than number of elements in index 2, updating n_results = 2


[{'question': 'Given the HPI, chief complaint, and initial ICD diagnosis, why was the patient admitted to the hospital?', 'answer': "Based on the information provided, it appears that the patient was admitted to the hospital for LLE cellulitis. The emergency department (ED) evaluation noted significant erythema and warmth in the left lower extremity, which suggests a moderate to severe skin infection. The patient was treated with Duonebs and CTX 1g IV in the ED, suggesting antibiotic therapy for cellulitis.\n\nThe patient's past medical history is notable for LLE cellulitis (left total knee arthroplasty explant and placement of antibiotic spacer due to sepsis), which may be related to their current presentation. Additionally, the patient has a history of diabetes, which can increase their risk of developing skin infections like cellulitis.\n\nGiven the severity of the patient's symptoms and the need for further evaluation and treatment, it is likely that they were admitted to the hospi

In [26]:
# NOTE(review): this import lives outside the notebook's import cell; consider
# moving it there so all dependencies are visible at the top.
import json
# Serialize the list of {'question', 'answer'} dicts into one JSON string.
response_json = json.dumps(response_list)
# Bare expression: the notebook displays the resulting string.
response_json

'[{"question": "Given the HPI, chief complaint, and initial ICD diagnosis, why was the patient admitted to the hospital?", "answer": "Based on the information provided, it appears that the patient was admitted to the hospital for cellulitis of the left lower limb. The chief complaint is leg swelling and abdominal distension, which suggests that the patient\'s condition was severe enough to require hospitalization.\\n\\nThe ICD code L03116, Cellulitis of left lower limb, also supports this conclusion. Additionally, the radiology reports suggest that the patient had significant findings on chest X-ray, including mild cardiomegaly and pulmonary vascular congestion, which may have contributed to their admission. The absence of deep venous thrombosis in the left lower extremity veins as determined by ultrasound further suggests that the patient\'s symptoms were related to cellulitis rather than a clot-related condition.\\n\\nGiven these findings, it is likely that the patient was admitted t

In [15]:
# Print every stored answer, prefixed with its position in response_list.
for i, res in enumerate(response_list):
    answer_text = res['answer']
    print("Answer:", i, answer_text)

Answer: 0 Based on the electronic health record (EHR), it appears that the patient was admitted to the hospital due to left lower extremity (LLE) cellulitis. The patient presented with significant erythema, warmth, and tenderness in the LLE, which prompted the emergency department physician to order a computed tomography (CT) scan of the leg to evaluate for deep venous thrombosis (DVT). Although the CT scan did not show evidence of DVT, it did reveal mild cardiomegaly and vascular congestion, suggesting that the patient's symptoms may be related to an underlying cardiac condition.

The patient's past medical history is also notable, as it suggests that they have a history of chronic conditions such as hepatitis C, high blood pressure, diabetes, and psoriasis. Additionally, the patient has undergone multiple surgical procedures, including left knee replacement and hip replacements, which may have contributed to their overall health status.

In light of these findings, it is likely that 

In [16]:
# Print each answer as one double-quoted field: embedded double quotes are
# doubled (Excel-style escaping) and a newline is appended after the closing quote.
for i, res in enumerate(response_list):
    escaped = res['answer'].replace('"', '""')
    formatted_answer = f'"{escaped}"\n'
    print(formatted_answer)

"Based on the electronic health record (EHR), it appears that the patient was admitted to the hospital due to left lower extremity (LLE) cellulitis. The patient presented with significant erythema, warmth, and tenderness in the LLE, which prompted the emergency department physician to order a computed tomography (CT) scan of the leg to evaluate for deep venous thrombosis (DVT). Although the CT scan did not show evidence of DVT, it did reveal mild cardiomegaly and vascular congestion, suggesting that the patient's symptoms may be related to an underlying cardiac condition.

The patient's past medical history is also notable, as it suggests that they have a history of chronic conditions such as hepatitis C, high blood pressure, diabetes, and psoriasis. Additionally, the patient has undergone multiple surgical procedures, including left knee replacement and hip replacements, which may have contributed to their overall health status.

In light of these findings, it is likely that the patie

In [None]:
# Unexecuted scratch cell (no execution count).
from langchain_community.document_loaders import JSONLoader

# NOTE(review): JSONLoader is called with no arguments here; its constructor
# presumably needs at least a file path and a jq schema, so this call likely
# raises TypeError under Run All. TODO confirm and supply arguments, or delete.
JSONLoader()