In [1]:
import pandas as pd

# Read the diagnosis table from disk
df = pd.read_csv(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\diagnosis.csv")

# One text chunk per diagnosis row (grouping by PatientID instead would give
# patient-wise context)
documentsss = [
    f"PatientID: {record['PatientID']}. Diagnosis: {record['Diagnosis']}. State: {record['State']}. Status: {record['Status']}."
    for _, record in df.iterrows()
]


In [2]:
import pandas as pd
from langchain.schema import Document

# -----------------------------------
# Loaders for each CSV file
# -----------------------------------

def load_csv_as_df(csv_path):
    """Read the CSV file at ``csv_path`` with pandas defaults and return the DataFrame."""
    frame = pd.read_csv(csv_path)
    return frame

def load_patient_details(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_diagnosis(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_medications(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_prescriptions(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_alerts(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_diabetic_indices(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_encounters(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

def load_immunizations(csv_path):
    """Return the CSV at ``csv_path`` as a DataFrame (delegates to ``load_csv_as_df``; no extra parsing)."""
    table = load_csv_as_df(csv_path)
    return table

# -----------------------------------
# Combiner: build per-patient documents
# -----------------------------------

def _index_rows_by_patient(table_df):
    """Split ``table_df`` into per-patient sub-frames keyed by its PatientID value.

    Grouping once up front replaces a full boolean-mask scan of the table for
    every patient. ``sort=False`` avoids reordering the groups; rows inside a
    group keep their original relative order.
    """
    return {pid: rows for pid, rows in table_df.groupby("PatientID", sort=False)}


def _render_section(rows, header, render_row):
    """Return ``[header, bullet, ...]`` for ``rows``, or ``[]`` when there are none."""
    if rows is None or rows.empty:
        return []
    return [header] + [render_row(row) for _, row in rows.iterrows()]


def combine_patient_documents(
    patient_df,
    diagnosis_df,
    medications_df,
    prescriptions_df,
    alerts_df,
    indices_df,
    encounters_df,
    immunizations_df
):
    """Build one text ``Document`` per row of ``patient_df``.

    Each document starts with the patient's demographic lines and is followed
    by one section per related table (diagnoses, medications, prescriptions,
    alerts, diabetic indices, encounters, immunizations). A section is only
    emitted when that table has at least one row whose ``PatientID`` equals
    the patient's.

    Args:
        patient_df: Needs columns PatientID, Name, Sex, DOB, Phone, Address,
            NextOfKin, NextOfKinPhone, NextOfKinAddress.
        diagnosis_df: Needs PatientID, Diagnosis, State, Status.
        medications_df: Needs PatientID, Medication, Date.
        prescriptions_df: Needs PatientID, Prescription, Instructions, Date.
        alerts_df: Needs PatientID, Alert.
        indices_df: Needs PatientID, Index, Value, MostRecent.
        encounters_df: Needs PatientID, Date, Facility, Specialty, Clinician,
            Reason, Type.
        immunizations_df: Needs PatientID, Immunization, NumberReceived,
            MostRecent.

    Returns:
        list of ``Document``: ``page_content`` is the newline-joined text and
        ``metadata`` is ``{"PatientID": <id>}``, in ``patient_df`` row order.

    Raises:
        KeyError: If a required column is missing from any table.

    Note:
        Values are interpolated as-is; no missing-value handling is applied.
    """
    # (table, section header, bullet formatter) in the order sections appear.
    section_specs = [
        (diagnosis_df, "Diagnoses:",
         lambda row: f" - {row['Diagnosis']} (State: {row['State']}, Status: {row['Status']})"),
        (medications_df, "Medications:",
         lambda row: f" - {row['Medication']} on {row['Date']}"),
        (prescriptions_df, "Prescriptions:",
         lambda row: f" - {row['Prescription']}: {row['Instructions']} ({row['Date']})"),
        (alerts_df, "Alerts:",
         lambda row: f" - {row['Alert']}"),
        (indices_df, "Diabetic Indices:",
         lambda row: f" - {row['Index']}: {row['Value']} (Most Recent: {row['MostRecent']})"),
        (encounters_df, "Encounter History:",
         lambda row: f" - {row['Date']}, {row['Facility']}, {row['Specialty']}, {row['Clinician']}, {row['Reason']} ({row['Type']})"),
        (immunizations_df, "Immunizations:",
         lambda row: f" - {row['Immunization']}: {row['NumberReceived']} doses (Most Recent: {row['MostRecent']})"),
    ]

    # Index every table once so the per-patient lookup below is a dict access.
    indexed_sections = [
        (_index_rows_by_patient(table_df), header, render_row)
        for table_df, header, render_row in section_specs
    ]

    patient_docs = []
    for _, patient in patient_df.iterrows():
        pid = patient["PatientID"]

        # Patient details
        parts = [
            f"PatientID: {pid}",
            f"Name: {patient['Name']}",
            f"Sex: {patient['Sex']}",
            f"DOB: {patient['DOB']}",
            f"Phone: {patient['Phone']}",
            f"Address: {patient['Address']}",
            f"NextOfKin: {patient['NextOfKin']} ({patient['NextOfKinPhone']}), Address: {patient['NextOfKinAddress']}",
        ]

        # Related-table sections; patients with no rows in a table get no header.
        for rows_by_pid, header, render_row in indexed_sections:
            parts.extend(_render_section(rows_by_pid.get(pid), header, render_row))

        # Combine all parts into one text block and wrap it
        patient_docs.append(
            Document(page_content="\n".join(parts), metadata={"PatientID": pid})
        )

    return patient_docs


# Read the eight source tables (evaluated top to bottom, one loader per file)
(
    patients,
    diagnoses,
    medications,
    prescriptions,
    alerts,
    indices,
    encounters,
    immunizations,
) = (
    load_patient_details(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\patient_details.csv"),
    load_diagnosis(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\diagnosis.csv"),
    load_medications(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\medications.csv"),
    load_prescriptions(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\prescriptions.csv"),
    load_alerts(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\alerts.csv"),
    load_diabetic_indices(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\diabetic_indices.csv"),
    load_encounters(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\encounter_history.csv"),
    load_immunizations(r"I:\Code Space\LLM Model Project\RAG\medbot\Data\immunizations.csv"),
)

# Merge the tables into one Document per patient
documents = combine_patient_documents(
    patient_df=patients,
    diagnosis_df=diagnoses,
    medications_df=medications,
    prescriptions_df=prescriptions,
    alerts_df=alerts,
    indices_df=indices,
    encounters_df=encounters,
    immunizations_df=immunizations,
)



In [3]:

from langchain.schema import Document

# `documents` already holds Document objects (combine_patient_documents builds
# them), so no re-wrapping is needed: lc_documents is a second name for the
# same list, not a copy.
# Earlier approach, kept for reference (it wrapped plain strings):
#lc_documents = [Document(page_content=text) for text in documents]
lc_documents = documents


In [5]:
# Print every combined patient document, numbered from 1.
# (The previous `idx+2` labelled the first document "Document 2".)
# NOTE(review): this dumps full patient records (names, phones, addresses) into
# the notebook output; clear or truncate the output before sharing.
for idx, doc in enumerate(documents, start=1):
    print(f"Document {idx}: {doc}")


Document 2: page_content='PatientID: GME0000
Name: Thomas Hernandez
Sex: Female
DOB: 1952/09/16
Phone: 558-590-5817
Address: USNS Schmitt, FPO AA 03097
NextOfKin: Monica Cooper (214-672-1154), Address: 657 Oconnor Lake, East Stephanieton, OR 76809
Diagnoses:
 - Asthma (State: 11/2017, Status: Ongoing)
 - Osteoporosis (State: 09/2021, Status: Ongoing)
 - Exertive stress test (State: 09/2002, Status: Resolved)
Medications:
 - Clobetasone Cream on 02/2008
 - Amoxicillin 500 mg on 04/2016
 - Lisinopril 10 mg on 07/2017
 - Hydrochlorothiazide 25 mg on 02/2018
 - Ramipril 10 mg on 01/2007
 - Glyburide 5 mg on 05/2020
Prescriptions:
 - Atorvastatin: One tab at supper (07/2020)
 - ASA: One tab at breakfast (06/2022)
 - Glyburide: Two tabs twice daily (09/2020)
Alerts:
 - No known drug allergies
 - Td due
 - Allergies – Sulfa Drugs
Diabetic Indices:
 - Urine Microalb: 5.51 (Most Recent: 07/2023)
 - BP: 141/81 (Most Recent: 11/2023)
 - BMI: 5.21 (Most Recent: 10/2023)
 - ATC: 4.76 (Most Recent: 

In [56]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from tqdm.notebook import tqdm


# Sentence-embedding model used to vectorise the patient documents
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Embed the documents and index them in a Chroma vector store
vectorstore = Chroma.from_documents(documents=lc_documents, embedding=embedder)


In [57]:
# Return the 5 most relevant chunks per query
TOP_K = 5
retriever = vectorstore.as_retriever(search_kwargs=dict(k=TOP_K))


In [58]:
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
# Load environment variables from .env
load_dotenv()

# Validate the key BEFORE building the client: previously ChatOpenAI was
# constructed first, so this explicit error could not fire ahead of the client
# being created with a missing key.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables. Please check your .env file.")

llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=api_key)

In [59]:
from langchain.chains import RetrievalQA

# Simple RAG: "stuff" chain over the retriever's results
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")


In [60]:
question = "What medications has patient GME0000 received for diabetes?"
result = qa_chain.invoke(dict(query=question))

# The answer text sits under the "result" key of the chain output
print(result["result"])


I don't have any information on PatientID GME0000.


In [61]:
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict
from langchain_core.runnables import RunnableConfig

In [62]:
# 1️⃣ Define the state structure
# Graph state, declared with the functional TypedDict form. Fields:
#   query  - str, read by the node function
#   answer - str, written by the node function
DiagnosisState = TypedDict("DiagnosisState", {"query": str, "answer": str})

# 2️⃣ Define the node function
def rag_agent_node(state: DiagnosisState, config: RunnableConfig) -> dict:
    """Graph node: answer ``state["query"]`` with the module-level ``qa_chain``.

    Args:
        state: Current graph state; only its ``"query"`` entry is read.
        config: Accepted as part of the node signature; not used here.

    Returns:
        dict with ``"query"`` (passed through unchanged) and ``"answer"``
        (the ``"result"`` entry of the chain's output).
    """
    user_query = state["query"]
    # qa_chain is expected to exist at module level before this node runs
    chain_output = qa_chain.invoke({"query": user_query})
    return dict(query=user_query, answer=chain_output["result"])

In [63]:
# 3️⃣ Build the graph
RAG_NODE = "RAGAgent"

graph = StateGraph(DiagnosisState)

# Single-node flow: entry point -> RAGAgent -> END
graph.add_node(RAG_NODE, rag_agent_node)
graph.set_entry_point(RAG_NODE)
graph.add_edge(RAG_NODE, END)

# Compile the graph into the object that is invoked below
diagnosis_graph = graph.compile()

In [64]:


# 5️⃣ Run the graph
user_query = "What medications has patient GME0000 received for diabetes?"
initial_state = dict(query=user_query)  # "answer" is filled in by the node
result = diagnosis_graph.invoke(initial_state)

# Show the answer produced by the graph
agent_answer = result["answer"]
print("Agent answer:", agent_answer)

Agent answer: I don't have any information on a patient with the ID GME0000 in the provided context.
