In [1]:
import pandas as pd

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()  # load variables from .env

hf_token = os.getenv("HUGGINGFACE_API_KEY")

print(f"Your token: {hf_token}")


Your token: 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


In [3]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


loader = PyPDFLoader(r"data\77 Nursing diagnosis handbook an evidence.pdf")
documents = loader.load()

In [4]:
print(documents[10])

page_content='Contributors	 ix
Vanessa Flannery, MSN, PHCNS-BC, CNE
Associate Professor
Nursing Department
Morehead State University
Morehead, Kentucky
Shari D. Froelich, DNP , MSN, MSBA, ANP , BC, 
ACHPN, PMHNP , BC
Nurse Practitioner
Alcona Health Center
Alpena, Michigan
Tracy P . George, DNP , APRN-BC, CNE
Instructor
Nursing Department
Francis Marion University
Florence, South Carolina
Susanne W . Gibbons, PhD, C-ANP/GNP
Assistant Professor
Daniel K. Inouye Graduate School of Nursing
Uniformed Services University of the Health Sciences
Bethesda, Maryland
Barbara A. Given, PhD, RN, FAAN
University Distinguished Professor
College of Nursing
Michigan State University
East Lansing, Michigan
Mila W . Grady, MSN, RN
Lecturer
College of Nursing
University of Iowa
Iowa City, Iowa
Pauline McKinney Green, PhD, RN, CNE
Professor Emeritus
Graduate Nursing
Howard University College of Nursing and Allied  
Health Sciences
Washington, DC
Sherry A. Greenberg, PhD, RN, GNP-BC
Program Director, Advan

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

In [6]:
len(chunks)

10663

In [62]:
from langchain_huggingface import HuggingFaceEmbeddings

from tqdm import tqdm

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# Smaller, faster, less RAM-heavy
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from langchain.vectorstores import FAISS

vectorstore = FAISS.from_documents(chunks, embedding_model)

In [9]:
vectorstore.save_local("faiss_index")

In [10]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# 1. Create a retriever
# retriever = vectorstore.as_retriever()

# from langchain.retrievers import MMRRetriever

# from langchain.retrievers import MMRRetriever
# retriever = vectorstore.as_retriever(search_type="mmr", lambda_mult=0.5)

In [11]:
text = chunks[0].page_content
print("Text:\n", text)  

# Get embedding vector for this text
embedding_vector = embedding_model.embed_query(text)

# Show embedding
print("\nEmbedding vector:\n", embedding_vector)

# Optional: Just show first 10 numbers to keep it short
print("\nFirst 10 values:\n", embedding_vector[:10])

Text:
 NANDA-I Diagnoses
Activity intolerance, 122
Risk for Activity intolerance, 127
Ineffective Activity planning, 127
Risk for Ineffective Activity planning, 130
Ineffective Airway clearance, 130
Risk for Allergy response, 136
Anxiety, 139
Death Anxiety, 144
Risk for Aspiration, 147
Risk for impaired Attachment, 152
Autonomic Dysreflexia, 158
Risk for Autonomic Dysreflexia, 161
Risk for Bleeding, 162
Disturbed Body Image, 167
Insufficient Breast Milk, 171
Ineffective Breastfeeding, 174

Embedding vector:
 [-0.019173411652445793, -0.004370002541691065, -0.021156122907996178, -0.019850291311740875, 0.05510607734322548, 0.033496156334877014, 0.04953979700803757, 0.03312781825661659, -0.00965894479304552, -0.01217990554869175, 0.01307009905576706, -0.03873186185956001, -0.010245931334793568, 0.02474253624677658, -0.016315823420882225, 0.019063204526901245, -0.002039895858615637, 0.021196607500314713, -0.006541956681758165, 0.02708396501839161, -0.029309850186109543, 0.012908728793263435

In [None]:
# from transformers import AutoTokenizer, AutoModelForCausalLM

# model_name = "medalpaca/medalpaca-7b"

# tokenizer = AutoTokenizer.from_pretrained(model_name, legacy=False)  # <-- use_fast=False is key
# model = AutoModelForCausalLM.from_pretrained(model_name)



In [12]:
from langchain.llms import Ollama
llm = Ollama(model="llama3")

  llm = Ollama(model="llama3")


In [None]:
# from langchain.chains import RetrievalQA
# from langchain.prompts import PromptTemplate



# # 2. Define your custom prompt
# prompt_template = PromptTemplate(
#     input_variables=["context", "question"],
#     template="""
# You are MediMind, an AI clinical decision assistant. Use the provided context from a nursing knowledge base and the user’s clinical input to generate:

# 1. Symptom Analysis:
#    - Differential Diagnoses with likelihood percentages and brief justification.
#    - Any alerts or red flags.

# 2. Clinical Decision Support:
#    - Immediate Actions.
#    - Follow-Ups.
#    - Documentation.

# ### Knowledge Context:
# {context}

# ### Clinical Input:
# {question}

# ### Your Response:
# """
# )

# # 3. Create the RetrievalQA chain
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     chain_type="stuff",
#     chain_type_kwargs={"prompt": prompt_template},
#     return_source_documents=False
# )


In [49]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="""
You are a clinical decision support assistant. Use the retrieved context and outpatient guidelines (ADA, NICE, WHO) to provide a comprehensive diagnosis and treatment plan.

Context:
{context}

Question:
{question}

Instructions:
- ONLY recommend outpatient-safe diagnosis and treatment. If any red flags are detected, mention them clearly.
- Follow evidence-based outpatient guidelines.
- Fill in EVERY section below with complete, detailed information.
- DO NOT skip any section.

Format your response like this:

**1. Symptom Analysis**
- Differential Diagnoses (include % likelihood + 1-line reasoning per item)
- Red Flags (list if any, else write "None")

**2. Clinical Decision Support**
- Immediate Outpatient Actions:
    • Vitals to check
    • Labs to order (e.g., HbA1c, CBC, TSH, etc.)
    • Physical assessments
- Medications:
    • Name
    • Dose
    • Frequency
    • Purpose
- Lifestyle Modifications:
    • Diet changes (e.g., low glycemic index, reduced carbs)
    • Exercise (type, intensity, duration per week)
    • Smoking cessation if applicable
- Follow-Up Plan:
    • When to repeat labs
    • When to schedule next visit
    • Any specialist referrals needed
- Documentation Notes:
    • Summary of findings
    • Diagnosis and plan in 1-2 sentences

Be concise but complete. Structure clearly. Do not leave any field blank.
"""
)

In [50]:
# Clinical QA Chain
clinical_qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    return_source_documents=False
)




In [51]:
query_text = """
Context: Primary Care – Routine Visit for Fatigue
Patient Demographics: 45-year-old female
Symptoms: Fatigue for 3 months, frequent urination, increased thirst, 8-pound weight loss
Medical History: Obesity (BMI 32), family history of Type 2 diabetes
Test Results: Fasting glucose 145 mg/dL; HbA1c 7.8%
Physician Query: What is the most likely diagnosis, and what outpatient treatment plan should be followed?
"""



response = clinical_qa_chain.run({"query": query_text})
print(response)

**1. Symptom Analysis**

Differential Diagnoses:

* Prediabetes (60% likelihood): Given the patient's family history of Type 2 diabetes, fasting glucose 145 mg/dL, and HbA1c 7.8%, this is a strong possibility.
* Polycystic Kidney Disease (15% likelihood): The combination of frequent urination and increased thirst could be indicative of underlying kidney dysfunction.
* Obesity-related Sleep Apnea (10% likelihood): As the patient has obesity and fatigue, sleep apnea could be contributing to their symptoms.
* Depression (5% likelihood): Fatigue can be a symptom of depression, especially in women.

Red Flags: None

**2. Clinical Decision Support**

Immediate Outpatient Actions:

* Vitals to check:
	+ Blood pressure
	+ Pulse rate
* Labs to order:
	+ Fasting glucose and HbA1c (already done)
	+ Complete Blood Count (CBC) to rule out anemia or other blood disorders
	+ Thyroid-Stimulating Hormone (TSH) to assess thyroid function
* Physical assessments:
	+ Weight measurement to track progress
	+