In [19]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from dotenv import load_dotenv
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Load variables from a local .env file into the process environment.
# No credentials are passed explicitly anywhere in this notebook, so the
# Google and Pinecone clients created later presumably read their API keys
# from the environment populated here — TODO confirm the expected variable names.
load_dotenv()

True

In [3]:
def load_pdf_file(file_path):
    """Load a PDF through ``PyPDFLoader``.

    Args:
        file_path: Location of the PDF file, handed to ``PyPDFLoader`` as-is.

    Returns:
        Whatever ``PyPDFLoader(file_path).load()`` produces.
    """
    pdf_loader = PyPDFLoader(file_path)
    return pdf_loader.load()

In [4]:
# Load the source PDF. The path is relative, so the file must sit in the
# directory the notebook kernel is running from.
medical_document = load_pdf_file("medical_data.pdf")

In [5]:
def text_splitting(extracted_data, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split, passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum chunk size handed to the splitter. Defaults to
            1000, the value this notebook previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter. Defaults to 200, the previously hard-coded value.

    Returns:
        The chunks produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)

In [6]:
# Split the loaded PDF documents into chunks; these are what get embedded
# and stored in the vector index further down.
chunks = text_splitting(medical_document)

In [7]:
def initialize_embedding():
    """Build the embedding client used throughout the notebook.

    Returns:
        A ``GoogleGenerativeAIEmbeddings`` instance configured for the
        ``models/text-embedding-004`` model.
    """
    return GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

In [8]:
# Single shared embedding client; reused for the dimension check and for
# building the vector store.
embedding = initialize_embedding()

In [9]:
# Sanity check: length of one query embedding. The recorded run shows 768,
# which is the value the Pinecone index is created with below.
len(embedding.embed_query("Hi"))

768

In [10]:
# Pinecone gRPC client. No api_key argument is given, so the key is
# presumably read from the environment (PINECONE_API_KEY) — TODO confirm.
pc = Pinecone()

In [11]:
index_name = "medisage"

# Only create the index when it is missing. An unconditional create_index
# call fails once the index exists, which breaks re-running this cell or a
# Restart & Run All.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # length of the query embedding measured earlier in this notebook
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [None]:
# Embed every chunk and store it in the Pinecone index, keeping a handle to
# the resulting vector store for retrieval.
# NOTE(review): re-running this cell presumably uploads the chunks again —
# confirm whether that duplicates vectors in the index.
docsearch = PineconeVectorStore.from_documents(chunks, embedding, index_name=index_name)

In [23]:
# Similarity-search retriever that returns the top 3 matches per query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [13]:
# Chat model that generates the final answers in the RAG chain.
# No API key is passed here; presumably it comes from the environment — TODO confirm.
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [20]:
# System prompt for the assistant. The only template placeholder is
# {context}, which the documents chain fills with the retrieved chunks.
# NOTE(review): the prompt never tells the model to answer only from the
# context or to decline when the context is unrelated to the question —
# consider adding that guideline.
system_prompt = """
You are MedBot, an AI-powered virtual medical assistant trained to provide general healthcare information, symptom explanations, and wellness guidance. You do not provide medical diagnoses or prescribe medications. Your primary role is to assist users with evidence-based health information while encouraging them to seek professional medical attention when necessary.

###**Context:**
'{context}'

### **Guidelines:**
1. **Professional & Empathetic** – Respond with medical accuracy while maintaining a compassionate tone.
2. **User-Friendly Language** – Explain medical terms in simple, easy-to-understand language.
3. **Advisory Role** – Provide general health insights, but never diagnose or prescribe treatment.
4. **Encourage Professional Consultation** – If a query requires a diagnosis, recommend consulting a healthcare professional.
5. **Ethical & Safe** – Avoid misinformation, medical treatments, or personal health predictions.

### **Capabilities:**
✔ Explain symptoms and common conditions  
✔ Provide first-aid and general wellness advice  
✔ Suggest preventive healthcare measures  
✔ Explain medical terms, tests, and procedures  
✔ Offer insights on diet, fitness, and mental well-being  

### **Limitations:**
✖ Cannot provide personalized diagnoses or treatments  
✖ Cannot prescribe medications or recommend dosages  
✖ Cannot replace professional medical advice  

### **Example Interactions:**

**User:** "I have a sore throat and mild fever. What should I do?"  
**MedBot:** "A sore throat and mild fever can be caused by viral infections like the common cold. Stay hydrated, rest well, and consider warm fluids like herbal tea. If symptoms persist for more than 3 days or worsen, consult a doctor."

**User:** "Can I take aspirin for my headache?"  
**MedBot:** "I cannot recommend specific medications, but you may try resting in a quiet room, staying hydrated, or using a cold compress. If the headache is severe or persistent, consult a healthcare professional."

**User:** "What are the symptoms of diabetes?"  
**MedBot:** "Common symptoms of diabetes include increased thirst, frequent urination, fatigue, and blurred vision. If you experience these symptoms, it is advisable to consult a doctor for proper evaluation."

Always provide clear, safe, and non-diagnostic responses, prioritizing user well-being.
"""


In [21]:
# Two-message chat template: the system prompt (which carries the {context}
# slot) followed by the user's question in the {input} slot.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [24]:
# First build the chain that stuffs the retrieved documents into the prompt
# and calls the chat model, then wrap it with the retriever so one call
# performs retrieval followed by answering.
question_answer_chain = create_stuff_documents_chain(llm=model, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [27]:
# Smoke test: ask an in-domain question and display only the generated
# answer from the chain's result.
rag_chain.invoke({"input":"What is acne"})["answer"]

'Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.\nAcne vulgaris, the medical term for common acne, is the most common skin disease. It affects nearly 17 million people in the United States. While acne can arise at any age, it usually begins at puberty and worsens during adolescence. Nearly 85% of people develop acne at some time between the ages of 12-25 years. Up to 20% of women develop mild acne. It is also found in some newborns.\n\nThe sebaceous glands lie just beneath the skin’s surface. They produce an oil called sebum, the skin’s natural moisturizer. These glands and the hair follicles within which they are found are called sebaceous follicles. These follicles open onto the skin through pores. At puberty, increased levels of androgens (male hormones) cause the glands to produce too much sebum. When excess sebum combines with dead, sticky skin cells, a

In [28]:
# Probe with a vague question.
# NOTE(review): the recorded answer is about sports-injury statistics, i.e.
# the model answered from whatever chunks were retrieved rather than asking
# for clarification — worth tightening the system prompt.
rag_chain.invoke({"input":"What is stats"})["answer"]

'Based on the context provided:\n\nSports injuries result from acute trauma or repetitive stress associated with athletic activities. Sports injuries can affect bones or soft tissue (ligaments, muscles, tendons).\n\nIn 2002, about 20.3 million Americans suffered a sports injury. Of those, 53% were minor enough to be self-treated or left untreated. However, about 10 million Americans annually receive medical attention for their sports-related injuries. That equates to almost 26 per 1,000 people. The highest rate is among children age five to 14 years old (59.3 per 1,000 people). As many as 20% of children who play sports get hurt, and about 25% of their injuries are classified as serious.\n\nAbout 95% of sports injuries are minor soft tissue traumas.\n\nFractures account for 5–6% of all sports injuries.\n\nBrain injury is the primary cause of fatal sports-related injuries. Concussion, which is also called mild traumatic brain injury or MTBI, can result from even minor blows to the head.