In [1]:
%pwd

'c:\\Users\\Dell\\genAI Projects\\MedBot\\research'

In [2]:
import os

# The kernel starts inside the project's research/ folder, while later cells
# use paths relative to the project root (e.g. 'Data').  Step up one level
# only while we are still inside research/: an unconditional os.chdir("../")
# climbs one more directory every time this cell is re-run.
if os.path.basename(os.getcwd()) == "research":
    os.chdir(os.pardir)

In [3]:
%pwd

'c:\\Users\\Dell\\genAI Projects\\MedBot'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def load_pdf_file(data):
    """Load the PDF files found under the ``data`` directory.

    A ``DirectoryLoader`` is pointed at ``data`` with the ``*.pdf`` glob and
    ``PyPDFLoader`` as the per-file loader class.

    Args:
        data: Path of the directory to scan.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Load the PDFs from the 'Data' folder (path is relative to the working directory).
extracted_data = load_pdf_file(data='Data')

In [7]:
#extracted_data

In [8]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: The documents to split, e.g. the list returned by
            ``load_pdf_file``.
        chunk_size: ``chunk_size`` handed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` handed to the splitter.  Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks = text_split(extracted_data)
print(f"Length of Text Chunks {len(text_chunks)}")

Length of Text Chunks 5859


In [10]:
#text_chunks

In [21]:
from langchain.embeddings import HuggingFaceEmbeddings

In [24]:
def download_hugging_face_embeddings(model_name='all-MiniLM-L6-v2'):
    """Build the HuggingFace embedding model used by this notebook.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to ``'all-MiniLM-L6-v2'``, the value that used to be
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [25]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_hugging_face_embeddings()

In [26]:
# Sanity check: embed a sample sentence and print the size of the vector.
query_result = embeddings.embed_query("Hello world")
print(f"Length {len(query_result)}")

Length 384


In [143]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
load_dotenv()

PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
GROQ_API_KEY = os.environ.get('GROQ_API_KEY')

# Fail here, naming the missing variable, instead of letting a None key
# travel into the Pinecone / Groq clients and fail later with a less
# obvious error.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Set them in the environment or in a .env file."
    )



In [144]:
from pinecone import Pinecone

# Create the Pinecone client with the key read from the environment, then get
# a handle on the "medbot" index used by the rest of the notebook.
pc = Pinecone(api_key= PINECONE_API_KEY)
index = pc.Index("medbot")

In [145]:
import os 
# Write the key back into the process environment.
# NOTE(review): presumably so that the vector-store helpers below can read
# PINECONE_API_KEY from the environment — confirm; the value itself was read
# from os.environ in an earlier cell.
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

In [147]:
# Embed each chunk and upsert the embeddings into the Pinecone index, but only
# while the index is still empty.  An unconditional from_documents() call
# re-sends every chunk on each run of this cell; with no explicit ids passed,
# that is expected to add duplicate vectors.  When the index already holds
# vectors we just attach to it.
from langchain_pinecone import PineconeVectorStore

if index.describe_index_stats().total_vector_count == 0:
    docsearch = PineconeVectorStore.from_documents(
        documents=text_chunks,
        index_name="medbot",
        embedding=embeddings,
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(
        index_name="medbot",
        embedding=embeddings,
    )


In [148]:
# Connect to the existing "medbot" index, reusing the same embedding model.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(index_name="medbot", embedding=embeddings)

In [149]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x28d33ab1510>

In [150]:
# Expose the vector store as a retriever: similarity search configured with k=3.
retriever = docsearch.as_retriever(search_type= 'similarity', search_kwargs={'k':3})

In [151]:
# Smoke-test the retriever on its own with a sample symptom query.
retrieved_docs = retriever.invoke("i have severe acne")


In [152]:
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Groq-hosted chat model used to generate the answers.
# NOTE(review): LLMChain and PromptTemplate are not referenced in the cells
# visible here — confirm whether these imports are still needed.
llm = ChatGroq(
    model_name="llama3-70b-8192",
    groq_api_key=GROQ_API_KEY,
    temperature=0.4,
    max_tokens=500,
)

In [166]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the chat model.  The text is built from adjacent
# string literals (one per sentence group) that Python concatenates into a
# single string.  The retrieved documents are substituted for {context}; the
# placeholder sits under its own "Context:" heading so the documents are not
# glued onto the end of the last instruction sentence.
system_prompt = (
    "You are MedBot, an advanced AI vernacular diagnostic assistant for medical information. "
    "Your goal is to provide accurate, evidence-based medical insights while ensuring accessibility."
    "\n\nInstructions:\n"
    "Rely only on the context given and on validated medical literature, including guidelines from WHO, ICMR, papers, and other authoritative sources. "
    "Do not answer any question that is not relevant or provided in the given context—simply say "
    "I am sorry. I can't assist you with this query, do you have any health related questions?"
    "\n DO NOT PROVIDE A DIAGNOSIS. Instead, list probable causes (if not, then give reassurance), offer risk factors and next steps.\n"
    "Respond in English, ensuring medical jargon is explained in simple terms. If language is not specified, default to English with simple explanations.\n"
    "Ensure responses are empathic, non-alarming and supportive. If symptoms indicate urgency, recommend immediate medical consultation. Avoid providing prescriptions or treatment plans unless sourced from standard guidelines.\n"
    "REMINDER: You are an AI assistant, not a certified medical professional. Always encourage users to seek expert medical advice.\n"
    "PLUS: BE NON-ALARMING TO NOT INDUCE PANIC AMONG USERS. Be calm and supportive while keeping urgency intact.\n"
    "Based on symptom severity, assign a Triage Severity Score (TSS) with color to guide users:\n"
    "🟢 TSS-1 (Mild): Common symptoms, likely self-resolvable (e.g., mild headache, slight cold). Suggest home care.\n"
    "🟡 TSS-2 (Moderate): Persistent or discomforting symptoms (e.g., high fever, prolonged cough). Recommend medical advice if symptoms persist.\n"
    "🟠 TSS-3 (Serious): Potentially urgent (e.g., severe pain, difficulty breathing). Strongly urge immediate consultation.\n"
    "🔴 TSS-4 (Critical): Emergency symptoms (e.g., chest pain, unconsciousness). Instruct immediate emergency care.\n\n"
    "Key Adjustments for a Non-Alarming Tone:\n"
    "Reduce direct alarm triggers like “requires immediate attention” → Reword to “should be checked by a doctor as soon as possible.”\n"
    "Use softer phrasing for medical conditions → Instead of listing serious causes first, balance with “These symptoms can have different causes, some more serious than others.”\n"
    "Maintain firm but calm recommendations → “It's best to seek emergency care” instead of “crucial to seek emergency medical attention immediately.”\n"
    "Encourage staying calm with reassurance → Instead of “Do not drive yourself,” say “If possible, have someone assist you.”"
    "\n\nContext:\n{context}"
)

# Two-message chat template: the system instructions first, then the user's
# question, filled in through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [167]:
# Answering chain built from the LLM and the chat prompt defined above.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Retrieval chain: combines the retriever with the answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [169]:
# Run one question through the retrieval chain and print the generated answer.
# NOTE(review): the prompt built in this notebook only has {context} and
# {input} placeholders, so the "language" entry does not appear to be used by
# the template — confirm whether a {language} placeholder was intended.
response = rag_chain.invoke({"input": "what is your Elephantitis", "language": "English"})
print(response['answer'])



I'm here to help you with your health-related questions!

Elephantiasis, also known as lymphatic filariasis, is a chronic and debilitating disease caused by parasitic worms, specifically three species of filarial worms: Wuchereria bancrofti, Brugia malayi, and Brugia timori. These worms are transmitted to humans through the bites of infected mosquitoes.

The symptoms of elephantiasis can vary, but they often include:

1. Swelling: Enlargement of limbs, usually the legs, arms, or genitals, which can lead to disfigurement.
2. Skin thickening: Thickening of the skin, making it rough and hard.
3. Pain: Pain or discomfort in the affected areas.
4. Fever: Recurring fever, which can be mild or severe.

In severe cases, the disease can lead to:

1. Hydrocele: Fluid accumulation in the scrotum, causing swelling.
2. Lymphedema: Swelling of the limbs due to lymphatic system damage.

Elephantiasis is usually diagnosed through a combination of physical examination, medical history, and laboratory t