In [1]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Extract data from the PDF files
def load_pdf_file(data):
    """Load the PDF files found directly under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``; only entries
            matching the ``*.pdf`` glob are picked up.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files,
        each of which is read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [8]:
# Load the PDFs from the Data directory one level above the notebook's working directory.
extracted_data=load_pdf_file(data="../Data/")

In [None]:
# Chunking: split the loaded documents into smaller pieces
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        extracted_data: Documents to split, as returned by ``load_pdf_file``.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 20, the value
            that was previously hard-coded.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [10]:
# Chunk the loaded documents and report how many chunks were produced.
text_chunks=text_split(extracted_data)
print(len(text_chunks))

5859


In [11]:
# Embedding: set up the sentence-embedding model
from langchain.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    """Create the HuggingFace embedding model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

In [12]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings=download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [13]:
# Sanity check: embed a short string and look at the length of the result.
# NOTE: despite its name, `query` holds the value returned by embed_query,
# not the query text itself.
query=embeddings.embed_query("hello world")
len(query)

384

In [14]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the API keys
# in the following cells are read from os.environ.
load_dotenv()

True

In [16]:
import os

In [17]:
# Read the Pinecone key from the environment (None when the variable is absent).
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [20]:
# os.environ only accepts str values: if the key was not found (None), the
# assignment below would fail with an opaque "str expected, not NoneType"
# TypeError, so stop early with an actionable message instead.
if PINECONE_API_KEY is None:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in the .env file."
    )
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [21]:
from langchain_pinecone import PineconeVectorStore
# Build the vector store from the chunked documents: from_documents receives
# the chunks, the target index name ("medibot") and the embedding model.
# NOTE(review): presumably this embeds and upserts every chunk each time the
# cell runs, so re-executing it may write the same content to the index
# again — confirm before re-running.
docsearch=PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name="medibot",
    embedding=embeddings
)

In [29]:
# Load the existing index

from langchain_pinecone import PineconeVectorStore
# Attach to the already-created "medibot" index via from_existing_index;
# unlike the from_documents call, no documents are passed in here, only the
# index name and the embedding model.
docsearch = PineconeVectorStore.from_existing_index(
    index_name="medibot",
    embedding=embeddings
)

In [30]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1d48e293fa0>

In [24]:
# Read the Google key from the environment (None when the variable is absent).
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [25]:
# os.environ only accepts str values: if the key was not found (None), the
# assignment below would fail with an opaque "str expected, not NoneType"
# TypeError, so stop early with an actionable message instead.
if GOOGLE_API_KEY is None:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in the .env file."
    )
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [37]:
# Integrate the LLM: instantiate the Gemini chat model used to generate answers.
# NOTE(review): presumably the client picks up GOOGLE_API_KEY from the
# environment — confirm against the langchain_google_genai docs.
from langchain_google_genai import ChatGoogleGenerativeAI
llm=ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite")

In [27]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


In [28]:
# System instructions for the answering model. The trailing "{context}"
# placeholder is kept verbatim here; it is not formatted in this cell.
system_prompt = "".join(
    (
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer ",
        "the question. If you don't know the answer, say that you ",
        "don't know. Use three to five sentences maximum and keep the ",
        "answer concise.",
        "\n\n",
        "{context}",
    )
)


# Two-message chat template: the system instructions first, then the user's
# question supplied through the "{input}" placeholder.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [31]:
# Expose the vector store as a retriever: similarity search with k=3.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [38]:
# Answering step: combines the LLM with the chat prompt defined earlier.
qa_chain=create_stuff_documents_chain(llm,prompt)
# Full pipeline: the retriever is wired in front of the answering step.
rag_chain=create_retrieval_chain(retriever,qa_chain)

In [39]:
# Run one sample question through the chain; the question goes in under the
# "input" key, matching the "{input}" placeholder of the prompt.
res=rag_chain.invoke({"input":"how to cure  Acromegaly and gigantism?"})
# The generated text is read from the "answer" entry of the result.
print(res["answer"])

Acromegaly and gigantism are caused by the abnormal release of growth hormone from the pituitary gland. While the provided text details the definition and diagnosis of these conditions, it does not specify a cure. Treatment options, therefore, are not discussed in this context.
