In [1]:
import os

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.retrieval_qa.base import RetrievalQA
from pinecone import Pinecone, ServerlessSpec
from langchain.prompts import PromptTemplate


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATA_PATH = "data/"
def load_pdf_files(data):
    """Load every PDF matched by ``**/*.pdf`` under the ``data`` directory.

    Args:
        data: Directory path passed to ``DirectoryLoader`` as the search root.

    Returns:
        The result of ``DirectoryLoader.load()``, with each matched file read
        through ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="**/*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()
documents = load_pdf_files(DATA_PATH)

In [3]:
# Sanity check: report how many documents the loader returned.
print("lenght of the documents:", len(documents))

lenght of the documents: 759


Creating Chunks

In [4]:
def create_chunks(documents):
    """Split ``documents`` into overlapping chunks.

    The splitter is configured with ``chunk_size=500`` and ``chunk_overlap=50``,
    both measured with the built-in ``len``.

    Args:
        documents: Documents passed to
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunks produced by ``split_documents``.
    """
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 50,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

chunks = create_chunks(documents)
print("Number of chunks:", len(chunks))

Number of chunks: 7080


Load the Embedding Model

In [7]:
def create_embeddings(chunks):
    """Instantiate the HuggingFace embedding model used by this notebook.

    Args:
        chunks: Accepted for interface compatibility but never read; this
            function only constructs the embedding object.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
embeddings = create_embeddings(chunks)
print("Embeddings created")

  embeddings = HuggingFaceEmbeddings(model_name= "sentence-transformers/all-MiniLM-L6-v2")


Embeddings created


Vector Embeddings in Pinecone DB

In [None]:
# Initialize Pinecone.
# The API key is read from the environment so no credential is stored in the
# notebook; a KeyError here means PINECONE_API_KEY is not set.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create the index on first run, otherwise connect to the existing one.
index_name = "my-index"
existing_index_names = [existing.name for existing in pc.list_indexes()]
if index_name not in existing_index_names:
    pc.create_index(
        name=index_name,
        # Must match the output size of the embedding model; the
        # all-MiniLM-L6-v2 model used in this notebook produces 384-d vectors.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
index = pc.Index(index_name)


In [9]:
def build_vector_store(chunks, embeddings, index):
    """Wrap a Pinecone index in a LangChain vector store and upload ``chunks``.

    Args:
        chunks: Documents passed to ``PineconeVectorStore.add_documents``.
        embeddings: Embedding object given to the store as ``embedding=``.
        index: Pinecone index handle the store is bound to.

    Returns:
        The ``PineconeVectorStore`` after ``add_documents`` has been called.
    """
    store = PineconeVectorStore(index=index, embedding=embeddings)
    # NOTE(review): every call uploads all chunks again; re-running this cell
    # presumably duplicates vectors in the index - confirm before re-running.
    store.add_documents(chunks)
    return store


# The builder has its own name so that this assignment no longer overwrites
# the function with its result (previously a second call raised TypeError).
vector_store = build_vector_store(chunks, embeddings, index)
print("Vector store created")

Vector store created


In [None]:
# ======================
# 1. Initialize Pinecone
# ======================
# Credentials are read from environment variables rather than written into the
# notebook; a KeyError below means the corresponding variable is not set.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index_name = "my-index"
index = pc.Index(index_name)

# ======================
# 2. Load Embeddings
# ======================
# Same model name as the one used when the chunks were added to the index.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Wrap Pinecone index into LangChain VectorStore
vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings
)

# ======================
# 3. Load LLM (Gemini)
# ======================
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.5,
)

# ======================
# 4. Build RAG Pipeline
# ======================
# Retrieve the 5 nearest chunks for each query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# return_source_documents=True exposes the retrieved chunks next to the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)




In [11]:
# ======================
# 5. Ask a Question
# ======================
query = "What is Prognosis, explain it with examples "
result = qa_chain.invoke(query)

# result["result"] is the generated answer text.
print("\nAnswer:", result["result"])

# result["source_documents"] are the retrieved chunks; print each one's metadata.
print("\nSources:")
for source_doc in result["source_documents"]:
    print("-", source_doc.metadata)


Answer: Based on the provided text:

**Prognosis** refers to the likely outcome or course of a medical condition or disease. It depends on various factors related to the problem's source and characteristics.

Here are examples from the text:

*   **Example 1: Pituitary Adenomas**
    When pituitary adenomas are identified as the source of increased ACTH leading to cortisol excess, the prognosis is that "about 80% of patients are cured by surgery." This indicates a generally good prognosis for this specific condition when treated surgically.

*   **Example 2: Cortisol Excess due to Other Cancer**
    If cortisol excess is due to some other form of cancer, "the prognosis depends on the type of cancer and the extent of its spread." This means the likely outcome is variable and determined by how aggressive the cancer is and how far it has advanced.

Sources:
- {'author': '', 'creationdate': '2017-05-01T10:37:35-07:00', 'creator': '', 'keywords': '', 'moddate': '2017-05-01T10:37:35-07:00',