In [3]:
%pwd


'c:\\Users\\sruja\\medical-chatbot\\research'

In [4]:
import os
os.chdir("../")

In [5]:
%pwd

'c:\\Users\\sruja\\medical-chatbot'

In [6]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def load_pdf_files(data):
    loader=DirectoryLoader(data,glob="*.pdf",loader_cls=PyPDFLoader)

    documents=loader.load()
    return documents

    

In [8]:
extracted_data=load_pdf_files("data")

In [9]:
len(extracted_data)


4505

In [39]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, return a new list of Document objects
    containing only 'source' in metadata and the original page_content.
    """
    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src}
            )
        )
    return minimal_docs
filtered_data = filter_to_minimal_docs(extracted_data)

In [40]:
len(filtered_data)

4505

In [41]:
def text_split(filtered_data):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
       
    )

    texts_chunk=text_splitter.split_documents(filtered_data)
    return texts_chunk

In [42]:
texts_chunk=text_split(filtered_data)

len(texts_chunk)

20993

In [43]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Download and return the HuggingFace embeddings model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        
    )
    return embeddings
embedding = download_embeddings()


In [44]:
from dotenv import load_dotenv  
import os

# Reload environment variables to get the new HuggingFace token
load_dotenv(override=True)

True

In [45]:
pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

os.environ["PINECONE_API_KEY"] = pinecone_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token

print(f"✅ Pinecone API Key loaded: {pinecone_api_key[:20]}...")
print(f"✅ OpenAI API Key loaded: {openai_api_key[:20]}...")
print(f"✅ HuggingFace Token loaded: {huggingface_token[:20]}...")

✅ Pinecone API Key loaded: pcsk_75aS7x_SyajQGMG...
✅ OpenAI API Key loaded: sk-proj-O31Ko81fr5EI...
✅ HuggingFace Token loaded: hf_ZPoOwuhqJDrYqVIiC...


In [46]:
from pinecone import Pinecone
pinecone_api_key=pinecone_api_key

pc = Pinecone(api_key=pinecone_api_key)


In [47]:
pc

<pinecone.pinecone.Pinecone at 0x23cfc597090>

In [48]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone()

index_name = "medical-chatbot-index"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

index = pc.Index(index_name)


In [49]:
# ⚠️ WARNING: Only run this cell ONCE to upload vectors to Pinecone
# This will upload all text chunks and may take a long time
# After running once, skip this cell and use the next cell to load existing index

# from langchain_pinecone import PineconeVectorStore
# 
# docsearch = PineconeVectorStore.from_documents(
#     documents=texts_chunk,
#     embedding=embedding,
#     index_name=index_name
# )
# 
# print("✅ Vectors uploaded to Pinecone successfully!")

# ⚠️ THIS CELL IS COMMENTED OUT - USE NEXT CELL TO LOAD EXISTING INDEX

In [50]:
# ✅ USE THIS CELL - Load the existing index (doesn't re-upload vectors)
from langchain_pinecone import PineconeVectorStore  

docsearch = PineconeVectorStore(
    embedding=embedding, 
    index_name=index_name
)

print(f"✅ Connected to existing Pinecone index: {index_name}")
print("✅ Ready to perform similarity search!")

✅ Connected to existing Pinecone index: medical-chatbot-index
✅ Ready to perform similarity search!


In [51]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3}) 

In [52]:
# Using a direct API call with InferenceClient for reliability
from huggingface_hub import InferenceClient

# The API token should be loaded from .env file
client = InferenceClient(token=huggingface_token)

def get_answer(context, question):
    """
    Uses the InferenceClient to get an answer from the LLM.
    """
    # Create a prompt from the system message, context, and question
    system_message = (
        "You are a medical assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
    )
    
    prompt = f"""{system_message}

Context:
{context}

Question: {question}

Answer:"""

    try:
        response = client.text_generation(
            prompt,
            model="google/flan-t5-large",
            max_new_tokens=250,
            temperature=0.7,
        )
        return response.strip()
    except Exception as e:
        print(f"Error calling HuggingFace API: {e}")
        return "Sorry, I could not get an answer from the model."

print("✅ HuggingFace InferenceClient configured and get_answer function is ready.")

✅ HuggingFace InferenceClient configured and get_answer function is ready.


In [56]:
# Manually ask a question to test the RAG chain
question = "What is Allergic rhinitis?"
print(f"Asking: '{question}'")

# 1. Retrieve context from Pinecone
try:
    docs = retriever.get_relevant_documents(question)
    context = "\n\n".join([doc.page_content for doc in docs])
    
    print("\n--- Retrieved Context ---")
    for i, doc in enumerate(docs):
        source = doc.metadata.get('source', 'Unknown')
        print(f"Context {i+1} (from {source}):\n{doc.page_content}\n")

    # 2. Get answer from the model using the retrieved context
    print("\n--- Answer from RAG chain ---")
    final_answer = get_answer(context, question)
    print(final_answer)

except Exception as e:
    print(f"An error occurred during the RAG process: {e}")

Asking: 'What is Allergic rhinitis?'


  docs = retriever.get_relevant_documents(question)



--- Retrieved Context ---
Context 1 (from data\the-gale-encyclopedia-of-medicine_compress_compressed.pdf):
caused by allergic reaction to airborne substances.
Description
Allergic rhinitis (AR) is the most common allergic
condition and one of the most common of all minor
afflictions. It affects between 10-20% of all people in the
United States, and is responsible for 2.5% of all doctor
visits.Antihistaminesand other drugs used to treat aller-
gic rhinitis make up a significant fraction of both
prescription and over-the-counter drug sales each year.
There are two types of allergic rhinitis: seasonal
and perennial. Seasonal AR occurs in the spring, sum-
mer, and early fall, when airborne plant pollens are at
their highest levels. In fact, the term hayfever is really
a misnomer, since allergy to grass pollen is only one
cause of symptoms for most people. Perennial AR
occurs all year and is usually caused by home or work-
place airborne pollutants. A person can be affected
by one or both 