In [1]:
import os

from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from pinecone import ServerlessSpec



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the PDF files of a directory as documents
def load_pdf(data):
    """
    Load every ``*.pdf`` file matched inside the directory ``data``.

    A ``DirectoryLoader`` is pointed at ``data`` with ``PyPDFLoader`` as the
    per-file loader class, and whatever its ``load()`` call produces is
    returned unchanged.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [3]:
# Load the PDFs matched by load_pdf's "*.pdf" glob under ../data/ (a relative
# path, so it is resolved against the kernel's current working directory).
extracted_data = load_pdf("../data/")

In [4]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Build slimmed-down copies of the given documents.

    Each returned Document carries the original ``page_content`` while its
    metadata is reduced to a single ``"source"`` entry (``None`` when the
    original metadata has no ``"source"`` key). The input documents are left
    untouched; a new list of new Document objects is returned.
    """
    return [
        Document(
            page_content=item.page_content,
            metadata={"source": item.metadata.get("source")},
        )
        for item in docs
    ]

In [5]:
# Reduce each loaded document's metadata to just its "source" entry.
minimal_docs = filter_to_minimal_docs(extracted_data)

In [6]:
# Split the documents into smaller chunks
def text_split(minimal_docs, chunk_size=500, chunk_overlap=20):
    """
    Split documents into smaller chunks with a RecursiveCharacterTextSplitter.

    Args:
        minimal_docs: Documents to split.
        chunk_size: Maximum chunk size handed to the splitter (default 500,
            the value this notebook has always used).
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter (default 20).

    Returns:
        The list of chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(minimal_docs)

In [7]:
# Split the metadata-filtered documents into smaller chunks via text_split.
texts_chunk = text_split(minimal_docs)

In [8]:
def download_embeddings():
    """
    Build the HuggingFace embeddings object used by this notebook.

    Returns a ``HuggingFaceEmbeddings`` instance configured with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

# Instantiate the embedding model once; `embedding` is reused below when
# connecting to the Pinecone vector store.
embedding = download_embeddings()

  embeddings = HuggingFaceEmbeddings(


In [14]:
# Pull variables from a local .env file (if present) into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check a
            missing key surfaces later as an opaque
            ``TypeError: str expected, not NoneType`` when it is written back
            to ``os.environ``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"{name} is not set. Add it to your .env file or export it "
            "before running this notebook."
        )
    return value


PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
GROQ_API_KEY = _require_env("GROQ_API_KEY")

# Re-export explicitly so libraries that read the keys straight from the
# environment see the same values used in this notebook.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [10]:
# Lower-case alias of the Pinecone key read from the environment.
pinecone_api_key = PINECONE_API_KEY

# Pinecone client; used below to check for, create and open the index.
pc = Pinecone(api_key=pinecone_api_key)

In [11]:
# Name of the Pinecone index that backs this notebook's vector store.
index_name = "medchatbot2"

# Create the index only when it does not exist yet, so re-running this cell
# does not attempt to create it twice.
# NOTE(review): dimension=384 has to equal the vector size produced by the
# embedding model from download_embeddings() — presumably 384 for
# all-MiniLM-L6-v2; confirm before swapping models.
if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension=384, 
        metric= "cosine",  
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )


# Handle to the index for direct Pinecone operations.
index = pc.Index(index_name)

In [None]:
# docsearch = PineconeVectorStore.from_documents(
#     documents=texts_chunk,
#     embedding=embedding,
#     index_name=index_name
# )

In [12]:
# Connect to the vector store. When the index holds no vectors yet (for
# example it was just created on a fresh setup), embed and upload the chunks
# first; otherwise reuse what is already stored. Without this guard a fresh
# index stays empty and the retriever silently returns no context.
if index.describe_index_stats().total_vector_count == 0:
    docsearch = PineconeVectorStore.from_documents(
        documents=texts_chunk,
        embedding=embedding,
        index_name=index_name,
    )
else:
    docsearch = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embedding,
    )

In [29]:
# dswith = Document(
#     page_content="dswithbappy is a youtube channel that provides tutorials on various topics.",
#     metadata={"source": "Youtube"}
# )

In [30]:
# docsearch.add_documents(documents=[dswith])

In [13]:
# Similarity-search retriever over the vector store; k=3 asks for the three
# best-matching chunks per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [15]:
# Groq-hosted chat model that generates the final answers. A low temperature
# (0.3) is configured and streaming is enabled on the client.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile", # High reasoning for medical terms
    groq_api_key=GROQ_API_KEY,
    temperature=0.3,
    streaming=True
)

In [16]:
# Conversation memory for the chat loop. History is stored under the
# "chat_history" key and returned as message objects (return_messages=True).
# NOTE(review): k=4 presumably limits the window to the last 4 exchanges, and
# the window is presumably only applied when reading through the memory API
# (load_memory_variables), not via memory.chat_memory.messages — confirm.
memory = ConversationBufferWindowMemory(
    k=4, 
    memory_key="chat_history",
    return_messages=True
)

  memory = ConversationBufferWindowMemory(


In [17]:
# system_prompt = (
#     "You are an Medical assistant for question-answering tasks. "
#     "Use the following pieces of retrieved context to answer "
#     "the question. If you don't know the answer, say that you "
#     "don't know. Use three sentences maximum and keep the "
#     "answer concise."
#     "\n\n"
#     "{context}"
# )

# System instructions for the assistant. `{context}` is filled with the
# retrieved document text by the stuff-documents chain.
system_prompt = (
    "You are a Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Answer based ONLY on the provided context.\n\n"

    "FORMAT INSTRUCTIONS:\n"
    "- If user says 'short', 'brief', 'quick', 'concise', '1 sentence', 'summary' → Answer in 1-2 sentences\n"
    "- If user says 'detailed', 'in depth', 'thorough', 'comprehensive', 'explain' → Give complete explanation\n"
    "- If user says 'pointers', 'bullets', 'list', 'steps' → Use bullet points or numbered list\n"
    "- If user says 'table' → Use table format\n"
    "- If NO format specified → Give medium-length paragraph answer (3-5 sentences)\n"
    "- If any other format mentioned, follow it if possible or use paragraph format\n\n"

    "Context:\n{context}"
)


# The chat history is supplied as a list of message objects. Interpolating it
# into the system string would render the Python repr of that list, so it is
# inserted as real chat messages between the system and human turns instead.
# `optional=True` keeps the prompt usable when no history is passed.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("human", "{input}"),
    ]
)

In [18]:
# Chain that combines the retrieved documents with `prompt` and calls the LLM
# (presumably filling the prompt's {context} variable — confirm against the
# create_stuff_documents_chain defaults).
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: run the retriever, then the answer chain. The chat loop
# reads the generated text from the result's "answer" key.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [20]:
# Conversational RAG loop
while True:
    user_input = input("Input Prompt: ")
    if user_input.lower() in ['quit', 'exit']:
        break
    
    result = rag_chain.invoke({
        "input": user_input,
        "chat_history": memory.chat_memory.messages  
    })
    
    print("Question:", user_input)
    print("Response:", result["answer"])
    print("-" * 50)
    
    memory.save_context(
        {"input": user_input}, 
        {"output": result["answer"]}
    )


Question: detailed answer on acne
Response: Acne is a complex skin condition that occurs when pores or hair follicles become blocked, allowing a waxy material called sebum to collect inside. This blockage can lead to the formation of small swellings on the skin surface, which can cause inflammation, especially when bacteria and dead skin cells accumulate. The sebaceous glands, which produce sebum, become inflamed, leading to a range of symptoms, including redness, swelling, and pus-filled pimples.

The condition is often characterized by the presence of comedones, which are small, non-inflammatory bumps that form when sebum and dead skin cells clog the pores. These comedones can eventually become inflamed, leading to the formation of papules, pustules, and nodules. In severe cases, acne can lead to the formation of cysts, which are large, painful bumps that can cause scarring.

The exact causes of acne are still not fully understood, but it is believed to be related to a combination of