In [None]:
# Show the kernel's working directory before any path adjustment is made.
%pwd

In [None]:
import os

# Normalise the working directory: step out of a folder named "research",
# otherwise step into a local "medbot" folder when one exists.
_cwd_name = os.path.basename(os.getcwd()).lower()
if _cwd_name == "research":
    os.chdir("..")
elif os.path.isdir("medbot"):
    os.chdir("medbot")

print("Current working directory:", os.getcwd())

In [None]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
def load_pdf(data):
    """Load the PDF files matching the ``*.pdf`` glob under ``data``.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory
        (each file is read with ``PyPDFLoader``).
    """
    pdf_loader = DirectoryLoader(
        data, glob="*.pdf", loader_cls=PyPDFLoader, show_progress=True
    )
    return pdf_loader.load()

In [None]:
# Load the PDFs from the relative "data" directory (resolved against the
# working directory set earlier in the notebook).
extracted_data = load_pdf("data")

In [None]:
# Preview only the first few documents; displaying the whole list floods the
# cell output and bloats the saved notebook.
extracted_data[:3]

In [None]:
# Number of documents returned by the loader.
len(extracted_data)

In [None]:
from typing import List
from langchain.schema import Document

def filter_min_doc(docs: List[Document]) -> List[Document]:
    """Return copies of ``docs`` whose metadata keeps only the ``source`` key.

    Args:
        docs: Documents to slim down; the inputs are not modified.

    Returns:
        New ``Document`` objects, in input order, carrying the original
        ``page_content`` and ``metadata={"source": ...}``. The value is
        ``None`` when the input metadata has no ``source`` entry.
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]



In [None]:
# Strip each loaded document down to its text plus a "source" metadata entry.
minim_docs = filter_min_doc(extracted_data)

In [None]:
# Preview only the first few slimmed documents rather than dumping the
# entire list into the cell output.
minim_docs[:3]

In [None]:
def split_text(minim_docs, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping, character-counted chunks.

    Args:
        minim_docs: Documents to split.
        chunk_size: Maximum chunk length, measured with ``len``. Defaults to
            500, the value previously hard-coded here.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 20, the value previously hard-coded here.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_documents(minim_docs)

In [None]:
# Chunk the slimmed documents and report how many chunks were produced.
chunk_text = split_text(minim_docs)
print(f"Number of chunks: {len(chunk_text)}")

In [None]:
# Preview only the first few chunks; the full list is far too long to be a
# useful cell output.
chunk_text[:3]

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

def download_embeddings():
    """Build the embedding model used by the rest of the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

# Instantiate the embedding model once; later cells reuse `embedding`.
embedding = download_embeddings()

In [None]:
# Display the embedding object's repr as a quick sanity check.
embedding

In [None]:
# Embed a sample query to sanity-check the model. Only the first few
# components are shown; the full vector is long and unreadable as output.
vector = embedding.embed_query("What is the capital of France?")
vector[:5]

In [None]:
# The embedding size must agree with the `dimension` used when the Pinecone
# index is created further down.
print("Vector length:", len(vector))

In [None]:
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if present) so the API keys read in
# the next cell are available via os.getenv.
load_dotenv()

In [None]:
def _require_env(name: str) -> str:
    """Return the value of environment variable ``name`` or fail clearly.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The non-empty value of the variable.

    Raises:
        RuntimeError: If the variable is unset or empty. Without this check
            the ``os.environ[...] = None`` assignment below would fail with
            an opaque ``TypeError: str expected, not NoneType``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set. Add it to your .env "
            "file or export it before running this notebook."
        )
    return value


PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")

# Re-export so libraries that read the keys straight from the environment
# see the same values.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
from pinecone import Pinecone
# Build the Pinecone client from the key loaded earlier. The lowercase alias
# is kept so any cell that refers to `pinecone_api_key` keeps working.
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

pc

In [None]:
from pinecone import ServerlessSpec

index_name = "medbot"

# Create the serverless index only when it does not exist yet, so re-running
# the cell reuses the existing index.
# NOTE(review): dimension=384 presumably matches the embedding size printed
# earlier as "Vector length" - confirm before switching embedding models.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index(index_name)

In [None]:
import hashlib

from langchain_pinecone import PineconeVectorStore


def _chunk_id(position: int, doc) -> str:
    """Derive a stable record id for a chunk.

    The id is a SHA-256 hex digest of the chunk's position, its ``source``
    metadata and its text. Including the position keeps ids unique within
    one upload even when two chunks share the same source and text.

    Args:
        position: Index of the chunk in ``chunk_text``.
        doc: The chunk; its ``metadata`` and ``page_content`` are read.

    Returns:
        A 64-character hexadecimal string.
    """
    source = doc.metadata.get("source") or ""
    payload = f"{position}\n{source}\n{doc.page_content}".encode("utf-8")
    return hashlib.sha256(payload).hexdigest()


# Deterministic ids make this cell idempotent: re-running it on the same
# chunks overwrites the existing records instead of uploading a second copy
# of every chunk under freshly generated random ids.
chunk_ids = [_chunk_id(position, doc) for position, doc in enumerate(chunk_text)]

docsearch = PineconeVectorStore.from_documents(
    documents=chunk_text,
    embedding=embedding,
    index_name=index_name,
    ids=chunk_ids,
)

In [None]:
# Attach to the index that already exists instead of uploading documents;
# this rebinds `docsearch`.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name, embedding=embedding
)

In [None]:
# Insert one throwaway document to demonstrate adding to an existing index.
# The fixed id keeps the cell idempotent: re-running it overwrites this one
# record instead of adding another copy under a new random id each time.
dswith = Document(
    page_content="What is the capital of France?",
    metadata={"source": "query"},
)
docsearch.add_documents([dswith], ids=["demo-capital-of-france"])

In [None]:
# Similarity-search retriever configured with k=3 (three documents per query).
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Smoke-test retrieval on a sample question and display what comes back.
ret_docs = retriever.invoke("What is acne?")
ret_docs

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model reached through a custom base_url instead of the default endpoint.
# NOTE(review): no api_key is passed, so the client presumably falls back to
# the OPENAI_API_KEY environment variable - confirm that key is valid for
# this base_url.
chatMod = ChatOpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    model="openai/gpt-oss-20b",
)

In [None]:
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# System instructions for the assistant. The trailing "{context}" is a
# template placeholder (presumably filled with the retrieved documents by
# the chain built from this prompt).
system_prompt = "".join(
    [
        "You are a helpful medical assistant for answering questions about human health. ",
        "Use the following retrieved documents to answer the question. ",
        "If you don't know the answer, say you don't know. ",
        "Always use all available data to answer the question. ",
        "Keep your answer concise and to the point.\n\n",
        "{context}",
    ]
)

# Two-message chat template: the system instructions, then the user's
# question substituted for "{input}".
_prompt_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(_prompt_messages)

In [None]:
# Wire the pieces together: the first chain combines the chat model with the
# prompt, the second one puts the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(chatMod, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Ask the RAG chain a question and print only the "answer" entry of its result.
response = rag_chain.invoke({"input": " what is abscess incision & drainage?"})
print(response["answer"])

In [None]:
# Second sample question; `response` is rebound to the new result.
response = rag_chain.invoke({"input": "i have headache what should I do?"})
print(response["answer"])