In [1]:
%pwd

'c:\\Users\\loves\\Documents\\LangChain_Projects\\Medical-Chatbot-Generative-AI\\research'

In [2]:
import os

# Move from the research/ folder up to the project root so relative paths such
# as 'Data/' resolve. The guard keeps the cell idempotent: re-running it will
# not climb one more directory level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
%pwd

'c:\\Users\\loves\\Documents\\LangChain_Projects\\Medical-Chatbot-Generative-AI'

In [5]:
# Extract data from the PDF files
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan for files matching ``*.pdf``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns when each matching file
        is read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [6]:
# Load the PDF documents from the Data/ directory (relative to the current working directory).
extracted_data = load_pdf_file(data = 'Data/')

In [7]:
# extracted_data

In [8]:
#Split the Data into Text Chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split (e.g. the result of ``load_pdf_file``).
        chunk_size: Maximum size of each chunk passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500.
        chunk_overlap: Overlap between consecutive chunks passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Split the loaded documents into chunks and report how many were produced.
text_chunks = text_split(extracted_data)
print("Length of Text Chunks: ", len(text_chunks))

Length of Text Chunks:  5859


In [10]:
from langchain.embeddings import HuggingFaceEmbeddings

In [11]:
#Download the Embeddings from HuggingFace
def download_huggingface_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create a HuggingFace embedding model wrapper.

    Args:
        model_name: Name of the sentence-transformers model to load. Defaults
            to ``'sentence-transformers/all-MiniLM-L6-v2'``. If a different
            model is used, the vector store's dimension must match that
            model's embedding length.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    # Imported locally so the function works even if the import cell was skipped.
    from langchain.embeddings import HuggingFaceEmbeddings
    return HuggingFaceEmbeddings(model_name=model_name)

In [12]:
# Instantiate the embedding model that is later passed to the Pinecone vector store.
embeddings = download_huggingface_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Sanity check: embed a sample query and print the vector length.
# This length is the value the Pinecone index dimension has to match.
query_result = embeddings.embed_query("What is the purpose of the study?")
print("Length of Query Result: ", len(query_result))

Length of Query Result:  384


In [14]:
# Read variables from a .env file into the process environment.
from dotenv import load_dotenv
load_dotenv()

True

In [15]:
# Pull the service credentials out of the environment; each is None when unset.
PINECONE_API_KEY, OPENAI_API_KEY = (
    os.environ.get(key) for key in ("PINECONE_API_KEY", "OPENAI_API_KEY")
)

In [16]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import  ServerlessSpec
import os

pc = Pinecone(api_key= PINECONE_API_KEY)

index_name = "askmedi"

# Create the index only when it does not exist yet. An unconditional
# create_index call fails once the index is present, which would break
# re-running the notebook from a fresh kernel.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # Dimension of the embeddings (must match the embedding model)
        metric="cosine",  # Similarity metric
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [17]:
import os

# Export the keys for libraries that read credentials from the environment.
# Validate first: assigning None into os.environ raises an opaque TypeError,
# so a missing key is reported by name instead.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if not _key_value:
        raise ValueError(f"Missing {_key_name} in .env")
    os.environ[_key_name] = _key_value

In [18]:
#Embed each chunk and upsert the embeddings into your Pinecone index
# NOTE(review): re-running this cell presumably upserts every chunk again
# (possibly as duplicates under new ids) — confirm before re-running.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name
)

In [19]:
# Attach to the existing Pinecone index instead of uploading the documents again
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embeddings)

In [20]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1e980990550>

In [21]:
# Build a similarity-search retriever configured with k=3 results per query.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Smoke-test retrieval with a sample question.
retrieved_docs = retriever.invoke("What is Acne?")

In [23]:
# Inspect the documents returned for the sample question.
retrieved_docs

[Document(id='5e6f5e83-27fc-4d8c-b80f-de67e233a716', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='d927be3e-90e5-4635-a76f-7226c57fbbfd', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 38.0, 'page_label': '39', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM 

In [24]:
# Import the LangChain wrapper under an alias so it does not collide with
# `openai.OpenAI`, which a later cell imports under the same bare name.
# Without the alias, running cells out of order can build `client` from the
# wrong class.
from langchain_openai import OpenAI as LangChainOpenAI
llm = LangChainOpenAI(max_tokens=500, temperature=0.4)

In [25]:
from openai import OpenAI
from dotenv import load_dotenv
import os

In [26]:
# Load the API key from the environment, reading .env first.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Fail fast with a clear message when the key is missing or empty.
if not api_key:
    raise ValueError("Missing OPENAI_API_KEY in .env")

# Client pointed at the OpenRouter endpoint; note that the key stored under
# OPENAI_API_KEY is the one sent to that endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_key,
)

In [None]:
def ask_question(question, retriever, client):
    """Answer a medical question using retrieved context and a chat model.

    Args:
        question: The user's question, sent as the user message and used as
            the retrieval query.
        retriever: Object exposing ``invoke(question)`` that returns documents
            with a ``page_content`` attribute.
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.

    Returns:
        The content of the first choice's message in the model response.
    """
    # Retrieve relevant docs; each one is truncated to 300 characters to keep
    # the prompt short.
    retrieved_docs = retriever.invoke(question)
    context = "\n\n".join(doc.page_content[:300] for doc in retrieved_docs)

    # Build the system prompt and append the retrieved context. Previously the
    # literal text "{context}" was sent, so the model never saw the documents.
    instructions = (
        "You are a medical assistant chatbot named AskMedi."
        " Use the following retrieved context to answer the user's medical question."
        " Return the answer in HTML format with the following rules:\n"
        "- Use <b>...</b> to bold key medical terms.\n"
        "- Use <ul><li>...</li></ul> for bullet points if listing symptoms, causes, or treatments.\n"
        "- Keep the response short and medically accurate (3 sentences max).\n"
        "- If you don't know the answer, say so clearly."
        "\n\n"
    )
    system_prompt = instructions + context

    # Call the model
    response = client.chat.completions.create(
        model="mistralai/mistral-7b-instruct:free",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ]
    )

    return response.choices[0].message.content


In [53]:
# Ask a sample question end-to-end and print the model's answer.
response = ask_question("What is HIV?", retriever, client)
print("Answer:", response)

Answer:  HIV, or Human Immunodeficiency Virus, is a transmissible retrovirus that causes AIDS (Acquired Immunodeficiency Syndrome) in humans. There are two forms of HIV recognized: HIV-1, which causes most cases of AIDS, and HIV-2, which is less common but still causes HIV infection. HIV is typically transmitted through sexual contact, contact with infected body fluids such as blood and urine, or minor injuries that might require a blood transfusion.
