In [None]:
import os
os.getcwd()
os.chdir("../")
%pwd

In [None]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
def load_pdf_files(data):
    """
    Load the PDF files located in the `data` directory.

    A DirectoryLoader is pointed at `data` with the glob "*.pdf" and
    PyPDFLoader as the per-file loader; whatever its load() call
    produces is returned as-is.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [21]:
# Load every PDF from the "data" directory and report how many documents came back.
extracted_data = load_pdf_files("data")
len(extracted_data)

637

In [None]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Strip each Document down to its text and its origin.

    Builds a fresh list in which every entry keeps the original
    page_content, while its metadata is reduced to the single 'source'
    key (None when the input document carries no such key).
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]

In [None]:
# Reduce every loaded document to its text plus its 'source' metadata.
minimal_docs = filter_to_minimal_docs(extracted_data)
# Preview a few entries only; displaying the whole list floods the notebook output.
minimal_docs[:3]

In [None]:
# Split the documents into smaller chunks
def text_split(minimal_docs, chunk_size=500, chunk_overlap=20):
    """
    Split documents into smaller, overlapping text chunks.

    Args:
        minimal_docs: Documents to split; handed unchanged to
            RecursiveCharacterTextSplitter.split_documents.
        chunk_size: Target chunk length, measured with len(). Defaults to
            500, the value this notebook previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, measured with
            len(). Defaults to 20, the previously hard-coded value.

    Returns:
        The chunks produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_documents(minimal_docs)

In [None]:
# Chunk the minimal documents and report how many chunks were produced.
text_chunks = text_split(minimal_docs)
print(f"Number of chunks: {len(text_chunks)}")
# Preview a few chunks only; displaying every chunk floods the notebook output.
text_chunks[:3]

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    """
    Build the HuggingFace embeddings object used by this notebook.

    Returns a HuggingFaceEmbeddings instance configured for the
    "sentence-transformers/all-MiniLM-L6-v2" model.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )

# Instantiate the embedding model once; a later cell passes it to the vector store.
embedding = download_embeddings()

In [None]:
from dotenv import load_dotenv
import os
# Load variables from a local .env file so the os.getenv() calls in the next cell can read them.
load_dotenv()

In [None]:
# Read the API keys from the environment (populated by load_dotenv()).
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# os.environ only accepts str values, so a missing key would otherwise fail
# with an opaque TypeError on the assignments below. Fail early and name the
# missing variable(s) instead.
_missing_keys = [
    name
    for name, value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Re-export the keys so that libraries reading them from the environment see them.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
from pinecone import Pinecone
# Create the Pinecone client from the API key read in the previous cell.
pinecone_api_key = PINECONE_API_KEY

pc = Pinecone(api_key=pinecone_api_key)
# Show the client object as the cell output.
pc

In [None]:
from pinecone import ServerlessSpec
# Name of the Pinecone index that the chunk embeddings are uploaded to.
index_name = "medical-chatbot"

# Create the index only when it does not exist yet, so this cell can be re-run.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # presumably the vector size of the all-MiniLM-L6-v2 embeddings — confirm
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Handle to the index.
index = pc.Index(index_name)

In [18]:
from langchain_pinecone import PineconeVectorStore

# Embed every chunk with `embedding` and store it in the index named by `index_name`.
# NOTE(review): this runs on every execution of the cell; it looks like a re-run
# would upload the same chunks a second time — confirm before re-running.
doc_search = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embedding,
    index_name=index_name
)

In [23]:
# Build one extra Document to demonstrate adding data to the existing Pinecone index.
document = Document(
    page_content="Wikipedia is a free, multilingual online encyclopedia created and maintained by a global community of volunteers through open collaboration, known as wiki-based editing.",
    metadata={"source": "Google"}
)

In [24]:
# Insert the extra document into the vector store; the call returns a list with its id.
doc_search.add_documents(documents=[document])

['c70fe65a-9019-47fb-b8ec-d1fcd02a36e6']

In [26]:
# Similarity-search retriever over the vector store, configured with k=3 results per query.
retriever = doc_search.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [28]:
# Sanity-check retrieval on a sample question and display the returned documents.
retrieved_docs = retriever.invoke("What is Osteoporosis")
retrieved_docs

[Document(id='707b9649-9ea6-45c3-8638-92c67f820360', metadata={'source': 'data/Medical_book.pdf'}, page_content='wear and tear, age and less often from inflammation.\nOsteogenesis imperfecta—Also called brittle bones,\nthis is a condition present at birth in which bones\nare abnormally fragile, brittle and break easily.\nOsteomalacia—A disease in which bones gradual-\nly soften and bend.\nOsteomyelitis —An infection of the bone marrow\nand the bone.\nOsteoporosis —A disease which occurs primarily\nin post-menopausal women in which the amount\nof bone is reduced or skeletal tissue wastes away.'),
 Document(id='add6e655-6129-4631-8905-5269f1bdc505', metadata={'source': 'data/Medical_book.pdf'}, page_content='of old bone and its replacement with new bone. But\nwhen something goes wrong with the process, bone dis-\norders may result.\nOsteoporosis is a particular concern for women\nafter menopause, as well as for older men. In osteo-\nporosis, the inside of the bones become porous and thin

In [31]:
from langchain_openai import ChatOpenAI

# Chat model used to generate answers; presumably authenticates via the OPENAI_API_KEY set earlier — confirm.
chatModel = ChatOpenAI(model="gpt-4o")

In [32]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [33]:
# System message for the question-answering chain. The {context} placeholder is
# left for the chain to fill in (presumably with the retrieved documents — confirm).
# NOTE(review): "an Medical" should read "a medical"; left unchanged here because
# the wording is part of the prompt text sent to the model.
system_prompt = (
    "You are an Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


# Two-message chat template: the system instructions, then the user's question as {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [34]:
# Chain that combines documents with `prompt` and sends them to the chat model.
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
# Retrieval chain: `retriever` supplies the documents, the chain above produces the answer.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [35]:
# Sample query: the question goes in under "input" and the generated text is read from "answer".
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])

Acromegaly and gigantism are disorders caused by the abnormal release of a growth-related chemical from the pituitary gland in the brain. This release leads to increased growth in bone and soft tissue. When the abnormality occurs before bone growth stops, it results in gigantism, while issues occurring after bone growth ceases are termed acromegaly.


In [37]:
# Sample query asking for a definition; prints only the "answer" field of the response.
response = rag_chain.invoke({"input": "what is Acne?"})
print(response["answer"])

Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria. The medical term for common acne is acne vulgaris.


In [38]:
# Sample query asking about treatment; prints only the "answer" field of the response.
response = rag_chain.invoke({"input": "what is the Treatment of Acne?"})
print(response["answer"])

Acne treatment depends on its severity. For mild noninflammatory acne, topical treatments such as tretinoin, benzoyl peroxide, adapalene, or salicylic acid are used. When inflammation is present, topical antibiotics may be added, and improvement usually occurs in two to four weeks, while severe cases may require drugs like isotretinoin.


In [39]:
# Sample two-part query (definition plus treatment); prints only the "answer" field of the response.
response = rag_chain.invoke({"input": "what is the Osteoporosis and whats the proper treatment for it?"})
print(response["answer"])

Osteoporosis is a bone disorder characterized by weakened bones, increasing the risk of fractures. Proper treatment often includes bone disorder drugs, which require a physician’s prescription, and may be taken in tablet, nasal spray, or injectable forms. Additionally, maintaining regular exercise, a balanced diet with adequate calcium and vitamin D, and, for some, hormone replacement therapy can also help manage and prevent osteoporosis.
