In [1]:
%pwd

'l:\\Pdfs\\ReposGitHub\\LLM_MedicalChatbot\\research'

In [2]:
import os
os.chdir("../")
%pwd

'l:\\Pdfs\\ReposGitHub\\LLM_MedicalChatbot'

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
#Extract Data From the PDF File
def load_pdf_file(data):
    loader= DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)

    documents=loader.load()

    return documents

In [5]:
extracted_data=load_pdf_file(data='Data/')

In [6]:
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [7]:
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 5860


In [8]:
from langchain_huggingface import HuggingFaceEmbeddings

In [9]:
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings


In [10]:
embeddings = download_hugging_face_embeddings()

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [26]:
from dotenv import load_dotenv
load_dotenv()

True

In [27]:
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
HUGGINGFACEHUB_API_TOKEN=os.environ.get('HUGGINGFACEHUB_API_TOKEN')

In [16]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "llm-medicalbot"


pc.create_index(
    name=index_name,
    dimension=384, 
    metric="cosine", 
    spec=ServerlessSpec(
        cloud="aws", 
        region="us-east-1"
    ) 
) 

{
    "name": "llm-medicalbot",
    "metric": "cosine",
    "host": "llm-medicalbot-x3jcoyp.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [28]:
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

In [18]:
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings, 
)

In [19]:
# Load Existing index 

from langchain_pinecone import PineconeVectorStore
# Embed each chunk and upsert the embeddings into your Pinecone index.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [20]:
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1d1c3c21a20>

In [21]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [22]:
retrieved_docs = retriever.invoke("What is Acne?")

In [23]:
retrieved_docs

[Document(id='23e76057-047e-4605-a37e-6cccb7efeab9', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='722bc18d-a3b2-4e89-adb3-6e2c3e8dbb74', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 38.0, 'page_label': '39', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed.(Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM -

In [49]:
from langchain_huggingface import HuggingFaceEndpoint
repo_id = "google/gemma-2b"

llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=128,
    temperature=0.5,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


In [50]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [51]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [52]:
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])




The pituitary gland, located in the center of the brain,
secretes a hormone called growth hormone. This hormone
stimulates growth of bones and soft tissues throughout
the body. Normally, the growth hormone released by the
pituitary gland is regulated by the brain. In acromegaly,
the pituitary gland produces too much growth hormone,
causing the abnormal growth of bones and soft tissue.
Symptoms of acromegaly include the following:
• Enlargement of the hands, feet, and face
• Flattened nose
• Thickened lips
• Thickened tongue
• Thickened voice box
• Thickened fingers and toes
• Thickened nails
• Thickened skin
• Increased sweating
• Enlarged tongue
• Enlarged salivary glands
• Enlarged thyroid gland
• Enlarged pituitary gland
• Enlargement of the pituitary stalk
• Enlargement of the pituitary gland
• Enlargement of the pituitary stalk
• Enlargement of the pituitary gland
• Enlargement of the pituitary stalk
• Enlargement of the pituitary gland
• Enlargement of the pituitary stalk
• Enla

In [53]:
response = rag_chain.invoke({"input": "What is stats?"})
print(response["answer"])




Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: What is stats?
Human: Wh