In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
# from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [24]:
import os
from dotenv import load_dotenv
load_dotenv()  #load all the environment variables

True

In [3]:
# PINECONE_API_KEY = "f5332a0e-31e2-49be-8512-cd45f97e31e0"
# PINECONE_API_ENV = "gcp-starter"

In [4]:
#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [5]:
extracted_data = load_pdf(r"C:\Users\Naruto\Desktop\generative_ai\generative_ai_material\project\Medical_Chat_Bot\data")

In [6]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [7]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 7020


In [8]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [None]:
# from langchain_community.embeddings import OllamaEmbeddings

In [None]:
# embeddings=(
#     OllamaEmbeddings(model="gemma:2b")  ##by default it ues llama2
# )

#### Creating pinecode index

In [1]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key="f5332a0e-31e2-49be-8512-cd45f97e31e0")

  from tqdm.autonotebook import tqdm


In [10]:
index_name = "medical-chat-bot"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)
index

<pinecone.data.index.Index at 0x2054a369e50>

In [11]:
embeddings = download_hugging_face_embeddings()

In [None]:
result = embeddings.embed_documents(text_chunks)
print(len(result))
print(len(result[0]))

In [12]:
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore(index=index, embedding=embeddings,index_name=index_name)
vector_store

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x20549a9d3d0>

In [13]:
db = vector_store.from_documents(text_chunks,embeddings,index_name=index_name)

In [14]:
query = "What are Allergies"

docs=db.similarity_search(query, k=3)

print("Result", docs)

Result [Document(metadata={'page': 130.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"), Document(metadata={'page': 129.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body p

In [17]:
print(docs[0].page_content)
print(docs[1].page_content)
print(docs[2].page_content)

GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE
allergens are the following:
• plant pollens
• animal fur and dander
• body parts from house mites (microscopic creatures
found in all houses)
• house dust• mold spores• cigarette smoke• solvents• cleaners
Common food allergens include the following:
• nuts, especially peanuts, walnuts, and brazil nuts
• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and preservatives
The following types of drugs commonly cause aller-
gic reactions:
• penicillin or other antibiotics
itchy, scratch

In [18]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [19]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [25]:
groq_api_key=os.getenv("GROQ_API_KEY")
groq_api_key

'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'

In [26]:
from langchain_groq import ChatGroq
model=ChatGroq(model="Gemma2-9b-It",groq_api_key=groq_api_key)
model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002056BEAB250>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002056BEABF40>, model_name='Gemma2-9b-It', groq_api_key=SecretStr('**********'))

In [27]:
qa=RetrievalQA.from_chain_type(
    llm=model, 
    chain_type="stuff", 
    retriever=db.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [28]:
qa({"query": "What are Allergies?"})

  warn_deprecated(


{'query': 'What are Allergies?',
 'result': 'Allergic rhinitis is commonly triggered by exposure to household dust, animal fur, or pollen.  The foreign substance that triggers an allergic reaction is called an allergen.  \n',
 'source_documents': [Document(metadata={'page': 135.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
  Document(metadata={'page': 130.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.

In [3]:
from langchain_core.documents import Document

In [4]:
result = {'input': 'What are Allergies?', 'context': 
[Document(metadata={'page': 135.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'), 
Document(metadata={'page': 130.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE")], 

'answer': 'Allergic rhinitis is triggered by allergens like household dust, animal fur, or pollen.  These allergens cause the body to produce IgE antibodies, which attach to mast cells. When exposed to the allergen again, mast cells release histamine, causing symptoms like an itchy nose, eyes, and throat. \n'}

In [5]:
result['context']

[Document(metadata={'page': 135.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(metadata={'page': 130.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePo

In [6]:
result["answer"]

'Allergic rhinitis is triggered by allergens like household dust, animal fur, or pollen.  These allergens cause the body to produce IgE antibodies, which attach to mast cells. When exposed to the allergen again, mast cells release histamine, causing symptoms like an itchy nose, eyes, and throat. \n'

In [17]:
print(result['context'][0].metadata)
print("")
print(result['context'][1].metadata)

{'page': 135.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}

{'page': 130.0, 'source': 'C:\\Users\\Naruto\\Desktop\\generative_ai\\generative_ai_material\\project\\Medical_Chat_Bot\\data\\Gale Encyclopedia of Medicine Vol 1.pdf'}


In [18]:
print(result['context'][0].metadata['page'])
print("")
print(result['context'][1].metadata['page'])

135.0

130.0


In [12]:
result['context'][0].page_content + result['context'][1].page_content

"the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"