In [7]:
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate

In [8]:
with open(r'C:\Users\afz31\RAG\api_key.txt', "r") as file:
    PINECONE_API_KEY = file.read()

In [9]:
#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)

    documents = loader.load()

    return documents

In [10]:
extracted_data = load_pdf(r"C:\Users\afz31\RAG\data")

In [11]:
# extracted_data

In [12]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [13]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 5860


In [14]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [15]:
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [16]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [17]:
# query_result

In [20]:
#Initializing the Pinecone
pc = Pinecone(
        api_key=PINECONE_API_KEY
    )
index_name = "medical-chatbot" # put in the name of your pinecone index here

import os
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [21]:
#If we already have an index we can load it like this
from langchain.vectorstores import Pinecone
docsearch=Pinecone.from_existing_index(index_name, embeddings)

query = "What are Allergies"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(metadata={}, page_content='Purpose\nAllergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-\nganisms and particles, like pollen or dust, by producing\nspecific proteins called antibodies that are capable of\nbinding to identifying molecules, or antigens, on the\nforeign organisms. This reaction between antibody and\nantigen sets off a series of reactions designed to protect\nthe body from infection. Sometimes, this same series of'), Document(metadata={}, page_content='Purpose\nAllergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-\nganisms and particles, like pollen or dust, by producing\nspecific proteins called antibodies that are capable of\nbinding to identifying molecules, or antigens, on the\nforeign organisms. This reaction between antibody and\nantigen sets off a series of reactions designed to protect\nthe body from infection. Sometimes, this same series of'), Docu

In [22]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [23]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [25]:
from langchain_community.llms import CTransformers

config = {'max_new_tokens': 1024, 'temperature': 0.8}

llm = CTransformers(model='TheBloke/Llama-2-7B-Chat-GGML', config=config)

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [26]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs)

In [27]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

  result=qa({"query": user_input})


Response :  The constant cough and production of infected sputum are common symptoms of both bronchiectasis and tuberculosis.
Response :  The cause of diarrhea can vary depending on the individual case, but some common causes include:

* Eating or drinking contaminated food or water that contains bacteria, viruses, or parasites
* Eating something that is difficult to digest, such as dairy products or certain types of food
* Medical conditions such as irritable bowel syndrome (IBS), inflammatory bowel disease (IBD), or small intestine bacterial overgrowth (SIBO)
* Reactions to certain medications or chemicals
* Infections, such as norovirus or rotavirus, which can cause diarrhea
* Other causes, such as food allergies or sensitivities, or hormonal imbalances.


KeyboardInterrupt: 