In [1]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as PC
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
#from langchain.llms import CTransformers
#from ctransformers import AutoModelForCausalLM

  from tqdm.autonotebook import tqdm


In [2]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [36]:
# Compute capability of the current CUDA device as a (major, minor) tuple.
# The query is guarded so a CPU-only host, which the `device` selection cell
# explicitly supports, shows None here instead of raising.
cuda_capability = torch.cuda.get_device_capability() if torch.cuda.is_available() else None
cuda_capability

(7, 5)

In [1]:
# Sanity check: report whether PyTorch can see a CUDA device.
import torch
torch.cuda.is_available()

True

### Load PDF

In [2]:
def load_pdf(data):
    """Load the PDF files found in the directory ``data``.

    A ``DirectoryLoader`` is configured to match ``*.pdf`` and to read each
    match with ``PyPDFLoader``; the result of its ``load()`` call is returned.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [3]:
# Read every PDF from the project's data directory (path is relative to the notebook).
extracted_data = load_pdf(data="../data/")

In [4]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the return value of ``load_pdf``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as ``chunk_size``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 20.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(extracted_data)

In [5]:
# Break the loaded pages into the chunks that get embedded and indexed.
text_chunks = text_split(extracted_data=extracted_data)

### Embeddings

In [3]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where PINECONE_API_KEY comes from — confirm.
load_dotenv()

In [4]:
def download_huggingface_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create the HuggingFace embedding model used for both chunks and queries.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``.

    Returns:
        The configured ``HuggingFaceEmbeddings`` instance.
    """
    # The Pinecone index stats shown in this notebook report dimension 384, so a
    # replacement model must produce vectors of that size.
    return HuggingFaceEmbeddings(model_name=model_name)

In [5]:
# Instantiate the embedding model once; it is reused for indexing and for queries.
embeddings = download_huggingface_embeddings()

### Connect to the Pinecone index

In [6]:
# Read the Pinecone API key from the environment (None when the variable is unset).
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

In [7]:
# The index and the namespace inside it share the same name.
index_name = namespace = 'medical-chatbot'

# LangChain wrapper around the Pinecone index, using the shared embedding model.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    namespace=namespace,
    embedding=embeddings,
    pinecone_api_key=PINECONE_API_KEY,
)

In [None]:
# Embed the chunks and store them through the LangChain vector store.
# NOTE(review): the manual upsert cell further down writes the same chunks to the
# same 'medical-chatbot' namespace under 'doc1chunk{n}' ids — confirm that only one
# of the two ingestion paths is meant to run, otherwise the chunks are likely stored twice.
vectorstore.add_documents(text_chunks)

In [None]:
# Plain-text view of every chunk (page_content only), then peek at the second one.
text_chunks_str = list(map(lambda doc: doc.page_content, text_chunks))
print(text_chunks_str[1])

In [None]:
#vectors = embeddings.embed_documents(text_chunks)

In [None]:
#embeddings.embed_query(text_chunks[0].page_content)

In [17]:
# Embed the text of every chunk in one batched call. `vectors` has to be defined
# here: the only other assignment in the notebook is commented out, and it passed
# Document objects where embed_documents expects strings.
vectors = embeddings.embed_documents([chunk.page_content for chunk in text_chunks])

# One upsert record per chunk; ids are numbered from 1.
final_vector = [
    {'id': f'doc1chunk{idx}', 'values': vector}
    for idx, vector in enumerate(vectors, 1)
]

In [49]:
# With metadata
# Embed all chunk texts with the document-side API in a single batched call
# instead of issuing one embed_query() call per chunk.
chunk_texts = [chunk.page_content for chunk in text_chunks]
chunk_embeddings = embeddings.embed_documents(chunk_texts)

# One upsert record per chunk: a stable id, the vector, and the original text as
# metadata so query results can be shown without a second lookup.
final_vector = [
    {'id': f'doc1chunk{idx}', 'values': values, 'metadata': {'text': text}}
    for idx, (text, values) in enumerate(zip(chunk_texts, chunk_embeddings), 1)
]

In [46]:
import itertools
def chunks(iterable, batch_size=100):
    """Yield successive tuples of at most ``batch_size`` items from ``iterable``.

    The last tuple may be shorter than ``batch_size``; an empty iterable yields
    nothing.
    """
    source = iter(iterable)
    # Two-argument iter() keeps calling the slicer until it returns the empty
    # tuple, i.e. until the source iterator is exhausted.
    yield from iter(lambda: tuple(itertools.islice(source, batch_size)), ())

In [11]:
# configure client
# Direct Pinecone client, used alongside the LangChain vector store for the
# manual upsert and query cells.
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(index_name)
# Show the index statistics (dimension and per-namespace vector counts).
index.describe_index_stats()
#pc.list_indexes()


{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 2352},
                'medical-chatbot': {'vector_count': 7020}},
 'total_vector_count': 9372}

In [13]:
#index.delete(delete_all=True, namespace='medical-chatbot')

{}

In [50]:
# Use a dedicated name for the pooled handle so the `with` block does not rebind the
# module-level `index`, which later cells keep using for queries after this handle
# has been released.
with pc.Index(index_name, pool_threads=30) as upsert_index:
    # Send requests in parallel
    async_results = [
        upsert_index.upsert(vectors=ids_vectors_chunk, async_req=True, namespace=namespace)
        for ids_vectors_chunk in chunks(final_vector, batch_size=100)
    ]
    # Wait for and retrieve responses (this raises in case of error)
    for async_result in async_results:
        async_result.get()

In [None]:
#for ids_vectors_chunk in chunks(final_vector, batch_size=100):
#    index.upsert(vectors=ids_vectors_chunk, namespace="medical-chatbot")

### Query

In [53]:
# Embed the question, then ask the index for the three closest stored vectors,
# returning both their values and their metadata.
query = embeddings.embed_query("What are allergies")
response_vectors = index.query(
    vector=query,
    namespace="medical-chatbot",
    top_k=3,
    include_metadata=True,
    include_values=True,
)

In [None]:
# Display the raw query response.
response_vectors

In [None]:
# Print the chunk text stored in each match's metadata.
for match in response_vectors['matches']:
    print(match.metadata['text'])

In [7]:
# Same lookup through the LangChain wrapper: it takes the question as text and
# the result is displayed as the cell output.
query = "What are allergies"
vectorstore.similarity_search(query, k=3, namespace=namespace)

[Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE", metadata={'page': 130.0, 'source': '..\\data\\Medical_book.pdf'}),
 Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• nuts, especially peanuts, walnuts, and brazil nuts\n• fish, mollusks, and shellfish• eggs• wheat• milk• food additives

### LLM

In [8]:
# Prompt for the "stuff" QA chain. `{context}` receives the retrieved chunks and
# `{question}` the user's query; both placeholders must stay in the template.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know the answer, don't try to make up an answer.
Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [9]:
# Wrap the raw template so the chain can fill in `context` and `question`,
# and package it the way the chain factory expects.
PROMPT = PromptTemplate(input_variables=['context', 'question'], template=prompt_template)
chain_type_kwargs = dict(prompt=PROMPT)

In [16]:
from langchain_community.llms import CTransformers

# Generation settings handed to the model through the wrapper's `config`.
# `gpu_layers` is set once, here, rather than also being repeated as a separate
# constructor keyword.
config = {
    'max_new_tokens': 256,
    'temperature': 0.8,
    'gpu_layers': 128,
}

# LangChain-compatible wrapper around the local GGML Llama-2 chat model.
# No `accelerate` preparation step: Accelerator.prepare() only acts on torch
# modules, optimizers, dataloaders and schedulers, and returns anything else
# unchanged, so it did nothing for this object.
llm = CTransformers(model='../models/llama-2-7b-chat.ggmlv3.q4_0.bin',
                    model_type='llama',
                    config=config)

In [32]:
from ctransformers import AutoModelForCausalLM

# Direct ctransformers handle for experimentation, kept under its own name.
# The RetrievalQA chain is built from `llm`, which must stay bound to the LangChain
# CTransformers wrapper; rebinding `llm` to this raw model breaks a top-to-bottom run.
raw_llm = AutoModelForCausalLM.from_pretrained("../models",
                                               model_type='llama',
                                               model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',
                                               gpu_layers=50)

In [None]:
from langchain.schema.retriever import BaseRetriever, Document
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun, AsyncCallbackManagerForRetrieverRun
#from langchain_core.documents import Document
from typing import List, Any

class CustomRetriever(BaseRetriever):
    """Wrap another retriever and return its hits ordered by ``source`` metadata.

    The wrapped retriever is a declared field, so it has to be passed by keyword:
    ``CustomRetriever(retriever=vectorstore.as_retriever(search_kwargs={"k": 2}))``.
    """

    # The retriever whose results are re-ordered.
    retriever: BaseRetriever

    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
        """Fetch documents from the wrapped retriever and sort them by ``source``.

        Args:
            query: The search text forwarded to the wrapped retriever.
            run_manager: Callback manager; a child manager is passed downstream.

        Returns:
            The retrieved documents, sorted by ``metadata['source']``.
        """
        documents = self.retriever.get_relevant_documents(query, callbacks=run_manager.get_child())
        # Treat a missing source as '' so such documents sort first instead of
        # raising when None is compared with a string.
        return sorted(documents, key=lambda doc: doc.metadata.get('source') or '')

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun, **kwargs: Any) -> List[Document]:
        """Async retrieval is not supported by this wrapper.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("CustomRetriever does not support async retrieval")

In [18]:
# Retrieval-augmented QA chain: the two nearest chunks are stuffed into the custom
# prompt, and the source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
    #retriever=CustomRetriever(vectorstore.as_retriever(search_kwargs={"k": 2})),
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
)

In [1]:
import streamlit as st

In [19]:
# Run the chain once; the returned dict carries 'query', 'result' and 'source_documents'.
qa.invoke("What are allergies")

{'query': 'What are allergies',
 'result': "Allergies occur when a person's immune system overreacts to something that is not harmful, called an allergen. Exposure to allergens can trigger an allergic reaction, which can cause symptoms such as sneezing, congestion, runny nose, itchy eyes, and difficulty breathing. Common allergens include pollen, dust mites, mold, pet dander, insect stings, and certain foods such as peanuts, fish, shellfish, milk, eggs, and wheat. The immune system produces antibodies called IgEg IgEg IgE(IgEg IgE (IgEosmIgE to fight against the IgEg IgEg IgEg IgEIge to IgEosmIgEg IgE to fight- IgEg IgEg IgEg IgE antibodonto IgEIggGIgE and IgE to fight IgEg IgE (IgEg IgEg IgEosomes IgEg IgEg IgEg IgE, IgE (IgE (IgE to fightinmun and IgEg IgEg IgE",
 'source_documents': [Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an a

In [14]:
# Minimal interactive loop over the QA chain. Submit an empty line, or type
# "exit" / "quit", to stop; an interrupt or end-of-input at the prompt also ends
# the loop instead of surfacing a traceback.
while True:
    try:
        user_input = input('Input Query: ').strip()
    except (EOFError, KeyboardInterrupt):
        break
    if user_input.lower() in ('', 'exit', 'quit'):
        break
    # invoke() is used instead of calling the chain object directly, which
    # triggers a deprecation warning.
    result = qa.invoke(user_input)
    print('Response: ', result['result'])

  warn_deprecated(
