In [1]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_pinecone import PineconeVectorStore
from langchain_community.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


from langchain_community.llms import CTransformers
import os
import pinecone
from pinecone import Pinecone
from dotenv import load_dotenv

In [None]:
#os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
load_dotenv()
key= os.getenv("PINECONE_API_KEY")

In [None]:
PINECONE_API_KEY = key
PINECONE_API_ENV = 'gcp-starter'

In [3]:
#extract data func
def load_pdf_data(data):
    loader = DirectoryLoader(data,
                    glob='*.pdf',
                    loader_cls=PyPDFLoader)
    documents = loader.load()

    return documents

In [4]:
#path = '../'
extracted_data = load_pdf_data("../data")

In [5]:
#create chunks of text
def text_chunk_splitter(extracted_data):
    text_split = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunk = text_split.split_documents(extracted_data)

    return text_chunk

In [6]:
text_chunks = text_chunk_splitter(extracted_data)
print("chunk length:  ", len(text_chunks))

chunk length:   10484


In [7]:
#download embedding model
def download_embedding_model():
    embedding= HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    return embedding


In [8]:
embedding = download_embedding_model()

In [9]:
from pinecone_text.sparse import BM25Encoder
from langchain_community.retrievers import (
    PineconeHybridSearchRetriever,
)

In [10]:
list_chunks=[x.page_content for x in text_chunks]

In [11]:
# use default tf-idf values for sparse encoder
bm25_encoder = BM25Encoder().default()

In [12]:
bm25_encoder.fit(list_chunks)

  0%|          | 0/10484 [00:00<?, ?it/s]

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x3034fd540>

In [13]:
#store the values into json file format
bm25_encoder.dump("bm25_values.json")
# load to your BM25Encoder object
bm25_encoder = BM25Encoder().load("bm25_values.json")

In [14]:
pinecone.Pinecone(
   api_key=os.getenv("PINECONE_API_KEY"),  
   environment=os.getenv("PINECONE_ENV"),  
)
index_name = "med-chatbot-hybrid"
index = pinecone.Index(index_name, host="https://med-chatbot-hybrid-mhwhc2g.svc.gcp-starter.pinecone.io")

Run this when loading the data in to the vector store for the first time

In [None]:
# Run this line when loading the embedding to the vector store first time
docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embedding, index_name=index_name)

In [15]:
retriever = PineconeHybridSearchRetriever(
    embeddings=embedding, sparse_encoder=bm25_encoder, index=index
)

In [16]:
query = "what is allergy?"
#retiriever = hybrid_retriever.as_retriever(search_kwargs=dict(k=3))

docs = retriever.get_relevant_documents(query)

#print("Results", docs)

ForbiddenException: (403)
Reason: Forbidden
HTTP response headers: HTTPHeaderDict({'x-pinecone-auth-rejected-reason': 'Wrong API key', 'www-authenticate': 'Wrong API key', 'Content-Length': '9', 'content-type': 'text/plain', 'date': 'Fri, 22 Mar 2024 01:12:57 GMT', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: Forbidden


In [17]:
query = "what is allergy?"
#retiriever = hybrid_retriever.as_retriever(search_kwargs=dict(k=3))

#docs = retriever.get_relevant_documents(query)
docs = await retriever.aget_relevant_documents(query)
#print("Results", docs)

ForbiddenException: (403)
Reason: Forbidden
HTTP response headers: HTTPHeaderDict({'x-pinecone-auth-rejected-reason': 'Wrong API key', 'www-authenticate': 'Wrong API key', 'Content-Length': '9', 'content-type': 'text/plain', 'date': 'Fri, 22 Mar 2024 01:13:11 GMT', 'server': 'envoy', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: Forbidden


In [None]:
query_res = embedding.embed_query("Hi today is March 12 Tuesday")
print("Length: ", len(query_res))

In [None]:
from langchain_pinecone import PineconeVectorStore

In [None]:
pinecone.Pinecone(
   api_key=os.getenv("PINECONE_API_KEY"),  
   environment=os.getenv("PINECONE_ENV"),  
)
index_name = "medical-chatbot-vector-index"

In [None]:
# Run this line when loading the embedding to the vector store first time
docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embedding, index_name=index_name)

In [18]:
docsearch = PineconeVectorStore.from_existing_index(index_name, embedding)
query = "what is allergy?"
retiriever = docsearch.as_retriever(search_kwargs=dict(k=3))
docs = retiriever.get_relevant_documents(query)

print("Results", docs)
#docs[0].page_content
#[doc.page_content for doc in docs]

Results [Document(page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 2591Physical allergy'), Document(page_content='KEY TERMS\nAllergen —Any substance that irritates only those\nwho are sensitive (allergic) to it.\nAsthma —Wheezing (labored breathing) due to\nallergies or irritation of the lungs.\nDecongestant —Medicines that shrink blood ves-\nsels and consequently mucus membranes. Pseu-doephedrine, phenylephrine, and phenylpropano-lamine are the most common.\nSinus —Air-filled cavities surrounding the eyes and'), Document(page_content='most serious human immunodeficiency disorder(s). It isa group of congenital disorders in which both thehumoral part of the patient’s immune system and theKEY TERMS\nAllergy —Altered body reaction, usually hypersen-\nsitivity, as a response to exposure to a specific sub-stance.\nAntibody —Any of a large number of proteins that\nare produced after stimulation by an antigen andact specifically against the antigen in an immuneresponse.\nAntihistamine —A drug th

In [None]:
prompt_template = """ 
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.


Context : {context}
Question: {question}

Only return helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
context = "\n\n".join([doc.page_content for doc in docs])
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
#PROMPT = ChatPromptTemplate(prompt_template) #,input_variables=["context", "question"])

#prompt = PROMPT.format(context=context,question= query) 
chain_type_kwargs = {"prompt": PROMPT}

In [None]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [None]:
from llama_cpp import Llama

## N.B
CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.2.56


In [None]:
llm = LlamaCpp(
    model_path= "../model/gguf/llama-2-7b-chat.Q3_K_M.gguf",
    n_gpu_layers= -1,
    n_batch= 512,
    max_tokens= 512,
    temperature=0.7,
    callback_manager=callback_manager,
    #verbose=True # Verbose is required to pass to the callback manager
)

In [None]:
QA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever = retiriever,
    #retriever = docsearch.as_retriever(search_kwargs={'k':3}),
    return_source_documents = True,
    chain_type_kwargs=chain_type_kwargs
)


In [None]:
import textwrap

In [None]:
import sys
while True:
    user_input = input(f"Input prompt: ")
    if user_input == 'exit':
        print("Exiting")
        sys.exit()
    if user_input == ' ':
        continue
    result = QA({'query': user_input})
    result = textwrap.fill(result["result"], width=50)
    #print(f"Answer: {wraped_result['wraped_result']}")
    print(f"Answer: {result}")

In [None]:
# while True:
#     user_Input: input(f"Input prompt: ")
#     result = QA({"query": user_Input})
#     print("Response: ", result["result"])