In [1]:
print("Hi Karthik")

Hi Karthik


In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import pypdf

  from tqdm.autonotebook import tqdm


In [3]:
PINECONE_API_KEY = " ************* "
PINECONE_API_ENV = " ************* "

In [4]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [6]:
extracted_data = load_pdf("data/")

In [7]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [8]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 2757


In [9]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [10]:
embeddings = download_hugging_face_embeddings()

In [11]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [12]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [15]:
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)

index_name="m-chatbot"

docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [16]:
docsearch=Pinecone.from_existing_index(index_name, embeddings)

query = "Explain Antiretroviral treatment"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='viremia and is probably a lso altered by antiretroviral therapy, which reduces viremia and viral \nlevels in semen. Antiretroviral treatment, when taken as post -exposure prophylaxis by the \nhost soon after sexual exposure to an infected partner, may also reduce the probability of \ntransmission. Transmission could theoretically occur from sexual behaviour involving contact \nwith any of the fluids in which it has been found, but the concentration of HIV found in saliva', metadata={}), Document(page_content='life in such people. This antiviral chemotherapy though not a cure has proved to be useful in \nprolonging the life of AIDS patients.  \nThese drugs neither improve nor restore the immune system nor do they destroy the virus \ninstalled in the cells.  \n3. Specific Prophylaxis  \nUntil more effective cure is available the attempt will be to treat the manifestations of AIDS.  \nPneumocystis carnii  – Primary prophylaxis against P.carnii infection shou

In [17]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [37]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [38]:
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8},lib='avx')

In [39]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [41]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Response :  Antiretroviral therapy (ART) is a treatment that uses medications to suppress the virus, reducing the amount of HIV in the body and preventing damage to the immune system. These medications can also reduce the risk of transmission of HIV to others through sexual contact or mother-to-child transmission during pregnancy, childbirth, or breastfeeding. However, these drugs do not cure HIV nor do they restore or improve the immune system. They are used to prolong the life of AIDS patients and help manage their symptoms.


PineconeProtocolError: Failed to connect; did you specify the correct index name?