In [10]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [11]:

PINECONE_API_KEY="0d2ae181-e770-47e7-af93-99be55f929a3"
PINECONE_API_ENV="gcp-starter"

In [12]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                             glob="*.pdf",
                             loader_cls=PyPDFLoader)
    documents= loader.load()
    return documents


In [19]:
extracted_data=load_pdf("data/")

FileNotFoundError: Directory not found: 'data/'

In [7]:
# Text Chunks
def text_split(extracted_data):
    text_splitter= RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [8]:
text_chunks=text_split(extracted_data)
print("No of chunks:",len(text_chunks))

NameError: name 'extracted_data' is not defined

In [12]:
# downloading embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [13]:
embeddings= download_hugging_face_embeddings()

In [14]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [15]:
query_result= embeddings.embed_query("Hi Parv")
print("Length:",len(query_result))

Length: 384


In [16]:
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)
index_name="medical-chatbot"

docsearch=Pinecone.from_texts([t.page_content for t in text_chunks],embeddings,index_name=index_name)

In [17]:
docsearch=Pinecone.from_existing_index(index_name,embeddings)

query="What is acne"

docs=docsearch.similarity_search(query,k=4)

print("Result:", docs)

Result: [Document(page_content='acne vulgaris The common form of acne, in\nteens and young adults, that is due to overactivity of', metadata={}), Document(page_content='of acne, consists of a widened hair follicle filled withskin debris, bacteria, and oil called sebum. A black-head has a wide opening to the skin and is cappedwith a blackened mass of skin debris. In contrast, aclosed comedo, commonly called a whitehead, hasan obstructed opening to the skin and may ruptureto cause a low-grade skin inflammatory reaction inthe area.\nbladder A hollow organ in the lower abdomen\nthat stores urine. The kidneys filter waste from the', metadata={}), Document(page_content='the oil (sebaceous) glands in the skin that becomeplugged and inflamed. Acne typically develops whenthe oil glands come to life around puberty and arestimulated by male hormones that are produced inthe adrenal glands of both boys and girls.Treatments include keeping the skin clean andavoiding irritating soaps, foods, drinks, 

In [18]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know , dont't try to make up an answer.

Context:{context}
Question:{question}

Only return the helpful answer below nothing else.
Helpful answer:
"""

In [19]:
PROMPT=PromptTemplate(template=prompt_template,input_variables=["context","question"])
chain_type_kwargs={"prompt":PROMPT}

In [18]:
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q2_K.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-658d5c33-4ad2d4a45719fc9f7a7d7106;091c8845-a86f-4340-8d7c-dbd6d496c757)

Repository Not Found for url: https://huggingface.co/api/models/model/llama-2-7b-chat.ggmlv3.q2_K.bin/revision/main.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.

In [21]:
qa=RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [None]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Response :  Acne vulgaris is a common form of acne that occurs in teens and young adults due to overactivity of the sebaceous glands, which produce oil called sebum. The excess oil and skin debris clog the hair follicles, causing blackheads or whiteheads.
