In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from dotenv import load_dotenv
import os

In [3]:
# Load environment variables via python-dotenv, then read the Pinecone
# credentials from the process environment. Either value is None when the
# corresponding variable is not set.
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")

In [4]:
def load_pdf(data):
    """Load the PDF files found under a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the glob pattern ``*.pdf`` and parsed by
            ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for the matched files.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [5]:
# Read the PDFs from the local "data" directory.
extracted_data = load_pdf(data="data")

In [6]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split (the notebook passes the output of
            ``load_pdf``).
        chunk_size: Maximum chunk length given to
            ``RecursiveCharacterTextSplitter``. Defaults to 500.
        chunk_overlap: Overlap between consecutive chunks given to the
            splitter. Defaults to 20.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)


text_chunks = text_split(extracted_data)
print(f"Chunks Length: {len(text_chunks)}")

Chunks Length: 2163


In [7]:
def download_hugging_face_embeddings():
    """Build the sentence-embedding model used for indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


embeddings = download_hugging_face_embeddings()
# Bare last expression: the notebook renders the model's repr as cell output.
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
# Initialise the Pinecone client.
import pinecone

# Keep the client under its own name so that `pinecone` continues to refer to
# the imported module instead of being rebound to a client instance.
pinecone_client = pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
index_name = "diabetes-chatbot"

# Create an embedding for each text chunk and store it in the index.
# Deterministic ids make this cell idempotent: when no ids are supplied a fresh
# random id is generated per text, so every re-run of the cell appends another
# copy of each chunk and similarity search returns the same passage several
# times. With fixed ids a re-run overwrites the existing vectors instead.
# NOTE: vectors already duplicated by earlier runs are not removed by this;
# clear the index once if it has been populated before.
chunk_texts = [chunk.page_content for chunk in text_chunks]
chunk_ids = [f"chunk-{position}" for position in range(len(chunk_texts))]
docsearch = Pinecone.from_texts(
    chunk_texts,
    embeddings,
    ids=chunk_ids,
    index_name=index_name,
)

In [9]:
# Attach to the already-populated index rather than re-uploading the chunks.
docsearch = Pinecone.from_existing_index(index_name, embeddings)

# Sanity-check retrieval with a sample question; k is the number of results to return.
query = "What is Type 1 Diabetes?"
docs = docsearch.similarity_search(query=query, k=3)
print("Result", docs)

Result [Document(page_content='appear to be linked to type 1 diabetes. These include genetics,\nautoantibodies, viruses, cow’s milk, and oxygen free radicals.\nGenetics\nScientists have long suspected that genetics play a role in \ntype 1 diabetes. If your mother or father had diabetes, forWHAT IS DIABETES? 13'), Document(page_content='appear to be linked to type 1 diabetes. These include genetics,\nautoantibodies, viruses, cow’s milk, and oxygen free radicals.\nGenetics\nScientists have long suspected that genetics play a role in \ntype 1 diabetes. If your mother or father had diabetes, forWHAT IS DIABETES? 13'), Document(page_content='appear to be linked to type 1 diabetes. These include genetics,\nautoantibodies, viruses, cow’s milk, and oxygen free radicals.\nGenetics\nScientists have long suspected that genetics play a role in \ntype 1 diabetes. If your mother or father had diabetes, forWHAT IS DIABETES? 13')]


In [10]:
# Prompt text with two placeholders, {context} and {question}, matching the
# input variables declared on the PromptTemplate below.
prompt_template = """
    Use the following information to provide a helpful response to the user's question.
    If you're uncertain about the answer, please indicate that you're unsure rather than providing inaccurate information.

    Context: {context}
    Question: {question}

    Provide a concise and informative response to the user's question below.
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Passed through to the chain constructor as its chain_type_kwargs.
chain_type_kwargs = dict(prompt=PROMPT)

In [11]:
# Local Llama-2 chat model loaded through CTransformers from a GGML file on
# disk; generation settings are supplied via `config`.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [12]:
# Retrieval-augmented QA chain: the retriever is asked for 2 results per query,
# the "stuff" chain type is used with the custom prompt, and source documents
# are returned alongside the answer.
retriever_options = dict(k=2)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=retriever_options),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [13]:
# Run the chain once with a fixed question; the answer text is read from the
# "result" key of the returned mapping.
result = qa.invoke(dict(query="What is Type 2 Diabetes?"))
print("Response : ", result["result"])

Response :  
Type 2 diabetes, also known as non-insulin dependent diabetes, is a chronic condition in which the body does not produce or effectively use insulin, leading to high blood sugar levels. It accounts for about 90% of all diabetes cases and typically affects adults who are overweight or obese. Symptoms may include feeling tired, blurred vision, slow healing of cuts or wounds, and frequent urination. Treatment options include lifestyle changes such as a healthy diet and regular exercise, as well as medications including metformin and insulin therapy.
If you have any further questions or concerns about type 2 diabetes, please let me know!


In [15]:
# Interactive question/answer loop.
# Type "exit" or "quit" to stop. Interrupting the kernel (or sending EOF) while
# the prompt is waiting for input also ends the loop cleanly instead of
# leaving a KeyboardInterrupt traceback in the notebook.
EXIT_COMMANDS = {"exit", "quit"}

while True:
    try:
        user_input = input("Input Prompt:").strip()
    except (KeyboardInterrupt, EOFError):
        print("Session ended.")
        break

    # Blank input carries no question; ask again rather than querying the chain.
    if not user_input:
        continue
    if user_input.lower() in EXIT_COMMANDS:
        break

    result = qa.invoke({"query": user_input})
    print("Response : ", result["result"])

Response :  
Type 2 diabetes, also known as non-insulin dependent diabetes, is a chronic condition in which the body does not produce or effectively use insulin, leading to high blood sugar levels. It is typically associated with obesity and physical inactivity, although it can also occur in people who are thin or have a family history of type 2 diabetes. Symptoms may include increased thirst and urination, blurred vision, fatigue, and tingling or numbness in the hands and feet. Treatment typically involves lifestyle changes such as a healthy diet and regular exercise, as well as medication to lower blood sugar levels. In some cases, insulin therapy may be necessary. If left untreated, type 2 diabetes can lead to serious complications such as heart disease, kidney failure, and nerve damage.



KeyboardInterrupt

