In [1]:
import numpy as np
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyMuPDFLoader,DirectoryLoader

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
from datasets import load_dataset
from langchain.schema import Document


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# this funtion for Load pdf 

def load_pdf(data):
   loader = DirectoryLoader(data,glob="*.pdf",loader_cls=PyPDFLoader)
   doc = loader.load()

   return doc

In [2]:
%pwd

'c:\\Users\\shame\\OneDrive\\Documents\\GitHub\\Medical-Bot'

In [8]:
pdf_directory = "C:/Users/shame/OneDrive/Documents/GitHub/Medical-Bot/Data/"  
pdf_documents = load_pdf(pdf_directory)

In [9]:
#text split 
def text_split_chunks(documents, chunk_size=500, chunk_overlap=20):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(documents)
    return text_chunks 


In [10]:
pdf_chunks = text_split_chunks(pdf_documents)
print("Number of PDF chunks:", len(pdf_chunks))

Number of PDF chunks: 10484


In [11]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [12]:
embedding_model = download_hugging_face_embeddings()



In [13]:
embedding_model

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [14]:
index_path = "FAISS_index"  # Path to save the FAISS index

In [15]:

def store_in_faiss_vector_db(texts,embedding_model, index_path):
    
    # Create FAISS vector store from the embeddings
    vector_store = FAISS.from_documents(pdf_chunks,embedding_model)
    
    # Save the vector store locally
    vector_store.save_local(index_path)
    
    return vector_store



In [16]:
index_path="FAISS_index"
vector_store = store_in_faiss_vector_db(pdf_chunks,embedding_model,index_path)

In [17]:

def store_in_faiss_vector_db(texts,embedding_model, index_path):
    
    # Create FAISS vector store from the embeddings
    vector_store = FAISS.from_documents(pdf_chunks,embedding_model)
    
    # Save the vector store locally
    vector_store.save_local(index_path)
    
    return vector_store



In [18]:
index_path="FAISS_index"
vector_store = store_in_faiss_vector_db(pdf_chunks,embedding_model,index_path)

In [19]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [20]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [21]:
llm=CTransformers(model="D:/Medical-Bot/Model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [22]:
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=vector_store.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [23]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

Response :  - Pain can be acute or chronic, and it can be caused by a variety of factors such as injury, illness, or medical condition.
- It is usually felt as a sensation of discomfort, distress, or agony in the affected area.
- Pain can also have emotional and psychological effects on a person, such as anxiety, depression, or post-traumatic stress disorder (PTSD).


KeyboardInterrupt: 