In [1]:
%pwd

'c:\\Users\\Hp\\Desktop\\Medical-chatbot\\End-to-end-medical-chatbot-Gen-AI\\research'

In [28]:
import os 
os.chdir("../")

In [29]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


In [30]:
#Extract Data From the PDF File
def load_pdf_file(data):
    loader= DirectoryLoader(data,
                            glob="*.pdf",
                            loader_cls=PyPDFLoader)

    documents=loader.load()

    return documents

In [31]:
extracted_data=load_pdf_file(data='Data/')

FileNotFoundError: Directory not found: 'Data/'

In [6]:
#Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks=text_splitter.split_documents(extracted_data)
    return text_chunks

In [7]:
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 5859


In [8]:
from langchain.embeddings import HuggingFaceEmbeddings

In [9]:
#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [10]:
embeddings = download_hugging_face_embeddings()

  embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [11]:
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [12]:
from langchain_chroma import Chroma 

texts = [doc.page_content for doc in text_chunks]
persist_directory = "./chroma_index"

vectorstore = Chroma.from_texts(texts=texts, embedding=embeddings,
                                 persist_directory=persist_directory)

In [None]:
# persist the chrpoma_index to disk
#vectorstore.persist()

# load existing vectors from persist_directory
#vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k':2})

In [14]:
retrieved_docs = retriever.invoke("what are allergies?")

In [15]:
print(retrieved_docs[0].page_content)

reaction. Allergic rhinitis is characterized by an itchy,
runny nose, often with a scratchy or irritated throat due
to post-nasal drip. Inflammation of the thin membrane
covering the eye (allergic conjunctivitis) causes redness,
irritation, and increased tearing in the eyes. Asthma caus-
es wheezing, coughing, and shortness of breath. Symp-
toms of food allergies depend on the tissues most sensi-
tive to the allergen and whether the allergen spread sys-


In [16]:
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers 
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.memory import ConversationBufferMemory 
from langchain_core.runnables import RunnableParallel, RunnablePassthrough,  RunnableLambda
 


In [17]:
# Prompt template for RAG
prompt_template = (
    "You are an assistant for question-answering tasks."
    "Use the following pieces of retrieved context to answer"
    "the question. If you don't know the answer, say that you" 
    "don't know. Use three sentences maximum and keep the answer concise"
    "\n\n" 
    "Context:\n{context}"
    "\n\n" 
    "Question:\n{question}"
)

prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template
)

In [18]:
# Local LLM using CTransformers (LLaMA)
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':256,
                          'temperature':0.8})

# Memory for maintaining chat context
memory = ConversationBufferMemory(
    memory_key="chat_history", 
    return_messages=True,
    input_key="question"
)


  memory = ConversationBufferMemory(


In [19]:
# 4. Document chain
doc_chain = create_stuff_documents_chain(llm, prompt)


In [20]:
def get_recent_chat_history(x, k=3):
    history = memory.load_memory_variables(x)["chat_history"]
    return history[-k:] if len(history) > k else history


In [21]:
# 5. Build RAG chain with memory in LCEL 
retrieval_chain = (
    RunnableParallel({
        "context": RunnableLambda(lambda x: retriever.invoke(x["question"])),
        "question": RunnablePassthrough(),
        "chat_history": RunnableLambda(lambda x: get_recent_chat_history(x))
    }) 
    | doc_chain
)


In [22]:
# 6. Wrap in memory.save_context for chat tracking
def ask_question(user_input):
    inputs = {"question": user_input}
    response = retrieval_chain.invoke(inputs)
    memory.save_context(inputs, {"output": response})
    return response

# 7. Ask a question
answer = ask_question("What are allergies?")
print(answer)





In [23]:
answer = ask_question("how are they treated?")
print(answer)




In [24]:
answer = ask_question("how are they treated?")
print(answer)




In [25]:
answer = ask_question("how are allergies treated?")
print(answer)




In [26]:
answer = ask_question("severve back pain at my upper back, what is the cause and possible treatment?")
print(answer)



Answer:
I don't know the exact cause of severe back pain at your upper back. However, I can suggest some possible causes such as muscle strain or inflammation due to overuse or injury. Treatment options may include rest, physical therapy, and medications for pain relief. It is important to consult with a healthcare professional for proper diagnosis and treatment.


In [27]:
answer = ask_question("what is Abscess?")
print(answer)



Answer:
An abscess is a localized collection of pus that forms in response to aninfection. The pus is a mixture of dead white blood cells, bacteria, and other debris that the body cannot remove on its own. If theabscess bursts, it can lead to further infection and potentially serious complications. The treatment for an abscess typically involves draining the pus and antibiotics to kill the underlyingbacterial infection.
