In [1]:
# Smoke-test cell: prints a fixed greeting.
print("Hello world")

Hello world


In [2]:
import os

# Resolve the parent directory only on the first execution of this cell and
# remember it. Re-running the cell then returns to the same directory instead
# of climbing one level higher on every run (a plain os.chdir("../") is not
# idempotent).
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../")
os.chdir(PROJECT_ROOT)

In [3]:
# Show the current working directory to confirm the chdir above took effect.
%pwd

'e:\\LLM- Med Chatbot\\Medical-chatbot-using-LLM'

In [4]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader , DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

## Extract the data from the PDF

In [5]:
def load_pdf(data):
    """Load the PDF files found in the ``data`` directory.

    Args:
        data: Path of the directory to scan. Files are matched with the
            glob pattern ``*.pdf`` and parsed with ``PyPDFLoader``.

    Returns:
        The list of documents produced by ``DirectoryLoader.load()``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [6]:
# Load the PDFs from the relative "Data/" folder (resolved against the current working directory).
extraccted_data=load_pdf(data ="Data/")

In [7]:
# Preview only the first few documents; displaying the full list would write
# the text of every loaded page into the notebook output.
extraccted_data[:3]

[Document(metadata={'source': 'Data\\Apurba_Sastry_3e.pdf', 'page': 0}, page_content='DOC in MayKing'),
 Document(metadata={'source': 'Data\\Apurba_Sastry_3e.pdf', 'page': 1}, page_content='Essentials of\nMedical Microbiology\nhttps://t.me/docinmayking\n'),
 Document(metadata={'source': 'Data\\Apurba_Sastry_3e.pdf', 'page': 2}, page_content='Essentials of\nMedical Microbiology\nApurba S Sastry MD (JIPMER) DNB MNAMS PDCR\nHospital Infection Control Officer\nOfficer In-charge, HICC\nAntimicrobial Stewardship Lead\nAssociate Professor\nDepartment of Microbiology\nJawaharlal Institute of Postgraduate Medical Education and Research (JIPMER), Puducherry, India\nSandhya Bhat (Gold medalist) MD DNB MNAMS PDCR\nProfessor\nDepartment of Microbiology\nPondicherry Institute of Medical Sciences (PIMS) \n(A Unit of Madras Medical Mission)\nPuducherry, India\nCo-Editors\nAnand Bhimaray Janagond MD DNB\nProfessor\nDepartment of Microbiology\nS Nijalingappa Medical College\nBagalkot, Karnataka, India \

## Split the extracted data into Text Chunks

In [8]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents to split, e.g. the list returned by
            ``load_pdf``.
        chunk_size: ``chunk_size`` handed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value previously hard-coded here.
        chunk_overlap: ``chunk_overlap`` handed to the splitter. Defaults to
            20, the value previously hard-coded here.

    Returns:
        The chunks returned by the splitter's ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)


In [9]:
# Chunk every loaded document and report how many chunks were produced.
text_chunks=text_split(extraccted_data)
print("length of the text_chunk :",len(text_chunks))

length of the text_chunk : 13301


## Downloading the Embeddings from Hugging Face

In [10]:
def download_hugging_face_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the Hugging Face embedding model wrapper used by this notebook.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to the MiniLM model that was previously hard-coded, so
            existing zero-argument calls behave as before.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.

    Note:
        The Pinecone index in this notebook is created with
        ``dimension=384``. A model with a different embedding length needs
        an index created with a matching dimension.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [11]:
# Instantiate the embedding model once; it is reused by the embed_query check
# and by Pinecone.from_documents below.
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name= "sentence-transformers/all-MiniLM-L6-v2")


In [12]:
# Embed a sample string and print the length of the resulting vector; the
# printed length (384) is the value used as `dimension` for the Pinecone index below.
query_result = embeddings.embed_query("Hello world!")
print("length of the query result :",len(query_result))

length of the query result : 384


In [18]:
# query_result  # uncomment to display the raw embedding vector

In [14]:
# Load variables from a .env file into the process environment so the API
# keys can be read from os.environ in the next cell.
from dotenv import load_dotenv
load_dotenv()

True

In [15]:
# Read both service credentials from the environment (None when a variable is unset).
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [22]:
# Import the Pinecone SDK client under an alias: the bare name `Pinecone` is
# already bound to LangChain's vector-store class by the imports at the top of
# the notebook, and rebinding it here would make later cells depend on which
# import happened to run last.
from pinecone import Pinecone as PineconeClient, ServerlessSpec

# Client handle used below to manage indexes.
pc = PineconeClient(api_key=PINECONE_API_KEY)

In [24]:
index_name = "medicalchatbot"

# Creating an index that already exists is rejected by Pinecone, so only create
# it when it is missing; this keeps the cell safe to re-run (Restart & Run All).
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # length of the embedding vectors (see the embed_query check above)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [25]:
# Same index name as in the index-creation cell; presumably repeated so the
# cells below can run without re-executing that cell — confirm before removing.
index_name = "medicalchatbot"

In [26]:
import os

# os.environ only accepts strings, so a key that was never loaded (None) would
# surface as an opaque TypeError on assignment. Check first and fail with a
# message that says what is missing.
if PINECONE_API_KEY is None or GROQ_API_KEY is None:
    raise RuntimeError(
        "PINECONE_API_KEY and GROQ_API_KEY must both be set "
        "(in the environment or the .env file) before running this cell."
    )

# Write the keys back into the process environment (presumably for libraries
# that read them from there — confirm).
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY


In [28]:
# Import LangChain's Pinecone vector-store wrapper here so that `Pinecone`
# below refers to it (the `pinecone` SDK exports a class with the same name).
from langchain.vectorstores import Pinecone

# Embed every chunk with `embeddings` and store the vectors in the index.
# NOTE(review): re-running this cell presumably uploads the same chunks again —
# confirm whether that creates duplicates before re-executing.
docsearch = Pinecone.from_documents(
    documents=text_chunks,
    index_name= index_name,
    embedding=embeddings
)

In [29]:
# Display the vector-store object returned by from_documents.
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x1d94f2374d0>

In [30]:
# Expose the vector store as a similarity-search retriever with k=4,
# i.e. four results per query.
retriver = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=4),
)

## Example of retrieval from the Pinecone database

In [31]:
# Sanity-check retrieval with a sample question before wiring up the LLM.
retrived_docs = retriver.invoke("what is Malaria?")

In [32]:
# Inspect the retrieved chunks together with their source/page metadata.
retrived_docs

[Document(metadata={'page': 287.0, 'source': 'Data\\medical_book.pdf'}, page_content='features.\nMalaria—Disease caused by the presence of sporo-\nzoan parasites of the genus Plasmodium in the red\nblood cells, transmitted by the bite of anopheline\nmosquitoes, and characterized by severe and recur-\nring attacks of chills and fever).\nMicrotubules —Slender, elongated anatomical\nchannels in worms.\nNematode—Roundworm.\nOrganism—A single, independent life form, such as\na bacterium, a plant or an animal.\nParasite—An organism that lives in or with another'),
 Document(metadata={'page': 369.0, 'source': 'Data\\Apurba_Sastry_3e.pdf'}, page_content='MALARIA \nHistory \nMalaria is one of the oldest documented diseases of \nmankind. The name “Malaria” (“Mal” means bad and “aria” \nmeans air) was derived from the ancient false belief that \n“disease is spread by air pollution through stagnant water \nand marshy lands” \n \x99 Sir Alphonse Laveran in 1902 and Sir Ronald Ross in 1907 \nwon the

## Connecting the retriever to the Groq LLM API

In [33]:
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [34]:
# Groq-hosted chat model used to generate the answers.
# NOTE(review): no `model` argument is passed, so the library's default model
# is used — consider pinning one explicitly and confirm the default is still served.
llm = ChatGroq(api_key=GROQ_API_KEY, temperature=0.4, max_tokens=500)

In [35]:
# System instructions for the answering model. The trailing "{context}"
# placeholder is a template variable for the prompt built from this string.
system_prompt = " ".join(
    (
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer the question.",
        "If you don't know the answer, say that you don't know.",
        "Use three sentences maximum and keep the answer concise.",
    )
) + "\n\n{context}"

In [36]:
# Define the prompt template
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [37]:
# Create the question-answering chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriver,question_answer_chain)

In [79]:
# Ask a single-topic question and show only the generated answer text.
response = rag_chain.invoke(dict(input="what is Abscess?"))
print(response["answer"])

An abscess is an area of inflamed and injured body tissue that fills with pus. The overall outcome of an abscess can be determined by other diseases the patient has, or if it ruptures into neighboring areas or permits the spread of infection.


In [38]:
# Ask a multi-part question (definition, symptoms, prevention) and show the answer text.
response_2 = rag_chain.invoke(
    dict(input="What is Abscess and what are its's Symptoms and prevention ?")
)
print(response_2["answer"])

An abscess is an area of inflamed and injured body tissue that fills with pus. It typically affects adolescence and early adulthood, with the infection often being polymicrobial and involving both aerobic and anaerobic gram-negative bacilli. Diagnosis is usually made clinically, supported by ultrasonography. Surgical removal of the affected tissue and adjunctive use of broad-spectrum antibiotics are typically required for treatment.
