In [1]:
# Show the kernel's current working directory.
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot\\research'

In [2]:
import os
os.chdir('../')
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot'

In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


Extracting the Data (Gale Encyclopedia of Medicine).

In [4]:
def load_pdf(data):
    """Load the PDF files under ``data`` that match the ``*.pdf`` pattern.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()`` for the matched files.
    """
    # Each file matched by the glob is parsed with PyPDFLoader.
    pdf_loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [5]:
# Read the source PDFs from the project's data/ folder.
extracted_data = load_pdf('data/')

Chunking the data

In [6]:
def text_chunking(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks for embedding.

    Args:
        extracted_data: Documents to split (as returned by ``load_pdf``).
        chunk_size: Value passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 20, the value previously hard-coded.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [7]:
# Split the loaded pages into chunks and report how many were produced.
text_chunks = text_chunking(extracted_data=extracted_data)
print('Length of the text chunks:', len(text_chunks))

Length of the text chunks: 5859


In [1]:
from langchain.embeddings import HuggingFaceEmbeddings

def donwload_huggingface_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create the HuggingFace embedding model used to vectorise text.

    The misspelled function name is kept on purpose so existing callers keep working.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to the model that was previously hard-coded.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with ``model_name``.
    """
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

In [2]:
# Instantiate the embedding model once; later cells reuse `embeddings`.
embeddings = donwload_huggingface_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name= 'sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Embed a throwaway sentence to confirm the model works and to see the vector length.
query_result = embeddings.embed_query("Hello World!")
print('length :', len(query_result))


length : 384


Creating the index using Pinecone

In [4]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
if not PINECONE_API_KEY:
    # Fail here with an actionable message rather than a less obvious error further down.
    raise ValueError("PINECONE_API_KEY is not set; add it to .env or the environment.")

pc = Pinecone(api_key = PINECONE_API_KEY)

index_name = "carebot"

# Take the vector size from the embedding model itself instead of hard-coding it,
# so the index stays consistent if the embedding model is swapped.
embedding_dimension = len(embeddings.embed_query("dimension probe"))

# Create the serverless index only on first run; later runs reuse it.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric='cosine',
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [6]:
import os

load_dotenv()

# Assigning None into os.environ raises an opaque TypeError, so check each key
# first and report exactly which one is missing.
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is not set; add it to .env or the environment.")
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
# OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY is not set; add it to .env or the environment.")
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

Creating Vector Store

In [13]:
from langchain_pinecone import PineconeVectorStore

# Upload the chunks only while the index is still empty. Re-running an unguarded
# from_documents() call would upsert every chunk again (the stored ids appear to be
# auto-generated, so repeats would not overwrite earlier vectors), leaving duplicates.
# NOTE(review): index stats can lag briefly behind a fresh upsert.
index_stats = pc.Index(index_name).describe_index_stats()
if index_stats.total_vector_count == 0:
    vs = PineconeVectorStore.from_documents(
        documents=text_chunks,
        index_name=index_name,
        embedding=embeddings
    )
else:
    # Index already populated: just attach to it so `vs` is still defined.
    vs = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings
    )

Loading the Vector Database 

In [7]:
from langchain_pinecone import PineconeVectorStore

# Attach to the existing index instead of uploading documents again.
vs = PineconeVectorStore.from_existing_index(embedding=embeddings, index_name=index_name)
vs

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x150bed36050>

In [8]:
# Similarity search that asks for the 3 closest chunks per query.
retriever = vs.as_retriever(
    search_kwargs={'k': 3},
    search_type='similarity',
)

Trying out the search


In [9]:
# Sanity-check retrieval with a sample question; the bare `docs` on the last line displays the hits.
docs = retriever.invoke('What is back Acne?')
docs

[Document(id='783a6e47-cff9-45f3-8035-716e4e8ed06e', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='b6f05515-2c5e-4d56-842b-d2b743ef9173', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 37.0, 'page_label': '38', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when the\npores of the skin become clogged with oil, dead skin\ncells, and bacteri

In [15]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

# Gemini chat model used to generate the answers.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",       # or "gemini-1.5-flash" if you want it faster/cheaper
    temperature=0.3,
    max_output_tokens=512,
    google_api_key=GOOGLE_API_KEY  # or rely on env var only
)

# Conversation memory shared with the retrieval chain.
memory = ConversationBufferMemory(
    memory_key="chat_history",   # this key will be used in the prompt
    return_messages=True,
    # The chain is built with return_source_documents=True, so it returns both
    # "answer" and "source_documents". The memory has to be told which one to
    # store; otherwise saving the turn fails because it cannot pick a single output key.
    output_key="answer",
)


  memory = ConversationBufferMemory(


In [16]:
# Instruction text sent to the model. The three placeholders are filled in at
# run time with the conversation so far, the retrieved chunks and the user question.
system_prompt ="""You are an assistant for the question answer tasks. Use the following pieces of retrieved context to answer the question. If you don't 
    know the answer, say that you don't know. Don't provide anything out of the given context. Use three sentences maximum and keep the answer concise. \n\n
    
    Chat history: {chat_history}
    Context : {context}
    Question : {question}
    """


prompt = PromptTemplate(
    template=system_prompt,
    input_variables=["chat_history", "context", "question"]
)

In [17]:
from langchain.chains import ConversationalRetrievalChain

# Conversational RAG chain: retrieves context with `retriever`, answers with `llm`
# using the custom `prompt`, and keeps the dialogue in `memory`.
# get_chat_history is deliberately left at its default. The previous identity lambda
# passed the raw list of message objects through, so their repr ended up inside the
# prompt text; the default formatter renders the turns as plain
# "Human: ..." / "Assistant: ..." lines instead.
conv_rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
)

# Initialize the model
# model = load_llm()

# def get_answer(query, context):
#     """Function to get answer from the model using context"""
#     prompt = f"""Context: {context}
    
# Question: {query}

# Answer the question based on the context above. Keep it concise and within 3 sentences. If you can't find the answer in the context, say "I don't know"."""
    
#     response = model.generate_content(prompt)
#     return response.text

In [42]:
# Test the model
query = "What is acne?"
docs = retriever.get_relevant_documents(query)
context = "\n".join(doc.page_content for doc in docs)

try:
    answer = get_answer(query, context)
    print(f"Question: {query}")
    print(f"Answer: {answer}")
except Exception as e:
    print(f"Error: {e}")

  docs = retriever.get_relevant_documents(query)


  docs = retriever.get_relevant_documents(query)


Error: 404 models/gemini-pro is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.


In [None]:
# from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_core.prompts import ChatPromptTemplate

# system_prompt = (
#     "You are an assistant for the question answer tasks. Use the following pieces of retrieved context to answer the question. If you don't " \
#     "know the asnwer, say that you don't know. Use three sentences maximum and keep the answer concise. \n\n"
#     "{context}"
# )

# prompt = ChatPromptTemplate.from_messages(
#     [
#         ('system', system_prompt),
#         ('human', '{input}')
#     ]
# )

In [None]:
# from langchain_community.llms import Ollama
# llm = Ollama(model = 'mistral')

In [None]:
# question_answer_chain = create_stuff_documents_chain(llm, prompt)
# print(question_answer_chain)
# rag_chain = create_retrieval_chain(retriever, question_answer_chain)

bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for the question answer tasks. Use the following pieces of retrieved context to answer the question. If you don't know the asnwer, say that you don't know. Use three sentences maximum and keep the answer concise. \n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| Ollama(model='mistral')
| StrOutputParser() kwargs={} config={'run_name': 'stuff_documents_chain'} config_factories=[]


In [None]:
# response = rag_chain.invoke({'input': 'What is Acne'})
# print(response['answer'])

 Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria. Acne vulgaris, also known as common acne, is the most prevalent form and affects nearly 17 million people in the U.S.


In [None]:
# response = rag_chain.invoke({'input': 'What is cure for back acne?'})
# print(response['answer'])

 The treatment for back acne can involve topical drugs like tretinoin, benzoyl peroxide, adapalene, or salicylic acid to reduce the formation of new comedones. Shampooing often, wearing hair off the face, avoiding foods that trigger flare-ups, and reducing stress are also recommended. Additionally, alternative treatments focus on proper cleansing, a balanced diet high in fiber, zinc, and raw foods, and avoiding certain triggers like alcohol and processed foods.


In [None]:
# response = rag_chain.invoke({'input': 'How much does samsung odyssey g7 cost?'})
# print(response['answer'])

 I'm sorry for any confusion, but the context provided doesn't mention the Samsung Odyssey G7 or its price. It seems to be discussing the cost of Alexander technique lessons, bone grafting procedures, and appendectomy-related costs. If you need help finding the price of Samsung Odyssey G7, I would recommend checking electronics retailers such as Best Buy, Amazon, or directly on Samsung's official website.
