<a href="https://colab.research.google.com/github/sindla97/RAG/blob/dev/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Approach
1. Import PDF files into the knowledge base
2. Perform semantic segmentation using a sentence-transformer type model and convert the segments to embeddings
3. Use a rerank model to rank the retrieved embeddings
4. Use the LLM to generate a response based on the query and the retrieved embeddings


Try to provide an option to update the knowledge base when new documents are provided.

In [None]:
!pip install langchain-google-genai pypdf langchain_experimental langchain-pinecone langchain-huggingface

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the source PDF on the Colab filesystem.
pdf_path = "/content/AI_Agent_white_paper_by_google_1737132048.pdf"

# Two loaders over the same file: `loader` uses PyPDFLoader's default mode,
# `loader2` uses mode='single' so the PDF comes back as one document.
loader2 = PyPDFLoader(pdf_path, mode="single")
loader = PyPDFLoader(pdf_path)

In [None]:
all_pages=loader.load()  # list of documents produced by the default-mode loader
# load_and_split() is an alternative, but it needs a text-splitter argument;
# without one it falls back to RecursiveCharacterTextSplitter by default.
all_pages[10].page_content  # peek at one entry; index 10 assumes the PDF yields at least 11 documents

### Loading the whole document and invoking the LLM for a summary and for generating embeddings works for small documents, but when the document is large it has to be chunked, summarized chunk by chunk, and the summaries combined before generating embeddings


# Summarize using the whole document

In [None]:
whole_doc=loader2.load() # load the whole PDF as a single document, used for summary-type questions
whole_doc[0].page_content  # display the text of that single document

In [None]:
from google.colab import userdata
# Read the Google Generative AI API key from the Colab secrets store (secret name: GOOGLE_API_KEY).
google_api_key=userdata.get('GOOGLE_API_KEY')


In [None]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

# Initialize the text splitter; it is given the Google embedding model that
# it uses to split the text.
text_splitter = SemanticChunker(
    GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=google_api_key
    )
)

# create_documents() takes a list of strings and splits each one, so the
# per-page texts are merged into a single string first. Pages are joined with
# a newline rather than '' so the last word of one page is not glued to the
# first word of the next (which would also hide the sentence boundary there).
chunks = text_splitter.create_documents(
    ["\n".join(page.page_content for page in all_pages)]
)

In [None]:
chunks[0].page_content  # inspect the text of the first chunk

In [None]:
len(chunks)  # number of chunks currently in the list

In [None]:
# Import the chat LLM used both to summarize the whole document and to answer
# questions during retrieval.
# The retrieved documents are sent to the LLM, which is asked to reply in a structured format.
from langchain_google_genai import ChatGoogleGenerativeAI

# NOTE(review): confirm that the "models/gemini-1.5-pro" model id is still served for this API key.
output_llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-pro", google_api_key=google_api_key)

# Next: build the prompt messages.


In [None]:
# Prompt for a whole-document summary: a system instruction followed by a
# human turn that inlines the full text of the single-document load.
system_turn = ("system", "please provide a summary of this document")
human_turn = (
    "human",
    f"please proivde a detailed summary of the document: {whole_doc[0].page_content}",
)
messages = [system_turn, human_turn]

summary = output_llm.invoke(messages)
summary  # the LLM reply holding the whole-document summary

In [None]:
from langchain.schema import Document

ai_message = summary.content  # text of the LLM's summary reply
doc = Document(page_content=ai_message) # wrap the summary text in a Document so it can be stored with the chunks

print(doc)

In [None]:
chunks.append(doc) # append the summary document to the other chunks

In [None]:
# lets use a vector store  to store these embeddings
from langchain_pinecone.vectorstores import PineconeVectorStore
# Vector store over the 'rag-test' Pinecone index, using the same Google
# embedding model id as the chunker; the Pinecone key comes from Colab secrets.
vecstore = PineconeVectorStore(
    embedding=GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=google_api_key,
    ),
    pinecone_api_key=userdata.get('pinecone_api'),
    index_name='rag-test',
)
vecstore

In [None]:
vecstore.delete(delete_all=True) # delete any existing embeddings in the index
# NOTE(review): deleting from an index/namespace that is still empty may raise on some Pinecone setups — confirm on a first run
vecstore.add_documents(chunks) # create embeddings (based on each chunk's text) and add them to Pinecone

In [None]:
vecstore.search(query='what are the contents of the document Agentic AI', search_type='similarity') # quick sanity check of the retrieval

In [None]:
# Same retrieval check with a summary-style question and an explicit k=4.
vecstore.search(query='what is the summary of the document Agentic AI', search_type='similarity', k=4)

In [None]:
# One-shot retrieval-augmented answer: fetch the four most similar entries,
# inline their text into the prompt and print the LLM's reply.
user_query = 'what is the summary of the document Agents'
retrived_contents = vecstore.search(
    query=user_query,
    search_type='similarity',
    k=4,
)
context_text = ''.join(hit.page_content for hit in retrived_contents)
message = f"Based on the contents provided {context_text} answer the user question:{user_query} in a structured format and ask a followup question"
reply = output_llm.invoke(message)
print(reply.content)

In [None]:
def chat(user_query):
    """Run an interactive retrieval-augmented chat loop.

    For each question the four most similar entries are fetched from
    ``vecstore``, their text is inlined into the prompt, and the reply of
    ``output_llm`` is printed. The next question is then read from stdin;
    an empty or whitespace-only line ends the conversation.

    Args:
        user_query: The first question to answer.
    """
    # Loop instead of recursing once per turn: the recursive form kept every
    # earlier turn's retrieved context alive on the stack and would hit the
    # interpreter's recursion limit in a long session.
    while True:
        retrived_contents = vecstore.search(query=user_query, search_type='similarity', k=4)
        context_text = ''.join(x.page_content for x in retrived_contents)
        message = f"Based on the contents provided {context_text} answer the user question:{user_query} in a structured format and ask a followup question"
        print(output_llm.invoke(message).content)

        user_query = input()
        # A blank follow-up carries no question, so stop instead of querying
        # the vector store and the LLM with it.
        if not user_query.strip():
            return


chat('what are the contents of the document Agentic AI')

# Let's summarize the whole document and add it to the vector store
We can use LangChain's summarize feature, but it requires frequent LLM invocations; if the LLM is hosted and accessed through inference endpoints, this costs money and can trigger quota and usage alerts.


## To access models on Hugging Face we have 2 options
### 1. Using HuggingFaceEndpoint
### 2. Using HuggingFacePipeline
## HuggingFaceEndpoint hosts the model on a remote server and takes care of the architecture, but it is a paid service
## HuggingFacePipeline downloads the model to your local machine; you need a GPU to host and run the model, but it is free of cost


In [None]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the token stored in Colab
# secrets. The token is read once and passed straight to login(); the earlier
# stand-alone userdata.get(...) call discarded its result and was removed.
login(token=userdata.get('huggingface_api'))

In [None]:
from langchain_huggingface import HuggingFaceEndpoint, HuggingFacePipeline
from transformers import pipeline
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # NOTE(review): presumably set to get synchronous CUDA errors while debugging — confirm it is still wanted


# Build a local text-generation pipeline for google/gemma-3-1b-it.
# NOTE(review): `hf` is not used by the later cells visible here (the summarize chain is built with output_llm).
hf = HuggingFacePipeline.from_model_id(
    model_id="google/gemma-3-1b-it",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 20000, 'trust_remote_code':True
     }
)
# HuggingFacePipeline downloads the model weights and hosts the model locally.
# I did not have enough memory to summarize the document this way.
# HuggingFaceEndpoint (inference providers) is an alternative for the summary, but it is a paid service.

In [None]:
from langchain.chains import load_summarize_chain
# Map-reduce summarization chain driven by the chat model `output_llm`;
# verbose=True is passed through to the chain.
summarizer = load_summarize_chain(
    llm=output_llm,
    chain_type='map_reduce',
    verbose=True,
)
summarizer

In [None]:
# Summarize only the first nine chunks (the ones that carry actual content);
# the summary text is read from the 'output_text' key of the chain's result.
summary = summarizer.invoke(chunks[:9])

# add_documents() expects a list of Documents, so the single summary Document
# is wrapped in a list; passing a bare Document fails when the store iterates
# over it.
vecstore.add_documents([Document(page_content=summary['output_text'])])


# Let's add a feature that looks at the chat history so that questions with little or no context can be answered by referring to the previous chat