In [13]:
# step-1 :load documents any type
from langchain_community.document_loaders import PyPDFLoader

loder=PyPDFLoader('YOLO.pdf')
data=loder.load()

In [14]:
# step-2: split document into smaller chunks 
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_spliter=RecursiveCharacterTextSplitter(chunk_size=1000)
docs=text_spliter.split_documents(data)

In [15]:
docs[3]

Document(metadata={'source': 'YOLO.pdf', 'page': 3}, page_content='Proposal + Classification')

In [16]:
# step-3: Perform Text Embeddings
from langchain_chroma import Chroma
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

embeddings=GoogleGenerativeAIEmbeddings(model='models/embedding-001')

In [17]:
# step-4 :store vectors into Chroma,FAISS,PINECONE Vector DBs
vec_store=Chroma.from_documents(documents=docs,embedding=embeddings)

In [18]:
# step-5: retrive data from VecDB using retrivers
retriever=vec_store.as_retriever(search_type='similarity',search_kwargs={'k':10})
retrieved_docs=retriever.invoke("what is in new yolo")
retrieved_docs[5].page_content

'Outline \n1. Review: R-CNN \n2. YOLO:     -- Detection Procedure \n                    -- Network Design \n                    -- Training Part \n                    -- Experiments'

In [19]:
# step-6 : load LLM model
from langchain_google_genai import ChatGoogleGenerativeAI
llm=ChatGoogleGenerativeAI(model='models/gemini-1.5-pro',temperature=0,max_tokens=None)

In [20]:
# step-7 :chains and reterival question answer
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt="""
You are assistant for question and answering tasks.
if you don't know the answer ,say that you don't know
answer concise
\n\n
{context}
"""

prompt=ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ('human','question:{input}')
    ]
)

In [23]:

question_answer_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(retriever,question_answer_chain)
response=rag_chain.invoke({"input":"yolo works only for object detection?"})
response['answer']

'No, YOLO can be used for other tasks like image classification and tracking. \n'