## AGENTIC RAG
1. 2 vector databases - FAISS, ChromaDB <> 2 retrievers
2. 1 Wikipedia Loader, 1 ArxivLoader
3. Rewriter
4. Generator

In [36]:
from langchain_community.document_loaders import WebBaseLoader, DirectoryLoader, TextLoader
from langchain_community.vectorstores import FAISS,Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage,AIMessage,SystemMessage
from langchain_core.prompts import ChatPromptTemplate,HumanMessagePromptTemplate,SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap,RunnableLambda,RunnableParallel,RunnablePassthrough
from typing import List,TypedDict,Literal
from langchain.tools.retriever import create_retriever_tool
from langgraph.graph import END,StateGraph
from langchain.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.arxiv.tool import ArxivQueryRun
from langchain.tools import WikipediaQueryRun
from langgraph.prebuilt import ToolNode
from pydantic import BaseModel,Field
from langgraph.graph.message import add_messages
from langchain_tavily import TavilySearch

from dotenv import load_dotenv
load_dotenv()

llm_model = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash',max_retries=2)
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') ##embedding model

In [18]:
### Retriever Tools ###
docs = DirectoryLoader(
    path='data',
    glob='*.txt',
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
).load()
print(f"Number of docs loaded : {len(docs)}")
print(f"Document preview (200 characters) : {docs[0].page_content[0:200]}")
print(f"Metadata preview : {docs[0].metadata}\n")


doc_chunks = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators=["\n"," "]
).split_documents(docs)

print(f"Number of chunks created : {len(doc_chunks)}")
print(f"Chunks preview (200 characters) : {doc_chunks[0]}")

## creating chromadb retriever ##
persist_dir = './chromadb'
vector_store1 = Chroma.from_documents(
    documents=doc_chunks,
    embedding=embedding_model,
    collection_name='vect_Store2',
    persist_directory=persist_dir
)
retriever1 = vector_store1.as_retriever(search_type='mmr',
                                      search_kwargs={'k':3})
print(f"Number of vectors stored : {vector_store1._collection.count()}")


Number of docs loaded : 5
Document preview (200 characters) : Agentic AI: The Evolution of Autonomous Intelligence

Agentic AI represents the next major step in artificial intelligence—systems that not only respond intelligently but also act autonomously toward 
Metadata preview : {'source': 'data\\agentic_ai.txt'}

Number of chunks created : 34
Chunks preview (200 characters) : page_content='Agentic AI: The Evolution of Autonomous Intelligence

Agentic AI represents the next major step in artificial intelligence—systems that not only respond intelligently but also act autonomously toward goals. Unlike traditional AI, which passively reacts to inputs, agentic AI operates through active reasoning, planning, and tool usage. These systems can decompose complex problems into smaller tasks, make decisions, and interact with external data sources to achieve objectives efficiently.' metadata={'source': 'data\\agentic_ai.txt'}
Number of vectors stored : 34


In [19]:
### Retriever 2 ###
docs2 = DirectoryLoader(
    path='data2',
    glob='*.txt',
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
).load()
print(f"Number of docs loaded : {len(docs2)}")
print(f"Document preview (200 characters) : {docs2[0].page_content[0:200]}")
print(f"Metadata preview : {docs2[0].metadata}\n")


doc_chunks2 = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators=["\n"," "]
).split_documents(docs2)

print(f"Number of chunks created : {len(doc_chunks2)}")
print(f"Chunks preview (200 characters) : {doc_chunks2[0]}")

## creating chromadb retriever ##
persist_dir = './chromadb'
vector_store2 = Chroma.from_documents(
    documents=doc_chunks2,
    embedding=embedding_model,
    collection_name='vect_Store3',
    persist_directory=persist_dir
)
retriever2 = vector_store2.as_retriever(search_type='mmr',
                                      search_kwargs={'k':3})
print(f"Number of vectors stored : {vector_store2._collection.count()}")


Number of docs loaded : 5
Document preview (200 characters) : Art: The Language of the Soul

Art is humanity’s oldest and most universal form of expression. Long before the invention of language, humans painted on cave walls, sang around fires, and danced to rhy
Metadata preview : {'source': 'data2\\art.txt'}

Number of chunks created : 19
Chunks preview (200 characters) : page_content='Art: The Language of the Soul

Art is humanity’s oldest and most universal form of expression. Long before the invention of language, humans painted on cave walls, sang around fires, and danced to rhythms of the earth. Through art, we translate emotion into form, silence into sound, and imagination into reality.' metadata={'source': 'data2\\art.txt'}
Number of vectors stored : 19


In [None]:
### WebLoader <> FAISS vectore store ###
url = ['https://www.ey.com/en_in/insights/tax/economy-watch/indian-economy-by-twenty-fifty-in-pursuit-to-achieve-the-thirty-trillion-dollar-mark',
       'https://www.pib.gov.in/PressNoteDetails.aspx?NoteId=154840&ModuleId=3',
       'https://www.goldmansachs.com/insights/articles/why-the-indian-economy-is-buzzing-with-energy-and-optimism']
document3 = [WebBaseLoader(link).load() for link in url]
docs3 = [doc for index in document3 for doc in index]


doc_chunks3 = RecursiveCharacterTextSplitter(
    chunk_size = 800,
    chunk_overlap = 80,
    separators=["\n\n","\n"," "]
).split_documents(docs3)

print(f"Number of chunks created : {len(doc_chunks3)}")
## creating chromadb retriever ##
vector_store3 = FAISS.from_documents(documents=doc_chunks3,
                                     embedding=embedding_model)
retriever3 = vector_store3.as_retriever(search_type='mmr',

                                      search_kwargs={'k':3})

Number of chunks created : 97


In [57]:
## Create tools ##
retriever_tool1 = create_retriever_tool(
    retriever=retriever1,
    name='retriever_ai_context',
    description='Vector store retriever for AI and AI use cases, Information on Machine & Deep Learning & Agentic AI'
)

retriever_tool2 = create_retriever_tool(
    retriever=retriever2,
    name='retriever_art_context',
    description='Vector store retriever for Art & History, Human Mind etc'
)

retriever_tool3 = create_retriever_tool(
    retriever=retriever3,
    name='retriever_indian_economy',
    description='Vector store retriever for Indian Economy'
)

## wikipedia and arxiv tools ##
wikipedia_tool = WikipediaQueryRun(api_wrapper = WikipediaAPIWrapper())
arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())

## taviliy web search tools ##
search_tool = TavilySearch(search_depth="advanced")