## Autonomour RAG 
1. Query Decomposer/Planner Agent
2. Tool Selector -> Retriever, Websearch 
3. Reflection 
4. Retry Loop

In [None]:
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_community.vectorstores import FAISS,Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import AIMessage,HumanMessage,SystemMessage,BaseMessage,ToolMessage
from langchain_core.prompts import ChatPromptTemplate,HumanMessagePromptTemplate,SystemMessagePromptTemplate
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap,RunnableLambda,RunnableParallel,RunnablePassthrough
from langchain.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools.arxiv.tool import ArxivQueryRun
from langchain.tools import WikipediaQueryRun
from langchain.tools.retriever import create_retriever_tool
from langchain_tavily import TavilySearch
from langgraph.graph import END,START,StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode
from pydantic import BaseModel,Field

from dotenv import load_dotenv
load_dotenv()

llm_model = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash',max_retries=2)
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',) ##embedding model

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
docs = DirectoryLoader(
    path = 'data',
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
).load()
print(f"Number of docs loaded: {len(docs)}")
print(f"Document preview: {docs[2].page_content[:200]}")
print(f"Meta data preview: {docs[2].metadata}")



doc_chunks = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators=["\n"," "]
).split_documents(docs)
print("\n")
print(f"Number of chunks created : {len(doc_chunks)}")
print(f"Chunks preview (200 characters) : {doc_chunks[0]}")

### vector store ###
persist_dir = "./chromadb2"
vector_store1 = Chroma.from_documents(
    documents=doc_chunks,
    embedding= embedding_model,
    collection_name='vec_store1',
    persist_directory=persist_dir
)

print('\n')
retreiver1 = vector_store1.as_retriever(
    search_type = 'mmr', search_kwargs = {'k':3})
print(f"Number of vectors stored : {vector_store1._collection.count()}")


Number of docs loaded: 5
Document preview: AI in Healthcare: The New Frontier of Intelligent Medicine

Artificial Intelligence (AI) is revolutionizing healthcare by enabling faster diagnosis, personalized treatment, and more efficient clinical
Meta data preview: {'source': 'data\\ai_in_healthcare.txt'}


Number of chunks created : 34
Chunks preview (200 characters) : page_content='Agentic AI: The Evolution of Autonomous Intelligence

Agentic AI represents the next major step in artificial intelligence—systems that not only respond intelligently but also act autonomously toward goals. Unlike traditional AI, which passively reacts to inputs, agentic AI operates through active reasoning, planning, and tool usage. These systems can decompose complex problems into smaller tasks, make decisions, and interact with external data sources to achieve objectives efficiently.' metadata={'source': 'data\\agentic_ai.txt'}


Number of vectors stored : 34


In [10]:
docs2 = DirectoryLoader(
    path="data3",
    glob = "*.txt",
    loader_cls = TextLoader,
    loader_kwargs={'encoding':'utf-8'}
).load()

print(f"Number of documents loaded : {len(docs)}")
print(f"Document Data preview : {docs2[0].page_content[:400]}")
print(f"Meta data preview {docs2[0].metadata}")


doc_chunks2 = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators=["\n"," "]
).split_documents(docs2)

print(f"Number of chunks created : {len(doc_chunks2)}")
print(f"Chunk preview : {doc_chunks2[0].page_content}")

persist_dir = './chromadb2'
vector_store2 = Chroma.from_documents(
    documents= doc_chunks2,
    embedding = embedding_model,
    collection_name='vec_store2',
    persist_directory=persist_dir
)

print('\n')
retriever2 = vector_store2.as_retriever(
            search_type = 'mmr',
            search_kwargs = {'k':3}
)
print(f"Number of vectors created : {vector_store2._collection.count()}")

Number of documents loaded : 5
Document Data preview : AI and Sustainability: Technology for a Greener Future

Artificial Intelligence (AI) is emerging as a powerful tool for achieving sustainability goals. From optimizing energy consumption to monitoring deforestation, AI technologies enable data-driven environmental stewardship. When applied responsibly, AI can accelerate humanity’s transition to a low-carbon and resource-efficient future.

In energ
Meta data preview {'source': 'data3\\ai_and_sustainability.txt'}
Number of chunks created : 21
Chunk preview : AI and Sustainability: Technology for a Greener Future

Artificial Intelligence (AI) is emerging as a powerful tool for achieving sustainability goals. From optimizing energy consumption to monitoring deforestation, AI technologies enable data-driven environmental stewardship. When applied responsibly, AI can accelerate humanity’s transition to a low-carbon and resource-efficient future.


Number of vectors created : 21


In [12]:
retrieve_ai_context_tool = create_retriever_tool(
    retriever= retreiver1,
    name='retriever_ai_context',
    description='Vector store retriever for AI and AI use cases, Information on Machine & Deep Learning & Agentic AI'
)

retrieve_esg_context_tool = create_retriever_tool(
    retriever= retriever2,
    name = 'retrieve_esg_context',
    description = 'Vector store retriever for ESG, AI & Sustainability,Greenwashing content'
)

## wikipedia ##
wikipedia_tool = WikipediaQueryRun(api_wrapper = WikipediaAPIWrapper())
arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())
## tavily search ##
tavily_search_tool = TavilySearch(search_depth = 'advanced')

tools = [retrieve_ai_context_tool,retrieve_esg_context_tool,wikipedia_tool,arxiv_tool,tavily_search_tool]
llm_with_tools = llm_model.bind_tools(tools)