<a href="https://colab.research.google.com/github/nalivaikaanastasiya-dev/Agentic-RAG-Streamlit-App/blob/main/agentic_rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# agentic_rag.ipynb
# Agentic RAG pipeline with Groq + Tavily + LangChain

# =========================
# 1. Install dependencies
# =========================
!pip install -q langchain langchain-community langchain-groq tavily-python faiss-cpu python-dotenv

# =========================
# 2. Imports
# =========================
import os
import logging
from dotenv import load_dotenv

from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.agents import Tool, initialize_agent
from langchain.chains import RetrievalQA

from tavily import TavilyClient

# =========================
# 3. Load environment variables
# =========================
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast on missing credentials. An explicit raise is used instead of
# `assert` because assertions are stripped when Python runs with -O.
for _required_key in ("GROQ_API_KEY", "TAVILY_API_KEY"):
    if not os.getenv(_required_key):
        raise RuntimeError(f"{_required_key} is missing")

# LangSmith tracing: respect values that are already set. Tracing is only
# switched on by default when a LangSmith API key is present, so a run without
# one does not try to upload traces it cannot authenticate.
_has_langsmith_key = bool(
    os.getenv("LANGCHAIN_API_KEY") or os.getenv("LANGSMITH_API_KEY")
)
os.environ.setdefault(
    "LANGCHAIN_TRACING_V2", "true" if _has_langsmith_key else "false"
)
# The hosted LangSmith API lives at api.smith.langchain.com.
os.environ.setdefault("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")

# =========================
# 4. Logging
# =========================
# Named logger used by the web-search tool and the public answer function.
logger = logging.getLogger("AgenticRAG")

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# =========================
# 5. Initialize LLM
# =========================
# Chat model shared by the retrieval chain and the agent.
# The model id can be overridden with the GROQ_MODEL environment variable so a
# retired model does not require a code change.
# NOTE(review): `llama3-8b-8192` is listed as deprecated by Groq, hence the
# newer default below — confirm against Groq's current model catalogue.
llm = ChatGroq(
    model=os.getenv("GROQ_MODEL", "llama-3.1-8b-instant"),
    temperature=0,  # deterministic decoding for reproducible demo answers
    groq_api_key=os.getenv("GROQ_API_KEY"),
)

# =========================
# 6. Knowledge Base & Retriever
# =========================
# Seed corpus as (text, source id) pairs; each pair becomes one Document whose
# metadata records where the text came from.
_kb_entries = (
    (
        "Agentic systems follow a loop: reason, retrieve, read, synthesize, and answer.",
        "kb:agentic-loop",
    ),
    (
        "Agents should use tools like web search when internal knowledge is insufficient.",
        "kb:tools",
    ),
    (
        "Answers should always include source attribution.",
        "kb:sources",
    ),
)
docs = [
    Document(page_content=text, metadata={"source": source_id})
    for text, source_id in _kb_entries
]

# Embed the corpus, index it in FAISS, and expose a retriever that returns up
# to three documents per query.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# =========================
# 7. Retrieval QA Chain
# =========================
# Question-answering chain over the knowledge-base retriever. Source documents
# are returned alongside the generated answer.
retrieval_qa = RetrievalQA.from_chain_type(
    llm,
    return_source_documents=True,
    retriever=retriever,
)

# =========================
# 8. Tavily Web Search Tool
# =========================
tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))


def _format_tavily_hit(hit: dict) -> str:
    """Render one Tavily result as ``title - url`` plus its snippet, if any."""
    title = hit.get("title") or "(untitled)"
    url = hit.get("url") or "(no url)"
    # Assumes results may carry a `content` snippet — TODO confirm against the
    # Tavily response schema. Missing or empty snippets are simply omitted.
    snippet = (hit.get("content") or "").strip()
    header = f"{title} - {url}"
    return f"{header}\n{snippet}" if snippet else header


def tavily_search(query: str) -> str:
    """Search the web using Tavily and return a text digest of the top hits.

    Args:
        query: Free-text search query.

    Returns:
        One entry per hit (at most three), each formatted as ``title - url``
        followed by the hit's content snippet on the next line when one is
        available, so the agent has text to read and a URL to cite.
        Returns "No web results found." when the search yields no hits and
        "Web search failed." when the request or formatting raises.
    """
    try:
        response = tavily_client.search(query, max_results=3)
        hits = response.get("results") or []
        if not hits:
            return "No web results found."
        return "\n".join(_format_tavily_hit(hit) for hit in hits)
    except Exception:
        # Best-effort tool: never propagate into the agent loop, but keep the
        # full traceback in the log so failures can be diagnosed.
        logger.exception("Tavily search failed for query %r", query)
        return "Web search failed."

# =========================
# 9. Agent Tools
# =========================
def _knowledge_base_search(query: str) -> str:
    """Answer *query* from the internal knowledge base and cite its sources.

    Args:
        query: Question to run through the retrieval QA chain.

    Returns:
        The chain's answer text. When the chain returns source documents, a
        trailing ``Sources: ...`` line lists their distinct ``source``
        metadata values in retrieval order.
    """
    # `retrieval_qa` is built with return_source_documents=True, so it exposes
    # two output keys. `Chain.run` only supports single-output chains and
    # raises ValueError in that case; `invoke` returns the full output dict.
    response = retrieval_qa.invoke({"query": query})
    answer = response.get("result", "")

    # De-duplicate while preserving order; skip documents without a source.
    cited = dict.fromkeys(
        doc.metadata.get("source")
        for doc in response.get("source_documents", [])
    )
    cited.pop(None, None)
    if not cited:
        return answer
    return f"{answer}\nSources: {', '.join(cited)}"


# Tools the agent can choose between: the internal knowledge base first, web
# search as the fallback described in the tool descriptions.
tools = [
    Tool(
        name="KnowledgeBase",
        func=_knowledge_base_search,
        description="Use this tool to search the internal knowledge base."
    ),
    Tool(
        name="WebSearch",
        func=tavily_search,
        description="Use this tool to search the web when knowledge base is insufficient."
    )
]

# =========================
# 10. Agent Initialization
# =========================
# Structured-chat ReAct agent wired to the tools and LLM defined above.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="structured-chat-zero-shot-react-description",
    verbose=True,
    # The structured-chat format expects well-formed action blobs; with this
    # flag a malformed model reply is fed back to the model as an observation
    # instead of aborting the whole run with a parsing exception.
    handle_parsing_errors=True,
)

# =========================
# 11. Public API function
# =========================
def get_answer(query: str) -> str:
    """Run the agent on *query* and return its final answer as text.

    Intended flow: Reason -> Retrieve -> Read -> Synthesize -> Answer.
    Source attribution depends on what the tools return and on the model's
    final response; this function does not add or verify citations itself.

    Args:
        query: The user's question.

    Returns:
        The agent's final answer, or "Agent failed to answer the question."
        if the agent run raises.
    """
    try:
        return agent.run(query)
    except Exception:
        # Top-level boundary: keep the caller-facing contract (always a str)
        # but record the traceback and the offending query for diagnosis.
        logger.exception("Agent run failed for query %r", query)
        return "Agent failed to answer the question."

# =========================
# 12. Demo
# =========================
# Demo prompts, mirroring the three knowledge-base entries.
questions = [
    "What is an agentic reasoning loop?",
    "When should an agent use tools?",
    "Why is source attribution important?",
]

# Ask each question in turn. The prompt is printed before the agent runs so
# that any verbose agent trace appears between the question and its answer.
for q in questions:
    print(f"Q: {q}")
    answer = get_answer(q)
    print(answer)
    print("-" * 5)

