## Simplified Adaptive RAG Example: Wikipedia Data (India Economy & AI)

In [None]:

import os
from dotenv import load_dotenv
# Read variables from a local .env file (if any) before looking up the key.
load_dotenv()

# Fail fast with an actionable message when the key is missing. Assigning the
# result of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is unset.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key


### Load Data from Wikipedia

In [None]:

from langchain_community.document_loaders import WebBaseLoader

# Wikipedia pages that make up the knowledge base for this notebook.
urls = [
    "https://en.wikipedia.org/wiki/Economy_of_India",
    "https://en.wikipedia.org/wiki/Artificial_intelligence",
]

# Build one loader per page, then load each page in turn.
loaders = list(map(WebBaseLoader, urls))
docs = [page_loader.load() for page_loader in loaders]

# Each load() result is iterated and its items collected into one flat list.
doc_list = []
for page_docs in docs:
    doc_list.extend(page_docs)

# Cell output: number of loaded documents.
len(doc_list)


### Split & Store in Vector Database (FAISS)

In [None]:

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Splitter built via the tiktoken-based constructor with chunk_size=800 and
# chunk_overlap=100 (presumably measured in tokens; confirm against the
# splitter documentation).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=800,
    chunk_overlap=100,
)
doc_split = text_splitter.split_documents(doc_list)

# Embed every chunk with OpenAI embeddings and index the result in FAISS.
embedding_model = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(doc_split, embedding_model)

# Retriever with default settings; used by the cells below to fetch chunks.
retriever = vectorstore.as_retriever()


### Router (Decide whether to use the Vectorstore or fall back to Web Search)

In [None]:

from typing import Literal
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class RouteQuery(BaseModel):
    """Routing decision: which datasource should answer the user question."""

    datasource: Literal["vectorstore", "web_search"] = Field(
        description="Choose 'vectorstore' if the answer is likely in Wikipedia docs, otherwise 'web_search'."
    )

llm_router = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_router = llm_router.with_structured_output(RouteQuery)

# Tell the router what the vectorstore actually contains so it can decide.
ROUTER_SYSTEM_PROMPT = (
    "You are an expert at routing a user question to a vectorstore or web search. "
    "The vectorstore contains the Wikipedia articles 'Economy of India' and "
    "'Artificial intelligence'. Use the vectorstore for questions on these topics; "
    "otherwise use web_search."
)

# A chat model has no prompt template in front of it, so it must be invoked
# with a string or a list of messages. Passing a dict such as
# {"input": ...} is rejected as an invalid input type.
structured_router.invoke(
    [
        ("system", ROUTER_SYSTEM_PROMPT),
        ("human", "What is the GDP of India?"),
    ]
)


### Retrieval Grader (Check if retrieved docs are relevant)

In [None]:

class GradeDocuments(BaseModel):
    """Binary relevance verdict for a set of retrieved documents."""

    # Constrained to the two allowed answers, mirroring RouteQuery's Literal field.
    binary_score: Literal["yes", "no"] = Field(description="Relevant to the question? yes or no")

llm_grader = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_grader = llm_grader.with_structured_output(GradeDocuments)

GRADER_SYSTEM_PROMPT = (
    "You are a grader assessing whether retrieved documents are relevant to a "
    "user question. Answer 'yes' if the documents contain information related "
    "to the question, otherwise answer 'no'."
)

sample_question = "What is the GDP of India?"
retrieved_docs = retriever.invoke(sample_question)

# Send the page text rather than str(retrieved_docs), which would embed the
# Python repr of the Document objects in the prompt.
retrieved_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

# A chat model must be invoked with a string or a list of messages; a dict
# such as {"question": ..., "documents": ...} is rejected as invalid input.
structured_grader.invoke(
    [
        ("system", GRADER_SYSTEM_PROMPT),
        (
            "human",
            f"Retrieved documents:\n\n{retrieved_text}\n\nUser question: {sample_question}",
        ),
    ]
)


### Generate Final Answer with RAG

In [None]:

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Sample question answered end-to-end by the RAG chain.
question = "What is the GDP of India?"

# Chat model used for answer generation, with temperature pinned to 0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# RAG prompt pulled from the LangChain Hub; it is invoked further down with
# "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (an empty string when ``docs`` is empty).
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Generation pipeline: fill the prompt, call the chat model, and parse the
# model output down to a plain string.
rag_chain = prompt | llm | StrOutputParser()

# Retrieve supporting chunks for the question, then generate the answer.
docs = retriever.invoke(question)
rag_inputs = {
    "context": format_docs(docs),
    "question": question,
}
answer = rag_chain.invoke(rag_inputs)
print(answer)


### Try Another Question

In [None]:

question2 = "How is Artificial Intelligence impacting jobs?"

# Same retrieve -> format -> generate flow, applied to a second question.
docs2 = retriever.invoke(question2)
context2 = format_docs(docs2)
answer2 = rag_chain.invoke({"context": context2, "question": question2})
print(answer2)
