In [2]:
# Research topic handed to the chains in the cells below (spelling fixed: "million-plus").
example_topic = "Impact of million-plus token context window language models on RAG"

In [3]:
from dotenv import load_dotenv
# Load variables from a local .env file into the environment (returned True in the saved run).
# Presumably this is where the Google API credentials come from — TODO confirm.
load_dotenv()

True

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel, Field
from typing import List, Optional
from langchain_core.prompts import ChatPromptTemplate

# Two Gemini chat models, both created with temperature fixed at 0.0.
fast_llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.0)
long_context_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.0)

# Prompt asking the model for Wikipedia pages related to {topic}.
# The wording is sent to the model verbatim, so the typos in it were corrected.
gen_related_topics_prompt = ChatPromptTemplate.from_template(
    """I'm writing a Wikipedia page for a topic mentioned below. Please identify and recommend some Wikipedia pages on closely related topics.

    Please list as many subjects and urls as you can.

    Topic of interest: {topic}"""
)

# Structured-output schema holding a single list of related topic names.
# Documented with `#` comments on purpose: a class docstring would be added to the
# JSON schema description and change what the model is sent.
class RelatedSubject(BaseModel):
    topics: list[str] = Field(
        description="Comprehensive list of related subjects as background research"
    )

# Pipe the related-topics prompt into fast_llm, with output parsed into RelatedSubject.
expand_chain = gen_related_topics_prompt | fast_llm.with_structured_output(RelatedSubject)

# Top-level await: this only works inside a notebook/async REPL, not a plain script.
related_subjects = await expand_chain.ainvoke({'topic': example_topic})
# Bare trailing expression so the notebook displays the parsed result.
related_subjects

RelatedSubject(topics=['Large language models', 'Retrieval augmented generation', 'Context window', 'Natural language processing', 'Artificial intelligence', 'Machine learning', 'Deep learning', 'Transformer networks', 'Wikipedia'])

In [6]:
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.runnables import RunnableLambda, chain as as_runnable

# Wikipedia retriever limited to one result per query (top_k_results=1) and asked to
# load all available metadata; format_doc reads metadata['title'] and metadata['categories'].
wikipedia_retriever = WikipediaRetriever(load_all_available_meta=True, top_k_results=1)

def format_doc(doc, max_length=1000):
    """Render one retrieved document as a short markdown snippet.

    Args:
        doc: Object exposing ``metadata`` (a mapping with a ``'title'`` entry and,
            optionally, a ``'categories'`` list) and ``page_content``.
        max_length: Maximum number of characters kept from the rendered snippet.

    Returns:
        The snippet (heading, summary, related categories) cut to ``max_length``
        characters.

    Raises:
        KeyError: If ``doc.metadata`` has no ``'title'`` entry.
    """
    # A missing or None 'categories' entry is treated as "no related categories"
    # instead of raising; presumably it is only populated when the retriever
    # loads extended metadata — TODO confirm.
    categories = doc.metadata.get('categories') or []
    # NOTE(review): "- " as the joiner yields "A- B- C"; kept to preserve the existing
    # output, but a bulleted list may have been intended.
    related = "- ".join(categories)
    snippet = (
        f"### {doc.metadata['title']}\n\n"
        f"Summary: {doc.page_content}\n"
        f"Related: {related}\n\n"
    )
    return snippet[:max_length]

def format_docs(docs):
    """Format each document with ``format_doc`` and join the snippets with a blank line."""
    snippets = (format_doc(doc) for doc in docs)
    return "\n\n".join(snippets)


# Persona of one Wikipedia editor, produced by the model through structured output.
# Documented with `#` comments on purpose: a class docstring would be added to the
# JSON schema description and change what the model is sent.
class Editor(BaseModel):
    affiliation: str = Field(
        description="Primary affiliation of the editor"
    )
    name: str = Field(
        description="Name of the editor",
    )
    role: str = Field(
        description="Role of the editor in the context of the topic."
    )
    description: str = Field(
        # Fixed the "concers" typo and the stray trailing backtick in the schema text.
        description="Description of the editor's focus, concerns, and motives."
    )

    @property
    def persona(self) -> str:
        """Return the editor's four fields as a newline-separated text block."""
        return f"Name: {self.name}\nRole: {self.role}\nAffiliation: {self.affiliation}\nDescription: {self.description}"
    
# Wrapper schema: the full set of editor personas generated for a topic.
class Perspectives(BaseModel):
    editors: list[Editor] = Field(
        description="List of editors with their perspectives on the topic",
    )

# Prompt that asks the model to pick a set of editors for {topic}, using {examples}
# (formatted Wikipedia snippets) as inspiration. Typos in the instruction text were
# corrected because the text is sent to the model verbatim.
gen_perspectives_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
         """You need to select a diverse (and distinct) group of Wikipedia editors who will work together to create a comprehensive article on the topic.
         You can use other Wikipedia pages of related topics for inspiration. For each editor, add a description of what they will focus on.

         Wiki page outlines of related topics for inspiration:
         {examples}"""),
        ("user", "Topic of interest: {topic}"),
    ]
)

# Perspectives prompt -> gemini-2.0-flash, with output parsed into a Perspectives object.
# NOTE(review): no temperature is passed here, unlike fast_llm / long_context_llm — confirm this is intentional.
gen_perspectives_chain = gen_perspectives_prompt | ChatGoogleGenerativeAI(model='gemini-2.0-flash').with_structured_output(Perspectives)

@as_runnable
async def survey_subjects(topics: str):
    """Generate editor perspectives for a topic, using related Wikipedia pages as examples.

    Expands the topic into related subjects, retrieves a batch of Wikipedia
    documents for each subject (failed lookups are dropped), formats the documents
    and passes them, together with the topic, to ``gen_perspectives_chain``.
    """
    expansion = await expand_chain.ainvoke({'topic': topics})
    batches = await wikipedia_retriever.abatch(expansion.topics, return_exceptions=True)
    # With return_exceptions=True a failed lookup comes back as an exception object
    # in place of its document list; those entries are skipped.
    usable_docs = [
        doc
        for batch in batches
        if not isinstance(batch, BaseException)
        for doc in batch
    ]
    prompt_inputs = {
        "examples": format_docs(usable_docs),
        "topic": topics,
    }
    return await gen_perspectives_chain.ainvoke(prompt_inputs)

perspectives = await survey_subjects.ainvoke(example_topic)
perspectives.dict()

C:\Users\petro_m\AppData\Local\Temp\ipykernel_6980\833876501.py:66: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  perspectives.dict()


{'editors': [{'affiliation': 'Independent AI Researcher',
   'name': 'Dr. Anya Sharma',
   'role': 'Performance Analyst',
   'description': 'Focuses on the quantitative analysis of RAG performance with large context windows, including metrics like recall, precision, and latency. Concerned with benchmarking different models and retrieval strategies.'},
  {'affiliation': 'Large Language Model Developer',
   'name': 'Dr. Kenji Tanaka',
   'role': 'Model Architect',
   'description': 'Interested in the architectural modifications and training techniques required to effectively utilize million-plus token context windows. Focuses on model efficiency and scalability.'},
  {'affiliation': 'Enterprise Knowledge Management',
   'name': 'Ms. Ingrid Olsen',
   'role': 'Enterprise Application Specialist',
   'description': 'Concerned with the practical applications of large context RAG in enterprise settings, including knowledge retrieval, document summarization, and question answering. Focuses on 

In [8]:
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict
from langchain_core.messages import AnyMessage, AIMessage, BaseMessage, HumanMessage, ToolMessage
from typing import Annotated, Sequence
from langchain_core.prompts import MessagesPlaceholder

def add_messages(left, right):
    """Concatenate two message collections, wrapping any non-list operand in a list."""
    head = left if isinstance(left, list) else [left]
    tail = right if isinstance(right, list) else [right]
    return head + tail

def update_references(references, new_references):
    """Merge newly collected references into the accumulated reference mapping.

    Args:
        references: Mapping accumulated so far, or None/empty if there is none yet.
        new_references: Mapping to merge in; its entries win on key collisions.
            None (or empty) is treated as "nothing to add".

    Returns:
        A new dict containing the merged entries; neither argument is modified.
    """
    # Build a fresh dict instead of calling references.update(...): this function is
    # attached to the state as a merge function, and mutating the incoming value in
    # place would silently alter whatever object the caller still holds.
    merged = dict(references) if references else {}
    if new_references:
        merged.update(new_references)
    return merged

def update_editor(editor, new_editor):
    """Keep the editor that is already set; accept ``new_editor`` only if none is set yet."""
    # Can only set at the outset
    return editor or new_editor


# State of one interview. Each field's Annotated metadata names the function that
# combines an existing value with an update (presumably consumed by LangGraph's
# StateGraph as reducers — TODO confirm, the graph is not built in this section).
class InterviewState(TypedDict):
    # Conversation so far; add_messages concatenates old and new messages.
    messages: Annotated[list[AnyMessage], add_messages]
    # Accumulated references; update_references merges new entries in.
    references: Annotated[Optional[dict], update_references]
    # Interviewing editor; update_editor keeps the value that was set first.
    editor: Annotated[Optional[Editor], update_editor]


# System prompt for the interviewing editor. {persona} is filled in per editor and the
# running conversation is spliced in through the optional "messages" placeholder.
gen_qn_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # A trailing backslash joins the next line with no separator, so every
            # continued sentence needs its own trailing space; without it the text
            # reads "conversation.Please" / "before.Your".
            """You are an experienced Wikipedia writer and want to edit a specific page. \
Besides your identity as a Wikipedia writer, you have a specific focus when researching the topic. \
Now, you are chatting with an expert to get information. Ask good questions to get more useful information.

When you have no more questions to ask, say "Thank you so much for your help!" to end the conversation. \
Please only ask one question at a time and don't ask what you have asked before. \
Your questions should be related to the topic you want to write.
Be comprehensive and curious, gaining as much unique insight from the expert as possible.\

Stay true to your specific perspective:

{persona}""",
        ),
        MessagesPlaceholder(variable_name="messages", optional=True),
    ]
)

def tag_with_name(ai_message: AIMessage, name:str):
    """Set the message's ``name`` attribute to ``name`` and return the same message object."""
    setattr(ai_message, "name", name)
    return ai_message


def swap_roles(state: InterviewState, name):
    """Re-cast the conversation from the point of view of the speaker called ``name``.

    AI messages whose ``name`` differs from ``name`` are rebuilt as ``HumanMessage``
    objects; every other message is passed through unchanged.

    Args:
        state: Interview state; only ``state['messages']`` is read.
        name: Name of the speaker whose AI messages stay AI messages.

    Returns:
        ``{'messages': [...]}`` holding the converted message list.
    """
    converted = []
    for message in state['messages']:
        if isinstance(message, AIMessage) and message.name != name:
            # `exclude` must be a set: the previous ("type") was just the string
            # "type" (parentheses without a comma do not build a tuple). The dumped
            # 'type' field has to be dropped so HumanMessage keeps its own type.
            # model_dump() is the Pydantic v2 spelling of the deprecated .dict().
            message = HumanMessage(**message.model_dump(exclude={"type"}))
        converted.append(message)
    return {'messages': converted}


@as_runnable
async def generate_question(state:InterviewState):
    """Produce the editor's next interview question.

    Reads ``state['editor']``, re-casts the conversation with ``swap_roles``, fills
    the question prompt with the editor's persona, calls ``fast_llm`` and tags the
    reply with the editor's name. Returns ``{"messages": [reply]}``.
    """
    interviewer = state['editor']
    to_interviewer_view = RunnableLambda(swap_roles).bind(name=interviewer.name)
    persona_prompt = gen_qn_prompt.partial(persona=interviewer.persona)
    sign_reply = RunnableLambda(tag_with_name).bind(name=interviewer.name)
    pipeline = to_interviewer_view | persona_prompt | fast_llm | sign_reply
    reply = await pipeline.ainvoke(state)
    return {"messages": [reply]}


print(perspectives.editors[0])

messages = [
    HumanMessage(f"So you said you were wrting an article on {example_topic}")
]

question = await generate_question.ainvoke(
    {'editor':perspectives.editors[0],
     "messages":messages}
)

question['messages'][0].content

affiliation='Independent AI Researcher' name='Dr. Anya Sharma' role='Performance Analyst' description='Focuses on the quantitative analysis of RAG performance with large context windows, including metrics like recall, precision, and latency. Concerned with benchmarking different models and retrieval strategies.'


"Hello Dr. Sharma, it's a pleasure to speak with you.  My name is Alex, and I'm a Wikipedia editor working on an article about the impact of million-plus token context window language models on Retrieval Augmented Generation (RAG).  My focus is on the practical implications and limitations of these models for real-world applications.  My first question is:  What are some of the most significant challenges you've encountered in benchmarking the recall and precision of RAG systems using these extremely large context windows, beyond the obvious computational constraints?"

## Expert

In [9]:
# Structured-output schema holding the search queries generated for a question.
# Documented with `#` comments on purpose: a class docstring would be added to the
# JSON schema description and change what the model is sent.
class Queries(BaseModel):
    queries: List[str] = Field(
        # Fixed the "questons" typo in the schema text sent to the model.
        description="Comprehensive list of search engine queries to answer the user's questions.",
    )

# Prompt for the query generator: a fixed system instruction followed by the
# (optional) conversation messages.
gen_queries_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful research assistant. Query the search engine to answer the user's questions."),
    MessagesPlaceholder(variable_name="messages", optional=True),
])
# Query prompt -> gemini-2.0-flash with structured output; include_raw=True makes the
# result a dict, and the Queries object is read from its 'parsed' key below.
gen_queries_chain = gen_queries_prompt | ChatGoogleGenerativeAI(model="gemini-2.0-flash").with_structured_output(Queries, include_raw=True)

# Generate search queries for the editor's question produced earlier (top-level await).
queries = await gen_queries_chain.ainvoke({
    'messages': [HumanMessage(content=question['messages'][0].content)]
})
# NOTE(review): 'parsed' is dereferenced without a None check — presumably it can be
# None when the model output fails to parse; confirm.
print(f"Queries: {queries['parsed'].queries}")

Queries: ['challenges in benchmarking RAG systems with large context windows recall precision']
